334 lines
12 KiB
Diff
334 lines
12 KiB
Diff
From d6b654d355dfbe8cd0fce3e73eeacbd068419416 Mon Sep 17 00:00:00 2001
|
|
From: Rishikesh Vaishnav <rishhvaishnav@gmail.com>
|
|
Date: Wed, 15 Dec 2021 18:08:10 +0000
|
|
Subject: [PATCH 1/2] refactor(lsp): always crop matching text from end of
|
|
incremental sync range
|
|
|
|
When constructing incremental updates to be sent to the server,
|
|
previously we only attempted to crop matching text from the end of
|
|
the old and new `on_lines` ranges on single-line updates.
|
|
This refactor makes it so that we always attempt to crop from the end,
|
|
no matter how many lines were changed. This reduces the total size of
|
|
increments sent to the server when possible by not including
|
|
text that wasn't changed at the end of the incremental update range.
|
|
---
|
|
runtime/lua/vim/lsp/sync.lua | 127 +++++++++---------
|
|
.../plugin/lsp/incremental_sync_spec.lua | 18 +--
|
|
2 files changed, 69 insertions(+), 76 deletions(-)
|
|
|
|
diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua
|
|
index d01f45ad8f2a..c745fadf876e 100644
|
|
--- a/runtime/lua/vim/lsp/sync.lua
|
|
+++ b/runtime/lua/vim/lsp/sync.lua
|
|
@@ -93,31 +93,38 @@ end
|
|
-- utf-8 index and either the utf-16, or utf-32 index.
|
|
---@param line string the line to index into
|
|
---@param byte integer the byte idx
|
|
+---@param start boolean true for start align, false for end align
|
|
---@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8)
|
|
---@returns table<string, int> byte_idx and char_idx of first change position
|
|
-local function align_end_position(line, byte, offset_encoding)
|
|
- local char
|
|
- -- If on the first byte, or an empty string: the trivial case
|
|
- if byte == 1 or #line == 0 then
|
|
- char = byte
|
|
- -- Called in the case of extending an empty line "" -> "a"
|
|
- elseif byte == #line + 1 then
|
|
- char = compute_line_length(line, offset_encoding) + 1
|
|
- else
|
|
+local function align_position(line, byte, start, offset_encoding)
|
|
+ if byte ~= 1 and byte <= #line then
|
|
-- Modifying line, find the nearest utf codepoint
|
|
local offset = str_utf_start(line, byte)
|
|
+
|
|
-- If the byte does not fall on the start of the character, then
|
|
- -- align to the start of the next character.
|
|
+ -- align to the start of the next character if end align, and start
|
|
+ -- of this character if start align
|
|
if offset < 0 then
|
|
- byte = byte + str_utf_end(line, byte) + 1
|
|
- end
|
|
- if byte <= #line then
|
|
- char = byte_to_utf(line, byte, offset_encoding)
|
|
- else
|
|
- char = compute_line_length(line, offset_encoding) + 1
|
|
+ if start then
|
|
+ byte = byte + offset
|
|
+ else
|
|
+ byte = byte + str_utf_end(line, byte) + 1
|
|
+ end
|
|
end
|
|
- -- Extending line, find the nearest utf codepoint for the last valid character
|
|
end
|
|
+
|
|
+ local char
|
|
+
|
|
+ -- optimize for first byte case
|
|
+ if byte == 1 then
|
|
+ char = 1
|
|
+ -- Called in the case of extending an empty line "" -> "a"
|
|
+ elseif byte == #line + 1 then
|
|
+ char = compute_line_length(line, offset_encoding) + 1
|
|
+ else
|
|
+ char = byte_to_utf(line, byte, offset_encoding)
|
|
+ end
|
|
+
|
|
return byte, char
|
|
end
|
|
|
|
@@ -158,18 +165,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline,
|
|
end
|
|
|
|
-- Convert byte to codepoint if applicable
|
|
- local char_idx
|
|
- local byte_idx
|
|
- if start_byte_idx == 1 or (#prev_line == 0 and start_byte_idx == 1)then
|
|
- byte_idx = start_byte_idx
|
|
- char_idx = 1
|
|
- elseif start_byte_idx == #prev_line + 1 then
|
|
- byte_idx = start_byte_idx
|
|
- char_idx = compute_line_length(prev_line, offset_encoding) + 1
|
|
- else
|
|
- byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
|
|
- char_idx = byte_to_utf(prev_line, byte_idx, offset_encoding)
|
|
- end
|
|
+ local byte_idx, char_idx = align_position(prev_line, start_byte_idx, true, offset_encoding)
|
|
|
|
-- Return the start difference (shared for new and prev lines)
|
|
return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx }
|
|
@@ -210,51 +206,48 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline,
|
|
local prev_line_length = #prev_line
|
|
local curr_line_length = #curr_line
|
|
|
|
- local byte_offset = 0
|
|
+ local prev_line_range, curr_line_range
|
|
+ if start_line_idx == prev_line_idx then
|
|
+ prev_line_range = prev_line_length - start_range.byte_idx
|
|
+ -- start_line_idx < prev_line_idx
|
|
+ else
|
|
+ prev_line_range = prev_line_length - 1
|
|
+ end
|
|
+ if start_line_idx == curr_line_idx then
|
|
+ curr_line_range = curr_line_length - start_range.byte_idx
|
|
+ -- start_line_idx < curr_line_idx
|
|
+ else
|
|
+ curr_line_range = curr_line_length - 1
|
|
+ end
|
|
|
|
- -- Editing the same line
|
|
- -- If the byte offset is zero, that means there is a difference on the last byte (not newline)
|
|
- if prev_line_idx == curr_line_idx then
|
|
- local max_length
|
|
- if start_line_idx == prev_line_idx then
|
|
- -- Search until beginning of difference
|
|
- max_length = min(prev_line_length - start_range.byte_idx, curr_line_length - start_range.byte_idx) + 1
|
|
- else
|
|
- max_length = min(prev_line_length, curr_line_length) + 1
|
|
- end
|
|
- for idx = 0, max_length do
|
|
- byte_offset = idx
|
|
- if
|
|
- str_byte(prev_line, prev_line_length - byte_offset) ~= str_byte(curr_line, curr_line_length - byte_offset)
|
|
- then
|
|
- break
|
|
- end
|
|
+ -- Maximum number of bytes to search backwards for mismatch
|
|
+ local max_length = min(prev_line_range, curr_line_range)
|
|
+
|
|
+ -- Negative offset to last shared byte between prev_line and curr_line
|
|
+ -- -1 offset indicates no shared byte
|
|
+ local byte_offset = -1
|
|
+
|
|
+ -- Iterate from end to beginning of shortest line
|
|
+ for idx = 0, max_length do
|
|
+ byte_offset = idx
|
|
+ if
|
|
+ str_byte(prev_line, prev_line_length - byte_offset) ~= str_byte(curr_line, curr_line_length - byte_offset)
|
|
+ then
|
|
+      -- Mismatch at this offset: step back to the previously examined offset, whose byte did match (-1 if none matched)
|
|
+ byte_offset = byte_offset - 1
|
|
+ break
|
|
end
|
|
end
|
|
|
|
- -- Iterate from end to beginning of shortest line
|
|
- local prev_end_byte_idx = prev_line_length - byte_offset + 1
|
|
+ local prev_end_byte_idx = prev_line_length - byte_offset
|
|
|
|
- -- Handle case where lines match
|
|
- if prev_end_byte_idx == 0 then
|
|
- prev_end_byte_idx = 1
|
|
- end
|
|
- local prev_byte_idx, prev_char_idx = align_end_position(prev_line, prev_end_byte_idx, offset_encoding)
|
|
+ local prev_byte_idx, prev_char_idx = align_position(prev_line, prev_end_byte_idx, false, offset_encoding)
|
|
local prev_end_range = { line_idx = prev_line_idx, byte_idx = prev_byte_idx, char_idx = prev_char_idx }
|
|
|
|
- local curr_end_range
|
|
- -- Deletion event, new_range cannot be before start
|
|
- if curr_line_idx < start_line_idx then
|
|
- curr_end_range = { line_idx = start_line_idx, byte_idx = 1, char_idx = 1 }
|
|
- else
|
|
- local curr_end_byte_idx = curr_line_length - byte_offset + 1
|
|
- -- Handle case where lines match
|
|
- if curr_end_byte_idx == 0 then
|
|
- curr_end_byte_idx = 1
|
|
- end
|
|
- local curr_byte_idx, curr_char_idx = align_end_position(curr_line, curr_end_byte_idx, offset_encoding)
|
|
- curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx }
|
|
- end
|
|
+ local curr_end_byte_idx = curr_line_length - byte_offset
|
|
+
|
|
+ local curr_byte_idx, curr_char_idx = align_position(curr_line, curr_end_byte_idx, false, offset_encoding)
|
|
+ local curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx }
|
|
|
|
return prev_end_range, curr_end_range
|
|
end
|
|
diff --git a/test/functional/plugin/lsp/incremental_sync_spec.lua b/test/functional/plugin/lsp/incremental_sync_spec.lua
|
|
index 4e3eddb9607e..e13a5acf3c9a 100644
|
|
--- a/test/functional/plugin/lsp/incremental_sync_spec.lua
|
|
+++ b/test/functional/plugin/lsp/incremental_sync_spec.lua
|
|
@@ -327,12 +327,12 @@ describe('incremental synchronization', function()
|
|
line = 1
|
|
},
|
|
['end'] = {
|
|
- character = 9,
|
|
+ character = 4,
|
|
line = 1
|
|
}
|
|
},
|
|
- rangeLength = 5,
|
|
- text = "_fdsa\nhello world\n1234 asdf"
|
|
+ rangeLength = 0,
|
|
+ text = "_fdsa\nhello world\n1234"
|
|
},
|
|
-- redo entire deletion
|
|
{
|
|
@@ -342,12 +342,12 @@ describe('incremental synchronization', function()
|
|
line = 1
|
|
},
|
|
['end'] = {
|
|
- character = 9,
|
|
+ character = 4,
|
|
line = 3
|
|
}
|
|
},
|
|
- rangeLength = 27,
|
|
- text = ' asdf'
|
|
+ rangeLength = 22,
|
|
+ text = ''
|
|
},
|
|
}
|
|
local original_lines = {
|
|
@@ -460,12 +460,12 @@ describe('incremental synchronization', function()
|
|
line = 0
|
|
},
|
|
['end'] = {
|
|
- character = 17,
|
|
+ character = 12,
|
|
line = 0
|
|
}
|
|
},
|
|
- rangeLength = 6,
|
|
- text = '\ntest3'
|
|
+ rangeLength = 1,
|
|
+ text = '\n'
|
|
},
|
|
}
|
|
test_edit({"test1 test2", "test3"}, {"J", "u"}, expected_text_changes, 'utf-16', '\n')
|
|
|
|
From 7a4877f61de0616964b8f939ad132fc256235d93 Mon Sep 17 00:00:00 2001
|
|
From: Rishikesh Vaishnav <rishhvaishnav@gmail.com>
|
|
Date: Tue, 21 Dec 2021 02:05:36 +0000
|
|
Subject: [PATCH 2/2] Replace UTF helper functions with
|
|
`vim.lsp.util._str_utfindex_enc()`.
|
|
|
|
---
|
|
runtime/lua/vim/lsp/sync.lua | 49 +++---------------------------------
|
|
1 file changed, 4 insertions(+), 45 deletions(-)
|
|
|
|
diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua
|
|
index c745fadf876e..f725d3272291 100644
|
|
--- a/runtime/lua/vim/lsp/sync.lua
|
|
+++ b/runtime/lua/vim/lsp/sync.lua
|
|
@@ -44,50 +44,9 @@ local M = {}
|
|
-- local string.byte, unclear if this is necessary for JIT compilation
|
|
local str_byte = string.byte
|
|
local min = math.min
|
|
-local str_utfindex = vim.str_utfindex
|
|
local str_utf_start = vim.str_utf_start
|
|
local str_utf_end = vim.str_utf_end
|
|
|
|
----@private
|
|
--- Given a line, byte idx, and offset_encoding convert to the
|
|
--- utf-8, utf-16, or utf-32 index.
|
|
----@param line string the line to index into
|
|
----@param byte integer the byte idx
|
|
----@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8)
|
|
---@returns integer the utf idx for the given encoding
|
|
-local function byte_to_utf(line, byte, offset_encoding)
|
|
- -- convert to 0 based indexing for str_utfindex
|
|
- byte = byte - 1
|
|
-
|
|
- local utf_idx
|
|
- local _
|
|
- -- Convert the byte range to utf-{8,16,32} and convert 1-based (lua) indexing to 0-based
|
|
- if offset_encoding == 'utf-16' then
|
|
- _, utf_idx = str_utfindex(line, byte)
|
|
- elseif offset_encoding == 'utf-32' then
|
|
- utf_idx, _ = str_utfindex(line, byte)
|
|
- else
|
|
- utf_idx = byte
|
|
- end
|
|
-
|
|
- -- convert to 1 based indexing
|
|
- return utf_idx + 1
|
|
-end
|
|
-
|
|
----@private
|
|
-local function compute_line_length(line, offset_encoding)
|
|
- local length
|
|
- local _
|
|
- if offset_encoding == 'utf-16' then
|
|
- _, length = str_utfindex(line)
|
|
- elseif offset_encoding == 'utf-32' then
|
|
- length, _ = str_utfindex(line)
|
|
- else
|
|
- length = #line
|
|
- end
|
|
- return length
|
|
-end
|
|
-
|
|
---@private
|
|
-- Given a line, byte idx, alignment, and offset_encoding convert to the aligned
|
|
-- utf-8 index and either the utf-16, or utf-32 index.
|
|
@@ -120,9 +79,9 @@ local function align_position(line, byte, start, offset_encoding)
|
|
char = 1
|
|
-- Called in the case of extending an empty line "" -> "a"
|
|
elseif byte == #line + 1 then
|
|
- char = compute_line_length(line, offset_encoding) + 1
|
|
+ char = vim.lsp.util._str_utfindex_enc(line, nil, offset_encoding) + 1
|
|
else
|
|
- char = byte_to_utf(line, byte, offset_encoding)
|
|
+ char = vim.lsp.util._str_utfindex_enc(line, byte - 1, offset_encoding) + 1
|
|
end
|
|
|
|
return byte, char
|
|
@@ -306,7 +265,7 @@ local function compute_range_length(lines, start_range, end_range, offset_encodi
|
|
local start_line = lines[start_range.line_idx]
|
|
local range_length
|
|
if start_line and #start_line > 0 then
|
|
- range_length = compute_line_length(start_line, offset_encoding) - start_range.char_idx + 1 + line_ending_length
|
|
+ range_length = vim.lsp.util._str_utfindex_enc(start_line, nil, offset_encoding) - start_range.char_idx + 1 + line_ending_length
|
|
else
|
|
-- Length of newline character
|
|
range_length = line_ending_length
|
|
@@ -316,7 +275,7 @@ local function compute_range_length(lines, start_range, end_range, offset_encodi
|
|
for idx = start_range.line_idx + 1, end_range.line_idx - 1 do
|
|
-- Length full line plus newline character
|
|
if #lines[idx] > 0 then
|
|
- range_length = range_length + compute_line_length(lines[idx], offset_encoding) + #line_ending
|
|
+ range_length = range_length + vim.lsp.util._str_utfindex_enc(lines[idx], nil, offset_encoding) + #line_ending
|
|
else
|
|
range_length = range_length + line_ending_length
|
|
end
|