From d6b654d355dfbe8cd0fce3e73eeacbd068419416 Mon Sep 17 00:00:00 2001 From: Rishikesh Vaishnav Date: Wed, 15 Dec 2021 18:08:10 +0000 Subject: [PATCH 1/2] refactor(lsp): always crop matching text from end of incremental sync range When constructing incremental updates to be sent to the server, previously we only attempted to crop matching text from the end of the old and new `on_lines` ranges on single-line updates. This refactor makes it so that we always attempt to crop from the end, no matter how many lines were changed. This reduces the total size of increments sent to the server when possible by not including text that wasn't changed at the end of the incremental update range. --- runtime/lua/vim/lsp/sync.lua | 127 +++++++++--------- .../plugin/lsp/incremental_sync_spec.lua | 18 +-- 2 files changed, 69 insertions(+), 76 deletions(-) diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua index d01f45ad8f2a..c745fadf876e 100644 --- a/runtime/lua/vim/lsp/sync.lua +++ b/runtime/lua/vim/lsp/sync.lua @@ -93,31 +93,38 @@ end -- utf-8 index and either the utf-16, or utf-32 index. ---@param line string the line to index into ---@param byte integer the byte idx +---@param start boolean true for start align, false for end align ---@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8) ---@returns table byte_idx and char_idx of first change position -local function align_end_position(line, byte, offset_encoding) - local char - -- If on the first byte, or an empty string: the trivial case - if byte == 1 or #line == 0 then - char = byte - -- Called in the case of extending an empty line "" -> "a" - elseif byte == #line + 1 then - char = compute_line_length(line, offset_encoding) + 1 - else +local function align_position(line, byte, start, offset_encoding) + if byte ~= 1 and byte <= #line then -- Modifying line, find the nearest utf codepoint local offset = str_utf_start(line, byte) + -- If the byte does not fall on the start of the character, then - -- align to the start of the next character. + -- align to the start of the next character if end align, and start + -- of this character if start align if offset < 0 then - byte = byte + str_utf_end(line, byte) + 1 - end - if byte <= #line then - char = byte_to_utf(line, byte, offset_encoding) - else - char = compute_line_length(line, offset_encoding) + 1 + if start then + byte = byte + offset + else + byte = byte + str_utf_end(line, byte) + 1 + end end - -- Extending line, find the nearest utf codepoint for the last valid character end + + local char + + -- optimize for first byte case + if byte == 1 then + char = 1 + -- Called in the case of extending an empty line "" -> "a" + elseif byte == #line + 1 then + char = compute_line_length(line, offset_encoding) + 1 + else + char = byte_to_utf(line, byte, offset_encoding) + end + return byte, char end @@ -158,18 +165,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline, end -- Convert byte to codepoint if applicable - local char_idx - local byte_idx - if start_byte_idx == 1 or (#prev_line == 0 and start_byte_idx == 1)then - byte_idx = start_byte_idx - char_idx = 1 - elseif start_byte_idx == #prev_line + 1 then - byte_idx = start_byte_idx - char_idx = compute_line_length(prev_line, offset_encoding) + 1 - else - byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx) - char_idx = byte_to_utf(prev_line, byte_idx, offset_encoding) - end + local byte_idx, char_idx = align_position(prev_line, start_byte_idx, true, offset_encoding) -- Return the start difference (shared for new and prev lines) return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx } @@ -210,51 +206,48 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline, local prev_line_length = #prev_line local curr_line_length = #curr_line - local byte_offset = 0 + local prev_line_range, curr_line_range + if start_line_idx == prev_line_idx then + prev_line_range = prev_line_length - start_range.byte_idx + -- start_line_idx < prev_line_idx + else + prev_line_range = prev_line_length - 1 + end + if start_line_idx == curr_line_idx then + curr_line_range = curr_line_length - start_range.byte_idx + -- start_line_idx < curr_line_idx + else + curr_line_range = curr_line_length - 1 + end - -- Editing the same line - -- If the byte offset is zero, that means there is a difference on the last byte (not newline) - if prev_line_idx == curr_line_idx then - local max_length - if start_line_idx == prev_line_idx then - -- Search until beginning of difference - max_length = min(prev_line_length - start_range.byte_idx, curr_line_length - start_range.byte_idx) + 1 - else - max_length = min(prev_line_length, curr_line_length) + 1 - end - for idx = 0, max_length do - byte_offset = idx - if - str_byte(prev_line, prev_line_length - byte_offset) ~= str_byte(curr_line, curr_line_length - byte_offset) - then - break - end + -- Maximum number of bytes to search backwards for mismatch + local max_length = min(prev_line_range, curr_line_range) + + -- Negative offset to last shared byte between prev_line and curr_line + -- -1 offset indicates no shared byte + local byte_offset = -1 + + -- Iterate from end to beginning of shortest line + for idx = 0, max_length do + byte_offset = idx + if + str_byte(prev_line, prev_line_length - byte_offset) ~= str_byte(curr_line, curr_line_length - byte_offset) + then + -- If there was a mismatched byte, need to go back to next byte (which did match) + byte_offset = byte_offset - 1 + break end end - -- Iterate from end to beginning of shortest line - local prev_end_byte_idx = prev_line_length - byte_offset + 1 + local prev_end_byte_idx = prev_line_length - byte_offset - -- Handle case where lines match - if prev_end_byte_idx == 0 then - prev_end_byte_idx = 1 - end - local prev_byte_idx, prev_char_idx = align_end_position(prev_line, prev_end_byte_idx, offset_encoding) + local prev_byte_idx, prev_char_idx = align_position(prev_line, prev_end_byte_idx, false, offset_encoding) local prev_end_range = { line_idx = prev_line_idx, byte_idx = prev_byte_idx, char_idx = prev_char_idx } - local curr_end_range - -- Deletion event, new_range cannot be before start - if curr_line_idx < start_line_idx then - curr_end_range = { line_idx = start_line_idx, byte_idx = 1, char_idx = 1 } - else - local curr_end_byte_idx = curr_line_length - byte_offset + 1 - -- Handle case where lines match - if curr_end_byte_idx == 0 then - curr_end_byte_idx = 1 - end - local curr_byte_idx, curr_char_idx = align_end_position(curr_line, curr_end_byte_idx, offset_encoding) - curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx } - end + local curr_end_byte_idx = curr_line_length - byte_offset + + local curr_byte_idx, curr_char_idx = align_position(curr_line, curr_end_byte_idx, false, offset_encoding) + local curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx } return prev_end_range, curr_end_range end diff --git a/test/functional/plugin/lsp/incremental_sync_spec.lua b/test/functional/plugin/lsp/incremental_sync_spec.lua index 4e3eddb9607e..e13a5acf3c9a 100644 --- a/test/functional/plugin/lsp/incremental_sync_spec.lua +++ b/test/functional/plugin/lsp/incremental_sync_spec.lua @@ -327,12 +327,12 @@ describe('incremental synchronization', function() line = 1 }, ['end'] = { - character = 9, + character = 4, line = 1 } }, - rangeLength = 5, - text = "_fdsa\nhello world\n1234 asdf" + rangeLength = 0, + text = "_fdsa\nhello world\n1234" }, -- redo entire deletion { @@ -342,12 +342,12 @@ describe('incremental synchronization', function() line = 1 }, ['end'] = { - character = 9, + character = 4, line = 3 } }, - rangeLength = 27, - text = ' asdf' + rangeLength = 22, + text = '' }, } local original_lines = { @@ -460,12 +460,12 @@ describe('incremental synchronization', function() line = 0 }, ['end'] = { - character = 17, + character = 12, line = 0 } }, - rangeLength = 6, - text = '\ntest3' + rangeLength = 1, + text = '\n' }, } test_edit({"test1 test2", "test3"}, {"J", "u"}, expected_text_changes, 'utf-16', '\n') From 7a4877f61de0616964b8f939ad132fc256235d93 Mon Sep 17 00:00:00 2001 From: Rishikesh Vaishnav Date: Tue, 21 Dec 2021 02:05:36 +0000 Subject: [PATCH 2/2] Replace UTF helper functions with `vim.lsp.util._str_utfindex_enc()`. --- runtime/lua/vim/lsp/sync.lua | 49 +++--------------------------------- 1 file changed, 4 insertions(+), 45 deletions(-) diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua index c745fadf876e..f725d3272291 100644 --- a/runtime/lua/vim/lsp/sync.lua +++ b/runtime/lua/vim/lsp/sync.lua @@ -44,50 +44,9 @@ local M = {} -- local string.byte, unclear if this is necessary for JIT compilation local str_byte = string.byte local min = math.min -local str_utfindex = vim.str_utfindex local str_utf_start = vim.str_utf_start local str_utf_end = vim.str_utf_end ----@private --- Given a line, byte idx, and offset_encoding convert to the --- utf-8, utf-16, or utf-32 index. ----@param line string the line to index into ----@param byte integer the byte idx ----@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8) ---@returns integer the utf idx for the given encoding -local function byte_to_utf(line, byte, offset_encoding) - -- convert to 0 based indexing for str_utfindex - byte = byte - 1 - - local utf_idx - local _ - -- Convert the byte range to utf-{8,16,32} and convert 1-based (lua) indexing to 0-based - if offset_encoding == 'utf-16' then - _, utf_idx = str_utfindex(line, byte) - elseif offset_encoding == 'utf-32' then - utf_idx, _ = str_utfindex(line, byte) - else - utf_idx = byte - end - - -- convert to 1 based indexing - return utf_idx + 1 -end - ----@private -local function compute_line_length(line, offset_encoding) - local length - local _ - if offset_encoding == 'utf-16' then - _, length = str_utfindex(line) - elseif offset_encoding == 'utf-32' then - length, _ = str_utfindex(line) - else - length = #line - end - return length -end - ---@private -- Given a line, byte idx, alignment, and offset_encoding convert to the aligned -- utf-8 index and either the utf-16, or utf-32 index. @@ -120,9 +79,9 @@ local function align_position(line, byte, start, offset_encoding) char = 1 -- Called in the case of extending an empty line "" -> "a" elseif byte == #line + 1 then - char = compute_line_length(line, offset_encoding) + 1 + char = vim.lsp.util._str_utfindex_enc(line, nil, offset_encoding) + 1 else - char = byte_to_utf(line, byte, offset_encoding) + char = vim.lsp.util._str_utfindex_enc(line, byte - 1, offset_encoding) + 1 end return byte, char @@ -306,7 +265,7 @@ local function compute_range_length(lines, start_range, end_range, offset_encodi local start_line = lines[start_range.line_idx] local range_length if start_line and #start_line > 0 then - range_length = compute_line_length(start_line, offset_encoding) - start_range.char_idx + 1 + line_ending_length + range_length = vim.lsp.util._str_utfindex_enc(start_line, nil, offset_encoding) - start_range.char_idx + 1 + line_ending_length else -- Length of newline character range_length = line_ending_length @@ -316,7 +275,7 @@ local function compute_range_length(lines, start_range, end_range, offset_encodi for idx = start_range.line_idx + 1, end_range.line_idx - 1 do -- Length full line plus newline character if #lines[idx] > 0 then - range_length = range_length + compute_line_length(lines[idx], offset_encoding) + #line_ending + range_length = range_length + vim.lsp.util._str_utfindex_enc(lines[idx], nil, offset_encoding) + #line_ending else range_length = range_length + line_ending_length end