From 166c4d54efc9e8c10502db13c094d7de3f245ce5 Mon Sep 17 00:00:00 2001 From: Rishikesh Vaishnav Date: Wed, 15 Dec 2021 21:19:43 +0000 Subject: [PATCH] fix(lsp): incremental sync UTF fixes (#16624) Aligning end position: - fix check for preexisting UTF-8 alignment: check `vim.str_utf_start() == 0` instead of `vim.str_utf_end() == 0` - fix setting of byte index when not already aligned: set to beginning of next codepoint rather than end of this one - set char index after aligning byte index, removing unnecessary adjustment of byte index when already aligned Aligning start position: - fix setting of char index: use aligned byte index rather than original in `byte_to_utf()` Add tests to cover these fixes as well as some other previously untested paths. --- runtime/lua/vim/lsp/sync.lua | 15 +- .../plugin/lsp/incremental_sync_spec.lua | 269 ++++++++++++++++++ 2 files changed, 277 insertions(+), 7 deletions(-) diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua index 5df2a4d144d2..d01f45ad8f2a 100644 --- a/runtime/lua/vim/lsp/sync.lua +++ b/runtime/lua/vim/lsp/sync.lua @@ -105,15 +105,16 @@ local function align_end_position(line, byte, offset_encoding) char = compute_line_length(line, offset_encoding) + 1 else -- Modifying line, find the nearest utf codepoint - local offset = str_utf_end(line, byte) + local offset = str_utf_start(line, byte) -- If the byte does not fall on the start of the character, then -- align to the start of the next character. - if offset > 0 then - char = byte_to_utf(line, byte, offset_encoding) + 1 - byte = byte + offset - else + if offset < 0 then + byte = byte + str_utf_end(line, byte) + 1 + end + if byte <= #line then char = byte_to_utf(line, byte, offset_encoding) - byte = byte + offset + else + char = compute_line_length(line, offset_encoding) + 1 end -- Extending line, find the nearest utf codepoint for the last valid character end @@ -167,7 +168,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline, char_idx = compute_line_length(prev_line, offset_encoding) + 1 else byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx) - char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding) + char_idx = byte_to_utf(prev_line, byte_idx, offset_encoding) end -- Return the start difference (shared for new and prev lines) diff --git a/test/functional/plugin/lsp/incremental_sync_spec.lua b/test/functional/plugin/lsp/incremental_sync_spec.lua index 5dd34e766528..4e3eddb9607e 100644 --- a/test/functional/plugin/lsp/incremental_sync_spec.lua +++ b/test/functional/plugin/lsp/incremental_sync_spec.lua @@ -164,6 +164,201 @@ describe('incremental synchronization', function() } test_edit({"a"}, {"rb"}, expected_text_changes, 'utf-16', '\n') end) + it('deleting a line', function() + local expected_text_changes = { + { + range = { + ['start'] = { + character = 0, + line = 0 + }, + ['end'] = { + character = 0, + line = 1 + } + }, + rangeLength = 12, + text = '' + } + } + test_edit({"hello world"}, {"dd"}, expected_text_changes, 'utf-16', '\n') + end) + it('deleting an empty line', function() + local expected_text_changes = { + { + range = { + ['start'] = { + character = 0, + line = 1 + }, + ['end'] = { + character = 0, + line = 2 + } + }, + rangeLength = 1, + text = '' + } + } + test_edit({"hello world", ""}, {"jdd"}, expected_text_changes, 'utf-16', '\n') + end) + it('adding a line', function() + local expected_text_changes = { + { + range = { + ['start'] = { + character = 0, + line = 1 + }, + ['end'] = { + character = 0, + line = 1 + } + }, + rangeLength = 0, + text = 'hello world\n' + } + } + test_edit({"hello world"}, {"yyp"}, expected_text_changes, 'utf-16', '\n') + end) + it('adding an empty line', function() + local expected_text_changes = { + { + range = { + ['start'] = { + character = 0, + line = 1 + }, + ['end'] = { + character = 0, + line = 1 + } + }, + rangeLength = 0, + text = '\n' + } + } + test_edit({"hello world"}, {"o"}, expected_text_changes, 'utf-16', '\n') + end) + end) + describe('multi line edit', function() + it('deletion and insertion', function() + local expected_text_changes = { + -- delete "_fsda" from end of line 1 + { + range = { + ['start'] = { + character = 4, + line = 1 + }, + ['end'] = { + character = 9, + line = 1 + } + }, + rangeLength = 5, + text = '' + }, + -- delete "hello world\n" from line 2 + { + range = { + ['start'] = { + character = 0, + line = 2 + }, + ['end'] = { + character = 0, + line = 3 + } + }, + rangeLength = 12, + text = '' + }, + -- delete "1234" from beginning of line 2 + { + range = { + ['start'] = { + character = 0, + line = 2 + }, + ['end'] = { + character = 4, + line = 2 + } + }, + rangeLength = 4, + text = '' + }, + -- add " asdf" to end of line 1 + { + range = { + ['start'] = { + character = 4, + line = 1 + }, + ['end'] = { + character = 4, + line = 1 + } + }, + rangeLength = 0, + text = ' asdf' + }, + -- delete " asdf\n" from line 2 + { + range = { + ['start'] = { + character = 0, + line = 2 + }, + ['end'] = { + character = 0, + line = 3 + } + }, + rangeLength = 6, + text = '' + }, + -- undo entire deletion + { + range = { + ['start'] = { + character = 4, + line = 1 + }, + ['end'] = { + character = 9, + line = 1 + } + }, + rangeLength = 5, + text = "_fdsa\nhello world\n1234 asdf" + }, + -- redo entire deletion + { + range = { + ['start'] = { + character = 4, + line = 1 + }, + ['end'] = { + character = 9, + line = 3 + } + }, + rangeLength = 27, + text = ' asdf' + }, + } + local original_lines = { + "\\begin{document}", + "test_fdsa", + "hello world", + "1234 asdf", + "\\end{document}" + } + test_edit(original_lines, {"jf_vejjbhhdu"}, expected_text_changes, 'utf-16', '\n') + end) end) describe('multi-operation edits', function() @@ -297,6 +492,80 @@ describe('incremental synchronization', function() } test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n') end) + it('replacing a multibyte character with matching prefix', function() + local expected_text_changes = { + { + range = { + ['start'] = { + character = 0, + line = 1 + }, + ['end'] = { + character = 1, + line = 1 + } + }, + rangeLength = 1, + text = '⟩' + } + } + -- ⟨ is e29fa8, ⟩ is e29fa9 + local original_lines = { + "\\begin{document}", + "⟨", + "\\end{document}", + } + test_edit(original_lines, {"jr⟩"}, expected_text_changes, 'utf-16', '\n') + end) + it('replacing a multibyte character with matching suffix', function() + local expected_text_changes = { + { + range = { + ['start'] = { + character = 0, + line = 1 + }, + ['end'] = { + character = 1, + line = 1 + } + }, + rangeLength = 1, + text = 'ḟ' + } + } + -- ฟ is e0b89f, ḟ is e1b89f + local original_lines = { + "\\begin{document}", + "ฟ", + "\\end{document}", + } + test_edit(original_lines, {"jrḟ"}, expected_text_changes, 'utf-16', '\n') + end) + it('inserting before a multibyte character', function() + local expected_text_changes = { + { + range = { + ['start'] = { + character = 0, + line = 1 + }, + ['end'] = { + character = 0, + line = 1 + } + }, + rangeLength = 0, + text = ' ' + } + } + local original_lines = { + "\\begin{document}", + "→", + "\\end{document}", + } + test_edit(original_lines, {"ji "}, expected_text_changes, 'utf-16', '\n') + end) it('deleting a multibyte character from a long line', function() local expected_text_changes = { {