diff --git a/16669.patch b/16669.patch new file mode 100644 index 0000000..08c2d84 --- /dev/null +++ b/16669.patch @@ -0,0 +1,500 @@ +From 0500ace6e138c3fcbd15b951f6af76a45e80f0ab Mon Sep 17 00:00:00 2001 +From: Rishikesh Vaishnav +Date: Wed, 15 Dec 2021 18:08:10 +0000 +Subject: [PATCH] fix(lsp): general fix/simplification of incremental sync + +--- + runtime/lua/vim/lsp/sync.lua | 125 ++++---- + src/nvim/lua/stdlib.c | 4 +- + .../plugin/lsp/incremental_sync_spec.lua | 275 +++++++++++++++++- + 3 files changed, 332 insertions(+), 72 deletions(-) + +diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua +index 5df2a4d144d2..ce3c8c1b2e5d 100644 +--- a/runtime/lua/vim/lsp/sync.lua ++++ b/runtime/lua/vim/lsp/sync.lua +@@ -93,30 +93,35 @@ end + -- utf-8 index and either the utf-16, or utf-32 index. + ---@param line string the line to index into + ---@param byte integer the byte idx ++---@param start boolean true for start align, false for end align + ---@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8) + ---@returns table byte_idx and char_idx of first change position +-local function align_end_position(line, byte, offset_encoding) ++local function align_position(line, byte, start, offset_encoding) ++ if byte <= #line then ++ -- Modifying line, find the nearest utf codepoint ++ local offset = str_utf_start(line, byte) ++ ++ -- If the byte does not fall on the start of the character, then ++ -- align to the start of the next character if end align, and start ++ -- of this character if start align ++ if offset < 0 then ++ if start then ++ byte = byte + offset ++ else ++ byte = byte + str_utf_end(line, byte) + 1 ++ end ++ end ++ end ++ + local char +- -- If on the first byte, or an empty string: the trivial case +- if byte == 1 or #line == 0 then +- char = byte ++ + -- Called in the case of extending an empty line "" -> "a" +- elseif byte == #line + 1 then ++ if byte == #line + 1 then + char = compute_line_length(line, offset_encoding) + 1 + else +- -- Modifying line, find the nearest utf codepoint +- local offset = str_utf_end(line, byte) +- -- If the byte does not fall on the start of the character, then +- -- align to the start of the next character. +- if offset > 0 then +- char = byte_to_utf(line, byte, offset_encoding) + 1 +- byte = byte + offset +- else +- char = byte_to_utf(line, byte, offset_encoding) +- byte = byte + offset +- end +- -- Extending line, find the nearest utf codepoint for the last valid character ++ char = byte_to_utf(line, byte, offset_encoding) + end ++ + return byte, char + end + +@@ -157,18 +162,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline, + end + + -- Convert byte to codepoint if applicable +- local char_idx +- local byte_idx +- if start_byte_idx == 1 or (#prev_line == 0 and start_byte_idx == 1)then +- byte_idx = start_byte_idx +- char_idx = 1 +- elseif start_byte_idx == #prev_line + 1 then +- byte_idx = start_byte_idx +- char_idx = compute_line_length(prev_line, offset_encoding) + 1 +- else +- byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx) +- char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding) +- end ++ local byte_idx, char_idx = align_position(prev_line, start_byte_idx, true, offset_encoding) + + -- Return the start difference (shared for new and prev lines) + return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx } +@@ -209,51 +203,48 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline, + local prev_line_length = #prev_line + local curr_line_length = #curr_line + +- local byte_offset = 0 ++ local prev_line_range, curr_line_range ++ if start_line_idx == prev_line_idx then ++ prev_line_range = prev_line_length - start_range.byte_idx ++ -- start_line_idx < prev_line_idx ++ else ++ prev_line_range = prev_line_length - 1 ++ end ++ if start_line_idx == curr_line_idx then ++ curr_line_range = curr_line_length - start_range.byte_idx ++ -- start_line_idx < curr_line_idx ++ else ++ curr_line_range = curr_line_length - 1 ++ end ++ ++ -- Maximum number of bytes to search backwards for mismatch ++ local max_length = min(prev_line_range, curr_line_range) + +- -- Editing the same line +- -- If the byte offset is zero, that means there is a difference on the last byte (not newline) +- if prev_line_idx == curr_line_idx then +- local max_length +- if start_line_idx == prev_line_idx then +- -- Search until beginning of difference +- max_length = min(prev_line_length - start_range.byte_idx, curr_line_length - start_range.byte_idx) + 1 +- else +- max_length = min(prev_line_length, curr_line_length) + 1 +- end +- for idx = 0, max_length do +- byte_offset = idx +- if +- str_byte(prev_line, prev_line_length - byte_offset) ~= str_byte(curr_line, curr_line_length - byte_offset) +- then +- break +- end ++ -- Negative offset to last shared byte between prev_line and curr_line ++ -- -1 offset indicates no shared byte ++ local byte_offset = -1 ++ ++ -- Iterate from end to beginning of shortest line ++ for idx = 0, max_length do ++ byte_offset = idx ++ if ++ str_byte(prev_line, prev_line_length - byte_offset) ~= str_byte(curr_line, curr_line_length - byte_offset) ++ then ++ -- If there was a mismatched byte, need to go back to next byte (which did match) ++ byte_offset = byte_offset - 1 ++ break + end + end + +- -- Iterate from end to beginning of shortest line +- local prev_end_byte_idx = prev_line_length - byte_offset + 1 ++ local prev_end_byte_idx = prev_line_length - byte_offset + +- -- Handle case where lines match +- if prev_end_byte_idx == 0 then +- prev_end_byte_idx = 1 +- end +- local prev_byte_idx, prev_char_idx = align_end_position(prev_line, prev_end_byte_idx, offset_encoding) ++ local prev_byte_idx, prev_char_idx = align_position(prev_line, prev_end_byte_idx, false, offset_encoding) + local prev_end_range = { line_idx = prev_line_idx, byte_idx = prev_byte_idx, char_idx = prev_char_idx } + +- local curr_end_range +- -- Deletion event, new_range cannot be before start +- if curr_line_idx < start_line_idx then +- curr_end_range = { line_idx = start_line_idx, byte_idx = 1, char_idx = 1 } +- else +- local curr_end_byte_idx = curr_line_length - byte_offset + 1 +- -- Handle case where lines match +- if curr_end_byte_idx == 0 then +- curr_end_byte_idx = 1 +- end +- local curr_byte_idx, curr_char_idx = align_end_position(curr_line, curr_end_byte_idx, offset_encoding) +- curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx } +- end ++ local curr_end_byte_idx = curr_line_length - byte_offset ++ ++ local curr_byte_idx, curr_char_idx = align_position(curr_line, curr_end_byte_idx, false, offset_encoding) ++ local curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx } + + return prev_end_range, curr_end_range + end +diff --git a/src/nvim/lua/stdlib.c b/src/nvim/lua/stdlib.c +index b746e03625ad..9441b88cfb98 100644 +--- a/src/nvim/lua/stdlib.c ++++ b/src/nvim/lua/stdlib.c +@@ -230,7 +230,7 @@ static int nlua_str_utf_start(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL + if (offset < 0 || offset > (intptr_t)s1_len) { + return luaL_error(lstate, "index out of range"); + } +- int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1); ++ int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + offset - 1); + lua_pushinteger(lstate, tail_offset); + return 1; + } +@@ -250,7 +250,7 @@ static int nlua_str_utf_end(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL + if (offset < 0 || offset > (intptr_t)s1_len) { + return luaL_error(lstate, "index out of range"); + } +- int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1); ++ int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + offset - 1); + lua_pushinteger(lstate, tail_offset); + return 1; + } +diff --git a/test/functional/plugin/lsp/incremental_sync_spec.lua b/test/functional/plugin/lsp/incremental_sync_spec.lua +index 5dd34e766528..60e35c3620cc 100644 +--- a/test/functional/plugin/lsp/incremental_sync_spec.lua ++++ b/test/functional/plugin/lsp/incremental_sync_spec.lua +@@ -164,6 +164,201 @@ describe('incremental synchronization', function() + } + test_edit({"a"}, {"rb"}, expected_text_changes, 'utf-16', '\n') + end) ++ it('deleting a line', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 0 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 1 ++ } ++ }, ++ rangeLength = 12, ++ text = '' ++ } ++ } ++ test_edit({"hello world"}, {"dd"}, expected_text_changes, 'utf-16', '\n') ++ end) ++ it('deleting an empty line', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 2 ++ } ++ }, ++ rangeLength = 1, ++ text = '' ++ } ++ } ++ test_edit({"hello world", ""}, {"jdd"}, expected_text_changes, 'utf-16', '\n') ++ end) ++ it('adding a line', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 1 ++ } ++ }, ++ rangeLength = 0, ++ text = 'hello world\n' ++ } ++ } ++ test_edit({"hello world"}, {"yyp"}, expected_text_changes, 'utf-16', '\n') ++ end) ++ it('adding an empty line', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 1 ++ } ++ }, ++ rangeLength = 0, ++ text = '\n' ++ } ++ } ++ test_edit({"hello world"}, {"o"}, expected_text_changes, 'utf-16', '\n') ++ end) ++ end) ++ describe('multi line edit', function() ++ it('deletion and insertion', function() ++ local expected_text_changes = { ++ -- delete "_fsda" from end of line 1 ++ { ++ range = { ++ ['start'] = { ++ character = 4, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 9, ++ line = 1 ++ } ++ }, ++ rangeLength = 5, ++ text = '' ++ }, ++ -- delete "hello world\n" from line 2 ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 2 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 3 ++ } ++ }, ++ rangeLength = 12, ++ text = '' ++ }, ++ -- delete "1234" from beginning of line 2 ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 2 ++ }, ++ ['end'] = { ++ character = 4, ++ line = 2 ++ } ++ }, ++ rangeLength = 4, ++ text = '' ++ }, ++ -- add " asdf" to end of line 1 ++ { ++ range = { ++ ['start'] = { ++ character = 4, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 4, ++ line = 1 ++ } ++ }, ++ rangeLength = 0, ++ text = ' asdf' ++ }, ++ -- delete " asdf\n" from line 2 ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 2 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 3 ++ } ++ }, ++ rangeLength = 6, ++ text = '' ++ }, ++ -- undo entire deletion ++ { ++ range = { ++ ['start'] = { ++ character = 4, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 4, ++ line = 1 ++ } ++ }, ++ rangeLength = 0, ++ text = "_fdsa\nhello world\n1234" ++ }, ++ -- redo entire deletion ++ { ++ range = { ++ ['start'] = { ++ character = 4, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 4, ++ line = 3 ++ } ++ }, ++ rangeLength = 22, ++ text = '' ++ }, ++ } ++ local original_lines = { ++ "\\begin{document}", ++ "test_fdsa", ++ "hello world", ++ "1234 asdf", ++ "\\end{document}" ++ } ++ test_edit(original_lines, {"jf_vejjbhhdu"}, expected_text_changes, 'utf-16', '\n') ++ end) + end) + + describe('multi-operation edits', function() +@@ -265,12 +460,12 @@ describe('incremental synchronization', function() + line = 0 + }, + ['end'] = { +- character = 17, ++ character = 12, + line = 0 + } + }, +- rangeLength = 6, +- text = '\ntest3' ++ rangeLength = 1, ++ text = '\n' + }, + } + test_edit({"test1 test2", "test3"}, {"J", "u"}, expected_text_changes, 'utf-16', '\n') +@@ -297,6 +492,80 @@ describe('incremental synchronization', function() + } + test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n') + end) ++ it('replacing a multibyte character with matching prefix', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 1, ++ line = 1 ++ } ++ }, ++ rangeLength = 1, ++ text = '⟩' ++ } ++ } ++ -- ⟨ is e29fa8, ⟩ is e29fa9 ++ local original_lines = { ++ "\\begin{document}", ++ "⟨", ++ "\\end{document}", ++ } ++ test_edit(original_lines, {"jr⟩"}, expected_text_changes, 'utf-16', '\n') ++ end) ++ it('replacing a multibyte character with matching suffix', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 1, ++ line = 1 ++ } ++ }, ++ rangeLength = 1, ++ text = 'ḟ' ++ } ++ } ++ -- ฟ is e0b89f, ḟ is e1b89f ++ local original_lines = { ++ "\\begin{document}", ++ "ฟ", ++ "\\end{document}", ++ } ++ test_edit(original_lines, {"jrḟ"}, expected_text_changes, 'utf-16', '\n') ++ end) ++ it('inserting before a multibyte character', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 1 ++ } ++ }, ++ rangeLength = 0, ++ text = ' ' ++ } ++ } ++ local original_lines = { ++ "\\begin{document}", ++ "→", ++ "\\end{document}", ++ } ++ test_edit(original_lines, {"ji "}, expected_text_changes, 'utf-16', '\n') ++ end) + it('deleting a multibyte character from a long line', function() + local expected_text_changes = { + { diff --git a/nvim05.nix b/nvim05.nix index 703e692..55ac3c3 100644 --- a/nvim05.nix +++ b/nvim05.nix @@ -4,6 +4,10 @@ # nixpkgs.overlays = [ inputs.neovim-overlay.overlay ]; nixpkgs.config.packageOverrides = pkgs: with pkgs; { + neovim-unwrapped = neovim-unwrapped.overrideDerivation (orig: { + patches = orig.patches ++ [ ./16669.patch ]; + }); + neovim-qt-unwrapped = neovim-qt-unwrapped.overrideDerivation (_: { version = "0.2.17.9999"; src = fetchFromGitHub {