Add unicode LSP sync patch for neovim.
This commit is contained in:
parent
c147fb9a69
commit
f938fd473e
500
16669.patch
Normal file
500
16669.patch
Normal file
@ -0,0 +1,500 @@
|
||||
From 0500ace6e138c3fcbd15b951f6af76a45e80f0ab Mon Sep 17 00:00:00 2001
|
||||
From: Rishikesh Vaishnav <rishhvaishnav@gmail.com>
|
||||
Date: Wed, 15 Dec 2021 18:08:10 +0000
|
||||
Subject: [PATCH] fix(lsp): general fix/simplification of incremental sync
|
||||
|
||||
---
|
||||
runtime/lua/vim/lsp/sync.lua | 125 ++++----
|
||||
src/nvim/lua/stdlib.c | 4 +-
|
||||
.../plugin/lsp/incremental_sync_spec.lua | 275 +++++++++++++++++-
|
||||
3 files changed, 332 insertions(+), 72 deletions(-)
|
||||
|
||||
diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua
|
||||
index 5df2a4d144d2..ce3c8c1b2e5d 100644
|
||||
--- a/runtime/lua/vim/lsp/sync.lua
|
||||
+++ b/runtime/lua/vim/lsp/sync.lua
|
||||
@@ -93,30 +93,35 @@ end
|
||||
-- utf-8 index and either the utf-16, or utf-32 index.
|
||||
---@param line string the line to index into
|
||||
---@param byte integer the byte idx
|
||||
+---@param start boolean true for start align, false for end align
|
||||
---@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8)
|
||||
---@returns table<string, int> byte_idx and char_idx of first change position
|
||||
-local function align_end_position(line, byte, offset_encoding)
|
||||
+local function align_position(line, byte, start, offset_encoding)
|
||||
+ if byte <= #line then
|
||||
+ -- Modifying line, find the nearest utf codepoint
|
||||
+ local offset = str_utf_start(line, byte)
|
||||
+
|
||||
+ -- If the byte does not fall on the start of the character, then
|
||||
+ -- align to the start of the next character if end align, and start
|
||||
+ -- of this character if start align
|
||||
+ if offset < 0 then
|
||||
+ if start then
|
||||
+ byte = byte + offset
|
||||
+ else
|
||||
+ byte = byte + str_utf_end(line, byte) + 1
|
||||
+ end
|
||||
+ end
|
||||
+ end
|
||||
+
|
||||
local char
|
||||
- -- If on the first byte, or an empty string: the trivial case
|
||||
- if byte == 1 or #line == 0 then
|
||||
- char = byte
|
||||
+
|
||||
-- Called in the case of extending an empty line "" -> "a"
|
||||
- elseif byte == #line + 1 then
|
||||
+ if byte == #line + 1 then
|
||||
char = compute_line_length(line, offset_encoding) + 1
|
||||
else
|
||||
- -- Modifying line, find the nearest utf codepoint
|
||||
- local offset = str_utf_end(line, byte)
|
||||
- -- If the byte does not fall on the start of the character, then
|
||||
- -- align to the start of the next character.
|
||||
- if offset > 0 then
|
||||
- char = byte_to_utf(line, byte, offset_encoding) + 1
|
||||
- byte = byte + offset
|
||||
- else
|
||||
- char = byte_to_utf(line, byte, offset_encoding)
|
||||
- byte = byte + offset
|
||||
- end
|
||||
- -- Extending line, find the nearest utf codepoint for the last valid character
|
||||
+ char = byte_to_utf(line, byte, offset_encoding)
|
||||
end
|
||||
+
|
||||
return byte, char
|
||||
end
|
||||
|
||||
@@ -157,18 +162,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline,
|
||||
end
|
||||
|
||||
-- Convert byte to codepoint if applicable
|
||||
- local char_idx
|
||||
- local byte_idx
|
||||
- if start_byte_idx == 1 or (#prev_line == 0 and start_byte_idx == 1)then
|
||||
- byte_idx = start_byte_idx
|
||||
- char_idx = 1
|
||||
- elseif start_byte_idx == #prev_line + 1 then
|
||||
- byte_idx = start_byte_idx
|
||||
- char_idx = compute_line_length(prev_line, offset_encoding) + 1
|
||||
- else
|
||||
- byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
|
||||
- char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding)
|
||||
- end
|
||||
+ local byte_idx, char_idx = align_position(prev_line, start_byte_idx, true, offset_encoding)
|
||||
|
||||
-- Return the start difference (shared for new and prev lines)
|
||||
return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx }
|
||||
@@ -209,51 +203,48 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline,
|
||||
local prev_line_length = #prev_line
|
||||
local curr_line_length = #curr_line
|
||||
|
||||
- local byte_offset = 0
|
||||
+ local prev_line_range, curr_line_range
|
||||
+ if start_line_idx == prev_line_idx then
|
||||
+ prev_line_range = prev_line_length - start_range.byte_idx
|
||||
+ -- start_line_idx < prev_line_idx
|
||||
+ else
|
||||
+ prev_line_range = prev_line_length - 1
|
||||
+ end
|
||||
+ if start_line_idx == curr_line_idx then
|
||||
+ curr_line_range = curr_line_length - start_range.byte_idx
|
||||
+ -- start_line_idx < curr_line_idx
|
||||
+ else
|
||||
+ curr_line_range = curr_line_length - 1
|
||||
+ end
|
||||
+
|
||||
+ -- Maximum number of bytes to search backwards for mismatch
|
||||
+ local max_length = min(prev_line_range, curr_line_range)
|
||||
|
||||
- -- Editing the same line
|
||||
- -- If the byte offset is zero, that means there is a difference on the last byte (not newline)
|
||||
- if prev_line_idx == curr_line_idx then
|
||||
- local max_length
|
||||
- if start_line_idx == prev_line_idx then
|
||||
- -- Search until beginning of difference
|
||||
- max_length = min(prev_line_length - start_range.byte_idx, curr_line_length - start_range.byte_idx) + 1
|
||||
- else
|
||||
- max_length = min(prev_line_length, curr_line_length) + 1
|
||||
- end
|
||||
- for idx = 0, max_length do
|
||||
- byte_offset = idx
|
||||
- if
|
||||
- str_byte(prev_line, prev_line_length - byte_offset) ~= str_byte(curr_line, curr_line_length - byte_offset)
|
||||
- then
|
||||
- break
|
||||
- end
|
||||
+ -- Negative offset to last shared byte between prev_line and curr_line
|
||||
+ -- -1 offset indicates no shared byte
|
||||
+ local byte_offset = -1
|
||||
+
|
||||
+ -- Iterate from end to beginning of shortest line
|
||||
+ for idx = 0, max_length do
|
||||
+ byte_offset = idx
|
||||
+ if
|
||||
+ str_byte(prev_line, prev_line_length - byte_offset) ~= str_byte(curr_line, curr_line_length - byte_offset)
|
||||
+ then
|
||||
+ -- If there was a mismatched byte, need to go back to next byte (which did match)
|
||||
+ byte_offset = byte_offset - 1
|
||||
+ break
|
||||
end
|
||||
end
|
||||
|
||||
- -- Iterate from end to beginning of shortest line
|
||||
- local prev_end_byte_idx = prev_line_length - byte_offset + 1
|
||||
+ local prev_end_byte_idx = prev_line_length - byte_offset
|
||||
|
||||
- -- Handle case where lines match
|
||||
- if prev_end_byte_idx == 0 then
|
||||
- prev_end_byte_idx = 1
|
||||
- end
|
||||
- local prev_byte_idx, prev_char_idx = align_end_position(prev_line, prev_end_byte_idx, offset_encoding)
|
||||
+ local prev_byte_idx, prev_char_idx = align_position(prev_line, prev_end_byte_idx, false, offset_encoding)
|
||||
local prev_end_range = { line_idx = prev_line_idx, byte_idx = prev_byte_idx, char_idx = prev_char_idx }
|
||||
|
||||
- local curr_end_range
|
||||
- -- Deletion event, new_range cannot be before start
|
||||
- if curr_line_idx < start_line_idx then
|
||||
- curr_end_range = { line_idx = start_line_idx, byte_idx = 1, char_idx = 1 }
|
||||
- else
|
||||
- local curr_end_byte_idx = curr_line_length - byte_offset + 1
|
||||
- -- Handle case where lines match
|
||||
- if curr_end_byte_idx == 0 then
|
||||
- curr_end_byte_idx = 1
|
||||
- end
|
||||
- local curr_byte_idx, curr_char_idx = align_end_position(curr_line, curr_end_byte_idx, offset_encoding)
|
||||
- curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx }
|
||||
- end
|
||||
+ local curr_end_byte_idx = curr_line_length - byte_offset
|
||||
+
|
||||
+ local curr_byte_idx, curr_char_idx = align_position(curr_line, curr_end_byte_idx, false, offset_encoding)
|
||||
+ local curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx }
|
||||
|
||||
return prev_end_range, curr_end_range
|
||||
end
|
||||
diff --git a/src/nvim/lua/stdlib.c b/src/nvim/lua/stdlib.c
|
||||
index b746e03625ad..9441b88cfb98 100644
|
||||
--- a/src/nvim/lua/stdlib.c
|
||||
+++ b/src/nvim/lua/stdlib.c
|
||||
@@ -230,7 +230,7 @@ static int nlua_str_utf_start(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
|
||||
if (offset < 0 || offset > (intptr_t)s1_len) {
|
||||
return luaL_error(lstate, "index out of range");
|
||||
}
|
||||
- int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1);
|
||||
+ int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + offset - 1);
|
||||
lua_pushinteger(lstate, tail_offset);
|
||||
return 1;
|
||||
}
|
||||
@@ -250,7 +250,7 @@ static int nlua_str_utf_end(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
|
||||
if (offset < 0 || offset > (intptr_t)s1_len) {
|
||||
return luaL_error(lstate, "index out of range");
|
||||
}
|
||||
- int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1);
|
||||
+ int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + offset - 1);
|
||||
lua_pushinteger(lstate, tail_offset);
|
||||
return 1;
|
||||
}
|
||||
diff --git a/test/functional/plugin/lsp/incremental_sync_spec.lua b/test/functional/plugin/lsp/incremental_sync_spec.lua
|
||||
index 5dd34e766528..60e35c3620cc 100644
|
||||
--- a/test/functional/plugin/lsp/incremental_sync_spec.lua
|
||||
+++ b/test/functional/plugin/lsp/incremental_sync_spec.lua
|
||||
@@ -164,6 +164,201 @@ describe('incremental synchronization', function()
|
||||
}
|
||||
test_edit({"a"}, {"rb"}, expected_text_changes, 'utf-16', '\n')
|
||||
end)
|
||||
+ it('deleting a line', function()
|
||||
+ local expected_text_changes = {
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 0,
|
||||
+ line = 0
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 0,
|
||||
+ line = 1
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 12,
|
||||
+ text = ''
|
||||
+ }
|
||||
+ }
|
||||
+ test_edit({"hello world"}, {"dd"}, expected_text_changes, 'utf-16', '\n')
|
||||
+ end)
|
||||
+ it('deleting an empty line', function()
|
||||
+ local expected_text_changes = {
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 0,
|
||||
+ line = 1
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 0,
|
||||
+ line = 2
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 1,
|
||||
+ text = ''
|
||||
+ }
|
||||
+ }
|
||||
+ test_edit({"hello world", ""}, {"jdd"}, expected_text_changes, 'utf-16', '\n')
|
||||
+ end)
|
||||
+ it('adding a line', function()
|
||||
+ local expected_text_changes = {
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 0,
|
||||
+ line = 1
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 0,
|
||||
+ line = 1
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 0,
|
||||
+ text = 'hello world\n'
|
||||
+ }
|
||||
+ }
|
||||
+ test_edit({"hello world"}, {"yyp"}, expected_text_changes, 'utf-16', '\n')
|
||||
+ end)
|
||||
+ it('adding an empty line', function()
|
||||
+ local expected_text_changes = {
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 0,
|
||||
+ line = 1
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 0,
|
||||
+ line = 1
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 0,
|
||||
+ text = '\n'
|
||||
+ }
|
||||
+ }
|
||||
+ test_edit({"hello world"}, {"o"}, expected_text_changes, 'utf-16', '\n')
|
||||
+ end)
|
||||
+ end)
|
||||
+ describe('multi line edit', function()
|
||||
+ it('deletion and insertion', function()
|
||||
+ local expected_text_changes = {
|
||||
+ -- delete "_fdsa" from end of line 1
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 4,
|
||||
+ line = 1
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 9,
|
||||
+ line = 1
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 5,
|
||||
+ text = ''
|
||||
+ },
|
||||
+ -- delete "hello world\n" from line 2
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 0,
|
||||
+ line = 2
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 0,
|
||||
+ line = 3
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 12,
|
||||
+ text = ''
|
||||
+ },
|
||||
+ -- delete "1234" from beginning of line 2
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 0,
|
||||
+ line = 2
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 4,
|
||||
+ line = 2
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 4,
|
||||
+ text = ''
|
||||
+ },
|
||||
+ -- add " asdf" to end of line 1
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 4,
|
||||
+ line = 1
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 4,
|
||||
+ line = 1
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 0,
|
||||
+ text = ' asdf'
|
||||
+ },
|
||||
+ -- delete " asdf\n" from line 2
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 0,
|
||||
+ line = 2
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 0,
|
||||
+ line = 3
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 6,
|
||||
+ text = ''
|
||||
+ },
|
||||
+ -- undo entire deletion
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 4,
|
||||
+ line = 1
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 4,
|
||||
+ line = 1
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 0,
|
||||
+ text = "_fdsa\nhello world\n1234"
|
||||
+ },
|
||||
+ -- redo entire deletion
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 4,
|
||||
+ line = 1
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 4,
|
||||
+ line = 3
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 22,
|
||||
+ text = ''
|
||||
+ },
|
||||
+ }
|
||||
+ local original_lines = {
|
||||
+ "\\begin{document}",
|
||||
+ "test_fdsa",
|
||||
+ "hello world",
|
||||
+ "1234 asdf",
|
||||
+ "\\end{document}"
|
||||
+ }
|
||||
+ test_edit(original_lines, {"jf_vejjbhhdu<C-R>"}, expected_text_changes, 'utf-16', '\n')
|
||||
+ end)
|
||||
end)
|
||||
|
||||
describe('multi-operation edits', function()
|
||||
@@ -265,12 +460,12 @@ describe('incremental synchronization', function()
|
||||
line = 0
|
||||
},
|
||||
['end'] = {
|
||||
- character = 17,
|
||||
+ character = 12,
|
||||
line = 0
|
||||
}
|
||||
},
|
||||
- rangeLength = 6,
|
||||
- text = '\ntest3'
|
||||
+ rangeLength = 1,
|
||||
+ text = '\n'
|
||||
},
|
||||
}
|
||||
test_edit({"test1 test2", "test3"}, {"J", "u"}, expected_text_changes, 'utf-16', '\n')
|
||||
@@ -297,6 +492,80 @@ describe('incremental synchronization', function()
|
||||
}
|
||||
test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n')
|
||||
end)
|
||||
+ it('replacing a multibyte character with matching prefix', function()
|
||||
+ local expected_text_changes = {
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 0,
|
||||
+ line = 1
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 1,
|
||||
+ line = 1
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 1,
|
||||
+ text = '⟩'
|
||||
+ }
|
||||
+ }
|
||||
+ -- ⟨ is e29fa8, ⟩ is e29fa9
|
||||
+ local original_lines = {
|
||||
+ "\\begin{document}",
|
||||
+ "⟨",
|
||||
+ "\\end{document}",
|
||||
+ }
|
||||
+ test_edit(original_lines, {"jr⟩"}, expected_text_changes, 'utf-16', '\n')
|
||||
+ end)
|
||||
+ it('replacing a multibyte character with matching suffix', function()
|
||||
+ local expected_text_changes = {
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 0,
|
||||
+ line = 1
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 1,
|
||||
+ line = 1
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 1,
|
||||
+ text = 'ḟ'
|
||||
+ }
|
||||
+ }
|
||||
+ -- ฟ is e0b89f, ḟ is e1b89f
|
||||
+ local original_lines = {
|
||||
+ "\\begin{document}",
|
||||
+ "ฟ",
|
||||
+ "\\end{document}",
|
||||
+ }
|
||||
+ test_edit(original_lines, {"jrḟ"}, expected_text_changes, 'utf-16', '\n')
|
||||
+ end)
|
||||
+ it('inserting before a multibyte character', function()
|
||||
+ local expected_text_changes = {
|
||||
+ {
|
||||
+ range = {
|
||||
+ ['start'] = {
|
||||
+ character = 0,
|
||||
+ line = 1
|
||||
+ },
|
||||
+ ['end'] = {
|
||||
+ character = 0,
|
||||
+ line = 1
|
||||
+ }
|
||||
+ },
|
||||
+ rangeLength = 0,
|
||||
+ text = ' '
|
||||
+ }
|
||||
+ }
|
||||
+ local original_lines = {
|
||||
+ "\\begin{document}",
|
||||
+ "→",
|
||||
+ "\\end{document}",
|
||||
+ }
|
||||
+ test_edit(original_lines, {"ji "}, expected_text_changes, 'utf-16', '\n')
|
||||
+ end)
|
||||
it('deleting a multibyte character from a long line', function()
|
||||
local expected_text_changes = {
|
||||
{
|
@ -4,6 +4,10 @@
|
||||
# nixpkgs.overlays = [ inputs.neovim-overlay.overlay ];
|
||||
|
||||
nixpkgs.config.packageOverrides = pkgs: with pkgs; {
|
||||
neovim-unwrapped = neovim-unwrapped.overrideDerivation (orig: {
|
||||
patches = orig.patches ++ [ ./16669.patch ];
|
||||
});
|
||||
|
||||
neovim-qt-unwrapped = neovim-qt-unwrapped.overrideDerivation (_: {
|
||||
version = "0.2.17.9999";
|
||||
src = fetchFromGitHub {
|
||||
|
Loading…
Reference in New Issue
Block a user