Update nvim lsp patches.

This commit is contained in:
Gabriel Ebner 2021-12-26 19:36:47 +01:00
parent ef5860df46
commit e1d5bea5cc
5 changed files with 904 additions and 337 deletions

319
16382.patch Normal file

@ -0,0 +1,319 @@
From 4687d853a5b5475aa572e6004cb5c1a1ed45ce94 Mon Sep 17 00:00:00 2001
From: Rishikesh Vaishnav <rishhvaishnav@gmail.com>
Date: Fri, 26 Nov 2021 00:34:54 +0000
Subject: [PATCH] fix(lsp): handle offset encoding
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Co-authored-by: black-desk <clx814727823@gmail.com>
Co-authored-by: Mathias Fußenegger <mfussenegger@users.noreply.github.com>
---
runtime/lua/vim/lsp.lua | 6 +-
runtime/lua/vim/lsp/util.lua | 157 +++++++++++++++++++++++++----------
2 files changed, 116 insertions(+), 47 deletions(-)
diff --git a/runtime/lua/vim/lsp.lua b/runtime/lua/vim/lsp.lua
index dbbfd7d1d810..c8440d62f315 100644
--- a/runtime/lua/vim/lsp.lua
+++ b/runtime/lua/vim/lsp.lua
@@ -1492,11 +1492,7 @@ local function adjust_start_col(lnum, line, items, encoding)
end
end
if min_start_char then
- if encoding == 'utf-8' then
- return min_start_char
- else
- return vim.str_byteindex(line, min_start_char, encoding == 'utf-16')
- end
+ return util._str_byteindex_enc(line, min_start_char, encoding)
else
return nil
end
diff --git a/runtime/lua/vim/lsp/util.lua b/runtime/lua/vim/lsp/util.lua
index 059e66c53a06..cefacb26233d 100644
--- a/runtime/lua/vim/lsp/util.lua
+++ b/runtime/lua/vim/lsp/util.lua
@@ -90,6 +90,42 @@ local function split_lines(value)
return split(value, '\n', true)
end
+--- Convert byte index to `encoding` index.
+--- Convenience wrapper around vim.str_utfindex
+---@param line string line to be indexed
+---@param index number byte index (utf-8), or `nil` for length
+---@param encoding string utf-8|utf-16|utf-32|nil defaults to utf-16
+---@return number `encoding` index of `index` in `line`
+function M._str_utfindex_enc(line, index, encoding)
+ if encoding ~= 'utf-8' then
+ local col32, col16 = vim.str_utfindex(line, index)
+ if encoding == 'utf-32' then
+ return col32
+ else
+ return col16
+ end
+ else
+ return index
+ end
+end
+
+--- Convert UTF index to `encoding` index.
+--- Convenience wrapper around vim.str_byteindex
+---Alternative to vim.str_byteindex that takes an encoding.
+---@param line string line to be indexed
+---@param index number UTF index
+---@param encoding string utf-8|utf-16|utf-32|nil defaults to utf-16
+---@return number byte (utf-8) index of `encoding` index `index` in `line`
+function M._str_byteindex_enc(line, index, encoding)
+ if encoding ~= 'utf-8' then
+ return vim.str_byteindex(line, index, not encoding or encoding ~= 'utf-32')
+ else
+ return index
+ end
+end
+
+local _str_utfindex_enc = M._str_utfindex_enc
+local _str_byteindex_enc = M._str_byteindex_enc
--- Replaces text in a range with new text.
---
--- CAUTION: Changes in-place!
@@ -237,6 +273,7 @@ end
---@private
--- Position is a https://microsoft.github.io/language-server-protocol/specifications/specification-current/#position
--- Returns a zero-indexed column, since set_lines() does the conversion to
+---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to utf-16
--- 1-indexed
local function get_line_byte_from_position(bufnr, position, offset_encoding)
-- LSP's line and characters are 0-indexed
@@ -247,13 +284,7 @@ local function get_line_byte_from_position(bufnr, position, offset_encoding)
if col > 0 then
local line = get_line(bufnr, position.line)
local ok, result
-
- if offset_encoding == "utf-16" or not offset_encoding then
- ok, result = pcall(vim.str_byteindex, line, col, true)
- elseif offset_encoding == "utf-32" then
- ok, result = pcall(vim.str_byteindex, line, col, false)
- end
-
+ ok, result = pcall(_str_byteindex_enc, line, col, offset_encoding)
if ok then
return result
end
@@ -325,12 +356,15 @@ end
--- Applies a list of text edits to a buffer.
---@param text_edits table list of `TextEdit` objects
---@param bufnr number Buffer id
+---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to encoding of first client of `bufnr`
---@see https://microsoft.github.io/language-server-protocol/specifications/specification-current/#textEdit
-function M.apply_text_edits(text_edits, bufnr)
+function M.apply_text_edits(text_edits, bufnr, offset_encoding)
validate {
text_edits = { text_edits, 't', false };
bufnr = { bufnr, 'number', false };
+ offset_encoding = { offset_encoding, 'string', true };
}
+ offset_encoding = offset_encoding or M._get_offset_encoding(bufnr)
if not next(text_edits) then return end
if not api.nvim_buf_is_loaded(bufnr) then
vim.fn.bufload(bufnr)
@@ -367,8 +401,7 @@ function M.apply_text_edits(text_edits, bufnr)
-- Some LSP servers may return +1 range of the buffer content but nvim_buf_set_text can't accept it so we should fix it here.
local has_eol_text_edit = false
local max = vim.api.nvim_buf_line_count(bufnr)
- -- TODO handle offset_encoding
- local _, len = vim.str_utfindex(vim.api.nvim_buf_get_lines(bufnr, -2, -1, false)[1] or '')
+ local len = _str_utfindex_enc(vim.api.nvim_buf_get_lines(bufnr, -2, -1, false)[1] or '', nil, offset_encoding)
text_edits = vim.tbl_map(function(text_edit)
if max <= text_edit.range.start.line then
text_edit.range.start.line = max - 1
@@ -1432,11 +1465,11 @@ do --[[ References ]]
---
---@param bufnr number Buffer id
---@param references table List of `DocumentHighlight` objects to highlight
- ---@param offset_encoding string One of "utf-8", "utf-16", "utf-32", or nil. Defaults to utf-16
+ ---@param offset_encoding string One of "utf-8", "utf-16", "utf-32", or nil. Defaults to `offset_encoding` of first client of `bufnr`
---@see https://microsoft.github.io/language-server-protocol/specifications/specification-3-17/#documentHighlight
function M.buf_highlight_references(bufnr, references, offset_encoding)
validate { bufnr = {bufnr, 'n', true} }
- offset_encoding = offset_encoding or 'utf-16'
+ offset_encoding = offset_encoding or M._get_offset_encoding(bufnr)
for _, reference in ipairs(references) do
local start_line, start_char = reference["range"]["start"]["line"], reference["range"]["start"]["character"]
local end_line, end_char = reference["range"]["end"]["line"], reference["range"]["end"]["character"]
@@ -1647,30 +1680,61 @@ function M.try_trim_markdown_code_blocks(lines)
return 'markdown'
end
-local str_utfindex = vim.str_utfindex
---@private
-local function make_position_param()
- local row, col = unpack(api.nvim_win_get_cursor(0))
+---@param window (optional, number): window handle or 0 for current, defaults to current
+---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to `offset_encoding` of first client of buffer of `window`
+local function make_position_param(window, offset_encoding)
+ window = window or 0
+ local buf = vim.api.nvim_win_get_buf(window)
+ local row, col = unpack(api.nvim_win_get_cursor(window))
+ offset_encoding = offset_encoding or M._get_offset_encoding(buf)
row = row - 1
- local line = api.nvim_buf_get_lines(0, row, row+1, true)[1]
+ local line = api.nvim_buf_get_lines(buf, row, row+1, true)[1]
if not line then
return { line = 0; character = 0; }
end
- -- TODO handle offset_encoding
- local _
- _, col = str_utfindex(line, col)
+
+ col = _str_utfindex_enc(line, col, offset_encoding)
+
return { line = row; character = col; }
end
--- Creates a `TextDocumentPositionParams` object for the current buffer and cursor position.
---
+---@param window (optional, number): window handle or 0 for current, defaults to current
+---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to `offset_encoding` of first client of buffer of `window`
---@returns `TextDocumentPositionParams` object
---@see https://microsoft.github.io/language-server-protocol/specifications/specification-current/#textDocumentPositionParams
-function M.make_position_params()
+function M.make_position_params(window, offset_encoding)
+ window = window or 0
+ local buf = vim.api.nvim_win_get_buf(window)
+ offset_encoding = offset_encoding or M._get_offset_encoding(buf)
return {
- textDocument = M.make_text_document_params();
- position = make_position_param()
+ textDocument = M.make_text_document_params(buf);
+ position = make_position_param(window, offset_encoding)
+ }
+end
+
+--- Utility function for getting the encoding of the first LSP client on the given buffer.
+---@param bufnr (number) buffer handle or 0 for current, defaults to current
+---@returns (string) encoding first client if there is one, nil otherwise
+function M._get_offset_encoding(bufnr)
+ validate {
+ bufnr = {bufnr, 'n', true};
}
+
+ local offset_encoding
+
+ for _, client in pairs(vim.lsp.buf_get_clients(bufnr)) do
+ local this_offset_encoding = client.offset_encoding or "utf-16"
+ if not offset_encoding then
+ offset_encoding = this_offset_encoding
+ elseif offset_encoding ~= this_offset_encoding then
+ vim.notify("warning: multiple different client offset_encodings detected for buffer, this is not supported yet", vim.log.levels.WARN)
+ end
+ end
+
+ return offset_encoding
end
--- Using the current position in the current buffer, creates an object that
@@ -1678,12 +1742,16 @@ end
--- `textDocument/codeAction`, `textDocument/colorPresentation`,
--- `textDocument/rangeFormatting`.
---
+---@param window (optional, number): window handle or 0 for current, defaults to current
+---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to `offset_encoding` of first client of buffer of `window`
---@returns { textDocument = { uri = `current_file_uri` }, range = { start =
---`current_position`, end = `current_position` } }
-function M.make_range_params()
- local position = make_position_param()
+function M.make_range_params(window, offset_encoding)
+ local buf = vim.api.nvim_win_get_buf(window)
+ offset_encoding = offset_encoding or M._get_offset_encoding(buf)
+ local position = make_position_param(window, offset_encoding)
return {
- textDocument = M.make_text_document_params(),
+ textDocument = M.make_text_document_params(buf),
range = { start = position; ["end"] = position; }
}
end
@@ -1695,27 +1763,29 @@ end
---Defaults to the start of the last visual selection.
---@param end_pos ({number, number}, optional) mark-indexed position.
---Defaults to the end of the last visual selection.
+---@param bufnr (optional, number): buffer handle or 0 for current, defaults to current
+---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to `offset_encoding` of first client of `bufnr`
---@returns { textDocument = { uri = `current_file_uri` }, range = { start =
---`start_position`, end = `end_position` } }
-function M.make_given_range_params(start_pos, end_pos)
+function M.make_given_range_params(start_pos, end_pos, bufnr, offset_encoding)
validate {
start_pos = {start_pos, 't', true};
end_pos = {end_pos, 't', true};
+ offset_encoding = {offset_encoding, 's', true};
}
- local A = list_extend({}, start_pos or api.nvim_buf_get_mark(0, '<'))
- local B = list_extend({}, end_pos or api.nvim_buf_get_mark(0, '>'))
+ bufnr = bufnr or 0
+ offset_encoding = offset_encoding or M._get_offset_encoding(bufnr)
+ local A = list_extend({}, start_pos or api.nvim_buf_get_mark(bufnr, '<'))
+ local B = list_extend({}, end_pos or api.nvim_buf_get_mark(bufnr, '>'))
-- convert to 0-index
A[1] = A[1] - 1
B[1] = B[1] - 1
- -- account for encoding.
- -- TODO handle offset_encoding
+ -- account for offset_encoding.
if A[2] > 0 then
- local _, char = M.character_offset(0, A[1], A[2])
- A = {A[1], char}
+ A = {A[1], M.character_offset(bufnr, A[1], A[2], offset_encoding)}
end
if B[2] > 0 then
- local _, char = M.character_offset(0, B[1], B[2])
- B = {B[1], char}
+ B = {B[1], M.character_offset(bufnr, B[1], B[2], offset_encoding)}
end
-- we need to offset the end character position otherwise we loose the last
-- character of the selection, as LSP end position is exclusive
@@ -1724,7 +1794,7 @@ function M.make_given_range_params(start_pos, end_pos)
B[2] = B[2] + 1
end
return {
- textDocument = M.make_text_document_params(),
+ textDocument = M.make_text_document_params(bufnr),
range = {
start = {line = A[1], character = A[2]},
['end'] = {line = B[1], character = B[2]}
@@ -1734,10 +1804,11 @@ end
--- Creates a `TextDocumentIdentifier` object for the current buffer.
---
+---@param bufnr (optional, number): Buffer handle, defaults to current
---@returns `TextDocumentIdentifier`
---@see https://microsoft.github.io/language-server-protocol/specifications/specification-current/#textDocumentIdentifier
-function M.make_text_document_params()
- return { uri = vim.uri_from_bufnr(0) }
+function M.make_text_document_params(bufnr)
+ return { uri = vim.uri_from_bufnr(bufnr or 0) }
end
--- Create the workspace params
@@ -1780,14 +1851,16 @@ end
---@param buf buffer id (0 for current)
---@param row 0-indexed line
---@param col 0-indexed byte offset in line
----@returns (number, number) UTF-32 and UTF-16 index of the character in line {row} column {col} in buffer {buf}
-function M.character_offset(bufnr, row, col)
- local line = get_line(bufnr, row)
+---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to `offset_encoding` of first client of `buf`
+---@returns (number, number) `offset_encoding` index of the character in line {row} column {col} in buffer {buf}
+function M.character_offset(buf, row, col, offset_encoding)
+ local line = get_line(buf, row)
+ offset_encoding = offset_encoding or M._get_offset_encoding(buf)
-- If the col is past the EOL, use the line length.
if col > #line then
- return str_utfindex(line)
+ return _str_utfindex_enc(line, nil, offset_encoding)
end
- return str_utfindex(line, col)
+ return _str_utfindex_enc(line, col, offset_encoding)
end
--- Helper function to return nested values in language server settings

60
16666.patch Normal file

@ -0,0 +1,60 @@
From 1a887293ef66b51220d40f8f91dfc8245f8aeec5 Mon Sep 17 00:00:00 2001
From: Michael Lingelbach <m.j.lbach@gmail.com>
Date: Wed, 15 Dec 2021 09:07:23 -0800
Subject: [PATCH 1/2] fix: do not cast offset to char_u
* str_utf_start/end both cast the offset into the utf string
to a char_u, a pointer + long is well-defined and the cast is
unnecessary. This previously resulted in issues for offsets greater than
256.
---
src/nvim/lua/stdlib.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/nvim/lua/stdlib.c b/src/nvim/lua/stdlib.c
index b5553060a1dc..e7dba1239280 100644
--- a/src/nvim/lua/stdlib.c
+++ b/src/nvim/lua/stdlib.c
@@ -231,7 +231,7 @@ static int nlua_str_utf_start(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
if (offset < 0 || offset > (intptr_t)s1_len) {
return luaL_error(lstate, "index out of range");
}
- int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1);
+ int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + offset - 1);
lua_pushinteger(lstate, tail_offset);
return 1;
}
@@ -251,7 +251,7 @@ static int nlua_str_utf_end(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
if (offset < 0 || offset > (intptr_t)s1_len) {
return luaL_error(lstate, "index out of range");
}
- int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1);
+ int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + offset - 1);
lua_pushinteger(lstate, tail_offset);
return 1;
}
From fcbffcd92a2f53c224e2297c4807c14ac553bdbf Mon Sep 17 00:00:00 2001
From: Michael Lingelbach <m.j.lbach@gmail.com>
Date: Wed, 15 Dec 2021 09:07:23 -0800
Subject: [PATCH 2/2] chore: improve naming consistency in str_utf_start
---
src/nvim/lua/stdlib.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/src/nvim/lua/stdlib.c b/src/nvim/lua/stdlib.c
index e7dba1239280..c9f82a2df127 100644
--- a/src/nvim/lua/stdlib.c
+++ b/src/nvim/lua/stdlib.c
@@ -231,8 +231,8 @@ static int nlua_str_utf_start(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
if (offset < 0 || offset > (intptr_t)s1_len) {
return luaL_error(lstate, "index out of range");
}
- int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + offset - 1);
- lua_pushinteger(lstate, tail_offset);
+ int head_offset = mb_head_off((char_u *)s1, (char_u *)s1 + offset - 1);
+ lua_pushinteger(lstate, head_offset);
return 1;
}

@ -1,8 +1,26 @@
From d6b654d355dfbe8cd0fce3e73eeacbd068419416 Mon Sep 17 00:00:00 2001
From: Rishikesh Vaishnav <rishhvaishnav@gmail.com>
Date: Wed, 15 Dec 2021 18:08:10 +0000
Subject: [PATCH 1/2] refactor(lsp): always crop matching text from end of
incremental sync range
When constructing incremental updates to be sent to the server,
previously we only attempted to crop matching text from the end of
the old and new `on_lines` ranges on single-line updates.
This refactor makes it so that we always attempt to crop from the end,
no matter how many lines were changed. This reduces the total size of
increments sent to the server when possible by not including
text that wasn't changed at the end of the incremental update range.
---
runtime/lua/vim/lsp/sync.lua | 127 +++++++++---------
.../plugin/lsp/incremental_sync_spec.lua | 18 +--
2 files changed, 69 insertions(+), 76 deletions(-)
diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua
index 5df2a4d144d2..c745fadf876e 100644
index d01f45ad8f2a..c745fadf876e 100644
--- a/runtime/lua/vim/lsp/sync.lua
+++ b/runtime/lua/vim/lsp/sync.lua
@@ -93,30 +93,38 @@ end
@@ -93,31 +93,38 @@ end
-- utf-8 index and either the utf-16, or utf-32 index.
---@param line string the line to index into
---@param byte integer the byte idx
@ -10,54 +28,55 @@ index 5df2a4d144d2..c745fadf876e 100644
---@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8)
---@returns table<string, int> byte_idx and char_idx of first change position
-local function align_end_position(line, byte, offset_encoding)
- local char
- -- If on the first byte, or an empty string: the trivial case
- if byte == 1 or #line == 0 then
- char = byte
- -- Called in the case of extending an empty line "" -> "a"
- elseif byte == #line + 1 then
- char = compute_line_length(line, offset_encoding) + 1
- else
+local function align_position(line, byte, start, offset_encoding)
+ if byte ~= 1 and byte <= #line then
+ -- Modifying line, find the nearest utf codepoint
+ local offset = str_utf_start(line, byte)
-- Modifying line, find the nearest utf codepoint
local offset = str_utf_start(line, byte)
+
+ -- If the byte does not fall on the start of the character, then
-- If the byte does not fall on the start of the character, then
- -- align to the start of the next character.
+ -- align to the start of the next character if end align, and start
+ -- of this character if start align
+ if offset < 0 then
if offset < 0 then
- byte = byte + str_utf_end(line, byte) + 1
- end
- if byte <= #line then
- char = byte_to_utf(line, byte, offset_encoding)
- else
- char = compute_line_length(line, offset_encoding) + 1
+ if start then
+ byte = byte + offset
+ else
+ byte = byte + str_utf_end(line, byte) + 1
+ end
+ end
+ end
end
- -- Extending line, find the nearest utf codepoint for the last valid character
end
+
local char
- -- If on the first byte, or an empty string: the trivial case
- if byte == 1 or #line == 0 then
- char = byte
+ local char
+
+ -- optimize for first byte case
+ if byte == 1 then
+ char = 1
-- Called in the case of extending an empty line "" -> "a"
elseif byte == #line + 1 then
char = compute_line_length(line, offset_encoding) + 1
else
- -- Modifying line, find the nearest utf codepoint
- local offset = str_utf_end(line, byte)
- -- If the byte does not fall on the start of the character, then
- -- align to the start of the next character.
- if offset > 0 then
- char = byte_to_utf(line, byte, offset_encoding) + 1
- byte = byte + offset
- else
- char = byte_to_utf(line, byte, offset_encoding)
- byte = byte + offset
- end
- -- Extending line, find the nearest utf codepoint for the last valid character
+ -- Called in the case of extending an empty line "" -> "a"
+ elseif byte == #line + 1 then
+ char = compute_line_length(line, offset_encoding) + 1
+ else
+ char = byte_to_utf(line, byte, offset_encoding)
end
+ end
+
return byte, char
end
@@ -157,18 +165,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline,
@@ -158,18 +165,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline,
end
-- Convert byte to codepoint if applicable
@ -71,13 +90,13 @@ index 5df2a4d144d2..c745fadf876e 100644
- char_idx = compute_line_length(prev_line, offset_encoding) + 1
- else
- byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
- char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding)
- char_idx = byte_to_utf(prev_line, byte_idx, offset_encoding)
- end
+ local byte_idx, char_idx = align_position(prev_line, start_byte_idx, true, offset_encoding)
-- Return the start difference (shared for new and prev lines)
return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx }
@@ -209,51 +206,48 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline,
@@ -210,51 +206,48 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline,
local prev_line_length = #prev_line
local curr_line_length = #curr_line
@ -95,9 +114,6 @@ index 5df2a4d144d2..c745fadf876e 100644
+ else
+ curr_line_range = curr_line_length - 1
+ end
+
+ -- Maximum number of bytes to search backwards for mismatch
+ local max_length = min(prev_line_range, curr_line_range)
- -- Editing the same line
- -- If the byte offset is zero, that means there is a difference on the last byte (not newline)
@ -116,6 +132,9 @@ index 5df2a4d144d2..c745fadf876e 100644
- then
- break
- end
+ -- Maximum number of bytes to search backwards for mismatch
+ local max_length = min(prev_line_range, curr_line_range)
+
+ -- Negative offset to last shared byte between prev_line and curr_line
+ -- -1 offset indicates no shared byte
+ local byte_offset = -1
@ -164,235 +183,43 @@ index 5df2a4d144d2..c745fadf876e 100644
return prev_end_range, curr_end_range
end
diff --git a/src/nvim/lua/stdlib.c b/src/nvim/lua/stdlib.c
index b746e03625ad..9441b88cfb98 100644
--- a/src/nvim/lua/stdlib.c
+++ b/src/nvim/lua/stdlib.c
@@ -230,7 +230,7 @@ static int nlua_str_utf_start(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
if (offset < 0 || offset > (intptr_t)s1_len) {
return luaL_error(lstate, "index out of range");
}
- int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1);
+ int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + offset - 1);
lua_pushinteger(lstate, tail_offset);
return 1;
}
@@ -250,7 +250,7 @@ static int nlua_str_utf_end(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
if (offset < 0 || offset > (intptr_t)s1_len) {
return luaL_error(lstate, "index out of range");
}
- int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1);
+ int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + offset - 1);
lua_pushinteger(lstate, tail_offset);
return 1;
}
diff --git a/test/functional/plugin/lsp/incremental_sync_spec.lua b/test/functional/plugin/lsp/incremental_sync_spec.lua
index 5dd34e766528..60e35c3620cc 100644
index 4e3eddb9607e..e13a5acf3c9a 100644
--- a/test/functional/plugin/lsp/incremental_sync_spec.lua
+++ b/test/functional/plugin/lsp/incremental_sync_spec.lua
@@ -164,6 +164,201 @@ describe('incremental synchronization', function()
@@ -327,12 +327,12 @@ describe('incremental synchronization', function()
line = 1
},
['end'] = {
- character = 9,
+ character = 4,
line = 1
}
test_edit({"a"}, {"rb"}, expected_text_changes, 'utf-16', '\n')
end)
+ it('deleting a line', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 0
+ },
+ ['end'] = {
+ character = 0,
+ line = 1
+ }
+ },
+ rangeLength = 12,
+ text = ''
+ }
+ }
+ test_edit({"hello world"}, {"dd"}, expected_text_changes, 'utf-16', '\n')
+ end)
+ it('deleting an empty line', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 1
+ },
+ ['end'] = {
+ character = 0,
+ line = 2
+ }
+ },
+ rangeLength = 1,
+ text = ''
+ }
+ }
+ test_edit({"hello world", ""}, {"jdd"}, expected_text_changes, 'utf-16', '\n')
+ end)
+ it('adding a line', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 1
+ },
+ ['end'] = {
+ character = 0,
+ line = 1
+ }
+ },
+ rangeLength = 0,
+ text = 'hello world\n'
+ }
+ }
+ test_edit({"hello world"}, {"yyp"}, expected_text_changes, 'utf-16', '\n')
+ end)
+ it('adding an empty line', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 1
+ },
+ ['end'] = {
+ character = 0,
+ line = 1
+ }
+ },
+ rangeLength = 0,
+ text = '\n'
+ }
+ }
+ test_edit({"hello world"}, {"o"}, expected_text_changes, 'utf-16', '\n')
+ end)
+ end)
+ describe('multi line edit', function()
+ it('deletion and insertion', function()
+ local expected_text_changes = {
+ -- delete "_fsda" from end of line 1
+ {
+ range = {
+ ['start'] = {
+ character = 4,
+ line = 1
+ },
+ ['end'] = {
+ character = 9,
+ line = 1
+ }
+ },
+ rangeLength = 5,
+ text = ''
+ },
+ -- delete "hello world\n" from line 2
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 2
+ },
+ ['end'] = {
+ character = 0,
+ line = 3
+ }
+ },
+ rangeLength = 12,
+ text = ''
+ },
+ -- delete "1234" from beginning of line 2
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 2
+ },
+ ['end'] = {
+ character = 4,
+ line = 2
+ }
+ },
+ rangeLength = 4,
+ text = ''
+ },
+ -- add " asdf" to end of line 1
+ {
+ range = {
+ ['start'] = {
+ character = 4,
+ line = 1
+ },
+ ['end'] = {
+ character = 4,
+ line = 1
+ }
+ },
+ rangeLength = 0,
+ text = ' asdf'
+ },
+ -- delete " asdf\n" from line 2
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 2
+ },
+ ['end'] = {
+ character = 0,
+ line = 3
+ }
+ },
+ rangeLength = 6,
+ text = ''
+ },
+ -- undo entire deletion
+ {
+ range = {
+ ['start'] = {
+ character = 4,
+ line = 1
+ },
+ ['end'] = {
+ character = 4,
+ line = 1
+ }
+ },
},
- rangeLength = 5,
- text = "_fdsa\nhello world\n1234 asdf"
+ rangeLength = 0,
+ text = "_fdsa\nhello world\n1234"
+ },
+ -- redo entire deletion
+ {
+ range = {
+ ['start'] = {
},
-- redo entire deletion
{
@@ -342,12 +342,12 @@ describe('incremental synchronization', function()
line = 1
},
['end'] = {
- character = 9,
+ character = 4,
+ line = 1
+ },
+ ['end'] = {
+ character = 4,
+ line = 3
+ }
+ },
line = 3
}
},
- rangeLength = 27,
- text = ' asdf'
+ rangeLength = 22,
+ text = ''
+ },
+ }
+ local original_lines = {
+ "\\begin{document}",
+ "test_fdsa",
+ "hello world",
+ "1234 asdf",
+ "\\end{document}"
+ }
+ test_edit(original_lines, {"jf_vejjbhhdu<C-R>"}, expected_text_changes, 'utf-16', '\n')
+ end)
end)
describe('multi-operation edits', function()
@@ -265,12 +460,12 @@ describe('incremental synchronization', function()
},
}
local original_lines = {
@@ -460,12 +460,12 @@ describe('incremental synchronization', function()
line = 0
},
['end'] = {
@ -408,84 +235,99 @@ index 5dd34e766528..60e35c3620cc 100644
},
}
test_edit({"test1 test2", "test3"}, {"J", "u"}, expected_text_changes, 'utf-16', '\n')
@@ -297,6 +492,80 @@ describe('incremental synchronization', function()
}
test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n')
end)
+ it('replacing a multibyte character with matching prefix', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 1
+ },
+ ['end'] = {
+ character = 1,
+ line = 1
+ }
+ },
+ rangeLength = 1,
+ text = '⟩'
+ }
+ }
+ -- ⟨ is e29fa8, ⟩ is e29fa9
+ local original_lines = {
+ "\\begin{document}",
+ "⟨",
+ "\\end{document}",
+ }
+ test_edit(original_lines, {"jr⟩"}, expected_text_changes, 'utf-16', '\n')
+ end)
+ it('replacing a multibyte character with matching suffix', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 1
+ },
+ ['end'] = {
+ character = 1,
+ line = 1
+ }
+ },
+ rangeLength = 1,
+ text = 'ḟ'
+ }
+ }
+ -- ฟ is e0b89f, ḟ is e1b89f
+ local original_lines = {
+ "\\begin{document}",
+ "ฟ",
+ "\\end{document}",
+ }
+ test_edit(original_lines, {"jrḟ"}, expected_text_changes, 'utf-16', '\n')
+ end)
+ it('inserting before a multibyte character', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 1
+ },
+ ['end'] = {
+ character = 0,
+ line = 1
+ }
+ },
+ rangeLength = 0,
+ text = ' '
+ }
+ }
+ local original_lines = {
+ "\\begin{document}",
+ "→",
+ "\\end{document}",
+ }
+ test_edit(original_lines, {"ji "}, expected_text_changes, 'utf-16', '\n')
+ end)
it('deleting a multibyte character from a long line', function()
local expected_text_changes = {
{
From 7a4877f61de0616964b8f939ad132fc256235d93 Mon Sep 17 00:00:00 2001
From: Rishikesh Vaishnav <rishhvaishnav@gmail.com>
Date: Tue, 21 Dec 2021 02:05:36 +0000
Subject: [PATCH 2/2] Replace UTF helper functions with
`vim.lsp.util._str_utfindex_enc()`.
---
runtime/lua/vim/lsp/sync.lua | 49 +++---------------------------------
1 file changed, 4 insertions(+), 45 deletions(-)
diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua
index c745fadf876e..f725d3272291 100644
--- a/runtime/lua/vim/lsp/sync.lua
+++ b/runtime/lua/vim/lsp/sync.lua
@@ -44,50 +44,9 @@ local M = {}
-- local string.byte, unclear if this is necessary for JIT compilation
local str_byte = string.byte
local min = math.min
-local str_utfindex = vim.str_utfindex
local str_utf_start = vim.str_utf_start
local str_utf_end = vim.str_utf_end
----@private
--- Given a line, byte idx, and offset_encoding convert to the
--- utf-8, utf-16, or utf-32 index.
----@param line string the line to index into
----@param byte integer the byte idx
----@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8)
---@returns integer the utf idx for the given encoding
-local function byte_to_utf(line, byte, offset_encoding)
- -- convert to 0 based indexing for str_utfindex
- byte = byte - 1
-
- local utf_idx
- local _
- -- Convert the byte range to utf-{8,16,32} and convert 1-based (lua) indexing to 0-based
- if offset_encoding == 'utf-16' then
- _, utf_idx = str_utfindex(line, byte)
- elseif offset_encoding == 'utf-32' then
- utf_idx, _ = str_utfindex(line, byte)
- else
- utf_idx = byte
- end
-
- -- convert to 1 based indexing
- return utf_idx + 1
-end
-
----@private
-local function compute_line_length(line, offset_encoding)
- local length
- local _
- if offset_encoding == 'utf-16' then
- _, length = str_utfindex(line)
- elseif offset_encoding == 'utf-32' then
- length, _ = str_utfindex(line)
- else
- length = #line
- end
- return length
-end
-
---@private
-- Given a line, byte idx, alignment, and offset_encoding convert to the aligned
-- utf-8 index and either the utf-16, or utf-32 index.
@@ -120,9 +79,9 @@ local function align_position(line, byte, start, offset_encoding)
char = 1
-- Called in the case of extending an empty line "" -> "a"
elseif byte == #line + 1 then
- char = compute_line_length(line, offset_encoding) + 1
+ char = vim.lsp.util._str_utfindex_enc(line, nil, offset_encoding) + 1
else
- char = byte_to_utf(line, byte, offset_encoding)
+ char = vim.lsp.util._str_utfindex_enc(line, byte - 1, offset_encoding) + 1
end
return byte, char
@@ -306,7 +265,7 @@ local function compute_range_length(lines, start_range, end_range, offset_encodi
local start_line = lines[start_range.line_idx]
local range_length
if start_line and #start_line > 0 then
- range_length = compute_line_length(start_line, offset_encoding) - start_range.char_idx + 1 + line_ending_length
+ range_length = vim.lsp.util._str_utfindex_enc(start_line, nil, offset_encoding) - start_range.char_idx + 1 + line_ending_length
else
-- Length of newline character
range_length = line_ending_length
@@ -316,7 +275,7 @@ local function compute_range_length(lines, start_range, end_range, offset_encodi
for idx = start_range.line_idx + 1, end_range.line_idx - 1 do
-- Length full line plus newline character
if #lines[idx] > 0 then
- range_length = range_length + compute_line_length(lines[idx], offset_encoding) + #line_ending
+ range_length = range_length + vim.lsp.util._str_utfindex_enc(lines[idx], nil, offset_encoding) + #line_ending
else
range_length = range_length + line_ending_length
end

346
16670.patch Normal file

@ -0,0 +1,346 @@
From 166c4d54efc9e8c10502db13c094d7de3f245ce5 Mon Sep 17 00:00:00 2001
From: Rishikesh Vaishnav <rishhvaishnav@gmail.com>
Date: Wed, 15 Dec 2021 21:19:43 +0000
Subject: [PATCH] fix(lsp): incremental sync UTF fixes (#16624)
Aligning end position:
- fix check for preexisting UTF-8 alignment:
check `vim.str_utf_start() == 0` instead of `vim.str_utf_end() == 0`
- fix setting of byte index when not already aligned:
set to beginning of next codepoint rather than end of this one
- set char index after aligning byte index, removing unnecessary
adjustment of byte index when already aligned
Aligning start position:
- fix setting of char index:
use aligned byte index rather than original in `byte_to_utf()`
Add tests to cover these fixes as well as some other previously untested paths.
---
runtime/lua/vim/lsp/sync.lua | 15 +-
.../plugin/lsp/incremental_sync_spec.lua | 269 ++++++++++++++++++
2 files changed, 277 insertions(+), 7 deletions(-)
diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua
index 5df2a4d144d2..d01f45ad8f2a 100644
--- a/runtime/lua/vim/lsp/sync.lua
+++ b/runtime/lua/vim/lsp/sync.lua
@@ -105,15 +105,16 @@ local function align_end_position(line, byte, offset_encoding)
char = compute_line_length(line, offset_encoding) + 1
else
-- Modifying line, find the nearest utf codepoint
- local offset = str_utf_end(line, byte)
+ local offset = str_utf_start(line, byte)
-- If the byte does not fall on the start of the character, then
-- align to the start of the next character.
- if offset > 0 then
- char = byte_to_utf(line, byte, offset_encoding) + 1
- byte = byte + offset
- else
+ if offset < 0 then
+ byte = byte + str_utf_end(line, byte) + 1
+ end
+ if byte <= #line then
char = byte_to_utf(line, byte, offset_encoding)
- byte = byte + offset
+ else
+ char = compute_line_length(line, offset_encoding) + 1
end
-- Extending line, find the nearest utf codepoint for the last valid character
end
@@ -167,7 +168,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline,
char_idx = compute_line_length(prev_line, offset_encoding) + 1
else
byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
- char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding)
+ char_idx = byte_to_utf(prev_line, byte_idx, offset_encoding)
end
-- Return the start difference (shared for new and prev lines)
diff --git a/test/functional/plugin/lsp/incremental_sync_spec.lua b/test/functional/plugin/lsp/incremental_sync_spec.lua
index 5dd34e766528..4e3eddb9607e 100644
--- a/test/functional/plugin/lsp/incremental_sync_spec.lua
+++ b/test/functional/plugin/lsp/incremental_sync_spec.lua
@@ -164,6 +164,201 @@ describe('incremental synchronization', function()
}
test_edit({"a"}, {"rb"}, expected_text_changes, 'utf-16', '\n')
end)
+ it('deleting a line', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 0
+ },
+ ['end'] = {
+ character = 0,
+ line = 1
+ }
+ },
+ rangeLength = 12,
+ text = ''
+ }
+ }
+ test_edit({"hello world"}, {"dd"}, expected_text_changes, 'utf-16', '\n')
+ end)
+ it('deleting an empty line', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 1
+ },
+ ['end'] = {
+ character = 0,
+ line = 2
+ }
+ },
+ rangeLength = 1,
+ text = ''
+ }
+ }
+ test_edit({"hello world", ""}, {"jdd"}, expected_text_changes, 'utf-16', '\n')
+ end)
+ it('adding a line', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 1
+ },
+ ['end'] = {
+ character = 0,
+ line = 1
+ }
+ },
+ rangeLength = 0,
+ text = 'hello world\n'
+ }
+ }
+ test_edit({"hello world"}, {"yyp"}, expected_text_changes, 'utf-16', '\n')
+ end)
+ it('adding an empty line', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 1
+ },
+ ['end'] = {
+ character = 0,
+ line = 1
+ }
+ },
+ rangeLength = 0,
+ text = '\n'
+ }
+ }
+ test_edit({"hello world"}, {"o"}, expected_text_changes, 'utf-16', '\n')
+ end)
+ end)
+ describe('multi line edit', function()
+ it('deletion and insertion', function()
+ local expected_text_changes = {
+ -- delete "_fsda" from end of line 1
+ {
+ range = {
+ ['start'] = {
+ character = 4,
+ line = 1
+ },
+ ['end'] = {
+ character = 9,
+ line = 1
+ }
+ },
+ rangeLength = 5,
+ text = ''
+ },
+ -- delete "hello world\n" from line 2
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 2
+ },
+ ['end'] = {
+ character = 0,
+ line = 3
+ }
+ },
+ rangeLength = 12,
+ text = ''
+ },
+ -- delete "1234" from beginning of line 2
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 2
+ },
+ ['end'] = {
+ character = 4,
+ line = 2
+ }
+ },
+ rangeLength = 4,
+ text = ''
+ },
+ -- add " asdf" to end of line 1
+ {
+ range = {
+ ['start'] = {
+ character = 4,
+ line = 1
+ },
+ ['end'] = {
+ character = 4,
+ line = 1
+ }
+ },
+ rangeLength = 0,
+ text = ' asdf'
+ },
+ -- delete " asdf\n" from line 2
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 2
+ },
+ ['end'] = {
+ character = 0,
+ line = 3
+ }
+ },
+ rangeLength = 6,
+ text = ''
+ },
+ -- undo entire deletion
+ {
+ range = {
+ ['start'] = {
+ character = 4,
+ line = 1
+ },
+ ['end'] = {
+ character = 9,
+ line = 1
+ }
+ },
+ rangeLength = 5,
+ text = "_fdsa\nhello world\n1234 asdf"
+ },
+ -- redo entire deletion
+ {
+ range = {
+ ['start'] = {
+ character = 4,
+ line = 1
+ },
+ ['end'] = {
+ character = 9,
+ line = 3
+ }
+ },
+ rangeLength = 27,
+ text = ' asdf'
+ },
+ }
+ local original_lines = {
+ "\\begin{document}",
+ "test_fdsa",
+ "hello world",
+ "1234 asdf",
+ "\\end{document}"
+ }
+ test_edit(original_lines, {"jf_vejjbhhdu<C-R>"}, expected_text_changes, 'utf-16', '\n')
+ end)
end)
describe('multi-operation edits', function()
@@ -297,6 +492,80 @@ describe('incremental synchronization', function()
}
test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n')
end)
+ it('replacing a multibyte character with matching prefix', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 1
+ },
+ ['end'] = {
+ character = 1,
+ line = 1
+ }
+ },
+ rangeLength = 1,
+ text = '⟩'
+ }
+ }
+ -- ⟨ is e29fa8, ⟩ is e29fa9
+ local original_lines = {
+ "\\begin{document}",
+ "⟨",
+ "\\end{document}",
+ }
+ test_edit(original_lines, {"jr⟩"}, expected_text_changes, 'utf-16', '\n')
+ end)
+ it('replacing a multibyte character with matching suffix', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 1
+ },
+ ['end'] = {
+ character = 1,
+ line = 1
+ }
+ },
+ rangeLength = 1,
+ text = 'ḟ'
+ }
+ }
+ -- ฟ is e0b89f, ḟ is e1b89f
+ local original_lines = {
+ "\\begin{document}",
+ "ฟ",
+ "\\end{document}",
+ }
+ test_edit(original_lines, {"jrḟ"}, expected_text_changes, 'utf-16', '\n')
+ end)
+ it('inserting before a multibyte character', function()
+ local expected_text_changes = {
+ {
+ range = {
+ ['start'] = {
+ character = 0,
+ line = 1
+ },
+ ['end'] = {
+ character = 0,
+ line = 1
+ }
+ },
+ rangeLength = 0,
+ text = ' '
+ }
+ }
+ local original_lines = {
+ "\\begin{document}",
+ "→",
+ "\\end{document}",
+ }
+ test_edit(original_lines, {"ji "}, expected_text_changes, 'utf-16', '\n')
+ end)
it('deleting a multibyte character from a long line', function()
local expected_text_changes = {
{

@ -5,7 +5,7 @@
nixpkgs.config.packageOverrides = pkgs: with pkgs; {
neovim-unwrapped = neovim-unwrapped.overrideDerivation (orig: {
patches = orig.patches ++ [ ./16669.patch ];
patches = orig.patches ++ [ ./16382.patch ./16666.patch ./16670.patch ./16669.patch ];
});
neovim-qt-unwrapped = neovim-qt-unwrapped.overrideDerivation (_: {