From e1d5bea5cce6e863b723c3f4007acaac34bd448e Mon Sep 17 00:00:00 2001 From: Gabriel Ebner Date: Sun, 26 Dec 2021 19:36:47 +0100 Subject: [PATCH] Update nvim lsp patches. --- 16382.patch | 319 ++++++++++++++++++++++++++++++++ 16666.patch | 60 ++++++ 16669.patch | 514 ++++++++++++++++++---------------------------------- 16670.patch | 346 +++++++++++++++++++++++++++++++++++ nvim05.nix | 2 +- 5 files changed, 904 insertions(+), 337 deletions(-) create mode 100644 16382.patch create mode 100644 16666.patch create mode 100644 16670.patch diff --git a/16382.patch b/16382.patch new file mode 100644 index 0000000..2f12916 --- /dev/null +++ b/16382.patch @@ -0,0 +1,319 @@ +From 4687d853a5b5475aa572e6004cb5c1a1ed45ce94 Mon Sep 17 00:00:00 2001 +From: Rishikesh Vaishnav +Date: Fri, 26 Nov 2021 00:34:54 +0000 +Subject: [PATCH] fix(lsp): handle offset encoding +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Co-authored-by: black-desk +Co-authored-by: Mathias Fußenegger +--- + runtime/lua/vim/lsp.lua | 6 +- + runtime/lua/vim/lsp/util.lua | 157 +++++++++++++++++++++++++---------- + 2 files changed, 116 insertions(+), 47 deletions(-) + +diff --git a/runtime/lua/vim/lsp.lua b/runtime/lua/vim/lsp.lua +index dbbfd7d1d810..c8440d62f315 100644 +--- a/runtime/lua/vim/lsp.lua ++++ b/runtime/lua/vim/lsp.lua +@@ -1492,11 +1492,7 @@ local function adjust_start_col(lnum, line, items, encoding) + end + end + if min_start_char then +- if encoding == 'utf-8' then +- return min_start_char +- else +- return vim.str_byteindex(line, min_start_char, encoding == 'utf-16') +- end ++ return util._str_byteindex_enc(line, min_start_char, encoding) + else + return nil + end +diff --git a/runtime/lua/vim/lsp/util.lua b/runtime/lua/vim/lsp/util.lua +index 059e66c53a06..cefacb26233d 100644 +--- a/runtime/lua/vim/lsp/util.lua ++++ b/runtime/lua/vim/lsp/util.lua +@@ -90,6 +90,42 @@ local function split_lines(value) + return split(value, '\n', true) + end + ++--- Convert byte index to `encoding` index. ++--- Convenience wrapper around vim.str_utfindex ++---@param line string line to be indexed ++---@param index number byte index (utf-8), or `nil` for length ++---@param encoding string utf-8|utf-16|utf-32|nil defaults to utf-16 ++---@return number `encoding` index of `index` in `line` ++function M._str_utfindex_enc(line, index, encoding) ++ if encoding ~= 'utf-8' then ++ local col32, col16 = vim.str_utfindex(line, index) ++ if encoding == 'utf-32' then ++ return col32 ++ else ++ return col16 ++ end ++ else ++ return index ++ end ++end ++ ++--- Convert UTF index to `encoding` index. ++--- Convenience wrapper around vim.str_byteindex ++---Alternative to vim.str_byteindex that takes an encoding. ++---@param line string line to be indexed ++---@param index number UTF index ++---@param encoding string utf-8|utf-16|utf-32|nil defaults to utf-16 ++---@return number byte (utf-8) index of `encoding` index `index` in `line` ++function M._str_byteindex_enc(line, index, encoding) ++ if encoding ~= 'utf-8' then ++ return vim.str_byteindex(line, index, not encoding or encoding ~= 'utf-32') ++ else ++ return index ++ end ++end ++ ++local _str_utfindex_enc = M._str_utfindex_enc ++local _str_byteindex_enc = M._str_byteindex_enc + --- Replaces text in a range with new text. + --- + --- CAUTION: Changes in-place! +@@ -237,6 +273,7 @@ end + ---@private + --- Position is a https://microsoft.github.io/language-server-protocol/specifications/specification-current/#position + --- Returns a zero-indexed column, since set_lines() does the conversion to ++---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to utf-16 + --- 1-indexed + local function get_line_byte_from_position(bufnr, position, offset_encoding) + -- LSP's line and characters are 0-indexed +@@ -247,13 +284,7 @@ local function get_line_byte_from_position(bufnr, position, offset_encoding) + if col > 0 then + local line = get_line(bufnr, position.line) + local ok, result +- +- if offset_encoding == "utf-16" or not offset_encoding then +- ok, result = pcall(vim.str_byteindex, line, col, true) +- elseif offset_encoding == "utf-32" then +- ok, result = pcall(vim.str_byteindex, line, col, false) +- end +- ++ ok, result = pcall(_str_byteindex_enc, line, col, offset_encoding) + if ok then + return result + end +@@ -325,12 +356,15 @@ end + --- Applies a list of text edits to a buffer. + ---@param text_edits table list of `TextEdit` objects + ---@param bufnr number Buffer id ++---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to encoding of first client of `bufnr` + ---@see https://microsoft.github.io/language-server-protocol/specifications/specification-current/#textEdit +-function M.apply_text_edits(text_edits, bufnr) ++function M.apply_text_edits(text_edits, bufnr, offset_encoding) + validate { + text_edits = { text_edits, 't', false }; + bufnr = { bufnr, 'number', false }; ++ offset_encoding = { offset_encoding, 'string', true }; + } ++ offset_encoding = offset_encoding or M._get_offset_encoding(bufnr) + if not next(text_edits) then return end + if not api.nvim_buf_is_loaded(bufnr) then + vim.fn.bufload(bufnr) +@@ -367,8 +401,7 @@ function M.apply_text_edits(text_edits, bufnr) + -- Some LSP servers may return +1 range of the buffer content but nvim_buf_set_text can't accept it so we should fix it here. + local has_eol_text_edit = false + local max = vim.api.nvim_buf_line_count(bufnr) +- -- TODO handle offset_encoding +- local _, len = vim.str_utfindex(vim.api.nvim_buf_get_lines(bufnr, -2, -1, false)[1] or '') ++ local len = _str_utfindex_enc(vim.api.nvim_buf_get_lines(bufnr, -2, -1, false)[1] or '', nil, offset_encoding) + text_edits = vim.tbl_map(function(text_edit) + if max <= text_edit.range.start.line then + text_edit.range.start.line = max - 1 +@@ -1432,11 +1465,11 @@ do --[[ References ]] + --- + ---@param bufnr number Buffer id + ---@param references table List of `DocumentHighlight` objects to highlight +- ---@param offset_encoding string One of "utf-8", "utf-16", "utf-32", or nil. Defaults to utf-16 ++ ---@param offset_encoding string One of "utf-8", "utf-16", "utf-32", or nil. Defaults to `offset_encoding` of first client of `bufnr` + ---@see https://microsoft.github.io/language-server-protocol/specifications/specification-3-17/#documentHighlight + function M.buf_highlight_references(bufnr, references, offset_encoding) + validate { bufnr = {bufnr, 'n', true} } +- offset_encoding = offset_encoding or 'utf-16' ++ offset_encoding = offset_encoding or M._get_offset_encoding(bufnr) + for _, reference in ipairs(references) do + local start_line, start_char = reference["range"]["start"]["line"], reference["range"]["start"]["character"] + local end_line, end_char = reference["range"]["end"]["line"], reference["range"]["end"]["character"] +@@ -1647,30 +1680,61 @@ function M.try_trim_markdown_code_blocks(lines) + return 'markdown' + end + +-local str_utfindex = vim.str_utfindex + ---@private +-local function make_position_param() +- local row, col = unpack(api.nvim_win_get_cursor(0)) ++---@param window (optional, number): window handle or 0 for current, defaults to current ++---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to `offset_encoding` of first client of buffer of `window` ++local function make_position_param(window, offset_encoding) ++ window = window or 0 ++ local buf = vim.api.nvim_win_get_buf(window) ++ local row, col = unpack(api.nvim_win_get_cursor(window)) ++ offset_encoding = offset_encoding or M._get_offset_encoding(buf) + row = row - 1 +- local line = api.nvim_buf_get_lines(0, row, row+1, true)[1] ++ local line = api.nvim_buf_get_lines(buf, row, row+1, true)[1] + if not line then + return { line = 0; character = 0; } + end +- -- TODO handle offset_encoding +- local _ +- _, col = str_utfindex(line, col) ++ ++ col = _str_utfindex_enc(line, col, offset_encoding) ++ + return { line = row; character = col; } + end + + --- Creates a `TextDocumentPositionParams` object for the current buffer and cursor position. + --- ++---@param window (optional, number): window handle or 0 for current, defaults to current ++---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to `offset_encoding` of first client of buffer of `window` + ---@returns `TextDocumentPositionParams` object + ---@see https://microsoft.github.io/language-server-protocol/specifications/specification-current/#textDocumentPositionParams +-function M.make_position_params() ++function M.make_position_params(window, offset_encoding) ++ window = window or 0 ++ local buf = vim.api.nvim_win_get_buf(window) ++ offset_encoding = offset_encoding or M._get_offset_encoding(buf) + return { +- textDocument = M.make_text_document_params(); +- position = make_position_param() ++ textDocument = M.make_text_document_params(buf); ++ position = make_position_param(window, offset_encoding) ++ } ++end ++ ++--- Utility function for getting the encoding of the first LSP client on the given buffer. ++---@param bufnr (number) buffer handle or 0 for current, defaults to current ++---@returns (string) encoding first client if there is one, nil otherwise ++function M._get_offset_encoding(bufnr) ++ validate { ++ bufnr = {bufnr, 'n', true}; + } ++ ++ local offset_encoding ++ ++ for _, client in pairs(vim.lsp.buf_get_clients(bufnr)) do ++ local this_offset_encoding = client.offset_encoding or "utf-16" ++ if not offset_encoding then ++ offset_encoding = this_offset_encoding ++ elseif offset_encoding ~= this_offset_encoding then ++ vim.notify("warning: multiple different client offset_encodings detected for buffer, this is not supported yet", vim.log.levels.WARN) ++ end ++ end ++ ++ return offset_encoding + end + + --- Using the current position in the current buffer, creates an object that +@@ -1678,12 +1742,16 @@ end + --- `textDocument/codeAction`, `textDocument/colorPresentation`, + --- `textDocument/rangeFormatting`. + --- ++---@param window (optional, number): window handle or 0 for current, defaults to current ++---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to `offset_encoding` of first client of buffer of `window` + ---@returns { textDocument = { uri = `current_file_uri` }, range = { start = + ---`current_position`, end = `current_position` } } +-function M.make_range_params() +- local position = make_position_param() ++function M.make_range_params(window, offset_encoding) ++ local buf = vim.api.nvim_win_get_buf(window) ++ offset_encoding = offset_encoding or M._get_offset_encoding(buf) ++ local position = make_position_param(window, offset_encoding) + return { +- textDocument = M.make_text_document_params(), ++ textDocument = M.make_text_document_params(buf), + range = { start = position; ["end"] = position; } + } + end +@@ -1695,27 +1763,29 @@ end + ---Defaults to the start of the last visual selection. + ---@param end_pos ({number, number}, optional) mark-indexed position. + ---Defaults to the end of the last visual selection. ++---@param bufnr (optional, number): buffer handle or 0 for current, defaults to current ++---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to `offset_encoding` of first client of `bufnr` + ---@returns { textDocument = { uri = `current_file_uri` }, range = { start = + ---`start_position`, end = `end_position` } } +-function M.make_given_range_params(start_pos, end_pos) ++function M.make_given_range_params(start_pos, end_pos, bufnr, offset_encoding) + validate { + start_pos = {start_pos, 't', true}; + end_pos = {end_pos, 't', true}; ++ offset_encoding = {offset_encoding, 's', true}; + } +- local A = list_extend({}, start_pos or api.nvim_buf_get_mark(0, '<')) +- local B = list_extend({}, end_pos or api.nvim_buf_get_mark(0, '>')) ++ bufnr = bufnr or 0 ++ offset_encoding = offset_encoding or M._get_offset_encoding(bufnr) ++ local A = list_extend({}, start_pos or api.nvim_buf_get_mark(bufnr, '<')) ++ local B = list_extend({}, end_pos or api.nvim_buf_get_mark(bufnr, '>')) + -- convert to 0-index + A[1] = A[1] - 1 + B[1] = B[1] - 1 +- -- account for encoding. +- -- TODO handle offset_encoding ++ -- account for offset_encoding. + if A[2] > 0 then +- local _, char = M.character_offset(0, A[1], A[2]) +- A = {A[1], char} ++ A = {A[1], M.character_offset(bufnr, A[1], A[2], offset_encoding)} + end + if B[2] > 0 then +- local _, char = M.character_offset(0, B[1], B[2]) +- B = {B[1], char} ++ B = {B[1], M.character_offset(bufnr, B[1], B[2], offset_encoding)} + end + -- we need to offset the end character position otherwise we loose the last + -- character of the selection, as LSP end position is exclusive +@@ -1724,7 +1794,7 @@ function M.make_given_range_params(start_pos, end_pos) + B[2] = B[2] + 1 + end + return { +- textDocument = M.make_text_document_params(), ++ textDocument = M.make_text_document_params(bufnr), + range = { + start = {line = A[1], character = A[2]}, + ['end'] = {line = B[1], character = B[2]} +@@ -1734,10 +1804,11 @@ end + + --- Creates a `TextDocumentIdentifier` object for the current buffer. + --- ++---@param bufnr (optional, number): Buffer handle, defaults to current + ---@returns `TextDocumentIdentifier` + ---@see https://microsoft.github.io/language-server-protocol/specifications/specification-current/#textDocumentIdentifier +-function M.make_text_document_params() +- return { uri = vim.uri_from_bufnr(0) } ++function M.make_text_document_params(bufnr) ++ return { uri = vim.uri_from_bufnr(bufnr or 0) } + end + + --- Create the workspace params +@@ -1780,14 +1851,16 @@ end + ---@param buf buffer id (0 for current) + ---@param row 0-indexed line + ---@param col 0-indexed byte offset in line +----@returns (number, number) UTF-32 and UTF-16 index of the character in line {row} column {col} in buffer {buf} +-function M.character_offset(bufnr, row, col) +- local line = get_line(bufnr, row) ++---@param offset_encoding string utf-8|utf-16|utf-32|nil defaults to `offset_encoding` of first client of `buf` ++---@returns (number, number) `offset_encoding` index of the character in line {row} column {col} in buffer {buf} ++function M.character_offset(buf, row, col, offset_encoding) ++ local line = get_line(buf, row) ++ offset_encoding = offset_encoding or M._get_offset_encoding(buf) + -- If the col is past the EOL, use the line length. + if col > #line then +- return str_utfindex(line) ++ return _str_utfindex_enc(line, nil, offset_encoding) + end +- return str_utfindex(line, col) ++ return _str_utfindex_enc(line, col, offset_encoding) + end + + --- Helper function to return nested values in language server settings diff --git a/16666.patch b/16666.patch new file mode 100644 index 0000000..26bab2d --- /dev/null +++ b/16666.patch @@ -0,0 +1,60 @@ +From 1a887293ef66b51220d40f8f91dfc8245f8aeec5 Mon Sep 17 00:00:00 2001 +From: Michael Lingelbach +Date: Wed, 15 Dec 2021 09:07:23 -0800 +Subject: [PATCH 1/2] fix: do not cast offset to char_u + +* str_utf_start/end both cast the offset into the utf string +to a char_u, a pointer + long is well-defined and the cast is +unnecessary. This previously resulted in issues for offsets greater than +256. +--- + src/nvim/lua/stdlib.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/nvim/lua/stdlib.c b/src/nvim/lua/stdlib.c +index b5553060a1dc..e7dba1239280 100644 +--- a/src/nvim/lua/stdlib.c ++++ b/src/nvim/lua/stdlib.c +@@ -231,7 +231,7 @@ static int nlua_str_utf_start(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL + if (offset < 0 || offset > (intptr_t)s1_len) { + return luaL_error(lstate, "index out of range"); + } +- int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1); ++ int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + offset - 1); + lua_pushinteger(lstate, tail_offset); + return 1; + } +@@ -251,7 +251,7 @@ static int nlua_str_utf_end(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL + if (offset < 0 || offset > (intptr_t)s1_len) { + return luaL_error(lstate, "index out of range"); + } +- int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1); ++ int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + offset - 1); + lua_pushinteger(lstate, tail_offset); + return 1; + } + +From fcbffcd92a2f53c224e2297c4807c14ac553bdbf Mon Sep 17 00:00:00 2001 +From: Michael Lingelbach +Date: Wed, 15 Dec 2021 09:07:23 -0800 +Subject: [PATCH 2/2] chore: improve naming consistency in str_utf_start + +--- + src/nvim/lua/stdlib.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/src/nvim/lua/stdlib.c b/src/nvim/lua/stdlib.c +index e7dba1239280..c9f82a2df127 100644 +--- a/src/nvim/lua/stdlib.c ++++ b/src/nvim/lua/stdlib.c +@@ -231,8 +231,8 @@ static int nlua_str_utf_start(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL + if (offset < 0 || offset > (intptr_t)s1_len) { + return luaL_error(lstate, "index out of range"); + } +- int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + offset - 1); +- lua_pushinteger(lstate, tail_offset); ++ int head_offset = mb_head_off((char_u *)s1, (char_u *)s1 + offset - 1); ++ lua_pushinteger(lstate, head_offset); + return 1; + } + diff --git a/16669.patch b/16669.patch index 3cd7e23..83535ee 100644 --- a/16669.patch +++ b/16669.patch @@ -1,8 +1,26 @@ +From d6b654d355dfbe8cd0fce3e73eeacbd068419416 Mon Sep 17 00:00:00 2001 +From: Rishikesh Vaishnav +Date: Wed, 15 Dec 2021 18:08:10 +0000 +Subject: [PATCH 1/2] refactor(lsp): always crop matching text from end of + incremental sync range + +When constructing incremental updates to be sent to the server, +previously we only attempted to crop matching text from the end of +the old and new `on_lines` ranges on single-line updates. +This refactor makes it so that we always attempt to crop from the end, +no matter how many lines were changed. This reduces the total size of +increments sent to the server when possible by not including +text that wasn't changed at the end of the incremental update range. +--- + runtime/lua/vim/lsp/sync.lua | 127 +++++++++--------- + .../plugin/lsp/incremental_sync_spec.lua | 18 +-- + 2 files changed, 69 insertions(+), 76 deletions(-) + diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua -index 5df2a4d144d2..c745fadf876e 100644 +index d01f45ad8f2a..c745fadf876e 100644 --- a/runtime/lua/vim/lsp/sync.lua +++ b/runtime/lua/vim/lsp/sync.lua -@@ -93,30 +93,38 @@ end +@@ -93,31 +93,38 @@ end -- utf-8 index and either the utf-16, or utf-32 index. ---@param line string the line to index into ---@param byte integer the byte idx @@ -10,54 +28,55 @@ index 5df2a4d144d2..c745fadf876e 100644 ---@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8) ---@returns table byte_idx and char_idx of first change position -local function align_end_position(line, byte, offset_encoding) +- local char +- -- If on the first byte, or an empty string: the trivial case +- if byte == 1 or #line == 0 then +- char = byte +- -- Called in the case of extending an empty line "" -> "a" +- elseif byte == #line + 1 then +- char = compute_line_length(line, offset_encoding) + 1 +- else +local function align_position(line, byte, start, offset_encoding) + if byte ~= 1 and byte <= #line then -+ -- Modifying line, find the nearest utf codepoint -+ local offset = str_utf_start(line, byte) + -- Modifying line, find the nearest utf codepoint + local offset = str_utf_start(line, byte) + -+ -- If the byte does not fall on the start of the character, then + -- If the byte does not fall on the start of the character, then +- -- align to the start of the next character. + -- align to the start of the next character if end align, and start + -- of this character if start align -+ if offset < 0 then + if offset < 0 then +- byte = byte + str_utf_end(line, byte) + 1 +- end +- if byte <= #line then +- char = byte_to_utf(line, byte, offset_encoding) +- else +- char = compute_line_length(line, offset_encoding) + 1 + if start then + byte = byte + offset + else + byte = byte + str_utf_end(line, byte) + 1 + end -+ end -+ end + end +- -- Extending line, find the nearest utf codepoint for the last valid character + end + - local char -- -- If on the first byte, or an empty string: the trivial case -- if byte == 1 or #line == 0 then -- char = byte ++ local char + + -- optimize for first byte case + if byte == 1 then + char = 1 - -- Called in the case of extending an empty line "" -> "a" - elseif byte == #line + 1 then - char = compute_line_length(line, offset_encoding) + 1 - else -- -- Modifying line, find the nearest utf codepoint -- local offset = str_utf_end(line, byte) -- -- If the byte does not fall on the start of the character, then -- -- align to the start of the next character. -- if offset > 0 then -- char = byte_to_utf(line, byte, offset_encoding) + 1 -- byte = byte + offset -- else -- char = byte_to_utf(line, byte, offset_encoding) -- byte = byte + offset -- end -- -- Extending line, find the nearest utf codepoint for the last valid character ++ -- Called in the case of extending an empty line "" -> "a" ++ elseif byte == #line + 1 then ++ char = compute_line_length(line, offset_encoding) + 1 ++ else + char = byte_to_utf(line, byte, offset_encoding) - end ++ end + return byte, char end -@@ -157,18 +165,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline, +@@ -158,18 +165,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline, end -- Convert byte to codepoint if applicable @@ -71,13 +90,13 @@ index 5df2a4d144d2..c745fadf876e 100644 - char_idx = compute_line_length(prev_line, offset_encoding) + 1 - else - byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx) -- char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding) +- char_idx = byte_to_utf(prev_line, byte_idx, offset_encoding) - end + local byte_idx, char_idx = align_position(prev_line, start_byte_idx, true, offset_encoding) -- Return the start difference (shared for new and prev lines) return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx } -@@ -209,51 +206,48 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline, +@@ -210,51 +206,48 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline, local prev_line_length = #prev_line local curr_line_length = #curr_line @@ -95,9 +114,6 @@ index 5df2a4d144d2..c745fadf876e 100644 + else + curr_line_range = curr_line_length - 1 + end -+ -+ -- Maximum number of bytes to search backwards for mismatch -+ local max_length = min(prev_line_range, curr_line_range) - -- Editing the same line - -- If the byte offset is zero, that means there is a difference on the last byte (not newline) @@ -116,6 +132,9 @@ index 5df2a4d144d2..c745fadf876e 100644 - then - break - end ++ -- Maximum number of bytes to search backwards for mismatch ++ local max_length = min(prev_line_range, curr_line_range) ++ + -- Negative offset to last shared byte between prev_line and curr_line + -- -1 offset indicates no shared byte + local byte_offset = -1 @@ -164,235 +183,43 @@ index 5df2a4d144d2..c745fadf876e 100644 return prev_end_range, curr_end_range end -diff --git a/src/nvim/lua/stdlib.c b/src/nvim/lua/stdlib.c -index b746e03625ad..9441b88cfb98 100644 ---- a/src/nvim/lua/stdlib.c -+++ b/src/nvim/lua/stdlib.c -@@ -230,7 +230,7 @@ static int nlua_str_utf_start(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL - if (offset < 0 || offset > (intptr_t)s1_len) { - return luaL_error(lstate, "index out of range"); - } -- int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1); -+ int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + offset - 1); - lua_pushinteger(lstate, tail_offset); - return 1; - } -@@ -250,7 +250,7 @@ static int nlua_str_utf_end(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL - if (offset < 0 || offset > (intptr_t)s1_len) { - return luaL_error(lstate, "index out of range"); - } -- int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1); -+ int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + offset - 1); - lua_pushinteger(lstate, tail_offset); - return 1; - } diff --git a/test/functional/plugin/lsp/incremental_sync_spec.lua b/test/functional/plugin/lsp/incremental_sync_spec.lua -index 5dd34e766528..60e35c3620cc 100644 +index 4e3eddb9607e..e13a5acf3c9a 100644 --- a/test/functional/plugin/lsp/incremental_sync_spec.lua +++ b/test/functional/plugin/lsp/incremental_sync_spec.lua -@@ -164,6 +164,201 @@ describe('incremental synchronization', function() - } - test_edit({"a"}, {"rb"}, expected_text_changes, 'utf-16', '\n') - end) -+ it('deleting a line', function() -+ local expected_text_changes = { -+ { -+ range = { -+ ['start'] = { -+ character = 0, -+ line = 0 -+ }, -+ ['end'] = { -+ character = 0, -+ line = 1 -+ } -+ }, -+ rangeLength = 12, -+ text = '' -+ } -+ } -+ test_edit({"hello world"}, {"dd"}, expected_text_changes, 'utf-16', '\n') -+ end) -+ it('deleting an empty line', function() -+ local expected_text_changes = { -+ { -+ range = { -+ ['start'] = { -+ character = 0, -+ line = 1 -+ }, -+ ['end'] = { -+ character = 0, -+ line = 2 -+ } -+ }, -+ rangeLength = 1, -+ text = '' -+ } -+ } -+ test_edit({"hello world", ""}, {"jdd"}, expected_text_changes, 'utf-16', '\n') -+ end) -+ it('adding a line', function() -+ local expected_text_changes = { -+ { -+ range = { -+ ['start'] = { -+ character = 0, -+ line = 1 -+ }, -+ ['end'] = { -+ character = 0, -+ line = 1 -+ } -+ }, -+ rangeLength = 0, -+ text = 'hello world\n' -+ } -+ } -+ test_edit({"hello world"}, {"yyp"}, expected_text_changes, 'utf-16', '\n') -+ end) -+ it('adding an empty line', function() -+ local expected_text_changes = { -+ { -+ range = { -+ ['start'] = { -+ character = 0, -+ line = 1 -+ }, -+ ['end'] = { -+ character = 0, -+ line = 1 -+ } -+ }, -+ rangeLength = 0, -+ text = '\n' -+ } -+ } -+ test_edit({"hello world"}, {"o"}, expected_text_changes, 'utf-16', '\n') -+ end) -+ end) -+ describe('multi line edit', function() -+ it('deletion and insertion', function() -+ local expected_text_changes = { -+ -- delete "_fsda" from end of line 1 -+ { -+ range = { -+ ['start'] = { +@@ -327,12 +327,12 @@ describe('incremental synchronization', function() + line = 1 + }, + ['end'] = { +- character = 9, + character = 4, -+ line = 1 -+ }, -+ ['end'] = { -+ character = 9, -+ line = 1 -+ } -+ }, -+ rangeLength = 5, -+ text = '' -+ }, -+ -- delete "hello world\n" from line 2 -+ { -+ range = { -+ ['start'] = { -+ character = 0, -+ line = 2 -+ }, -+ ['end'] = { -+ character = 0, -+ line = 3 -+ } -+ }, -+ rangeLength = 12, -+ text = '' -+ }, -+ -- delete "1234" from beginning of line 2 -+ { -+ range = { -+ ['start'] = { -+ character = 0, -+ line = 2 -+ }, -+ ['end'] = { -+ character = 4, -+ line = 2 -+ } -+ }, -+ rangeLength = 4, -+ text = '' -+ }, -+ -- add " asdf" to end of line 1 -+ { -+ range = { -+ ['start'] = { -+ character = 4, -+ line = 1 -+ }, -+ ['end'] = { -+ character = 4, -+ line = 1 -+ } -+ }, -+ rangeLength = 0, -+ text = ' asdf' -+ }, -+ -- delete " asdf\n" from line 2 -+ { -+ range = { -+ ['start'] = { -+ character = 0, -+ line = 2 -+ }, -+ ['end'] = { -+ character = 0, -+ line = 3 -+ } -+ }, -+ rangeLength = 6, -+ text = '' -+ }, -+ -- undo entire deletion -+ { -+ range = { -+ ['start'] = { -+ character = 4, -+ line = 1 -+ }, -+ ['end'] = { -+ character = 4, -+ line = 1 -+ } -+ }, + line = 1 + } + }, +- rangeLength = 5, +- text = "_fdsa\nhello world\n1234 asdf" + rangeLength = 0, + text = "_fdsa\nhello world\n1234" -+ }, -+ -- redo entire deletion -+ { -+ range = { -+ ['start'] = { + }, + -- redo entire deletion + { +@@ -342,12 +342,12 @@ describe('incremental synchronization', function() + line = 1 + }, + ['end'] = { +- character = 9, + character = 4, -+ line = 1 -+ }, -+ ['end'] = { -+ character = 4, -+ line = 3 -+ } -+ }, + line = 3 + } + }, +- rangeLength = 27, +- text = ' asdf' + rangeLength = 22, + text = '' -+ }, -+ } -+ local original_lines = { -+ "\\begin{document}", -+ "test_fdsa", -+ "hello world", -+ "1234 asdf", -+ "\\end{document}" -+ } -+ test_edit(original_lines, {"jf_vejjbhhdu"}, expected_text_changes, 'utf-16', '\n') -+ end) - end) - - describe('multi-operation edits', function() -@@ -265,12 +460,12 @@ describe('incremental synchronization', function() + }, + } + local original_lines = { +@@ -460,12 +460,12 @@ describe('incremental synchronization', function() line = 0 }, ['end'] = { @@ -408,84 +235,99 @@ index 5dd34e766528..60e35c3620cc 100644 }, } test_edit({"test1 test2", "test3"}, {"J", "u"}, expected_text_changes, 'utf-16', '\n') -@@ -297,6 +492,80 @@ describe('incremental synchronization', function() - } - test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n') - end) -+ it('replacing a multibyte character with matching prefix', function() -+ local expected_text_changes = { -+ { -+ range = { -+ ['start'] = { -+ character = 0, -+ line = 1 -+ }, -+ ['end'] = { -+ character = 1, -+ line = 1 -+ } -+ }, -+ rangeLength = 1, -+ text = '⟩' -+ } -+ } -+ -- ⟨ is e29fa8, ⟩ is e29fa9 -+ local original_lines = { -+ "\\begin{document}", -+ "⟨", -+ "\\end{document}", -+ } -+ test_edit(original_lines, {"jr⟩"}, expected_text_changes, 'utf-16', '\n') -+ end) -+ it('replacing a multibyte character with matching suffix', function() -+ local expected_text_changes = { -+ { -+ range = { -+ ['start'] = { -+ character = 0, -+ line = 1 -+ }, -+ ['end'] = { -+ character = 1, -+ line = 1 -+ } -+ }, -+ rangeLength = 1, -+ text = 'ḟ' -+ } -+ } -+ -- ฟ is e0b89f, ḟ is e1b89f -+ local original_lines = { -+ "\\begin{document}", -+ "ฟ", -+ "\\end{document}", -+ } -+ test_edit(original_lines, {"jrḟ"}, expected_text_changes, 'utf-16', '\n') -+ end) -+ it('inserting before a multibyte character', function() -+ local expected_text_changes = { -+ { -+ range = { -+ ['start'] = { -+ character = 0, -+ line = 1 -+ }, -+ ['end'] = { -+ character = 0, -+ line = 1 -+ } -+ }, -+ rangeLength = 0, -+ text = ' ' -+ } -+ } -+ local original_lines = { -+ "\\begin{document}", -+ "→", -+ "\\end{document}", -+ } -+ test_edit(original_lines, {"ji "}, expected_text_changes, 'utf-16', '\n') -+ end) - it('deleting a multibyte character from a long line', function() - local expected_text_changes = { - { + +From 7a4877f61de0616964b8f939ad132fc256235d93 Mon Sep 17 00:00:00 2001 +From: Rishikesh Vaishnav +Date: Tue, 21 Dec 2021 02:05:36 +0000 +Subject: [PATCH 2/2] Replace UTF helper functions with + `vim.lsp.util._str_utfindex_enc()`. + +--- + runtime/lua/vim/lsp/sync.lua | 49 +++--------------------------------- + 1 file changed, 4 insertions(+), 45 deletions(-) + +diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua +index c745fadf876e..f725d3272291 100644 +--- a/runtime/lua/vim/lsp/sync.lua ++++ b/runtime/lua/vim/lsp/sync.lua +@@ -44,50 +44,9 @@ local M = {} + -- local string.byte, unclear if this is necessary for JIT compilation + local str_byte = string.byte + local min = math.min +-local str_utfindex = vim.str_utfindex + local str_utf_start = vim.str_utf_start + local str_utf_end = vim.str_utf_end + +----@private +--- Given a line, byte idx, and offset_encoding convert to the +--- utf-8, utf-16, or utf-32 index. +----@param line string the line to index into +----@param byte integer the byte idx +----@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8) +---@returns integer the utf idx for the given encoding +-local function byte_to_utf(line, byte, offset_encoding) +- -- convert to 0 based indexing for str_utfindex +- byte = byte - 1 +- +- local utf_idx +- local _ +- -- Convert the byte range to utf-{8,16,32} and convert 1-based (lua) indexing to 0-based +- if offset_encoding == 'utf-16' then +- _, utf_idx = str_utfindex(line, byte) +- elseif offset_encoding == 'utf-32' then +- utf_idx, _ = str_utfindex(line, byte) +- else +- utf_idx = byte +- end +- +- -- convert to 1 based indexing +- return utf_idx + 1 +-end +- +----@private +-local function compute_line_length(line, offset_encoding) +- local length +- local _ +- if offset_encoding == 'utf-16' then +- _, length = str_utfindex(line) +- elseif offset_encoding == 'utf-32' then +- length, _ = str_utfindex(line) +- else +- length = #line +- end +- return length +-end +- + ---@private + -- Given a line, byte idx, alignment, and offset_encoding convert to the aligned + -- utf-8 index and either the utf-16, or utf-32 index. +@@ -120,9 +79,9 @@ local function align_position(line, byte, start, offset_encoding) + char = 1 + -- Called in the case of extending an empty line "" -> "a" + elseif byte == #line + 1 then +- char = compute_line_length(line, offset_encoding) + 1 ++ char = vim.lsp.util._str_utfindex_enc(line, nil, offset_encoding) + 1 + else +- char = byte_to_utf(line, byte, offset_encoding) ++ char = vim.lsp.util._str_utfindex_enc(line, byte - 1, offset_encoding) + 1 + end + + return byte, char +@@ -306,7 +265,7 @@ local function compute_range_length(lines, start_range, end_range, offset_encodi + local start_line = lines[start_range.line_idx] + local range_length + if start_line and #start_line > 0 then +- range_length = compute_line_length(start_line, offset_encoding) - start_range.char_idx + 1 + line_ending_length ++ range_length = vim.lsp.util._str_utfindex_enc(start_line, nil, offset_encoding) - start_range.char_idx + 1 + line_ending_length + else + -- Length of newline character + range_length = line_ending_length +@@ -316,7 +275,7 @@ local function compute_range_length(lines, start_range, end_range, offset_encodi + for idx = start_range.line_idx + 1, end_range.line_idx - 1 do + -- Length full line plus newline character + if #lines[idx] > 0 then +- range_length = range_length + compute_line_length(lines[idx], offset_encoding) + #line_ending ++ range_length = range_length + vim.lsp.util._str_utfindex_enc(lines[idx], nil, offset_encoding) + #line_ending + else + range_length = range_length + line_ending_length + end diff --git a/16670.patch b/16670.patch new file mode 100644 index 0000000..6f68824 --- /dev/null +++ b/16670.patch @@ -0,0 +1,346 @@ +From 166c4d54efc9e8c10502db13c094d7de3f245ce5 Mon Sep 17 00:00:00 2001 +From: Rishikesh Vaishnav +Date: Wed, 15 Dec 2021 21:19:43 +0000 +Subject: [PATCH] fix(lsp): incremental sync UTF fixes (#16624) + +Aligning end position: +- fix check for preexisting UTF-8 alignment: + check `vim.str_utf_start() == 0` instead of `vim.str_utf_end() == 0` +- fix setting of byte index when not already aligned: + set to beginning of next codepoint rather than end of this one +- set char index after aligning byte index, removing unnecessary + adjustment of byte index when already aligned + +Aligning start position: +- fix setting of char index: + use aligned byte index rather than original in `byte_to_utf()` + +Add tests to cover these fixes as well as some other previously untested paths. +--- + runtime/lua/vim/lsp/sync.lua | 15 +- + .../plugin/lsp/incremental_sync_spec.lua | 269 ++++++++++++++++++ + 2 files changed, 277 insertions(+), 7 deletions(-) + +diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua +index 5df2a4d144d2..d01f45ad8f2a 100644 +--- a/runtime/lua/vim/lsp/sync.lua ++++ b/runtime/lua/vim/lsp/sync.lua +@@ -105,15 +105,16 @@ local function align_end_position(line, byte, offset_encoding) + char = compute_line_length(line, offset_encoding) + 1 + else + -- Modifying line, find the nearest utf codepoint +- local offset = str_utf_end(line, byte) ++ local offset = str_utf_start(line, byte) + -- If the byte does not fall on the start of the character, then + -- align to the start of the next character. +- if offset > 0 then +- char = byte_to_utf(line, byte, offset_encoding) + 1 +- byte = byte + offset +- else ++ if offset < 0 then ++ byte = byte + str_utf_end(line, byte) + 1 ++ end ++ if byte <= #line then + char = byte_to_utf(line, byte, offset_encoding) +- byte = byte + offset ++ else ++ char = compute_line_length(line, offset_encoding) + 1 + end + -- Extending line, find the nearest utf codepoint for the last valid character + end +@@ -167,7 +168,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline, + char_idx = compute_line_length(prev_line, offset_encoding) + 1 + else + byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx) +- char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding) ++ char_idx = byte_to_utf(prev_line, byte_idx, offset_encoding) + end + + -- Return the start difference (shared for new and prev lines) +diff --git a/test/functional/plugin/lsp/incremental_sync_spec.lua b/test/functional/plugin/lsp/incremental_sync_spec.lua +index 5dd34e766528..4e3eddb9607e 100644 +--- a/test/functional/plugin/lsp/incremental_sync_spec.lua ++++ b/test/functional/plugin/lsp/incremental_sync_spec.lua +@@ -164,6 +164,201 @@ describe('incremental synchronization', function() + } + test_edit({"a"}, {"rb"}, expected_text_changes, 'utf-16', '\n') + end) ++ it('deleting a line', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 0 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 1 ++ } ++ }, ++ rangeLength = 12, ++ text = '' ++ } ++ } ++ test_edit({"hello world"}, {"dd"}, expected_text_changes, 'utf-16', '\n') ++ end) ++ it('deleting an empty line', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 2 ++ } ++ }, ++ rangeLength = 1, ++ text = '' ++ } ++ } ++ test_edit({"hello world", ""}, {"jdd"}, expected_text_changes, 'utf-16', '\n') ++ end) ++ it('adding a line', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 1 ++ } ++ }, ++ rangeLength = 0, ++ text = 'hello world\n' ++ } ++ } ++ test_edit({"hello world"}, {"yyp"}, expected_text_changes, 'utf-16', '\n') ++ end) ++ it('adding an empty line', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 1 ++ } ++ }, ++ rangeLength = 0, ++ text = '\n' ++ } ++ } ++ test_edit({"hello world"}, {"o"}, expected_text_changes, 'utf-16', '\n') ++ end) ++ end) ++ describe('multi line edit', function() ++ it('deletion and insertion', function() ++ local expected_text_changes = { ++ -- delete "_fsda" from end of line 1 ++ { ++ range = { ++ ['start'] = { ++ character = 4, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 9, ++ line = 1 ++ } ++ }, ++ rangeLength = 5, ++ text = '' ++ }, ++ -- delete "hello world\n" from line 2 ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 2 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 3 ++ } ++ }, ++ rangeLength = 12, ++ text = '' ++ }, ++ -- delete "1234" from beginning of line 2 ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 2 ++ }, ++ ['end'] = { ++ character = 4, ++ line = 2 ++ } ++ }, ++ rangeLength = 4, ++ text = '' ++ }, ++ -- add " asdf" to end of line 1 ++ { ++ range = { ++ ['start'] = { ++ character = 4, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 4, ++ line = 1 ++ } ++ }, ++ rangeLength = 0, ++ text = ' asdf' ++ }, ++ -- delete " asdf\n" from line 2 ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 2 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 3 ++ } ++ }, ++ rangeLength = 6, ++ text = '' ++ }, ++ -- undo entire deletion ++ { ++ range = { ++ ['start'] = { ++ character = 4, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 9, ++ line = 1 ++ } ++ }, ++ rangeLength = 5, ++ text = "_fdsa\nhello world\n1234 asdf" ++ }, ++ -- redo entire deletion ++ { ++ range = { ++ ['start'] = { ++ character = 4, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 9, ++ line = 3 ++ } ++ }, ++ rangeLength = 27, ++ text = ' asdf' ++ }, ++ } ++ local original_lines = { ++ "\\begin{document}", ++ "test_fdsa", ++ "hello world", ++ "1234 asdf", ++ "\\end{document}" ++ } ++ test_edit(original_lines, {"jf_vejjbhhdu"}, expected_text_changes, 'utf-16', '\n') ++ end) + end) + + describe('multi-operation edits', function() +@@ -297,6 +492,80 @@ describe('incremental synchronization', function() + } + test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n') + end) ++ it('replacing a multibyte character with matching prefix', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 1, ++ line = 1 ++ } ++ }, ++ rangeLength = 1, ++ text = '⟩' ++ } ++ } ++ -- ⟨ is e29fa8, ⟩ is e29fa9 ++ local original_lines = { ++ "\\begin{document}", ++ "⟨", ++ "\\end{document}", ++ } ++ test_edit(original_lines, {"jr⟩"}, expected_text_changes, 'utf-16', '\n') ++ end) ++ it('replacing a multibyte character with matching suffix', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 1, ++ line = 1 ++ } ++ }, ++ rangeLength = 1, ++ text = 'ḟ' ++ } ++ } ++ -- ฟ is e0b89f, ḟ is e1b89f ++ local original_lines = { ++ "\\begin{document}", ++ "ฟ", ++ "\\end{document}", ++ } ++ test_edit(original_lines, {"jrḟ"}, expected_text_changes, 'utf-16', '\n') ++ end) ++ it('inserting before a multibyte character', function() ++ local expected_text_changes = { ++ { ++ range = { ++ ['start'] = { ++ character = 0, ++ line = 1 ++ }, ++ ['end'] = { ++ character = 0, ++ line = 1 ++ } ++ }, ++ rangeLength = 0, ++ text = ' ' ++ } ++ } ++ local original_lines = { ++ "\\begin{document}", ++ "→", ++ "\\end{document}", ++ } ++ test_edit(original_lines, {"ji "}, expected_text_changes, 'utf-16', '\n') ++ end) + it('deleting a multibyte character from a long line', function() + local expected_text_changes = { + { diff --git a/nvim05.nix b/nvim05.nix index 55ac3c3..29cfb96 100644 --- a/nvim05.nix +++ b/nvim05.nix @@ -5,7 +5,7 @@ nixpkgs.config.packageOverrides = pkgs: with pkgs; { neovim-unwrapped = neovim-unwrapped.overrideDerivation (orig: { - patches = orig.patches ++ [ ./16669.patch ]; + patches = orig.patches ++ [ ./16382.patch ./16666.patch ./16670.patch ./16669.patch ]; }); neovim-qt-unwrapped = neovim-qt-unwrapped.overrideDerivation (_: {