Add unicode LSP sync patch for neovim.

2021-12-15 19:26:30 +01:00 · 2021-12-15 19:26:30 +01:00 · f938fd473e
commit f938fd473e
parent c147fb9a69
2 changed files with 504 additions and 0 deletions
--- a/16669.patch
+++ b/16669.patch
@ -0,0 +1,500 @@
+From 0500ace6e138c3fcbd15b951f6af76a45e80f0ab Mon Sep 17 00:00:00 2001
+From: Rishikesh Vaishnav <rishhvaishnav@gmail.com>
+Date: Wed, 15 Dec 2021 18:08:10 +0000
+Subject: [PATCH] fix(lsp): general fix/simplification of incremental sync
+
+---
+ runtime/lua/vim/lsp/sync.lua                  | 125 ++++----
+ src/nvim/lua/stdlib.c                         |   4 +-
+ .../plugin/lsp/incremental_sync_spec.lua      | 275 +++++++++++++++++-
+ 3 files changed, 332 insertions(+), 72 deletions(-)
+
+diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua
+index 5df2a4d144d2..ce3c8c1b2e5d 100644
+--- a/runtime/lua/vim/lsp/sync.lua
+++ b/runtime/lua/vim/lsp/sync.lua
+@@ -93,30 +93,35 @@ end
+ -- utf-8 index and either the utf-16, or utf-32 index.
+ ---@param line string the line to index into
+ ---@param byte integer the byte idx
+---@param start boolean true for start align, false for end align
+ ---@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8)
+ ---@returns table<string, int> byte_idx and char_idx of first change position
+-local function align_end_position(line, byte, offset_encoding)
+local function align_position(line, byte, start, offset_encoding)
+  if byte <= #line then
+    -- Modifying line, find the nearest utf codepoint
+    local offset = str_utf_start(line, byte)
+
+    -- If the byte does not fall on the start of the character, then
+    -- align to the start of the next character if end align, and start
+    -- of this character if start align
+    if offset < 0 then
+      if start then
+        byte = byte + offset
+      else
+        byte = byte + str_utf_end(line, byte) + 1
+      end
+    end
+  end
+
+   local char
+-  -- If on the first byte, or an empty string: the trivial case
+-  if byte == 1 or #line == 0 then
+-    char = byte
+
+   -- Called in the case of extending an empty line "" -> "a"
+-  elseif byte == #line + 1 then
+  if byte == #line + 1 then
+     char = compute_line_length(line, offset_encoding) + 1
+   else
+-    -- Modifying line, find the nearest utf codepoint
+-    local offset = str_utf_end(line, byte)
+-    -- If the byte does not fall on the start of the character, then
+-    -- align to the start of the next character.
+-    if offset > 0 then
+-      char = byte_to_utf(line, byte, offset_encoding) + 1
+-      byte = byte + offset
+-    else
+-      char = byte_to_utf(line, byte, offset_encoding)
+-      byte = byte + offset
+-    end
+-    -- Extending line, find the nearest utf codepoint for the last valid character
+    char = byte_to_utf(line, byte, offset_encoding)
+   end
+
+   return byte, char
+ end
+ 
+@@ -157,18 +162,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline,
+   end
+ 
+   -- Convert byte to codepoint if applicable
+-  local char_idx
+-  local byte_idx
+-  if start_byte_idx == 1 or (#prev_line == 0 and start_byte_idx == 1)then
+-    byte_idx = start_byte_idx
+-    char_idx = 1
+-  elseif start_byte_idx == #prev_line + 1 then
+-    byte_idx = start_byte_idx
+-    char_idx = compute_line_length(prev_line, offset_encoding)  + 1
+-  else
+-    byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
+-    char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding)
+-  end
+  local byte_idx, char_idx = align_position(prev_line, start_byte_idx, true, offset_encoding)
+ 
+   -- Return the start difference (shared for new and prev lines)
+   return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx }
+@@ -209,51 +203,48 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline,
+   local prev_line_length = #prev_line
+   local curr_line_length = #curr_line
+ 
+-  local byte_offset = 0
+  local prev_line_range, curr_line_range
+  if start_line_idx == prev_line_idx then
+    prev_line_range = prev_line_length - start_range.byte_idx
+  -- start_line_idx < prev_line_idx
+  else
+    prev_line_range = prev_line_length - 1
+  end
+  if start_line_idx == curr_line_idx then
+    curr_line_range = curr_line_length - start_range.byte_idx
+  -- start_line_idx < curr_line_idx
+  else
+    curr_line_range = curr_line_length - 1
+  end
+
+  -- Maximum number of bytes to search backwards for mismatch
+  local max_length = min(prev_line_range, curr_line_range)
+ 
+-  -- Editing the same line
+-  -- If the byte offset is zero, that means there is a difference on the last byte (not newline)
+-  if prev_line_idx == curr_line_idx then
+-    local max_length
+-    if start_line_idx == prev_line_idx then
+-      -- Search until beginning of difference
+-      max_length = min(prev_line_length - start_range.byte_idx, curr_line_length - start_range.byte_idx) + 1
+-    else
+-      max_length = min(prev_line_length, curr_line_length) + 1
+-    end
+-    for idx = 0, max_length do
+-      byte_offset = idx
+-      if
+-        str_byte(prev_line, prev_line_length - byte_offset) ~= str_byte(curr_line, curr_line_length - byte_offset)
+-      then
+-        break
+-      end
+  -- Negative offset to last shared byte between prev_line and curr_line
+  -- -1 offset indicates no shared byte
+  local byte_offset = -1
+
+  -- Iterate from end to beginning of shortest line
+  for idx = 0, max_length do
+    byte_offset = idx
+    if
+      str_byte(prev_line, prev_line_length - byte_offset) ~= str_byte(curr_line, curr_line_length - byte_offset)
+    then
+      -- If there was a mismatched byte, need to go back to next byte (which did match)
+      byte_offset = byte_offset - 1
+      break
+     end
+   end
+ 
+-  -- Iterate from end to beginning of shortest line
+-  local prev_end_byte_idx = prev_line_length - byte_offset + 1
+  local prev_end_byte_idx = prev_line_length - byte_offset
+ 
+-  -- Handle case where lines match
+-  if prev_end_byte_idx == 0 then
+-    prev_end_byte_idx = 1
+-  end
+-  local prev_byte_idx, prev_char_idx = align_end_position(prev_line, prev_end_byte_idx, offset_encoding)
+  local prev_byte_idx, prev_char_idx = align_position(prev_line, prev_end_byte_idx, false, offset_encoding)
+   local prev_end_range = { line_idx = prev_line_idx, byte_idx = prev_byte_idx, char_idx = prev_char_idx }
+ 
+-  local curr_end_range
+-  -- Deletion event, new_range cannot be before start
+-  if curr_line_idx < start_line_idx then
+-    curr_end_range = { line_idx = start_line_idx, byte_idx = 1, char_idx = 1 }
+-  else
+-    local curr_end_byte_idx = curr_line_length - byte_offset + 1
+-    -- Handle case where lines match
+-    if curr_end_byte_idx == 0 then
+-      curr_end_byte_idx = 1
+-    end
+-    local curr_byte_idx, curr_char_idx = align_end_position(curr_line, curr_end_byte_idx, offset_encoding)
+-    curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx }
+-  end
+  local curr_end_byte_idx = curr_line_length - byte_offset
+
+  local curr_byte_idx, curr_char_idx = align_position(curr_line, curr_end_byte_idx, false, offset_encoding)
+  local curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx }
+ 
+   return prev_end_range, curr_end_range
+ end
+diff --git a/src/nvim/lua/stdlib.c b/src/nvim/lua/stdlib.c
+index b746e03625ad..9441b88cfb98 100644
+--- a/src/nvim/lua/stdlib.c
+++ b/src/nvim/lua/stdlib.c
+@@ -230,7 +230,7 @@ static int nlua_str_utf_start(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
+   if (offset < 0 || offset > (intptr_t)s1_len) {
+     return luaL_error(lstate, "index out of range");
+   }
+-  int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1);
+  int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + offset - 1);
+   lua_pushinteger(lstate, tail_offset);
+   return 1;
+ }
+@@ -250,7 +250,7 @@ static int nlua_str_utf_end(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
+   if (offset < 0 || offset > (intptr_t)s1_len) {
+     return luaL_error(lstate, "index out of range");
+   }
+-  int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1);
+  int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + offset - 1);
+   lua_pushinteger(lstate, tail_offset);
+   return 1;
+ }
+diff --git a/test/functional/plugin/lsp/incremental_sync_spec.lua b/test/functional/plugin/lsp/incremental_sync_spec.lua
+index 5dd34e766528..60e35c3620cc 100644
+--- a/test/functional/plugin/lsp/incremental_sync_spec.lua
+++ b/test/functional/plugin/lsp/incremental_sync_spec.lua
+@@ -164,6 +164,201 @@ describe('incremental synchronization', function()
+       }
+       test_edit({"a"}, {"rb"}, expected_text_changes, 'utf-16', '\n')
+     end)
+    it('deleting a line', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 0
+            },
+            ['end'] = {
+              character = 0,
+              line = 1
+            }
+          },
+          rangeLength = 12,
+          text = ''
+        }
+      }
+      test_edit({"hello world"}, {"dd"}, expected_text_changes, 'utf-16', '\n')
+    end)
+    it('deleting an empty line', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 1
+            },
+            ['end'] = {
+              character = 0,
+              line = 2
+            }
+          },
+          rangeLength = 1,
+          text = ''
+        }
+      }
+      test_edit({"hello world", ""}, {"jdd"}, expected_text_changes, 'utf-16', '\n')
+    end)
+    it('adding a line', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 1
+            },
+            ['end'] = {
+              character = 0,
+              line = 1
+            }
+          },
+          rangeLength = 0,
+          text = 'hello world\n'
+        }
+      }
+      test_edit({"hello world"}, {"yyp"}, expected_text_changes, 'utf-16', '\n')
+    end)
+    it('adding an empty line', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 1
+            },
+            ['end'] = {
+              character = 0,
+              line = 1
+            }
+          },
+          rangeLength = 0,
+          text = '\n'
+        }
+      }
+      test_edit({"hello world"}, {"o"}, expected_text_changes, 'utf-16', '\n')
+    end)
+  end)
+  describe('multi line edit', function()
+    it('deletion and insertion', function()
+      local expected_text_changes = {
+        -- delete "_fdsa" from end of line 1
+        {
+          range = {
+            ['start'] = {
+              character = 4,
+              line = 1
+            },
+            ['end'] = {
+              character = 9,
+              line = 1
+            }
+          },
+          rangeLength = 5,
+          text = ''
+        },
+        -- delete "hello world\n" from line 2
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 2
+            },
+            ['end'] = {
+              character = 0,
+              line = 3
+            }
+          },
+          rangeLength = 12,
+          text = ''
+        },
+        -- delete "1234" from beginning of line 2
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 2
+            },
+            ['end'] = {
+              character = 4,
+              line = 2
+            }
+          },
+          rangeLength = 4,
+          text = ''
+        },
+        -- add " asdf" to end of line 1
+        {
+          range = {
+            ['start'] = {
+              character = 4,
+              line = 1
+            },
+            ['end'] = {
+              character = 4,
+              line = 1
+            }
+          },
+          rangeLength = 0,
+          text = ' asdf'
+        },
+        -- delete " asdf\n" from line 2
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 2
+            },
+            ['end'] = {
+              character = 0,
+              line = 3
+            }
+          },
+          rangeLength = 6,
+          text = ''
+        },
+        -- undo entire deletion
+        {
+          range = {
+            ['start'] = {
+              character = 4,
+              line = 1
+            },
+            ['end'] = {
+              character = 4,
+              line = 1
+            }
+          },
+          rangeLength = 0,
+          text = "_fdsa\nhello world\n1234"
+        },
+        -- redo entire deletion
+        {
+          range = {
+            ['start'] = {
+              character = 4,
+              line = 1
+            },
+            ['end'] = {
+              character = 4,
+              line = 3
+            }
+          },
+          rangeLength = 22,
+          text = ''
+        },
+      }
+      local original_lines = {
+        "\\begin{document}",
+        "test_fdsa",
+        "hello world",
+        "1234 asdf",
+        "\\end{document}"
+      }
+      test_edit(original_lines, {"jf_vejjbhhdu<C-R>"}, expected_text_changes, 'utf-16', '\n')
+    end)
+   end)
+ 
+   describe('multi-operation edits', function()
+@@ -265,12 +460,12 @@ describe('incremental synchronization', function()
+               line = 0
+             },
+             ['end'] = {
+-              character = 17,
+              character = 12,
+               line = 0
+             }
+           },
+-          rangeLength = 6,
+-          text = '\ntest3'
+          rangeLength = 1,
+          text = '\n'
+         },
+       }
+       test_edit({"test1 test2", "test3"}, {"J", "u"}, expected_text_changes, 'utf-16', '\n')
+@@ -297,6 +492,80 @@ describe('incremental synchronization', function()
+       }
+       test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n')
+     end)
+    it('replacing a multibyte character with matching prefix', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 1
+            },
+            ['end'] = {
+              character = 1,
+              line = 1
+            }
+          },
+          rangeLength = 1,
+          text = '⟩'
+        }
+      }
+      -- ⟨ is e29fa8, ⟩ is e29fa9
+      local original_lines = {
+        "\\begin{document}",
+        "⟨",
+        "\\end{document}",
+      }
+      test_edit(original_lines, {"jr⟩"}, expected_text_changes, 'utf-16', '\n')
+    end)
+    it('replacing a multibyte character with matching suffix', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 1
+            },
+            ['end'] = {
+              character = 1,
+              line = 1
+            }
+          },
+          rangeLength = 1,
+          text = 'ḟ'
+        }
+      }
+      -- ฟ is e0b89f, ḟ is e1b89f
+      local original_lines = {
+        "\\begin{document}",
+        "ฟ",
+        "\\end{document}",
+      }
+      test_edit(original_lines, {"jrḟ"}, expected_text_changes, 'utf-16', '\n')
+    end)
+    it('inserting before a multibyte character', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 1
+            },
+            ['end'] = {
+              character = 0,
+              line = 1
+            }
+          },
+          rangeLength = 0,
+          text = ' '
+        }
+      }
+      local original_lines = {
+        "\\begin{document}",
+        "→",
+        "\\end{document}",
+      }
+      test_edit(original_lines, {"ji "}, expected_text_changes, 'utf-16', '\n')
+    end)
+     it('deleting a multibyte character from a long line', function()
+       local expected_text_changes = {
+         {
--- a/nvim05.nix
+++ b/nvim05.nix
@ -4,6 +4,10 @@
  # nixpkgs.overlays = [ inputs.neovim-overlay.overlay ];

  nixpkgs.config.packageOverrides = pkgs: with pkgs; {
+    neovim-unwrapped = neovim-unwrapped.overrideAttrs (orig: { # append ./16669.patch (LSP incremental sync fix)
+      patches = (orig.patches or [ ]) ++ [ ./16669.patch ]; # keep existing patches; tolerate a missing attr
+    });
+
    neovim-qt-unwrapped = neovim-qt-unwrapped.overrideDerivation (_: {
      version = "0.2.17.9999";
      src = fetchFromGitHub {