nixos-config/16669.patch

From 0500ace6e138c3fcbd15b951f6af76a45e80f0ab Mon Sep 17 00:00:00 2001
From: Rishikesh Vaishnav <rishhvaishnav@gmail.com>
Date: Wed, 15 Dec 2021 18:08:10 +0000
Subject: [PATCH] fix(lsp): general fix/simplification of incremental sync

---
 runtime/lua/vim/lsp/sync.lua                  | 125 ++++----
 src/nvim/lua/stdlib.c                         |   4 +-
 .../plugin/lsp/incremental_sync_spec.lua      | 275 +++++++++++++++++-
 3 files changed, 332 insertions(+), 72 deletions(-)

diff --git a/runtime/lua/vim/lsp/sync.lua b/runtime/lua/vim/lsp/sync.lua
index 5df2a4d144d2..ce3c8c1b2e5d 100644
--- a/runtime/lua/vim/lsp/sync.lua
+++ b/runtime/lua/vim/lsp/sync.lua
@@ -93,30 +93,35 @@ end
 -- utf-8 index and either the utf-16, or utf-32 index.
 ---@param line string the line to index into
 ---@param byte integer the byte idx
+---@param start boolean true for start align, false for end align
 ---@param offset_encoding string utf-8|utf-16|utf-32|nil (default: utf-8)
 ---@returns table<string, int> byte_idx and char_idx of first change position
-local function align_end_position(line, byte, offset_encoding)
+local function align_position(line, byte, start, offset_encoding)
+  if byte <= #line then
+    -- Modifying line, find the nearest utf codepoint
+    local offset = str_utf_start(line, byte)
+
+    -- If the byte does not fall on the start of the character, then
+    -- align to the start of the next character if end align, and start
+    -- of this character if start align
+    if offset < 0 then
+      if start then
+        byte = byte + offset
+      else
+        byte = byte + str_utf_end(line, byte) + 1
+      end
+    end
+  end
+
   local char
-  -- If on the first byte, or an empty string: the trivial case
-  if byte == 1 or #line == 0 then
-    char = byte
+
   -- Called in the case of extending an empty line "" -> "a"
-  elseif byte == #line + 1 then
+  if byte == #line + 1 then
     char = compute_line_length(line, offset_encoding) + 1
   else
-    -- Modifying line, find the nearest utf codepoint
-    local offset = str_utf_end(line, byte)
-    -- If the byte does not fall on the start of the character, then
-    -- align to the start of the next character.
-    if offset > 0 then
-      char = byte_to_utf(line, byte, offset_encoding) + 1
-      byte = byte + offset
-    else
-      char = byte_to_utf(line, byte, offset_encoding)
-      byte = byte + offset
-    end
-    -- Extending line, find the nearest utf codepoint for the last valid character
+    char = byte_to_utf(line, byte, offset_encoding)
   end
+
   return byte, char
 end

@@ -157,18 +162,7 @@ local function compute_start_range(prev_lines, curr_lines, firstline, lastline,
   end

   -- Convert byte to codepoint if applicable
-  local char_idx
-  local byte_idx
-  if start_byte_idx == 1 or (#prev_line == 0 and start_byte_idx == 1)then
-    byte_idx = start_byte_idx
-    char_idx = 1
-  elseif start_byte_idx == #prev_line + 1 then
-    byte_idx = start_byte_idx
-    char_idx = compute_line_length(prev_line, offset_encoding)  + 1
-  else
-    byte_idx = start_byte_idx + str_utf_start(prev_line, start_byte_idx)
-    char_idx = byte_to_utf(prev_line, start_byte_idx, offset_encoding)
-  end
+  local byte_idx, char_idx = align_position(prev_line, start_byte_idx, true, offset_encoding)

   -- Return the start difference (shared for new and prev lines)
   return { line_idx = firstline, byte_idx = byte_idx, char_idx = char_idx }
@@ -209,51 +203,48 @@ local function compute_end_range(prev_lines, curr_lines, start_range, firstline,
   local prev_line_length = #prev_line
   local curr_line_length = #curr_line

-  local byte_offset = 0
+  local prev_line_range, curr_line_range
+  if start_line_idx == prev_line_idx then
+    prev_line_range = prev_line_length - start_range.byte_idx
+  -- start_line_idx < prev_line_idx
+  else
+    prev_line_range = prev_line_length - 1
+  end
+  if start_line_idx == curr_line_idx then
+    curr_line_range = curr_line_length - start_range.byte_idx
+  -- start_line_idx < curr_line_idx
+  else
+    curr_line_range = curr_line_length - 1
+  end
+
+  -- Maximum number of bytes to search backwards for mismatch
+  local max_length = min(prev_line_range, curr_line_range)

-  -- Editing the same line
-  -- If the byte offset is zero, that means there is a difference on the last byte (not newline)
-  if prev_line_idx == curr_line_idx then
-    local max_length
-    if start_line_idx == prev_line_idx then
-      -- Search until beginning of difference
-      max_length = min(prev_line_length - start_range.byte_idx, curr_line_length - start_range.byte_idx) + 1
-    else
-      max_length = min(prev_line_length, curr_line_length) + 1
-    end
-    for idx = 0, max_length do
-      byte_offset = idx
-      if
-        str_byte(prev_line, prev_line_length - byte_offset) ~= str_byte(curr_line, curr_line_length - byte_offset)
-      then
-        break
-      end
+  -- Negative offset to last shared byte between prev_line and curr_line
+  -- -1 offset indicates no shared byte
+  local byte_offset = -1
+
+  -- Iterate from end to beginning of shortest line
+  for idx = 0, max_length do
+    byte_offset = idx
+    if
+      str_byte(prev_line, prev_line_length - byte_offset) ~= str_byte(curr_line, curr_line_length - byte_offset)
+    then
+      -- If there was a mismatched byte, need to go back to next byte (which did match)
+      byte_offset = byte_offset - 1
+      break
     end
   end

-  -- Iterate from end to beginning of shortest line
-  local prev_end_byte_idx = prev_line_length - byte_offset + 1
+  local prev_end_byte_idx = prev_line_length - byte_offset

-  -- Handle case where lines match
-  if prev_end_byte_idx == 0 then
-    prev_end_byte_idx = 1
-  end
-  local prev_byte_idx, prev_char_idx = align_end_position(prev_line, prev_end_byte_idx, offset_encoding)
+  local prev_byte_idx, prev_char_idx = align_position(prev_line, prev_end_byte_idx, false, offset_encoding)
   local prev_end_range = { line_idx = prev_line_idx, byte_idx = prev_byte_idx, char_idx = prev_char_idx }

-  local curr_end_range
-  -- Deletion event, new_range cannot be before start
-  if curr_line_idx < start_line_idx then
-    curr_end_range = { line_idx = start_line_idx, byte_idx = 1, char_idx = 1 }
-  else
-    local curr_end_byte_idx = curr_line_length - byte_offset + 1
-    -- Handle case where lines match
-    if curr_end_byte_idx == 0 then
-      curr_end_byte_idx = 1
-    end
-    local curr_byte_idx, curr_char_idx = align_end_position(curr_line, curr_end_byte_idx, offset_encoding)
-    curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx }
-  end
+  local curr_end_byte_idx = curr_line_length - byte_offset
+
+  local curr_byte_idx, curr_char_idx = align_position(curr_line, curr_end_byte_idx, false, offset_encoding)
+  local curr_end_range = { line_idx = curr_line_idx, byte_idx = curr_byte_idx, char_idx = curr_char_idx }

   return prev_end_range, curr_end_range
 end
diff --git a/src/nvim/lua/stdlib.c b/src/nvim/lua/stdlib.c
index b746e03625ad..9441b88cfb98 100644
--- a/src/nvim/lua/stdlib.c
+++ b/src/nvim/lua/stdlib.c
@@ -230,7 +230,7 @@ static int nlua_str_utf_start(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
   if (offset < 0 || offset > (intptr_t)s1_len) {
     return luaL_error(lstate, "index out of range");
   }
-  int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1);
+  int tail_offset = mb_head_off((char_u *)s1, (char_u *)s1 + offset - 1);
   lua_pushinteger(lstate, tail_offset);
   return 1;
 }
@@ -250,7 +250,7 @@ static int nlua_str_utf_end(lua_State *const lstate) FUNC_ATTR_NONNULL_ALL
   if (offset < 0 || offset > (intptr_t)s1_len) {
     return luaL_error(lstate, "index out of range");
   }
-  int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + (char_u)offset - 1);
+  int tail_offset = mb_tail_off((char_u *)s1, (char_u *)s1 + offset - 1);
   lua_pushinteger(lstate, tail_offset);
   return 1;
 }
diff --git a/test/functional/plugin/lsp/incremental_sync_spec.lua b/test/functional/plugin/lsp/incremental_sync_spec.lua
index 5dd34e766528..60e35c3620cc 100644
--- a/test/functional/plugin/lsp/incremental_sync_spec.lua
+++ b/test/functional/plugin/lsp/incremental_sync_spec.lua
@@ -164,6 +164,201 @@ describe('incremental synchronization', function()
       }
       test_edit({"a"}, {"rb"}, expected_text_changes, 'utf-16', '\n')
     end)
+    it('deleting a line', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 0
+            },
+            ['end'] = {
+              character = 0,
+              line = 1
+            }
+          },
+          rangeLength = 12,
+          text = ''
+        }
+      }
+      test_edit({"hello world"}, {"dd"}, expected_text_changes, 'utf-16', '\n')
+    end)
+    it('deleting an empty line', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 1
+            },
+            ['end'] = {
+              character = 0,
+              line = 2
+            }
+          },
+          rangeLength = 1,
+          text = ''
+        }
+      }
+      test_edit({"hello world", ""}, {"jdd"}, expected_text_changes, 'utf-16', '\n')
+    end)
+    it('adding a line', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 1
+            },
+            ['end'] = {
+              character = 0,
+              line = 1
+            }
+          },
+          rangeLength = 0,
+          text = 'hello world\n'
+        }
+      }
+      test_edit({"hello world"}, {"yyp"}, expected_text_changes, 'utf-16', '\n')
+    end)
+    it('adding an empty line', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 1
+            },
+            ['end'] = {
+              character = 0,
+              line = 1
+            }
+          },
+          rangeLength = 0,
+          text = '\n'
+        }
+      }
+      test_edit({"hello world"}, {"o"}, expected_text_changes, 'utf-16', '\n')
+    end)
+  end)
+  describe('multi line edit', function()
+    it('deletion and insertion', function()
+      local expected_text_changes = {
+        -- delete "_fdsa" from end of line 1
+        {
+          range = {
+            ['start'] = {
+              character = 4,
+              line = 1
+            },
+            ['end'] = {
+              character = 9,
+              line = 1
+            }
+          },
+          rangeLength = 5,
+          text = ''
+        },
+        -- delete "hello world\n" from line 2
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 2
+            },
+            ['end'] = {
+              character = 0,
+              line = 3
+            }
+          },
+          rangeLength = 12,
+          text = ''
+        },
+        -- delete "1234" from beginning of line 2
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 2
+            },
+            ['end'] = {
+              character = 4,
+              line = 2
+            }
+          },
+          rangeLength = 4,
+          text = ''
+        },
+        -- add " asdf" to end of line 1
+        {
+          range = {
+            ['start'] = {
+              character = 4,
+              line = 1
+            },
+            ['end'] = {
+              character = 4,
+              line = 1
+            }
+          },
+          rangeLength = 0,
+          text = ' asdf'
+        },
+        -- delete " asdf\n" from line 2
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 2
+            },
+            ['end'] = {
+              character = 0,
+              line = 3
+            }
+          },
+          rangeLength = 6,
+          text = ''
+        },
+        -- undo entire deletion
+        {
+          range = {
+            ['start'] = {
+              character = 4,
+              line = 1
+            },
+            ['end'] = {
+              character = 4,
+              line = 1
+            }
+          },
+          rangeLength = 0,
+          text = "_fdsa\nhello world\n1234"
+        },
+        -- redo entire deletion
+        {
+          range = {
+            ['start'] = {
+              character = 4,
+              line = 1
+            },
+            ['end'] = {
+              character = 4,
+              line = 3
+            }
+          },
+          rangeLength = 22,
+          text = ''
+        },
+      }
+      local original_lines = {
+        "\\begin{document}",
+        "test_fdsa",
+        "hello world",
+        "1234 asdf",
+        "\\end{document}"
+      }
+      test_edit(original_lines, {"jf_vejjbhhdu<C-R>"}, expected_text_changes, 'utf-16', '\n')
+    end)
   end)

   describe('multi-operation edits', function()
@@ -265,12 +460,12 @@ describe('incremental synchronization', function()
               line = 0
             },
             ['end'] = {
-              character = 17,
+              character = 12,
               line = 0
             }
           },
-          rangeLength = 6,
-          text = '\ntest3'
+          rangeLength = 1,
+          text = '\n'
         },
       }
       test_edit({"test1 test2", "test3"}, {"J", "u"}, expected_text_changes, 'utf-16', '\n')
@@ -297,6 +492,80 @@ describe('incremental synchronization', function()
       }
       test_edit({"🔥"}, {"x"}, expected_text_changes, 'utf-16', '\n')
     end)
+    it('replacing a multibyte character with matching prefix', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 1
+            },
+            ['end'] = {
+              character = 1,
+              line = 1
+            }
+          },
+          rangeLength = 1,
+          text = '⟩'
+        }
+      }
+      -- ⟨ is e29fa8, ⟩ is e29fa9
+      local original_lines = {
+        "\\begin{document}",
+        "⟨",
+        "\\end{document}",
+      }
+      test_edit(original_lines, {"jr⟩"}, expected_text_changes, 'utf-16', '\n')
+    end)
+    it('replacing a multibyte character with matching suffix', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 1
+            },
+            ['end'] = {
+              character = 1,
+              line = 1
+            }
+          },
+          rangeLength = 1,
+          text = 'ḟ'
+        }
+      }
+      -- ฟ is e0b89f, ḟ is e1b89f
+      local original_lines = {
+        "\\begin{document}",
+        "ฟ",
+        "\\end{document}",
+      }
+      test_edit(original_lines, {"jrḟ"}, expected_text_changes, 'utf-16', '\n')
+    end)
+    it('inserting before a multibyte character', function()
+      local expected_text_changes = {
+        {
+          range = {
+            ['start'] = {
+              character = 0,
+              line = 1
+            },
+            ['end'] = {
+              character = 0,
+              line = 1
+            }
+          },
+          rangeLength = 0,
+          text = ' '
+        }
+      }
+      local original_lines = {
+        "\\begin{document}",
+        "→",
+        "\\end{document}",
+      }
+      test_edit(original_lines, {"ji "}, expected_text_changes, 'utf-16', '\n')
+    end)
     it('deleting a multibyte character from a long line', function()
       local expected_text_changes = {
         {