root/src/treesit.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. init_treesit_functions
  2. load_tree_sitter_if_necessary
  3. treesit_calloc_wrapper
  4. treesit_initialize
  5. treesit_symbol_to_c_name
  6. treesit_find_override_name
  7. treesit_load_language_push_for_each_suffix
  8. treesit_load_language
  9. DEFUN
  10. DEFUN
  11. treesit_check_parser
  12. treesit_tree_edit_1
  13. treesit_record_change
  14. treesit_sync_visible_region
  15. treesit_check_buffer_size
  16. treesit_call_after_change_functions
  17. treesit_ensure_parsed
  18. treesit_read_buffer
  19. make_treesit_parser
  20. make_treesit_node
  21. make_treesit_query
  22. treesit_delete_parser
  23. treesit_delete_query
  24. treesit_named_node_p
  25. treesit_query_error_to_string
  26. treesit_compose_query_signal_data
  27. treesit_ensure_query_compiled
  28. DEFUN
  29. DEFUN
  30. DEFUN
  31. DEFUN
  32. DEFUN
  33. DEFUN
  34. DEFUN
  35. DEFUN
  36. DEFUN
  37. DEFUN
  38. treesit_parser_live_p
  39. DEFUN
  40. treesit_check_range_argument
  41. treesit_make_ranges
  42. DEFUN
  43. DEFUN
  44. treesit_check_positive_integer
  45. treesit_check_node
  46. treesit_check_position
  47. treesit_node_uptodate_p
  48. DEFUN
  49. DEFUN
  50. DEFUN
  51. DEFUN
  52. DEFUN
  53. treesit_node_eq
  54. DEFUN
  55. DEFUN
  56. treesit_predicates_for_pattern
  57. treesit_predicate_capture_name_to_node
  58. treesit_predicate_capture_name_to_text
  59. treesit_predicate_equal
  60. treesit_predicate_match
  61. treesit_predicate_pred
  62. treesit_eval_predicates
  63. treesit_assume_true
  64. treesit_cursor_helper_1
  65. treesit_cursor_helper
  66. treesit_traverse_sibling_helper
  67. treesit_traverse_child_helper
  68. treesit_traverse_match_predicate
  69. treesit_search_dfs
  70. treesit_search_forward
  71. treesit_traverse_cleanup_cursor
  72. treesit_build_sparse_tree
  73. DEFUN
  74. DEFUN
  75. syms_of_treesit

     1 /* Tree-sitter integration for GNU Emacs.
     2 
     3 Copyright (C) 2021-2023 Free Software Foundation, Inc.
     4 
     5 Maintainer: Yuan Fu <casouri@gmail.com>
     6 
     7 This file is part of GNU Emacs.
     8 
     9 GNU Emacs is free software: you can redistribute it and/or modify
    10 it under the terms of the GNU General Public License as published by
    11 the Free Software Foundation, either version 3 of the License, or (at
    12 your option) any later version.
    13 
    14 GNU Emacs is distributed in the hope that it will be useful,
    15 but WITHOUT ANY WARRANTY; without even the implied warranty of
    16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    17 GNU General Public License for more details.
    18 
    19 You should have received a copy of the GNU General Public License
    20 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    21 
    22 #include <config.h>
    23 #include "lisp.h"
    24 #include "buffer.h"
    25 
    26 #include "treesit.h"
    27 
    28 #if HAVE_TREE_SITTER
    29 
    30 
    31 /* Dynamic loading of libtree-sitter.  */
    32 
    33 #ifdef WINDOWSNT
    34 # include "w32common.h"
    35 
    36 /* In alphabetical order.  */
    37 #undef ts_language_version
    38 #undef ts_node_child
    39 #undef ts_node_child_by_field_name
    40 #undef ts_node_child_count
    41 #undef ts_node_descendant_for_byte_range
    42 #undef ts_node_end_byte
    43 #undef ts_node_eq
    44 #undef ts_node_field_name_for_child
    45 #undef ts_node_has_error
    46 #undef ts_node_is_extra
    47 #undef ts_node_is_missing
    48 #undef ts_node_is_named
    49 #undef ts_node_is_null
    50 #undef ts_node_named_child
    51 #undef ts_node_named_child_count
    52 #undef ts_node_named_descendant_for_byte_range
    53 #undef ts_node_next_named_sibling
    54 #undef ts_node_next_sibling
    55 #undef ts_node_prev_named_sibling
    56 #undef ts_node_prev_sibling
    57 #undef ts_node_start_byte
    58 #undef ts_node_string
    59 #undef ts_node_type
    60 #undef ts_parser_delete
    61 #undef ts_parser_included_ranges
    62 #undef ts_parser_language
    63 #undef ts_parser_new
    64 #undef ts_parser_parse
    65 #undef ts_parser_set_included_ranges
    66 #undef ts_parser_set_language
    67 #undef ts_query_capture_name_for_id
    68 #undef ts_query_cursor_delete
    69 #undef ts_query_cursor_exec
    70 #undef ts_query_cursor_new
    71 #undef ts_query_cursor_next_match
    72 #undef ts_query_cursor_set_byte_range
    73 #undef ts_query_delete
    74 #undef ts_query_new
    75 #undef ts_query_pattern_count
    76 #undef ts_query_predicates_for_pattern
    77 #undef ts_query_string_value_for_id
    78 #undef ts_set_allocator
    79 #undef ts_tree_cursor_copy
    80 #undef ts_tree_cursor_current_node
    81 #undef ts_tree_cursor_delete
    82 #undef ts_tree_cursor_goto_first_child
    83 #undef ts_tree_cursor_goto_next_sibling
    84 #undef ts_tree_cursor_goto_parent
    85 #undef ts_tree_cursor_new
    86 #undef ts_tree_delete
    87 #undef ts_tree_edit
    88 #undef ts_tree_get_changed_ranges
    89 #undef ts_tree_root_node
    90 
    91 DEF_DLL_FN (uint32_t, ts_language_version, (const TSLanguage *));
    92 DEF_DLL_FN (TSNode, ts_node_child, (TSNode, uint32_t));
    93 DEF_DLL_FN (TSNode, ts_node_child_by_field_name,
    94             (TSNode, const char *, uint32_t));
    95 DEF_DLL_FN (uint32_t, ts_node_child_count, (TSNode));
    96 DEF_DLL_FN (TSNode, ts_node_descendant_for_byte_range,
    97             (TSNode, uint32_t, uint32_t));
    98 DEF_DLL_FN (uint32_t, ts_node_end_byte, (TSNode));
    99 DEF_DLL_FN (bool, ts_node_eq, (TSNode, TSNode));
   100 DEF_DLL_FN (const char *, ts_node_field_name_for_child, (TSNode, uint32_t));
   101 DEF_DLL_FN (bool, ts_node_has_error, (TSNode));
   102 DEF_DLL_FN (bool, ts_node_is_extra, (TSNode));
   103 DEF_DLL_FN (bool, ts_node_is_missing, (TSNode));
   104 DEF_DLL_FN (bool, ts_node_is_named, (TSNode));
   105 DEF_DLL_FN (bool, ts_node_is_null, (TSNode));
   106 DEF_DLL_FN (TSNode, ts_node_named_child, (TSNode, uint32_t));
   107 DEF_DLL_FN (uint32_t, ts_node_named_child_count, (TSNode));
   108 DEF_DLL_FN (TSNode, ts_node_named_descendant_for_byte_range,
   109             (TSNode, uint32_t, uint32_t));
   110 DEF_DLL_FN (TSNode, ts_node_next_named_sibling, (TSNode));
   111 DEF_DLL_FN (TSNode, ts_node_next_sibling, (TSNode));
   112 DEF_DLL_FN (TSNode, ts_node_prev_named_sibling, (TSNode));
   113 DEF_DLL_FN (TSNode, ts_node_prev_sibling, (TSNode));
   114 DEF_DLL_FN (uint32_t, ts_node_start_byte, (TSNode));
   115 DEF_DLL_FN (char *, ts_node_string, (TSNode));
   116 DEF_DLL_FN (const char *, ts_node_type, (TSNode));
   117 DEF_DLL_FN (void, ts_parser_delete, (TSParser *));
   118 DEF_DLL_FN (const TSRange *, ts_parser_included_ranges,
   119             (const TSParser *, uint32_t *));
   120 DEF_DLL_FN (const TSLanguage *, ts_parser_language, (const TSParser *));
   121 DEF_DLL_FN (TSParser *, ts_parser_new, (void));
   122 DEF_DLL_FN (TSTree *, ts_parser_parse, (TSParser *, const TSTree *, TSInput));
   123 DEF_DLL_FN (bool, ts_parser_set_included_ranges,
   124             (TSParser *, const TSRange *, uint32_t));
   125 DEF_DLL_FN (bool, ts_parser_set_language, (TSParser *, const TSLanguage *));
   126 DEF_DLL_FN (const char *, ts_query_capture_name_for_id,
   127             (const TSQuery *, uint32_t, uint32_t *));
   128 DEF_DLL_FN (void, ts_query_cursor_delete, (TSQueryCursor *));
   129 DEF_DLL_FN (void, ts_query_cursor_exec,
   130             (TSQueryCursor *, const TSQuery *, TSNode));
   131 DEF_DLL_FN (TSQueryCursor *, ts_query_cursor_new, (void));
   132 DEF_DLL_FN (bool, ts_query_cursor_next_match,
   133             (TSQueryCursor *, TSQueryMatch *));
   134 DEF_DLL_FN (void, ts_query_cursor_set_byte_range,
   135             (TSQueryCursor *, uint32_t, uint32_t));
   136 DEF_DLL_FN (void, ts_query_delete, (TSQuery *));
   137 DEF_DLL_FN (TSQuery *, ts_query_new,
   138             (const TSLanguage *, const char *, uint32_t, uint32_t *, TSQueryError *));
   139 DEF_DLL_FN (uint32_t, ts_query_pattern_count, (const TSQuery *));
   140 DEF_DLL_FN (const TSQueryPredicateStep *, ts_query_predicates_for_pattern,
   141             ( const TSQuery *, uint32_t, uint32_t *));
   142 DEF_DLL_FN (const char *, ts_query_string_value_for_id,
   143             (const TSQuery *, uint32_t, uint32_t *));
   144 DEF_DLL_FN (void, ts_set_allocator,
   145             (void *(*)(size_t), void *(*)(size_t, size_t), void *(*)(void *, size_t), void (*)(void *)));
   146 DEF_DLL_FN (TSTreeCursor, ts_tree_cursor_copy, (const TSTreeCursor *));
   147 DEF_DLL_FN (TSNode, ts_tree_cursor_current_node, (const TSTreeCursor *));
   148 DEF_DLL_FN (void, ts_tree_cursor_delete, (const TSTreeCursor *));
   149 DEF_DLL_FN (bool, ts_tree_cursor_goto_first_child, (TSTreeCursor *));
   150 DEF_DLL_FN (bool, ts_tree_cursor_goto_next_sibling, (TSTreeCursor *));
   151 DEF_DLL_FN (bool, ts_tree_cursor_goto_parent, (TSTreeCursor *));
   152 DEF_DLL_FN (TSTreeCursor, ts_tree_cursor_new, (TSNode));
   153 DEF_DLL_FN (void, ts_tree_delete, (TSTree *));
   154 DEF_DLL_FN (void, ts_tree_edit, (TSTree *, const TSInputEdit *));
   155 DEF_DLL_FN (TSRange *, ts_tree_get_changed_ranges,
   156             (const TSTree *, const TSTree *, uint32_t *));
   157 DEF_DLL_FN (TSNode, ts_tree_root_node, (const TSTree *));
   158 
   159 static bool
   160 init_treesit_functions (void)
   161 {
   162   HMODULE library = w32_delayed_load (Qtree_sitter);
   163 
   164   if (!library)
   165     return false;
   166 
   167   LOAD_DLL_FN (library, ts_language_version);
   168   LOAD_DLL_FN (library, ts_node_child);
   169   LOAD_DLL_FN (library, ts_node_child_by_field_name);
   170   LOAD_DLL_FN (library, ts_node_child_count);
   171   LOAD_DLL_FN (library, ts_node_descendant_for_byte_range);
   172   LOAD_DLL_FN (library, ts_node_end_byte);
   173   LOAD_DLL_FN (library, ts_node_eq);
   174   LOAD_DLL_FN (library, ts_node_field_name_for_child);
   175   LOAD_DLL_FN (library, ts_node_has_error);
   176   LOAD_DLL_FN (library, ts_node_is_extra);
   177   LOAD_DLL_FN (library, ts_node_is_missing);
   178   LOAD_DLL_FN (library, ts_node_is_named);
   179   LOAD_DLL_FN (library, ts_node_is_null);
   180   LOAD_DLL_FN (library, ts_node_named_child);
   181   LOAD_DLL_FN (library, ts_node_named_child_count);
   182   LOAD_DLL_FN (library, ts_node_named_descendant_for_byte_range);
   183   LOAD_DLL_FN (library, ts_node_next_named_sibling);
   184   LOAD_DLL_FN (library, ts_node_next_sibling);
   185   LOAD_DLL_FN (library, ts_node_prev_named_sibling);
   186   LOAD_DLL_FN (library, ts_node_prev_sibling);
   187   LOAD_DLL_FN (library, ts_node_start_byte);
   188   LOAD_DLL_FN (library, ts_node_string);
   189   LOAD_DLL_FN (library, ts_node_type);
   190   LOAD_DLL_FN (library, ts_parser_delete);
   191   LOAD_DLL_FN (library, ts_parser_included_ranges);
   192   LOAD_DLL_FN (library, ts_parser_language);
   193   LOAD_DLL_FN (library, ts_parser_new);
   194   LOAD_DLL_FN (library, ts_parser_parse);
   195   LOAD_DLL_FN (library, ts_parser_set_included_ranges);
   196   LOAD_DLL_FN (library, ts_parser_set_language);
   197   LOAD_DLL_FN (library, ts_query_capture_name_for_id);
   198   LOAD_DLL_FN (library, ts_query_cursor_delete);
   199   LOAD_DLL_FN (library, ts_query_cursor_exec);
   200   LOAD_DLL_FN (library, ts_query_cursor_new);
   201   LOAD_DLL_FN (library, ts_query_cursor_next_match);
   202   LOAD_DLL_FN (library, ts_query_cursor_set_byte_range);
   203   LOAD_DLL_FN (library, ts_query_delete);
   204   LOAD_DLL_FN (library, ts_query_new);
   205   LOAD_DLL_FN (library, ts_query_pattern_count);
   206   LOAD_DLL_FN (library, ts_query_predicates_for_pattern);
   207   LOAD_DLL_FN (library, ts_query_string_value_for_id);
   208   LOAD_DLL_FN (library, ts_set_allocator);
   209   LOAD_DLL_FN (library, ts_tree_cursor_copy);
   210   LOAD_DLL_FN (library, ts_tree_cursor_current_node);
   211   LOAD_DLL_FN (library, ts_tree_cursor_delete);
   212   LOAD_DLL_FN (library, ts_tree_cursor_goto_first_child);
   213   LOAD_DLL_FN (library, ts_tree_cursor_goto_next_sibling);
   214   LOAD_DLL_FN (library, ts_tree_cursor_goto_parent);
   215   LOAD_DLL_FN (library, ts_tree_cursor_new);
   216   LOAD_DLL_FN (library, ts_tree_delete);
   217   LOAD_DLL_FN (library, ts_tree_edit);
   218   LOAD_DLL_FN (library, ts_tree_get_changed_ranges);
   219   LOAD_DLL_FN (library, ts_tree_root_node);
   220 
   221   return true;
   222 }
   223 
   224 #define ts_language_version fn_ts_language_version
   225 #define ts_node_child fn_ts_node_child
   226 #define ts_node_child_by_field_name fn_ts_node_child_by_field_name
   227 #define ts_node_child_count fn_ts_node_child_count
   228 #define ts_node_descendant_for_byte_range fn_ts_node_descendant_for_byte_range
   229 #define ts_node_end_byte fn_ts_node_end_byte
   230 #define ts_node_eq fn_ts_node_eq
   231 #define ts_node_field_name_for_child fn_ts_node_field_name_for_child
   232 #define ts_node_has_error fn_ts_node_has_error
   233 #define ts_node_is_extra fn_ts_node_is_extra
   234 #define ts_node_is_missing fn_ts_node_is_missing
   235 #define ts_node_is_named fn_ts_node_is_named
   236 #define ts_node_is_null fn_ts_node_is_null
   237 #define ts_node_named_child fn_ts_node_named_child
   238 #define ts_node_named_child_count fn_ts_node_named_child_count
   239 #define ts_node_named_descendant_for_byte_range fn_ts_node_named_descendant_for_byte_range
   240 #define ts_node_next_named_sibling fn_ts_node_next_named_sibling
   241 #define ts_node_next_sibling fn_ts_node_next_sibling
   242 #define ts_node_prev_named_sibling fn_ts_node_prev_named_sibling
   243 #define ts_node_prev_sibling fn_ts_node_prev_sibling
   244 #define ts_node_start_byte fn_ts_node_start_byte
   245 #define ts_node_string fn_ts_node_string
   246 #define ts_node_type fn_ts_node_type
   247 #define ts_parser_delete fn_ts_parser_delete
   248 #define ts_parser_included_ranges fn_ts_parser_included_ranges
   249 #define ts_parser_language fn_ts_parser_language
   250 #define ts_parser_new fn_ts_parser_new
   251 #define ts_parser_parse fn_ts_parser_parse
   252 #define ts_parser_set_included_ranges fn_ts_parser_set_included_ranges
   253 #define ts_parser_set_language fn_ts_parser_set_language
   254 #define ts_query_capture_name_for_id fn_ts_query_capture_name_for_id
   255 #define ts_query_cursor_delete fn_ts_query_cursor_delete
   256 #define ts_query_cursor_exec fn_ts_query_cursor_exec
   257 #define ts_query_cursor_new fn_ts_query_cursor_new
   258 #define ts_query_cursor_next_match fn_ts_query_cursor_next_match
   259 #define ts_query_cursor_set_byte_range fn_ts_query_cursor_set_byte_range
   260 #define ts_query_delete fn_ts_query_delete
   261 #define ts_query_new fn_ts_query_new
   262 #define ts_query_pattern_count fn_ts_query_pattern_count
   263 #define ts_query_predicates_for_pattern fn_ts_query_predicates_for_pattern
   264 #define ts_query_string_value_for_id fn_ts_query_string_value_for_id
   265 #define ts_set_allocator fn_ts_set_allocator
   266 #define ts_tree_cursor_copy fn_ts_tree_cursor_copy
   267 #define ts_tree_cursor_current_node fn_ts_tree_cursor_current_node
   268 #define ts_tree_cursor_delete fn_ts_tree_cursor_delete
   269 #define ts_tree_cursor_goto_first_child fn_ts_tree_cursor_goto_first_child
   270 #define ts_tree_cursor_goto_next_sibling fn_ts_tree_cursor_goto_next_sibling
   271 #define ts_tree_cursor_goto_parent fn_ts_tree_cursor_goto_parent
   272 #define ts_tree_cursor_new fn_ts_tree_cursor_new
   273 #define ts_tree_delete fn_ts_tree_delete
   274 #define ts_tree_edit fn_ts_tree_edit
   275 #define ts_tree_get_changed_ranges fn_ts_tree_get_changed_ranges
   276 #define ts_tree_root_node fn_ts_tree_root_node
   277 
   278 #endif  /* WINDOWSNT */
   279 
   280 
   281 /* Commentary
   282 
   283    The Emacs wrapper of tree-sitter does not expose everything the C
   284    API provides, most notably:
   285 
   286    - It doesn't expose a syntax tree.  The syntax tree is part of the
   287      parser object, and updating the tree is handled on the C level.
   288 
   289    - It doesn't expose the tree cursor, either.  Presumably, Lisp is
   290      slow enough to make insignificant any performance advantages from
   291      using the cursor.  Not exposing the cursor also minimizes the
   292      number of new types this adds to Emacs Lisp; currently, this adds
   293      only the parser, node, and compiled query types.
   294 
   295    - Because updating the change is handled on the C level as each
   296      change is made in the buffer, there is no way for Lisp to update
   297      a node.  But since we can just retrieve a new node, it shouldn't
   298      be a limitation.
   299 
   300    - I didn't expose setting timeout and cancellation flag for a
   301      parser, mainly because I don't think they are really necessary
   302      in Emacs's use cases.
   303 
   304    - Many tree-sitter functions take a TSPoint, which is basically a
   305      row and column.  Emacs uses a gap buffer and does not keep
   306      information about the row and column position of a buffer.
   307      According to the author of tree-sitter, those functions only take
   308      a TSPoint so that it can be moved alongside the byte position and
   309      returned to the caller afterwards, and the position actually used
   310      is the specified byte position.  He also said that he _thinks_
   311      that just passing a byte position will also work.  As a result, a
   312      dummy value is used in place of each TSPoint.  Judging by the
   313      nature of parsing algorithms, I think it is safe to use only the
   314      byte position, and I don't think this will change in the future.
   315 
   316      See: https://github.com/tree-sitter/tree-sitter/issues/445
   317 
   318    treesit.h has some commentary on the two main data structure for
   319    the parser and node.  treesit_sync_visible_region has some
   320    commentary on how we make tree-sitter play well with narrowing (the
   321    tree-sitter parser only sees the visible region, so we need to
   322    translate positions back and forth).  Most action happens in
   323    treesit_ensure_parsed, treesit_read_buffer and
   324    treesit_record_change.
   325 
   326    A complete correspondence list between tree-sitter functions and
   327    exposed Lisp functions can be found in the manual node (elisp)API
   328    Correspondence.
   329 
   330    Placement of CHECK_xxx functions: call CHECK_xxx before using any
   331    unchecked Lisp values; these include arguments of Lisp functions,
   332    the return value of Fsymbol_value, and that of Fcar or Fcdr on
   333    user-specified conses.
   334 
   335    Initializing tree-sitter: there are two entry points to tree-sitter
   336    functions: 'treesit-parser-create' and
   337    'treesit-language-available-p'.  Technically we only need to call
   338    initialization function in those two functions, but in reality we
   339    check at the beginning of every Lisp function.  That should be more
   340    fool-proof.
   341 
   342    Tree-sitter offset (0-based) and buffer position (1-based):
   343      tree-sitter offset + buffer position = buffer position
   344      buffer position - buffer position = tree-sitter offset
   345 
   346    Tree-sitter-related code in other files:
   347    - src/alloc.c for gc for parser and node
   348    - src/casefiddle.c & src/insdel.c for notifying tree-sitter
   349      parser of buffer changes.
   350    - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and
   351      node type.
   352    - print.c for printing tree-sitter objects (node, parser, query).
   353 
   354    Regarding signals: only raise signals in Lisp functions.
   355 
   356    Casts from EMACS_INT and ptrdiff_t to uint32_t: We install checks
   357    for buffer size and range and thus able to assume these casts never
   358    overflow.
   359 
   360    We don't parse at every keystroke.  Instead we only record the
   361    changes at each keystroke, and only parse when requested.  It is
   362    possible that lazy parsing is worse: instead of dispersed little
   363    pauses, now you have less frequent but larger pauses.  I doubt
   364    there will be any perceived difference, as the lazy parsing is
   365    going to be pretty frequent anyway.  Also this (lazy parsing) is
   366    what the mailing list guys wanted.
   367 
   368    Because it is pretty slow (comparing to other tree-sitter
   369    operations) for tree-sitter to parse the query and produce a query
   370    object, it is very wasteful to reparse the query every time
   371    treesit-query-capture is called, and it completely kills the
   372    performance of querying in a loop for a moderate amount of times
   373    (hundreds of queries takes seconds rather than milliseconds to
   374    complete).  Therefore we want some caching.  We can either use a
   375    search.c style transparent caching, or simply expose a new type,
   376    compiled-ts-query and let the user to manually compile AOT.  I
   377    believe AOT compiling gives users more control, makes the
   378    performance stable and easy to understand (compiled -> fast,
   379    uncompiled -> slow), and avoids some edge cases transparent cache
   380    could have (see below).  So I implemented the AOT compilation.
   381 
   382    Problems a transparent cache could have: Suppose we store cache
   383    entries in a fixed-length linked-list, and compare with EQ.  1)
   384    One-off query could kick out useful cache.  2) if the user messed
   385    up and the query doesn't EQ to the cache anymore, the performance
   386    mysteriously drops.  3) what if a user uses so many stuff that the
   387    default cache size (20) is not enough and we end up thrashing?
   388    These are all imaginary scenarios but they are not impossible
   389    :-)
   390 
   391    Parsers in indirect buffers: We make indirect buffers to share the
   392    parser of its base buffer.  Indirect buffers and their base buffer
   393    share the same buffer content but not other buffer attributes.  If
   394    they have separate parser lists, changes made in an indirect buffer
   395    will only update parsers of that indirect buffer, and not parsers
   396    in the base buffer or other indirect buffers, and vice versa.  We
   397    could keep track of all the base and indirect buffers, and update
   398    all of their parsers, but ultimately decide to take a simpler
   399    approach, which is to make indirect buffers share their base
   400    buffer's parser list.  The discussion can be found in bug#59693.  */
   401 
   402 
   403 /*** Initialization */
   404 
   405 static Lisp_Object Vtreesit_str_libtree_sitter;
   406 static Lisp_Object Vtreesit_str_tree_sitter;
   407 #ifndef WINDOWSNT
   408 static Lisp_Object Vtreesit_str_dot_0;
   409 #endif
   410 static Lisp_Object Vtreesit_str_dot;
   411 static Lisp_Object Vtreesit_str_question_mark;
   412 static Lisp_Object Vtreesit_str_star;
   413 static Lisp_Object Vtreesit_str_plus;
   414 static Lisp_Object Vtreesit_str_pound_equal;
   415 static Lisp_Object Vtreesit_str_pound_match;
   416 static Lisp_Object Vtreesit_str_pound_pred;
   417 static Lisp_Object Vtreesit_str_open_bracket;
   418 static Lisp_Object Vtreesit_str_close_bracket;
   419 static Lisp_Object Vtreesit_str_open_paren;
   420 static Lisp_Object Vtreesit_str_close_paren;
   421 static Lisp_Object Vtreesit_str_space;
   422 static Lisp_Object Vtreesit_str_equal;
   423 static Lisp_Object Vtreesit_str_match;
   424 static Lisp_Object Vtreesit_str_pred;
   425 
   426 /* This is the limit on recursion levels for some tree-sitter
   427    functions.  Remember to update docstrings when changing this
   428    value. */
   429 const ptrdiff_t treesit_recursion_limit = 1000;
   430 bool treesit_initialized = false;
   431 
   432 static bool
   433 load_tree_sitter_if_necessary (bool required)
   434 {
   435 #ifdef WINDOWSNT
   436   static bool tried_to_initialize_once;
   437   static bool tree_sitter_initialized;
   438 
   439   if (!tried_to_initialize_once)
   440     {
   441       Lisp_Object status;
   442 
   443       tried_to_initialize_once = true;
   444       tree_sitter_initialized = init_treesit_functions ();
   445       status = tree_sitter_initialized ? Qt : Qnil;
   446       Vlibrary_cache = Fcons (Fcons (Qtree_sitter, status), Vlibrary_cache);
   447     }
   448 
   449   if (required && !tree_sitter_initialized)
   450     xsignal1 (Qtreesit_error,
   451               build_string ("tree-sitter library not found or failed to load"));
   452 
   453   return tree_sitter_initialized;
   454 #else
   455   return true;
   456 #endif
   457 }
   458 
   459 static void *
   460 treesit_calloc_wrapper (size_t n, size_t size)
   461 {
   462   return xzalloc (n * size);
   463 }
   464 
   465 static void
   466 treesit_initialize (void)
   467 {
   468   if (!treesit_initialized)
   469     {
   470       load_tree_sitter_if_necessary (true);
   471       ts_set_allocator (xmalloc, treesit_calloc_wrapper, xrealloc, xfree);
   472       treesit_initialized = true;
   473     }
   474 }
   475 
   476 
   477 /*** Loading language library */
   478 
   479 /* Translates a symbol treesit-<lang> to a C name
   480    treesit_<lang>.  */
   481 static void
   482 treesit_symbol_to_c_name (char *symbol_name)
   483 {
   484   for (int idx = 0; idx < strlen (symbol_name); idx++)
   485     {
   486       if (symbol_name[idx] == '-')
   487         symbol_name[idx] = '_';
   488     }
   489 }
   490 
   491 static bool
   492 treesit_find_override_name (Lisp_Object language_symbol, Lisp_Object *name,
   493                             Lisp_Object *c_symbol)
   494 {
   495   Lisp_Object tem;
   496 
   497   CHECK_LIST (Vtreesit_load_name_override_list);
   498 
   499   tem = Vtreesit_load_name_override_list;
   500 
   501   FOR_EACH_TAIL (tem)
   502     {
   503       Lisp_Object lang = XCAR (XCAR (tem));
   504       CHECK_SYMBOL (lang);
   505 
   506       if (EQ (lang, language_symbol))
   507         {
   508           *name = Fnth (make_fixnum (1), XCAR (tem));
   509           CHECK_STRING (*name);
   510           *c_symbol = Fnth (make_fixnum (2), XCAR (tem));
   511           CHECK_STRING (*c_symbol);
   512 
   513           return true;
   514         }
   515     }
   516 
   517   CHECK_LIST_END (tem, Vtreesit_load_name_override_list);
   518 
   519   return false;
   520 }
   521 
   522 /* For example, if Vdynamic_library_suffixes is (".so", ".dylib"),
   523    this function pushes "lib_base_name.so" and "lib_base_name.dylib"
   524    into *path_candidates.  Obviously path_candidates should be a Lisp
   525    list of Lisp strings.  */
   526 static void
   527 treesit_load_language_push_for_each_suffix (Lisp_Object lib_base_name,
   528                                             Lisp_Object *path_candidates)
   529 {
   530   Lisp_Object suffixes;
   531 
   532   suffixes = Vdynamic_library_suffixes;
   533 
   534   FOR_EACH_TAIL (suffixes)
   535     {
   536       Lisp_Object candidate1 = concat2 (lib_base_name, XCAR (suffixes));
   537 #ifndef WINDOWSNT
   538       /* On Posix hosts, support libraries named with ABI version
   539          numbers.  In the foreseeable future we only need to support
   540          version 0.0.  For more details, see
   541          https://lists.gnu.org/archive/html/emacs-devel/2023-04/msg00386.html.  */
   542       Lisp_Object candidate2 = concat2 (candidate1, Vtreesit_str_dot_0);
   543       Lisp_Object candidate3 = concat2 (candidate2, Vtreesit_str_dot_0);
   544 
   545       *path_candidates = Fcons (candidate3, *path_candidates);
   546       *path_candidates = Fcons (candidate2, *path_candidates);
   547 #endif
   548       *path_candidates = Fcons (candidate1, *path_candidates);
   549     }
   550 }
   551 
   552 /* Load the dynamic library of LANGUAGE_SYMBOL and return the pointer
   553    to the language definition.
   554 
   555    If error occurs, return NULL and fill SIGNAL_SYMBOL and SIGNAL_DATA
   556    with values suitable for xsignal.  */
   557 static TSLanguage *
   558 treesit_load_language (Lisp_Object language_symbol,
   559                        Lisp_Object *signal_symbol, Lisp_Object *signal_data)
   560 {
   561   Lisp_Object symbol_name = Fsymbol_name (language_symbol);
   562 
   563   CHECK_LIST (Vtreesit_extra_load_path);
   564 
   565   /* Figure out the library name and C name.  */
   566   Lisp_Object lib_base_name
   567     = concat2 (Vtreesit_str_libtree_sitter, symbol_name);
   568   Lisp_Object base_name
   569     = concat2 (Vtreesit_str_tree_sitter, symbol_name);
   570 
   571   /* Override the library name and C name, if appropriate.  */
   572   Lisp_Object override_name;
   573   Lisp_Object override_c_name;
   574   bool found_override = treesit_find_override_name (language_symbol,
   575                                                     &override_name,
   576                                                     &override_c_name);
   577   if (found_override)
   578     lib_base_name = override_name;
   579 
   580   /* Now we generate a list of possible library paths.  */
   581   Lisp_Object path_candidates = Qnil;
   582   /* First push just the filenames to the candidate list, which will
   583      make dynlib_open look under standard system load paths.  */
   584   treesit_load_language_push_for_each_suffix (lib_base_name, &path_candidates);
   585   /* This is used for reporting errors (i.e., just filenames).  */
   586   Lisp_Object base_candidates = path_candidates;
   587   /* Then push ~/.emacs.d/tree-sitter paths.  */
   588   Lisp_Object lib_name
   589     = Fexpand_file_name (concat2 (build_string ("tree-sitter/"), lib_base_name),
   590                          Fsymbol_value (Quser_emacs_directory));
   591   treesit_load_language_push_for_each_suffix (lib_name, &path_candidates);
   592   /* Then push paths from treesit-extra-load-path.  */
   593   Lisp_Object tail;
   594 
   595   tail = Freverse (Vtreesit_extra_load_path);
   596 
   597   FOR_EACH_TAIL (tail)
   598     {
   599       Lisp_Object expanded_lib = Fexpand_file_name (lib_base_name, XCAR (tail));
   600       treesit_load_language_push_for_each_suffix (expanded_lib,
   601                                                   &path_candidates);
   602     }
   603 
   604   /* Try loading the dynamic library by each path candidate.  Stop
   605      when succeed, record the error message and try the next one when
   606      fail.  */
   607   dynlib_handle_ptr handle;
   608   const char *error;
   609 
   610   tail = path_candidates;
   611   error = NULL;
   612   handle = NULL;
   613 
   614   FOR_EACH_TAIL (tail)
   615     {
   616       char *library_name = SSDATA (XCAR (tail));
   617       dynlib_error ();
   618       handle = dynlib_open (library_name);
   619       error = dynlib_error ();
   620       if (error == NULL)
   621         break;
   622     }
   623 
   624   if (error != NULL)
   625     {
   626       *signal_symbol = Qtreesit_load_language_error;
   627       *signal_data = list3 (Qnot_found, base_candidates,
   628                             build_string ("No such file or directory"));
   629       return NULL;
   630     }
   631 
   632   /* Load TSLanguage.  */
   633   eassume (handle != NULL);
   634   dynlib_error ();
   635   TSLanguage *(*langfn) (void);
   636   char *c_name;
   637   if (found_override)
   638     c_name = xstrdup (SSDATA (override_c_name));
   639   else
   640     {
   641       c_name = xstrdup (SSDATA (base_name));
   642       treesit_symbol_to_c_name (c_name);
   643     }
   644   langfn = dynlib_sym (handle, c_name);
   645   xfree (c_name);
   646   error = dynlib_error ();
   647   if (error != NULL)
   648     {
   649       *signal_symbol = Qtreesit_load_language_error;
   650       *signal_data = list2 (Qsymbol_error, build_string (error));
   651       return NULL;
   652     }
   653   TSLanguage *lang = (*langfn) ();
   654 
   655   /* Check if language version matches tree-sitter version.  */
   656   TSParser *parser = ts_parser_new ();
   657   bool success = ts_parser_set_language (parser, lang);
   658   ts_parser_delete (parser);
   659   if (!success)
   660     {
   661       *signal_symbol = Qtreesit_load_language_error;
   662       *signal_data = list2 (Qversion_mismatch,
   663                             make_fixnum (ts_language_version (lang)));
   664       return NULL;
   665     }
   666   return lang;
   667 }
   668 
   669 DEFUN ("treesit-language-available-p", Ftreesit_language_available_p,
   670        Streesit_language_available_p,
   671        1, 2, 0,
   672        doc: /* Return non-nil if LANGUAGE exists and is loadable.
   673 
   674 If DETAIL is non-nil, return (t . nil) when LANGUAGE is available,
   675 (nil . DATA) when unavailable.  DATA is the signal data of
   676 `treesit-load-language-error'.  */)
   677   (Lisp_Object language, Lisp_Object detail)
   678 {
   679   CHECK_SYMBOL (language);
   680   treesit_initialize ();
   681   Lisp_Object signal_symbol = Qnil;
   682   Lisp_Object signal_data = Qnil;
   683   if (treesit_load_language (language, &signal_symbol, &signal_data) == NULL)
   684     {
   685       if (NILP (detail))
   686         return Qnil;
   687       else
   688         return Fcons (Qnil, signal_data);
   689     }
   690   else
   691     {
   692       if (NILP (detail))
   693         return Qt;
   694       else
   695         return Fcons (Qt, Qnil);
   696     }
   697 }
   698 
   699 DEFUN ("treesit-library-abi-version", Ftreesit_library_abi_version,
   700        Streesit_library_abi_version,
   701        0, 1, 0,
   702        doc: /* Return the language ABI version of the tree-sitter library.
   703 
   704 By default, report the latest ABI version supported by the library for
   705 loading language support modules.  The library is backward-compatible
   706 with language modules which use older ABI versions; if MIN-COMPATIBLE
   707 is non-nil, return the oldest compatible ABI version.  */)
   708   (Lisp_Object min_compatible)
   709 {
   710   if (NILP (min_compatible))
   711     return make_fixnum (TREE_SITTER_LANGUAGE_VERSION);
   712   else
   713     return make_fixnum (TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION);
   714 }
   715 
   716 DEFUN ("treesit-language-abi-version", Ftreesit_language_abi_version,
   717        Streesit_language_abi_version,
   718        0, 1, 0,
   719        doc: /* Return the ABI version of the tree-sitter grammar for LANGUAGE.
   720 Return nil if a grammar library for LANGUAGE is not available.  */)
   721   (Lisp_Object language)
   722 {
   723   if (NILP (Ftreesit_language_available_p (language, Qnil)))
   724     return Qnil;
   725   else
   726     {
   727       Lisp_Object signal_symbol = Qnil;
   728       Lisp_Object signal_data = Qnil;
   729       TSLanguage *ts_language = treesit_load_language (language,
   730                                                        &signal_symbol,
   731                                                        &signal_data);
   732       if (ts_language == NULL)
   733         return Qnil;
   734       uint32_t version =  ts_language_version (ts_language);
   735       return make_fixnum((ptrdiff_t) version);
   736     }
   737 }
   738 
   739 
   740 /*** Parsing functions */
   741 
   742 static void
   743 treesit_check_parser (Lisp_Object obj)
   744 {
   745   CHECK_TS_PARSER (obj);
   746   if (XTS_PARSER (obj)->deleted)
   747     xsignal1 (Qtreesit_parser_deleted, obj);
   748 }
   749 
   750 /* An auxiliary function that saves a few lines of code.  Assumes TREE
   751    is not NULL.  START_BYTE, OLD_END_BYTE, NEW_END_BYTE must not be
   752    larger than UINT32_MAX.  */
   753 static inline void
   754 treesit_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte,
   755                      ptrdiff_t old_end_byte, ptrdiff_t new_end_byte)
   756 {
   757   eassert (start_byte >= 0);
   758   eassert (start_byte <= old_end_byte);
   759   eassert (start_byte <= new_end_byte);
   760   TSPoint dummy_point = {0, 0};
   761   eassert (start_byte <= UINT32_MAX);
   762   eassert (old_end_byte <= UINT32_MAX);
   763   eassert (new_end_byte <= UINT32_MAX);
   764   TSInputEdit edit = {(uint32_t) start_byte,
   765                       (uint32_t) old_end_byte,
   766                       (uint32_t) new_end_byte,
   767                       dummy_point, dummy_point, dummy_point};
   768   ts_tree_edit (tree, &edit);
   769 }
   770 
   771 /* Update each parser's tree after the user made an edit.  This
   772    function does not parse the buffer and only updates the tree, so it
   773    should be very fast.  */
   774 void
   775 treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
   776                        ptrdiff_t new_end_byte)
   777 {
   778   struct buffer *base_buffer = current_buffer;
   779   if (current_buffer->base_buffer)
   780     base_buffer = current_buffer->base_buffer;
   781   Lisp_Object parser_list = BVAR (base_buffer, ts_parser_list);
   782 
   783   FOR_EACH_TAIL_SAFE (parser_list)
   784     {
   785       CHECK_CONS (parser_list);
   786       Lisp_Object lisp_parser = XCAR (parser_list);
   787       treesit_check_parser (lisp_parser);
   788       TSTree *tree = XTS_PARSER (lisp_parser)->tree;
   789       /* See comment (ref:visible-beg-null) if you wonder why we don't
   790          update visible_beg/end when tree is NULL.  */
   791 
   792       if (tree != NULL)
   793         {
   794           eassert (start_byte <= old_end_byte);
   795           eassert (start_byte <= new_end_byte);
   796           /* Think the recorded change as a delete followed by an
   797              insert, and think of them as moving unchanged text back
   798              and forth.  After all, the whole point of updating the
   799              tree is to update the position of unchanged text.  */
   800           ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
   801           ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end;
   802           eassert (visible_beg >= 0);
   803           eassert (visible_beg <= visible_end);
   804 
   805           /* AFFECTED_START/OLD_END/NEW_END are (0-based) offsets from
   806              VISIBLE_BEG.  min(visi_end, max(visi_beg, value)) clips
   807              value into [visi_beg, visi_end], and subtracting visi_beg
   808              gives the offset from visi_beg.  */
   809           ptrdiff_t start_offset = (min (visible_end,
   810                                          max (visible_beg, start_byte))
   811                                     - visible_beg);
   812           ptrdiff_t old_end_offset = (min (visible_end,
   813                                            max (visible_beg, old_end_byte))
   814                                       - visible_beg);
   815           /* We don't clip new_end_offset under visible_end, because
   816              otherwise we would miss updating the clipped part.  Plus,
   817              when inserting in narrowed region, the narrowed region
   818              will grow to accommodate the new text, so this is the
   819              correct behavior.  (Bug#61369).  */
   820           ptrdiff_t new_end_offset = (max (visible_beg, new_end_byte)
   821                                       - visible_beg);
   822           eassert (start_offset <= old_end_offset);
   823           eassert (start_offset <= new_end_offset);
   824 
   825           treesit_tree_edit_1 (tree, start_offset, old_end_offset,
   826                                new_end_offset);
   827           XTS_PARSER (lisp_parser)->need_reparse = true;
   828           XTS_PARSER (lisp_parser)->timestamp++;
   829 
   830           /* VISIBLE_BEG/END records tree-sitter's range of view in
   831              the buffer.  We need to adjust them when tree-sitter's
   832              view changes.  */
   833           ptrdiff_t visi_beg_delta;
   834           if (old_end_byte > new_end_byte)
   835             /* Move backward.  */
   836             visi_beg_delta = (min (visible_beg, new_end_byte)
   837                               - min (visible_beg, old_end_byte));
   838           else
   839             /* Move forward.  */
   840             visi_beg_delta = (old_end_byte < visible_beg
   841                               ? new_end_byte - old_end_byte : 0);
   842 
   843           XTS_PARSER (lisp_parser)->visible_beg = visible_beg + visi_beg_delta;
   844           XTS_PARSER (lisp_parser)->visible_end = (visible_end
   845                                                    + visi_beg_delta
   846                                                    + (new_end_offset
   847                                                       - old_end_offset));
   848 
   849           eassert (XTS_PARSER (lisp_parser)->visible_beg >= 0);
   850           eassert (XTS_PARSER (lisp_parser)->visible_beg
   851                    <= XTS_PARSER (lisp_parser)->visible_end);
   852         }
   853     }
   854 }
   855 
   856 /* Comment (ref:visible-beg-null) The purpose of visible_beg/end is to
   857    keep track of "which part of the buffer does the tree-sitter tree
   858    see", in order to update the tree correctly.  Visible_beg/end have
   859    two purposes: they "clip" buffer changes within them, and they
   860    translate positions in the buffer to positions in the tree
   861    (buffer position - visible_beg = tree position).
   862 
   863    Conceptually, visible_beg/end hold the visible region of the buffer
   864    when we last reparsed.  In-between two reparses, we don't really
   865    care if the visible region of the buffer changes.
   866 
   867    Right before we reparse, we make tree-sitter's visible region
   868    match that of the buffer, and update visible_beg/end.
   869 
   870    That is, the whole purpose of visible_beg/end (and also of
   871    treesit_record_change and treesit_sync_visible_region) is to update
   872    the tree (by ts_tree_edit).  So if the tree is NULL,
   873    visible_beg/end are considered uninitialized.  Only when we already
   874    have a tree, do we need to keep track of position changes and
   875    update it correctly, so it can be fed into ts_parser_parse as the
   876    old tree, so that tree-sitter will only parse the changed part,
   877    incrementally.
   878 
   879    In a nutshell, tree-sitter incremental parsing in Emacs looks like:
   880 
   881    treesit_record_change (tree)  \
   882    treesit_record_change (tree)  | user edits buffer
   883    ...                           /
   884 
   885    treesit_sync_visible_region (tree) \ treesit_ensure_parsed
   886    ts_parser_parse(tree) -> tree      /
   887 
   888    treesit_record_change (tree)  \
   889    treesit_record_change (tree)  | user edits buffer
   890    ...                           /
   891 
   892    and so on.  */
   893 
   894 /* Make sure the tree's visible range is in sync with the buffer's
   895    visible range, and PARSER's visible_beg and visible_end are in sync
   896    with BUF_BEGV_BYTE and BUG_ZV_BYTE.  When calling this function you
   897    must make sure the current buffer's size in bytes is not larger than
   898    UINT32_MAX.  Basically, always call treesit_check_buffer_size before
   899    this function.
   900 
   901    If buffer range changed since last parse (visible_beg/end doesn't
   902    match buffer visible beginning/end), set need_reparse to true.  */
   903 static void
   904 treesit_sync_visible_region (Lisp_Object parser)
   905 {
   906   TSTree *tree = XTS_PARSER (parser)->tree;
   907   struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
   908 
   909   /* If we are setting visible_beg/end for the first time, we can skip
   910   the offset acrobatics and updating the tree below.  */
   911   if (tree == NULL)
   912     {
   913       XTS_PARSER (parser)->visible_beg = BUF_BEGV_BYTE (buffer);
   914       XTS_PARSER (parser)->visible_end = BUF_ZV_BYTE (buffer);
   915       return;
   916     }
   917 
   918   ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg;
   919   ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end;
   920   eassert (0 <= visible_beg);
   921   eassert (visible_beg <= visible_end);
   922 
   923   eassert (BUF_BEGV_BYTE (buffer) <= UINT32_MAX);
   924   eassert (BUF_ZV_BYTE (buffer) <= UINT32_MAX);
   925 
   926   /* If buffer restriction changed and user requests for a node (hence
   927      this function is called), we need to reparse.  */
   928   if (visible_beg != BUF_BEGV_BYTE (buffer)
   929       || visible_end != BUF_ZV_BYTE (buffer))
   930     XTS_PARSER (parser)->need_reparse = true;
   931 
   932   /* Before we parse or set ranges, catch up with the narrowing
   933      situation.  We change visible_beg and visible_end to match
   934      BUF_BEGV_BYTE and BUF_ZV_BYTE, and inform tree-sitter of the
   935      change.  We want to move the visible range of tree-sitter to
   936      match the narrowed range.  For example,
   937      from ________|xxxx|__
   938      to   |xxxx|__________ */
   939 
   940   /* 1. Make sure visible_beg <= BUF_BEGV_BYTE.  */
   941   if (visible_beg > BUF_BEGV_BYTE (buffer))
   942     {
   943       /* Tree-sitter sees: insert at the beginning.  */
   944       treesit_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer));
   945       visible_beg = BUF_BEGV_BYTE (buffer);
   946       eassert (visible_beg <= visible_end);
   947     }
   948   /* 2. Make sure visible_end = BUF_ZV_BYTE.  */
   949   if (visible_end < BUF_ZV_BYTE (buffer))
   950     {
   951       /* Tree-sitter sees: insert at the end.  */
   952       treesit_tree_edit_1 (tree, visible_end - visible_beg,
   953                            visible_end - visible_beg,
   954                            BUF_ZV_BYTE (buffer) - visible_beg);
   955       visible_end = BUF_ZV_BYTE (buffer);
   956       eassert (visible_beg <= visible_end);
   957     }
   958   else if (visible_end > BUF_ZV_BYTE (buffer))
   959     {
   960       /* Tree-sitter sees: delete at the end.  */
   961       treesit_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg,
   962                            visible_end - visible_beg,
   963                            BUF_ZV_BYTE (buffer) - visible_beg);
   964       visible_end = BUF_ZV_BYTE (buffer);
   965       eassert (visible_beg <= visible_end);
   966     }
   967   /* 3. Make sure visible_beg = BUF_BEGV_BYTE.  */
   968   if (visible_beg < BUF_BEGV_BYTE (buffer))
   969     {
   970       /* Tree-sitter sees: delete at the beginning.  */
   971       treesit_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0);
   972       visible_beg = BUF_BEGV_BYTE (buffer);
   973       eassert (visible_beg <= visible_end);
   974     }
   975   eassert (0 <= visible_beg);
   976   eassert (visible_beg <= visible_end);
   977   eassert (visible_beg == BUF_BEGV_BYTE (buffer));
   978   eassert (visible_end == BUF_ZV_BYTE (buffer));
   979 
   980   XTS_PARSER (parser)->visible_beg = visible_beg;
   981   XTS_PARSER (parser)->visible_end = visible_end;
   982 }
   983 
   984 static void
   985 treesit_check_buffer_size (struct buffer *buffer)
   986 {
   987   ptrdiff_t buffer_size_bytes = (BUF_Z_BYTE (buffer) - BUF_BEG_BYTE (buffer));
   988   if (buffer_size_bytes > UINT32_MAX)
   989     xsignal2 (Qtreesit_buffer_too_large,
   990               build_string ("Buffer size cannot be larger than 4GB"),
   991               make_fixnum (buffer_size_bytes));
   992 }
   993 
   994 static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t, struct buffer *);
   995 
   996 static void
   997 treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
   998                                      Lisp_Object parser)
   999 {
  1000   /* If the old_tree is NULL, meaning this is the first parse, the
  1001      changed range is the whole buffer.  */
  1002   Lisp_Object lisp_ranges;
  1003   struct buffer *buf = XBUFFER (XTS_PARSER (parser)->buffer);
  1004   if (old_tree)
  1005     {
  1006       uint32_t len;
  1007       TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len);
  1008       lisp_ranges = treesit_make_ranges (ranges, len, buf);
  1009       xfree (ranges);
  1010     }
  1011   else
  1012     {
  1013       struct buffer *oldbuf = current_buffer;
  1014       set_buffer_internal (buf);
  1015       lisp_ranges = Fcons (Fcons (Fpoint_min (), Fpoint_max ()), Qnil);
  1016       set_buffer_internal (oldbuf);
  1017     }
  1018 
  1019   specpdl_ref count = SPECPDL_INDEX ();
  1020 
  1021   /* let's trust the after change functions and not clone a new ranges
  1022      for each of them.  */
  1023   Lisp_Object functions = XTS_PARSER (parser)->after_change_functions;
  1024   FOR_EACH_TAIL (functions)
  1025     safe_call2 (XCAR (functions), lisp_ranges, parser);
  1026 
  1027   unbind_to (count, Qnil);
  1028 }
  1029 
  1030 /* Parse the buffer.  We don't parse until we have to.  When we have
  1031    to, we call this function to parse and update the tree.  */
  1032 static void
  1033 treesit_ensure_parsed (Lisp_Object parser)
  1034 {
  1035   struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
  1036 
  1037   /* Before we parse, catch up with the narrowing situation.  */
  1038   treesit_check_buffer_size (buffer);
  1039   /* This function has to run before we check for need_reparse flag,
  1040      because it might set the flag to true.  */
  1041   treesit_sync_visible_region (parser);
  1042 
  1043   /* Make sure this comes before everything else, see comment
  1044      (ref:notifier-inside-ensure-parsed) for more detail.  */
  1045   if (!XTS_PARSER (parser)->need_reparse)
  1046     return;
  1047 
  1048   TSParser *treesit_parser = XTS_PARSER (parser)->parser;
  1049   TSTree *tree = XTS_PARSER (parser)->tree;
  1050   TSInput input = XTS_PARSER (parser)->input;
  1051 
  1052   TSTree *new_tree = ts_parser_parse (treesit_parser, tree, input);
  1053   /* This should be very rare (impossible, really): it only happens
  1054      when 1) language is not set (impossible in Emacs because the user
  1055      has to supply a language to create a parser), 2) parse canceled
  1056      due to timeout (impossible because we don't set a timeout), 3)
  1057      parse canceled due to cancellation flag (impossible because we
  1058      don't set the flag).  (See comments for ts_parser_parse in
  1059      tree_sitter/api.h.)  */
  1060   if (new_tree == NULL)
  1061     {
  1062       Lisp_Object buf;
  1063       XSETBUFFER (buf, buffer);
  1064       xsignal1 (Qtreesit_parse_error, buf);
  1065     }
  1066 
  1067   XTS_PARSER (parser)->tree = new_tree;
  1068   XTS_PARSER (parser)->need_reparse = false;
  1069 
  1070   /* After-change functions should run at the very end, most crucially
  1071      after need_reparse is set to false, this way if the function
  1072      calls some tree-sitter function which invokes
  1073      treesit_ensure_parsed again, it returns early and do not
  1074      recursively call the after change functions again.
  1075      (ref:notifier-inside-ensure-parsed)  */
  1076   treesit_call_after_change_functions (tree, new_tree, parser);
  1077   ts_tree_delete (tree);
  1078 }
  1079 
  1080 /* This is the read function provided to tree-sitter to read from a
  1081    buffer.  It reads one character at a time and automatically skips
  1082    the gap.  */
  1083 static const char*
  1084 treesit_read_buffer (void *parser, uint32_t byte_index,
  1085                      TSPoint position, uint32_t *bytes_read)
  1086 {
  1087   struct buffer *buffer = XBUFFER (((struct Lisp_TS_Parser *) parser)->buffer);
  1088   ptrdiff_t visible_beg = ((struct Lisp_TS_Parser *) parser)->visible_beg;
  1089   ptrdiff_t visible_end = ((struct Lisp_TS_Parser *) parser)->visible_end;
  1090   ptrdiff_t byte_pos = byte_index + visible_beg;
  1091   /* We will make sure visible_beg = BUF_BEGV_BYTE before re-parse (in
  1092      treesit_ensure_parsed), so byte_pos will never be smaller than
  1093      BUF_BEG_BYTE.  */
  1094   eassert (visible_beg = BUF_BEGV_BYTE (buffer));
  1095   eassert (visible_end = BUF_ZV_BYTE (buffer));
  1096 
  1097   /* Read one character.  Tree-sitter wants us to set bytes_read to 0
  1098      if it reads to the end of buffer.  It doesn't say what it wants
  1099      for the return value in that case, so we just give it an empty
  1100      string.  */
  1101   char *beg;
  1102   int len;
  1103   /* This function could run from a user command, so it is better to
  1104      do nothing instead of raising an error.  (It was a pain in the a**
  1105      to decrypt mega-if-conditions in Emacs source, so I wrote the two
  1106      branches separately, you are welcome.)  */
  1107   if (!BUFFER_LIVE_P (buffer))
  1108     {
  1109       beg = NULL;
  1110       len = 0;
  1111     }
  1112   /* Reached visible end-of-buffer, tell tree-sitter to read no more.  */
  1113   else if (byte_pos >= visible_end)
  1114     {
  1115       beg = NULL;
  1116       len = 0;
  1117     }
  1118   /* Normal case, read a character.  */
  1119   else
  1120     {
  1121       beg = (char *) BUF_BYTE_ADDRESS (buffer, byte_pos);
  1122       len = BYTES_BY_CHAR_HEAD ((int) *beg);
  1123     }
  1124   /* We never let tree-sitter to parse buffers that large so this
  1125      assertion should never hit.  */
  1126   eassert (len < UINT32_MAX);
  1127   *bytes_read = (uint32_t) len;
  1128   return beg;
  1129 }
  1130 
  1131 
  1132 /*** Functions for parser and node object */
  1133 
  1134 /* Wrap the parser in a Lisp_Object to be used in the Lisp
  1135    machine.  */
  1136 Lisp_Object
  1137 make_treesit_parser (Lisp_Object buffer, TSParser *parser,
  1138                      TSTree *tree, Lisp_Object language_symbol)
  1139 {
  1140   struct Lisp_TS_Parser *lisp_parser;
  1141 
  1142   lisp_parser = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Parser,
  1143                                        buffer, PVEC_TS_PARSER);
  1144 
  1145   lisp_parser->language_symbol = language_symbol;
  1146   lisp_parser->after_change_functions = Qnil;
  1147   lisp_parser->buffer = buffer;
  1148   lisp_parser->parser = parser;
  1149   lisp_parser->tree = tree;
  1150   TSInput input = {lisp_parser, treesit_read_buffer, TSInputEncodingUTF8};
  1151   lisp_parser->input = input;
  1152   lisp_parser->need_reparse = true;
  1153   lisp_parser->visible_beg = BUF_BEGV_BYTE (XBUFFER (buffer));
  1154   lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer));
  1155   lisp_parser->timestamp = 0;
  1156   lisp_parser->deleted = false;
  1157   lisp_parser->has_range = false;
  1158   eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
  1159   return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
  1160 }
  1161 
  1162 /* Wrap the node in a Lisp_Object to be used in the Lisp machine.  */
  1163 Lisp_Object
  1164 make_treesit_node (Lisp_Object parser, TSNode node)
  1165 {
  1166   struct Lisp_TS_Node *lisp_node;
  1167 
  1168   lisp_node = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Node,
  1169                                      parser, PVEC_TS_NODE);
  1170   lisp_node->parser = parser;
  1171   lisp_node->node = node;
  1172   lisp_node->timestamp = XTS_PARSER (parser)->timestamp;
  1173   return make_lisp_ptr (lisp_node, Lisp_Vectorlike);
  1174 }
  1175 
  1176 /* Make a compiled query.  QUERY has to be either a cons or a
  1177    string.  */
  1178 static Lisp_Object
  1179 make_treesit_query (Lisp_Object query, Lisp_Object language)
  1180 {
  1181   TSQueryCursor *treesit_cursor = ts_query_cursor_new ();
  1182   struct Lisp_TS_Query *lisp_query;
  1183 
  1184   lisp_query = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Query,
  1185                                       source, PVEC_TS_COMPILED_QUERY);
  1186 
  1187   lisp_query->language = language;
  1188   lisp_query->source = query;
  1189   lisp_query->query = NULL;
  1190   lisp_query->cursor = treesit_cursor;
  1191   return make_lisp_ptr (lisp_query, Lisp_Vectorlike);
  1192 }
  1193 
  1194 /* The following two functions are called from alloc.c:cleanup_vector.  */
  1195 void
  1196 treesit_delete_parser (struct Lisp_TS_Parser *lisp_parser)
  1197 {
  1198   ts_tree_delete (lisp_parser->tree);
  1199   ts_parser_delete (lisp_parser->parser);
  1200 }
  1201 
  1202 void
  1203 treesit_delete_query (struct Lisp_TS_Query *lisp_query)
  1204 {
  1205   ts_query_delete (lisp_query->query);
  1206   ts_query_cursor_delete (lisp_query->cursor);
  1207 }
  1208 
  1209 /* The following function is called from print.c:print_vectorlike.  */
  1210 bool
  1211 treesit_named_node_p (TSNode node)
  1212 {
  1213   return ts_node_is_named (node);
  1214 }
  1215 
  1216 static const char*
  1217 treesit_query_error_to_string (TSQueryError error)
  1218 {
  1219   switch (error)
  1220     {
  1221     case TSQueryErrorNone:
  1222       return "None";
  1223     case TSQueryErrorSyntax:
  1224       return "Syntax error at";
  1225     case TSQueryErrorNodeType:
  1226       return "Node type error at";
  1227     case TSQueryErrorField:
  1228       return "Field error at";
  1229     case TSQueryErrorCapture:
  1230       return "Capture error at";
  1231     case TSQueryErrorStructure:
  1232       return "Structure error at";
  1233     default:
  1234       return "Unknown error";
  1235     }
  1236 }
  1237 
  1238 static Lisp_Object
  1239 treesit_compose_query_signal_data (uint32_t error_offset,
  1240                                    TSQueryError error_type,
  1241                                    Lisp_Object query_source)
  1242 {
  1243   return list4 (build_string (treesit_query_error_to_string (error_type)),
  1244                 make_fixnum (error_offset + 1),
  1245                 query_source,
  1246                 build_string ("Debug the query with `treesit-query-validate'"));
  1247 }
  1248 
  1249 /* Ensure the QUERY is compiled.  Return the TSQuery.  It could be
  1250    NULL if error occurs, in which case ERROR_OFFSET and ERROR_TYPE are
  1251    bound.  If error occurs, return NULL, and assign SIGNAL_SYMBOL and
  1252    SIGNAL_DATA accordingly.  */
  1253 static TSQuery *
  1254 treesit_ensure_query_compiled (Lisp_Object query, Lisp_Object *signal_symbol,
  1255                                Lisp_Object *signal_data)
  1256 {
  1257   /* If query is already compiled (not null), return that, otherwise
  1258      compile and return it.  */
  1259   TSQuery *treesit_query = XTS_COMPILED_QUERY (query)->query;
  1260   if (treesit_query != NULL)
  1261     return treesit_query;
  1262 
  1263   /* Get query source and TSLanguage ready.  */
  1264   Lisp_Object source = XTS_COMPILED_QUERY (query)->source;
  1265   Lisp_Object language = XTS_COMPILED_QUERY (query)->language;
  1266   /* This is the main reason why we compile query lazily: to avoid
  1267      loading languages early.  */
  1268   TSLanguage *treesit_lang = treesit_load_language (language, signal_symbol,
  1269                                                     signal_data);
  1270   if (treesit_lang == NULL)
  1271     return NULL;
  1272 
  1273   if (CONSP (source))
  1274     source = Ftreesit_query_expand (source);
  1275 
  1276   /* Create TSQuery.  */
  1277   uint32_t error_offset;
  1278   TSQueryError error_type;
  1279   char *treesit_source = SSDATA (source);
  1280   treesit_query = ts_query_new (treesit_lang, treesit_source,
  1281                                 strlen (treesit_source),
  1282                                 &error_offset, &error_type);
  1283   if (treesit_query == NULL)
  1284     {
  1285       *signal_symbol = Qtreesit_query_error;
  1286       *signal_data = treesit_compose_query_signal_data (error_offset,
  1287                                                         error_type,
  1288                                                         source);
  1289     }
  1290   XTS_COMPILED_QUERY (query)->query = treesit_query;
  1291   return treesit_query;
  1292 }
  1293 
  1294 
  1295 /* Lisp definitions.  */
  1296 
  1297 DEFUN ("treesit-parser-p",
  1298        Ftreesit_parser_p, Streesit_parser_p, 1, 1, 0,
  1299        doc: /* Return t if OBJECT is a tree-sitter parser.  */)
  1300   (Lisp_Object object)
  1301 {
  1302   if (TS_PARSERP (object))
  1303     return Qt;
  1304   else
  1305     return Qnil;
  1306 }
  1307 
  1308 DEFUN ("treesit-node-p",
  1309        Ftreesit_node_p, Streesit_node_p, 1, 1, 0,
  1310        doc: /* Return t if OBJECT is a tree-sitter node.  */)
  1311   (Lisp_Object object)
  1312 {
  1313   if (TS_NODEP (object))
  1314     return Qt;
  1315   else
  1316     return Qnil;
  1317 }
  1318 
  1319 DEFUN ("treesit-compiled-query-p",
  1320        Ftreesit_compiled_query_p, Streesit_compiled_query_p, 1, 1, 0,
  1321        doc: /* Return t if OBJECT is a compiled tree-sitter query.  */)
  1322   (Lisp_Object object)
  1323 {
  1324   if (TS_COMPILED_QUERY_P (object))
  1325     return Qt;
  1326   else
  1327     return Qnil;
  1328 }
  1329 
  1330 DEFUN ("treesit-query-p",
  1331        Ftreesit_query_p, Streesit_query_p, 1, 1, 0,
  1332        doc: /* Return t if OBJECT is a generic tree-sitter query.  */)
  1333   (Lisp_Object object)
  1334 {
  1335   if (TS_COMPILED_QUERY_P (object)
  1336       || CONSP (object) || STRINGP (object))
  1337     return Qt;
  1338   else
  1339     return Qnil;
  1340 }
  1341 
  1342 DEFUN ("treesit-query-language",
  1343        Ftreesit_query_language, Streesit_query_language, 1, 1, 0,
  1344        doc: /* Return the language of QUERY.
  1345 QUERY has to be a compiled query.  */)
  1346   (Lisp_Object query)
  1347 {
  1348   CHECK_TS_COMPILED_QUERY (query);
  1349   return XTS_COMPILED_QUERY (query)->language;
  1350 }
  1351 
  1352 DEFUN ("treesit-node-parser",
  1353        Ftreesit_node_parser, Streesit_node_parser,
  1354        1, 1, 0,
  1355        doc: /* Return the parser to which NODE belongs.  */)
  1356   (Lisp_Object node)
  1357 {
  1358   CHECK_TS_NODE (node);
  1359   return XTS_NODE (node)->parser;
  1360 }
  1361 
  1362 DEFUN ("treesit-parser-create",
  1363        Ftreesit_parser_create, Streesit_parser_create,
  1364        1, 3, 0,
  1365        doc: /* Create and return a parser in BUFFER for LANGUAGE.
  1366 
  1367 The parser is automatically added to BUFFER's parser list, as returned
  1368 by `treesit-parser-list'.  LANGUAGE is a language symbol.  If BUFFER
  1369 is nil or omitted, it defaults to the current buffer.  If BUFFER
  1370 already has a parser for LANGUAGE, return that parser, but if NO-REUSE
  1371 is non-nil, always create a new parser.
  1372 
  1373 If that buffer is an indirect buffer, its base buffer is used instead.
  1374 That is, indirect buffers use their base buffer's parsers.  Lisp
  1375 programs should widen as necessary should they want to use a parser in
  1376 an indirect buffer.  */)
  1377   (Lisp_Object language, Lisp_Object buffer, Lisp_Object no_reuse)
  1378 {
  1379   treesit_initialize ();
  1380 
  1381   CHECK_SYMBOL (language);
  1382   struct buffer *buf;
  1383   if (NILP (buffer))
  1384     buf = current_buffer;
  1385   else
  1386     {
  1387       CHECK_BUFFER (buffer);
  1388       buf = XBUFFER (buffer);
  1389     }
  1390   if (buf->base_buffer)
  1391     buf = buf->base_buffer;
  1392 
  1393   treesit_check_buffer_size (buf);
  1394 
  1395   /* See if we can reuse a parser.  */
  1396   if (NILP (no_reuse))
  1397     {
  1398       Lisp_Object tail = BVAR (buf, ts_parser_list);
  1399       FOR_EACH_TAIL (tail)
  1400       {
  1401         struct Lisp_TS_Parser *parser = XTS_PARSER (XCAR (tail));
  1402         if (EQ (parser->language_symbol, language))
  1403           return XCAR (tail);
  1404       }
  1405     }
  1406 
  1407   /* Load language.  */
  1408   Lisp_Object signal_symbol = Qnil;
  1409   Lisp_Object signal_data = Qnil;
  1410   TSParser *parser = ts_parser_new ();
  1411   TSLanguage *lang = treesit_load_language (language, &signal_symbol,
  1412                                             &signal_data);
  1413   if (lang == NULL)
  1414     xsignal (signal_symbol, signal_data);
  1415   /* We check language version when loading a language, so this should
  1416      always succeed.  */
  1417   ts_parser_set_language (parser, lang);
  1418 
  1419   /* Create parser.  */
  1420   Lisp_Object lisp_parser = make_treesit_parser (Fcurrent_buffer (),
  1421                                                  parser, NULL,
  1422                                                  language);
  1423 
  1424   /* Update parser-list.  */
  1425   BVAR (buf, ts_parser_list) = Fcons (lisp_parser, BVAR (buf, ts_parser_list));
  1426 
  1427   return lisp_parser;
  1428 }
  1429 
  1430 DEFUN ("treesit-parser-delete",
  1431        Ftreesit_parser_delete, Streesit_parser_delete,
  1432        1, 1, 0,
  1433        doc: /* Delete PARSER from its buffer's parser list.
  1434 See `treesit-parser-list' for the buffer's parser list.  */)
  1435   (Lisp_Object parser)
  1436 {
  1437   treesit_check_parser (parser);
  1438 
  1439   Lisp_Object buffer = XTS_PARSER (parser)->buffer;
  1440   struct buffer *buf = XBUFFER (buffer);
  1441 
  1442   BVAR (buf, ts_parser_list)
  1443     = Fdelete (parser, BVAR (buf, ts_parser_list));
  1444 
  1445   XTS_PARSER (parser)->deleted = true;
  1446   return Qnil;
  1447 }
  1448 
  1449 DEFUN ("treesit-parser-list",
  1450        Ftreesit_parser_list, Streesit_parser_list,
  1451        0, 1, 0,
  1452        doc: /* Return BUFFER's parser list.
  1453 
  1454 BUFFER defaults to the current buffer.  If that buffer is an indirect
  1455 buffer, its base buffer is used instead.  That is, indirect buffers
  1456 use their base buffer's parsers.  */)
  1457   (Lisp_Object buffer)
  1458 {
  1459   struct buffer *buf;
  1460   if (NILP (buffer))
  1461     buf = current_buffer;
  1462   else
  1463     {
  1464       CHECK_BUFFER (buffer);
  1465       buf = XBUFFER (buffer);
  1466     }
  1467   if (buf->base_buffer)
  1468     buf = buf->base_buffer;
  1469 
  1470   /* Return a fresh list so messing with that list doesn't affect our
  1471      internal data.  */
  1472   Lisp_Object return_list = Qnil;
  1473   Lisp_Object tail;
  1474 
  1475   tail = BVAR (buf, ts_parser_list);
  1476 
  1477   FOR_EACH_TAIL (tail)
  1478     return_list = Fcons (XCAR (tail), return_list);
  1479 
  1480   return Freverse (return_list);
  1481 }
  1482 
  1483 DEFUN ("treesit-parser-buffer",
  1484        Ftreesit_parser_buffer, Streesit_parser_buffer,
  1485        1, 1, 0,
  1486        doc: /* Return the buffer of PARSER.  */)
  1487   (Lisp_Object parser)
  1488 {
  1489   treesit_check_parser (parser);
  1490   Lisp_Object buf;
  1491   XSETBUFFER (buf, XBUFFER (XTS_PARSER (parser)->buffer));
  1492   return buf;
  1493 }
  1494 
  1495 DEFUN ("treesit-parser-language",
  1496        Ftreesit_parser_language, Streesit_parser_language,
  1497        1, 1, 0,
  1498        doc: /* Return PARSER's language symbol.
  1499 This symbol is the one used to create the parser.  */)
  1500   (Lisp_Object parser)
  1501 {
  1502   treesit_check_parser (parser);
  1503   return XTS_PARSER (parser)->language_symbol;
  1504 }
  1505 
  1506 /* Return true if PARSER is not deleted and its buffer is live.  */
  1507 static bool
  1508 treesit_parser_live_p (Lisp_Object parser)
  1509 {
  1510   CHECK_TS_PARSER (parser);
  1511   return ((!XTS_PARSER (parser)->deleted) &&
  1512           (!NILP (Fbuffer_live_p (XTS_PARSER (parser)->buffer))));
  1513 }
  1514 
  1515 
  1516 /*** Parser API */
  1517 
  1518 DEFUN ("treesit-parser-root-node",
  1519        Ftreesit_parser_root_node, Streesit_parser_root_node,
  1520        1, 1, 0,
  1521        doc: /* Return the root node of PARSER.  */)
  1522   (Lisp_Object parser)
  1523 {
  1524   treesit_check_parser (parser);
  1525   treesit_initialize ();
  1526   treesit_ensure_parsed (parser);
  1527   TSNode root_node = ts_tree_root_node (XTS_PARSER (parser)->tree);
  1528   return make_treesit_node (parser, root_node);
  1529 }
  1530 
  1531 /* Checks that the RANGES argument of
  1532    treesit-parser-set-included-ranges is valid.  */
  1533 static void
  1534 treesit_check_range_argument (Lisp_Object ranges)
  1535 {
  1536   struct buffer *buffer = current_buffer;
  1537   ptrdiff_t point_min = BUF_BEGV (buffer);
  1538   ptrdiff_t point_max = BUF_ZV (buffer);
  1539   EMACS_INT last_point = point_min;
  1540   Lisp_Object tail;
  1541 
  1542   tail = ranges;
  1543 
  1544   CHECK_LIST (tail);
  1545 
  1546   FOR_EACH_TAIL (tail)
  1547     {
  1548       CHECK_CONS (tail);
  1549       Lisp_Object range = XCAR (tail);
  1550       CHECK_CONS (range);
  1551       CHECK_FIXNUM (XCAR (range));
  1552       CHECK_FIXNUM (XCDR (range));
  1553       EMACS_INT beg = XFIXNUM (XCAR (range));
  1554       EMACS_INT end = XFIXNUM (XCDR (range));
  1555       if (!(last_point <= beg && beg <= end && end <= point_max))
  1556         xsignal2 (Qtreesit_range_invalid,
  1557                   build_string ("RANGE is either overlapping,"
  1558                                 " out-of-order or out-of-range"),
  1559                   ranges);
  1560       last_point = end;
  1561     }
  1562 
  1563   CHECK_LIST_END (tail, ranges);
  1564 }
  1565 
  1566 /* Generate a list of ranges in Lisp from RANGES.  Assumes tree-sitter
  1567    tree and the buffer has the same visible region (wrt narrowing).
  1568    This function doesn't take ownership of RANGES.  BUFFER is used to
  1569    convert between tree-sitter buffer offset and buffer position.  */
  1570 static Lisp_Object
  1571 treesit_make_ranges (const TSRange *ranges, uint32_t len,
  1572                      struct buffer *buffer)
  1573 {
  1574   Lisp_Object list = Qnil;
  1575   for (int idx = 0; idx < len; idx++)
  1576     {
  1577       TSRange range = ranges[idx];
  1578       uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer);
  1579       uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer);
  1580       eassert (BUF_BEGV_BYTE (buffer) <= beg_byte);
  1581       eassert (beg_byte <= end_byte);
  1582       eassert (end_byte <= BUF_ZV_BYTE (buffer));
  1583 
  1584       Lisp_Object lisp_range
  1585         = Fcons (make_fixnum (buf_bytepos_to_charpos (buffer, beg_byte)),
  1586                  make_fixnum (buf_bytepos_to_charpos (buffer, end_byte)));
  1587       list = Fcons (lisp_range, list);
  1588     }
  1589   return Fnreverse (list);
  1590 }
  1591 
  1592 DEFUN ("treesit-parser-set-included-ranges",
  1593        Ftreesit_parser_set_included_ranges,
  1594        Streesit_parser_set_included_ranges,
  1595        2, 2, 0,
  1596        doc: /* Limit PARSER to RANGES.
  1597 
  1598 RANGES is a list of (BEG . END), each (BEG . END) defines a region in
  1599 which the parser should operate.  Regions must not overlap, and the
  1600 regions should come in order in the list.  Signal
  1601 `treesit-set-range-error' if the argument is invalid, or something
  1602 else went wrong.  If RANGES is nil, the PARSER is to parse the whole
  1603 buffer.  */)
  1604   (Lisp_Object parser, Lisp_Object ranges)
  1605 {
  1606   treesit_check_parser (parser);
  1607   if (!NILP (ranges))
  1608     CHECK_CONS (ranges);
  1609   treesit_check_range_argument (ranges);
  1610 
  1611   treesit_initialize ();
  1612   /* Before we parse, catch up with narrowing/widening.  */
  1613   treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer));
  1614   treesit_sync_visible_region (parser);
  1615 
  1616   bool success;
  1617   if (NILP (ranges))
  1618     {
  1619       XTS_PARSER (parser)->has_range = false;
  1620       /* If RANGES is nil, make parser to parse the whole document.
  1621          To do that we give tree-sitter a 0 length, the range is a
  1622          dummy.  */
  1623       TSRange treesit_range = {{0, 0}, {0, 0}, 0, 0};
  1624       success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
  1625                                                &treesit_range , 0);
  1626     }
  1627   else
  1628     {
  1629       /* Set ranges for PARSER.  */
  1630       XTS_PARSER (parser)->has_range = true;
  1631 
  1632       if (list_length (ranges) > UINT32_MAX)
  1633         xsignal (Qargs_out_of_range, list2 (ranges, Flength (ranges)));
  1634       uint32_t len = (uint32_t) list_length (ranges);
  1635       TSRange *treesit_ranges = xmalloc (sizeof (TSRange) * len);
  1636       struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
  1637 
  1638       for (int idx = 0; !NILP (ranges); idx++, ranges = XCDR (ranges))
  1639         {
  1640           Lisp_Object range = XCAR (ranges);
  1641           ptrdiff_t beg_byte = buf_charpos_to_bytepos (buffer,
  1642                                                        XFIXNUM (XCAR (range)));
  1643           ptrdiff_t end_byte = buf_charpos_to_bytepos (buffer,
  1644                                                        XFIXNUM (XCDR (range)));
  1645           /* Shouldn't violate assertion since we just checked for
  1646              buffer size at the beginning of this function.  */
  1647           eassert (beg_byte - BUF_BEGV_BYTE (buffer) <= UINT32_MAX);
  1648           eassert (end_byte - BUF_BEGV_BYTE (buffer) <= UINT32_MAX);
  1649           /* We don't care about start and end points, put in dummy
  1650              values.  */
  1651           TSRange rg = {{0, 0}, {0, 0},
  1652                         (uint32_t) beg_byte - BUF_BEGV_BYTE (buffer),
  1653                         (uint32_t) end_byte - BUF_BEGV_BYTE (buffer)};
  1654           treesit_ranges[idx] = rg;
  1655         }
  1656       success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
  1657                                                treesit_ranges, len);
  1658       /* Although XFIXNUM could signal, it should be impossible
  1659          because we have checked the input by treesit_check_range_argument.
  1660          So there is no need for unwind-protect.  */
  1661       xfree (treesit_ranges);
  1662     }
  1663 
  1664   if (!success)
  1665     xsignal2 (Qtreesit_range_invalid,
  1666               build_string ("Something went wrong when setting ranges"),
  1667               ranges);
  1668 
  1669   XTS_PARSER (parser)->need_reparse = true;
  1670   return Qnil;
  1671 }
  1672 
  1673 DEFUN ("treesit-parser-included-ranges",
  1674        Ftreesit_parser_included_ranges,
  1675        Streesit_parser_included_ranges,
  1676        1, 1, 0,
  1677        doc: /* Return the ranges set for PARSER.
  1678 If no ranges are set for PARSER, return nil.
  1679 See also `treesit-parser-set-included-ranges'.  */)
  1680   (Lisp_Object parser)
  1681 {
  1682   treesit_check_parser (parser);
  1683   treesit_initialize ();
  1684 
  1685   /* When the parser doesn't have a range set and we call
  1686      ts_parser_included_ranges on it, it doesn't return an empty list,
  1687      but rather return some garbled data. (A single range where
  1688      start_byte = 0, end_byte = UINT32_MAX).  So we need to track
  1689      whether the parser is ranged ourselves.  */
  1690   if (!XTS_PARSER (parser)->has_range)
  1691     return Qnil;
  1692 
  1693   uint32_t len;
  1694   const TSRange *ranges
  1695     = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len);
  1696 
  1697   /* Our return value depends on the buffer state (BUF_BEGV_BYTE,
  1698      etc), so we need to sync up.  */
  1699   treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer));
  1700   treesit_sync_visible_region (parser);
  1701 
  1702   struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
  1703   return treesit_make_ranges (ranges, len, buffer);
  1704 }
  1705 
  1706 DEFUN ("treesit-parser-notifiers", Ftreesit_parser_notifiers,
  1707        Streesit_parser_notifiers,
  1708        1, 1, 0,
  1709        doc: /* Return the list of after-change notifier functions for PARSER.  */)
  1710   (Lisp_Object parser)
  1711 {
  1712   treesit_check_parser (parser);
  1713 
  1714   Lisp_Object return_list = Qnil;
  1715   Lisp_Object tail = XTS_PARSER (parser)->after_change_functions;
  1716   FOR_EACH_TAIL (tail)
  1717     return_list = Fcons (XCAR (tail), return_list);
  1718 
  1719   return return_list;
  1720 }
  1721 
  1722 DEFUN ("treesit-parser-add-notifier", Ftreesit_parser_add_notifier,
  1723        Streesit_parser_add_notifier,
  1724        2, 2, 0,
  1725        doc: /* Add FUNCTION to the list of PARSER's after-change notifiers.
  1726 FUNCTION must be a function symbol, rather than a lambda form.
  1727 FUNCTION should take 2 arguments, RANGES and PARSER.  RANGES is a list
  1728 of cons cells of the form (START . END), where START and END are buffer
  1729 positions.  PARSER is the parser issuing the notification.  */)
  1730   (Lisp_Object parser, Lisp_Object function)
  1731 {
  1732   treesit_check_parser (parser);
  1733   /* For simplicity we don't accept lambda functions.  */
  1734   CHECK_SYMBOL (function);
  1735 
  1736   Lisp_Object functions = XTS_PARSER (parser)->after_change_functions;
  1737   if (NILP (Fmemq (function, functions)))
  1738     XTS_PARSER (parser)->after_change_functions = Fcons (function, functions);
  1739   return Qnil;
  1740 }
  1741 
  1742 DEFUN ("treesit-parser-remove-notifier", Ftreesit_parser_remove_notifier,
  1743        Streesit_parser_remove_notifier,
  1744        2, 2, 0,
  1745        doc: /* Remove FUNCTION from the list of PARSER's after-change notifiers.
  1746   FUNCTION must be a function symbol, rather than a lambda form.
  1747 FUNCTION should take 2 arguments, RANGES and PARSER.  RANGES is a list
  1748 of cons of the form (START . END), where START and END are buffer
  1749 positions.  PARSER is the parser issuing the notification.   */)
  1750   (Lisp_Object parser, Lisp_Object function)
  1751 {
  1752   treesit_check_parser (parser);
  1753   /* For simplicity we don't accept lambda functions.  */
  1754   CHECK_SYMBOL (function);
  1755 
  1756   Lisp_Object functions = XTS_PARSER (parser)->after_change_functions;
  1757   if (!NILP (Fmemq (function, functions)))
  1758     XTS_PARSER (parser)->after_change_functions = Fdelq (function, functions);
  1759   return Qnil;
  1760 }
  1761 
  1762 
  1763 /*** Node API  */
  1764 
  1765 /* Check that OBJ is a positive integer and signal an error if
  1766    otherwise.  */
  1767 static void
  1768 treesit_check_positive_integer (Lisp_Object obj)
  1769 {
  1770   CHECK_INTEGER (obj);
  1771   if (XFIXNUM (obj) < 0)
  1772     xsignal1 (Qargs_out_of_range, obj);
  1773 }
  1774 
  1775 static void
  1776 treesit_check_node (Lisp_Object obj)
  1777 {
  1778   CHECK_TS_NODE (obj);
  1779   if (!treesit_node_uptodate_p (obj))
  1780     xsignal1 (Qtreesit_node_outdated, obj);
  1781 }
  1782 
  1783 /* Checks that OBJ is a positive integer and it is within the visible
  1784    portion of BUF. */
  1785 static void
  1786 treesit_check_position (Lisp_Object obj, struct buffer *buf)
  1787 {
  1788   treesit_check_positive_integer (obj);
  1789   ptrdiff_t pos = XFIXNUM (obj);
  1790   if (pos < BUF_BEGV (buf) || pos > BUF_ZV (buf))
  1791     xsignal1 (Qargs_out_of_range, obj);
  1792 }
  1793 
  1794 bool
  1795 treesit_node_uptodate_p (Lisp_Object obj)
  1796 {
  1797   Lisp_Object lisp_parser = XTS_NODE (obj)->parser;
  1798   return XTS_NODE (obj)->timestamp == XTS_PARSER (lisp_parser)->timestamp;
  1799 }
  1800 
  1801 DEFUN ("treesit-node-type",
  1802        Ftreesit_node_type, Streesit_node_type, 1, 1, 0,
  1803        doc: /* Return the NODE's type as a string.
  1804 If NODE is nil, return nil.  */)
  1805   (Lisp_Object node)
  1806 {
  1807   if (NILP (node)) return Qnil;
  1808   treesit_check_node (node);
  1809   treesit_initialize ();
  1810 
  1811   TSNode treesit_node = XTS_NODE (node)->node;
  1812   const char *type = ts_node_type (treesit_node);
  1813   return build_string (type);
  1814 }
  1815 
  1816 DEFUN ("treesit-node-start",
  1817        Ftreesit_node_start, Streesit_node_start, 1, 1, 0,
  1818        doc: /* Return the NODE's start position in its buffer.
  1819 If NODE is nil, return nil.  */)
  1820   (Lisp_Object node)
  1821 {
  1822   if (NILP (node)) return Qnil;
  1823   treesit_check_node (node);
  1824   treesit_initialize ();
  1825 
  1826   TSNode treesit_node = XTS_NODE (node)->node;
  1827   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  1828   uint32_t start_byte_offset = ts_node_start_byte (treesit_node);
  1829   struct buffer *buffer
  1830     = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
  1831   ptrdiff_t start_pos
  1832     = buf_bytepos_to_charpos (buffer,
  1833                               start_byte_offset + visible_beg);
  1834   return make_fixnum (start_pos);
  1835 }
  1836 
  1837 DEFUN ("treesit-node-end",
  1838        Ftreesit_node_end, Streesit_node_end, 1, 1, 0,
  1839        doc: /* Return the NODE's end position in its buffer.
  1840 If NODE is nil, return nil.  */)
  1841   (Lisp_Object node)
  1842 {
  1843   if (NILP (node)) return Qnil;
  1844   treesit_check_node (node);
  1845   treesit_initialize ();
  1846 
  1847   TSNode treesit_node = XTS_NODE (node)->node;
  1848   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  1849   uint32_t end_byte_offset = ts_node_end_byte (treesit_node);
  1850   struct buffer *buffer
  1851     = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
  1852   ptrdiff_t end_pos
  1853     = buf_bytepos_to_charpos (buffer, end_byte_offset + visible_beg);
  1854   return make_fixnum (end_pos);
  1855 }
  1856 
  1857 DEFUN ("treesit-node-string",
  1858        Ftreesit_node_string, Streesit_node_string, 1, 1, 0,
  1859        doc: /* Return the string representation of NODE.
  1860 If NODE is nil, return nil.  */)
  1861   (Lisp_Object node)
  1862 {
  1863   if (NILP (node)) return Qnil;
  1864   treesit_check_node (node);
  1865   treesit_initialize ();
  1866 
  1867   TSNode treesit_node = XTS_NODE (node)->node;
  1868   char *string = ts_node_string (treesit_node);
  1869   return build_string (string);
  1870 }
  1871 
  1872 static bool treesit_cursor_helper (TSTreeCursor *, TSNode, Lisp_Object);
  1873 
  1874 DEFUN ("treesit-node-parent",
  1875        Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0,
  1876        doc: /* Return the immediate parent of NODE.
  1877 Return nil if NODE has no parent.  If NODE is nil, return nil.  */)
  1878   (Lisp_Object node)
  1879 {
  1880   if (NILP (node)) return Qnil;
  1881   treesit_check_node (node);
  1882   treesit_initialize ();
  1883 
  1884   Lisp_Object return_value = Qnil;
  1885 
  1886   TSNode treesit_node = XTS_NODE (node)->node;
  1887   Lisp_Object parser = XTS_NODE (node)->parser;
  1888   TSTreeCursor cursor;
  1889   /* See the comments to treesit_cursor_helper about the algorithm for
  1890      finding the parent node.  The complexity is roughly proportional
  1891      to the square root of the current node's depth in the parse tree,
  1892      and we punt if the tree is too deep.  */
  1893   if (!treesit_cursor_helper (&cursor, treesit_node, parser))
  1894     return return_value;
  1895 
  1896   if (ts_tree_cursor_goto_parent (&cursor))
  1897   {
  1898     TSNode parent = ts_tree_cursor_current_node (&cursor);
  1899     return_value = make_treesit_node (parser, parent);
  1900   }
  1901   ts_tree_cursor_delete (&cursor);
  1902   return return_value;
  1903 }
  1904 
  1905 DEFUN ("treesit-node-child",
  1906        Ftreesit_node_child, Streesit_node_child, 2, 3, 0,
  1907        doc: /* Return the Nth child of NODE.
  1908 
  1909 Return nil if there is no Nth child.  If NAMED is non-nil, look for
  1910 named child only.  NAMED defaults to nil.  If NODE is nil, return
  1911 nil.
  1912 
  1913 N could be negative, e.g., -1 represents the last child.  */)
  1914   (Lisp_Object node, Lisp_Object n, Lisp_Object named)
  1915 {
  1916   if (NILP (node)) return Qnil;
  1917   treesit_check_node (node);
  1918   CHECK_INTEGER (n);
  1919   EMACS_INT idx = XFIXNUM (n);
  1920 
  1921   treesit_initialize ();
  1922 
  1923   TSNode treesit_node = XTS_NODE (node)->node;
  1924   TSNode child;
  1925 
  1926   /* Process negative index.  */
  1927   if (idx < 0)
  1928     {
  1929       if (NILP (named))
  1930         idx = ts_node_child_count (treesit_node) + idx;
  1931       else
  1932         idx = ts_node_named_child_count (treesit_node) + idx;
  1933     }
  1934   if (idx < 0)
  1935     return Qnil;
  1936   if (idx > UINT32_MAX)
  1937     xsignal1 (Qargs_out_of_range, n);
  1938 
  1939   if (NILP (named))
  1940     child = ts_node_child (treesit_node, (uint32_t) idx);
  1941   else
  1942     child = ts_node_named_child (treesit_node, (uint32_t) idx);
  1943 
  1944   if (ts_node_is_null (child))
  1945     return Qnil;
  1946 
  1947   return make_treesit_node (XTS_NODE (node)->parser, child);
  1948 }
  1949 
  1950 DEFUN ("treesit-node-check",
  1951        Ftreesit_node_check, Streesit_node_check, 2, 2, 0,
  1952        doc: /* Return non-nil if NODE has PROPERTY, nil otherwise.
  1953 
  1954 PROPERTY could be `named', `missing', `extra', `outdated',
  1955 `has-error', or `live'.
  1956 
  1957 Named nodes correspond to named rules in the language definition,
  1958 whereas "anonymous" nodes correspond to string literals in the
  1959 language definition.
  1960 
  1961 Missing nodes are inserted by the parser in order to recover from
  1962 certain kinds of syntax errors, i.e., should be there but not there.
  1963 
  1964 Extra nodes represent things like comments, which are not required the
  1965 language definition, but can appear anywhere.
  1966 
  1967 A node is "outdated" if the parser has reparsed at least once after
  1968 the node was created.
  1969 
  1970 A node "has error" if itself is a syntax error or contains any syntax
  1971 errors.
  1972 
  1973 A node is "live" if its parser is not deleted and its buffer is
  1974 live.  */)
  1975   (Lisp_Object node, Lisp_Object property)
  1976 {
  1977   if (NILP (node)) return Qnil;
  1978   CHECK_TS_NODE (node);
  1979   CHECK_SYMBOL (property);
  1980   treesit_initialize ();
  1981 
  1982   TSNode treesit_node = XTS_NODE (node)->node;
  1983   bool result;
  1984 
  1985   if (EQ (property, Qoutdated))
  1986     return treesit_node_uptodate_p (node) ? Qnil : Qt;
  1987 
  1988   treesit_check_node (node);
  1989   if (EQ (property, Qnamed))
  1990     result = ts_node_is_named (treesit_node);
  1991   else if (EQ (property, Qmissing))
  1992     result = ts_node_is_missing (treesit_node);
  1993   else if (EQ (property, Qextra))
  1994     result = ts_node_is_extra (treesit_node);
  1995   else if (EQ (property, Qhas_error))
  1996     result = ts_node_has_error (treesit_node);
  1997   else if (EQ (property, Qlive))
  1998     result = treesit_parser_live_p (XTS_NODE (node)->parser);
  1999   else
  2000     signal_error ("Expecting `named', `missing', `extra', "
  2001                   "`outdated', `has-error', or `live', but got",
  2002                   property);
  2003   return result ? Qt : Qnil;
  2004 }
  2005 
  2006 DEFUN ("treesit-node-field-name-for-child",
  2007        Ftreesit_node_field_name_for_child,
  2008        Streesit_node_field_name_for_child, 2, 2, 0,
  2009        doc: /* Return the field name of the Nth child of NODE.
  2010 
  2011 Return nil if there's no Nth child, or if it has no field.
  2012 If NODE is nil, return nil.
  2013 
  2014 N counts all children, i.e., named ones and anonymous ones.
  2015 
  2016 N could be negative, e.g., -1 represents the last child.  */)
  2017   (Lisp_Object node, Lisp_Object n)
  2018 {
  2019   if (NILP (node))
  2020     return Qnil;
  2021   treesit_check_node (node);
  2022   CHECK_INTEGER (n);
  2023   EMACS_INT idx = XFIXNUM (n);
  2024   treesit_initialize ();
  2025 
  2026   TSNode treesit_node = XTS_NODE (node)->node;
  2027 
  2028   /* Process negative index.  */
  2029   if (idx < 0)
  2030     idx = ts_node_child_count (treesit_node) + idx;
  2031   if (idx < 0)
  2032     return Qnil;
  2033   if (idx > UINT32_MAX)
  2034     xsignal1 (Qargs_out_of_range, n);
  2035 
  2036   const char *name
  2037     = ts_node_field_name_for_child (treesit_node, (uint32_t) idx);
  2038 
  2039   if (name == NULL)
  2040     return Qnil;
  2041 
  2042   return build_string (name);
  2043 }
  2044 
  2045 DEFUN ("treesit-node-child-count",
  2046        Ftreesit_node_child_count,
  2047        Streesit_node_child_count, 1, 2, 0,
  2048        doc: /* Return the number of children of NODE.
  2049 
  2050 If NAMED is non-nil, count named children only.  NAMED defaults to
  2051 nil.  If NODE is nil, return nil.  */)
  2052   (Lisp_Object node, Lisp_Object named)
  2053 {
  2054   if (NILP (node))
  2055     return Qnil;
  2056   treesit_check_node (node);
  2057   treesit_initialize ();
  2058 
  2059   TSNode treesit_node = XTS_NODE (node)->node;
  2060   uint32_t count;
  2061   if (NILP (named))
  2062     count = ts_node_child_count (treesit_node);
  2063   else
  2064     count = ts_node_named_child_count (treesit_node);
  2065   return make_fixnum (count);
  2066 }
  2067 
  2068 DEFUN ("treesit-node-child-by-field-name",
  2069        Ftreesit_node_child_by_field_name,
  2070        Streesit_node_child_by_field_name, 2, 2, 0,
  2071        doc: /* Return the child of NODE with FIELD-NAME.
  2072 Return nil if there is no such child.  If NODE is nil, return nil.  */)
  2073   (Lisp_Object node, Lisp_Object field_name)
  2074 {
  2075   if (NILP (node))
  2076     return Qnil;
  2077   treesit_check_node (node);
  2078   CHECK_STRING (field_name);
  2079   treesit_initialize ();
  2080 
  2081   char *name_str = SSDATA (field_name);
  2082   TSNode treesit_node = XTS_NODE (node)->node;
  2083   TSNode child
  2084     = ts_node_child_by_field_name (treesit_node, name_str,
  2085                                    strlen (name_str));
  2086 
  2087   if (ts_node_is_null (child))
  2088     return Qnil;
  2089 
  2090   return make_treesit_node (XTS_NODE (node)->parser, child);
  2091 }
  2092 
  2093 DEFUN ("treesit-node-next-sibling",
  2094        Ftreesit_node_next_sibling,
  2095        Streesit_node_next_sibling, 1, 2, 0,
  2096        doc: /* Return the next sibling of NODE.
  2097 
  2098 Return nil if there is no next sibling.  If NAMED is non-nil, look for named
  2099 siblings only.  NAMED defaults to nil.  If NODE is nil, return nil.  */)
  2100   (Lisp_Object node, Lisp_Object named)
  2101 {
  2102   if (NILP (node)) return Qnil;
  2103   treesit_check_node (node);
  2104   treesit_initialize ();
  2105 
  2106   TSNode treesit_node = XTS_NODE (node)->node;
  2107   TSNode sibling;
  2108   if (NILP (named))
  2109     sibling = ts_node_next_sibling (treesit_node);
  2110   else
  2111     sibling = ts_node_next_named_sibling (treesit_node);
  2112 
  2113   if (ts_node_is_null (sibling))
  2114     return Qnil;
  2115 
  2116   return make_treesit_node (XTS_NODE (node)->parser, sibling);
  2117 }
  2118 
  2119 DEFUN ("treesit-node-prev-sibling",
  2120        Ftreesit_node_prev_sibling,
  2121        Streesit_node_prev_sibling, 1, 2, 0,
  2122        doc: /* Return the previous sibling of NODE.
  2123 
  2124 Return nil if there is no previous sibling.  If NAMED is non-nil, look
  2125 for named siblings only.  NAMED defaults to nil.  If NODE is nil,
  2126 return nil.  */)
  2127   (Lisp_Object node, Lisp_Object named)
  2128 {
  2129   if (NILP (node)) return Qnil;
  2130   treesit_check_node (node);
  2131   treesit_initialize ();
  2132 
  2133   TSNode treesit_node = XTS_NODE (node)->node;
  2134   TSNode sibling;
  2135 
  2136   if (NILP (named))
  2137     sibling = ts_node_prev_sibling (treesit_node);
  2138   else
  2139     sibling = ts_node_prev_named_sibling (treesit_node);
  2140 
  2141   if (ts_node_is_null (sibling))
  2142     return Qnil;
  2143 
  2144   return make_treesit_node (XTS_NODE (node)->parser, sibling);
  2145 }
  2146 
  2147 /* Our reimplementation of ts_node_first_child_for_byte.  The current
  2148    implementation of that function has problems (see bug#60127), so
  2149    before it's fixed upstream, we use our own reimplementation of it.
  2150    Return true if there is a valid sibling, return false otherwise.
  2151    If the return value is false, the position of the cursor is
  2152    undefined.  (We use cursor because technically we can't make a null
  2153    node for ourselves, also, using cursor is more convenient.)
  2154 
  2155    TODO: Remove this function once tree-sitter fixed the bug.  */
  2156 static bool treesit_cursor_first_child_for_byte
  2157 (TSTreeCursor *cursor, ptrdiff_t pos, bool named)
  2158 {
  2159   if (!ts_tree_cursor_goto_first_child (cursor))
  2160     return false;
  2161 
  2162   TSNode node = ts_tree_cursor_current_node (cursor);
  2163   while (ts_node_end_byte (node) <= pos)
  2164     {
  2165       if (ts_tree_cursor_goto_next_sibling (cursor))
  2166         node = ts_tree_cursor_current_node (cursor);
  2167       else
  2168         /* Reached the end and still can't find a valid sibling.  */
  2169         return false;
  2170     }
  2171   while (named && (!ts_node_is_named (node)))
  2172     {
  2173       if (ts_tree_cursor_goto_next_sibling (cursor))
  2174         node = ts_tree_cursor_current_node (cursor);
  2175       else
  2176         /* Reached the end and still can't find a named sibling.  */
  2177         return false;
  2178     }
  2179   return true;
  2180 }
  2181 
  2182 DEFUN ("treesit-node-first-child-for-pos",
  2183        Ftreesit_node_first_child_for_pos,
  2184        Streesit_node_first_child_for_pos, 2, 3, 0,
  2185        doc: /* Return the first child of NODE for buffer position POS.
  2186 
  2187 Specifically, return the first child that extends beyond POS.
  2188 Return nil if there is no such child.
  2189 If NAMED is non-nil, look for named children only.  NAMED defaults to nil.
  2190 Note that this function returns an immediate child, not the smallest
  2191 (grand)child.  If NODE is nil, return nil.  */)
  2192   (Lisp_Object node, Lisp_Object pos, Lisp_Object named)
  2193 {
  2194   if (NILP (node))
  2195     return Qnil;
  2196   treesit_check_node (node);
  2197 
  2198   struct buffer *buf = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
  2199   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  2200 
  2201   treesit_check_position (pos, buf);
  2202   treesit_initialize ();
  2203 
  2204   ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos));
  2205   TSNode treesit_node = XTS_NODE (node)->node;
  2206 
  2207   TSTreeCursor cursor = ts_tree_cursor_new (treesit_node);
  2208   ptrdiff_t treesit_pos = byte_pos - visible_beg;
  2209   bool success;
  2210   success = treesit_cursor_first_child_for_byte (&cursor, treesit_pos,
  2211                                                  !NILP (named));
  2212   TSNode child = ts_tree_cursor_current_node (&cursor);
  2213   ts_tree_cursor_delete (&cursor);
  2214 
  2215   if (!success)
  2216     return Qnil;
  2217   return make_treesit_node (XTS_NODE (node)->parser, child);
  2218 }
  2219 
  2220 DEFUN ("treesit-node-descendant-for-range",
  2221        Ftreesit_node_descendant_for_range,
  2222        Streesit_node_descendant_for_range, 3, 4, 0,
  2223        doc: /* Return the smallest node that covers buffer positions BEG to END.
  2224 
  2225 The returned node is a descendant of NODE.
  2226 Return nil if there is no such node.
  2227 If NAMED is non-nil, look for named child only.  NAMED defaults to nil.
  2228 If NODE is nil, return nil.  */)
  2229   (Lisp_Object node, Lisp_Object beg, Lisp_Object end, Lisp_Object named)
  2230 {
  2231   if (NILP (node)) return Qnil;
  2232   treesit_check_node (node);
  2233 
  2234   struct buffer *buf = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
  2235   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  2236 
  2237   treesit_check_position (beg, buf);
  2238   treesit_check_position (end, buf);
  2239 
  2240   treesit_initialize ();
  2241 
  2242   ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg));
  2243   ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end));
  2244   TSNode treesit_node = XTS_NODE (node)->node;
  2245   TSNode child;
  2246   if (NILP (named))
  2247     child = ts_node_descendant_for_byte_range (treesit_node, byte_beg - visible_beg,
  2248                                                byte_end - visible_beg);
  2249   else
  2250     child = ts_node_named_descendant_for_byte_range (treesit_node,
  2251                                                      byte_beg - visible_beg,
  2252                                                      byte_end - visible_beg);
  2253 
  2254   if (ts_node_is_null (child))
  2255     return Qnil;
  2256 
  2257   return make_treesit_node (XTS_NODE (node)->parser, child);
  2258 }
  2259 
  2260 /* Return true if NODE1 and NODE2 are the same node.  Assumes they are
  2261    TS_NODE type.  */
  2262 bool treesit_node_eq (Lisp_Object node1, Lisp_Object node2)
  2263 {
  2264   treesit_initialize ();
  2265   TSNode treesit_node_1 = XTS_NODE (node1)->node;
  2266   TSNode treesit_node_2 = XTS_NODE (node2)->node;
  2267   return ts_node_eq (treesit_node_1, treesit_node_2);
  2268 }
  2269 
  2270 DEFUN ("treesit-node-eq",
  2271        Ftreesit_node_eq,
  2272        Streesit_node_eq, 2, 2, 0,
  2273        doc: /* Return non-nil if NODE1 and NODE2 refer to the same node.
  2274 If any one of NODE1 and NODE2 is nil, return nil.
  2275 This function uses the same equivalence metric as `equal', and returns
  2276 non-nil if NODE1 and NODE2 refer to the same node in a syntax tree
  2277 produced by tree-sitter.  */)
  2278   (Lisp_Object node1, Lisp_Object node2)
  2279 {
  2280   if (NILP (node1) || NILP (node2))
  2281     return Qnil;
  2282   CHECK_TS_NODE (node1);
  2283   CHECK_TS_NODE (node2);
  2284 
  2285   bool same_node = treesit_node_eq (node1, node2);
  2286   return same_node ? Qt : Qnil;
  2287 }
  2288 
  2289 
  2290 /*** Query functions */
  2291 
  2292 DEFUN ("treesit-pattern-expand",
  2293        Ftreesit_pattern_expand,
  2294        Streesit_pattern_expand, 1, 1, 0,
  2295        doc: /* Expand PATTERN to its string form.
  2296 
  2297 PATTERN can be
  2298 
  2299     :anchor
  2300     :?
  2301     :*
  2302     :+
  2303     :equal
  2304     :match
  2305     (TYPE PATTERN...)
  2306     [PATTERN...]
  2307     FIELD-NAME:
  2308     @CAPTURE-NAME
  2309     (_)
  2310     _
  2311     \"TYPE\"
  2312 
  2313 See Info node `(elisp)Pattern Matching' for detailed explanation.  */)
  2314   (Lisp_Object pattern)
  2315 {
  2316   if (EQ (pattern, QCanchor))
  2317     return Vtreesit_str_dot;
  2318   if (EQ (pattern, intern_c_string (":?")))
  2319     return Vtreesit_str_question_mark;
  2320   if (EQ (pattern, intern_c_string (":*")))
  2321     return Vtreesit_str_star;
  2322   if (EQ (pattern, intern_c_string (":+")))
  2323     return Vtreesit_str_plus;
  2324   if (EQ (pattern, QCequal))
  2325     return Vtreesit_str_pound_equal;
  2326   if (EQ (pattern, QCmatch))
  2327     return Vtreesit_str_pound_match;
  2328   if (EQ (pattern, QCpred))
  2329     return Vtreesit_str_pound_pred;
  2330   Lisp_Object opening_delimeter
  2331     = VECTORP (pattern)
  2332       ? Vtreesit_str_open_bracket : Vtreesit_str_open_paren;
  2333   Lisp_Object closing_delimiter
  2334     = VECTORP (pattern)
  2335       ? Vtreesit_str_close_bracket : Vtreesit_str_close_paren;
  2336   if (VECTORP (pattern) || CONSP (pattern))
  2337     return concat3 (opening_delimeter,
  2338                     Fmapconcat (Qtreesit_pattern_expand,
  2339                                 pattern,
  2340                                 Vtreesit_str_space),
  2341                     closing_delimiter);
  2342   return Fprin1_to_string (pattern, Qnil, Qt);
  2343 }
  2344 
  2345 DEFUN ("treesit-query-expand",
  2346        Ftreesit_query_expand,
  2347        Streesit_query_expand, 1, 1, 0,
  2348        doc: /* Expand sexp QUERY to its string form.
  2349 
  2350 A PATTERN in QUERY can be
  2351 
  2352     :anchor
  2353     :?
  2354     :*
  2355     :+
  2356     :equal
  2357     :match
  2358     (TYPE PATTERN...)
  2359     [PATTERN...]
  2360     FIELD-NAME:
  2361     @CAPTURE-NAME
  2362     (_)
  2363     _
  2364     \"TYPE\"
  2365 
  2366 See Info node `(elisp)Pattern Matching' for detailed explanation.  */)
  2367   (Lisp_Object query)
  2368 {
  2369   return Fmapconcat (Qtreesit_pattern_expand, query, Vtreesit_str_space);
  2370 }
  2371 
  2372 /* This struct is used for passing captures to be check against
  2373    predicates.  Captures we check for are the ones in START before
  2374    END.  For example, if START and END are
  2375 
  2376    START       END
  2377     v              v
  2378    (1 . (2 . (3 . (4 . (5 . (6 . nil))))))
  2379 
  2380    We only look at captures 1 2 3.  */
  2381 struct capture_range
  2382 {
  2383   Lisp_Object start;
  2384   Lisp_Object end;
  2385 };
  2386 
  2387 /* Collect predicates for this match and return them in a list.  Each
  2388    predicate is a list of strings and symbols.  */
  2389 static Lisp_Object
  2390 treesit_predicates_for_pattern (TSQuery *query, uint32_t pattern_index)
  2391 {
  2392   uint32_t len;
  2393   const TSQueryPredicateStep *predicate_list
  2394     = ts_query_predicates_for_pattern (query, pattern_index, &len);
  2395   Lisp_Object result = Qnil;
  2396   Lisp_Object predicate = Qnil;
  2397   for (int idx = 0; idx < len; idx++)
  2398     {
  2399       TSQueryPredicateStep step = predicate_list[idx];
  2400       switch (step.type)
  2401         {
  2402         case TSQueryPredicateStepTypeCapture:
  2403           {
  2404             uint32_t str_len;
  2405             const char *str = ts_query_capture_name_for_id (query,
  2406                                                             step.value_id,
  2407                                                             &str_len);
  2408             predicate = Fcons (intern_c_string_1 (str, str_len),
  2409                                predicate);
  2410             break;
  2411           }
  2412         case TSQueryPredicateStepTypeString:
  2413           {
  2414             uint32_t str_len;
  2415             const char *str = ts_query_string_value_for_id (query,
  2416                                                             step.value_id,
  2417                                                             &str_len);
  2418             predicate = Fcons (make_string (str, str_len), predicate);
  2419             break;
  2420           }
  2421         case TSQueryPredicateStepTypeDone:
  2422           result = Fcons (Fnreverse (predicate), result);
  2423           predicate = Qnil;
  2424           break;
  2425         }
  2426     }
  2427   return Fnreverse (result);
  2428 }
  2429 
  2430 /* Translate a capture NAME (symbol) to a node.
  2431    Signals treesit-query-error if such node is not captured.  */
  2432 static Lisp_Object
  2433 treesit_predicate_capture_name_to_node (Lisp_Object name,
  2434                                         struct capture_range captures)
  2435 {
  2436   Lisp_Object node = Qnil;
  2437   for (Lisp_Object tail = captures.start; !EQ (tail, captures.end);
  2438        tail = XCDR (tail))
  2439     {
  2440       if (EQ (XCAR (XCAR (tail)), name))
  2441         {
  2442           node = XCDR (XCAR (tail));
  2443           break;
  2444         }
  2445     }
  2446 
  2447   if (NILP (node))
  2448     xsignal3 (Qtreesit_query_error,
  2449               build_string ("Cannot find captured node"),
  2450               name, build_string ("A predicate can only refer"
  2451                                   " to captured nodes in the "
  2452                                   "same pattern"));
  2453   return node;
  2454 }
  2455 
  2456 /* Translate a capture NAME (symbol) to the text of the captured node.
  2457    Signals treesit-query-error if such node is not captured.  */
  2458 static Lisp_Object
  2459 treesit_predicate_capture_name_to_text (Lisp_Object name,
  2460                                         struct capture_range captures)
  2461 {
  2462   Lisp_Object node = treesit_predicate_capture_name_to_node (name, captures);
  2463 
  2464   struct buffer *old_buffer = current_buffer;
  2465   set_buffer_internal (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer));
  2466   Lisp_Object text = Fbuffer_substring (Ftreesit_node_start (node),
  2467                                         Ftreesit_node_end (node));
  2468   set_buffer_internal (old_buffer);
  2469   return text;
  2470 }
  2471 
  2472 /* Handles predicate (#equal A B).  Return true if A equals B; return
  2473    false otherwise.  A and B can be either string, or a capture name.
  2474    The capture name evaluates to the text its captured node spans in
  2475    the buffer.  */
  2476 static bool
  2477 treesit_predicate_equal (Lisp_Object args, struct capture_range captures)
  2478 {
  2479   if (XFIXNUM (Flength (args)) != 2)
  2480     xsignal2 (Qtreesit_query_error,
  2481               build_string ("Predicate `equal' requires "
  2482                             "two arguments but only given"),
  2483               Flength (args));
  2484 
  2485   Lisp_Object arg1 = XCAR (args);
  2486   Lisp_Object arg2 = XCAR (XCDR (args));
  2487   Lisp_Object text1 = (STRINGP (arg1)
  2488                        ? arg1
  2489                        : treesit_predicate_capture_name_to_text (arg1,
  2490                                                                  captures));
  2491   Lisp_Object text2 = (STRINGP (arg2)
  2492                        ? arg2
  2493                        : treesit_predicate_capture_name_to_text (arg2,
  2494                                                                  captures));
  2495 
  2496   return !NILP (Fstring_equal (text1, text2));
  2497 }
  2498 
  2499 /* Handles predicate (#match "regexp" @node).  Return true if "regexp"
  2500    matches the text spanned by @node; return false otherwise.  Matching
  2501    is case-sensitive.  */
  2502 static bool
  2503 treesit_predicate_match (Lisp_Object args, struct capture_range captures)
  2504 {
  2505   if (XFIXNUM (Flength (args)) != 2)
  2506     xsignal2 (Qtreesit_query_error,
  2507               build_string ("Predicate `match' requires two "
  2508                             "arguments but only given"),
  2509               Flength (args));
  2510 
  2511   Lisp_Object regexp = XCAR (args);
  2512   Lisp_Object capture_name = XCAR (XCDR (args));
  2513 
  2514   /* It's probably common to get the argument order backwards.  Catch
  2515      this mistake early and show helpful explanation, because Emacs
  2516      loves you.  (We put the regexp first because that's what
  2517      string-match does.)  */
  2518   if (!STRINGP (regexp))
  2519     xsignal1 (Qtreesit_query_error,
  2520               build_string ("The first argument to `match' should "
  2521                             "be a regexp string, not a capture name"));
  2522   if (!SYMBOLP (capture_name))
  2523     xsignal1 (Qtreesit_query_error,
  2524               build_string ("The second argument to `match' should "
  2525                             "be a capture name, not a string"));
  2526 
  2527   Lisp_Object node = treesit_predicate_capture_name_to_node (capture_name,
  2528                                                              captures);
  2529 
  2530   struct buffer *old_buffer = current_buffer;
  2531   struct buffer *buffer = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
  2532   set_buffer_internal (buffer);
  2533 
  2534   TSNode treesit_node = XTS_NODE (node)->node;
  2535   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  2536   uint32_t start_byte_offset = ts_node_start_byte (treesit_node);
  2537   uint32_t end_byte_offset = ts_node_end_byte (treesit_node);
  2538   ptrdiff_t start_byte = visible_beg + start_byte_offset;
  2539   ptrdiff_t end_byte = visible_beg + end_byte_offset;
  2540   ptrdiff_t start_pos = BYTE_TO_CHAR (start_byte);
  2541   ptrdiff_t end_pos = BYTE_TO_CHAR (end_byte);
  2542   ptrdiff_t old_begv = BEGV;
  2543   ptrdiff_t old_begv_byte = BEGV_BYTE;
  2544   ptrdiff_t old_zv = ZV;
  2545   ptrdiff_t old_zv_byte = ZV_BYTE;
  2546 
  2547   BEGV = start_pos;
  2548   BEGV_BYTE = start_byte;
  2549   ZV = end_pos;
  2550   ZV_BYTE = end_byte;
  2551 
  2552   ptrdiff_t val = search_buffer (regexp, start_pos, start_byte,
  2553                                  end_pos, end_byte, 1, 1, Qnil, Qnil, false);
  2554 
  2555   BEGV = old_begv;
  2556   BEGV_BYTE = old_begv_byte;
  2557   ZV = old_zv;
  2558   ZV_BYTE = old_zv_byte;
  2559 
  2560   set_buffer_internal (old_buffer);
  2561 
  2562   return (val > 0);
  2563 }
  2564 
  2565 /* Handles predicate (#pred FN ARG...).  Return true if FN returns
  2566    non-nil; return false otherwise.  The arity of FN must match the
  2567    number of ARGs  */
  2568 static bool
  2569 treesit_predicate_pred (Lisp_Object args, struct capture_range captures)
  2570 {
  2571   if (XFIXNUM (Flength (args)) < 2)
  2572     xsignal2 (Qtreesit_query_error,
  2573               build_string ("Predicate `pred' requires "
  2574                             "at least two arguments, "
  2575                             "but was only given"),
  2576               Flength (args));
  2577 
  2578   Lisp_Object fn = Fintern (XCAR (args), Qnil);
  2579   Lisp_Object nodes = Qnil;
  2580   Lisp_Object tail = XCDR (args);
  2581   FOR_EACH_TAIL (tail)
  2582     nodes = Fcons (treesit_predicate_capture_name_to_node (XCAR (tail),
  2583                                                            captures),
  2584                    nodes);
  2585   nodes = Fnreverse (nodes);
  2586 
  2587   return !NILP (CALLN (Fapply, fn, nodes));
  2588 }
  2589 
  2590 /* If all predicates in PREDICATES passes, return true; otherwise
  2591    return false.  */
  2592 static bool
  2593 treesit_eval_predicates (struct capture_range captures, Lisp_Object predicates)
  2594 {
  2595   bool pass = true;
  2596   /* Evaluate each predicates.  */
  2597   for (Lisp_Object tail = predicates;
  2598        !NILP (tail); tail = XCDR (tail))
  2599     {
  2600       Lisp_Object predicate = XCAR (tail);
  2601       Lisp_Object fn = XCAR (predicate);
  2602       Lisp_Object args = XCDR (predicate);
  2603       if (!NILP (Fstring_equal (fn, Vtreesit_str_equal)))
  2604         pass &= treesit_predicate_equal (args, captures);
  2605       else if (!NILP (Fstring_equal (fn, Vtreesit_str_match)))
  2606         pass &= treesit_predicate_match (args, captures);
  2607       else if (!NILP (Fstring_equal (fn, Vtreesit_str_pred)))
  2608         pass &= treesit_predicate_pred (args, captures);
  2609       else
  2610         xsignal3 (Qtreesit_query_error,
  2611                   build_string ("Invalid predicate"),
  2612                   fn, build_string ("Currently Emacs only supports"
  2613                                     " equal, match, and pred"
  2614                                     " predicate"));
  2615     }
  2616   /* If all predicates passed, add captures to result list.  */
  2617   return pass;
  2618 }
  2619 
  2620 DEFUN ("treesit-query-compile",
  2621        Ftreesit_query_compile,
  2622        Streesit_query_compile, 2, 3, 0,
  2623        doc: /* Compile QUERY to a compiled query.
  2624 
  2625 Querying with a compiled query is much faster than an uncompiled one.
  2626 LANGUAGE is the language this query is for.
  2627 
  2628 If EAGER is non-nil, immediately load LANGUAGE and compile the query.
  2629 Otherwise defer the compilation until the query is first used.
  2630 
  2631 Signal `treesit-query-error' if QUERY is malformed or something else
  2632 goes wrong.  (This only happens if EAGER is non-nil.)
  2633 You can use `treesit-query-validate' to validate and debug a query.  */)
  2634   (Lisp_Object language, Lisp_Object query, Lisp_Object eager)
  2635 {
  2636   if (NILP (Ftreesit_query_p (query)))
  2637     wrong_type_argument (Qtreesit_query_p, query);
  2638   CHECK_SYMBOL (language);
  2639   if (TS_COMPILED_QUERY_P (query))
  2640     return query;
  2641 
  2642   treesit_initialize ();
  2643 
  2644   Lisp_Object lisp_query = make_treesit_query (query, language);
  2645 
  2646   /* Maybe actually compile.  */
  2647   if (NILP (eager))
  2648     return lisp_query;
  2649   else
  2650     {
  2651       Lisp_Object signal_symbol = Qnil;
  2652       Lisp_Object signal_data = Qnil;
  2653       TSQuery *treesit_query = treesit_ensure_query_compiled (lisp_query,
  2654                                                          &signal_symbol,
  2655                                                          &signal_data);
  2656 
  2657       if (treesit_query == NULL)
  2658         xsignal (signal_symbol, signal_data);
  2659 
  2660       return lisp_query;
  2661     }
  2662 }
  2663 
  2664 DEFUN ("treesit-query-capture",
  2665        Ftreesit_query_capture,
  2666        Streesit_query_capture, 2, 5, 0,
  2667        doc: /* Query NODE with patterns in QUERY.
  2668 
  2669 Return a list of (CAPTURE_NAME . NODE).  CAPTURE_NAME is the name
  2670 assigned to the node in PATTERN.  NODE is the captured node.
  2671 
  2672 QUERY is either a string query, a sexp query, or a compiled query.
  2673 See Info node `(elisp)Pattern Matching' for how to write a query in
  2674 either string or sexp form.  When using repeatedly, a compiled query
  2675 is much faster than a string or sexp one, so it is recommend to
  2676 compile your query if it will be used repeatedly.
  2677 
  2678 BEG and END, if both non-nil, specify the region of buffer positions
  2679 in which the query is executed.  Any matching node whose span overlaps
  2680 with the region between BEG and END are captured, it doesn't have to
  2681 be completely in the region.
  2682 
  2683 If NODE-ONLY is non-nil, return a list of nodes.
  2684 
  2685 Besides a node, NODE can also be a parser, in which case the root node
  2686 of that parser is used.
  2687 NODE can also be a language symbol, in which case the root node of a
  2688 parser for that language is used.  If such a parser doesn't exist, it
  2689 is created.
  2690 
  2691 Signal `treesit-query-error' if QUERY is malformed or something else
  2692 goes wrong.  You can use `treesit-query-validate' to validate and debug
  2693 the query.  */)
  2694   (Lisp_Object node, Lisp_Object query,
  2695    Lisp_Object beg, Lisp_Object end, Lisp_Object node_only)
  2696 {
  2697   if (!(TS_COMPILED_QUERY_P (query)
  2698         || CONSP (query) || STRINGP (query)))
  2699     wrong_type_argument (Qtreesit_query_p, query);
  2700 
  2701   treesit_initialize ();
  2702 
  2703   /* Resolve NODE into an actual node.  */
  2704   Lisp_Object lisp_node;
  2705   if (TS_NODEP (node))
  2706     {
  2707       treesit_check_node (node); /* Check if up-to-date.  */
  2708       lisp_node = node;
  2709     }
  2710   else if (TS_PARSERP (node))
  2711     {
  2712       treesit_check_parser (node); /* Check if deleted.  */
  2713       lisp_node = Ftreesit_parser_root_node (node);
  2714     }
  2715   else if (SYMBOLP (node))
  2716     {
  2717       Lisp_Object parser
  2718         = Ftreesit_parser_create (node, Fcurrent_buffer (), Qnil);
  2719       lisp_node = Ftreesit_parser_root_node (parser);
  2720     }
  2721   else
  2722     xsignal2 (Qwrong_type_argument,
  2723               list4 (Qor, Qtreesit_node_p, Qtreesit_parser_p, Qsymbolp),
  2724               node);
  2725 
  2726   /* Extract C values from Lisp objects.  */
  2727   TSNode treesit_node
  2728     = XTS_NODE (lisp_node)->node;
  2729   Lisp_Object lisp_parser
  2730     = XTS_NODE (lisp_node)->parser;
  2731   ptrdiff_t visible_beg
  2732     = XTS_PARSER (XTS_NODE (lisp_node)->parser)->visible_beg;
  2733   const TSLanguage *lang
  2734     = ts_parser_language (XTS_PARSER (lisp_parser)->parser);
  2735 
  2736   /* Check BEG and END.  */
  2737   struct buffer *buf = XBUFFER (XTS_PARSER (lisp_parser)->buffer);
  2738   if (!NILP (beg))
  2739     treesit_check_position (beg, buf);
  2740   if (!NILP (end))
  2741     treesit_check_position (end, buf);
  2742 
  2743   /* Initialize query objects.  At the end of this block, we should
  2744      have a working TSQuery and a TSQueryCursor.  */
  2745   TSQuery *treesit_query;
  2746   TSQueryCursor *cursor;
  2747   bool needs_to_free_query_and_cursor;
  2748   if (TS_COMPILED_QUERY_P (query))
  2749     {
  2750       Lisp_Object signal_symbol = Qnil;
  2751       Lisp_Object signal_data = Qnil;
  2752       treesit_query = treesit_ensure_query_compiled (query, &signal_symbol,
  2753                                                      &signal_data);
  2754       cursor = XTS_COMPILED_QUERY (query)->cursor;
  2755       /* We don't need to free ts_query and cursor because they
  2756          are stored in a lisp object, which is tracked by gc.  */
  2757       needs_to_free_query_and_cursor = false;
  2758       if (treesit_query == NULL)
  2759         xsignal (signal_symbol, signal_data);
  2760     }
  2761   else
  2762     {
  2763       /* Since query is not TS_COMPILED_QUERY, it can only be a string
  2764          or a cons.  */
  2765       if (CONSP (query))
  2766         query = Ftreesit_query_expand (query);
  2767       char *query_string = SSDATA (query);
  2768       uint32_t error_offset;
  2769       TSQueryError error_type;
  2770       treesit_query = ts_query_new (lang, query_string, strlen (query_string),
  2771                                     &error_offset, &error_type);
  2772       if (treesit_query == NULL)
  2773         xsignal (Qtreesit_query_error,
  2774                  treesit_compose_query_signal_data (error_offset,
  2775                                                     error_type, query));
  2776       cursor = ts_query_cursor_new ();
  2777       needs_to_free_query_and_cursor = true;
  2778     }
  2779 
  2780   /* WARN: After this point, free treesit_query and cursor before every
  2781      signal and return.  */
  2782 
  2783   /* Set query range.  */
  2784   if (!NILP (beg) && !NILP (end))
  2785     {
  2786       ptrdiff_t beg_byte = CHAR_TO_BYTE (XFIXNUM (beg));
  2787       ptrdiff_t end_byte = CHAR_TO_BYTE (XFIXNUM (end));
  2788       /* We never let tree-sitter run on buffers too large, so these
  2789          assertion should never hit.  */
  2790       eassert (beg_byte - visible_beg <= UINT32_MAX);
  2791       eassert (end_byte - visible_beg <= UINT32_MAX);
  2792       ts_query_cursor_set_byte_range (cursor,
  2793                                       (uint32_t) (beg_byte - visible_beg),
  2794                                       (uint32_t) (end_byte - visible_beg));
  2795     }
  2796 
  2797   /* Execute query.  */
  2798   ts_query_cursor_exec (cursor, treesit_query, treesit_node);
  2799   TSQueryMatch match;
  2800 
  2801   /* Go over each match, collect captures and predicates.  Include the
  2802      captures in the RESULT list unconditionally as we get them, then
  2803      test for predicates.  If predicates pass, then all good, if
  2804      predicates don't pass, revert the result back to the result
  2805      before this loop (PREV_RESULT).  (Predicates control the entire
  2806      match.) This way we don't need to create a list of captures in
  2807      every for loop and nconc it to RESULT every time.  That is indeed
  2808      the initial implementation in which Yoav found nconc being the
  2809      bottleneck (98.4% of the running time spent on nconc).  */
  2810   uint32_t patterns_count = ts_query_pattern_count (treesit_query);
  2811   Lisp_Object result = Qnil;
  2812   Lisp_Object prev_result = result;
  2813   Lisp_Object predicates_table = make_vector (patterns_count, Qt);
  2814   while (ts_query_cursor_next_match (cursor, &match))
  2815     {
  2816       /* Record the checkpoint that we may roll back to.  */
  2817       prev_result = result;
  2818       /* Get captured nodes.  */
  2819       const TSQueryCapture *captures = match.captures;
  2820       for (int idx = 0; idx < match.capture_count; idx++)
  2821         {
  2822           uint32_t capture_name_len;
  2823           TSQueryCapture capture = captures[idx];
  2824           Lisp_Object captured_node = make_treesit_node (lisp_parser,
  2825                                                          capture.node);
  2826 
  2827           Lisp_Object cap;
  2828           if (NILP (node_only))
  2829             {
  2830               const char *capture_name
  2831                 = ts_query_capture_name_for_id (treesit_query, capture.index,
  2832                                                 &capture_name_len);
  2833               cap = Fcons (intern_c_string_1 (capture_name, capture_name_len),
  2834                            captured_node);
  2835             }
  2836           else
  2837             cap = captured_node;
  2838 
  2839           result = Fcons (cap, result);
  2840         }
  2841       /* Get predicates.  */
  2842       Lisp_Object predicates = AREF (predicates_table, match.pattern_index);
  2843       if (EQ (predicates, Qt))
  2844         {
  2845           predicates = treesit_predicates_for_pattern (treesit_query,
  2846                                                        match.pattern_index);
  2847           ASET (predicates_table, match.pattern_index, predicates);
  2848         }
  2849 
  2850       /* captures_lisp = Fnreverse (captures_lisp); */
  2851       struct capture_range captures_range = { result, prev_result };
  2852       if (!treesit_eval_predicates (captures_range, predicates))
  2853         /* Predicates didn't pass, roll back.  */
  2854         result = prev_result;
  2855     }
  2856   if (needs_to_free_query_and_cursor)
  2857     {
  2858       ts_query_delete (treesit_query);
  2859       ts_query_cursor_delete (cursor);
  2860     }
  2861   return Fnreverse (result);
  2862 }
  2863 
  2864 
  2865 /*** Navigation */
  2866 
  2867 static inline void
  2868 treesit_assume_true (bool val)
  2869 {
  2870   eassert (val == true);
  2871 }
  2872 
  2873 /* Tries to move CURSOR to point to TARGET.  END_POS is the end of
  2874    TARGET.  If success, return true, otherwise move CURSOR back to
  2875    starting position and return false.  LIMIT is the recursion
  2876    limit.  */
  2877 static bool
  2878 treesit_cursor_helper_1 (TSTreeCursor *cursor, TSNode *target,
  2879                          uint32_t end_pos, ptrdiff_t limit)
  2880 {
  2881   if (limit <= 0)
  2882     return false;
  2883 
  2884   TSNode cursor_node = ts_tree_cursor_current_node (cursor);
  2885   if (ts_node_eq (cursor_node, *target))
  2886     return true;
  2887 
  2888   if (!ts_tree_cursor_goto_first_child (cursor))
  2889     return false;
  2890 
  2891   /* Skip nodes that definitely don't contain TARGET.  */
  2892   while (ts_node_end_byte (cursor_node) < end_pos)
  2893     {
  2894       if (!ts_tree_cursor_goto_next_sibling (cursor))
  2895         break;
  2896       cursor_node = ts_tree_cursor_current_node (cursor);
  2897     }
  2898 
  2899   /* Go through each sibling that could contain TARGET.  Because of
  2900      missing nodes (their width is 0), there could be multiple
  2901      siblings that could contain TARGET.  */
  2902   while (ts_node_start_byte (cursor_node) <= end_pos)
  2903     {
  2904       if (treesit_cursor_helper_1 (cursor, target, end_pos, limit - 1))
  2905         return true;
  2906 
  2907       if (!ts_tree_cursor_goto_next_sibling (cursor))
  2908         break;
  2909       cursor_node = ts_tree_cursor_current_node (cursor);
  2910     }
  2911 
  2912   /* Couldn't find TARGET, must be not in this subtree, move cursor
  2913      back and pray that other brothers and sisters can succeed.  */
  2914   treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
  2915   return false;
  2916 }
  2917 
  2918 /* Create a TSTreeCursor pointing at NODE.  PARSER is the lisp parser
  2919    that produced NODE.  If success, return true, otherwise return
  2920    false.  This function should almost always succeed, but if the parse
  2921    tree is strangely too deep and exceeds the recursion limit, this
  2922    function will fail and return false.
  2923 
  2924    If this function returns true, caller needs to free CURSOR; if
  2925    returns false, caller don't need to free CURSOR.
  2926 
  2927    The reason we need this instead of simply using ts_tree_cursor_new
  2928    is that we have to create the cursor on the root node and traverse
  2929    down to NODE, in order to record the correct stack of parent nodes.
  2930    Otherwise going to sibling or parent of NODE wouldn't work.
  2931 
  2932    (Wow perfect filling.)  */
  2933 static bool
  2934 treesit_cursor_helper (TSTreeCursor *cursor, TSNode node, Lisp_Object parser)
  2935 {
  2936   uint32_t end_pos = ts_node_end_byte (node);
  2937   TSNode root = ts_tree_root_node (XTS_PARSER (parser)->tree);
  2938   *cursor = ts_tree_cursor_new (root);
  2939   bool success = treesit_cursor_helper_1 (cursor, &node, end_pos,
  2940                                           treesit_recursion_limit);
  2941   if (!success)
  2942     ts_tree_cursor_delete (cursor);
  2943   return success;
  2944 }
  2945 
  2946 /* Move CURSOR to the next/previous sibling.  FORWARD controls the
  2947    direction.  NAMED controls the namedness.  If there is a valid
  2948    sibling, move CURSOR to it and return true, otherwise return false.
  2949    When false is returned, CURSOR points to a sibling node of the node
  2950    we started at, but exactly which is undefined.  */
  2951 static bool
  2952 treesit_traverse_sibling_helper (TSTreeCursor *cursor,
  2953                                  bool forward, bool named)
  2954 {
  2955   if (forward)
  2956     {
  2957       if (!named)
  2958         return ts_tree_cursor_goto_next_sibling (cursor);
  2959       /* Else named...  */
  2960       while (ts_tree_cursor_goto_next_sibling (cursor))
  2961         {
  2962           if (ts_node_is_named (ts_tree_cursor_current_node (cursor)))
  2963             return true;
  2964         }
  2965       return false;
  2966     }
  2967   else /* Backward.  */
  2968     {
  2969       /* Go to first child and go through each sibling, until we find
  2970          the one just before the starting node.  */
  2971       TSNode start = ts_tree_cursor_current_node (cursor);
  2972       if (!ts_tree_cursor_goto_parent (cursor))
  2973         return false;
  2974       treesit_assume_true (ts_tree_cursor_goto_first_child (cursor));
  2975 
  2976       /* Now CURSOR is at the first child.  If we started at the first
  2977          child, then there is no further siblings.  */
  2978       TSNode first_child = ts_tree_cursor_current_node (cursor);
  2979       if (ts_node_eq (first_child, start))
  2980         return false;
  2981 
  2982       /* PROBE is always DELTA siblings ahead of CURSOR. */
  2983       TSTreeCursor probe = ts_tree_cursor_copy (cursor);
  2984       /* This is position of PROBE minus position of CURSOR.  */
  2985       ptrdiff_t delta = 0;
  2986       TSNode probe_node;
  2987       TSNode cursor_node;
  2988       while (ts_tree_cursor_goto_next_sibling (&probe))
  2989         {
  2990           /* Move PROBE forward, if it equals to the starting node,
  2991              CURSOR points to the node we want (prev valid sibling of
  2992              the starting node).  */
  2993           delta++;
  2994           probe_node = ts_tree_cursor_current_node (&probe);
  2995 
  2996           /* PROBE matched, depending on NAMED, return true/false.  */
  2997           if (ts_node_eq (probe_node, start))
  2998             {
  2999               ts_tree_cursor_delete (&probe);
  3000               cursor_node = ts_tree_cursor_current_node (cursor);
  3001               ts_tree_cursor_delete (&probe);
  3002               return (!named || (named && ts_node_is_named (cursor_node)));
  3003             }
  3004 
  3005           /* PROBE didn't match, move CURSOR forward to PROBE's
  3006              position, but if we are looking for named nodes, only
  3007              move CURSOR to PROBE if PROBE is at a named node.  */
  3008           if (!named || (named && ts_node_is_named (probe_node)))
  3009             for (; delta > 0; delta--)
  3010               treesit_assume_true (ts_tree_cursor_goto_next_sibling (cursor));
  3011         }
  3012       ts_tree_cursor_delete (&probe);
  3013       return false;
  3014     }
  3015 }
  3016 
  3017 /* Move CURSOR to the first/last child.  FORWARD controls the
  3018    direction.  NAMED controls the namedness.  If there is a valid
  3019    child, move CURSOR to it and return true, otherwise don't move
  3020    CURSOR and return false.  */
  3021 static bool
  3022 treesit_traverse_child_helper (TSTreeCursor *cursor,
  3023                                bool forward, bool named)
  3024 {
  3025   if (forward)
  3026     {
  3027       if (!named)
  3028         return ts_tree_cursor_goto_first_child (cursor);
  3029       else
  3030         {
  3031           if (!ts_tree_cursor_goto_first_child (cursor))
  3032             return false;
  3033           /* After this point, if you return false, make sure to go
  3034              back to parent.  */
  3035           TSNode first_child = ts_tree_cursor_current_node (cursor);
  3036           if (ts_node_is_named (first_child))
  3037             return true;
  3038 
  3039           if (treesit_traverse_sibling_helper (cursor, true, true))
  3040             return true;
  3041           else
  3042             {
  3043               treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
  3044               return false;
  3045             }
  3046         }
  3047     }
  3048   else /* Backward.  */
  3049     {
  3050       if (!ts_tree_cursor_goto_first_child (cursor))
  3051         return false;
  3052       /* After this point, if you return false, make sure to go
  3053          back to parent.  */
  3054 
  3055       /* First go to the last child.  */
  3056       while (ts_tree_cursor_goto_next_sibling (cursor));
  3057 
  3058       if (!named)
  3059         return true;
  3060       /* Else named... */
  3061       if (treesit_traverse_sibling_helper(cursor, false, true))
  3062         return true;
  3063       else
  3064         {
  3065           treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
  3066           return false;
  3067         }
  3068     }
  3069 }
  3070 
  3071 /* Return true if the node at CURSOR matches PRED.  PRED can be a
  3072    string or a function.  This function assumes PRED is either a
  3073    string or a function.  If NAMED is true, also check that the node
  3074    is named.  */
  3075 static bool
  3076 treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
  3077                                   Lisp_Object parser, bool named)
  3078 {
  3079   TSNode node = ts_tree_cursor_current_node (cursor);
  3080   if (named && !ts_node_is_named (node))
  3081     return false;
  3082 
  3083   if (STRINGP (pred))
  3084     {
  3085       const char *type = ts_node_type (node);
  3086       return fast_c_string_match (pred, type, strlen (type)) >= 0;
  3087     }
  3088   else
  3089     {
  3090       Lisp_Object lisp_node = make_treesit_node (parser, node);
  3091       return !NILP (CALLN (Ffuncall, pred, lisp_node));
  3092     }
  3093 }
  3094 
  3095 /* Traverse the parse tree starting from CURSOR.  PRED can be a
  3096    function (takes a node and returns nil/non-nil), or a string
  3097    (treated as regexp matching the node's type, must be all single
  3098    byte characters).  If the node satisfies PRED, leave CURSOR on that
  3099    node and return true.  If no node satisfies PRED, move CURSOR back
  3100    to starting position and return false.
  3101 
  3102    LIMIT is the number of levels we descend in the tree.  FORWARD
  3103    controls the direction in which we traverse the tree, true means
  3104    forward, false backward.  If SKIP_ROOT is true, don't match ROOT.
  3105    */
  3106 static bool
  3107 treesit_search_dfs (TSTreeCursor *cursor,
  3108                     Lisp_Object pred, Lisp_Object parser,
  3109                     bool forward, bool named, ptrdiff_t limit,
  3110                     bool skip_root)
  3111 {
  3112   if (!skip_root
  3113       && treesit_traverse_match_predicate (cursor, pred, parser, named))
  3114     return true;
  3115 
  3116   if (limit == 0)
  3117     return false;
  3118 
  3119   if (!treesit_traverse_child_helper (cursor, forward, named))
  3120     return false;
  3121   /* After this point, if you return false, make sure to go back to
  3122      parent.  */
  3123 
  3124   do /* Iterate through each child.  */
  3125     {
  3126       if (treesit_search_dfs (cursor, pred, parser, forward,
  3127                               named, limit - 1, false))
  3128         return true;
  3129     }
  3130   while (treesit_traverse_sibling_helper (cursor, forward, false));
  3131 
  3132   /* No match in any child's subtree, go back to starting node.  */
  3133   treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
  3134   return false;
  3135 }
  3136 
  3137 /* Go through the whole tree linearly, leaf-first, starting from
  3138    START.  PRED, PARSER, NAMED, FORWARD are the same as in
  3139    ts_search_subtree.  If a match is found, leave CURSOR at that node,
  3140    and return true, if no match is found, return false, and CURSOR's
  3141    position is undefined.  */
  3142 static bool
  3143 treesit_search_forward (TSTreeCursor *cursor,
  3144                         Lisp_Object pred, Lisp_Object parser,
  3145                         bool forward, bool named)
  3146 {
  3147   /* We don't search for subtree and always search from the leaf
  3148      nodes.  This way repeated call of this function traverses each
  3149      node in the tree once and only once:
  3150 
  3151      (while node (setq node (treesit-search-forward node)))
  3152   */
  3153   bool initial = true;
  3154   while (true)
  3155     {
  3156       if (!initial /* We don't match the starting node.  */
  3157           && treesit_traverse_match_predicate (cursor, pred, parser, named))
  3158         return true;
  3159       initial = false;
  3160 
  3161       /* Try going to the next sibling, if there is no next sibling,
  3162          go to parent and try again.  */
  3163       while (!treesit_traverse_sibling_helper (cursor, forward, named))
  3164         {
  3165           /* There is no next sibling, go to parent.  */
  3166           if (!ts_tree_cursor_goto_parent (cursor))
  3167             return false;
  3168 
  3169           if (treesit_traverse_match_predicate (cursor, pred, parser, named))
  3170               return true;
  3171         }
  3172       /* We are at the next sibling, deep dive into the first leaf
  3173          node.  */
  3174       while (treesit_traverse_child_helper (cursor, forward, false));
  3175       /* At this point CURSOR is at a leaf node.  */
  3176     }
  3177 }
  3178 
  3179 /* Cleanup function for cursor.  */
  3180 static void
  3181 treesit_traverse_cleanup_cursor(void *cursor)
  3182 {
  3183   ts_tree_cursor_delete ((TSTreeCursor *) cursor);
  3184 }
  3185 
  3186 DEFUN ("treesit-search-subtree",
  3187        Ftreesit_search_subtree,
  3188        Streesit_search_subtree, 2, 5, 0,
  3189        doc: /* Traverse the parse tree of NODE depth-first using PREDICATE.
  3190 
  3191 Traverse the subtree of NODE, and match PREDICATE with each node along
  3192 the way.  PREDICATE is a regexp string that matches against each
  3193 node's type, or a function that takes a node and returns nil/non-nil.
  3194 
  3195 By default, only traverse named nodes, but if ALL is non-nil, traverse
  3196 all nodes.  If BACKWARD is non-nil, traverse backwards.  If DEPTH is
  3197 non-nil, only traverse nodes up to that number of levels down in the
  3198 tree.  If DEPTH is nil, default to 1000.
  3199 
  3200 Return the first matched node, or nil if none matches.  */)
  3201   (Lisp_Object node, Lisp_Object predicate, Lisp_Object backward,
  3202    Lisp_Object all, Lisp_Object depth)
  3203 {
  3204   CHECK_TS_NODE (node);
  3205   CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
  3206               list3 (Qor, Qstringp, Qfunctionp), predicate);
  3207   CHECK_SYMBOL (all);
  3208   CHECK_SYMBOL (backward);
  3209 
  3210   /* We use a default limit of 1000.  See bug#59426 for the
  3211      discussion.  */
  3212   ptrdiff_t the_limit = treesit_recursion_limit;
  3213   if (!NILP (depth))
  3214     {
  3215       CHECK_FIXNUM (depth);
  3216       the_limit = XFIXNUM (depth);
  3217     }
  3218 
  3219   treesit_initialize ();
  3220 
  3221   Lisp_Object parser = XTS_NODE (node)->parser;
  3222   Lisp_Object return_value = Qnil;
  3223   TSTreeCursor cursor;
  3224   if (!treesit_cursor_helper (&cursor, XTS_NODE (node)->node, parser))
  3225     return return_value;
  3226 
  3227   specpdl_ref count = SPECPDL_INDEX ();
  3228   record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
  3229 
  3230   if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward),
  3231                           NILP (all), the_limit, false))
  3232     {
  3233       TSNode node = ts_tree_cursor_current_node (&cursor);
  3234       return_value = make_treesit_node (parser, node);
  3235     }
  3236 
  3237   return unbind_to (count, return_value);
  3238 }
  3239 
  3240 DEFUN ("treesit-search-forward",
  3241        Ftreesit_search_forward,
  3242        Streesit_search_forward, 2, 4, 0,
  3243        doc: /* Search for node matching PREDICATE in the parse tree of START.
  3244 
  3245 Start traversing the tree from node START, and match PREDICATE with
  3246 each node (except START itself) along the way.  PREDICATE is a regexp
  3247 string that matches against each node's type, or a function that takes
  3248 a node and returns non-nil if it matches.
  3249 
  3250 By default, only search for named nodes, but if ALL is non-nil, search
  3251 for all nodes.  If BACKWARD is non-nil, search backwards.
  3252 
  3253 Return the first matched node, or nil if none matches.
  3254 
  3255 For a tree like below, where START is marked by S, traverse as
  3256 numbered from 1 to 12:
  3257 
  3258                 12
  3259                 |
  3260        S--------3----------11
  3261        |        |          |
  3262   o--o-+--o  1--+--2    6--+-----10
  3263   |  |                  |        |
  3264   o  o                +-+-+   +--+--+
  3265                       |   |   |  |  |
  3266                       4   5   7  8  9
  3267 
  3268 Note that this function doesn't traverse the subtree of START, and it
  3269 always traverse leaf nodes first, then upwards.  */)
  3270   (Lisp_Object start, Lisp_Object predicate, Lisp_Object backward,
  3271    Lisp_Object all)
  3272 {
  3273   CHECK_TS_NODE (start);
  3274   CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
  3275               list3 (Qor, Qstringp, Qfunctionp), predicate);
  3276   CHECK_SYMBOL (all);
  3277   CHECK_SYMBOL (backward);
  3278 
  3279   treesit_initialize ();
  3280 
  3281   Lisp_Object parser = XTS_NODE (start)->parser;
  3282   Lisp_Object return_value = Qnil;
  3283   TSTreeCursor cursor;
  3284   if (!treesit_cursor_helper (&cursor, XTS_NODE (start)->node, parser))
  3285     return return_value;
  3286 
  3287   specpdl_ref count = SPECPDL_INDEX ();
  3288   record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
  3289 
  3290   if (treesit_search_forward (&cursor, predicate, parser,
  3291                               NILP (backward), NILP (all)))
  3292     {
  3293       TSNode node = ts_tree_cursor_current_node (&cursor);
  3294       return_value = make_treesit_node (parser, node);
  3295     }
  3296 
  3297   return unbind_to (count, return_value);
  3298 }
  3299 
  3300 /* Recursively traverse the tree under CURSOR, and append the result
  3301    subtree to PARENT's cdr.  See more in Ftreesit_induce_sparse_tree.
  3302    Note that the top-level children list is reversed, because
  3303    reasons.  */
  3304 static void
  3305 treesit_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent,
  3306                            Lisp_Object pred, Lisp_Object process_fn,
  3307                            ptrdiff_t limit, Lisp_Object parser)
  3308 {
  3309   bool match = treesit_traverse_match_predicate (cursor, pred, parser, false);
  3310   if (match)
  3311     {
  3312       /* If this node matches pred, add a new node to the parent's
  3313          children list.  */
  3314       TSNode node = ts_tree_cursor_current_node (cursor);
  3315       Lisp_Object lisp_node = make_treesit_node (parser, node);
  3316       if (!NILP (process_fn))
  3317         lisp_node = CALLN (Ffuncall, process_fn, lisp_node);
  3318 
  3319       Lisp_Object this = Fcons (lisp_node, Qnil);
  3320       Fsetcdr (parent, Fcons (this, Fcdr (parent)));
  3321       /* Now for children nodes, this is the new parent.  */
  3322       parent = this;
  3323     }
  3324   /* Go through each child.  */
  3325   if (limit > 0 && ts_tree_cursor_goto_first_child (cursor))
  3326     {
  3327       do
  3328         {
  3329           /* Make sure not to use node after the recursive funcall.
  3330              Then C compilers should be smart enough not to copy NODE
  3331              to stack.  */
  3332           treesit_build_sparse_tree (cursor, parent, pred, process_fn,
  3333                                      limit - 1, parser);
  3334         }
  3335       while (ts_tree_cursor_goto_next_sibling (cursor));
  3336       /* Don't forget to come back to this node.  */
  3337       ts_tree_cursor_goto_parent (cursor);
  3338     }
  3339   /* Before we go, reverse children in the sparse tree.  */
  3340   if (match)
  3341     /* When match == true, "parent" is actually the node we added in
  3342        this layer (parent = this).  */
  3343     Fsetcdr (parent, Fnreverse (Fcdr (parent)));
  3344 }
  3345 
  3346 DEFUN ("treesit-induce-sparse-tree",
  3347        Ftreesit_induce_sparse_tree,
  3348        Streesit_induce_sparse_tree, 2, 4, 0,
  3349        doc: /* Create a sparse tree of ROOT's subtree.
  3350 
  3351 This takes the subtree under ROOT, and combs it so only the nodes
  3352 that match PREDICATE are left, like picking out grapes on the vine.
  3353 PREDICATE is a regexp string that matches against each node's type.
  3354 
  3355 For a subtree on the left that consist of both numbers and letters, if
  3356 PREDICATE is "is letter", the returned tree is the one on the right.
  3357 
  3358         a                 a              a
  3359         |                 |              |
  3360     +---+---+         +---+---+      +---+---+
  3361     |   |   |         |   |   |      |   |   |
  3362     b   1   2         b   |   |      b   c   d
  3363         |   |     =>      |   |  =>      |
  3364         c   +--+          c   +          e
  3365         |   |  |          |   |
  3366      +--+   d  4       +--+   d
  3367      |  |              |
  3368      e  5              e
  3369 
  3370 If PROCESS-FN is non-nil, it should be a function of one argument.  In
  3371 that case, instead of returning the matched nodes, pass each node to
  3372 PROCESS-FN, and use its return value instead.
  3373 
  3374 If non-nil, DEPTH is the number of levels to go down the tree from
  3375 ROOT.  If DEPTH is nil or omitted, it defaults to 1000.
  3376 
  3377 Each node in the returned tree looks like (NODE . (CHILD ...)).  The
  3378 root of this tree might be nil, if ROOT doesn't match PREDICATE.
  3379 
  3380 If no node matches PREDICATE, return nil.
  3381 
  3382 PREDICATE can also be a function that takes a node and returns
  3383 nil/non-nil, but it is slower and more memory consuming than using
  3384 a regexp.  */)
  3385   (Lisp_Object root, Lisp_Object predicate, Lisp_Object process_fn,
  3386    Lisp_Object depth)
  3387 {
  3388   CHECK_TS_NODE (root);
  3389   CHECK_TYPE (STRINGP (predicate) || FUNCTIONP (predicate),
  3390               list3 (Qor, Qstringp, Qfunctionp), predicate);
  3391 
  3392   if (!NILP (process_fn))
  3393     CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
  3394 
  3395   /* We use a default limit of 1000.  See bug#59426 for the
  3396      discussion.  */
  3397   ptrdiff_t the_limit = treesit_recursion_limit;
  3398   if (!NILP (depth))
  3399     {
  3400       CHECK_FIXNUM (depth);
  3401       the_limit = XFIXNUM (depth);
  3402     }
  3403 
  3404   treesit_initialize ();
  3405 
  3406   Lisp_Object parser = XTS_NODE (root)->parser;
  3407   Lisp_Object parent = Fcons (Qnil, Qnil);
  3408   /* In this function we never traverse above NODE, so we don't need
  3409      to use treesit_cursor_helper.  */
  3410   TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node);
  3411 
  3412   specpdl_ref count = SPECPDL_INDEX ();
  3413   record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
  3414 
  3415   treesit_build_sparse_tree (&cursor, parent, predicate, process_fn,
  3416                              the_limit, parser);
  3417 
  3418   unbind_to (count, Qnil);
  3419 
  3420   Fsetcdr (parent, Fnreverse (Fcdr (parent)));
  3421   if (NILP (Fcdr (parent)))
  3422     return Qnil;
  3423   else
  3424     return parent;
  3425 }
  3426 
  3427 DEFUN ("treesit-subtree-stat",
  3428        Ftreesit_subtree_stat,
  3429        Streesit_subtree_stat, 1, 1, 0,
  3430        doc: /* Return information about the subtree of NODE.
  3431 
  3432 Return a list (MAX-DEPTH MAX-WIDTH COUNT), where MAX-DEPTH is the
  3433 maximum depth of the subtree, MAX-WIDTH is the maximum number of
  3434 direct children of nodes in the subtree, and COUNT is the number of
  3435 nodes in the subtree, including NODE.  */)
  3436   (Lisp_Object node)
  3437 {
  3438   /* Having a limit on the depth to traverse doesn't have much impact
  3439      on the time it takes, so I left that out.  */
  3440   CHECK_TS_NODE (node);
  3441 
  3442   treesit_initialize ();
  3443 
  3444   TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (node)->node);
  3445   ptrdiff_t max_depth = 1;
  3446   ptrdiff_t max_width = 0;
  3447   ptrdiff_t count = 0;
  3448   ptrdiff_t current_depth = 0;
  3449 
  3450   /* Traverse the subtree depth-first.  */
  3451   while (true)
  3452     {
  3453       count++;
  3454 
  3455       /* Go down depth-first.  */
  3456       while (ts_tree_cursor_goto_first_child (&cursor))
  3457         {
  3458           current_depth++;
  3459           count++;
  3460           /* While we're at here, measure the number of siblings.  */
  3461           ptrdiff_t width_count = 1;
  3462           while (ts_tree_cursor_goto_next_sibling (&cursor))
  3463             width_count++;
  3464           max_width = max (max_width, width_count);
  3465           /* Go back to the first sibling.  */
  3466           treesit_assume_true (ts_tree_cursor_goto_parent (&cursor));
  3467           treesit_assume_true (ts_tree_cursor_goto_first_child (&cursor));
  3468         }
  3469       max_depth = max (max_depth, current_depth);
  3470 
  3471       /* Go to next sibling.  If there is no next sibling, go to
  3472          parent's next sibling, and so on.  If there is no more
  3473          parent, we've traversed the whole subtree, stop.  */
  3474       while (!ts_tree_cursor_goto_next_sibling (&cursor))
  3475         {
  3476           if (ts_tree_cursor_goto_parent (&cursor))
  3477             current_depth--;
  3478           else
  3479             {
  3480               ts_tree_cursor_delete (&cursor);
  3481               return list3 (make_fixnum (max_depth),
  3482                             make_fixnum (max_width),
  3483                             make_fixnum (count));
  3484             }
  3485         }
  3486     }
  3487 }
  3488 
  3489 #endif  /* HAVE_TREE_SITTER */
  3490 
  3491 DEFUN ("treesit-available-p", Ftreesit_available_p,
  3492        Streesit_available_p, 0, 0, 0,
  3493        doc: /* Return non-nil if tree-sitter support is built-in and available.  */)
  3494   (void)
  3495 {
  3496 #if HAVE_TREE_SITTER
  3497   return load_tree_sitter_if_necessary (false) ? Qt : Qnil;
  3498 #else
  3499   return Qnil;
  3500 #endif
  3501 }
  3502 
  3503 
  3504 /*** Initialization */
  3505 
  3506 /* Initialize the tree-sitter routines.  */
  3507 void
  3508 syms_of_treesit (void)
  3509 {
  3510 #if HAVE_TREE_SITTER
  3511   DEFSYM (Qtreesit_parser_p, "treesit-parser-p");
  3512   DEFSYM (Qtreesit_node_p, "treesit-node-p");
  3513   DEFSYM (Qtreesit_compiled_query_p, "treesit-compiled-query-p");
  3514   DEFSYM (Qtreesit_query_p, "treesit-query-p");
  3515   DEFSYM (Qnamed, "named");
  3516   DEFSYM (Qmissing, "missing");
  3517   DEFSYM (Qextra, "extra");
  3518   DEFSYM (Qoutdated, "outdated");
  3519   DEFSYM (Qhas_error, "has-error");
  3520   DEFSYM (Qlive, "live");
  3521 
  3522   DEFSYM (QCanchor, ":anchor");
  3523   DEFSYM (QCequal, ":equal");
  3524   DEFSYM (QCmatch, ":match");
  3525   DEFSYM (QCpred, ":pred");
  3526 
  3527   DEFSYM (Qnot_found, "not-found");
  3528   DEFSYM (Qsymbol_error, "symbol-error");
  3529   DEFSYM (Qversion_mismatch, "version-mismatch");
  3530 
  3531   DEFSYM (Qtreesit_error, "treesit-error");
  3532   DEFSYM (Qtreesit_query_error, "treesit-query-error");
  3533   DEFSYM (Qtreesit_parse_error, "treesit-parse-error");
  3534   DEFSYM (Qtreesit_range_invalid, "treesit-range-invalid");
  3535   DEFSYM (Qtreesit_buffer_too_large,
  3536           "treesit-buffer-too-large");
  3537   DEFSYM (Qtreesit_load_language_error,
  3538           "treesit-load-language-error");
  3539   DEFSYM (Qtreesit_node_outdated,
  3540           "treesit-node-outdated");
  3541   DEFSYM (Quser_emacs_directory,
  3542           "user-emacs-directory");
  3543   DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted");
  3544   DEFSYM (Qtreesit_pattern_expand, "treesit-pattern-expand");
  3545 
  3546   DEFSYM (Qor, "or");
  3547 
  3548 #ifdef WINDOWSNT
  3549   DEFSYM (Qtree_sitter, "tree-sitter");
  3550 #endif
  3551 
  3552   define_error (Qtreesit_error, "Generic tree-sitter error", Qerror);
  3553   define_error (Qtreesit_query_error, "Query pattern is malformed",
  3554                 Qtreesit_error);
  3555   /* Should be impossible, no need to document this error.  */
  3556   define_error (Qtreesit_parse_error, "Parse failed",
  3557                 Qtreesit_error);
  3558   define_error (Qtreesit_range_invalid,
  3559                 "RANGES are invalid: they have to be ordered and should not overlap",
  3560                 Qtreesit_error);
  3561   define_error (Qtreesit_buffer_too_large, "Buffer too large (> 4GiB)",
  3562                 Qtreesit_error);
  3563   define_error (Qtreesit_load_language_error,
  3564                 "Cannot load language definition",
  3565                 Qtreesit_error);
  3566   define_error (Qtreesit_node_outdated,
  3567                 "This node is outdated, please retrieve a new one",
  3568                 Qtreesit_error);
  3569   define_error (Qtreesit_parser_deleted,
  3570                 "This parser is deleted and cannot be used",
  3571                 Qtreesit_error);
  3572 
  3573   DEFVAR_LISP ("treesit-load-name-override-list",
  3574                Vtreesit_load_name_override_list,
  3575                doc:
  3576                /* An override list for unconventional tree-sitter libraries.
  3577 
  3578 By default, Emacs assumes the dynamic library for LANG is
  3579 libtree-sitter-LANG.EXT, where EXT is the OS specific extension for
  3580 dynamic libraries.  Emacs also assumes that the name of the C function
  3581 the library provides is tree_sitter_LANG.  If that is not the case,
  3582 you can add an entry
  3583 
  3584     (LANG LIBRARY-BASE-NAME FUNCTION-NAME)
  3585 
  3586 to this list, where LIBRARY-BASE-NAME is the filename of the dynamic
  3587 library without the file-name extension, and FUNCTION-NAME is the
  3588 function provided by the library.  */);
  3589   Vtreesit_load_name_override_list = Qnil;
  3590 
  3591   DEFVAR_LISP ("treesit-extra-load-path",
  3592                Vtreesit_extra_load_path,
  3593                doc:
  3594                /* Additional directories to look for tree-sitter language definitions.
  3595 The value should be a list of directories.
  3596 When trying to load a tree-sitter language definition,
  3597 Emacs first looks in the directories mentioned in this variable,
  3598 then in the `tree-sitter' subdirectory of `user-emacs-directory', and
  3599 then in the system default locations for dynamic libraries, in that order.  */);
  3600   Vtreesit_extra_load_path = Qnil;
  3601 
  3602   staticpro (&Vtreesit_str_libtree_sitter);
  3603   Vtreesit_str_libtree_sitter = build_pure_c_string ("libtree-sitter-");
  3604   staticpro (&Vtreesit_str_tree_sitter);
  3605   Vtreesit_str_tree_sitter = build_pure_c_string ("tree-sitter-");
  3606 #ifndef WINDOWSNT
  3607   staticpro (&Vtreesit_str_dot_0);
  3608   Vtreesit_str_dot_0 = build_pure_c_string (".0");
  3609 #endif
  3610   staticpro (&Vtreesit_str_dot);
  3611   Vtreesit_str_dot = build_pure_c_string (".");
  3612   staticpro (&Vtreesit_str_question_mark);
  3613   Vtreesit_str_question_mark = build_pure_c_string ("?");
  3614   staticpro (&Vtreesit_str_star);
  3615   Vtreesit_str_star = build_pure_c_string ("*");
  3616   staticpro (&Vtreesit_str_plus);
  3617   Vtreesit_str_plus = build_pure_c_string ("+");
  3618   staticpro (&Vtreesit_str_pound_equal);
  3619   Vtreesit_str_pound_equal = build_pure_c_string ("#equal");
  3620   staticpro (&Vtreesit_str_pound_match);
  3621   Vtreesit_str_pound_match = build_pure_c_string ("#match");
  3622   staticpro (&Vtreesit_str_pound_pred);
  3623   Vtreesit_str_pound_pred = build_pure_c_string ("#pred");
  3624   staticpro (&Vtreesit_str_open_bracket);
  3625   Vtreesit_str_open_bracket = build_pure_c_string ("[");
  3626   staticpro (&Vtreesit_str_close_bracket);
  3627   Vtreesit_str_close_bracket = build_pure_c_string ("]");
  3628   staticpro (&Vtreesit_str_open_paren);
  3629   Vtreesit_str_open_paren = build_pure_c_string ("(");
  3630   staticpro (&Vtreesit_str_close_paren);
  3631   Vtreesit_str_close_paren = build_pure_c_string (")");
  3632   staticpro (&Vtreesit_str_space);
  3633   Vtreesit_str_space = build_pure_c_string (" ");
  3634   staticpro (&Vtreesit_str_equal);
  3635   Vtreesit_str_equal = build_pure_c_string ("equal");
  3636   staticpro (&Vtreesit_str_match);
  3637   Vtreesit_str_match = build_pure_c_string ("match");
  3638   staticpro (&Vtreesit_str_pred);
  3639   Vtreesit_str_pred = build_pure_c_string ("pred");
  3640 
  3641   defsubr (&Streesit_language_available_p);
  3642   defsubr (&Streesit_library_abi_version);
  3643   defsubr (&Streesit_language_abi_version);
  3644 
  3645   defsubr (&Streesit_parser_p);
  3646   defsubr (&Streesit_node_p);
  3647   defsubr (&Streesit_compiled_query_p);
  3648   defsubr (&Streesit_query_p);
  3649   defsubr (&Streesit_query_language);
  3650 
  3651   defsubr (&Streesit_node_parser);
  3652 
  3653   defsubr (&Streesit_parser_create);
  3654   defsubr (&Streesit_parser_delete);
  3655   defsubr (&Streesit_parser_list);
  3656   defsubr (&Streesit_parser_buffer);
  3657   defsubr (&Streesit_parser_language);
  3658 
  3659   defsubr (&Streesit_parser_root_node);
  3660   /* defsubr (&Streesit_parse_string); */
  3661 
  3662   defsubr (&Streesit_parser_set_included_ranges);
  3663   defsubr (&Streesit_parser_included_ranges);
  3664 
  3665   defsubr (&Streesit_parser_notifiers);
  3666   defsubr (&Streesit_parser_add_notifier);
  3667   defsubr (&Streesit_parser_remove_notifier);
  3668 
  3669   defsubr (&Streesit_node_type);
  3670   defsubr (&Streesit_node_start);
  3671   defsubr (&Streesit_node_end);
  3672   defsubr (&Streesit_node_string);
  3673   defsubr (&Streesit_node_parent);
  3674   defsubr (&Streesit_node_child);
  3675   defsubr (&Streesit_node_check);
  3676   defsubr (&Streesit_node_field_name_for_child);
  3677   defsubr (&Streesit_node_child_count);
  3678   defsubr (&Streesit_node_child_by_field_name);
  3679   defsubr (&Streesit_node_next_sibling);
  3680   defsubr (&Streesit_node_prev_sibling);
  3681   defsubr (&Streesit_node_first_child_for_pos);
  3682   defsubr (&Streesit_node_descendant_for_range);
  3683   defsubr (&Streesit_node_eq);
  3684 
  3685   defsubr (&Streesit_pattern_expand);
  3686   defsubr (&Streesit_query_expand);
  3687   defsubr (&Streesit_query_compile);
  3688   defsubr (&Streesit_query_capture);
  3689 
  3690   defsubr (&Streesit_search_subtree);
  3691   defsubr (&Streesit_search_forward);
  3692   defsubr (&Streesit_induce_sparse_tree);
  3693   defsubr (&Streesit_subtree_stat);
  3694 #endif /* HAVE_TREE_SITTER */
  3695   defsubr (&Streesit_available_p);
  3696 }

/* [<][>][^][v][top][bottom][index][help] */