root/src/treesit.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. init_treesit_functions
  2. load_tree_sitter_if_necessary
  3. treesit_calloc_wrapper
  4. treesit_initialize
  5. treesit_symbol_to_c_name
  6. treesit_find_override_name
  7. treesit_load_language_push_for_each_suffix
  8. treesit_load_language
  9. DEFUN
  10. DEFUN
  11. treesit_check_parser
  12. treesit_tree_edit_1
  13. treesit_record_change
  14. treesit_sync_visible_region
  15. treesit_check_buffer_size
  16. treesit_call_after_change_functions
  17. treesit_ensure_parsed
  18. treesit_read_buffer
  19. make_treesit_parser
  20. make_treesit_node
  21. make_treesit_query
  22. treesit_delete_parser
  23. treesit_delete_query
  24. treesit_named_node_p
  25. treesit_query_error_to_string
  26. treesit_compose_query_signal_data
  27. treesit_ensure_query_compiled
  28. DEFUN
  29. DEFUN
  30. DEFUN
  31. DEFUN
  32. DEFUN
  33. DEFUN
  34. DEFUN
  35. DEFUN
  36. DEFUN
  37. DEFUN
  38. treesit_parser_live_p
  39. DEFUN
  40. treesit_check_range_argument
  41. treesit_make_ranges
  42. DEFUN
  43. DEFUN
  44. treesit_check_positive_integer
  45. treesit_check_node
  46. treesit_check_position
  47. treesit_node_uptodate_p
  48. DEFUN
  49. DEFUN
  50. DEFUN
  51. DEFUN
  52. DEFUN
  53. treesit_node_eq
  54. treesit_query_string_string
  55. DEFUN
  56. DEFUN
  57. treesit_predicates_for_pattern
  58. treesit_predicate_capture_name_to_node
  59. treesit_predicate_capture_name_to_text
  60. treesit_predicate_equal
  61. treesit_predicate_match
  62. treesit_predicate_pred
  63. treesit_eval_predicates
  64. treesit_resolve_node
  65. treesit_initialize_query
  66. treesit_assume_true
  67. treesit_cursor_helper_1
  68. treesit_cursor_helper
  69. treesit_traverse_sibling_helper
  70. treesit_traverse_child_helper
  71. treesit_traverse_get_predicate
  72. treesit_traverse_validate_predicate
  73. treesit_traverse_match_predicate
  74. treesit_search_dfs
  75. treesit_search_forward
  76. treesit_traverse_cleanup_cursor
  77. treesit_build_sparse_tree
  78. DEFUN
  79. DEFUN
  80. syms_of_treesit

     1 /* Tree-sitter integration for GNU Emacs.
     2 
     3 Copyright (C) 2021-2023 Free Software Foundation, Inc.
     4 
     5 Maintainer: Yuan Fu <casouri@gmail.com>
     6 
     7 This file is part of GNU Emacs.
     8 
     9 GNU Emacs is free software: you can redistribute it and/or modify
    10 it under the terms of the GNU General Public License as published by
    11 the Free Software Foundation, either version 3 of the License, or (at
    12 your option) any later version.
    13 
    14 GNU Emacs is distributed in the hope that it will be useful,
    15 but WITHOUT ANY WARRANTY; without even the implied warranty of
    16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    17 GNU General Public License for more details.
    18 
    19 You should have received a copy of the GNU General Public License
    20 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    21 
    22 #include <config.h>
    23 #include "lisp.h"
    24 #include "buffer.h"
    25 
    26 #include "treesit.h"
    27 
    28 #if HAVE_TREE_SITTER
    29 
    30 
    31 /* Dynamic loading of libtree-sitter.  */
    32 
    33 #ifdef WINDOWSNT
    34 # include "w32common.h"
    35 
    36 /* In alphabetical order.  */
    37 #undef ts_language_version
    38 #undef ts_node_child
    39 #undef ts_node_child_by_field_name
    40 #undef ts_node_child_count
    41 #undef ts_node_descendant_for_byte_range
    42 #undef ts_node_end_byte
    43 #undef ts_node_eq
    44 #undef ts_node_field_name_for_child
    45 #undef ts_node_has_error
    46 #undef ts_node_is_extra
    47 #undef ts_node_is_missing
    48 #undef ts_node_is_named
    49 #undef ts_node_is_null
    50 #undef ts_node_named_child
    51 #undef ts_node_named_child_count
    52 #undef ts_node_named_descendant_for_byte_range
    53 #undef ts_node_next_named_sibling
    54 #undef ts_node_next_sibling
    55 #undef ts_node_prev_named_sibling
    56 #undef ts_node_prev_sibling
    57 #undef ts_node_start_byte
    58 #undef ts_node_string
    59 #undef ts_node_type
    60 #undef ts_parser_delete
    61 #undef ts_parser_included_ranges
    62 #undef ts_parser_language
    63 #undef ts_parser_new
    64 #undef ts_parser_parse
    65 #undef ts_parser_set_included_ranges
    66 #undef ts_parser_set_language
    67 #undef ts_query_capture_name_for_id
    68 #undef ts_query_cursor_delete
    69 #undef ts_query_cursor_exec
    70 #undef ts_query_cursor_new
    71 #undef ts_query_cursor_next_match
    72 #undef ts_query_cursor_set_byte_range
    73 #undef ts_query_delete
    74 #undef ts_query_new
    75 #undef ts_query_pattern_count
    76 #undef ts_query_predicates_for_pattern
    77 #undef ts_query_string_value_for_id
    78 #undef ts_set_allocator
    79 #undef ts_tree_cursor_copy
    80 #undef ts_tree_cursor_current_node
    81 #undef ts_tree_cursor_delete
    82 #undef ts_tree_cursor_goto_first_child
    83 #undef ts_tree_cursor_goto_next_sibling
    84 #undef ts_tree_cursor_goto_parent
    85 #undef ts_tree_cursor_new
    86 #undef ts_tree_delete
    87 #undef ts_tree_edit
    88 #undef ts_tree_get_changed_ranges
    89 #undef ts_tree_root_node
    90 
    91 DEF_DLL_FN (uint32_t, ts_language_version, (const TSLanguage *));
    92 DEF_DLL_FN (TSNode, ts_node_child, (TSNode, uint32_t));
    93 DEF_DLL_FN (TSNode, ts_node_child_by_field_name,
    94             (TSNode, const char *, uint32_t));
    95 DEF_DLL_FN (uint32_t, ts_node_child_count, (TSNode));
    96 DEF_DLL_FN (TSNode, ts_node_descendant_for_byte_range,
    97             (TSNode, uint32_t, uint32_t));
    98 DEF_DLL_FN (uint32_t, ts_node_end_byte, (TSNode));
    99 DEF_DLL_FN (bool, ts_node_eq, (TSNode, TSNode));
   100 DEF_DLL_FN (const char *, ts_node_field_name_for_child, (TSNode, uint32_t));
   101 DEF_DLL_FN (bool, ts_node_has_error, (TSNode));
   102 DEF_DLL_FN (bool, ts_node_is_extra, (TSNode));
   103 DEF_DLL_FN (bool, ts_node_is_missing, (TSNode));
   104 DEF_DLL_FN (bool, ts_node_is_named, (TSNode));
   105 DEF_DLL_FN (bool, ts_node_is_null, (TSNode));
   106 DEF_DLL_FN (TSNode, ts_node_named_child, (TSNode, uint32_t));
   107 DEF_DLL_FN (uint32_t, ts_node_named_child_count, (TSNode));
   108 DEF_DLL_FN (TSNode, ts_node_named_descendant_for_byte_range,
   109             (TSNode, uint32_t, uint32_t));
   110 DEF_DLL_FN (TSNode, ts_node_next_named_sibling, (TSNode));
   111 DEF_DLL_FN (TSNode, ts_node_next_sibling, (TSNode));
   112 DEF_DLL_FN (TSNode, ts_node_prev_named_sibling, (TSNode));
   113 DEF_DLL_FN (TSNode, ts_node_prev_sibling, (TSNode));
   114 DEF_DLL_FN (uint32_t, ts_node_start_byte, (TSNode));
   115 DEF_DLL_FN (char *, ts_node_string, (TSNode));
   116 DEF_DLL_FN (const char *, ts_node_type, (TSNode));
   117 DEF_DLL_FN (void, ts_parser_delete, (TSParser *));
   118 DEF_DLL_FN (const TSRange *, ts_parser_included_ranges,
   119             (const TSParser *, uint32_t *));
   120 DEF_DLL_FN (const TSLanguage *, ts_parser_language, (const TSParser *));
   121 DEF_DLL_FN (TSParser *, ts_parser_new, (void));
   122 DEF_DLL_FN (TSTree *, ts_parser_parse, (TSParser *, const TSTree *, TSInput));
   123 DEF_DLL_FN (bool, ts_parser_set_included_ranges,
   124             (TSParser *, const TSRange *, uint32_t));
   125 DEF_DLL_FN (bool, ts_parser_set_language, (TSParser *, const TSLanguage *));
   126 DEF_DLL_FN (const char *, ts_query_capture_name_for_id,
   127             (const TSQuery *, uint32_t, uint32_t *));
   128 DEF_DLL_FN (void, ts_query_cursor_delete, (TSQueryCursor *));
   129 DEF_DLL_FN (void, ts_query_cursor_exec,
   130             (TSQueryCursor *, const TSQuery *, TSNode));
   131 DEF_DLL_FN (TSQueryCursor *, ts_query_cursor_new, (void));
   132 DEF_DLL_FN (bool, ts_query_cursor_next_match,
   133             (TSQueryCursor *, TSQueryMatch *));
   134 DEF_DLL_FN (void, ts_query_cursor_set_byte_range,
   135             (TSQueryCursor *, uint32_t, uint32_t));
   136 DEF_DLL_FN (void, ts_query_delete, (TSQuery *));
   137 DEF_DLL_FN (TSQuery *, ts_query_new,
   138             (const TSLanguage *, const char *, uint32_t, uint32_t *, TSQueryError *));
   139 DEF_DLL_FN (uint32_t, ts_query_pattern_count, (const TSQuery *));
   140 DEF_DLL_FN (const TSQueryPredicateStep *, ts_query_predicates_for_pattern,
   141             ( const TSQuery *, uint32_t, uint32_t *));
   142 DEF_DLL_FN (const char *, ts_query_string_value_for_id,
   143             (const TSQuery *, uint32_t, uint32_t *));
   144 DEF_DLL_FN (void, ts_set_allocator,
   145             (void *(*)(size_t), void *(*)(size_t, size_t), void *(*)(void *, size_t), void (*)(void *)));
   146 DEF_DLL_FN (TSTreeCursor, ts_tree_cursor_copy, (const TSTreeCursor *));
   147 DEF_DLL_FN (TSNode, ts_tree_cursor_current_node, (const TSTreeCursor *));
   148 DEF_DLL_FN (void, ts_tree_cursor_delete, (const TSTreeCursor *));
   149 DEF_DLL_FN (bool, ts_tree_cursor_goto_first_child, (TSTreeCursor *));
   150 DEF_DLL_FN (bool, ts_tree_cursor_goto_next_sibling, (TSTreeCursor *));
   151 DEF_DLL_FN (bool, ts_tree_cursor_goto_parent, (TSTreeCursor *));
   152 DEF_DLL_FN (TSTreeCursor, ts_tree_cursor_new, (TSNode));
   153 DEF_DLL_FN (void, ts_tree_delete, (TSTree *));
   154 DEF_DLL_FN (void, ts_tree_edit, (TSTree *, const TSInputEdit *));
   155 DEF_DLL_FN (TSRange *, ts_tree_get_changed_ranges,
   156             (const TSTree *, const TSTree *, uint32_t *));
   157 DEF_DLL_FN (TSNode, ts_tree_root_node, (const TSTree *));
   158 
   159 static bool
   160 init_treesit_functions (void)
   161 {
   162   HMODULE library = w32_delayed_load (Qtree_sitter);
   163 
   164   if (!library)
   165     return false;
   166 
   167   LOAD_DLL_FN (library, ts_language_version);
   168   LOAD_DLL_FN (library, ts_node_child);
   169   LOAD_DLL_FN (library, ts_node_child_by_field_name);
   170   LOAD_DLL_FN (library, ts_node_child_count);
   171   LOAD_DLL_FN (library, ts_node_descendant_for_byte_range);
   172   LOAD_DLL_FN (library, ts_node_end_byte);
   173   LOAD_DLL_FN (library, ts_node_eq);
   174   LOAD_DLL_FN (library, ts_node_field_name_for_child);
   175   LOAD_DLL_FN (library, ts_node_has_error);
   176   LOAD_DLL_FN (library, ts_node_is_extra);
   177   LOAD_DLL_FN (library, ts_node_is_missing);
   178   LOAD_DLL_FN (library, ts_node_is_named);
   179   LOAD_DLL_FN (library, ts_node_is_null);
   180   LOAD_DLL_FN (library, ts_node_named_child);
   181   LOAD_DLL_FN (library, ts_node_named_child_count);
   182   LOAD_DLL_FN (library, ts_node_named_descendant_for_byte_range);
   183   LOAD_DLL_FN (library, ts_node_next_named_sibling);
   184   LOAD_DLL_FN (library, ts_node_next_sibling);
   185   LOAD_DLL_FN (library, ts_node_prev_named_sibling);
   186   LOAD_DLL_FN (library, ts_node_prev_sibling);
   187   LOAD_DLL_FN (library, ts_node_start_byte);
   188   LOAD_DLL_FN (library, ts_node_string);
   189   LOAD_DLL_FN (library, ts_node_type);
   190   LOAD_DLL_FN (library, ts_parser_delete);
   191   LOAD_DLL_FN (library, ts_parser_included_ranges);
   192   LOAD_DLL_FN (library, ts_parser_language);
   193   LOAD_DLL_FN (library, ts_parser_new);
   194   LOAD_DLL_FN (library, ts_parser_parse);
   195   LOAD_DLL_FN (library, ts_parser_set_included_ranges);
   196   LOAD_DLL_FN (library, ts_parser_set_language);
   197   LOAD_DLL_FN (library, ts_query_capture_name_for_id);
   198   LOAD_DLL_FN (library, ts_query_cursor_delete);
   199   LOAD_DLL_FN (library, ts_query_cursor_exec);
   200   LOAD_DLL_FN (library, ts_query_cursor_new);
   201   LOAD_DLL_FN (library, ts_query_cursor_next_match);
   202   LOAD_DLL_FN (library, ts_query_cursor_set_byte_range);
   203   LOAD_DLL_FN (library, ts_query_delete);
   204   LOAD_DLL_FN (library, ts_query_new);
   205   LOAD_DLL_FN (library, ts_query_pattern_count);
   206   LOAD_DLL_FN (library, ts_query_predicates_for_pattern);
   207   LOAD_DLL_FN (library, ts_query_string_value_for_id);
   208   LOAD_DLL_FN (library, ts_set_allocator);
   209   LOAD_DLL_FN (library, ts_tree_cursor_copy);
   210   LOAD_DLL_FN (library, ts_tree_cursor_current_node);
   211   LOAD_DLL_FN (library, ts_tree_cursor_delete);
   212   LOAD_DLL_FN (library, ts_tree_cursor_goto_first_child);
   213   LOAD_DLL_FN (library, ts_tree_cursor_goto_next_sibling);
   214   LOAD_DLL_FN (library, ts_tree_cursor_goto_parent);
   215   LOAD_DLL_FN (library, ts_tree_cursor_new);
   216   LOAD_DLL_FN (library, ts_tree_delete);
   217   LOAD_DLL_FN (library, ts_tree_edit);
   218   LOAD_DLL_FN (library, ts_tree_get_changed_ranges);
   219   LOAD_DLL_FN (library, ts_tree_root_node);
   220 
   221   return true;
   222 }
   223 
   224 #define ts_language_version fn_ts_language_version
   225 #define ts_node_child fn_ts_node_child
   226 #define ts_node_child_by_field_name fn_ts_node_child_by_field_name
   227 #define ts_node_child_count fn_ts_node_child_count
   228 #define ts_node_descendant_for_byte_range fn_ts_node_descendant_for_byte_range
   229 #define ts_node_end_byte fn_ts_node_end_byte
   230 #define ts_node_eq fn_ts_node_eq
   231 #define ts_node_field_name_for_child fn_ts_node_field_name_for_child
   232 #define ts_node_has_error fn_ts_node_has_error
   233 #define ts_node_is_extra fn_ts_node_is_extra
   234 #define ts_node_is_missing fn_ts_node_is_missing
   235 #define ts_node_is_named fn_ts_node_is_named
   236 #define ts_node_is_null fn_ts_node_is_null
   237 #define ts_node_named_child fn_ts_node_named_child
   238 #define ts_node_named_child_count fn_ts_node_named_child_count
   239 #define ts_node_named_descendant_for_byte_range fn_ts_node_named_descendant_for_byte_range
   240 #define ts_node_next_named_sibling fn_ts_node_next_named_sibling
   241 #define ts_node_next_sibling fn_ts_node_next_sibling
   242 #define ts_node_prev_named_sibling fn_ts_node_prev_named_sibling
   243 #define ts_node_prev_sibling fn_ts_node_prev_sibling
   244 #define ts_node_start_byte fn_ts_node_start_byte
   245 #define ts_node_string fn_ts_node_string
   246 #define ts_node_type fn_ts_node_type
   247 #define ts_parser_delete fn_ts_parser_delete
   248 #define ts_parser_included_ranges fn_ts_parser_included_ranges
   249 #define ts_parser_language fn_ts_parser_language
   250 #define ts_parser_new fn_ts_parser_new
   251 #define ts_parser_parse fn_ts_parser_parse
   252 #define ts_parser_set_included_ranges fn_ts_parser_set_included_ranges
   253 #define ts_parser_set_language fn_ts_parser_set_language
   254 #define ts_query_capture_name_for_id fn_ts_query_capture_name_for_id
   255 #define ts_query_cursor_delete fn_ts_query_cursor_delete
   256 #define ts_query_cursor_exec fn_ts_query_cursor_exec
   257 #define ts_query_cursor_new fn_ts_query_cursor_new
   258 #define ts_query_cursor_next_match fn_ts_query_cursor_next_match
   259 #define ts_query_cursor_set_byte_range fn_ts_query_cursor_set_byte_range
   260 #define ts_query_delete fn_ts_query_delete
   261 #define ts_query_new fn_ts_query_new
   262 #define ts_query_pattern_count fn_ts_query_pattern_count
   263 #define ts_query_predicates_for_pattern fn_ts_query_predicates_for_pattern
   264 #define ts_query_string_value_for_id fn_ts_query_string_value_for_id
   265 #define ts_set_allocator fn_ts_set_allocator
   266 #define ts_tree_cursor_copy fn_ts_tree_cursor_copy
   267 #define ts_tree_cursor_current_node fn_ts_tree_cursor_current_node
   268 #define ts_tree_cursor_delete fn_ts_tree_cursor_delete
   269 #define ts_tree_cursor_goto_first_child fn_ts_tree_cursor_goto_first_child
   270 #define ts_tree_cursor_goto_next_sibling fn_ts_tree_cursor_goto_next_sibling
   271 #define ts_tree_cursor_goto_parent fn_ts_tree_cursor_goto_parent
   272 #define ts_tree_cursor_new fn_ts_tree_cursor_new
   273 #define ts_tree_delete fn_ts_tree_delete
   274 #define ts_tree_edit fn_ts_tree_edit
   275 #define ts_tree_get_changed_ranges fn_ts_tree_get_changed_ranges
   276 #define ts_tree_root_node fn_ts_tree_root_node
   277 
   278 #endif  /* WINDOWSNT */
   279 
   280 
   281 /* Commentary
   282 
   283    The Emacs wrapper of tree-sitter does not expose everything the C
   284    API provides, most notably:
   285 
   286    - It doesn't expose a syntax tree.  The syntax tree is part of the
   287      parser object, and updating the tree is handled on the C level.
   288 
   289    - It doesn't expose the tree cursor, either.  Presumably, Lisp is
   290      slow enough to make insignificant any performance advantages from
   291      using the cursor.  Not exposing the cursor also minimizes the
   292      number of new types this adds to Emacs Lisp; currently, this adds
   293      only the parser, node, and compiled query types.
   294 
   295    - Because updating the change is handled on the C level as each
   296      change is made in the buffer, there is no way for Lisp to update
   297      a node.  But since we can just retrieve a new node, it shouldn't
   298      be a limitation.
   299 
   300    - I didn't expose setting timeout and cancellation flag for a
   301      parser, mainly because I don't think they are really necessary
   302      in Emacs's use cases.
   303 
   304    - Many tree-sitter functions take a TSPoint, which is basically a
   305      row and column.  Emacs uses a gap buffer and does not keep
   306      information about the row and column position of a buffer.
   307      According to the author of tree-sitter, those functions only take
   308      a TSPoint so that it can be moved alongside the byte position and
   309      returned to the caller afterwards, and the position actually used
   310      is the specified byte position.  He also said that he _thinks_
   311      that just passing a byte position will also work.  As a result, a
   312      dummy value is used in place of each TSPoint.  Judging by the
   313      nature of parsing algorithms, I think it is safe to use only the
   314      byte position, and I don't think this will change in the future.
   315 
   316      See: https://github.com/tree-sitter/tree-sitter/issues/445
   317 
   318    treesit.h has some commentary on the two main data structure for
   319    the parser and node.  treesit_sync_visible_region has some
   320    commentary on how we make tree-sitter play well with narrowing (the
   321    tree-sitter parser only sees the visible region, so we need to
   322    translate positions back and forth).  Most action happens in
   323    treesit_ensure_parsed, treesit_read_buffer and
   324    treesit_record_change.
   325 
   326    A complete correspondence list between tree-sitter functions and
   327    exposed Lisp functions can be found in the manual node (elisp)API
   328    Correspondence.
   329 
   330    Placement of CHECK_xxx functions: call CHECK_xxx before using any
   331    unchecked Lisp values; these include arguments of Lisp functions,
   332    the return value of Fsymbol_value, and that of Fcar or Fcdr on
   333    user-specified conses.
   334 
   335    Initializing tree-sitter: there are two entry points to tree-sitter
   336    functions: 'treesit-parser-create' and
   337    'treesit-language-available-p'.  Technically we only need to call
   338    initialization function in those two functions, but in reality we
   339    check at the beginning of every Lisp function.  That should be more
   340    fool-proof.
   341 
   342    Tree-sitter offset (0-based) and buffer position (1-based):
   343      tree-sitter offset + buffer position = buffer position
   344      buffer position - buffer position = tree-sitter offset
   345 
   346    Tree-sitter-related code in other files:
   347    - src/alloc.c for gc for parser and node
   348    - src/casefiddle.c & src/insdel.c for notifying tree-sitter
   349      parser of buffer changes.
   350    - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and
   351      node type.
   352    - print.c for printing tree-sitter objects (node, parser, query).
   353 
   354    Regarding signals: only raise signals in Lisp functions.
   355 
   356    Casts from EMACS_INT and ptrdiff_t to uint32_t: We install checks
   357    for buffer size and range and thus able to assume these casts never
   358    overflow.
   359 
   360    We don't parse at every keystroke.  Instead we only record the
   361    changes at each keystroke, and only parse when requested.  It is
   362    possible that lazy parsing is worse: instead of dispersed little
   363    pauses, now you have less frequent but larger pauses.  I doubt
   364    there will be any perceived difference, as the lazy parsing is
   365    going to be pretty frequent anyway.  Also this (lazy parsing) is
   366    what the mailing list guys wanted.
   367 
   368    Because it is pretty slow (comparing to other tree-sitter
   369    operations) for tree-sitter to parse the query and produce a query
   370    object, it is very wasteful to reparse the query every time
   371    treesit-query-capture is called, and it completely kills the
   372    performance of querying in a loop for a moderate amount of times
   373    (hundreds of queries takes seconds rather than milliseconds to
   374    complete).  Therefore we want some caching.  We can either use a
   375    search.c style transparent caching, or simply expose a new type,
   376    compiled-ts-query and let the user to manually compile AOT.  I
   377    believe AOT compiling gives users more control, makes the
   378    performance stable and easy to understand (compiled -> fast,
   379    uncompiled -> slow), and avoids some edge cases transparent cache
   380    could have (see below).  So I implemented the AOT compilation.
   381 
   382    Problems a transparent cache could have: Suppose we store cache
   383    entries in a fixed-length linked-list, and compare with EQ.  1)
   384    One-off query could kick out useful cache.  2) if the user messed
   385    up and the query doesn't EQ to the cache anymore, the performance
   386    mysteriously drops.  3) what if a user uses so many stuff that the
   387    default cache size (20) is not enough and we end up thrashing?
   388    These are all imaginary scenarios but they are not impossible
   389    :-)
   390 
   391    Parsers in indirect buffers: We make indirect buffers to share the
   392    parser of its base buffer.  Indirect buffers and their base buffer
   393    share the same buffer content but not other buffer attributes.  If
   394    they have separate parser lists, changes made in an indirect buffer
   395    will only update parsers of that indirect buffer, and not parsers
   396    in the base buffer or other indirect buffers, and vice versa.  We
   397    could keep track of all the base and indirect buffers, and update
   398    all of their parsers, but ultimately decide to take a simpler
   399    approach, which is to make indirect buffers share their base
   400    buffer's parser list.  The discussion can be found in bug#59693.  */
   401 
   402 
   403 /*** Initialization */
   404 
   405 static Lisp_Object Vtreesit_str_libtree_sitter;
   406 static Lisp_Object Vtreesit_str_tree_sitter;
   407 #ifndef WINDOWSNT
   408 static Lisp_Object Vtreesit_str_dot_0;
   409 #endif
   410 static Lisp_Object Vtreesit_str_dot;
   411 static Lisp_Object Vtreesit_str_question_mark;
   412 static Lisp_Object Vtreesit_str_star;
   413 static Lisp_Object Vtreesit_str_plus;
   414 static Lisp_Object Vtreesit_str_pound_equal;
   415 static Lisp_Object Vtreesit_str_pound_match;
   416 static Lisp_Object Vtreesit_str_pound_pred;
   417 static Lisp_Object Vtreesit_str_open_bracket;
   418 static Lisp_Object Vtreesit_str_close_bracket;
   419 static Lisp_Object Vtreesit_str_open_paren;
   420 static Lisp_Object Vtreesit_str_close_paren;
   421 static Lisp_Object Vtreesit_str_space;
   422 static Lisp_Object Vtreesit_str_equal;
   423 static Lisp_Object Vtreesit_str_match;
   424 static Lisp_Object Vtreesit_str_pred;
   425 
   426 /* This is the limit on recursion levels for some tree-sitter
   427    functions.  Remember to update docstrings when changing this value.
   428 
   429    If we think of programs and AST, it is very rare for any program to
   430    have a very deep AST. For example, you would need 1000+ levels of
   431    nested if-statements, or a struct somehow nested for 1000+ levels.
   432    It’s hard for me to imagine any hand-written or machine generated
   433    program to be like that.  So I think 1000 is already generous.  If
   434    we look at xdisp.c, its AST only have 30 levels.  */
   435 #define TREESIT_RECURSION_LIMIT 1000
   436 
   437 static bool treesit_initialized = false;
   438 
   439 static bool
   440 load_tree_sitter_if_necessary (bool required)
   441 {
   442 #ifdef WINDOWSNT
   443   static bool tried_to_initialize_once;
   444   static bool tree_sitter_initialized;
   445 
   446   if (!tried_to_initialize_once)
   447     {
   448       Lisp_Object status;
   449 
   450       tried_to_initialize_once = true;
   451       tree_sitter_initialized = init_treesit_functions ();
   452       status = tree_sitter_initialized ? Qt : Qnil;
   453       Vlibrary_cache = Fcons (Fcons (Qtree_sitter, status), Vlibrary_cache);
   454     }
   455 
   456   if (required && !tree_sitter_initialized)
   457     xsignal1 (Qtreesit_error,
   458               build_string ("tree-sitter library not found or failed to load"));
   459 
   460   return tree_sitter_initialized;
   461 #else
   462   return true;
   463 #endif
   464 }
   465 
   466 static void *
   467 treesit_calloc_wrapper (size_t n, size_t size)
   468 {
   469   return xzalloc (n * size);
   470 }
   471 
   472 static void
   473 treesit_initialize (void)
   474 {
   475   if (!treesit_initialized)
   476     {
   477       load_tree_sitter_if_necessary (true);
   478       ts_set_allocator (xmalloc, treesit_calloc_wrapper, xrealloc, xfree);
   479       treesit_initialized = true;
   480     }
   481 }
   482 
   483 
   484 /*** Loading language library */
   485 
   486 /* Translates a symbol treesit-<lang> to a C name
   487    treesit_<lang>.  */
   488 static void
   489 treesit_symbol_to_c_name (char *symbol_name)
   490 {
   491   size_t len = strlen (symbol_name);
   492   for (int idx = 0; idx < len; idx++)
   493     {
   494       if (symbol_name[idx] == '-')
   495         symbol_name[idx] = '_';
   496     }
   497 }
   498 
   499 /* Find the override name for LANGUAGE_SYMBOL in
   500    treesit-load-name-override-list.  Set NAME and C_SYMBOL to the
   501    override name, and return true if there exists one, otherwise
   502    return false.
   503 
   504    This function may signal if treesit-load-name-override-list is
   505    malformed.  */
   506 static bool
   507 treesit_find_override_name (Lisp_Object language_symbol, Lisp_Object *name,
   508                             Lisp_Object *c_symbol)
   509 {
   510   CHECK_LIST (Vtreesit_load_name_override_list);
   511   Lisp_Object tail = Vtreesit_load_name_override_list;
   512 
   513   FOR_EACH_TAIL (tail)
   514     {
   515       Lisp_Object entry = XCAR (tail);
   516       CHECK_LIST (entry);
   517       Lisp_Object lang = XCAR (entry);
   518       CHECK_SYMBOL (lang);
   519 
   520       if (EQ (lang, language_symbol))
   521         {
   522           *name = Fnth (make_fixnum (1), entry);
   523           CHECK_STRING (*name);
   524           *c_symbol = Fnth (make_fixnum (2), entry);
   525           CHECK_STRING (*c_symbol);
   526 
   527           return true;
   528         }
   529     }
   530 
   531   CHECK_LIST_END (tail, Vtreesit_load_name_override_list);
   532 
   533   return false;
   534 }
   535 
   536 /* For example, if Vdynamic_library_suffixes is (".so", ".dylib"),
   537    this function pushes "lib_base_name.so" and "lib_base_name.dylib"
   538    into *path_candidates.  Obviously path_candidates should be a Lisp
   539    list of Lisp strings.  */
   540 static void
   541 treesit_load_language_push_for_each_suffix (Lisp_Object lib_base_name,
   542                                             Lisp_Object *path_candidates)
   543 {
   544   Lisp_Object suffixes;
   545 
   546   suffixes = Vdynamic_library_suffixes;
   547 
   548   FOR_EACH_TAIL (suffixes)
   549     {
   550       Lisp_Object candidate1 = concat2 (lib_base_name, XCAR (suffixes));
   551 #ifndef WINDOWSNT
   552       /* On Posix hosts, support libraries named with ABI version
   553          numbers.  In the foreseeable future we only need to support
   554          version 0.0.  For more details, see
   555          https://lists.gnu.org/archive/html/emacs-devel/2023-04/msg00386.html.  */
   556       Lisp_Object candidate2 = concat2 (candidate1, Vtreesit_str_dot_0);
   557       Lisp_Object candidate3 = concat2 (candidate2, Vtreesit_str_dot_0);
   558 
   559       *path_candidates = Fcons (candidate3, *path_candidates);
   560       *path_candidates = Fcons (candidate2, *path_candidates);
   561 #endif
   562       *path_candidates = Fcons (candidate1, *path_candidates);
   563     }
   564 }
   565 
   566 /* Load the dynamic library of LANGUAGE_SYMBOL and return the pointer
   567    to the language definition.
   568 
   569    If error occurs, return NULL and fill SIGNAL_SYMBOL and SIGNAL_DATA
   570    with values suitable for xsignal.  */
   571 static TSLanguage *
   572 treesit_load_language (Lisp_Object language_symbol,
   573                        Lisp_Object *signal_symbol, Lisp_Object *signal_data)
   574 {
   575   Lisp_Object symbol_name = Fsymbol_name (language_symbol);
   576 
   577   CHECK_LIST (Vtreesit_extra_load_path);
   578 
   579   /* Figure out the library name and C name.  */
   580   Lisp_Object lib_base_name
   581     = concat2 (Vtreesit_str_libtree_sitter, symbol_name);
   582   Lisp_Object base_name
   583     = concat2 (Vtreesit_str_tree_sitter, symbol_name);
   584 
   585   /* Override the library name and C name, if appropriate.  */
   586   Lisp_Object override_name;
   587   Lisp_Object override_c_name;
   588   bool found_override = treesit_find_override_name (language_symbol,
   589                                                     &override_name,
   590                                                     &override_c_name);
   591   if (found_override)
   592     lib_base_name = override_name;
   593 
   594   /* Now we generate a list of possible library paths.  */
   595   Lisp_Object path_candidates = Qnil;
   596   /* First push just the filenames to the candidate list, which will
   597      make dynlib_open look under standard system load paths.  */
   598   treesit_load_language_push_for_each_suffix (lib_base_name, &path_candidates);
   599   /* This is used for reporting errors (i.e., just filenames).  */
   600   Lisp_Object base_candidates = path_candidates;
   601   /* Then push ~/.emacs.d/tree-sitter paths.  */
   602   Lisp_Object lib_name
   603     = Fexpand_file_name (concat2 (build_string ("tree-sitter/"), lib_base_name),
   604                          Fsymbol_value (Quser_emacs_directory));
   605   treesit_load_language_push_for_each_suffix (lib_name, &path_candidates);
   606   /* Then push paths from treesit-extra-load-path.  */
   607   Lisp_Object tail;
   608 
   609   tail = Freverse (Vtreesit_extra_load_path);
   610 
   611   FOR_EACH_TAIL (tail)
   612     {
   613       Lisp_Object expanded_lib = Fexpand_file_name (lib_base_name, XCAR (tail));
   614       treesit_load_language_push_for_each_suffix (expanded_lib,
   615                                                   &path_candidates);
   616     }
   617 
   618   /* Try loading the dynamic library by each path candidate.  Stop
   619      when succeed, record the error message and try the next one when
   620      fail.  */
   621   dynlib_handle_ptr handle;
   622   const char *error;
   623 
   624   tail = path_candidates;
   625   error = NULL;
   626   handle = NULL;
   627 
   628   FOR_EACH_TAIL (tail)
   629     {
   630       char *library_name = SSDATA (XCAR (tail));
   631       dynlib_error ();
   632       handle = dynlib_open (library_name);
   633       error = dynlib_error ();
   634       if (error == NULL)
   635         break;
   636     }
   637 
   638   if (error != NULL)
   639     {
   640       *signal_symbol = Qtreesit_load_language_error;
   641       *signal_data = list3 (Qnot_found, base_candidates,
   642                             build_string ("No such file or directory"));
   643       return NULL;
   644     }
   645 
   646   /* Load TSLanguage.  */
   647   eassume (handle != NULL);
   648   dynlib_error ();
   649   TSLanguage *(*langfn) (void);
   650   char *c_name;
   651   if (found_override)
   652     c_name = xstrdup (SSDATA (override_c_name));
   653   else
   654     {
   655       c_name = xstrdup (SSDATA (base_name));
   656       treesit_symbol_to_c_name (c_name);
   657     }
   658   langfn = dynlib_sym (handle, c_name);
   659   xfree (c_name);
   660   error = dynlib_error ();
   661   if (error != NULL)
   662     {
   663       *signal_symbol = Qtreesit_load_language_error;
   664       *signal_data = list2 (Qsymbol_error, build_string (error));
   665       return NULL;
   666     }
   667   TSLanguage *lang = (*langfn) ();
   668 
   669   /* Check if language version matches tree-sitter version.  */
   670   TSParser *parser = ts_parser_new ();
   671   bool success = ts_parser_set_language (parser, lang);
   672   ts_parser_delete (parser);
   673   if (!success)
   674     {
   675       *signal_symbol = Qtreesit_load_language_error;
   676       *signal_data = list2 (Qversion_mismatch,
   677                             make_fixnum (ts_language_version (lang)));
   678       return NULL;
   679     }
   680   return lang;
   681 }
   682 
   683 DEFUN ("treesit-language-available-p", Ftreesit_language_available_p,
   684        Streesit_language_available_p,
   685        1, 2, 0,
   686        doc: /* Return non-nil if LANGUAGE exists and is loadable.
   687 
   688 If DETAIL is non-nil, return (t . nil) when LANGUAGE is available,
   689 (nil . DATA) when unavailable.  DATA is the signal data of
   690 `treesit-load-language-error'.  */)
   691   (Lisp_Object language, Lisp_Object detail)
   692 {
   693   CHECK_SYMBOL (language);
   694   treesit_initialize ();
   695   Lisp_Object signal_symbol = Qnil;
   696   Lisp_Object signal_data = Qnil;
   697   if (treesit_load_language (language, &signal_symbol, &signal_data) == NULL)
   698     {
   699       if (NILP (detail))
   700         return Qnil;
   701       else
   702         return Fcons (Qnil, signal_data);
   703     }
   704   else
   705     {
   706       if (NILP (detail))
   707         return Qt;
   708       else
   709         return Fcons (Qt, Qnil);
   710     }
   711 }
   712 
   713 DEFUN ("treesit-library-abi-version", Ftreesit_library_abi_version,
   714        Streesit_library_abi_version,
   715        0, 1, 0,
   716        doc: /* Return the language ABI version of the tree-sitter library.
   717 
   718 By default, report the latest ABI version supported by the library for
   719 loading language support modules.  The library is backward-compatible
   720 with language modules which use older ABI versions; if MIN-COMPATIBLE
   721 is non-nil, return the oldest compatible ABI version.  */)
   722   (Lisp_Object min_compatible)
   723 {
   724   if (NILP (min_compatible))
   725     return make_fixnum (TREE_SITTER_LANGUAGE_VERSION);
   726   else
   727     return make_fixnum (TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION);
   728 }
   729 
   730 DEFUN ("treesit-language-abi-version", Ftreesit_language_abi_version,
   731        Streesit_language_abi_version,
   732        0, 1, 0,
   733        doc: /* Return the ABI version of the tree-sitter grammar for LANGUAGE.
   734 Return nil if a grammar library for LANGUAGE is not available.  */)
   735   (Lisp_Object language)
   736 {
   737   if (NILP (Ftreesit_language_available_p (language, Qnil)))
   738     return Qnil;
   739   else
   740     {
   741       Lisp_Object signal_symbol = Qnil;
   742       Lisp_Object signal_data = Qnil;
   743       TSLanguage *ts_language = treesit_load_language (language,
   744                                                        &signal_symbol,
   745                                                        &signal_data);
   746       if (ts_language == NULL)
   747         return Qnil;
   748       uint32_t version =  ts_language_version (ts_language);
   749       return make_fixnum((ptrdiff_t) version);
   750     }
   751 }
   752 
   753 
   754 /*** Parsing functions */
   755 
   756 static void
   757 treesit_check_parser (Lisp_Object obj)
   758 {
   759   CHECK_TS_PARSER (obj);
   760   if (XTS_PARSER (obj)->deleted)
   761     xsignal1 (Qtreesit_parser_deleted, obj);
   762 }
   763 
   764 /* An auxiliary function that saves a few lines of code.  Assumes TREE
   765    is not NULL.  START_BYTE, OLD_END_BYTE, NEW_END_BYTE must not be
   766    larger than UINT32_MAX.  */
   767 static inline void
   768 treesit_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte,
   769                      ptrdiff_t old_end_byte, ptrdiff_t new_end_byte)
   770 {
   771   eassert (start_byte >= 0);
   772   eassert (start_byte <= old_end_byte);
   773   eassert (start_byte <= new_end_byte);
   774   TSPoint dummy_point = {0, 0};
   775   eassert (start_byte <= UINT32_MAX);
   776   eassert (old_end_byte <= UINT32_MAX);
   777   eassert (new_end_byte <= UINT32_MAX);
   778   TSInputEdit edit = {(uint32_t) start_byte,
   779                       (uint32_t) old_end_byte,
   780                       (uint32_t) new_end_byte,
   781                       dummy_point, dummy_point, dummy_point};
   782   ts_tree_edit (tree, &edit);
   783 }
   784 
   785 /* Update each parser's tree after the user made an edit.  This
   786    function does not parse the buffer and only updates the tree, so it
   787    should be very fast.  */
   788 void
   789 treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
   790                        ptrdiff_t new_end_byte)
   791 {
   792   struct buffer *base_buffer = current_buffer;
   793   if (current_buffer->base_buffer)
   794     base_buffer = current_buffer->base_buffer;
   795   Lisp_Object parser_list = BVAR (base_buffer, ts_parser_list);
   796 
   797   FOR_EACH_TAIL_SAFE (parser_list)
   798     {
   799       CHECK_CONS (parser_list);
   800       Lisp_Object lisp_parser = XCAR (parser_list);
   801       treesit_check_parser (lisp_parser);
   802       TSTree *tree = XTS_PARSER (lisp_parser)->tree;
   803       /* See comment (ref:visible-beg-null) if you wonder why we don't
   804          update visible_beg/end when tree is NULL.  */
   805 
   806       if (tree != NULL)
   807         {
   808           eassert (start_byte <= old_end_byte);
   809           eassert (start_byte <= new_end_byte);
   810           /* Think the recorded change as a delete followed by an
   811              insert, and think of them as moving unchanged text back
   812              and forth.  After all, the whole point of updating the
   813              tree is to update the position of unchanged text.  */
   814           ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
   815           ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end;
   816           eassert (visible_beg >= 0);
   817           eassert (visible_beg <= visible_end);
   818 
   819           /* AFFECTED_START/OLD_END/NEW_END are (0-based) offsets from
   820              VISIBLE_BEG.  min(visi_end, max(visi_beg, value)) clips
   821              value into [visi_beg, visi_end], and subtracting visi_beg
   822              gives the offset from visi_beg.  */
   823           ptrdiff_t start_offset = (min (visible_end,
   824                                          max (visible_beg, start_byte))
   825                                     - visible_beg);
   826           ptrdiff_t old_end_offset = (min (visible_end,
   827                                            max (visible_beg, old_end_byte))
   828                                       - visible_beg);
   829           /* We don't clip new_end_offset under visible_end, because
   830              otherwise we would miss updating the clipped part.  Plus,
   831              when inserting in narrowed region, the narrowed region
   832              will grow to accommodate the new text, so this is the
   833              correct behavior.  (Bug#61369).  */
   834           ptrdiff_t new_end_offset = (max (visible_beg, new_end_byte)
   835                                       - visible_beg);
   836           eassert (start_offset <= old_end_offset);
   837           eassert (start_offset <= new_end_offset);
   838 
   839           treesit_tree_edit_1 (tree, start_offset, old_end_offset,
   840                                new_end_offset);
   841           XTS_PARSER (lisp_parser)->need_reparse = true;
   842           XTS_PARSER (lisp_parser)->timestamp++;
   843 
   844           /* VISIBLE_BEG/END records tree-sitter's range of view in
   845              the buffer.  We need to adjust them when tree-sitter's
   846              view changes.  */
   847           ptrdiff_t visi_beg_delta;
   848           if (old_end_byte > new_end_byte)
   849             /* Move backward.  */
   850             visi_beg_delta = (min (visible_beg, new_end_byte)
   851                               - min (visible_beg, old_end_byte));
   852           else
   853             /* Move forward.  */
   854             visi_beg_delta = (old_end_byte < visible_beg
   855                               ? new_end_byte - old_end_byte : 0);
   856 
   857           XTS_PARSER (lisp_parser)->visible_beg = visible_beg + visi_beg_delta;
   858           XTS_PARSER (lisp_parser)->visible_end = (visible_end
   859                                                    + visi_beg_delta
   860                                                    + (new_end_offset
   861                                                       - old_end_offset));
   862 
   863           eassert (XTS_PARSER (lisp_parser)->visible_beg >= 0);
   864           eassert (XTS_PARSER (lisp_parser)->visible_beg
   865                    <= XTS_PARSER (lisp_parser)->visible_end);
   866         }
   867     }
   868 }
   869 
   870 /* Comment (ref:visible-beg-null) The purpose of visible_beg/end is to
   871    keep track of "which part of the buffer does the tree-sitter tree
   872    see", in order to update the tree correctly.  Visible_beg/end have
   873    two purposes: they "clip" buffer changes within them, and they
   874    translate positions in the buffer to positions in the tree
   875    (buffer position - visible_beg = tree position).
   876 
   877    Conceptually, visible_beg/end hold the visible region of the buffer
   878    when we last reparsed.  In-between two reparses, we don't really
   879    care if the visible region of the buffer changes.
   880 
   881    Right before we reparse, we make tree-sitter's visible region
   882    match that of the buffer, and update visible_beg/end.
   883 
   884    That is, the whole purpose of visible_beg/end (and also of
   885    treesit_record_change and treesit_sync_visible_region) is to update
   886    the tree (by ts_tree_edit).  So if the tree is NULL,
   887    visible_beg/end are considered uninitialized.  Only when we already
   888    have a tree, do we need to keep track of position changes and
   889    update it correctly, so it can be fed into ts_parser_parse as the
   890    old tree, so that tree-sitter will only parse the changed part,
   891    incrementally.
   892 
   893    In a nutshell, tree-sitter incremental parsing in Emacs looks like:
   894 
   895    treesit_record_change (tree)  \
   896    treesit_record_change (tree)  | user edits buffer
   897    ...                           /
   898 
   899    treesit_sync_visible_region (tree) \ treesit_ensure_parsed
   900    ts_parser_parse(tree) -> tree      /
   901 
   902    treesit_record_change (tree)  \
   903    treesit_record_change (tree)  | user edits buffer
   904    ...                           /
   905 
   906    and so on.  */
   907 
   908 /* Make sure the tree's visible range is in sync with the buffer's
   909    visible range, and PARSER's visible_beg and visible_end are in sync
   910    with BUF_BEGV_BYTE and BUG_ZV_BYTE.  When calling this function you
   911    must make sure the current buffer's size in bytes is not larger than
   912    UINT32_MAX.  Basically, always call treesit_check_buffer_size before
   913    this function.
   914 
   915    If buffer range changed since last parse (visible_beg/end doesn't
   916    match buffer visible beginning/end), set need_reparse to true.  */
   917 static void
   918 treesit_sync_visible_region (Lisp_Object parser)
   919 {
   920   TSTree *tree = XTS_PARSER (parser)->tree;
   921   struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
   922 
   923   /* If we are setting visible_beg/end for the first time, we can skip
   924   the offset acrobatics and updating the tree below.  */
   925   if (tree == NULL)
   926     {
   927       XTS_PARSER (parser)->visible_beg = BUF_BEGV_BYTE (buffer);
   928       XTS_PARSER (parser)->visible_end = BUF_ZV_BYTE (buffer);
   929       return;
   930     }
   931 
   932   ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg;
   933   ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end;
   934   eassert (0 <= visible_beg);
   935   eassert (visible_beg <= visible_end);
   936 
   937   eassert (BUF_BEGV_BYTE (buffer) <= UINT32_MAX);
   938   eassert (BUF_ZV_BYTE (buffer) <= UINT32_MAX);
   939 
   940   /* If buffer restriction changed and user requests for a node (hence
   941      this function is called), we need to reparse.  */
   942   if (visible_beg != BUF_BEGV_BYTE (buffer)
   943       || visible_end != BUF_ZV_BYTE (buffer))
   944     XTS_PARSER (parser)->need_reparse = true;
   945 
   946   /* Before we parse or set ranges, catch up with the narrowing
   947      situation.  We change visible_beg and visible_end to match
   948      BUF_BEGV_BYTE and BUF_ZV_BYTE, and inform tree-sitter of the
   949      change.  We want to move the visible range of tree-sitter to
   950      match the narrowed range.  For example,
   951      from ________|xxxx|__
   952      to   |xxxx|__________ */
   953 
   954   /* 1. Make sure visible_beg <= BUF_BEGV_BYTE.  */
   955   if (visible_beg > BUF_BEGV_BYTE (buffer))
   956     {
   957       /* Tree-sitter sees: insert at the beginning.  */
   958       treesit_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer));
   959       visible_beg = BUF_BEGV_BYTE (buffer);
   960       eassert (visible_beg <= visible_end);
   961     }
   962   /* 2. Make sure visible_end = BUF_ZV_BYTE.  */
   963   if (visible_end < BUF_ZV_BYTE (buffer))
   964     {
   965       /* Tree-sitter sees: insert at the end.  */
   966       treesit_tree_edit_1 (tree, visible_end - visible_beg,
   967                            visible_end - visible_beg,
   968                            BUF_ZV_BYTE (buffer) - visible_beg);
   969       visible_end = BUF_ZV_BYTE (buffer);
   970       eassert (visible_beg <= visible_end);
   971     }
   972   else if (visible_end > BUF_ZV_BYTE (buffer))
   973     {
   974       /* Tree-sitter sees: delete at the end.  */
   975       treesit_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg,
   976                            visible_end - visible_beg,
   977                            BUF_ZV_BYTE (buffer) - visible_beg);
   978       visible_end = BUF_ZV_BYTE (buffer);
   979       eassert (visible_beg <= visible_end);
   980     }
   981   /* 3. Make sure visible_beg = BUF_BEGV_BYTE.  */
   982   if (visible_beg < BUF_BEGV_BYTE (buffer))
   983     {
   984       /* Tree-sitter sees: delete at the beginning.  */
   985       treesit_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0);
   986       visible_beg = BUF_BEGV_BYTE (buffer);
   987       eassert (visible_beg <= visible_end);
   988     }
   989   eassert (0 <= visible_beg);
   990   eassert (visible_beg <= visible_end);
   991   eassert (visible_beg == BUF_BEGV_BYTE (buffer));
   992   eassert (visible_end == BUF_ZV_BYTE (buffer));
   993 
   994   XTS_PARSER (parser)->visible_beg = visible_beg;
   995   XTS_PARSER (parser)->visible_end = visible_end;
   996 }
   997 
   998 static void
   999 treesit_check_buffer_size (struct buffer *buffer)
  1000 {
  1001   ptrdiff_t buffer_size_bytes = (BUF_Z_BYTE (buffer) - BUF_BEG_BYTE (buffer));
  1002   if (buffer_size_bytes > UINT32_MAX)
  1003     xsignal2 (Qtreesit_buffer_too_large,
  1004               build_string ("Buffer size cannot be larger than 4GB"),
  1005               make_fixnum (buffer_size_bytes));
  1006 }
  1007 
  1008 static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t, struct buffer *);
  1009 
  1010 static void
  1011 treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
  1012                                      Lisp_Object parser)
  1013 {
  1014   /* If the old_tree is NULL, meaning this is the first parse, the
  1015      changed range is the whole buffer.  */
  1016   Lisp_Object lisp_ranges;
  1017   struct buffer *buf = XBUFFER (XTS_PARSER (parser)->buffer);
  1018   if (old_tree)
  1019     {
  1020       uint32_t len;
  1021       TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len);
  1022       lisp_ranges = treesit_make_ranges (ranges, len, buf);
  1023       xfree (ranges);
  1024     }
  1025   else
  1026     {
  1027       struct buffer *oldbuf = current_buffer;
  1028       set_buffer_internal (buf);
  1029       lisp_ranges = Fcons (Fcons (Fpoint_min (), Fpoint_max ()), Qnil);
  1030       set_buffer_internal (oldbuf);
  1031     }
  1032 
  1033   specpdl_ref count = SPECPDL_INDEX ();
  1034 
  1035   /* let's trust the after change functions and not clone a new ranges
  1036      for each of them.  */
  1037   Lisp_Object functions = XTS_PARSER (parser)->after_change_functions;
  1038   FOR_EACH_TAIL (functions)
  1039     safe_call2 (XCAR (functions), lisp_ranges, parser);
  1040 
  1041   unbind_to (count, Qnil);
  1042 }
  1043 
  1044 /* Parse the buffer.  We don't parse until we have to.  When we have
  1045    to, we call this function to parse and update the tree.  */
  1046 static void
  1047 treesit_ensure_parsed (Lisp_Object parser)
  1048 {
  1049   struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
  1050 
  1051   /* Before we parse, catch up with the narrowing situation.  */
  1052   treesit_check_buffer_size (buffer);
  1053   /* This function has to run before we check for need_reparse flag,
  1054      because it might set the flag to true.  */
  1055   treesit_sync_visible_region (parser);
  1056 
  1057   /* Make sure this comes before everything else, see comment
  1058      (ref:notifier-inside-ensure-parsed) for more detail.  */
  1059   if (!XTS_PARSER (parser)->need_reparse)
  1060     return;
  1061 
  1062   TSParser *treesit_parser = XTS_PARSER (parser)->parser;
  1063   TSTree *tree = XTS_PARSER (parser)->tree;
  1064   TSInput input = XTS_PARSER (parser)->input;
  1065 
  1066   TSTree *new_tree = ts_parser_parse (treesit_parser, tree, input);
  1067   /* This should be very rare (impossible, really): it only happens
  1068      when 1) language is not set (impossible in Emacs because the user
  1069      has to supply a language to create a parser), 2) parse canceled
  1070      due to timeout (impossible because we don't set a timeout), 3)
  1071      parse canceled due to cancellation flag (impossible because we
  1072      don't set the flag).  (See comments for ts_parser_parse in
  1073      tree_sitter/api.h.)  */
  1074   if (new_tree == NULL)
  1075     {
  1076       Lisp_Object buf;
  1077       XSETBUFFER (buf, buffer);
  1078       xsignal1 (Qtreesit_parse_error, buf);
  1079     }
  1080 
  1081   XTS_PARSER (parser)->tree = new_tree;
  1082   XTS_PARSER (parser)->need_reparse = false;
  1083 
  1084   /* After-change functions should run at the very end, most crucially
  1085      after need_reparse is set to false, this way if the function
  1086      calls some tree-sitter function which invokes
  1087      treesit_ensure_parsed again, it returns early and do not
  1088      recursively call the after change functions again.
  1089      (ref:notifier-inside-ensure-parsed)  */
  1090   treesit_call_after_change_functions (tree, new_tree, parser);
  1091   ts_tree_delete (tree);
  1092 }
  1093 
  1094 /* This is the read function provided to tree-sitter to read from a
  1095    buffer.  It reads one character at a time and automatically skips
  1096    the gap.  */
  1097 static const char*
  1098 treesit_read_buffer (void *parser, uint32_t byte_index,
  1099                      TSPoint position, uint32_t *bytes_read)
  1100 {
  1101   struct buffer *buffer = XBUFFER (((struct Lisp_TS_Parser *) parser)->buffer);
  1102   ptrdiff_t visible_beg = ((struct Lisp_TS_Parser *) parser)->visible_beg;
  1103   ptrdiff_t visible_end = ((struct Lisp_TS_Parser *) parser)->visible_end;
  1104   ptrdiff_t byte_pos = byte_index + visible_beg;
  1105   /* We will make sure visible_beg = BUF_BEGV_BYTE before re-parse (in
  1106      treesit_ensure_parsed), so byte_pos will never be smaller than
  1107      BUF_BEG_BYTE.  */
  1108   eassert (visible_beg = BUF_BEGV_BYTE (buffer));
  1109   eassert (visible_end = BUF_ZV_BYTE (buffer));
  1110 
  1111   /* Read one character.  Tree-sitter wants us to set bytes_read to 0
  1112      if it reads to the end of buffer.  It doesn't say what it wants
  1113      for the return value in that case, so we just give it an empty
  1114      string.  */
  1115   char *beg;
  1116   int len;
  1117   /* This function could run from a user command, so it is better to
  1118      do nothing instead of raising an error.  (It was a pain in the a**
  1119      to decrypt mega-if-conditions in Emacs source, so I wrote the two
  1120      branches separately, you are welcome.)  */
  1121   if (!BUFFER_LIVE_P (buffer))
  1122     {
  1123       beg = NULL;
  1124       len = 0;
  1125     }
  1126   /* Reached visible end-of-buffer, tell tree-sitter to read no more.  */
  1127   else if (byte_pos >= visible_end)
  1128     {
  1129       beg = NULL;
  1130       len = 0;
  1131     }
  1132   /* Normal case, read a character.  */
  1133   else
  1134     {
  1135       beg = (char *) BUF_BYTE_ADDRESS (buffer, byte_pos);
  1136       len = BYTES_BY_CHAR_HEAD ((int) *beg);
  1137     }
  1138   /* We never let tree-sitter to parse buffers that large so this
  1139      assertion should never hit.  */
  1140   eassert (len < UINT32_MAX);
  1141   *bytes_read = (uint32_t) len;
  1142   return beg;
  1143 }
  1144 
  1145 
  1146 /*** Functions for parser and node object */
  1147 
  1148 /* Wrap the parser in a Lisp_Object to be used in the Lisp
  1149    machine.  */
  1150 Lisp_Object
  1151 make_treesit_parser (Lisp_Object buffer, TSParser *parser,
  1152                      TSTree *tree, Lisp_Object language_symbol)
  1153 {
  1154   struct Lisp_TS_Parser *lisp_parser;
  1155 
  1156   lisp_parser = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Parser,
  1157                                        buffer, PVEC_TS_PARSER);
  1158 
  1159   lisp_parser->language_symbol = language_symbol;
  1160   lisp_parser->after_change_functions = Qnil;
  1161   lisp_parser->buffer = buffer;
  1162   lisp_parser->parser = parser;
  1163   lisp_parser->tree = tree;
  1164   TSInput input = {lisp_parser, treesit_read_buffer, TSInputEncodingUTF8};
  1165   lisp_parser->input = input;
  1166   lisp_parser->need_reparse = true;
  1167   lisp_parser->visible_beg = BUF_BEGV_BYTE (XBUFFER (buffer));
  1168   lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer));
  1169   lisp_parser->timestamp = 0;
  1170   lisp_parser->deleted = false;
  1171   lisp_parser->has_range = false;
  1172   eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
  1173   return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
  1174 }
  1175 
  1176 /* Wrap the node in a Lisp_Object to be used in the Lisp machine.  */
  1177 Lisp_Object
  1178 make_treesit_node (Lisp_Object parser, TSNode node)
  1179 {
  1180   struct Lisp_TS_Node *lisp_node;
  1181 
  1182   lisp_node = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Node,
  1183                                      parser, PVEC_TS_NODE);
  1184   lisp_node->parser = parser;
  1185   lisp_node->node = node;
  1186   lisp_node->timestamp = XTS_PARSER (parser)->timestamp;
  1187   return make_lisp_ptr (lisp_node, Lisp_Vectorlike);
  1188 }
  1189 
  1190 /* Make a compiled query.  QUERY has to be either a cons or a
  1191    string.  */
  1192 static Lisp_Object
  1193 make_treesit_query (Lisp_Object query, Lisp_Object language)
  1194 {
  1195   TSQueryCursor *treesit_cursor = ts_query_cursor_new ();
  1196   struct Lisp_TS_Query *lisp_query;
  1197 
  1198   lisp_query = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Query,
  1199                                       source, PVEC_TS_COMPILED_QUERY);
  1200 
  1201   lisp_query->language = language;
  1202   lisp_query->source = query;
  1203   lisp_query->query = NULL;
  1204   lisp_query->cursor = treesit_cursor;
  1205   return make_lisp_ptr (lisp_query, Lisp_Vectorlike);
  1206 }
  1207 
  1208 /* The following two functions are called from alloc.c:cleanup_vector.  */
  1209 void
  1210 treesit_delete_parser (struct Lisp_TS_Parser *lisp_parser)
  1211 {
  1212   ts_tree_delete (lisp_parser->tree);
  1213   ts_parser_delete (lisp_parser->parser);
  1214 }
  1215 
  1216 void
  1217 treesit_delete_query (struct Lisp_TS_Query *lisp_query)
  1218 {
  1219   ts_query_delete (lisp_query->query);
  1220   ts_query_cursor_delete (lisp_query->cursor);
  1221 }
  1222 
  1223 /* The following function is called from print.c:print_vectorlike.  */
  1224 bool
  1225 treesit_named_node_p (TSNode node)
  1226 {
  1227   return ts_node_is_named (node);
  1228 }
  1229 
  1230 static const char*
  1231 treesit_query_error_to_string (TSQueryError error)
  1232 {
  1233   switch (error)
  1234     {
  1235     case TSQueryErrorNone:
  1236       return "None";
  1237     case TSQueryErrorSyntax:
  1238       return "Syntax error at";
  1239     case TSQueryErrorNodeType:
  1240       return "Node type error at";
  1241     case TSQueryErrorField:
  1242       return "Field error at";
  1243     case TSQueryErrorCapture:
  1244       return "Capture error at";
  1245     case TSQueryErrorStructure:
  1246       return "Structure error at";
  1247     default:
  1248       return "Unknown error";
  1249     }
  1250 }
  1251 
  1252 static Lisp_Object
  1253 treesit_compose_query_signal_data (uint32_t error_offset,
  1254                                    TSQueryError error_type,
  1255                                    Lisp_Object query_source)
  1256 {
  1257   return list4 (build_string (treesit_query_error_to_string (error_type)),
  1258                 make_fixnum (error_offset + 1),
  1259                 query_source,
  1260                 build_string ("Debug the query with `treesit-query-validate'"));
  1261 }
  1262 
  1263 /* Ensure the QUERY is compiled.  Return the TSQuery.  It could be
  1264    NULL if error occurs, in which case ERROR_OFFSET and ERROR_TYPE are
  1265    bound.  If error occurs, return NULL, and assign SIGNAL_SYMBOL and
  1266    SIGNAL_DATA accordingly.  */
  1267 static TSQuery *
  1268 treesit_ensure_query_compiled (Lisp_Object query, Lisp_Object *signal_symbol,
  1269                                Lisp_Object *signal_data)
  1270 {
  1271   /* If query is already compiled (not null), return that, otherwise
  1272      compile and return it.  */
  1273   TSQuery *treesit_query = XTS_COMPILED_QUERY (query)->query;
  1274   if (treesit_query != NULL)
  1275     return treesit_query;
  1276 
  1277   /* Get query source and TSLanguage ready.  */
  1278   Lisp_Object source = XTS_COMPILED_QUERY (query)->source;
  1279   Lisp_Object language = XTS_COMPILED_QUERY (query)->language;
  1280   /* This is the main reason why we compile query lazily: to avoid
  1281      loading languages early.  */
  1282   TSLanguage *treesit_lang = treesit_load_language (language, signal_symbol,
  1283                                                     signal_data);
  1284   if (treesit_lang == NULL)
  1285     return NULL;
  1286 
  1287   if (CONSP (source))
  1288     source = Ftreesit_query_expand (source);
  1289 
  1290   /* Create TSQuery.  */
  1291   uint32_t error_offset;
  1292   TSQueryError error_type;
  1293   char *treesit_source = SSDATA (source);
  1294   treesit_query = ts_query_new (treesit_lang, treesit_source,
  1295                                 strlen (treesit_source),
  1296                                 &error_offset, &error_type);
  1297   if (treesit_query == NULL)
  1298     {
  1299       *signal_symbol = Qtreesit_query_error;
  1300       *signal_data = treesit_compose_query_signal_data (error_offset,
  1301                                                         error_type,
  1302                                                         source);
  1303     }
  1304   XTS_COMPILED_QUERY (query)->query = treesit_query;
  1305   return treesit_query;
  1306 }
  1307 
  1308 
  1309 /* Lisp definitions.  */
  1310 
  1311 DEFUN ("treesit-parser-p",
  1312        Ftreesit_parser_p, Streesit_parser_p, 1, 1, 0,
  1313        doc: /* Return t if OBJECT is a tree-sitter parser.  */)
  1314   (Lisp_Object object)
  1315 {
  1316   if (TS_PARSERP (object))
  1317     return Qt;
  1318   else
  1319     return Qnil;
  1320 }
  1321 
  1322 DEFUN ("treesit-node-p",
  1323        Ftreesit_node_p, Streesit_node_p, 1, 1, 0,
  1324        doc: /* Return t if OBJECT is a tree-sitter node.  */)
  1325   (Lisp_Object object)
  1326 {
  1327   if (TS_NODEP (object))
  1328     return Qt;
  1329   else
  1330     return Qnil;
  1331 }
  1332 
  1333 DEFUN ("treesit-compiled-query-p",
  1334        Ftreesit_compiled_query_p, Streesit_compiled_query_p, 1, 1, 0,
  1335        doc: /* Return t if OBJECT is a compiled tree-sitter query.  */)
  1336   (Lisp_Object object)
  1337 {
  1338   if (TS_COMPILED_QUERY_P (object))
  1339     return Qt;
  1340   else
  1341     return Qnil;
  1342 }
  1343 
  1344 DEFUN ("treesit-query-p",
  1345        Ftreesit_query_p, Streesit_query_p, 1, 1, 0,
  1346        doc: /* Return t if OBJECT is a generic tree-sitter query.  */)
  1347   (Lisp_Object object)
  1348 {
  1349   if (TS_COMPILED_QUERY_P (object)
  1350       || CONSP (object) || STRINGP (object))
  1351     return Qt;
  1352   else
  1353     return Qnil;
  1354 }
  1355 
  1356 DEFUN ("treesit-query-language",
  1357        Ftreesit_query_language, Streesit_query_language, 1, 1, 0,
  1358        doc: /* Return the language of QUERY.
  1359 QUERY has to be a compiled query.  */)
  1360   (Lisp_Object query)
  1361 {
  1362   CHECK_TS_COMPILED_QUERY (query);
  1363   return XTS_COMPILED_QUERY (query)->language;
  1364 }
  1365 
  1366 DEFUN ("treesit-node-parser",
  1367        Ftreesit_node_parser, Streesit_node_parser,
  1368        1, 1, 0,
  1369        doc: /* Return the parser to which NODE belongs.  */)
  1370   (Lisp_Object node)
  1371 {
  1372   CHECK_TS_NODE (node);
  1373   return XTS_NODE (node)->parser;
  1374 }
  1375 
  1376 DEFUN ("treesit-parser-create",
  1377        Ftreesit_parser_create, Streesit_parser_create,
  1378        1, 3, 0,
  1379        doc: /* Create and return a parser in BUFFER for LANGUAGE.
  1380 
  1381 The parser is automatically added to BUFFER's parser list, as returned
  1382 by `treesit-parser-list'.  LANGUAGE is a language symbol.  If BUFFER
  1383 is nil or omitted, it defaults to the current buffer.  If BUFFER
  1384 already has a parser for LANGUAGE, return that parser, but if NO-REUSE
  1385 is non-nil, always create a new parser.
  1386 
  1387 If that buffer is an indirect buffer, its base buffer is used instead.
  1388 That is, indirect buffers use their base buffer's parsers.  Lisp
  1389 programs should widen as necessary should they want to use a parser in
  1390 an indirect buffer.  */)
  1391   (Lisp_Object language, Lisp_Object buffer, Lisp_Object no_reuse)
  1392 {
  1393   treesit_initialize ();
  1394 
  1395   CHECK_SYMBOL (language);
  1396   struct buffer *buf;
  1397   if (NILP (buffer))
  1398     buf = current_buffer;
  1399   else
  1400     {
  1401       CHECK_BUFFER (buffer);
  1402       buf = XBUFFER (buffer);
  1403     }
  1404   if (buf->base_buffer)
  1405     buf = buf->base_buffer;
  1406 
  1407   treesit_check_buffer_size (buf);
  1408 
  1409   /* See if we can reuse a parser.  */
  1410   if (NILP (no_reuse))
  1411     {
  1412       Lisp_Object tail = BVAR (buf, ts_parser_list);
  1413       FOR_EACH_TAIL (tail)
  1414       {
  1415         struct Lisp_TS_Parser *parser = XTS_PARSER (XCAR (tail));
  1416         if (EQ (parser->language_symbol, language))
  1417           return XCAR (tail);
  1418       }
  1419     }
  1420 
  1421   /* Load language.  */
  1422   Lisp_Object signal_symbol = Qnil;
  1423   Lisp_Object signal_data = Qnil;
  1424   TSParser *parser = ts_parser_new ();
  1425   TSLanguage *lang = treesit_load_language (language, &signal_symbol,
  1426                                             &signal_data);
  1427   if (lang == NULL)
  1428     xsignal (signal_symbol, signal_data);
  1429   /* We check language version when loading a language, so this should
  1430      always succeed.  */
  1431   ts_parser_set_language (parser, lang);
  1432 
  1433   /* Create parser.  */
  1434   Lisp_Object lisp_parser = make_treesit_parser (Fcurrent_buffer (),
  1435                                                  parser, NULL,
  1436                                                  language);
  1437 
  1438   /* Update parser-list.  */
  1439   BVAR (buf, ts_parser_list) = Fcons (lisp_parser, BVAR (buf, ts_parser_list));
  1440 
  1441   return lisp_parser;
  1442 }
  1443 
  1444 DEFUN ("treesit-parser-delete",
  1445        Ftreesit_parser_delete, Streesit_parser_delete,
  1446        1, 1, 0,
  1447        doc: /* Delete PARSER from its buffer's parser list.
  1448 See `treesit-parser-list' for the buffer's parser list.  */)
  1449   (Lisp_Object parser)
  1450 {
  1451   treesit_check_parser (parser);
  1452 
  1453   Lisp_Object buffer = XTS_PARSER (parser)->buffer;
  1454   struct buffer *buf = XBUFFER (buffer);
  1455 
  1456   BVAR (buf, ts_parser_list)
  1457     = Fdelete (parser, BVAR (buf, ts_parser_list));
  1458 
  1459   XTS_PARSER (parser)->deleted = true;
  1460   return Qnil;
  1461 }
  1462 
  1463 DEFUN ("treesit-parser-list",
  1464        Ftreesit_parser_list, Streesit_parser_list,
  1465        0, 1, 0,
  1466        doc: /* Return BUFFER's parser list.
  1467 
  1468 BUFFER defaults to the current buffer.  If that buffer is an indirect
  1469 buffer, its base buffer is used instead.  That is, indirect buffers
  1470 use their base buffer's parsers.  */)
  1471   (Lisp_Object buffer)
  1472 {
  1473   struct buffer *buf;
  1474   if (NILP (buffer))
  1475     buf = current_buffer;
  1476   else
  1477     {
  1478       CHECK_BUFFER (buffer);
  1479       buf = XBUFFER (buffer);
  1480     }
  1481   if (buf->base_buffer)
  1482     buf = buf->base_buffer;
  1483 
  1484   /* Return a fresh list so messing with that list doesn't affect our
  1485      internal data.  */
  1486   Lisp_Object return_list = Qnil;
  1487   Lisp_Object tail;
  1488 
  1489   tail = BVAR (buf, ts_parser_list);
  1490 
  1491   FOR_EACH_TAIL (tail)
  1492     return_list = Fcons (XCAR (tail), return_list);
  1493 
  1494   return Freverse (return_list);
  1495 }
  1496 
  1497 DEFUN ("treesit-parser-buffer",
  1498        Ftreesit_parser_buffer, Streesit_parser_buffer,
  1499        1, 1, 0,
  1500        doc: /* Return the buffer of PARSER.  */)
  1501   (Lisp_Object parser)
  1502 {
  1503   treesit_check_parser (parser);
  1504   Lisp_Object buf;
  1505   XSETBUFFER (buf, XBUFFER (XTS_PARSER (parser)->buffer));
  1506   return buf;
  1507 }
  1508 
  1509 DEFUN ("treesit-parser-language",
  1510        Ftreesit_parser_language, Streesit_parser_language,
  1511        1, 1, 0,
  1512        doc: /* Return PARSER's language symbol.
  1513 This symbol is the one used to create the parser.  */)
  1514   (Lisp_Object parser)
  1515 {
  1516   treesit_check_parser (parser);
  1517   return XTS_PARSER (parser)->language_symbol;
  1518 }
  1519 
  1520 /* Return true if PARSER is not deleted and its buffer is live.  */
  1521 static bool
  1522 treesit_parser_live_p (Lisp_Object parser)
  1523 {
  1524   CHECK_TS_PARSER (parser);
  1525   return ((!XTS_PARSER (parser)->deleted) &&
  1526           (!NILP (Fbuffer_live_p (XTS_PARSER (parser)->buffer))));
  1527 }
  1528 
  1529 
  1530 /*** Parser API */
  1531 
  1532 DEFUN ("treesit-parser-root-node",
  1533        Ftreesit_parser_root_node, Streesit_parser_root_node,
  1534        1, 1, 0,
  1535        doc: /* Return the root node of PARSER.  */)
  1536   (Lisp_Object parser)
  1537 {
  1538   treesit_check_parser (parser);
  1539   treesit_initialize ();
  1540   treesit_ensure_parsed (parser);
  1541   TSNode root_node = ts_tree_root_node (XTS_PARSER (parser)->tree);
  1542   return make_treesit_node (parser, root_node);
  1543 }
  1544 
  1545 /* Checks that the RANGES argument of
  1546    treesit-parser-set-included-ranges is valid.  */
  1547 static void
  1548 treesit_check_range_argument (Lisp_Object ranges)
  1549 {
  1550   struct buffer *buffer = current_buffer;
  1551   ptrdiff_t point_min = BUF_BEGV (buffer);
  1552   ptrdiff_t point_max = BUF_ZV (buffer);
  1553   EMACS_INT last_point = point_min;
  1554   Lisp_Object tail;
  1555 
  1556   tail = ranges;
  1557 
  1558   CHECK_LIST (tail);
  1559 
  1560   FOR_EACH_TAIL (tail)
  1561     {
  1562       CHECK_CONS (tail);
  1563       Lisp_Object range = XCAR (tail);
  1564       CHECK_CONS (range);
  1565       CHECK_FIXNUM (XCAR (range));
  1566       CHECK_FIXNUM (XCDR (range));
  1567       EMACS_INT beg = XFIXNUM (XCAR (range));
  1568       EMACS_INT end = XFIXNUM (XCDR (range));
  1569       if (!(last_point <= beg && beg <= end && end <= point_max))
  1570         xsignal2 (Qtreesit_range_invalid,
  1571                   build_string ("RANGE is either overlapping,"
  1572                                 " out-of-order or out-of-range"),
  1573                   ranges);
  1574       last_point = end;
  1575     }
  1576 
  1577   CHECK_LIST_END (tail, ranges);
  1578 }
  1579 
  1580 /* Generate a list of ranges in Lisp from RANGES.  Assumes tree-sitter
  1581    tree and the buffer has the same visible region (wrt narrowing).
  1582    This function doesn't take ownership of RANGES.  BUFFER is used to
  1583    convert between tree-sitter buffer offset and buffer position.  */
  1584 static Lisp_Object
  1585 treesit_make_ranges (const TSRange *ranges, uint32_t len,
  1586                      struct buffer *buffer)
  1587 {
  1588   Lisp_Object list = Qnil;
  1589   for (int idx = 0; idx < len; idx++)
  1590     {
  1591       TSRange range = ranges[idx];
  1592       uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer);
  1593       uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer);
  1594       eassert (BUF_BEGV_BYTE (buffer) <= beg_byte);
  1595       eassert (beg_byte <= end_byte);
  1596       eassert (end_byte <= BUF_ZV_BYTE (buffer));
  1597 
  1598       Lisp_Object lisp_range
  1599         = Fcons (make_fixnum (buf_bytepos_to_charpos (buffer, beg_byte)),
  1600                  make_fixnum (buf_bytepos_to_charpos (buffer, end_byte)));
  1601       list = Fcons (lisp_range, list);
  1602     }
  1603   return Fnreverse (list);
  1604 }
  1605 
  1606 DEFUN ("treesit-parser-set-included-ranges",
  1607        Ftreesit_parser_set_included_ranges,
  1608        Streesit_parser_set_included_ranges,
  1609        2, 2, 0,
  1610        doc: /* Limit PARSER to RANGES.
  1611 
  1612 RANGES is a list of (BEG . END), each (BEG . END) defines a region in
  1613 which the parser should operate.  Regions must not overlap, and the
  1614 regions should come in order in the list.  Signal
  1615 `treesit-set-range-error' if the argument is invalid, or something
  1616 else went wrong.  If RANGES is nil, the PARSER is to parse the whole
  1617 buffer.  */)
  1618   (Lisp_Object parser, Lisp_Object ranges)
  1619 {
  1620   treesit_check_parser (parser);
  1621   if (!NILP (ranges))
  1622     CHECK_CONS (ranges);
  1623   treesit_check_range_argument (ranges);
  1624 
  1625   treesit_initialize ();
  1626   /* Before we parse, catch up with narrowing/widening.  */
  1627   treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer));
  1628   treesit_sync_visible_region (parser);
  1629 
  1630   bool success;
  1631   if (NILP (ranges))
  1632     {
  1633       XTS_PARSER (parser)->has_range = false;
  1634       /* If RANGES is nil, make parser to parse the whole document.
  1635          To do that we give tree-sitter a 0 length, the range is a
  1636          dummy.  */
  1637       TSRange treesit_range = {{0, 0}, {0, 0}, 0, 0};
  1638       success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
  1639                                                &treesit_range , 0);
  1640     }
  1641   else
  1642     {
  1643       /* Set ranges for PARSER.  */
  1644       XTS_PARSER (parser)->has_range = true;
  1645 
  1646       if (list_length (ranges) > UINT32_MAX)
  1647         xsignal (Qargs_out_of_range, list2 (ranges, Flength (ranges)));
  1648       uint32_t len = (uint32_t) list_length (ranges);
  1649       TSRange *treesit_ranges = xmalloc (sizeof (TSRange) * len);
  1650       struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
  1651 
  1652       /* We can use XFIXNUM, XCAR, XCDR freely because we have checked
  1653          the input by treesit_check_range_argument.  */
  1654 
  1655       for (int idx = 0; !NILP (ranges); idx++, ranges = XCDR (ranges))
  1656         {
  1657           Lisp_Object range = XCAR (ranges);
  1658           ptrdiff_t beg_byte = buf_charpos_to_bytepos (buffer,
  1659                                                        XFIXNUM (XCAR (range)));
  1660           ptrdiff_t end_byte = buf_charpos_to_bytepos (buffer,
  1661                                                        XFIXNUM (XCDR (range)));
  1662           /* Shouldn't violate assertion since we just checked for
  1663              buffer size at the beginning of this function.  */
  1664           eassert (beg_byte - BUF_BEGV_BYTE (buffer) <= UINT32_MAX);
  1665           eassert (end_byte - BUF_BEGV_BYTE (buffer) <= UINT32_MAX);
  1666           /* We don't care about start and end points, put in dummy
  1667              values.  */
  1668           TSRange rg = {{0, 0}, {0, 0},
  1669                         (uint32_t) beg_byte - BUF_BEGV_BYTE (buffer),
  1670                         (uint32_t) end_byte - BUF_BEGV_BYTE (buffer)};
  1671           treesit_ranges[idx] = rg;
  1672         }
  1673       success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
  1674                                                treesit_ranges, len);
  1675       xfree (treesit_ranges);
  1676     }
  1677 
  1678   if (!success)
  1679     xsignal2 (Qtreesit_range_invalid,
  1680               build_string ("Something went wrong when setting ranges"),
  1681               ranges);
  1682 
  1683   XTS_PARSER (parser)->need_reparse = true;
  1684   return Qnil;
  1685 }
  1686 
  1687 DEFUN ("treesit-parser-included-ranges",
  1688        Ftreesit_parser_included_ranges,
  1689        Streesit_parser_included_ranges,
  1690        1, 1, 0,
  1691        doc: /* Return the ranges set for PARSER.
  1692 If no ranges are set for PARSER, return nil.
  1693 See also `treesit-parser-set-included-ranges'.  */)
  1694   (Lisp_Object parser)
  1695 {
  1696   treesit_check_parser (parser);
  1697   treesit_initialize ();
  1698 
  1699   /* When the parser doesn't have a range set and we call
  1700      ts_parser_included_ranges on it, it doesn't return an empty list,
  1701      but rather return some garbled data. (A single range where
  1702      start_byte = 0, end_byte = UINT32_MAX).  So we need to track
  1703      whether the parser is ranged ourselves.  */
  1704   if (!XTS_PARSER (parser)->has_range)
  1705     return Qnil;
  1706 
  1707   uint32_t len;
  1708   const TSRange *ranges
  1709     = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len);
  1710 
  1711   /* Our return value depends on the buffer state (BUF_BEGV_BYTE,
  1712      etc), so we need to sync up.  */
  1713   treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer));
  1714   treesit_sync_visible_region (parser);
  1715 
  1716   struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
  1717   return treesit_make_ranges (ranges, len, buffer);
  1718 }
  1719 
  1720 DEFUN ("treesit-parser-notifiers", Ftreesit_parser_notifiers,
  1721        Streesit_parser_notifiers,
  1722        1, 1, 0,
  1723        doc: /* Return the list of after-change notifier functions for PARSER.  */)
  1724   (Lisp_Object parser)
  1725 {
  1726   treesit_check_parser (parser);
  1727 
  1728   Lisp_Object return_list = Qnil;
  1729   Lisp_Object tail = XTS_PARSER (parser)->after_change_functions;
  1730   FOR_EACH_TAIL (tail)
  1731     return_list = Fcons (XCAR (tail), return_list);
  1732 
  1733   return return_list;
  1734 }
  1735 
  1736 DEFUN ("treesit-parser-add-notifier", Ftreesit_parser_add_notifier,
  1737        Streesit_parser_add_notifier,
  1738        2, 2, 0,
  1739        doc: /* Add FUNCTION to the list of PARSER's after-change notifiers.
  1740 FUNCTION must be a function symbol, rather than a lambda form.
  1741 FUNCTION should take 2 arguments, RANGES and PARSER.  RANGES is a list
  1742 of cons cells of the form (START . END), where START and END are buffer
  1743 positions.  PARSER is the parser issuing the notification.  */)
  1744   (Lisp_Object parser, Lisp_Object function)
  1745 {
  1746   treesit_check_parser (parser);
  1747   /* For simplicity we don't accept lambda functions.  */
  1748   CHECK_SYMBOL (function);
  1749 
  1750   Lisp_Object functions = XTS_PARSER (parser)->after_change_functions;
  1751   if (NILP (Fmemq (function, functions)))
  1752     XTS_PARSER (parser)->after_change_functions = Fcons (function, functions);
  1753   return Qnil;
  1754 }
  1755 
  1756 DEFUN ("treesit-parser-remove-notifier", Ftreesit_parser_remove_notifier,
  1757        Streesit_parser_remove_notifier,
  1758        2, 2, 0,
  1759        doc: /* Remove FUNCTION from the list of PARSER's after-change notifiers.
  1760   FUNCTION must be a function symbol, rather than a lambda form.
  1761 FUNCTION should take 2 arguments, RANGES and PARSER.  RANGES is a list
  1762 of cons of the form (START . END), where START and END are buffer
  1763 positions.  PARSER is the parser issuing the notification.   */)
  1764   (Lisp_Object parser, Lisp_Object function)
  1765 {
  1766   treesit_check_parser (parser);
  1767   /* For simplicity we don't accept lambda functions.  */
  1768   CHECK_SYMBOL (function);
  1769 
  1770   Lisp_Object functions = XTS_PARSER (parser)->after_change_functions;
  1771   if (!NILP (Fmemq (function, functions)))
  1772     XTS_PARSER (parser)->after_change_functions = Fdelq (function, functions);
  1773   return Qnil;
  1774 }
  1775 
  1776 
  1777 /*** Node API  */
  1778 
  1779 /* Check that OBJ is a positive integer and signal an error if
  1780    otherwise.  */
  1781 static void
  1782 treesit_check_positive_integer (Lisp_Object obj)
  1783 {
  1784   CHECK_INTEGER (obj);
  1785   if (XFIXNUM (obj) < 0)
  1786     xsignal1 (Qargs_out_of_range, obj);
  1787 }
  1788 
  1789 static void
  1790 treesit_check_node (Lisp_Object obj)
  1791 {
  1792   CHECK_TS_NODE (obj);
  1793   if (!treesit_node_uptodate_p (obj))
  1794     xsignal1 (Qtreesit_node_outdated, obj);
  1795 }
  1796 
  1797 /* Checks that OBJ is a positive integer and it is within the visible
  1798    portion of BUF. */
  1799 static void
  1800 treesit_check_position (Lisp_Object obj, struct buffer *buf)
  1801 {
  1802   treesit_check_positive_integer (obj);
  1803   ptrdiff_t pos = XFIXNUM (obj);
  1804   if (pos < BUF_BEGV (buf) || pos > BUF_ZV (buf))
  1805     xsignal1 (Qargs_out_of_range, obj);
  1806 }
  1807 
  1808 bool
  1809 treesit_node_uptodate_p (Lisp_Object obj)
  1810 {
  1811   Lisp_Object lisp_parser = XTS_NODE (obj)->parser;
  1812   return XTS_NODE (obj)->timestamp == XTS_PARSER (lisp_parser)->timestamp;
  1813 }
  1814 
  1815 DEFUN ("treesit-node-type",
  1816        Ftreesit_node_type, Streesit_node_type, 1, 1, 0,
  1817        doc: /* Return the NODE's type as a string.
  1818 If NODE is nil, return nil.  */)
  1819   (Lisp_Object node)
  1820 {
  1821   if (NILP (node)) return Qnil;
  1822   treesit_check_node (node);
  1823   treesit_initialize ();
  1824 
  1825   TSNode treesit_node = XTS_NODE (node)->node;
  1826   const char *type = ts_node_type (treesit_node);
  1827   return build_string (type);
  1828 }
  1829 
  1830 DEFUN ("treesit-node-start",
  1831        Ftreesit_node_start, Streesit_node_start, 1, 1, 0,
  1832        doc: /* Return the NODE's start position in its buffer.
  1833 If NODE is nil, return nil.  */)
  1834   (Lisp_Object node)
  1835 {
  1836   if (NILP (node)) return Qnil;
  1837   treesit_check_node (node);
  1838   treesit_initialize ();
  1839 
  1840   TSNode treesit_node = XTS_NODE (node)->node;
  1841   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  1842   uint32_t start_byte_offset = ts_node_start_byte (treesit_node);
  1843   struct buffer *buffer
  1844     = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
  1845   ptrdiff_t start_pos
  1846     = buf_bytepos_to_charpos (buffer,
  1847                               start_byte_offset + visible_beg);
  1848   return make_fixnum (start_pos);
  1849 }
  1850 
  1851 DEFUN ("treesit-node-end",
  1852        Ftreesit_node_end, Streesit_node_end, 1, 1, 0,
  1853        doc: /* Return the NODE's end position in its buffer.
  1854 If NODE is nil, return nil.  */)
  1855   (Lisp_Object node)
  1856 {
  1857   if (NILP (node)) return Qnil;
  1858   treesit_check_node (node);
  1859   treesit_initialize ();
  1860 
  1861   TSNode treesit_node = XTS_NODE (node)->node;
  1862   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  1863   uint32_t end_byte_offset = ts_node_end_byte (treesit_node);
  1864   struct buffer *buffer
  1865     = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
  1866   ptrdiff_t end_pos
  1867     = buf_bytepos_to_charpos (buffer, end_byte_offset + visible_beg);
  1868   return make_fixnum (end_pos);
  1869 }
  1870 
  1871 DEFUN ("treesit-node-string",
  1872        Ftreesit_node_string, Streesit_node_string, 1, 1, 0,
  1873        doc: /* Return the string representation of NODE.
  1874 If NODE is nil, return nil.  */)
  1875   (Lisp_Object node)
  1876 {
  1877   if (NILP (node)) return Qnil;
  1878   treesit_check_node (node);
  1879   treesit_initialize ();
  1880 
  1881   TSNode treesit_node = XTS_NODE (node)->node;
  1882   char *string = ts_node_string (treesit_node);
  1883   return build_string (string);
  1884 }
  1885 
  1886 static bool treesit_cursor_helper (TSTreeCursor *, TSNode, Lisp_Object);
  1887 
  1888 DEFUN ("treesit-node-parent",
  1889        Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0,
  1890        doc: /* Return the immediate parent of NODE.
  1891 Return nil if NODE has no parent.  If NODE is nil, return nil.  */)
  1892   (Lisp_Object node)
  1893 {
  1894   if (NILP (node)) return Qnil;
  1895   treesit_check_node (node);
  1896   treesit_initialize ();
  1897 
  1898   Lisp_Object return_value = Qnil;
  1899 
  1900   TSNode treesit_node = XTS_NODE (node)->node;
  1901   Lisp_Object parser = XTS_NODE (node)->parser;
  1902   TSTreeCursor cursor;
  1903   /* See the comments to treesit_cursor_helper about the algorithm for
  1904      finding the parent node.  The complexity is roughly proportional
  1905      to the square root of the current node's depth in the parse tree,
  1906      and we punt if the tree is too deep.  */
  1907   if (!treesit_cursor_helper (&cursor, treesit_node, parser))
  1908     return return_value;
  1909 
  1910   if (ts_tree_cursor_goto_parent (&cursor))
  1911   {
  1912     TSNode parent = ts_tree_cursor_current_node (&cursor);
  1913     return_value = make_treesit_node (parser, parent);
  1914   }
  1915   ts_tree_cursor_delete (&cursor);
  1916   return return_value;
  1917 }
  1918 
  1919 DEFUN ("treesit-node-child",
  1920        Ftreesit_node_child, Streesit_node_child, 2, 3, 0,
  1921        doc: /* Return the Nth child of NODE.
  1922 
  1923 Return nil if there is no Nth child.  If NAMED is non-nil, look for
  1924 named child only.  NAMED defaults to nil.  If NODE is nil, return
  1925 nil.
  1926 
  1927 N could be negative, e.g., -1 represents the last child.  */)
  1928   (Lisp_Object node, Lisp_Object n, Lisp_Object named)
  1929 {
  1930   if (NILP (node)) return Qnil;
  1931   treesit_check_node (node);
  1932   CHECK_INTEGER (n);
  1933   EMACS_INT idx = XFIXNUM (n);
  1934 
  1935   treesit_initialize ();
  1936 
  1937   TSNode treesit_node = XTS_NODE (node)->node;
  1938   TSNode child;
  1939 
  1940   /* Process negative index.  */
  1941   if (idx < 0)
  1942     {
  1943       if (NILP (named))
  1944         idx = ts_node_child_count (treesit_node) + idx;
  1945       else
  1946         idx = ts_node_named_child_count (treesit_node) + idx;
  1947     }
  1948   if (idx < 0)
  1949     return Qnil;
  1950   if (idx > UINT32_MAX)
  1951     xsignal1 (Qargs_out_of_range, n);
  1952 
  1953   if (NILP (named))
  1954     child = ts_node_child (treesit_node, (uint32_t) idx);
  1955   else
  1956     child = ts_node_named_child (treesit_node, (uint32_t) idx);
  1957 
  1958   if (ts_node_is_null (child))
  1959     return Qnil;
  1960 
  1961   return make_treesit_node (XTS_NODE (node)->parser, child);
  1962 }
  1963 
  1964 DEFUN ("treesit-node-check",
  1965        Ftreesit_node_check, Streesit_node_check, 2, 2, 0,
  1966        doc: /* Return non-nil if NODE has PROPERTY, nil otherwise.
  1967 
  1968 PROPERTY could be `named', `missing', `extra', `outdated',
  1969 `has-error', or `live'.
  1970 
  1971 Named nodes correspond to named rules in the language definition,
  1972 whereas "anonymous" nodes correspond to string literals in the
  1973 language definition.
  1974 
  1975 Missing nodes are inserted by the parser in order to recover from
  1976 certain kinds of syntax errors, i.e., should be there but not there.
  1977 
  1978 Extra nodes represent things like comments, which are not required the
  1979 language definition, but can appear anywhere.
  1980 
  1981 A node is "outdated" if the parser has reparsed at least once after
  1982 the node was created.
  1983 
  1984 A node "has error" if itself is a syntax error or contains any syntax
  1985 errors.
  1986 
  1987 A node is "live" if its parser is not deleted and its buffer is
  1988 live.  */)
  1989   (Lisp_Object node, Lisp_Object property)
  1990 {
  1991   if (NILP (node)) return Qnil;
  1992   CHECK_TS_NODE (node);
  1993   CHECK_SYMBOL (property);
  1994   treesit_initialize ();
  1995 
  1996   TSNode treesit_node = XTS_NODE (node)->node;
  1997   bool result;
  1998 
  1999   if (BASE_EQ (property, Qoutdated))
  2000     return treesit_node_uptodate_p (node) ? Qnil : Qt;
  2001 
  2002   treesit_check_node (node);
  2003   if (BASE_EQ (property, Qnamed))
  2004     result = ts_node_is_named (treesit_node);
  2005   else if (BASE_EQ (property, Qmissing))
  2006     result = ts_node_is_missing (treesit_node);
  2007   else if (BASE_EQ (property, Qextra))
  2008     result = ts_node_is_extra (treesit_node);
  2009   else if (BASE_EQ (property, Qhas_error))
  2010     result = ts_node_has_error (treesit_node);
  2011   else if (BASE_EQ (property, Qlive))
  2012     result = treesit_parser_live_p (XTS_NODE (node)->parser);
  2013   else
  2014     signal_error ("Expecting `named', `missing', `extra', "
  2015                   "`outdated', `has-error', or `live', but got",
  2016                   property);
  2017   return result ? Qt : Qnil;
  2018 }
  2019 
  2020 DEFUN ("treesit-node-field-name-for-child",
  2021        Ftreesit_node_field_name_for_child,
  2022        Streesit_node_field_name_for_child, 2, 2, 0,
  2023        doc: /* Return the field name of the Nth child of NODE.
  2024 
  2025 Return nil if there's no Nth child, or if it has no field.
  2026 If NODE is nil, return nil.
  2027 
  2028 N counts all children, i.e., named ones and anonymous ones.
  2029 
  2030 N could be negative, e.g., -1 represents the last child.  */)
  2031   (Lisp_Object node, Lisp_Object n)
  2032 {
  2033   if (NILP (node))
  2034     return Qnil;
  2035   treesit_check_node (node);
  2036   CHECK_INTEGER (n);
  2037   EMACS_INT idx = XFIXNUM (n);
  2038   treesit_initialize ();
  2039 
  2040   TSNode treesit_node = XTS_NODE (node)->node;
  2041 
  2042   /* Process negative index.  */
  2043   if (idx < 0)
  2044     idx = ts_node_child_count (treesit_node) + idx;
  2045   if (idx < 0)
  2046     return Qnil;
  2047   if (idx > UINT32_MAX)
  2048     xsignal1 (Qargs_out_of_range, n);
  2049 
  2050   const char *name
  2051     = ts_node_field_name_for_child (treesit_node, (uint32_t) idx);
  2052 
  2053   if (name == NULL)
  2054     return Qnil;
  2055 
  2056   return build_string (name);
  2057 }
  2058 
  2059 DEFUN ("treesit-node-child-count",
  2060        Ftreesit_node_child_count,
  2061        Streesit_node_child_count, 1, 2, 0,
  2062        doc: /* Return the number of children of NODE.
  2063 
  2064 If NAMED is non-nil, count named children only.  NAMED defaults to
  2065 nil.  If NODE is nil, return nil.  */)
  2066   (Lisp_Object node, Lisp_Object named)
  2067 {
  2068   if (NILP (node))
  2069     return Qnil;
  2070   treesit_check_node (node);
  2071   treesit_initialize ();
  2072 
  2073   TSNode treesit_node = XTS_NODE (node)->node;
  2074   uint32_t count;
  2075   if (NILP (named))
  2076     count = ts_node_child_count (treesit_node);
  2077   else
  2078     count = ts_node_named_child_count (treesit_node);
  2079   return make_fixnum (count);
  2080 }
  2081 
  2082 DEFUN ("treesit-node-child-by-field-name",
  2083        Ftreesit_node_child_by_field_name,
  2084        Streesit_node_child_by_field_name, 2, 2, 0,
  2085        doc: /* Return the child of NODE with FIELD-NAME.
  2086 Return nil if there is no such child.  If NODE is nil, return nil.  */)
  2087   (Lisp_Object node, Lisp_Object field_name)
  2088 {
  2089   if (NILP (node))
  2090     return Qnil;
  2091   treesit_check_node (node);
  2092   CHECK_STRING (field_name);
  2093   treesit_initialize ();
  2094 
  2095   char *name_str = SSDATA (field_name);
  2096   TSNode treesit_node = XTS_NODE (node)->node;
  2097   TSNode child
  2098     = ts_node_child_by_field_name (treesit_node, name_str,
  2099                                    strlen (name_str));
  2100 
  2101   if (ts_node_is_null (child))
  2102     return Qnil;
  2103 
  2104   return make_treesit_node (XTS_NODE (node)->parser, child);
  2105 }
  2106 
  2107 DEFUN ("treesit-node-next-sibling",
  2108        Ftreesit_node_next_sibling,
  2109        Streesit_node_next_sibling, 1, 2, 0,
  2110        doc: /* Return the next sibling of NODE.
  2111 
  2112 Return nil if there is no next sibling.  If NAMED is non-nil, look for named
  2113 siblings only.  NAMED defaults to nil.  If NODE is nil, return nil.  */)
  2114   (Lisp_Object node, Lisp_Object named)
  2115 {
  2116   if (NILP (node)) return Qnil;
  2117   treesit_check_node (node);
  2118   treesit_initialize ();
  2119 
  2120   TSNode treesit_node = XTS_NODE (node)->node;
  2121   TSNode sibling;
  2122   if (NILP (named))
  2123     sibling = ts_node_next_sibling (treesit_node);
  2124   else
  2125     sibling = ts_node_next_named_sibling (treesit_node);
  2126 
  2127   if (ts_node_is_null (sibling))
  2128     return Qnil;
  2129 
  2130   return make_treesit_node (XTS_NODE (node)->parser, sibling);
  2131 }
  2132 
  2133 DEFUN ("treesit-node-prev-sibling",
  2134        Ftreesit_node_prev_sibling,
  2135        Streesit_node_prev_sibling, 1, 2, 0,
  2136        doc: /* Return the previous sibling of NODE.
  2137 
  2138 Return nil if there is no previous sibling.  If NAMED is non-nil, look
  2139 for named siblings only.  NAMED defaults to nil.  If NODE is nil,
  2140 return nil.  */)
  2141   (Lisp_Object node, Lisp_Object named)
  2142 {
  2143   if (NILP (node)) return Qnil;
  2144   treesit_check_node (node);
  2145   treesit_initialize ();
  2146 
  2147   TSNode treesit_node = XTS_NODE (node)->node;
  2148   TSNode sibling;
  2149 
  2150   if (NILP (named))
  2151     sibling = ts_node_prev_sibling (treesit_node);
  2152   else
  2153     sibling = ts_node_prev_named_sibling (treesit_node);
  2154 
  2155   if (ts_node_is_null (sibling))
  2156     return Qnil;
  2157 
  2158   return make_treesit_node (XTS_NODE (node)->parser, sibling);
  2159 }
  2160 
  2161 /* Our reimplementation of ts_node_first_child_for_byte.  The current
  2162    implementation of that function has problems (see bug#60127), so
  2163    before it's fixed upstream, we use our own reimplementation of it.
  2164    Return true if there is a valid sibling, return false otherwise.
  2165    If the return value is false, the position of the cursor is
  2166    undefined.  (We use cursor because technically we can't make a null
  2167    node for ourselves, also, using cursor is more convenient.)
  2168 
  2169    TODO: Remove this function once tree-sitter fixed the bug.  */
  2170 static bool treesit_cursor_first_child_for_byte
  2171 (TSTreeCursor *cursor, ptrdiff_t pos, bool named)
  2172 {
  2173   if (!ts_tree_cursor_goto_first_child (cursor))
  2174     return false;
  2175 
  2176   TSNode node = ts_tree_cursor_current_node (cursor);
  2177   while (ts_node_end_byte (node) <= pos)
  2178     {
  2179       if (ts_tree_cursor_goto_next_sibling (cursor))
  2180         node = ts_tree_cursor_current_node (cursor);
  2181       else
  2182         /* Reached the end and still can't find a valid sibling.  */
  2183         return false;
  2184     }
  2185   while (named && (!ts_node_is_named (node)))
  2186     {
  2187       if (ts_tree_cursor_goto_next_sibling (cursor))
  2188         node = ts_tree_cursor_current_node (cursor);
  2189       else
  2190         /* Reached the end and still can't find a named sibling.  */
  2191         return false;
  2192     }
  2193   return true;
  2194 }
  2195 
  2196 DEFUN ("treesit-node-first-child-for-pos",
  2197        Ftreesit_node_first_child_for_pos,
  2198        Streesit_node_first_child_for_pos, 2, 3, 0,
  2199        doc: /* Return the first child of NODE for buffer position POS.
  2200 
  2201 Specifically, return the first child that extends beyond POS.
  2202 Return nil if there is no such child.
  2203 If NAMED is non-nil, look for named children only.  NAMED defaults to nil.
  2204 Note that this function returns an immediate child, not the smallest
  2205 (grand)child.  If NODE is nil, return nil.  */)
  2206   (Lisp_Object node, Lisp_Object pos, Lisp_Object named)
  2207 {
  2208   if (NILP (node))
  2209     return Qnil;
  2210   treesit_check_node (node);
  2211 
  2212   struct buffer *buf = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
  2213   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  2214 
  2215   treesit_check_position (pos, buf);
  2216   treesit_initialize ();
  2217 
  2218   ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos));
  2219   TSNode treesit_node = XTS_NODE (node)->node;
  2220 
  2221   TSTreeCursor cursor = ts_tree_cursor_new (treesit_node);
  2222   ptrdiff_t treesit_pos = byte_pos - visible_beg;
  2223   bool success;
  2224   success = treesit_cursor_first_child_for_byte (&cursor, treesit_pos,
  2225                                                  !NILP (named));
  2226   TSNode child = ts_tree_cursor_current_node (&cursor);
  2227   ts_tree_cursor_delete (&cursor);
  2228 
  2229   if (!success)
  2230     return Qnil;
  2231   return make_treesit_node (XTS_NODE (node)->parser, child);
  2232 }
  2233 
  2234 DEFUN ("treesit-node-descendant-for-range",
  2235        Ftreesit_node_descendant_for_range,
  2236        Streesit_node_descendant_for_range, 3, 4, 0,
  2237        doc: /* Return the smallest node that covers buffer positions BEG to END.
  2238 
  2239 The returned node is a descendant of NODE.
  2240 Return nil if there is no such node.
  2241 If NAMED is non-nil, look for named child only.  NAMED defaults to nil.
  2242 If NODE is nil, return nil.  */)
  2243   (Lisp_Object node, Lisp_Object beg, Lisp_Object end, Lisp_Object named)
  2244 {
  2245   if (NILP (node)) return Qnil;
  2246   treesit_check_node (node);
  2247 
  2248   struct buffer *buf = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
  2249   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  2250 
  2251   treesit_check_position (beg, buf);
  2252   treesit_check_position (end, buf);
  2253 
  2254   treesit_initialize ();
  2255 
  2256   ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg));
  2257   ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end));
  2258   TSNode treesit_node = XTS_NODE (node)->node;
  2259   TSNode child;
  2260   if (NILP (named))
  2261     child = ts_node_descendant_for_byte_range (treesit_node, byte_beg - visible_beg,
  2262                                                byte_end - visible_beg);
  2263   else
  2264     child = ts_node_named_descendant_for_byte_range (treesit_node,
  2265                                                      byte_beg - visible_beg,
  2266                                                      byte_end - visible_beg);
  2267 
  2268   if (ts_node_is_null (child))
  2269     return Qnil;
  2270 
  2271   return make_treesit_node (XTS_NODE (node)->parser, child);
  2272 }
  2273 
  2274 /* Return true if NODE1 and NODE2 are the same node.  Assumes they are
  2275    TS_NODE type.  */
  2276 bool treesit_node_eq (Lisp_Object node1, Lisp_Object node2)
  2277 {
  2278   treesit_initialize ();
  2279   TSNode treesit_node_1 = XTS_NODE (node1)->node;
  2280   TSNode treesit_node_2 = XTS_NODE (node2)->node;
  2281   return ts_node_eq (treesit_node_1, treesit_node_2);
  2282 }
  2283 
  2284 DEFUN ("treesit-node-eq",
  2285        Ftreesit_node_eq,
  2286        Streesit_node_eq, 2, 2, 0,
  2287        doc: /* Return non-nil if NODE1 and NODE2 refer to the same node.
  2288 If any one of NODE1 and NODE2 is nil, return nil.
  2289 This function uses the same equivalence metric as `equal', and returns
  2290 non-nil if NODE1 and NODE2 refer to the same node in a syntax tree
  2291 produced by tree-sitter.  */)
  2292   (Lisp_Object node1, Lisp_Object node2)
  2293 {
  2294   if (NILP (node1) || NILP (node2))
  2295     return Qnil;
  2296   CHECK_TS_NODE (node1);
  2297   CHECK_TS_NODE (node2);
  2298 
  2299   bool same_node = treesit_node_eq (node1, node2);
  2300   return same_node ? Qt : Qnil;
  2301 }
  2302 
  2303 
  2304 /*** Query functions */
  2305 
  2306 /* Convert a Lisp string to its printed representation in the tree-sitter
  2307    query syntax.  */
  2308 static Lisp_Object
  2309 treesit_query_string_string (Lisp_Object str)
  2310 {
  2311   /* Strings in the treesit query syntax only have the escapes
  2312      \n \r \t \0 and any other escaped char stands for that character.
  2313      Literal LF, NUL and " are forbidden.  */
  2314   ptrdiff_t nbytes = SBYTES (str);
  2315   ptrdiff_t escapes = 0;
  2316   for (ptrdiff_t i = 0; i < nbytes; i++)
  2317     {
  2318       unsigned char c = SREF (str, i);
  2319       escapes += (c == '\0' || c == '\n' || c == '\r' || c == '\t'
  2320                   || c == '"' || c == '\\');
  2321     }
  2322   ptrdiff_t nchars = SCHARS (str);
  2323   ptrdiff_t extra = escapes + 2;   /* backslashes + double quotes */
  2324   Lisp_Object dst = (STRING_MULTIBYTE (str)
  2325                      ? make_uninit_multibyte_string (nchars + extra,
  2326                                                      nbytes + extra)
  2327                      : make_uninit_string (nbytes + extra));
  2328   unsigned char *d = SDATA (dst);
  2329   *d++ = '"';
  2330   for (ptrdiff_t i = 0; i < nbytes; i++)
  2331     {
  2332       unsigned char c = SREF (str, i);
  2333       switch (c)
  2334         {
  2335         case '\0': *d++ = '\\'; *d++ = '0'; break;
  2336         case '\n': *d++ = '\\'; *d++ = 'n'; break;
  2337         case '\r': *d++ = '\\'; *d++ = 'r'; break;
  2338         case '\t': *d++ = '\\'; *d++ = 't'; break;
  2339         case '"':
  2340         case '\\': *d++ = '\\'; *d++ = c; break;
  2341         default: *d++ = c; break;
  2342         }
  2343     }
  2344   *d++ = '"';
  2345   eassert (d == SDATA (dst) + SBYTES (dst));
  2346   return dst;
  2347 }
  2348 
  2349 DEFUN ("treesit-pattern-expand",
  2350        Ftreesit_pattern_expand,
  2351        Streesit_pattern_expand, 1, 1, 0,
  2352        doc: /* Expand PATTERN to its string form.
  2353 
  2354 PATTERN can be
  2355 
  2356     :anchor
  2357     :?
  2358     :*
  2359     :+
  2360     :equal
  2361     :match
  2362     (TYPE PATTERN...)
  2363     [PATTERN...]
  2364     FIELD-NAME:
  2365     @CAPTURE-NAME
  2366     (_)
  2367     _
  2368     \"TYPE\"
  2369 
  2370 See Info node `(elisp)Pattern Matching' for detailed explanation.  */)
  2371   (Lisp_Object pattern)
  2372 {
  2373   if (BASE_EQ (pattern, QCanchor))
  2374     return Vtreesit_str_dot;
  2375   if (BASE_EQ (pattern, QCquestion))
  2376     return Vtreesit_str_question_mark;
  2377   if (BASE_EQ (pattern, QCstar))
  2378     return Vtreesit_str_star;
  2379   if (BASE_EQ (pattern, QCplus))
  2380     return Vtreesit_str_plus;
  2381   if (BASE_EQ (pattern, QCequal))
  2382     return Vtreesit_str_pound_equal;
  2383   if (BASE_EQ (pattern, QCmatch))
  2384     return Vtreesit_str_pound_match;
  2385   if (BASE_EQ (pattern, QCpred))
  2386     return Vtreesit_str_pound_pred;
  2387   Lisp_Object opening_delimeter
  2388     = VECTORP (pattern)
  2389       ? Vtreesit_str_open_bracket : Vtreesit_str_open_paren;
  2390   Lisp_Object closing_delimiter
  2391     = VECTORP (pattern)
  2392       ? Vtreesit_str_close_bracket : Vtreesit_str_close_paren;
  2393   if (VECTORP (pattern) || CONSP (pattern))
  2394     return concat3 (opening_delimeter,
  2395                     Fmapconcat (Qtreesit_pattern_expand,
  2396                                 pattern,
  2397                                 Vtreesit_str_space),
  2398                     closing_delimiter);
  2399   if (STRINGP (pattern))
  2400     return treesit_query_string_string (pattern);
  2401 
  2402   return Fprin1_to_string (pattern, Qnil, Qt);
  2403 }
  2404 
  2405 DEFUN ("treesit-query-expand",
  2406        Ftreesit_query_expand,
  2407        Streesit_query_expand, 1, 1, 0,
  2408        doc: /* Expand sexp QUERY to its string form.
  2409 
  2410 A PATTERN in QUERY can be
  2411 
  2412     :anchor
  2413     :?
  2414     :*
  2415     :+
  2416     :equal
  2417     :match
  2418     (TYPE PATTERN...)
  2419     [PATTERN...]
  2420     FIELD-NAME:
  2421     @CAPTURE-NAME
  2422     (_)
  2423     _
  2424     \"TYPE\"
  2425 
  2426 See Info node `(elisp)Pattern Matching' for detailed explanation.  */)
  2427   (Lisp_Object query)
  2428 {
  2429   return Fmapconcat (Qtreesit_pattern_expand, query, Vtreesit_str_space);
  2430 }
  2431 
  2432 /* This struct is used for passing captures to be check against
  2433    predicates.  Captures we check for are the ones in START before
  2434    END.  For example, if START and END are
  2435 
  2436    START       END
  2437     v              v
  2438    (1 . (2 . (3 . (4 . (5 . (6 . nil))))))
  2439 
  2440    We only look at captures 1 2 3.  */
  2441 struct capture_range
  2442 {
  2443   Lisp_Object start;
  2444   Lisp_Object end;
  2445 };
  2446 
  2447 /* Collect predicates for this match and return them in a list.  Each
  2448    predicate is a list of strings and symbols.  */
  2449 static Lisp_Object
  2450 treesit_predicates_for_pattern (TSQuery *query, uint32_t pattern_index)
  2451 {
  2452   uint32_t len;
  2453   const TSQueryPredicateStep *predicate_list
  2454     = ts_query_predicates_for_pattern (query, pattern_index, &len);
  2455   Lisp_Object result = Qnil;
  2456   Lisp_Object predicate = Qnil;
  2457   for (int idx = 0; idx < len; idx++)
  2458     {
  2459       TSQueryPredicateStep step = predicate_list[idx];
  2460       switch (step.type)
  2461         {
  2462         case TSQueryPredicateStepTypeCapture:
  2463           {
  2464             uint32_t str_len;
  2465             const char *str = ts_query_capture_name_for_id (query,
  2466                                                             step.value_id,
  2467                                                             &str_len);
  2468             predicate = Fcons (intern_c_string_1 (str, str_len),
  2469                                predicate);
  2470             break;
  2471           }
  2472         case TSQueryPredicateStepTypeString:
  2473           {
  2474             uint32_t str_len;
  2475             const char *str = ts_query_string_value_for_id (query,
  2476                                                             step.value_id,
  2477                                                             &str_len);
  2478             predicate = Fcons (make_string (str, str_len), predicate);
  2479             break;
  2480           }
  2481         case TSQueryPredicateStepTypeDone:
  2482           result = Fcons (Fnreverse (predicate), result);
  2483           predicate = Qnil;
  2484           break;
  2485         }
  2486     }
  2487   return Fnreverse (result);
  2488 }
  2489 
  2490 /* Translate a capture NAME (symbol) to a node.  If everything goes
  2491    fine, set NODE and return true; if error occurs (e.g., when there
  2492    is no node for the capture name), set NODE to Qnil, SIGNAL_DATA to
  2493    a suitable signal data, and return false.  */
  2494 static bool
  2495 treesit_predicate_capture_name_to_node (Lisp_Object name,
  2496                                         struct capture_range captures,
  2497                                         Lisp_Object *node,
  2498                                         Lisp_Object *signal_data)
  2499 {
  2500   *node = Qnil;
  2501   for (Lisp_Object tail = captures.start; !EQ (tail, captures.end);
  2502        tail = XCDR (tail))
  2503     {
  2504       if (EQ (XCAR (XCAR (tail)), name))
  2505         {
  2506           *node = XCDR (XCAR (tail));
  2507           break;
  2508         }
  2509     }
  2510 
  2511   if (NILP (*node))
  2512     {
  2513       *signal_data = list3 (build_string ("Cannot find captured node"),
  2514                             name, build_string ("A predicate can only refer"
  2515                                                 " to captured nodes in the "
  2516                                                 "same pattern"));
  2517       return false;
  2518     }
  2519   return true;
  2520 }
  2521 
  2522 /* Translate a capture NAME (symbol) to the text of the captured node.
  2523    If everything goes fine, set TEXT to the text and return true;
  2524    otherwise set TEXT to Qnil and set SIGNAL_DATA to a suitable signal
  2525    data.  */
  2526 static bool
  2527 treesit_predicate_capture_name_to_text (Lisp_Object name,
  2528                                         struct capture_range captures,
  2529                                         Lisp_Object *text,
  2530                                         Lisp_Object *signal_data)
  2531 {
  2532   Lisp_Object node = Qnil;
  2533   if (!treesit_predicate_capture_name_to_node (name, captures, &node, signal_data))
  2534     return false;
  2535 
  2536   struct buffer *old_buffer = current_buffer;
  2537   set_buffer_internal (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer));
  2538   *text = Fbuffer_substring (Ftreesit_node_start (node),
  2539                              Ftreesit_node_end (node));
  2540   set_buffer_internal (old_buffer);
  2541   return true;
  2542 }
  2543 
  2544 /* Handles predicate (#equal A B).  Return true if A equals B; return
  2545    false otherwise.  A and B can be either string, or a capture name.
  2546    The capture name evaluates to the text its captured node spans in
  2547    the buffer.  If everything goes fine, don't touch SIGNAL_DATA; if
  2548    error occurs, set it to a suitable signal data.  */
  2549 static bool
  2550 treesit_predicate_equal (Lisp_Object args, struct capture_range captures,
  2551                          Lisp_Object *signal_data)
  2552 {
  2553   if (list_length (args) != 2)
  2554     {
  2555       *signal_data = list2 (build_string ("Predicate `equal' requires "
  2556                                           "two arguments but got"),
  2557                             Flength (args));
  2558       return false;
  2559     }
  2560   Lisp_Object arg1 = XCAR (args);
  2561   Lisp_Object arg2 = XCAR (XCDR (args));
  2562   Lisp_Object text1 = arg1;
  2563   Lisp_Object text2 = arg2;
  2564   if (SYMBOLP (arg1))
  2565     {
  2566       if (!treesit_predicate_capture_name_to_text (arg1, captures, &text1,
  2567                                                    signal_data))
  2568         return false;
  2569     }
  2570   if (SYMBOLP (arg2))
  2571     {
  2572       if (!treesit_predicate_capture_name_to_text (arg2, captures, &text2,
  2573                                                    signal_data))
  2574         return false;
  2575     }
  2576 
  2577   return !NILP (Fstring_equal (text1, text2));
  2578 }
  2579 
  2580 /* Handles predicate (#match "regexp" @node).  Return true if "regexp"
  2581    matches the text spanned by @node; return false otherwise.
  2582    Matching is case-sensitive.  If everything goes fine, don't touch
  2583    SIGNAL_DATA; if error occurs, set it to a suitable signal data.  */
  2584 static bool
  2585 treesit_predicate_match (Lisp_Object args, struct capture_range captures,
  2586                          Lisp_Object *signal_data)
  2587 {
  2588   if (list_length (args) != 2)
  2589     {
  2590       *signal_data = list2 (build_string ("Predicate `match' requires two "
  2591                                           "arguments but got"),
  2592                             Flength (args));
  2593       return false;
  2594     }
  2595   Lisp_Object regexp = XCAR (args);
  2596   Lisp_Object capture_name = XCAR (XCDR (args));
  2597 
  2598   /* It's probably common to get the argument order backwards.  Catch
  2599      this mistake early and show helpful explanation, because Emacs
  2600      loves you.  (We put the regexp first because that's what
  2601      string-match does.)  */
  2602   if (!STRINGP (regexp))
  2603     xsignal1 (Qtreesit_query_error,
  2604               build_string ("The first argument to `match' should "
  2605                             "be a regexp string, not a capture name"));
  2606   if (!SYMBOLP (capture_name))
  2607     xsignal1 (Qtreesit_query_error,
  2608               build_string ("The second argument to `match' should "
  2609                             "be a capture name, not a string"));
  2610 
  2611   Lisp_Object node = Qnil;
  2612   if (!treesit_predicate_capture_name_to_node (capture_name, captures, &node,
  2613                                                signal_data))
  2614     return false;
  2615 
  2616   TSNode treesit_node = XTS_NODE (node)->node;
  2617   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  2618   uint32_t start_byte_offset = ts_node_start_byte (treesit_node);
  2619   uint32_t end_byte_offset = ts_node_end_byte (treesit_node);
  2620   ptrdiff_t start_byte = visible_beg + start_byte_offset;
  2621   ptrdiff_t end_byte = visible_beg + end_byte_offset;
  2622   ptrdiff_t start_pos = BYTE_TO_CHAR (start_byte);
  2623   ptrdiff_t end_pos = BYTE_TO_CHAR (end_byte);
  2624   ptrdiff_t old_begv = BEGV;
  2625   ptrdiff_t old_begv_byte = BEGV_BYTE;
  2626   ptrdiff_t old_zv = ZV;
  2627   ptrdiff_t old_zv_byte = ZV_BYTE;
  2628 
  2629   BEGV = start_pos;
  2630   BEGV_BYTE = start_byte;
  2631   ZV = end_pos;
  2632   ZV_BYTE = end_byte;
  2633 
  2634   ptrdiff_t val = search_buffer (regexp, start_pos, start_byte,
  2635                                  end_pos, end_byte, 1, true, Qnil, Qnil, false);
  2636 
  2637   BEGV = old_begv;
  2638   BEGV_BYTE = old_begv_byte;
  2639   ZV = old_zv;
  2640   ZV_BYTE = old_zv_byte;
  2641 
  2642   return (val > 0);
  2643 }
  2644 
  2645 /* Handles predicate (#pred FN ARG...).  Return true if FN returns
  2646    non-nil; return false otherwise.  The arity of FN must match the
  2647    number of ARGs.  If everything goes fine, don't touch SIGNAL_DATA;
  2648    if error occurs, set it to a suitable signal data.  */
  2649 static bool
  2650 treesit_predicate_pred (Lisp_Object args, struct capture_range captures,
  2651                         Lisp_Object *signal_data)
  2652 {
  2653   if (list_length (args) < 2)
  2654     {
  2655       *signal_data = list2 (build_string ("Predicate `pred' requires "
  2656                                           "at least two arguments, "
  2657                                           "but only got"),
  2658                             Flength (args));
  2659       return false;
  2660     }
  2661 
  2662   Lisp_Object fn = Fintern (XCAR (args), Qnil);
  2663   Lisp_Object nodes = Qnil;
  2664   Lisp_Object tail = XCDR (args);
  2665   FOR_EACH_TAIL (tail)
  2666   {
  2667     Lisp_Object node = Qnil;
  2668     if (!treesit_predicate_capture_name_to_node (XCAR (tail), captures, &node,
  2669                                                  signal_data))
  2670       return false;
  2671     nodes = Fcons (node, nodes);
  2672   }
  2673   nodes = Fnreverse (nodes);
  2674 
  2675   return !NILP (CALLN (Fapply, fn, nodes));
  2676 }
  2677 
  2678 /* If all predicates in PREDICATES pass, return true; otherwise
  2679    return false.  If everything goes fine, don't touch SIGNAL_DATA; if
  2680    error occurs, set it to a suitable signal data.  */
  2681 static bool
  2682 treesit_eval_predicates (struct capture_range captures, Lisp_Object predicates,
  2683                          Lisp_Object *signal_data)
  2684 {
  2685   bool pass = true;
  2686   /* Evaluate each predicates.  */
  2687   for (Lisp_Object tail = predicates;
  2688        pass && !NILP (tail); tail = XCDR (tail))
  2689     {
  2690       Lisp_Object predicate = XCAR (tail);
  2691       Lisp_Object fn = XCAR (predicate);
  2692       Lisp_Object args = XCDR (predicate);
  2693       if (!NILP (Fstring_equal (fn, Vtreesit_str_equal)))
  2694         pass &= treesit_predicate_equal (args, captures, signal_data);
  2695       else if (!NILP (Fstring_equal (fn, Vtreesit_str_match)))
  2696         pass &= treesit_predicate_match (args, captures, signal_data);
  2697       else if (!NILP (Fstring_equal (fn, Vtreesit_str_pred)))
  2698         pass &= treesit_predicate_pred (args, captures, signal_data);
  2699       else
  2700         {
  2701           *signal_data = list3 (build_string ("Invalid predicate"),
  2702                                 fn, build_string ("Currently Emacs only supports"
  2703                                                   " `equal', `match', and `pred'"
  2704                                                   " predicates"));
  2705           pass = false;
  2706         }
  2707     }
  2708   /* If all predicates passed, add captures to result list.  */
  2709   return pass;
  2710 }
  2711 
  2712 DEFUN ("treesit-query-compile",
  2713        Ftreesit_query_compile,
  2714        Streesit_query_compile, 2, 3, 0,
  2715        doc: /* Compile QUERY to a compiled query.
  2716 
  2717 Querying with a compiled query is much faster than an uncompiled one.
  2718 LANGUAGE is the language this query is for.
  2719 
  2720 If EAGER is non-nil, immediately load LANGUAGE and compile the query.
  2721 Otherwise defer the compilation until the query is first used.
  2722 
  2723 Signal `treesit-query-error' if QUERY is malformed or something else
  2724 goes wrong.  (This only happens if EAGER is non-nil.)
  2725 You can use `treesit-query-validate' to validate and debug a query.  */)
  2726   (Lisp_Object language, Lisp_Object query, Lisp_Object eager)
  2727 {
  2728   if (NILP (Ftreesit_query_p (query)))
  2729     wrong_type_argument (Qtreesit_query_p, query);
  2730   CHECK_SYMBOL (language);
  2731   if (TS_COMPILED_QUERY_P (query))
  2732     return query;
  2733 
  2734   treesit_initialize ();
  2735 
  2736   Lisp_Object lisp_query = make_treesit_query (query, language);
  2737 
  2738   /* Maybe actually compile.  */
  2739   if (NILP (eager))
  2740     return lisp_query;
  2741   else
  2742     {
  2743       Lisp_Object signal_symbol = Qnil;
  2744       Lisp_Object signal_data = Qnil;
  2745       TSQuery *treesit_query = treesit_ensure_query_compiled (lisp_query,
  2746                                                               &signal_symbol,
  2747                                                               &signal_data);
  2748 
  2749       if (treesit_query == NULL)
  2750         xsignal (signal_symbol, signal_data);
  2751 
  2752       return lisp_query;
  2753     }
  2754 }
  2755 
  2756 /* Resolve OBJ into a tree-sitter node Lisp_Object.  OBJ can be a
  2757    node, a parser, or a language symbol.  Note that this function can
  2758    signal.  */
  2759 static Lisp_Object treesit_resolve_node (Lisp_Object obj)
  2760 {
  2761   if (TS_NODEP (obj))
  2762     {
  2763       treesit_check_node (obj); /* Check if up-to-date.  */
  2764       return obj;
  2765     }
  2766   else if (TS_PARSERP (obj))
  2767     {
  2768       treesit_check_parser (obj); /* Check if deleted.  */
  2769       return Ftreesit_parser_root_node (obj);
  2770     }
  2771   else if (SYMBOLP (obj))
  2772     {
  2773       Lisp_Object parser
  2774         = Ftreesit_parser_create (obj, Fcurrent_buffer (), Qnil);
  2775       return Ftreesit_parser_root_node (parser);
  2776     }
  2777   else
  2778     xsignal2 (Qwrong_type_argument,
  2779               list4 (Qor, Qtreesit_node_p, Qtreesit_parser_p, Qsymbolp),
  2780               obj);
  2781 }
  2782 
  2783 /* Create and initialize QUERY.  When success, initialize TS_QUERY,
  2784    CURSOR, and NEED_FREE, and return true; if failed, initialize
  2785    SIGNAL_SYMBOL and SIGNAL_DATA, and return false.  If NEED_FREE is
  2786    initialized to true, the TS_QUERY and CURSOR needs to be freed
  2787    after use; otherwise they shouldn't be freed by hand.
  2788 
  2789    Basically this function looks at QUERY and check its type, if QUERY
  2790    is a compiled query, this function takes out its query and cursor;
  2791    if QUERY is a string or a cons, this function creates a new query
  2792    and cursor (so they need to be manually freed).
  2793 
  2794    This function assumes QUERY is either a compiled query, a string or
  2795    a cons, the caller should make sure QUERY is valid.
  2796 
  2797    LANG is the language to use if we need to create the query and
  2798    cursor.  */
  2799 static bool
  2800 treesit_initialize_query (Lisp_Object query, const TSLanguage *lang,
  2801                           TSQuery **ts_query, TSQueryCursor **cursor,
  2802                           bool *need_free, Lisp_Object *signal_symbol,
  2803                           Lisp_Object *signal_data)
  2804 {
  2805   if (TS_COMPILED_QUERY_P (query))
  2806     {
  2807       *ts_query = treesit_ensure_query_compiled (query, signal_symbol,
  2808                                                  signal_data);
  2809       *cursor = XTS_COMPILED_QUERY (query)->cursor;
  2810       /* We don't need to free ts_query and cursor because they
  2811          are stored in a lisp object, which is tracked by gc.  */
  2812       *need_free = false;
  2813       return (*ts_query != NULL);
  2814     }
  2815   else
  2816     {
  2817       /* Since query is not TS_COMPILED_QUERY, it can only be a string
  2818          or a cons.  */
  2819       if (CONSP (query))
  2820         query = Ftreesit_query_expand (query);
  2821       char *query_string = SSDATA (query);
  2822       uint32_t error_offset;
  2823       TSQueryError error_type;
  2824       *ts_query = ts_query_new (lang, query_string, strlen (query_string),
  2825                                 &error_offset, &error_type);
  2826       if (*ts_query == NULL)
  2827         {
  2828           *signal_symbol = Qtreesit_query_error;
  2829           *signal_data = treesit_compose_query_signal_data (error_offset,
  2830                                                             error_type, query);
  2831           return false;
  2832         }
  2833       else
  2834         {
  2835           *cursor = ts_query_cursor_new ();
  2836           *need_free = true;
  2837           return true;
  2838         }
  2839     }
  2840 }
  2841 
  2842 DEFUN ("treesit-query-capture",
  2843        Ftreesit_query_capture,
  2844        Streesit_query_capture, 2, 5, 0,
  2845        doc: /* Query NODE with patterns in QUERY.
  2846 
  2847 Return a list of (CAPTURE_NAME . NODE).  CAPTURE_NAME is the name
  2848 assigned to the node in PATTERN.  NODE is the captured node.
  2849 
  2850 QUERY is either a string query, a sexp query, or a compiled query.
  2851 See Info node `(elisp)Pattern Matching' for how to write a query in
  2852 either string or sexp form.  When using repeatedly, a compiled query
  2853 is much faster than a string or sexp one, so it is recommend to
  2854 compile your query if it will be used repeatedly.
  2855 
  2856 BEG and END, if both non-nil, specify the region of buffer positions
  2857 in which the query is executed.  Any matching node whose span overlaps
  2858 with the region between BEG and END are captured, it doesn't have to
  2859 be completely in the region.
  2860 
  2861 If NODE-ONLY is non-nil, return a list of nodes.
  2862 
  2863 Besides a node, NODE can also be a parser, in which case the root node
  2864 of that parser is used.
  2865 NODE can also be a language symbol, in which case the root node of a
  2866 parser for that language is used.  If such a parser doesn't exist, it
  2867 is created.
  2868 
  2869 Signal `treesit-query-error' if QUERY is malformed or something else
  2870 goes wrong.  You can use `treesit-query-validate' to validate and debug
  2871 the query.  */)
  2872   (Lisp_Object node, Lisp_Object query,
  2873    Lisp_Object beg, Lisp_Object end, Lisp_Object node_only)
  2874 {
  2875   if (!(TS_COMPILED_QUERY_P (query)
  2876         || CONSP (query) || STRINGP (query)))
  2877     wrong_type_argument (Qtreesit_query_p, query);
  2878 
  2879   treesit_initialize ();
  2880 
  2881   /* Resolve NODE into an actual node.  */
  2882   Lisp_Object lisp_node = treesit_resolve_node (node);
  2883 
  2884   /* Extract C values from Lisp objects.  */
  2885   TSNode treesit_node = XTS_NODE (lisp_node)->node;
  2886   Lisp_Object lisp_parser = XTS_NODE (lisp_node)->parser;
  2887 
  2888   const TSLanguage *lang
  2889     = ts_parser_language (XTS_PARSER (lisp_parser)->parser);
  2890 
  2891   /* Check BEG and END.  */
  2892   struct buffer *buf = XBUFFER (XTS_PARSER (lisp_parser)->buffer);
  2893   if (!NILP (beg))
  2894     treesit_check_position (beg, buf);
  2895   if (!NILP (end))
  2896     treesit_check_position (end, buf);
  2897 
  2898   /* Initialize query objects.  At the end of this block, we should
  2899      have a working TSQuery and a TSQueryCursor.  */
  2900   TSQuery *treesit_query;
  2901   TSQueryCursor *cursor;
  2902   bool needs_to_free_query_and_cursor;
  2903   Lisp_Object signal_symbol;
  2904   Lisp_Object signal_data;
  2905   if (!treesit_initialize_query (query, lang, &treesit_query, &cursor,
  2906                                  &needs_to_free_query_and_cursor,
  2907                                  &signal_symbol, &signal_data))
  2908     xsignal (signal_symbol, signal_data);
  2909 
  2910   /* WARN: After this point, free TREESIT_QUERY and CURSOR before every
  2911      signal and return if NEEDS_TO_FREE_QUERY_AND_CURSOR is true.  */
  2912 
  2913   /* Set query range.  */
  2914   if (!NILP (beg) && !NILP (end))
  2915     {
  2916       ptrdiff_t visible_beg
  2917         = XTS_PARSER (XTS_NODE (lisp_node)->parser)->visible_beg;
  2918       ptrdiff_t beg_byte = CHAR_TO_BYTE (XFIXNUM (beg));
  2919       ptrdiff_t end_byte = CHAR_TO_BYTE (XFIXNUM (end));
  2920       /* We never let tree-sitter run on buffers too large, so these
  2921          assertion should never hit.  */
  2922       eassert (beg_byte - visible_beg <= UINT32_MAX);
  2923       eassert (end_byte - visible_beg <= UINT32_MAX);
  2924       ts_query_cursor_set_byte_range (cursor,
  2925                                       (uint32_t) (beg_byte - visible_beg),
  2926                                       (uint32_t) (end_byte - visible_beg));
  2927     }
  2928 
  2929   /* Execute query.  */
  2930   ts_query_cursor_exec (cursor, treesit_query, treesit_node);
  2931   TSQueryMatch match;
  2932 
  2933   /* Go over each match, collect captures and predicates.  Include the
  2934      captures in the RESULT list unconditionally as we get them, then
  2935      test for predicates.  If predicates pass, then all good, if
  2936      predicates don't pass, revert the result back to the result
  2937      before this loop (PREV_RESULT).  (Predicates control the entire
  2938      match.) This way we don't need to create a list of captures in
  2939      every for loop and nconc it to RESULT every time.  That is indeed
  2940      the initial implementation in which Yoav found nconc being the
  2941      bottleneck (98.4% of the running time spent on nconc).  */
  2942   uint32_t patterns_count = ts_query_pattern_count (treesit_query);
  2943   Lisp_Object result = Qnil;
  2944   Lisp_Object prev_result = result;
  2945   Lisp_Object predicates_table = make_vector (patterns_count, Qt);
  2946   Lisp_Object predicate_signal_data = Qnil;
  2947 
  2948   struct buffer *old_buf = current_buffer;
  2949   set_buffer_internal (buf);
  2950 
  2951   while (ts_query_cursor_next_match (cursor, &match))
  2952     {
  2953       /* Record the checkpoint that we may roll back to.  */
  2954       prev_result = result;
  2955       /* 1. Get captured nodes.  */
  2956       const TSQueryCapture *captures = match.captures;
  2957       for (int idx = 0; idx < match.capture_count; idx++)
  2958         {
  2959           uint32_t capture_name_len;
  2960           TSQueryCapture capture = captures[idx];
  2961           Lisp_Object captured_node = make_treesit_node (lisp_parser,
  2962                                                          capture.node);
  2963 
  2964           Lisp_Object cap;
  2965           if (NILP (node_only))
  2966             {
  2967               const char *capture_name
  2968                 = ts_query_capture_name_for_id (treesit_query, capture.index,
  2969                                                 &capture_name_len);
  2970               cap = Fcons (intern_c_string_1 (capture_name, capture_name_len),
  2971                            captured_node);
  2972             }
  2973           else
  2974             cap = captured_node;
  2975 
  2976           result = Fcons (cap, result);
  2977         }
  2978       /* 2. Get predicates and check whether this match can be
  2979          included in the result list.  */
  2980       Lisp_Object predicates = AREF (predicates_table, match.pattern_index);
  2981       if (BASE_EQ (predicates, Qt))
  2982         {
  2983           predicates = treesit_predicates_for_pattern (treesit_query,
  2984                                                        match.pattern_index);
  2985           ASET (predicates_table, match.pattern_index, predicates);
  2986         }
  2987 
  2988       /* captures_lisp = Fnreverse (captures_lisp); */
  2989       struct capture_range captures_range = { result, prev_result };
  2990       bool match = treesit_eval_predicates (captures_range, predicates,
  2991                                             &predicate_signal_data);
  2992       if (!NILP (predicate_signal_data))
  2993         break;
  2994 
  2995       /* Predicates didn't pass, roll back.  */
  2996       if (!match)
  2997         result = prev_result;
  2998     }
  2999 
  3000   /* Final clean up.  */
  3001   if (needs_to_free_query_and_cursor)
  3002     {
  3003       ts_query_delete (treesit_query);
  3004       ts_query_cursor_delete (cursor);
  3005     }
  3006   set_buffer_internal (old_buf);
  3007 
  3008   /* Some capture predicate signaled an error.  */
  3009   if (!NILP (predicate_signal_data))
  3010     xsignal (Qtreesit_query_error, predicate_signal_data);
  3011 
  3012   return Fnreverse (result);
  3013 }
  3014 
  3015 
  3016 /*** Navigation */
  3017 
  3018 static inline void
  3019 treesit_assume_true (bool val)
  3020 {
  3021   eassert (val == true);
  3022 }
  3023 
  3024 /* Tries to move CURSOR to point to TARGET.  END_POS is the end of
  3025    TARGET.  If success, return true, otherwise move CURSOR back to
  3026    starting position and return false.  LIMIT is the recursion
  3027    limit.  */
  3028 static bool
  3029 treesit_cursor_helper_1 (TSTreeCursor *cursor, TSNode *target,
  3030                          uint32_t end_pos, ptrdiff_t limit)
  3031 {
  3032   if (limit <= 0)
  3033     return false;
  3034 
  3035   TSNode cursor_node = ts_tree_cursor_current_node (cursor);
  3036   if (ts_node_eq (cursor_node, *target))
  3037     return true;
  3038 
  3039   if (!ts_tree_cursor_goto_first_child (cursor))
  3040     return false;
  3041 
  3042   /* Skip nodes that definitely don't contain TARGET.  */
  3043   while (ts_node_end_byte (cursor_node) < end_pos)
  3044     {
  3045       if (!ts_tree_cursor_goto_next_sibling (cursor))
  3046         break;
  3047       cursor_node = ts_tree_cursor_current_node (cursor);
  3048     }
  3049 
  3050   /* Go through each sibling that could contain TARGET.  Because of
  3051      missing nodes (their width is 0), there could be multiple
  3052      siblings that could contain TARGET.  */
  3053   while (ts_node_start_byte (cursor_node) <= end_pos)
  3054     {
  3055       if (treesit_cursor_helper_1 (cursor, target, end_pos, limit - 1))
  3056         return true;
  3057 
  3058       if (!ts_tree_cursor_goto_next_sibling (cursor))
  3059         break;
  3060       cursor_node = ts_tree_cursor_current_node (cursor);
  3061     }
  3062 
  3063   /* Couldn't find TARGET, must be not in this subtree, move cursor
  3064      back and pray that other brothers and sisters can succeed.  */
  3065   treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
  3066   return false;
  3067 }
  3068 
  3069 /* Create a TSTreeCursor pointing at NODE.  PARSER is the lisp parser
  3070    that produced NODE.  If success, return true, otherwise return
  3071    false.  This function should almost always succeed, but if the parse
  3072    tree is strangely too deep and exceeds the recursion limit, this
  3073    function will fail and return false.
  3074 
  3075    If this function returns true, caller needs to free CURSOR; if
  3076    returns false, caller don't need to free CURSOR.
  3077 
  3078    The reason we need this instead of simply using ts_tree_cursor_new
  3079    is that we have to create the cursor on the root node and traverse
  3080    down to NODE, in order to record the correct stack of parent nodes.
  3081    Otherwise going to sibling or parent of NODE wouldn't work.
  3082 
  3083    (Wow perfect filling.)  */
  3084 static bool
  3085 treesit_cursor_helper (TSTreeCursor *cursor, TSNode node, Lisp_Object parser)
  3086 {
  3087   uint32_t end_pos = ts_node_end_byte (node);
  3088   TSNode root = ts_tree_root_node (XTS_PARSER (parser)->tree);
  3089   *cursor = ts_tree_cursor_new (root);
  3090   bool success = treesit_cursor_helper_1 (cursor, &node, end_pos,
  3091                                           TREESIT_RECURSION_LIMIT);
  3092   if (!success)
  3093     ts_tree_cursor_delete (cursor);
  3094   return success;
  3095 }
  3096 
  3097 /* Move CURSOR to the next/previous sibling.  FORWARD controls the
  3098    direction.  NAMED controls the namedness.  If there is a valid
  3099    sibling, move CURSOR to it and return true, otherwise return false.
  3100    When false is returned, CURSOR points to a sibling node of the node
  3101    we started at, but exactly which is undefined.  */
  3102 static bool
  3103 treesit_traverse_sibling_helper (TSTreeCursor *cursor,
  3104                                  bool forward, bool named)
  3105 {
  3106   if (forward)
  3107     {
  3108       if (!named)
  3109         return ts_tree_cursor_goto_next_sibling (cursor);
  3110       /* Else named...  */
  3111       while (ts_tree_cursor_goto_next_sibling (cursor))
  3112         {
  3113           if (ts_node_is_named (ts_tree_cursor_current_node (cursor)))
  3114             return true;
  3115         }
  3116       return false;
  3117     }
  3118   else /* Backward.  */
  3119     {
  3120       /* Go to first child and go through each sibling, until we find
  3121          the one just before the starting node.  */
  3122       TSNode start = ts_tree_cursor_current_node (cursor);
  3123       if (!ts_tree_cursor_goto_parent (cursor))
  3124         return false;
  3125       treesit_assume_true (ts_tree_cursor_goto_first_child (cursor));
  3126 
  3127       /* Now CURSOR is at the first child.  If we started at the first
  3128          child, then there is no further siblings.  */
  3129       TSNode first_child = ts_tree_cursor_current_node (cursor);
  3130       if (ts_node_eq (first_child, start))
  3131         return false;
  3132 
  3133       /* PROBE is always DELTA siblings ahead of CURSOR. */
  3134       TSTreeCursor probe = ts_tree_cursor_copy (cursor);
  3135       /* This is position of PROBE minus position of CURSOR.  */
  3136       ptrdiff_t delta = 0;
  3137       TSNode probe_node;
  3138       TSNode cursor_node;
  3139       while (ts_tree_cursor_goto_next_sibling (&probe))
  3140         {
  3141           /* Move PROBE forward, if it equals to the starting node,
  3142              CURSOR points to the node we want (prev valid sibling of
  3143              the starting node).  */
  3144           delta++;
  3145           probe_node = ts_tree_cursor_current_node (&probe);
  3146 
  3147           /* PROBE matched, depending on NAMED, return true/false.  */
  3148           if (ts_node_eq (probe_node, start))
  3149             {
  3150               ts_tree_cursor_delete (&probe);
  3151               cursor_node = ts_tree_cursor_current_node (cursor);
  3152               ts_tree_cursor_delete (&probe);
  3153               return (!named || (named && ts_node_is_named (cursor_node)));
  3154             }
  3155 
  3156           /* PROBE didn't match, move CURSOR forward to PROBE's
  3157              position, but if we are looking for named nodes, only
  3158              move CURSOR to PROBE if PROBE is at a named node.  */
  3159           if (!named || (named && ts_node_is_named (probe_node)))
  3160             for (; delta > 0; delta--)
  3161               treesit_assume_true (ts_tree_cursor_goto_next_sibling (cursor));
  3162         }
  3163       ts_tree_cursor_delete (&probe);
  3164       return false;
  3165     }
  3166 }
  3167 
  3168 /* Move CURSOR to the first/last child.  FORWARD controls the
  3169    direction.  NAMED controls the namedness.  If there is a valid
  3170    child, move CURSOR to it and return true, otherwise don't move
  3171    CURSOR and return false.  */
  3172 static bool
  3173 treesit_traverse_child_helper (TSTreeCursor *cursor,
  3174                                bool forward, bool named)
  3175 {
  3176   if (forward)
  3177     {
  3178       if (!named)
  3179         return ts_tree_cursor_goto_first_child (cursor);
  3180       else
  3181         {
  3182           if (!ts_tree_cursor_goto_first_child (cursor))
  3183             return false;
  3184           /* After this point, if you return false, make sure to go
  3185              back to parent.  */
  3186           TSNode first_child = ts_tree_cursor_current_node (cursor);
  3187           if (ts_node_is_named (first_child))
  3188             return true;
  3189 
  3190           if (treesit_traverse_sibling_helper (cursor, true, true))
  3191             return true;
  3192           else
  3193             {
  3194               treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
  3195               return false;
  3196             }
  3197         }
  3198     }
  3199   else /* Backward.  */
  3200     {
  3201       if (!ts_tree_cursor_goto_first_child (cursor))
  3202         return false;
  3203       /* After this point, if you return false, make sure to go
  3204          back to parent.  */
  3205 
  3206       /* First go to the last child.  */
  3207       while (ts_tree_cursor_goto_next_sibling (cursor));
  3208 
  3209       if (!named)
  3210         return true;
  3211       /* Else named... */
  3212       if (treesit_traverse_sibling_helper(cursor, false, true))
  3213         return true;
  3214       else
  3215         {
  3216           treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
  3217           return false;
  3218         }
  3219     }
  3220 }
  3221 
  3222 /* Given a symbol THING, and a language symbol LANGUAGE, find the
  3223    corresponding predicate definition in treesit-things-settings.
  3224    Don't check for the type of THING and LANGUAGE.
  3225 
  3226    If there isn't one, return Qnil.  */
  3227 static Lisp_Object
  3228 treesit_traverse_get_predicate (Lisp_Object thing, Lisp_Object language)
  3229 {
  3230   Lisp_Object cons = assq_no_quit (language, Vtreesit_thing_settings);
  3231   if (NILP (cons))
  3232     return Qnil;
  3233   Lisp_Object definitions = XCDR (cons);
  3234   Lisp_Object entry = assq_no_quit (thing, definitions);
  3235   if (NILP (entry))
  3236     return Qnil;
  3237   /* ENTRY looks like (THING PRED).  */
  3238   Lisp_Object cdr = XCDR (entry);
  3239   if (!CONSP (cdr))
  3240     return Qnil;
  3241   return XCAR (cdr);
  3242 }
  3243 
  3244 /* Validate the PRED passed to treesit_traverse_match_predicate.  If
  3245    there's an error, set SIGNAL_DATA to something signal accepts, and
  3246    return false, otherwise return true.  This function also check for
  3247    recusion levels: we place a arbitrary 100 level limit on recursive
  3248    predicates.  RECURSION_LEVEL is the current recursion level (that
  3249    starts at 0), if it goes over 99, return false and set
  3250    SIGNAL_DATA.  LANGUAGE is a LANGUAGE symbol.  */
  3251 static bool
  3252 treesit_traverse_validate_predicate (Lisp_Object pred,
  3253                                      Lisp_Object language,
  3254                                      Lisp_Object *signal_data,
  3255                                      ptrdiff_t recursion_level)
  3256 {
  3257   if (recursion_level > 99)
  3258     {
  3259       *signal_data = list1 (build_string ("Predicate recursion level "
  3260                                           "exceeded: it must not exceed "
  3261                                           "100 levels"));
  3262       return false;
  3263     }
  3264   if (STRINGP (pred))
  3265     return true;
  3266   else if (FUNCTIONP (pred))
  3267     return true;
  3268   else if (SYMBOLP (pred))
  3269     {
  3270       Lisp_Object definition = treesit_traverse_get_predicate (pred,
  3271                                                                language);
  3272       if (NILP (definition))
  3273         {
  3274           *signal_data = list2 (build_string ("Cannot find the definition "
  3275                                               "of the predicate in "
  3276                                               "`treesit-thing-settings'"),
  3277                                 pred);
  3278           return false;
  3279         }
  3280       return treesit_traverse_validate_predicate (definition,
  3281                                                   language,
  3282                                                   signal_data,
  3283                                                   recursion_level + 1);
  3284     }
  3285   else if (CONSP (pred))
  3286     {
  3287       Lisp_Object car = XCAR (pred);
  3288       Lisp_Object cdr = XCDR (pred);
  3289       if (BASE_EQ (car, Qnot))
  3290         {
  3291           if (!CONSP (cdr))
  3292             {
  3293               *signal_data = list2 (build_string ("Invalide `not' "
  3294                                                   "predicate"),
  3295                                     pred);
  3296               return false;
  3297             }
  3298           /* At this point CDR must be a cons.  */
  3299           if (XFIXNUM (Flength (cdr)) != 1)
  3300             {
  3301               *signal_data = list2 (build_string ("`not' can only "
  3302                                                   "have one argument"),
  3303                                     pred);
  3304               return false;
  3305             }
  3306           return treesit_traverse_validate_predicate (XCAR (cdr),
  3307                                                       language,
  3308                                                       signal_data,
  3309                                                       recursion_level + 1);
  3310         }
  3311       else if (BASE_EQ (car, Qor))
  3312         {
  3313           if (!CONSP (cdr) || NILP (cdr))
  3314             {
  3315               *signal_data = list2 (build_string ("`or' must have a list "
  3316                                                   "of patterns as "
  3317                                                   "arguments "),
  3318                                     pred);
  3319               return false;
  3320             }
  3321           FOR_EACH_TAIL (cdr)
  3322             {
  3323               if (!treesit_traverse_validate_predicate (XCAR (cdr),
  3324                                                         language,
  3325                                                         signal_data,
  3326                                                         recursion_level + 1))
  3327                 return false;
  3328             }
  3329           return true;
  3330         }
  3331       else if (STRINGP (car) && FUNCTIONP (cdr))
  3332         return true;
  3333     }
  3334   *signal_data = list2 (build_string ("Invalid predicate, see `treesit-thing-settings' for valid forms of predicate"),
  3335                         pred);
  3336   return false;
  3337 }
  3338 
  3339 /* Return true if the node at CURSOR matches PRED.  PRED can be a lot
  3340    of things:
  3341 
  3342    PRED := string | function | (string . function)
  3343          | (or PRED...) | (not PRED)
  3344 
  3345    See docstring of treesit-search-forward and friends for the meaning
  3346    of each shape.
  3347 
  3348    This function assumes PRED is in one of its valid forms.  If NAMED
  3349    is true, also check that the node is named.
  3350 
  3351    This function may signal if the predicate function signals.  */
  3352 static bool
  3353 treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
  3354                                   Lisp_Object parser, bool named)
  3355 {
  3356   TSNode node = ts_tree_cursor_current_node (cursor);
  3357   if (named && !ts_node_is_named (node))
  3358     return false;
  3359 
  3360   if (STRINGP (pred))
  3361     {
  3362       const char *type = ts_node_type (node);
  3363       return fast_c_string_match (pred, type, strlen (type)) >= 0;
  3364     }
  3365   else if (FUNCTIONP (pred))
  3366     {
  3367       Lisp_Object lisp_node = make_treesit_node (parser, node);
  3368       return !NILP (CALLN (Ffuncall, pred, lisp_node));
  3369     }
  3370   else if (SYMBOLP (pred))
  3371     {
  3372       Lisp_Object language = XTS_PARSER (parser)->language_symbol;
  3373       Lisp_Object definition = treesit_traverse_get_predicate (pred,
  3374                                                                language);
  3375       return treesit_traverse_match_predicate (cursor, definition,
  3376                                                parser, named);
  3377     }
  3378   else if (CONSP (pred))
  3379     {
  3380       Lisp_Object car = XCAR (pred);
  3381       Lisp_Object cdr = XCDR (pred);
  3382 
  3383       if (BASE_EQ (car, Qnot))
  3384         return !treesit_traverse_match_predicate (cursor, XCAR (cdr),
  3385                                                   parser, named);
  3386       else if (BASE_EQ (car, Qor))
  3387         {
  3388           FOR_EACH_TAIL (cdr)
  3389             {
  3390               if (treesit_traverse_match_predicate (cursor, XCAR (cdr),
  3391                                                     parser, named))
  3392                 return true;
  3393             }
  3394           return false;
  3395         }
  3396       else if (STRINGP (car) && FUNCTIONP (cdr))
  3397         {
  3398           /* A bit of code duplication here, but should be fine.  */
  3399           const char *type = ts_node_type (node);
  3400           if (!(fast_c_string_match (car, type, strlen (type)) >= 0))
  3401             return false;
  3402 
  3403           Lisp_Object lisp_node = make_treesit_node (parser, node);
  3404           if (NILP (CALLN (Ffuncall, cdr, lisp_node)))
  3405             return false;
  3406 
  3407           return true;
  3408         }
  3409     }
  3410   /* Returning false is better than UB. */
  3411   return false;
  3412 }
  3413 
  3414 /* Traverse the parse tree starting from CURSOR.  See
  3415    `treesit-thing-settings' for the shapes PRED can have.  If the
  3416    node satisfies PRED, leave CURSOR on that node and return true.  If
  3417    no node satisfies PRED, move CURSOR back to starting position and
  3418    return false.
  3419 
  3420    LIMIT is the number of levels we descend in the tree.  FORWARD
  3421    controls the direction in which we traverse the tree, true means
  3422    forward, false backward.  If SKIP_ROOT is true, don't match ROOT.
  3423 
  3424    This function may signal if the predicate function signals.  */
  3425 
  3426 static bool
  3427 treesit_search_dfs (TSTreeCursor *cursor,
  3428                     Lisp_Object pred, Lisp_Object parser,
  3429                     bool forward, bool named, ptrdiff_t limit,
  3430                     bool skip_root)
  3431 {
  3432   if (!skip_root
  3433       && treesit_traverse_match_predicate (cursor, pred, parser, named))
  3434     return true;
  3435 
  3436   if (limit == 0)
  3437     return false;
  3438 
  3439   if (!treesit_traverse_child_helper (cursor, forward, named))
  3440     return false;
  3441   /* After this point, if you return false, make sure to go back to
  3442      parent.  */
  3443 
  3444   do /* Iterate through each child.  */
  3445     {
  3446       if (treesit_search_dfs (cursor, pred, parser, forward,
  3447                               named, limit - 1, false))
  3448         return true;
  3449     }
  3450   while (treesit_traverse_sibling_helper (cursor, forward, false));
  3451 
  3452   /* No match in any child's subtree, go back to starting node.  */
  3453   treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
  3454   return false;
  3455 }
  3456 
  3457 /* Go through the whole tree linearly, leaf-first, starting from
  3458    START.  PRED, PARSER, NAMED, FORWARD are the same as in
  3459    ts_search_subtree.  If a match is found, leave CURSOR at that node,
  3460    and return true, if no match is found, return false, and CURSOR's
  3461    position is undefined.
  3462 
  3463    This function may signal if the predicate function signals.  */
  3464 
  3465 static bool
  3466 treesit_search_forward (TSTreeCursor *cursor,
  3467                         Lisp_Object pred, Lisp_Object parser,
  3468                         bool forward, bool named)
  3469 {
  3470   /* We don't search for subtree and always search from the leaf
  3471      nodes.  This way repeated call of this function traverses each
  3472      node in the tree once and only once:
  3473 
  3474      (while node (setq node (treesit-search-forward node)))  */
  3475   bool initial = true;
  3476   while (true)
  3477     {
  3478       if (!initial /* We don't match the starting node.  */
  3479           && treesit_traverse_match_predicate (cursor, pred, parser, named))
  3480         return true;
  3481       initial = false;
  3482 
  3483       /* Try going to the next sibling, if there is no next sibling,
  3484          go to parent and try again.  */
  3485       while (!treesit_traverse_sibling_helper (cursor, forward, named))
  3486         {
  3487           /* There is no next sibling, go to parent.  */
  3488           if (!ts_tree_cursor_goto_parent (cursor))
  3489             return false;
  3490 
  3491           if (treesit_traverse_match_predicate (cursor, pred, parser, named))
  3492               return true;
  3493         }
  3494       /* We are at the next sibling, deep dive into the first leaf
  3495          node.  */
  3496       while (treesit_traverse_child_helper (cursor, forward, false));
  3497       /* At this point CURSOR is at a leaf node.  */
  3498     }
  3499 }
  3500 
  3501 /* Clean up the given tree cursor CURSOR.  */
  3502 
  3503 static void
  3504 treesit_traverse_cleanup_cursor (void *cursor)
  3505 {
  3506   ts_tree_cursor_delete (cursor);
  3507 }
  3508 
  3509 DEFUN ("treesit-search-subtree",
  3510        Ftreesit_search_subtree,
  3511        Streesit_search_subtree, 2, 5, 0,
  3512        doc: /* Traverse the parse tree of NODE depth-first using PREDICATE.
  3513 
  3514 Traverse the subtree of NODE, and match PREDICATE with each node along
  3515 the way.  PREDICATE is a regexp string that matches against each
  3516 node's type, or a function that takes a node and returns nil/non-nil.
  3517 
  3518 By default, only traverse named nodes, but if ALL is non-nil, traverse
  3519 all nodes.  If BACKWARD is non-nil, traverse backwards.  If DEPTH is
  3520 non-nil, only traverse nodes up to that number of levels down in the
  3521 tree.  If DEPTH is nil, default to 1000.
  3522 
  3523 Return the first matched node, or nil if none matches.  */)
  3524   (Lisp_Object node, Lisp_Object predicate, Lisp_Object backward,
  3525    Lisp_Object all, Lisp_Object depth)
  3526 {
  3527   CHECK_TS_NODE (node);
  3528   CHECK_SYMBOL (all);
  3529   CHECK_SYMBOL (backward);
  3530 
  3531   /* We use a default limit of 1000.  See bug#59426 for the
  3532      discussion.  */
  3533   ptrdiff_t the_limit = TREESIT_RECURSION_LIMIT;
  3534   if (!NILP (depth))
  3535     {
  3536       CHECK_FIXNUM (depth);
  3537       the_limit = XFIXNUM (depth);
  3538     }
  3539 
  3540   treesit_initialize ();
  3541 
  3542   Lisp_Object parser = XTS_NODE (node)->parser;
  3543   Lisp_Object language = XTS_PARSER (parser)->language_symbol;
  3544 
  3545   Lisp_Object signal_data = Qnil;
  3546   if (!treesit_traverse_validate_predicate (predicate, language,
  3547                                             &signal_data, 0))
  3548     xsignal1 (Qtreesit_invalid_predicate, signal_data);
  3549 
  3550   Lisp_Object return_value = Qnil;
  3551   TSTreeCursor cursor;
  3552   if (!treesit_cursor_helper (&cursor, XTS_NODE (node)->node, parser))
  3553     return return_value;
  3554 
  3555   specpdl_ref count = SPECPDL_INDEX ();
  3556   record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
  3557 
  3558   if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward),
  3559                           NILP (all), the_limit, false))
  3560     {
  3561       TSNode node = ts_tree_cursor_current_node (&cursor);
  3562       return_value = make_treesit_node (parser, node);
  3563     }
  3564 
  3565   return unbind_to (count, return_value);
  3566 }
  3567 
  3568 DEFUN ("treesit-search-forward",
  3569        Ftreesit_search_forward,
  3570        Streesit_search_forward, 2, 4, 0,
  3571        doc: /* Search for node matching PREDICATE in the parse tree of START.
  3572 
  3573 Start traversing the tree from node START, and match PREDICATE with
  3574 each node (except START itself) along the way.  PREDICATE is a regexp
  3575 string that matches against each node's type, or a function that takes
  3576 a node and returns non-nil if it matches.
  3577 
  3578 By default, only search for named nodes, but if ALL is non-nil, search
  3579 for all nodes.  If BACKWARD is non-nil, search backwards.
  3580 
  3581 Return the first matched node, or nil if none matches.
  3582 
  3583 For a tree like below, where START is marked by S, traverse as
  3584 numbered from 1 to 12:
  3585 
  3586                 12
  3587                 |
  3588        S--------3----------11
  3589        |        |          |
  3590   o--o-+--o  1--+--2    6--+-----10
  3591   |  |                  |        |
  3592   o  o                +-+-+   +--+--+
  3593                       |   |   |  |  |
  3594                       4   5   7  8  9
  3595 
  3596 Note that this function doesn't traverse the subtree of START, and it
  3597 always traverse leaf nodes first, then upwards.  */)
  3598   (Lisp_Object start, Lisp_Object predicate, Lisp_Object backward,
  3599    Lisp_Object all)
  3600 {
  3601   CHECK_TS_NODE (start);
  3602   CHECK_SYMBOL (all);
  3603   CHECK_SYMBOL (backward);
  3604 
  3605   treesit_initialize ();
  3606 
  3607   Lisp_Object parser = XTS_NODE (start)->parser;
  3608   Lisp_Object language = XTS_PARSER (parser)->language_symbol;
  3609 
  3610   Lisp_Object signal_data = Qnil;
  3611   if (!treesit_traverse_validate_predicate (predicate, language,
  3612                                             &signal_data, 0))
  3613     xsignal1 (Qtreesit_invalid_predicate, signal_data);
  3614 
  3615   Lisp_Object return_value = Qnil;
  3616   TSTreeCursor cursor;
  3617   if (!treesit_cursor_helper (&cursor, XTS_NODE (start)->node, parser))
  3618     return return_value;
  3619 
  3620   specpdl_ref count = SPECPDL_INDEX ();
  3621   record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
  3622 
  3623   if (treesit_search_forward (&cursor, predicate, parser,
  3624                               NILP (backward), NILP (all)))
  3625     {
  3626       TSNode node = ts_tree_cursor_current_node (&cursor);
  3627       return_value = make_treesit_node (parser, node);
  3628     }
  3629 
  3630   return unbind_to (count, return_value);
  3631 }
  3632 
  3633 /* Recursively traverse the tree under CURSOR, and append the result
  3634    subtree to PARENT's cdr.  See more in Ftreesit_induce_sparse_tree.
  3635    Note that the top-level children list is reversed, because
  3636    reasons.
  3637 
  3638    This function may signal if the predicate function signals.  */
  3639 static void
  3640 treesit_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent,
  3641                            Lisp_Object pred, Lisp_Object process_fn,
  3642                            ptrdiff_t limit, Lisp_Object parser)
  3643 {
  3644   bool match = treesit_traverse_match_predicate (cursor, pred, parser, false);
  3645   if (match)
  3646     {
  3647       /* If this node matches pred, add a new node to the parent's
  3648          children list.  */
  3649       TSNode node = ts_tree_cursor_current_node (cursor);
  3650       Lisp_Object lisp_node = make_treesit_node (parser, node);
  3651       if (!NILP (process_fn))
  3652         lisp_node = CALLN (Ffuncall, process_fn, lisp_node);
  3653 
  3654       Lisp_Object this = Fcons (lisp_node, Qnil);
  3655       Fsetcdr (parent, Fcons (this, Fcdr (parent)));
  3656       /* Now for children nodes, this is the new parent.  */
  3657       parent = this;
  3658     }
  3659   /* Go through each child.  */
  3660   if (limit > 0 && ts_tree_cursor_goto_first_child (cursor))
  3661     {
  3662       do
  3663         {
  3664           /* Make sure not to use node after the recursive funcall.
  3665              Then C compilers should be smart enough not to copy NODE
  3666              to stack.  */
  3667           treesit_build_sparse_tree (cursor, parent, pred, process_fn,
  3668                                      limit - 1, parser);
  3669         }
  3670       while (ts_tree_cursor_goto_next_sibling (cursor));
  3671       /* Don't forget to come back to this node.  */
  3672       ts_tree_cursor_goto_parent (cursor);
  3673     }
  3674   /* Before we go, reverse children in the sparse tree.  */
  3675   if (match)
  3676     /* When match == true, "parent" is actually the node we added in
  3677        this layer (parent = this).  */
  3678     Fsetcdr (parent, Fnreverse (Fcdr (parent)));
  3679 }
  3680 
  3681 DEFUN ("treesit-induce-sparse-tree",
  3682        Ftreesit_induce_sparse_tree,
  3683        Streesit_induce_sparse_tree, 2, 4, 0,
  3684        doc: /* Create a sparse tree of ROOT's subtree.
  3685 
  3686 This takes the subtree under ROOT, and combs it so only the nodes
  3687 that match PREDICATE are left, like picking out grapes on the vine.
  3688 PREDICATE is a regexp string that matches against each node's type.
  3689 
  3690 For a subtree on the left that consist of both numbers and letters, if
  3691 PREDICATE is "is letter", the returned tree is the one on the right.
  3692 
  3693         a                 a              a
  3694         |                 |              |
  3695     +---+---+         +---+---+      +---+---+
  3696     |   |   |         |   |   |      |   |   |
  3697     b   1   2         b   |   |      b   c   d
  3698         |   |     =>      |   |  =>      |
  3699         c   +--+          c   +          e
  3700         |   |  |          |   |
  3701      +--+   d  4       +--+   d
  3702      |  |              |
  3703      e  5              e
  3704 
  3705 If PROCESS-FN is non-nil, it should be a function of one argument.  In
  3706 that case, instead of returning the matched nodes, pass each node to
  3707 PROCESS-FN, and use its return value instead.
  3708 
  3709 If non-nil, DEPTH is the number of levels to go down the tree from
  3710 ROOT.  If DEPTH is nil or omitted, it defaults to 1000.
  3711 
  3712 Each node in the returned tree looks like (NODE . (CHILD ...)).  The
  3713 root of this tree might be nil, if ROOT doesn't match PREDICATE.
  3714 
  3715 If no node matches PREDICATE, return nil.
  3716 
  3717 PREDICATE can also be a function that takes a node and returns
  3718 nil/non-nil, but it is slower and more memory consuming than using
  3719 a regexp.  */)
  3720   (Lisp_Object root, Lisp_Object predicate, Lisp_Object process_fn,
  3721    Lisp_Object depth)
  3722 {
  3723   CHECK_TS_NODE (root);
  3724 
  3725   if (!NILP (process_fn))
  3726     CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
  3727 
  3728   /* We use a default limit of 1000.  See bug#59426 for the
  3729      discussion.  */
  3730   ptrdiff_t the_limit = TREESIT_RECURSION_LIMIT;
  3731   if (!NILP (depth))
  3732     {
  3733       CHECK_FIXNUM (depth);
  3734       the_limit = XFIXNUM (depth);
  3735     }
  3736 
  3737   treesit_initialize ();
  3738 
  3739   Lisp_Object parser = XTS_NODE (root)->parser;
  3740   Lisp_Object language = XTS_PARSER (parser)->language_symbol;
  3741 
  3742   Lisp_Object signal_data = Qnil;
  3743   if (!treesit_traverse_validate_predicate (predicate, language,
  3744                                             &signal_data, 0))
  3745     xsignal1 (Qtreesit_invalid_predicate, signal_data);
  3746 
  3747   Lisp_Object parent = Fcons (Qnil, Qnil);
  3748   /* In this function we never traverse above NODE, so we don't need
  3749      to use treesit_cursor_helper.  */
  3750   TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node);
  3751 
  3752   specpdl_ref count = SPECPDL_INDEX ();
  3753   record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
  3754 
  3755   treesit_build_sparse_tree (&cursor, parent, predicate, process_fn,
  3756                              the_limit, parser);
  3757 
  3758   unbind_to (count, Qnil);
  3759 
  3760   Fsetcdr (parent, Fnreverse (Fcdr (parent)));
  3761 
  3762   if (NILP (Fcdr (parent)))
  3763     return Qnil;
  3764   else
  3765     return parent;
  3766 }
  3767 
  3768 DEFUN ("treesit-node-match-p",
  3769        Ftreesit_node_match_p,
  3770        Streesit_node_match_p, 2, 2, 0,
  3771        doc: /* Check whether NODE matches PREDICATE.
  3772 
  3773 PREDICATE can be a regexp matching node type, a predicate function,
  3774 and more, see `treesit-thing-settings' for detail.  Return non-nil
  3775 if NODE matches PRED, nil otherwise.  */)
  3776   (Lisp_Object node, Lisp_Object predicate)
  3777 {
  3778   CHECK_TS_NODE (node);
  3779 
  3780   Lisp_Object parser = XTS_NODE (node)->parser;
  3781   Lisp_Object language = XTS_PARSER (parser)->language_symbol;
  3782 
  3783   Lisp_Object signal_data = Qnil;
  3784   if (!treesit_traverse_validate_predicate (predicate, language,
  3785                                             &signal_data, 0))
  3786     xsignal1 (Qtreesit_invalid_predicate, signal_data);
  3787 
  3788   TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (node)->node);
  3789 
  3790   specpdl_ref count = SPECPDL_INDEX ();
  3791   record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
  3792 
  3793   bool match = false;
  3794   match = treesit_traverse_match_predicate (&cursor, predicate,
  3795                                             parser, false);
  3796 
  3797   unbind_to (count, Qnil);
  3798 
  3799   return match ? Qt : Qnil;
  3800 }
  3801 
  3802 DEFUN ("treesit-subtree-stat",
  3803        Ftreesit_subtree_stat,
  3804        Streesit_subtree_stat, 1, 1, 0,
  3805        doc: /* Return information about the subtree of NODE.
  3806 
  3807 Return a list (MAX-DEPTH MAX-WIDTH COUNT), where MAX-DEPTH is the
  3808 maximum depth of the subtree, MAX-WIDTH is the maximum number of
  3809 direct children of nodes in the subtree, and COUNT is the number of
  3810 nodes in the subtree, including NODE.  */)
  3811   (Lisp_Object node)
  3812 {
  3813   /* Having a limit on the depth to traverse doesn't have much impact
  3814      on the time it takes, so I left that out.  */
  3815   CHECK_TS_NODE (node);
  3816 
  3817   treesit_initialize ();
  3818 
  3819   TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (node)->node);
  3820   ptrdiff_t max_depth = 1;
  3821   ptrdiff_t max_width = 0;
  3822   ptrdiff_t count = 0;
  3823   ptrdiff_t current_depth = 0;
  3824 
  3825   /* Traverse the subtree depth-first.  */
  3826   while (true)
  3827     {
  3828       count++;
  3829 
  3830       /* Go down depth-first.  */
  3831       while (ts_tree_cursor_goto_first_child (&cursor))
  3832         {
  3833           current_depth++;
  3834           count++;
  3835           /* While we're at here, measure the number of siblings.  */
  3836           ptrdiff_t width_count = 1;
  3837           while (ts_tree_cursor_goto_next_sibling (&cursor))
  3838             width_count++;
  3839           max_width = max (max_width, width_count);
  3840           /* Go back to the first sibling.  */
  3841           treesit_assume_true (ts_tree_cursor_goto_parent (&cursor));
  3842           treesit_assume_true (ts_tree_cursor_goto_first_child (&cursor));
  3843         }
  3844       max_depth = max (max_depth, current_depth);
  3845 
  3846       /* Go to next sibling.  If there is no next sibling, go to
  3847          parent's next sibling, and so on.  If there is no more
  3848          parent, we've traversed the whole subtree, stop.  */
  3849       while (!ts_tree_cursor_goto_next_sibling (&cursor))
  3850         {
  3851           if (ts_tree_cursor_goto_parent (&cursor))
  3852             current_depth--;
  3853           else
  3854             {
  3855               ts_tree_cursor_delete (&cursor);
  3856               return list3 (make_fixnum (max_depth),
  3857                             make_fixnum (max_width),
  3858                             make_fixnum (count));
  3859             }
  3860         }
  3861     }
  3862 }
  3863 
  3864 #endif  /* HAVE_TREE_SITTER */
  3865 
  3866 DEFUN ("treesit-available-p", Ftreesit_available_p,
  3867        Streesit_available_p, 0, 0, 0,
  3868        doc: /* Return non-nil if tree-sitter support is built-in and available.  */)
  3869   (void)
  3870 {
  3871 #if HAVE_TREE_SITTER
  3872   return load_tree_sitter_if_necessary (false) ? Qt : Qnil;
  3873 #else
  3874   return Qnil;
  3875 #endif
  3876 }
  3877 
  3878 
  3879 /*** Initialization */
  3880 
  3881 /* Initialize the tree-sitter routines.  */
  3882 void
  3883 syms_of_treesit (void)
  3884 {
  3885 #if HAVE_TREE_SITTER
  3886   DEFSYM (Qtreesit_parser_p, "treesit-parser-p");
  3887   DEFSYM (Qtreesit_node_p, "treesit-node-p");
  3888   DEFSYM (Qtreesit_compiled_query_p, "treesit-compiled-query-p");
  3889   DEFSYM (Qtreesit_query_p, "treesit-query-p");
  3890   DEFSYM (Qnamed, "named");
  3891   DEFSYM (Qmissing, "missing");
  3892   DEFSYM (Qextra, "extra");
  3893   DEFSYM (Qoutdated, "outdated");
  3894   DEFSYM (Qhas_error, "has-error");
  3895   DEFSYM (Qlive, "live");
  3896   DEFSYM (Qnot, "not");
  3897 
  3898   DEFSYM (QCanchor, ":anchor");
  3899   DEFSYM (QCquestion, ":?");
  3900   DEFSYM (QCstar, ":*");
  3901   DEFSYM (QCplus, ":+");
  3902   DEFSYM (QCequal, ":equal");
  3903   DEFSYM (QCmatch, ":match");
  3904   DEFSYM (QCpred, ":pred");
  3905 
  3906   DEFSYM (Qnot_found, "not-found");
  3907   DEFSYM (Qsymbol_error, "symbol-error");
  3908   DEFSYM (Qversion_mismatch, "version-mismatch");
  3909 
  3910   DEFSYM (Qtreesit_error, "treesit-error");
  3911   DEFSYM (Qtreesit_query_error, "treesit-query-error");
  3912   DEFSYM (Qtreesit_parse_error, "treesit-parse-error");
  3913   DEFSYM (Qtreesit_range_invalid, "treesit-range-invalid");
  3914   DEFSYM (Qtreesit_buffer_too_large,
  3915           "treesit-buffer-too-large");
  3916   DEFSYM (Qtreesit_load_language_error,
  3917           "treesit-load-language-error");
  3918   DEFSYM (Qtreesit_node_outdated,
  3919           "treesit-node-outdated");
  3920   DEFSYM (Quser_emacs_directory,
  3921           "user-emacs-directory");
  3922   DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted");
  3923   DEFSYM (Qtreesit_pattern_expand, "treesit-pattern-expand");
  3924   DEFSYM (Qtreesit_invalid_predicate, "treesit-invalid-predicate");
  3925 
  3926   DEFSYM (Qor, "or");
  3927 
  3928 #ifdef WINDOWSNT
  3929   DEFSYM (Qtree_sitter, "tree-sitter");
  3930 #endif
  3931 
  3932   define_error (Qtreesit_error, "Generic tree-sitter error", Qerror);
  3933   define_error (Qtreesit_query_error, "Query pattern is malformed",
  3934                 Qtreesit_error);
  3935   /* Should be impossible, no need to document this error.  */
  3936   define_error (Qtreesit_parse_error, "Parse failed",
  3937                 Qtreesit_error);
  3938   define_error (Qtreesit_range_invalid,
  3939                 "RANGES are invalid: they have to be ordered and should not overlap",
  3940                 Qtreesit_error);
  3941   define_error (Qtreesit_buffer_too_large, "Buffer too large (> 4GiB)",
  3942                 Qtreesit_error);
  3943   define_error (Qtreesit_load_language_error,
  3944                 "Cannot load language definition",
  3945                 Qtreesit_error);
  3946   define_error (Qtreesit_node_outdated,
  3947                 "This node is outdated, please retrieve a new one",
  3948                 Qtreesit_error);
  3949   define_error (Qtreesit_parser_deleted,
  3950                 "This parser is deleted and cannot be used",
  3951                 Qtreesit_error);
  3952   define_error (Qtreesit_invalid_predicate,
  3953                 "Invalid predicate, see `treesit-thing-settings' "
  3954                 "for valid forms for a predicate",
  3955                 Qtreesit_error);
  3956 
  3957   DEFVAR_LISP ("treesit-load-name-override-list",
  3958                Vtreesit_load_name_override_list,
  3959                doc:
  3960                /* An override list for unconventional tree-sitter libraries.
  3961 
  3962 By default, Emacs assumes the dynamic library for LANG is
  3963 libtree-sitter-LANG.EXT, where EXT is the OS specific extension for
  3964 dynamic libraries.  Emacs also assumes that the name of the C function
  3965 the library provides is tree_sitter_LANG.  If that is not the case,
  3966 you can add an entry
  3967 
  3968     (LANG LIBRARY-BASE-NAME FUNCTION-NAME)
  3969 
  3970 to this list, where LIBRARY-BASE-NAME is the filename of the dynamic
  3971 library without the file-name extension, and FUNCTION-NAME is the
  3972 function provided by the library.  */);
  3973   Vtreesit_load_name_override_list = Qnil;
  3974 
  3975   DEFVAR_LISP ("treesit-extra-load-path",
  3976                Vtreesit_extra_load_path,
  3977                doc:
  3978                /* Additional directories to look for tree-sitter language definitions.
  3979 The value should be a list of directories.
  3980 When trying to load a tree-sitter language definition,
  3981 Emacs first looks in the directories mentioned in this variable,
  3982 then in the `tree-sitter' subdirectory of `user-emacs-directory', and
  3983 then in the system default locations for dynamic libraries, in that order.  */);
  3984   Vtreesit_extra_load_path = Qnil;
  3985 
  3986   DEFVAR_LISP ("treesit-thing-settings",
  3987                Vtreesit_thing_settings,
  3988                doc:
  3989                /* A list defining things.
  3990 
  3991 The value should be an alist of (LANGUAGE . DEFINITIONS), where
  3992 LANGUAGE is a language symbol, and DEFINITIONS is a list of
  3993 
  3994     (THING PRED)
  3995 
  3996 THING is a symbol representing the thing, like `defun', `sexp', or
  3997 `block'; PRED defines what kind of node can be qualified as THING.
  3998 
  3999 PRED can be a regexp string that matches the type of the node; it can
  4000 be a predicate function that takes the node as the sole argument and
  4001 returns t if the node is the thing; it can be a cons (REGEXP . FN),
  4002 which is a combination of a regexp and a predicate function, and the
  4003 node has to match both to qualify as the thing.
  4004 
  4005 PRED can also be recursively defined.  It can be (or PRED...), meaning
  4006 satisfying anyone of the inner PREDs qualifies the node; or (not
  4007 PRED), meaning not satisfying the inner PRED qualifies the node.
  4008 
  4009 Finally, PRED can refer to other THINGs defined in this list by using
  4010 the symbol of that THING.  For example, (or block sexp).  */);
  4011   Vtreesit_thing_settings = Qnil;
  4012 
  4013   staticpro (&Vtreesit_str_libtree_sitter);
  4014   Vtreesit_str_libtree_sitter = build_pure_c_string ("libtree-sitter-");
  4015   staticpro (&Vtreesit_str_tree_sitter);
  4016   Vtreesit_str_tree_sitter = build_pure_c_string ("tree-sitter-");
  4017 #ifndef WINDOWSNT
  4018   staticpro (&Vtreesit_str_dot_0);
  4019   Vtreesit_str_dot_0 = build_pure_c_string (".0");
  4020 #endif
  4021   staticpro (&Vtreesit_str_dot);
  4022   Vtreesit_str_dot = build_pure_c_string (".");
  4023   staticpro (&Vtreesit_str_question_mark);
  4024   Vtreesit_str_question_mark = build_pure_c_string ("?");
  4025   staticpro (&Vtreesit_str_star);
  4026   Vtreesit_str_star = build_pure_c_string ("*");
  4027   staticpro (&Vtreesit_str_plus);
  4028   Vtreesit_str_plus = build_pure_c_string ("+");
  4029   staticpro (&Vtreesit_str_pound_equal);
  4030   Vtreesit_str_pound_equal = build_pure_c_string ("#equal");
  4031   staticpro (&Vtreesit_str_pound_match);
  4032   Vtreesit_str_pound_match = build_pure_c_string ("#match");
  4033   staticpro (&Vtreesit_str_pound_pred);
  4034   Vtreesit_str_pound_pred = build_pure_c_string ("#pred");
  4035   staticpro (&Vtreesit_str_open_bracket);
  4036   Vtreesit_str_open_bracket = build_pure_c_string ("[");
  4037   staticpro (&Vtreesit_str_close_bracket);
  4038   Vtreesit_str_close_bracket = build_pure_c_string ("]");
  4039   staticpro (&Vtreesit_str_open_paren);
  4040   Vtreesit_str_open_paren = build_pure_c_string ("(");
  4041   staticpro (&Vtreesit_str_close_paren);
  4042   Vtreesit_str_close_paren = build_pure_c_string (")");
  4043   staticpro (&Vtreesit_str_space);
  4044   Vtreesit_str_space = build_pure_c_string (" ");
  4045   staticpro (&Vtreesit_str_equal);
  4046   Vtreesit_str_equal = build_pure_c_string ("equal");
  4047   staticpro (&Vtreesit_str_match);
  4048   Vtreesit_str_match = build_pure_c_string ("match");
  4049   staticpro (&Vtreesit_str_pred);
  4050   Vtreesit_str_pred = build_pure_c_string ("pred");
  4051 
  4052   defsubr (&Streesit_language_available_p);
  4053   defsubr (&Streesit_library_abi_version);
  4054   defsubr (&Streesit_language_abi_version);
  4055 
  4056   defsubr (&Streesit_parser_p);
  4057   defsubr (&Streesit_node_p);
  4058   defsubr (&Streesit_compiled_query_p);
  4059   defsubr (&Streesit_query_p);
  4060   defsubr (&Streesit_query_language);
  4061 
  4062   defsubr (&Streesit_node_parser);
  4063 
  4064   defsubr (&Streesit_parser_create);
  4065   defsubr (&Streesit_parser_delete);
  4066   defsubr (&Streesit_parser_list);
  4067   defsubr (&Streesit_parser_buffer);
  4068   defsubr (&Streesit_parser_language);
  4069 
  4070   defsubr (&Streesit_parser_root_node);
  4071   /* defsubr (&Streesit_parse_string); */
  4072 
  4073   defsubr (&Streesit_parser_set_included_ranges);
  4074   defsubr (&Streesit_parser_included_ranges);
  4075 
  4076   defsubr (&Streesit_parser_notifiers);
  4077   defsubr (&Streesit_parser_add_notifier);
  4078   defsubr (&Streesit_parser_remove_notifier);
  4079 
  4080   defsubr (&Streesit_node_type);
  4081   defsubr (&Streesit_node_start);
  4082   defsubr (&Streesit_node_end);
  4083   defsubr (&Streesit_node_string);
  4084   defsubr (&Streesit_node_parent);
  4085   defsubr (&Streesit_node_child);
  4086   defsubr (&Streesit_node_check);
  4087   defsubr (&Streesit_node_field_name_for_child);
  4088   defsubr (&Streesit_node_child_count);
  4089   defsubr (&Streesit_node_child_by_field_name);
  4090   defsubr (&Streesit_node_next_sibling);
  4091   defsubr (&Streesit_node_prev_sibling);
  4092   defsubr (&Streesit_node_first_child_for_pos);
  4093   defsubr (&Streesit_node_descendant_for_range);
  4094   defsubr (&Streesit_node_eq);
  4095 
  4096   defsubr (&Streesit_pattern_expand);
  4097   defsubr (&Streesit_query_expand);
  4098   defsubr (&Streesit_query_compile);
  4099   defsubr (&Streesit_query_capture);
  4100 
  4101   defsubr (&Streesit_search_subtree);
  4102   defsubr (&Streesit_search_forward);
  4103   defsubr (&Streesit_induce_sparse_tree);
  4104   defsubr (&Streesit_node_match_p);
  4105   defsubr (&Streesit_subtree_stat);
  4106 #endif /* HAVE_TREE_SITTER */
  4107   defsubr (&Streesit_available_p);
  4108 }

/* [<][>][^][v][top][bottom][index][help] */