root/src/treesit.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. init_treesit_functions
  2. load_tree_sitter_if_necessary
  3. treesit_calloc_wrapper
  4. treesit_initialize
  5. treesit_symbol_to_c_name
  6. treesit_find_override_name
  7. treesit_load_language_push_for_each_suffix
  8. treesit_load_language
  9. DEFUN
  10. DEFUN
  11. treesit_check_parser
  12. treesit_tree_edit_1
  13. treesit_record_change
  14. treesit_sync_visible_region
  15. treesit_check_buffer_size
  16. treesit_call_after_change_functions
  17. treesit_ensure_parsed
  18. treesit_read_buffer
  19. make_treesit_parser
  20. make_treesit_node
  21. make_treesit_query
  22. treesit_delete_parser
  23. treesit_delete_query
  24. treesit_named_node_p
  25. treesit_query_error_to_string
  26. treesit_compose_query_signal_data
  27. treesit_ensure_query_compiled
  28. DEFUN
  29. DEFUN
  30. DEFUN
  31. DEFUN
  32. DEFUN
  33. DEFUN
  34. DEFUN
  35. DEFUN
  36. DEFUN
  37. DEFUN
  38. treesit_parser_live_p
  39. DEFUN
  40. treesit_check_range_argument
  41. treesit_make_ranges
  42. DEFUN
  43. DEFUN
  44. treesit_check_positive_integer
  45. treesit_check_node
  46. treesit_check_position
  47. treesit_node_uptodate_p
  48. DEFUN
  49. DEFUN
  50. DEFUN
  51. DEFUN
  52. DEFUN
  53. treesit_node_eq
  54. treesit_query_string_string
  55. DEFUN
  56. DEFUN
  57. treesit_predicates_for_pattern
  58. treesit_predicate_capture_name_to_node
  59. treesit_predicate_capture_name_to_text
  60. treesit_predicate_equal
  61. treesit_predicate_match
  62. treesit_predicate_pred
  63. treesit_eval_predicates
  64. treesit_resolve_node
  65. treesit_initialize_query
  66. treesit_assume_true
  67. treesit_cursor_helper_1
  68. treesit_cursor_helper
  69. treesit_traverse_sibling_helper
  70. treesit_traverse_child_helper
  71. treesit_traverse_get_predicate
  72. treesit_traverse_validate_predicate
  73. treesit_traverse_match_predicate
  74. treesit_search_dfs
  75. treesit_search_forward
  76. treesit_traverse_cleanup_cursor
  77. treesit_build_sparse_tree
  78. DEFUN
  79. DEFUN
  80. syms_of_treesit

     1 /* Tree-sitter integration for GNU Emacs.
     2 
     3 Copyright (C) 2021-2023 Free Software Foundation, Inc.
     4 
     5 Maintainer: Yuan Fu <casouri@gmail.com>
     6 
     7 This file is part of GNU Emacs.
     8 
     9 GNU Emacs is free software: you can redistribute it and/or modify
    10 it under the terms of the GNU General Public License as published by
    11 the Free Software Foundation, either version 3 of the License, or (at
    12 your option) any later version.
    13 
    14 GNU Emacs is distributed in the hope that it will be useful,
    15 but WITHOUT ANY WARRANTY; without even the implied warranty of
    16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    17 GNU General Public License for more details.
    18 
    19 You should have received a copy of the GNU General Public License
    20 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    21 
    22 #include <config.h>
    23 #include "lisp.h"
    24 #include "buffer.h"
    25 
    26 #include "treesit.h"
    27 
    28 #if HAVE_TREE_SITTER
    29 
    30 
    31 /* Dynamic loading of libtree-sitter.  */
    32 
    33 #ifdef WINDOWSNT
    34 # include "w32common.h"
    35 
    36 /* In alphabetical order.  */
    37 #undef ts_language_version
    38 #undef ts_node_child
    39 #undef ts_node_child_by_field_name
    40 #undef ts_node_child_count
    41 #undef ts_node_descendant_for_byte_range
    42 #undef ts_node_end_byte
    43 #undef ts_node_eq
    44 #undef ts_node_field_name_for_child
    45 #undef ts_node_has_error
    46 #undef ts_node_is_extra
    47 #undef ts_node_is_missing
    48 #undef ts_node_is_named
    49 #undef ts_node_is_null
    50 #undef ts_node_named_child
    51 #undef ts_node_named_child_count
    52 #undef ts_node_named_descendant_for_byte_range
    53 #undef ts_node_next_named_sibling
    54 #undef ts_node_next_sibling
    55 #undef ts_node_prev_named_sibling
    56 #undef ts_node_prev_sibling
    57 #undef ts_node_start_byte
    58 #undef ts_node_string
    59 #undef ts_node_type
    60 #undef ts_parser_delete
    61 #undef ts_parser_included_ranges
    62 #undef ts_parser_language
    63 #undef ts_parser_new
    64 #undef ts_parser_parse
    65 #undef ts_parser_set_included_ranges
    66 #undef ts_parser_set_language
    67 #undef ts_query_capture_name_for_id
    68 #undef ts_query_cursor_delete
    69 #undef ts_query_cursor_exec
    70 #undef ts_query_cursor_new
    71 #undef ts_query_cursor_next_match
    72 #undef ts_query_cursor_set_byte_range
    73 #undef ts_query_delete
    74 #undef ts_query_new
    75 #undef ts_query_pattern_count
    76 #undef ts_query_predicates_for_pattern
    77 #undef ts_query_string_value_for_id
    78 #undef ts_set_allocator
    79 #undef ts_tree_cursor_copy
    80 #undef ts_tree_cursor_current_node
    81 #undef ts_tree_cursor_delete
    82 #undef ts_tree_cursor_goto_first_child
    83 #undef ts_tree_cursor_goto_next_sibling
    84 #undef ts_tree_cursor_goto_parent
    85 #undef ts_tree_cursor_new
    86 #undef ts_tree_delete
    87 #undef ts_tree_edit
    88 #undef ts_tree_get_changed_ranges
    89 #undef ts_tree_root_node
    90 
    91 DEF_DLL_FN (uint32_t, ts_language_version, (const TSLanguage *));
    92 DEF_DLL_FN (TSNode, ts_node_child, (TSNode, uint32_t));
    93 DEF_DLL_FN (TSNode, ts_node_child_by_field_name,
    94             (TSNode, const char *, uint32_t));
    95 DEF_DLL_FN (uint32_t, ts_node_child_count, (TSNode));
    96 DEF_DLL_FN (TSNode, ts_node_descendant_for_byte_range,
    97             (TSNode, uint32_t, uint32_t));
    98 DEF_DLL_FN (uint32_t, ts_node_end_byte, (TSNode));
    99 DEF_DLL_FN (bool, ts_node_eq, (TSNode, TSNode));
   100 DEF_DLL_FN (const char *, ts_node_field_name_for_child, (TSNode, uint32_t));
   101 DEF_DLL_FN (bool, ts_node_has_error, (TSNode));
   102 DEF_DLL_FN (bool, ts_node_is_extra, (TSNode));
   103 DEF_DLL_FN (bool, ts_node_is_missing, (TSNode));
   104 DEF_DLL_FN (bool, ts_node_is_named, (TSNode));
   105 DEF_DLL_FN (bool, ts_node_is_null, (TSNode));
   106 DEF_DLL_FN (TSNode, ts_node_named_child, (TSNode, uint32_t));
   107 DEF_DLL_FN (uint32_t, ts_node_named_child_count, (TSNode));
   108 DEF_DLL_FN (TSNode, ts_node_named_descendant_for_byte_range,
   109             (TSNode, uint32_t, uint32_t));
   110 DEF_DLL_FN (TSNode, ts_node_next_named_sibling, (TSNode));
   111 DEF_DLL_FN (TSNode, ts_node_next_sibling, (TSNode));
   112 DEF_DLL_FN (TSNode, ts_node_prev_named_sibling, (TSNode));
   113 DEF_DLL_FN (TSNode, ts_node_prev_sibling, (TSNode));
   114 DEF_DLL_FN (uint32_t, ts_node_start_byte, (TSNode));
   115 DEF_DLL_FN (char *, ts_node_string, (TSNode));
   116 DEF_DLL_FN (const char *, ts_node_type, (TSNode));
   117 DEF_DLL_FN (void, ts_parser_delete, (TSParser *));
   118 DEF_DLL_FN (const TSRange *, ts_parser_included_ranges,
   119             (const TSParser *, uint32_t *));
   120 DEF_DLL_FN (const TSLanguage *, ts_parser_language, (const TSParser *));
   121 DEF_DLL_FN (TSParser *, ts_parser_new, (void));
   122 DEF_DLL_FN (TSTree *, ts_parser_parse, (TSParser *, const TSTree *, TSInput));
   123 DEF_DLL_FN (bool, ts_parser_set_included_ranges,
   124             (TSParser *, const TSRange *, uint32_t));
   125 DEF_DLL_FN (bool, ts_parser_set_language, (TSParser *, const TSLanguage *));
   126 DEF_DLL_FN (const char *, ts_query_capture_name_for_id,
   127             (const TSQuery *, uint32_t, uint32_t *));
   128 DEF_DLL_FN (void, ts_query_cursor_delete, (TSQueryCursor *));
   129 DEF_DLL_FN (void, ts_query_cursor_exec,
   130             (TSQueryCursor *, const TSQuery *, TSNode));
   131 DEF_DLL_FN (TSQueryCursor *, ts_query_cursor_new, (void));
   132 DEF_DLL_FN (bool, ts_query_cursor_next_match,
   133             (TSQueryCursor *, TSQueryMatch *));
   134 DEF_DLL_FN (void, ts_query_cursor_set_byte_range,
   135             (TSQueryCursor *, uint32_t, uint32_t));
   136 DEF_DLL_FN (void, ts_query_delete, (TSQuery *));
   137 DEF_DLL_FN (TSQuery *, ts_query_new,
   138             (const TSLanguage *, const char *, uint32_t, uint32_t *, TSQueryError *));
   139 DEF_DLL_FN (uint32_t, ts_query_pattern_count, (const TSQuery *));
   140 DEF_DLL_FN (const TSQueryPredicateStep *, ts_query_predicates_for_pattern,
   141             ( const TSQuery *, uint32_t, uint32_t *));
   142 DEF_DLL_FN (const char *, ts_query_string_value_for_id,
   143             (const TSQuery *, uint32_t, uint32_t *));
   144 DEF_DLL_FN (void, ts_set_allocator,
   145             (void *(*)(size_t), void *(*)(size_t, size_t), void *(*)(void *, size_t), void (*)(void *)));
   146 DEF_DLL_FN (TSTreeCursor, ts_tree_cursor_copy, (const TSTreeCursor *));
   147 DEF_DLL_FN (TSNode, ts_tree_cursor_current_node, (const TSTreeCursor *));
   148 DEF_DLL_FN (void, ts_tree_cursor_delete, (const TSTreeCursor *));
   149 DEF_DLL_FN (bool, ts_tree_cursor_goto_first_child, (TSTreeCursor *));
   150 DEF_DLL_FN (bool, ts_tree_cursor_goto_next_sibling, (TSTreeCursor *));
   151 DEF_DLL_FN (bool, ts_tree_cursor_goto_parent, (TSTreeCursor *));
   152 DEF_DLL_FN (TSTreeCursor, ts_tree_cursor_new, (TSNode));
   153 DEF_DLL_FN (void, ts_tree_delete, (TSTree *));
   154 DEF_DLL_FN (void, ts_tree_edit, (TSTree *, const TSInputEdit *));
   155 DEF_DLL_FN (TSRange *, ts_tree_get_changed_ranges,
   156             (const TSTree *, const TSTree *, uint32_t *));
   157 DEF_DLL_FN (TSNode, ts_tree_root_node, (const TSTree *));
   158 
   159 static bool
   160 init_treesit_functions (void)
   161 {
   162   HMODULE library = w32_delayed_load (Qtree_sitter);
   163 
   164   if (!library)
   165     return false;
   166 
   167   LOAD_DLL_FN (library, ts_language_version);
   168   LOAD_DLL_FN (library, ts_node_child);
   169   LOAD_DLL_FN (library, ts_node_child_by_field_name);
   170   LOAD_DLL_FN (library, ts_node_child_count);
   171   LOAD_DLL_FN (library, ts_node_descendant_for_byte_range);
   172   LOAD_DLL_FN (library, ts_node_end_byte);
   173   LOAD_DLL_FN (library, ts_node_eq);
   174   LOAD_DLL_FN (library, ts_node_field_name_for_child);
   175   LOAD_DLL_FN (library, ts_node_has_error);
   176   LOAD_DLL_FN (library, ts_node_is_extra);
   177   LOAD_DLL_FN (library, ts_node_is_missing);
   178   LOAD_DLL_FN (library, ts_node_is_named);
   179   LOAD_DLL_FN (library, ts_node_is_null);
   180   LOAD_DLL_FN (library, ts_node_named_child);
   181   LOAD_DLL_FN (library, ts_node_named_child_count);
   182   LOAD_DLL_FN (library, ts_node_named_descendant_for_byte_range);
   183   LOAD_DLL_FN (library, ts_node_next_named_sibling);
   184   LOAD_DLL_FN (library, ts_node_next_sibling);
   185   LOAD_DLL_FN (library, ts_node_prev_named_sibling);
   186   LOAD_DLL_FN (library, ts_node_prev_sibling);
   187   LOAD_DLL_FN (library, ts_node_start_byte);
   188   LOAD_DLL_FN (library, ts_node_string);
   189   LOAD_DLL_FN (library, ts_node_type);
   190   LOAD_DLL_FN (library, ts_parser_delete);
   191   LOAD_DLL_FN (library, ts_parser_included_ranges);
   192   LOAD_DLL_FN (library, ts_parser_language);
   193   LOAD_DLL_FN (library, ts_parser_new);
   194   LOAD_DLL_FN (library, ts_parser_parse);
   195   LOAD_DLL_FN (library, ts_parser_set_included_ranges);
   196   LOAD_DLL_FN (library, ts_parser_set_language);
   197   LOAD_DLL_FN (library, ts_query_capture_name_for_id);
   198   LOAD_DLL_FN (library, ts_query_cursor_delete);
   199   LOAD_DLL_FN (library, ts_query_cursor_exec);
   200   LOAD_DLL_FN (library, ts_query_cursor_new);
   201   LOAD_DLL_FN (library, ts_query_cursor_next_match);
   202   LOAD_DLL_FN (library, ts_query_cursor_set_byte_range);
   203   LOAD_DLL_FN (library, ts_query_delete);
   204   LOAD_DLL_FN (library, ts_query_new);
   205   LOAD_DLL_FN (library, ts_query_pattern_count);
   206   LOAD_DLL_FN (library, ts_query_predicates_for_pattern);
   207   LOAD_DLL_FN (library, ts_query_string_value_for_id);
   208   LOAD_DLL_FN (library, ts_set_allocator);
   209   LOAD_DLL_FN (library, ts_tree_cursor_copy);
   210   LOAD_DLL_FN (library, ts_tree_cursor_current_node);
   211   LOAD_DLL_FN (library, ts_tree_cursor_delete);
   212   LOAD_DLL_FN (library, ts_tree_cursor_goto_first_child);
   213   LOAD_DLL_FN (library, ts_tree_cursor_goto_next_sibling);
   214   LOAD_DLL_FN (library, ts_tree_cursor_goto_parent);
   215   LOAD_DLL_FN (library, ts_tree_cursor_new);
   216   LOAD_DLL_FN (library, ts_tree_delete);
   217   LOAD_DLL_FN (library, ts_tree_edit);
   218   LOAD_DLL_FN (library, ts_tree_get_changed_ranges);
   219   LOAD_DLL_FN (library, ts_tree_root_node);
   220 
   221   return true;
   222 }
   223 
   224 #define ts_language_version fn_ts_language_version
   225 #define ts_node_child fn_ts_node_child
   226 #define ts_node_child_by_field_name fn_ts_node_child_by_field_name
   227 #define ts_node_child_count fn_ts_node_child_count
   228 #define ts_node_descendant_for_byte_range fn_ts_node_descendant_for_byte_range
   229 #define ts_node_end_byte fn_ts_node_end_byte
   230 #define ts_node_eq fn_ts_node_eq
   231 #define ts_node_field_name_for_child fn_ts_node_field_name_for_child
   232 #define ts_node_has_error fn_ts_node_has_error
   233 #define ts_node_is_extra fn_ts_node_is_extra
   234 #define ts_node_is_missing fn_ts_node_is_missing
   235 #define ts_node_is_named fn_ts_node_is_named
   236 #define ts_node_is_null fn_ts_node_is_null
   237 #define ts_node_named_child fn_ts_node_named_child
   238 #define ts_node_named_child_count fn_ts_node_named_child_count
   239 #define ts_node_named_descendant_for_byte_range fn_ts_node_named_descendant_for_byte_range
   240 #define ts_node_next_named_sibling fn_ts_node_next_named_sibling
   241 #define ts_node_next_sibling fn_ts_node_next_sibling
   242 #define ts_node_prev_named_sibling fn_ts_node_prev_named_sibling
   243 #define ts_node_prev_sibling fn_ts_node_prev_sibling
   244 #define ts_node_start_byte fn_ts_node_start_byte
   245 #define ts_node_string fn_ts_node_string
   246 #define ts_node_type fn_ts_node_type
   247 #define ts_parser_delete fn_ts_parser_delete
   248 #define ts_parser_included_ranges fn_ts_parser_included_ranges
   249 #define ts_parser_language fn_ts_parser_language
   250 #define ts_parser_new fn_ts_parser_new
   251 #define ts_parser_parse fn_ts_parser_parse
   252 #define ts_parser_set_included_ranges fn_ts_parser_set_included_ranges
   253 #define ts_parser_set_language fn_ts_parser_set_language
   254 #define ts_query_capture_name_for_id fn_ts_query_capture_name_for_id
   255 #define ts_query_cursor_delete fn_ts_query_cursor_delete
   256 #define ts_query_cursor_exec fn_ts_query_cursor_exec
   257 #define ts_query_cursor_new fn_ts_query_cursor_new
   258 #define ts_query_cursor_next_match fn_ts_query_cursor_next_match
   259 #define ts_query_cursor_set_byte_range fn_ts_query_cursor_set_byte_range
   260 #define ts_query_delete fn_ts_query_delete
   261 #define ts_query_new fn_ts_query_new
   262 #define ts_query_pattern_count fn_ts_query_pattern_count
   263 #define ts_query_predicates_for_pattern fn_ts_query_predicates_for_pattern
   264 #define ts_query_string_value_for_id fn_ts_query_string_value_for_id
   265 #define ts_set_allocator fn_ts_set_allocator
   266 #define ts_tree_cursor_copy fn_ts_tree_cursor_copy
   267 #define ts_tree_cursor_current_node fn_ts_tree_cursor_current_node
   268 #define ts_tree_cursor_delete fn_ts_tree_cursor_delete
   269 #define ts_tree_cursor_goto_first_child fn_ts_tree_cursor_goto_first_child
   270 #define ts_tree_cursor_goto_next_sibling fn_ts_tree_cursor_goto_next_sibling
   271 #define ts_tree_cursor_goto_parent fn_ts_tree_cursor_goto_parent
   272 #define ts_tree_cursor_new fn_ts_tree_cursor_new
   273 #define ts_tree_delete fn_ts_tree_delete
   274 #define ts_tree_edit fn_ts_tree_edit
   275 #define ts_tree_get_changed_ranges fn_ts_tree_get_changed_ranges
   276 #define ts_tree_root_node fn_ts_tree_root_node
   277 
   278 #endif  /* WINDOWSNT */
   279 
   280 
   281 /* Commentary
   282 
   283    The Emacs wrapper of tree-sitter does not expose everything the C
   284    API provides, most notably:
   285 
   286    - It doesn't expose a syntax tree.  The syntax tree is part of the
   287      parser object, and updating the tree is handled on the C level.
   288 
   289    - It doesn't expose the tree cursor, either.  Presumably, Lisp is
   290      slow enough to make insignificant any performance advantages from
   291      using the cursor.  Not exposing the cursor also minimizes the
   292      number of new types this adds to Emacs Lisp; currently, this adds
   293      only the parser, node, and compiled query types.
   294 
   295    - Because updating the change is handled on the C level as each
   296      change is made in the buffer, there is no way for Lisp to update
   297      a node.  But since we can just retrieve a new node, it shouldn't
   298      be a limitation.
   299 
   300    - I didn't expose setting timeout and cancellation flag for a
   301      parser, mainly because I don't think they are really necessary
   302      in Emacs's use cases.
   303 
   304    - Many tree-sitter functions take a TSPoint, which is basically a
   305      row and column.  Emacs uses a gap buffer and does not keep
   306      information about the row and column position of a buffer.
   307      According to the author of tree-sitter, those functions only take
   308      a TSPoint so that it can be moved alongside the byte position and
   309      returned to the caller afterwards, and the position actually used
   310      is the specified byte position.  He also said that he _thinks_
   311      that just passing a byte position will also work.  As a result, a
   312      dummy value is used in place of each TSPoint.  Judging by the
   313      nature of parsing algorithms, I think it is safe to use only the
   314      byte position, and I don't think this will change in the future.
   315 
   316      See: https://github.com/tree-sitter/tree-sitter/issues/445
   317 
   318    treesit.h has some commentary on the two main data structure for
   319    the parser and node.  treesit_sync_visible_region has some
   320    commentary on how we make tree-sitter play well with narrowing (the
   321    tree-sitter parser only sees the visible region, so we need to
   322    translate positions back and forth).  Most action happens in
   323    treesit_ensure_parsed, treesit_read_buffer and
   324    treesit_record_change.
   325 
   326    A complete correspondence list between tree-sitter functions and
   327    exposed Lisp functions can be found in the manual node (elisp)API
   328    Correspondence.
   329 
   330    Placement of CHECK_xxx functions: call CHECK_xxx before using any
   331    unchecked Lisp values; these include arguments of Lisp functions,
   332    the return value of Fsymbol_value, and that of Fcar or Fcdr on
   333    user-specified conses.
   334 
   335    Initializing tree-sitter: there are two entry points to tree-sitter
   336    functions: 'treesit-parser-create' and
   337    'treesit-language-available-p'.  Technically we only need to call
   338    initialization function in those two functions, but in reality we
   339    check at the beginning of every Lisp function.  That should be more
   340    fool-proof.
   341 
   342    Tree-sitter offset (0-based) and buffer position (1-based):
   343      tree-sitter offset + buffer position = buffer position
   344      buffer position - buffer position = tree-sitter offset
   345 
   346    Tree-sitter-related code in other files:
   347    - src/alloc.c for gc for parser and node
   348    - src/casefiddle.c & src/insdel.c for notifying tree-sitter
   349      parser of buffer changes.
   350    - lisp/emacs-lisp/cl-preloaded.el & data.c & lisp.h for parser and
   351      node type.
   352    - print.c for printing tree-sitter objects (node, parser, query).
   353 
   354    Regarding signals: only raise signals in Lisp functions.
   355 
   356    Casts from EMACS_INT and ptrdiff_t to uint32_t: We install checks
   357    for buffer size and range and thus able to assume these casts never
   358    overflow.
   359 
   360    We don't parse at every keystroke.  Instead we only record the
   361    changes at each keystroke, and only parse when requested.  It is
   362    possible that lazy parsing is worse: instead of dispersed little
   363    pauses, now you have less frequent but larger pauses.  I doubt
   364    there will be any perceived difference, as the lazy parsing is
   365    going to be pretty frequent anyway.  Also this (lazy parsing) is
   366    what the mailing list guys wanted.
   367 
   368    Because it is pretty slow (comparing to other tree-sitter
   369    operations) for tree-sitter to parse the query and produce a query
   370    object, it is very wasteful to reparse the query every time
   371    treesit-query-capture is called, and it completely kills the
   372    performance of querying in a loop for a moderate amount of times
   373    (hundreds of queries takes seconds rather than milliseconds to
   374    complete).  Therefore we want some caching.  We can either use a
   375    search.c style transparent caching, or simply expose a new type,
   376    compiled-ts-query and let the user to manually compile AOT.  I
   377    believe AOT compiling gives users more control, makes the
   378    performance stable and easy to understand (compiled -> fast,
   379    uncompiled -> slow), and avoids some edge cases transparent cache
   380    could have (see below).  So I implemented the AOT compilation.
   381 
   382    Problems a transparent cache could have: Suppose we store cache
   383    entries in a fixed-length linked-list, and compare with EQ.  1)
   384    One-off query could kick out useful cache.  2) if the user messed
   385    up and the query doesn't EQ to the cache anymore, the performance
   386    mysteriously drops.  3) what if a user uses so many stuff that the
   387    default cache size (20) is not enough and we end up thrashing?
   388    These are all imaginary scenarios but they are not impossible
   389    :-)
   390 
   391    Parsers in indirect buffers: We make indirect buffers to share the
   392    parser of its base buffer.  Indirect buffers and their base buffer
   393    share the same buffer content but not other buffer attributes.  If
   394    they have separate parser lists, changes made in an indirect buffer
   395    will only update parsers of that indirect buffer, and not parsers
   396    in the base buffer or other indirect buffers, and vice versa.  We
   397    could keep track of all the base and indirect buffers, and update
   398    all of their parsers, but ultimately decide to take a simpler
   399    approach, which is to make indirect buffers share their base
   400    buffer's parser list.  The discussion can be found in bug#59693.  */
   401 
   402 
   403 /*** Initialization */
   404 
   405 static Lisp_Object Vtreesit_str_libtree_sitter;
   406 static Lisp_Object Vtreesit_str_tree_sitter;
   407 #ifndef WINDOWSNT
   408 static Lisp_Object Vtreesit_str_dot_0;
   409 #endif
   410 static Lisp_Object Vtreesit_str_dot;
   411 static Lisp_Object Vtreesit_str_question_mark;
   412 static Lisp_Object Vtreesit_str_star;
   413 static Lisp_Object Vtreesit_str_plus;
   414 static Lisp_Object Vtreesit_str_pound_equal;
   415 static Lisp_Object Vtreesit_str_pound_match;
   416 static Lisp_Object Vtreesit_str_pound_pred;
   417 static Lisp_Object Vtreesit_str_open_bracket;
   418 static Lisp_Object Vtreesit_str_close_bracket;
   419 static Lisp_Object Vtreesit_str_open_paren;
   420 static Lisp_Object Vtreesit_str_close_paren;
   421 static Lisp_Object Vtreesit_str_space;
   422 static Lisp_Object Vtreesit_str_equal;
   423 static Lisp_Object Vtreesit_str_match;
   424 static Lisp_Object Vtreesit_str_pred;
   425 
   426 /* This is the limit on recursion levels for some tree-sitter
   427    functions.  Remember to update docstrings when changing this value.
   428 
   429    If we think of programs and AST, it is very rare for any program to
   430    have a very deep AST. For example, you would need 1000+ levels of
   431    nested if-statements, or a struct somehow nested for 1000+ levels.
   432    It’s hard for me to imagine any hand-written or machine generated
   433    program to be like that.  So I think 1000 is already generous.  If
   434    we look at xdisp.c, its AST only have 30 levels.  */
   435 #define TREESIT_RECURSION_LIMIT 1000
   436 
   437 static bool treesit_initialized = false;
   438 
   439 static bool
   440 load_tree_sitter_if_necessary (bool required)
   441 {
   442 #ifdef WINDOWSNT
   443   static bool tried_to_initialize_once;
   444   static bool tree_sitter_initialized;
   445 
   446   if (!tried_to_initialize_once)
   447     {
   448       Lisp_Object status;
   449 
   450       tried_to_initialize_once = true;
   451       tree_sitter_initialized = init_treesit_functions ();
   452       status = tree_sitter_initialized ? Qt : Qnil;
   453       Vlibrary_cache = Fcons (Fcons (Qtree_sitter, status), Vlibrary_cache);
   454     }
   455 
   456   if (required && !tree_sitter_initialized)
   457     xsignal1 (Qtreesit_error,
   458               build_string ("tree-sitter library not found or failed to load"));
   459 
   460   return tree_sitter_initialized;
   461 #else
   462   return true;
   463 #endif
   464 }
   465 
   466 static void *
   467 treesit_calloc_wrapper (size_t n, size_t size)
   468 {
   469   return xzalloc (n * size);
   470 }
   471 
   472 static void
   473 treesit_initialize (void)
   474 {
   475   if (!treesit_initialized)
   476     {
   477       load_tree_sitter_if_necessary (true);
   478       ts_set_allocator (xmalloc, treesit_calloc_wrapper, xrealloc, xfree);
   479       treesit_initialized = true;
   480     }
   481 }
   482 
   483 
   484 /*** Loading language library */
   485 
   486 /* Translates a symbol treesit-<lang> to a C name
   487    treesit_<lang>.  */
   488 static void
   489 treesit_symbol_to_c_name (char *symbol_name)
   490 {
   491   size_t len = strlen (symbol_name);
   492   for (int idx = 0; idx < len; idx++)
   493     {
   494       if (symbol_name[idx] == '-')
   495         symbol_name[idx] = '_';
   496     }
   497 }
   498 
   499 /* Find the override name for LANGUAGE_SYMBOL in
   500    treesit-load-name-override-list.  Set NAME and C_SYMBOL to the
   501    override name, and return true if there exists one, otherwise
   502    return false.
   503 
   504    This function may signal if treesit-load-name-override-list is
   505    malformed.  */
   506 static bool
   507 treesit_find_override_name (Lisp_Object language_symbol, Lisp_Object *name,
   508                             Lisp_Object *c_symbol)
   509 {
   510   CHECK_LIST (Vtreesit_load_name_override_list);
   511   Lisp_Object tail = Vtreesit_load_name_override_list;
   512 
   513   FOR_EACH_TAIL (tail)
   514     {
   515       Lisp_Object entry = XCAR (tail);
   516       CHECK_LIST (entry);
   517       Lisp_Object lang = XCAR (entry);
   518       CHECK_SYMBOL (lang);
   519 
   520       if (EQ (lang, language_symbol))
   521         {
   522           *name = Fnth (make_fixnum (1), entry);
   523           CHECK_STRING (*name);
   524           *c_symbol = Fnth (make_fixnum (2), entry);
   525           CHECK_STRING (*c_symbol);
   526 
   527           return true;
   528         }
   529     }
   530 
   531   CHECK_LIST_END (tail, Vtreesit_load_name_override_list);
   532 
   533   return false;
   534 }
   535 
   536 /* For example, if Vdynamic_library_suffixes is (".so", ".dylib"),
   537    this function pushes "lib_base_name.so" and "lib_base_name.dylib"
   538    into *path_candidates.  Obviously path_candidates should be a Lisp
   539    list of Lisp strings.  */
   540 static void
   541 treesit_load_language_push_for_each_suffix (Lisp_Object lib_base_name,
   542                                             Lisp_Object *path_candidates)
   543 {
   544   Lisp_Object suffixes;
   545 
   546   suffixes = Vdynamic_library_suffixes;
   547 
   548   FOR_EACH_TAIL (suffixes)
   549     {
   550       Lisp_Object candidate1 = concat2 (lib_base_name, XCAR (suffixes));
   551 #ifndef WINDOWSNT
   552       /* On Posix hosts, support libraries named with ABI version
   553          numbers.  In the foreseeable future we only need to support
   554          version 0.0.  For more details, see
   555          https://lists.gnu.org/archive/html/emacs-devel/2023-04/msg00386.html.  */
   556       Lisp_Object candidate2 = concat2 (candidate1, Vtreesit_str_dot_0);
   557       Lisp_Object candidate3 = concat2 (candidate2, Vtreesit_str_dot_0);
   558 
   559       *path_candidates = Fcons (candidate3, *path_candidates);
   560       *path_candidates = Fcons (candidate2, *path_candidates);
   561 #endif
   562       *path_candidates = Fcons (candidate1, *path_candidates);
   563     }
   564 }
   565 
   566 /* Load the dynamic library of LANGUAGE_SYMBOL and return the pointer
   567    to the language definition.
   568 
   569    If error occurs, return NULL and fill SIGNAL_SYMBOL and SIGNAL_DATA
   570    with values suitable for xsignal.  */
   571 static TSLanguage *
   572 treesit_load_language (Lisp_Object language_symbol,
   573                        Lisp_Object *signal_symbol, Lisp_Object *signal_data)
   574 {
   575   Lisp_Object symbol_name = Fsymbol_name (language_symbol);
   576 
   577   CHECK_LIST (Vtreesit_extra_load_path);
   578 
   579   /* Figure out the library name and C name.  */
   580   Lisp_Object lib_base_name
   581     = concat2 (Vtreesit_str_libtree_sitter, symbol_name);
   582   Lisp_Object base_name
   583     = concat2 (Vtreesit_str_tree_sitter, symbol_name);
   584 
   585   /* Override the library name and C name, if appropriate.  */
   586   Lisp_Object override_name;
   587   Lisp_Object override_c_name;
   588   bool found_override = treesit_find_override_name (language_symbol,
   589                                                     &override_name,
   590                                                     &override_c_name);
   591   if (found_override)
   592     lib_base_name = override_name;
   593 
   594   /* Now we generate a list of possible library paths.  */
   595   Lisp_Object path_candidates = Qnil;
   596   /* First push just the filenames to the candidate list, which will
   597      make dynlib_open look under standard system load paths.  */
   598   treesit_load_language_push_for_each_suffix (lib_base_name, &path_candidates);
   599   /* This is used for reporting errors (i.e., just filenames).  */
   600   Lisp_Object base_candidates = path_candidates;
   601   /* Then push ~/.emacs.d/tree-sitter paths.  */
   602   Lisp_Object lib_name
   603     = Fexpand_file_name (concat2 (build_string ("tree-sitter/"), lib_base_name),
   604                          Fsymbol_value (Quser_emacs_directory));
   605   treesit_load_language_push_for_each_suffix (lib_name, &path_candidates);
   606   /* Then push paths from treesit-extra-load-path.  */
   607   Lisp_Object tail;
   608 
   609   tail = Freverse (Vtreesit_extra_load_path);
   610 
   611   FOR_EACH_TAIL (tail)
   612     {
   613       Lisp_Object expanded_lib = Fexpand_file_name (lib_base_name, XCAR (tail));
   614       treesit_load_language_push_for_each_suffix (expanded_lib,
   615                                                   &path_candidates);
   616     }
   617 
   618   /* Try loading the dynamic library by each path candidate.  Stop
   619      when succeed, record the error message and try the next one when
   620      fail.  */
   621   dynlib_handle_ptr handle;
   622   const char *error;
   623 
   624   tail = path_candidates;
   625   error = NULL;
   626   handle = NULL;
   627 
   628   FOR_EACH_TAIL (tail)
   629     {
   630       char *library_name = SSDATA (XCAR (tail));
   631       dynlib_error ();
   632       handle = dynlib_open (library_name);
   633       error = dynlib_error ();
   634       if (error == NULL)
   635         break;
   636     }
   637 
   638   if (error != NULL)
   639     {
   640       *signal_symbol = Qtreesit_load_language_error;
   641       *signal_data = list3 (Qnot_found, base_candidates,
   642                             build_string ("No such file or directory"));
   643       return NULL;
   644     }
   645 
   646   /* Load TSLanguage.  */
   647   eassume (handle != NULL);
   648   dynlib_error ();
   649   TSLanguage *(*langfn) (void);
   650   char *c_name;
   651   if (found_override)
   652     c_name = xstrdup (SSDATA (override_c_name));
   653   else
   654     {
   655       c_name = xstrdup (SSDATA (base_name));
   656       treesit_symbol_to_c_name (c_name);
   657     }
   658   langfn = dynlib_sym (handle, c_name);
   659   xfree (c_name);
   660   error = dynlib_error ();
   661   if (error != NULL)
   662     {
   663       *signal_symbol = Qtreesit_load_language_error;
   664       *signal_data = list2 (Qsymbol_error, build_string (error));
   665       return NULL;
   666     }
   667   TSLanguage *lang = (*langfn) ();
   668 
   669   /* Check if language version matches tree-sitter version.  */
   670   TSParser *parser = ts_parser_new ();
   671   bool success = ts_parser_set_language (parser, lang);
   672   ts_parser_delete (parser);
   673   if (!success)
   674     {
   675       *signal_symbol = Qtreesit_load_language_error;
   676       *signal_data = list2 (Qversion_mismatch,
   677                             make_fixnum (ts_language_version (lang)));
   678       return NULL;
   679     }
   680   return lang;
   681 }
   682 
   683 DEFUN ("treesit-language-available-p", Ftreesit_language_available_p,
   684        Streesit_language_available_p,
   685        1, 2, 0,
   686        doc: /* Return non-nil if LANGUAGE exists and is loadable.
   687 
   688 If DETAIL is non-nil, return (t . nil) when LANGUAGE is available,
   689 (nil . DATA) when unavailable.  DATA is the signal data of
   690 `treesit-load-language-error'.  */)
   691   (Lisp_Object language, Lisp_Object detail)
   692 {
   693   CHECK_SYMBOL (language);
   694   treesit_initialize ();
   695   Lisp_Object signal_symbol = Qnil;
   696   Lisp_Object signal_data = Qnil;
   697   if (treesit_load_language (language, &signal_symbol, &signal_data) == NULL)
   698     {
   699       if (NILP (detail))
   700         return Qnil;
   701       else
   702         return Fcons (Qnil, signal_data);
   703     }
   704   else
   705     {
   706       if (NILP (detail))
   707         return Qt;
   708       else
   709         return Fcons (Qt, Qnil);
   710     }
   711 }
   712 
   713 DEFUN ("treesit-library-abi-version", Ftreesit_library_abi_version,
   714        Streesit_library_abi_version,
   715        0, 1, 0,
   716        doc: /* Return the language ABI version of the tree-sitter library.
   717 
   718 By default, report the latest ABI version supported by the library for
   719 loading language support modules.  The library is backward-compatible
   720 with language modules which use older ABI versions; if MIN-COMPATIBLE
   721 is non-nil, return the oldest compatible ABI version.  */)
   722   (Lisp_Object min_compatible)
   723 {
   724   if (NILP (min_compatible))
   725     return make_fixnum (TREE_SITTER_LANGUAGE_VERSION);
   726   else
   727     return make_fixnum (TREE_SITTER_MIN_COMPATIBLE_LANGUAGE_VERSION);
   728 }
   729 
   730 DEFUN ("treesit-language-abi-version", Ftreesit_language_abi_version,
   731        Streesit_language_abi_version,
   732        0, 1, 0,
   733        doc: /* Return the ABI version of the tree-sitter grammar for LANGUAGE.
   734 Return nil if a grammar library for LANGUAGE is not available.  */)
   735   (Lisp_Object language)
   736 {
   737   if (NILP (Ftreesit_language_available_p (language, Qnil)))
   738     return Qnil;
   739   else
   740     {
   741       Lisp_Object signal_symbol = Qnil;
   742       Lisp_Object signal_data = Qnil;
   743       TSLanguage *ts_language = treesit_load_language (language,
   744                                                        &signal_symbol,
   745                                                        &signal_data);
   746       if (ts_language == NULL)
   747         return Qnil;
   748       uint32_t version =  ts_language_version (ts_language);
   749       return make_fixnum((ptrdiff_t) version);
   750     }
   751 }
   752 
   753 
   754 /*** Parsing functions */
   755 
   756 static void
   757 treesit_check_parser (Lisp_Object obj)
   758 {
   759   CHECK_TS_PARSER (obj);
   760   if (XTS_PARSER (obj)->deleted)
   761     xsignal1 (Qtreesit_parser_deleted, obj);
   762 }
   763 
   764 /* An auxiliary function that saves a few lines of code.  Assumes TREE
   765    is not NULL.  START_BYTE, OLD_END_BYTE, NEW_END_BYTE must not be
   766    larger than UINT32_MAX.  */
   767 static inline void
   768 treesit_tree_edit_1 (TSTree *tree, ptrdiff_t start_byte,
   769                      ptrdiff_t old_end_byte, ptrdiff_t new_end_byte)
   770 {
   771   eassert (start_byte >= 0);
   772   eassert (start_byte <= old_end_byte);
   773   eassert (start_byte <= new_end_byte);
   774   TSPoint dummy_point = {0, 0};
   775   eassert (start_byte <= UINT32_MAX);
   776   eassert (old_end_byte <= UINT32_MAX);
   777   eassert (new_end_byte <= UINT32_MAX);
   778   TSInputEdit edit = {(uint32_t) start_byte,
   779                       (uint32_t) old_end_byte,
   780                       (uint32_t) new_end_byte,
   781                       dummy_point, dummy_point, dummy_point};
   782   ts_tree_edit (tree, &edit);
   783 }
   784 
   785 /* Update each parser's tree after the user made an edit.  This
   786    function does not parse the buffer and only updates the tree, so it
   787    should be very fast.  */
   788 void
   789 treesit_record_change (ptrdiff_t start_byte, ptrdiff_t old_end_byte,
   790                        ptrdiff_t new_end_byte)
   791 {
   792   struct buffer *base_buffer = current_buffer;
   793   if (current_buffer->base_buffer)
   794     base_buffer = current_buffer->base_buffer;
   795   Lisp_Object parser_list = BVAR (base_buffer, ts_parser_list);
   796 
   797   FOR_EACH_TAIL_SAFE (parser_list)
   798     {
   799       CHECK_CONS (parser_list);
   800       Lisp_Object lisp_parser = XCAR (parser_list);
   801       treesit_check_parser (lisp_parser);
   802       TSTree *tree = XTS_PARSER (lisp_parser)->tree;
   803       /* See comment (ref:visible-beg-null) if you wonder why we don't
   804          update visible_beg/end when tree is NULL.  */
   805 
   806       if (tree != NULL)
   807         {
   808           eassert (start_byte <= old_end_byte);
   809           eassert (start_byte <= new_end_byte);
   810           /* Think the recorded change as a delete followed by an
   811              insert, and think of them as moving unchanged text back
   812              and forth.  After all, the whole point of updating the
   813              tree is to update the position of unchanged text.  */
   814           ptrdiff_t visible_beg = XTS_PARSER (lisp_parser)->visible_beg;
   815           ptrdiff_t visible_end = XTS_PARSER (lisp_parser)->visible_end;
   816           eassert (visible_beg >= 0);
   817           eassert (visible_beg <= visible_end);
   818 
   819           /* AFFECTED_START/OLD_END/NEW_END are (0-based) offsets from
   820              VISIBLE_BEG.  min(visi_end, max(visi_beg, value)) clips
   821              value into [visi_beg, visi_end], and subtracting visi_beg
   822              gives the offset from visi_beg.  */
   823           ptrdiff_t start_offset = (min (visible_end,
   824                                          max (visible_beg, start_byte))
   825                                     - visible_beg);
   826           ptrdiff_t old_end_offset = (min (visible_end,
   827                                            max (visible_beg, old_end_byte))
   828                                       - visible_beg);
   829           /* We don't clip new_end_offset under visible_end, because
   830              otherwise we would miss updating the clipped part.  Plus,
   831              when inserting in narrowed region, the narrowed region
   832              will grow to accommodate the new text, so this is the
   833              correct behavior.  (Bug#61369).  */
   834           ptrdiff_t new_end_offset = (max (visible_beg, new_end_byte)
   835                                       - visible_beg);
   836           eassert (start_offset <= old_end_offset);
   837           eassert (start_offset <= new_end_offset);
   838 
   839           treesit_tree_edit_1 (tree, start_offset, old_end_offset,
   840                                new_end_offset);
   841           XTS_PARSER (lisp_parser)->need_reparse = true;
   842           XTS_PARSER (lisp_parser)->timestamp++;
   843 
   844           /* VISIBLE_BEG/END records tree-sitter's range of view in
   845              the buffer.  We need to adjust them when tree-sitter's
   846              view changes.  */
   847           ptrdiff_t visi_beg_delta;
   848           if (old_end_byte > new_end_byte)
   849             /* Move backward.  */
   850             visi_beg_delta = (min (visible_beg, new_end_byte)
   851                               - min (visible_beg, old_end_byte));
   852           else
   853             /* Move forward.  */
   854             visi_beg_delta = (old_end_byte < visible_beg
   855                               ? new_end_byte - old_end_byte : 0);
   856 
   857           XTS_PARSER (lisp_parser)->visible_beg = visible_beg + visi_beg_delta;
   858           XTS_PARSER (lisp_parser)->visible_end = (visible_end
   859                                                    + visi_beg_delta
   860                                                    + (new_end_offset
   861                                                       - old_end_offset));
   862 
   863           eassert (XTS_PARSER (lisp_parser)->visible_beg >= 0);
   864           eassert (XTS_PARSER (lisp_parser)->visible_beg
   865                    <= XTS_PARSER (lisp_parser)->visible_end);
   866         }
   867     }
   868 }
   869 
   870 /* Comment (ref:visible-beg-null) The purpose of visible_beg/end is to
   871    keep track of "which part of the buffer does the tree-sitter tree
   872    see", in order to update the tree correctly.  Visible_beg/end have
   873    two purposes: they "clip" buffer changes within them, and they
   874    translate positions in the buffer to positions in the tree
   875    (buffer position - visible_beg = tree position).
   876 
   877    Conceptually, visible_beg/end hold the visible region of the buffer
   878    when we last reparsed.  In-between two reparses, we don't really
   879    care if the visible region of the buffer changes.
   880 
   881    Right before we reparse, we make tree-sitter's visible region
   882    match that of the buffer, and update visible_beg/end.
   883 
   884    That is, the whole purpose of visible_beg/end (and also of
   885    treesit_record_change and treesit_sync_visible_region) is to update
   886    the tree (by ts_tree_edit).  So if the tree is NULL,
   887    visible_beg/end are considered uninitialized.  Only when we already
   888    have a tree, do we need to keep track of position changes and
   889    update it correctly, so it can be fed into ts_parser_parse as the
   890    old tree, so that tree-sitter will only parse the changed part,
   891    incrementally.
   892 
   893    In a nutshell, tree-sitter incremental parsing in Emacs looks like:
   894 
   895    treesit_record_change (tree)  \
   896    treesit_record_change (tree)  | user edits buffer
   897    ...                           /
   898 
   899    treesit_sync_visible_region (tree) \ treesit_ensure_parsed
   900    ts_parser_parse(tree) -> tree      /
   901 
   902    treesit_record_change (tree)  \
   903    treesit_record_change (tree)  | user edits buffer
   904    ...                           /
   905 
   906    and so on.  */
   907 
   908 /* Make sure the tree's visible range is in sync with the buffer's
   909    visible range, and PARSER's visible_beg and visible_end are in sync
   910    with BUF_BEGV_BYTE and BUG_ZV_BYTE.  When calling this function you
   911    must make sure the current buffer's size in bytes is not larger than
   912    UINT32_MAX.  Basically, always call treesit_check_buffer_size before
   913    this function.
   914 
   915    If buffer range changed since last parse (visible_beg/end doesn't
   916    match buffer visible beginning/end), set need_reparse to true.  */
   917 static void
   918 treesit_sync_visible_region (Lisp_Object parser)
   919 {
   920   TSTree *tree = XTS_PARSER (parser)->tree;
   921   struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
   922 
   923   /* If we are setting visible_beg/end for the first time, we can skip
   924   the offset acrobatics and updating the tree below.  */
   925   if (tree == NULL)
   926     {
   927       XTS_PARSER (parser)->visible_beg = BUF_BEGV_BYTE (buffer);
   928       XTS_PARSER (parser)->visible_end = BUF_ZV_BYTE (buffer);
   929       return;
   930     }
   931 
   932   ptrdiff_t visible_beg = XTS_PARSER (parser)->visible_beg;
   933   ptrdiff_t visible_end = XTS_PARSER (parser)->visible_end;
   934   eassert (0 <= visible_beg);
   935   eassert (visible_beg <= visible_end);
   936 
   937   eassert (BUF_BEGV_BYTE (buffer) <= UINT32_MAX);
   938   eassert (BUF_ZV_BYTE (buffer) <= UINT32_MAX);
   939 
   940   /* If buffer restriction changed and user requests for a node (hence
   941      this function is called), we need to reparse.  */
   942   if (visible_beg != BUF_BEGV_BYTE (buffer)
   943       || visible_end != BUF_ZV_BYTE (buffer))
   944     XTS_PARSER (parser)->need_reparse = true;
   945 
   946   /* Before we parse or set ranges, catch up with the narrowing
   947      situation.  We change visible_beg and visible_end to match
   948      BUF_BEGV_BYTE and BUF_ZV_BYTE, and inform tree-sitter of the
   949      change.  We want to move the visible range of tree-sitter to
   950      match the narrowed range.  For example,
   951      from ________|xxxx|__
   952      to   |xxxx|__________ */
   953 
   954   /* 1. Make sure visible_beg <= BUF_BEGV_BYTE.  */
   955   if (visible_beg > BUF_BEGV_BYTE (buffer))
   956     {
   957       /* Tree-sitter sees: insert at the beginning.  */
   958       treesit_tree_edit_1 (tree, 0, 0, visible_beg - BUF_BEGV_BYTE (buffer));
   959       visible_beg = BUF_BEGV_BYTE (buffer);
   960       eassert (visible_beg <= visible_end);
   961     }
   962   /* 2. Make sure visible_end = BUF_ZV_BYTE.  */
   963   if (visible_end < BUF_ZV_BYTE (buffer))
   964     {
   965       /* Tree-sitter sees: insert at the end.  */
   966       treesit_tree_edit_1 (tree, visible_end - visible_beg,
   967                            visible_end - visible_beg,
   968                            BUF_ZV_BYTE (buffer) - visible_beg);
   969       visible_end = BUF_ZV_BYTE (buffer);
   970       eassert (visible_beg <= visible_end);
   971     }
   972   else if (visible_end > BUF_ZV_BYTE (buffer))
   973     {
   974       /* Tree-sitter sees: delete at the end.  */
   975       treesit_tree_edit_1 (tree, BUF_ZV_BYTE (buffer) - visible_beg,
   976                            visible_end - visible_beg,
   977                            BUF_ZV_BYTE (buffer) - visible_beg);
   978       visible_end = BUF_ZV_BYTE (buffer);
   979       eassert (visible_beg <= visible_end);
   980     }
   981   /* 3. Make sure visible_beg = BUF_BEGV_BYTE.  */
   982   if (visible_beg < BUF_BEGV_BYTE (buffer))
   983     {
   984       /* Tree-sitter sees: delete at the beginning.  */
   985       treesit_tree_edit_1 (tree, 0, BUF_BEGV_BYTE (buffer) - visible_beg, 0);
   986       visible_beg = BUF_BEGV_BYTE (buffer);
   987       eassert (visible_beg <= visible_end);
   988     }
   989   eassert (0 <= visible_beg);
   990   eassert (visible_beg <= visible_end);
   991   eassert (visible_beg == BUF_BEGV_BYTE (buffer));
   992   eassert (visible_end == BUF_ZV_BYTE (buffer));
   993 
   994   XTS_PARSER (parser)->visible_beg = visible_beg;
   995   XTS_PARSER (parser)->visible_end = visible_end;
   996 }
   997 
   998 static void
   999 treesit_check_buffer_size (struct buffer *buffer)
  1000 {
  1001   ptrdiff_t buffer_size_bytes = (BUF_Z_BYTE (buffer) - BUF_BEG_BYTE (buffer));
  1002   if (buffer_size_bytes > UINT32_MAX)
  1003     xsignal2 (Qtreesit_buffer_too_large,
  1004               build_string ("Buffer size cannot be larger than 4GB"),
  1005               make_fixnum (buffer_size_bytes));
  1006 }
  1007 
  1008 static Lisp_Object treesit_make_ranges (const TSRange *, uint32_t, struct buffer *);
  1009 
  1010 static void
  1011 treesit_call_after_change_functions (TSTree *old_tree, TSTree *new_tree,
  1012                                      Lisp_Object parser)
  1013 {
  1014   /* If the old_tree is NULL, meaning this is the first parse, the
  1015      changed range is the whole buffer.  */
  1016   Lisp_Object lisp_ranges;
  1017   struct buffer *buf = XBUFFER (XTS_PARSER (parser)->buffer);
  1018   if (old_tree)
  1019     {
  1020       uint32_t len;
  1021       TSRange *ranges = ts_tree_get_changed_ranges (old_tree, new_tree, &len);
  1022       lisp_ranges = treesit_make_ranges (ranges, len, buf);
  1023       xfree (ranges);
  1024     }
  1025   else
  1026     {
  1027       struct buffer *oldbuf = current_buffer;
  1028       set_buffer_internal (buf);
  1029       lisp_ranges = Fcons (Fcons (Fpoint_min (), Fpoint_max ()), Qnil);
  1030       set_buffer_internal (oldbuf);
  1031     }
  1032 
  1033   specpdl_ref count = SPECPDL_INDEX ();
  1034 
  1035   /* let's trust the after change functions and not clone a new ranges
  1036      for each of them.  */
  1037   Lisp_Object functions = XTS_PARSER (parser)->after_change_functions;
  1038   FOR_EACH_TAIL (functions)
  1039     safe_call2 (XCAR (functions), lisp_ranges, parser);
  1040 
  1041   unbind_to (count, Qnil);
  1042 }
  1043 
  1044 /* Parse the buffer.  We don't parse until we have to.  When we have
  1045    to, we call this function to parse and update the tree.  */
  1046 static void
  1047 treesit_ensure_parsed (Lisp_Object parser)
  1048 {
  1049   struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
  1050 
  1051   /* Before we parse, catch up with the narrowing situation.  */
  1052   treesit_check_buffer_size (buffer);
  1053   /* This function has to run before we check for need_reparse flag,
  1054      because it might set the flag to true.  */
  1055   treesit_sync_visible_region (parser);
  1056 
  1057   /* Make sure this comes before everything else, see comment
  1058      (ref:notifier-inside-ensure-parsed) for more detail.  */
  1059   if (!XTS_PARSER (parser)->need_reparse)
  1060     return;
  1061 
  1062   TSParser *treesit_parser = XTS_PARSER (parser)->parser;
  1063   TSTree *tree = XTS_PARSER (parser)->tree;
  1064   TSInput input = XTS_PARSER (parser)->input;
  1065 
  1066   TSTree *new_tree = ts_parser_parse (treesit_parser, tree, input);
  1067   /* This should be very rare (impossible, really): it only happens
  1068      when 1) language is not set (impossible in Emacs because the user
  1069      has to supply a language to create a parser), 2) parse canceled
  1070      due to timeout (impossible because we don't set a timeout), 3)
  1071      parse canceled due to cancellation flag (impossible because we
  1072      don't set the flag).  (See comments for ts_parser_parse in
  1073      tree_sitter/api.h.)  */
  1074   if (new_tree == NULL)
  1075     {
  1076       Lisp_Object buf;
  1077       XSETBUFFER (buf, buffer);
  1078       xsignal1 (Qtreesit_parse_error, buf);
  1079     }
  1080 
  1081   XTS_PARSER (parser)->tree = new_tree;
  1082   XTS_PARSER (parser)->need_reparse = false;
  1083 
  1084   /* After-change functions should run at the very end, most crucially
  1085      after need_reparse is set to false, this way if the function
  1086      calls some tree-sitter function which invokes
  1087      treesit_ensure_parsed again, it returns early and do not
  1088      recursively call the after change functions again.
  1089      (ref:notifier-inside-ensure-parsed)  */
  1090   treesit_call_after_change_functions (tree, new_tree, parser);
  1091   ts_tree_delete (tree);
  1092 }
  1093 
  1094 /* This is the read function provided to tree-sitter to read from a
  1095    buffer.  It reads one character at a time and automatically skips
  1096    the gap.  */
  1097 static const char*
  1098 treesit_read_buffer (void *parser, uint32_t byte_index,
  1099                      TSPoint position, uint32_t *bytes_read)
  1100 {
  1101   struct buffer *buffer = XBUFFER (((struct Lisp_TS_Parser *) parser)->buffer);
  1102   ptrdiff_t visible_beg = ((struct Lisp_TS_Parser *) parser)->visible_beg;
  1103   ptrdiff_t visible_end = ((struct Lisp_TS_Parser *) parser)->visible_end;
  1104   ptrdiff_t byte_pos = byte_index + visible_beg;
  1105   /* We will make sure visible_beg = BUF_BEGV_BYTE before re-parse (in
  1106      treesit_ensure_parsed), so byte_pos will never be smaller than
  1107      BUF_BEG_BYTE.  */
  1108   eassert (visible_beg = BUF_BEGV_BYTE (buffer));
  1109   eassert (visible_end = BUF_ZV_BYTE (buffer));
  1110 
  1111   /* Read one character.  Tree-sitter wants us to set bytes_read to 0
  1112      if it reads to the end of buffer.  It doesn't say what it wants
  1113      for the return value in that case, so we just give it an empty
  1114      string.  */
  1115   char *beg;
  1116   int len;
  1117   /* This function could run from a user command, so it is better to
  1118      do nothing instead of raising an error.  (It was a pain in the a**
  1119      to decrypt mega-if-conditions in Emacs source, so I wrote the two
  1120      branches separately, you are welcome.)  */
  1121   if (!BUFFER_LIVE_P (buffer))
  1122     {
  1123       beg = NULL;
  1124       len = 0;
  1125     }
  1126   /* Reached visible end-of-buffer, tell tree-sitter to read no more.  */
  1127   else if (byte_pos >= visible_end)
  1128     {
  1129       beg = NULL;
  1130       len = 0;
  1131     }
  1132   /* Normal case, read a character.  */
  1133   else
  1134     {
  1135       beg = (char *) BUF_BYTE_ADDRESS (buffer, byte_pos);
  1136       len = BYTES_BY_CHAR_HEAD ((int) *beg);
  1137     }
  1138   /* We never let tree-sitter to parse buffers that large so this
  1139      assertion should never hit.  */
  1140   eassert (len < UINT32_MAX);
  1141   *bytes_read = (uint32_t) len;
  1142   return beg;
  1143 }
  1144 
  1145 
  1146 /*** Functions for parser and node object */
  1147 
  1148 /* Wrap the parser in a Lisp_Object to be used in the Lisp
  1149    machine.  */
  1150 Lisp_Object
  1151 make_treesit_parser (Lisp_Object buffer, TSParser *parser,
  1152                      TSTree *tree, Lisp_Object language_symbol)
  1153 {
  1154   struct Lisp_TS_Parser *lisp_parser;
  1155 
  1156   lisp_parser = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Parser,
  1157                                        buffer, PVEC_TS_PARSER);
  1158 
  1159   lisp_parser->language_symbol = language_symbol;
  1160   lisp_parser->after_change_functions = Qnil;
  1161   lisp_parser->buffer = buffer;
  1162   lisp_parser->parser = parser;
  1163   lisp_parser->tree = tree;
  1164   TSInput input = {lisp_parser, treesit_read_buffer, TSInputEncodingUTF8};
  1165   lisp_parser->input = input;
  1166   lisp_parser->need_reparse = true;
  1167   lisp_parser->visible_beg = BUF_BEGV_BYTE (XBUFFER (buffer));
  1168   lisp_parser->visible_end = BUF_ZV_BYTE (XBUFFER (buffer));
  1169   lisp_parser->timestamp = 0;
  1170   lisp_parser->deleted = false;
  1171   lisp_parser->has_range = false;
  1172   eassert (lisp_parser->visible_beg <= lisp_parser->visible_end);
  1173   return make_lisp_ptr (lisp_parser, Lisp_Vectorlike);
  1174 }
  1175 
  1176 /* Wrap the node in a Lisp_Object to be used in the Lisp machine.  */
  1177 Lisp_Object
  1178 make_treesit_node (Lisp_Object parser, TSNode node)
  1179 {
  1180   struct Lisp_TS_Node *lisp_node;
  1181 
  1182   lisp_node = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Node,
  1183                                      parser, PVEC_TS_NODE);
  1184   lisp_node->parser = parser;
  1185   lisp_node->node = node;
  1186   lisp_node->timestamp = XTS_PARSER (parser)->timestamp;
  1187   return make_lisp_ptr (lisp_node, Lisp_Vectorlike);
  1188 }
  1189 
  1190 /* Make a compiled query.  QUERY has to be either a cons or a
  1191    string.  */
  1192 static Lisp_Object
  1193 make_treesit_query (Lisp_Object query, Lisp_Object language)
  1194 {
  1195   TSQueryCursor *treesit_cursor = ts_query_cursor_new ();
  1196   struct Lisp_TS_Query *lisp_query;
  1197 
  1198   lisp_query = ALLOCATE_PSEUDOVECTOR (struct Lisp_TS_Query,
  1199                                       source, PVEC_TS_COMPILED_QUERY);
  1200 
  1201   lisp_query->language = language;
  1202   lisp_query->source = query;
  1203   lisp_query->query = NULL;
  1204   lisp_query->cursor = treesit_cursor;
  1205   return make_lisp_ptr (lisp_query, Lisp_Vectorlike);
  1206 }
  1207 
  1208 /* The following two functions are called from alloc.c:cleanup_vector.  */
  1209 void
  1210 treesit_delete_parser (struct Lisp_TS_Parser *lisp_parser)
  1211 {
  1212   ts_tree_delete (lisp_parser->tree);
  1213   ts_parser_delete (lisp_parser->parser);
  1214 }
  1215 
  1216 void
  1217 treesit_delete_query (struct Lisp_TS_Query *lisp_query)
  1218 {
  1219   ts_query_delete (lisp_query->query);
  1220   ts_query_cursor_delete (lisp_query->cursor);
  1221 }
  1222 
  1223 /* The following function is called from print.c:print_vectorlike.  */
  1224 bool
  1225 treesit_named_node_p (TSNode node)
  1226 {
  1227   return ts_node_is_named (node);
  1228 }
  1229 
  1230 static const char*
  1231 treesit_query_error_to_string (TSQueryError error)
  1232 {
  1233   switch (error)
  1234     {
  1235     case TSQueryErrorNone:
  1236       return "None";
  1237     case TSQueryErrorSyntax:
  1238       return "Syntax error at";
  1239     case TSQueryErrorNodeType:
  1240       return "Node type error at";
  1241     case TSQueryErrorField:
  1242       return "Field error at";
  1243     case TSQueryErrorCapture:
  1244       return "Capture error at";
  1245     case TSQueryErrorStructure:
  1246       return "Structure error at";
  1247     default:
  1248       return "Unknown error";
  1249     }
  1250 }
  1251 
  1252 static Lisp_Object
  1253 treesit_compose_query_signal_data (uint32_t error_offset,
  1254                                    TSQueryError error_type,
  1255                                    Lisp_Object query_source)
  1256 {
  1257   return list4 (build_string (treesit_query_error_to_string (error_type)),
  1258                 make_fixnum (error_offset + 1),
  1259                 query_source,
  1260                 build_string ("Debug the query with `treesit-query-validate'"));
  1261 }
  1262 
  1263 /* Ensure the QUERY is compiled.  Return the TSQuery.  It could be
  1264    NULL if error occurs, in which case ERROR_OFFSET and ERROR_TYPE are
  1265    bound.  If error occurs, return NULL, and assign SIGNAL_SYMBOL and
  1266    SIGNAL_DATA accordingly.  */
  1267 static TSQuery *
  1268 treesit_ensure_query_compiled (Lisp_Object query, Lisp_Object *signal_symbol,
  1269                                Lisp_Object *signal_data)
  1270 {
  1271   /* If query is already compiled (not null), return that, otherwise
  1272      compile and return it.  */
  1273   TSQuery *treesit_query = XTS_COMPILED_QUERY (query)->query;
  1274   if (treesit_query != NULL)
  1275     return treesit_query;
  1276 
  1277   /* Get query source and TSLanguage ready.  */
  1278   Lisp_Object source = XTS_COMPILED_QUERY (query)->source;
  1279   Lisp_Object language = XTS_COMPILED_QUERY (query)->language;
  1280   /* This is the main reason why we compile query lazily: to avoid
  1281      loading languages early.  */
  1282   TSLanguage *treesit_lang = treesit_load_language (language, signal_symbol,
  1283                                                     signal_data);
  1284   if (treesit_lang == NULL)
  1285     return NULL;
  1286 
  1287   if (CONSP (source))
  1288     source = Ftreesit_query_expand (source);
  1289 
  1290   /* Create TSQuery.  */
  1291   uint32_t error_offset;
  1292   TSQueryError error_type;
  1293   char *treesit_source = SSDATA (source);
  1294   treesit_query = ts_query_new (treesit_lang, treesit_source,
  1295                                 strlen (treesit_source),
  1296                                 &error_offset, &error_type);
  1297   if (treesit_query == NULL)
  1298     {
  1299       *signal_symbol = Qtreesit_query_error;
  1300       *signal_data = treesit_compose_query_signal_data (error_offset,
  1301                                                         error_type,
  1302                                                         source);
  1303     }
  1304   XTS_COMPILED_QUERY (query)->query = treesit_query;
  1305   return treesit_query;
  1306 }
  1307 
  1308 
  1309 /* Lisp definitions.  */
  1310 
  1311 DEFUN ("treesit-parser-p",
  1312        Ftreesit_parser_p, Streesit_parser_p, 1, 1, 0,
  1313        doc: /* Return t if OBJECT is a tree-sitter parser.  */)
  1314   (Lisp_Object object)
  1315 {
  1316   if (TS_PARSERP (object))
  1317     return Qt;
  1318   else
  1319     return Qnil;
  1320 }
  1321 
  1322 DEFUN ("treesit-node-p",
  1323        Ftreesit_node_p, Streesit_node_p, 1, 1, 0,
  1324        doc: /* Return t if OBJECT is a tree-sitter node.  */)
  1325   (Lisp_Object object)
  1326 {
  1327   if (TS_NODEP (object))
  1328     return Qt;
  1329   else
  1330     return Qnil;
  1331 }
  1332 
  1333 DEFUN ("treesit-compiled-query-p",
  1334        Ftreesit_compiled_query_p, Streesit_compiled_query_p, 1, 1, 0,
  1335        doc: /* Return t if OBJECT is a compiled tree-sitter query.  */)
  1336   (Lisp_Object object)
  1337 {
  1338   if (TS_COMPILED_QUERY_P (object))
  1339     return Qt;
  1340   else
  1341     return Qnil;
  1342 }
  1343 
  1344 DEFUN ("treesit-query-p",
  1345        Ftreesit_query_p, Streesit_query_p, 1, 1, 0,
  1346        doc: /* Return t if OBJECT is a generic tree-sitter query.  */)
  1347   (Lisp_Object object)
  1348 {
  1349   if (TS_COMPILED_QUERY_P (object)
  1350       || CONSP (object) || STRINGP (object))
  1351     return Qt;
  1352   else
  1353     return Qnil;
  1354 }
  1355 
  1356 DEFUN ("treesit-query-language",
  1357        Ftreesit_query_language, Streesit_query_language, 1, 1, 0,
  1358        doc: /* Return the language of QUERY.
  1359 QUERY has to be a compiled query.  */)
  1360   (Lisp_Object query)
  1361 {
  1362   CHECK_TS_COMPILED_QUERY (query);
  1363   return XTS_COMPILED_QUERY (query)->language;
  1364 }
  1365 
  1366 DEFUN ("treesit-node-parser",
  1367        Ftreesit_node_parser, Streesit_node_parser,
  1368        1, 1, 0,
  1369        doc: /* Return the parser to which NODE belongs.  */)
  1370   (Lisp_Object node)
  1371 {
  1372   CHECK_TS_NODE (node);
  1373   return XTS_NODE (node)->parser;
  1374 }
  1375 
  1376 DEFUN ("treesit-parser-create",
  1377        Ftreesit_parser_create, Streesit_parser_create,
  1378        1, 3, 0,
  1379        doc: /* Create and return a parser in BUFFER for LANGUAGE.
  1380 
  1381 The parser is automatically added to BUFFER's parser list, as returned
  1382 by `treesit-parser-list'.  LANGUAGE is a language symbol.  If BUFFER
  1383 is nil or omitted, it defaults to the current buffer.  If BUFFER
  1384 already has a parser for LANGUAGE, return that parser, but if NO-REUSE
  1385 is non-nil, always create a new parser.
  1386 
  1387 If that buffer is an indirect buffer, its base buffer is used instead.
  1388 That is, indirect buffers use their base buffer's parsers.  Lisp
  1389 programs should widen as necessary should they want to use a parser in
  1390 an indirect buffer.  */)
  1391   (Lisp_Object language, Lisp_Object buffer, Lisp_Object no_reuse)
  1392 {
  1393   treesit_initialize ();
  1394 
  1395   CHECK_SYMBOL (language);
  1396   struct buffer *buf;
  1397   if (NILP (buffer))
  1398     buf = current_buffer;
  1399   else
  1400     {
  1401       CHECK_BUFFER (buffer);
  1402       buf = XBUFFER (buffer);
  1403     }
  1404   if (buf->base_buffer)
  1405     buf = buf->base_buffer;
  1406 
  1407   treesit_check_buffer_size (buf);
  1408 
  1409   /* See if we can reuse a parser.  */
  1410   if (NILP (no_reuse))
  1411     {
  1412       Lisp_Object tail = BVAR (buf, ts_parser_list);
  1413       FOR_EACH_TAIL (tail)
  1414       {
  1415         struct Lisp_TS_Parser *parser = XTS_PARSER (XCAR (tail));
  1416         if (EQ (parser->language_symbol, language))
  1417           return XCAR (tail);
  1418       }
  1419     }
  1420 
  1421   /* Load language.  */
  1422   Lisp_Object signal_symbol = Qnil;
  1423   Lisp_Object signal_data = Qnil;
  1424   TSParser *parser = ts_parser_new ();
  1425   TSLanguage *lang = treesit_load_language (language, &signal_symbol,
  1426                                             &signal_data);
  1427   if (lang == NULL)
  1428     xsignal (signal_symbol, signal_data);
  1429   /* We check language version when loading a language, so this should
  1430      always succeed.  */
  1431   ts_parser_set_language (parser, lang);
  1432 
  1433   /* Create parser.  */
  1434   Lisp_Object lisp_parser = make_treesit_parser (Fcurrent_buffer (),
  1435                                                  parser, NULL,
  1436                                                  language);
  1437 
  1438   /* Update parser-list.  */
  1439   BVAR (buf, ts_parser_list) = Fcons (lisp_parser, BVAR (buf, ts_parser_list));
  1440 
  1441   return lisp_parser;
  1442 }
  1443 
  1444 DEFUN ("treesit-parser-delete",
  1445        Ftreesit_parser_delete, Streesit_parser_delete,
  1446        1, 1, 0,
  1447        doc: /* Delete PARSER from its buffer's parser list.
  1448 See `treesit-parser-list' for the buffer's parser list.  */)
  1449   (Lisp_Object parser)
  1450 {
  1451   treesit_check_parser (parser);
  1452 
  1453   Lisp_Object buffer = XTS_PARSER (parser)->buffer;
  1454   struct buffer *buf = XBUFFER (buffer);
  1455 
  1456   BVAR (buf, ts_parser_list)
  1457     = Fdelete (parser, BVAR (buf, ts_parser_list));
  1458 
  1459   XTS_PARSER (parser)->deleted = true;
  1460   return Qnil;
  1461 }
  1462 
  1463 DEFUN ("treesit-parser-list",
  1464        Ftreesit_parser_list, Streesit_parser_list,
  1465        0, 1, 0,
  1466        doc: /* Return BUFFER's parser list.
  1467 
  1468 BUFFER defaults to the current buffer.  If that buffer is an indirect
  1469 buffer, its base buffer is used instead.  That is, indirect buffers
  1470 use their base buffer's parsers.  */)
  1471   (Lisp_Object buffer)
  1472 {
  1473   struct buffer *buf;
  1474   if (NILP (buffer))
  1475     buf = current_buffer;
  1476   else
  1477     {
  1478       CHECK_BUFFER (buffer);
  1479       buf = XBUFFER (buffer);
  1480     }
  1481   if (buf->base_buffer)
  1482     buf = buf->base_buffer;
  1483 
  1484   /* Return a fresh list so messing with that list doesn't affect our
  1485      internal data.  */
  1486   Lisp_Object return_list = Qnil;
  1487   Lisp_Object tail;
  1488 
  1489   tail = BVAR (buf, ts_parser_list);
  1490 
  1491   FOR_EACH_TAIL (tail)
  1492     return_list = Fcons (XCAR (tail), return_list);
  1493 
  1494   return Freverse (return_list);
  1495 }
  1496 
  1497 DEFUN ("treesit-parser-buffer",
  1498        Ftreesit_parser_buffer, Streesit_parser_buffer,
  1499        1, 1, 0,
  1500        doc: /* Return the buffer of PARSER.  */)
  1501   (Lisp_Object parser)
  1502 {
  1503   treesit_check_parser (parser);
  1504   Lisp_Object buf;
  1505   XSETBUFFER (buf, XBUFFER (XTS_PARSER (parser)->buffer));
  1506   return buf;
  1507 }
  1508 
  1509 DEFUN ("treesit-parser-language",
  1510        Ftreesit_parser_language, Streesit_parser_language,
  1511        1, 1, 0,
  1512        doc: /* Return PARSER's language symbol.
  1513 This symbol is the one used to create the parser.  */)
  1514   (Lisp_Object parser)
  1515 {
  1516   treesit_check_parser (parser);
  1517   return XTS_PARSER (parser)->language_symbol;
  1518 }
  1519 
  1520 /* Return true if PARSER is not deleted and its buffer is live.  */
  1521 static bool
  1522 treesit_parser_live_p (Lisp_Object parser)
  1523 {
  1524   CHECK_TS_PARSER (parser);
  1525   return ((!XTS_PARSER (parser)->deleted) &&
  1526           (!NILP (Fbuffer_live_p (XTS_PARSER (parser)->buffer))));
  1527 }
  1528 
  1529 
  1530 /*** Parser API */
  1531 
  1532 DEFUN ("treesit-parser-root-node",
  1533        Ftreesit_parser_root_node, Streesit_parser_root_node,
  1534        1, 1, 0,
  1535        doc: /* Return the root node of PARSER.  */)
  1536   (Lisp_Object parser)
  1537 {
  1538   treesit_check_parser (parser);
  1539   treesit_initialize ();
  1540   treesit_ensure_parsed (parser);
  1541   TSNode root_node = ts_tree_root_node (XTS_PARSER (parser)->tree);
  1542   return make_treesit_node (parser, root_node);
  1543 }
  1544 
  1545 /* Checks that the RANGES argument of
  1546    treesit-parser-set-included-ranges is valid.  */
  1547 static void
  1548 treesit_check_range_argument (Lisp_Object ranges)
  1549 {
  1550   struct buffer *buffer = current_buffer;
  1551   ptrdiff_t point_min = BUF_BEGV (buffer);
  1552   ptrdiff_t point_max = BUF_ZV (buffer);
  1553   EMACS_INT last_point = point_min;
  1554   Lisp_Object tail;
  1555 
  1556   tail = ranges;
  1557 
  1558   CHECK_LIST (tail);
  1559 
  1560   FOR_EACH_TAIL (tail)
  1561     {
  1562       CHECK_CONS (tail);
  1563       Lisp_Object range = XCAR (tail);
  1564       CHECK_CONS (range);
  1565       CHECK_FIXNUM (XCAR (range));
  1566       CHECK_FIXNUM (XCDR (range));
  1567       EMACS_INT beg = XFIXNUM (XCAR (range));
  1568       EMACS_INT end = XFIXNUM (XCDR (range));
  1569       if (!(last_point <= beg && beg <= end && end <= point_max))
  1570         xsignal2 (Qtreesit_range_invalid,
  1571                   build_string ("RANGE is either overlapping,"
  1572                                 " out-of-order or out-of-range"),
  1573                   ranges);
  1574       last_point = end;
  1575     }
  1576 
  1577   CHECK_LIST_END (tail, ranges);
  1578 }
  1579 
  1580 /* Generate a list of ranges in Lisp from RANGES.  Assumes tree-sitter
  1581    tree and the buffer has the same visible region (wrt narrowing).
  1582    This function doesn't take ownership of RANGES.  BUFFER is used to
  1583    convert between tree-sitter buffer offset and buffer position.  */
  1584 static Lisp_Object
  1585 treesit_make_ranges (const TSRange *ranges, uint32_t len,
  1586                      struct buffer *buffer)
  1587 {
  1588   Lisp_Object list = Qnil;
  1589   for (int idx = 0; idx < len; idx++)
  1590     {
  1591       TSRange range = ranges[idx];
  1592       uint32_t beg_byte = range.start_byte + BUF_BEGV_BYTE (buffer);
  1593       uint32_t end_byte = range.end_byte + BUF_BEGV_BYTE (buffer);
  1594       eassert (BUF_BEGV_BYTE (buffer) <= beg_byte);
  1595       eassert (beg_byte <= end_byte);
  1596       eassert (end_byte <= BUF_ZV_BYTE (buffer));
  1597 
  1598       Lisp_Object lisp_range
  1599         = Fcons (make_fixnum (buf_bytepos_to_charpos (buffer, beg_byte)),
  1600                  make_fixnum (buf_bytepos_to_charpos (buffer, end_byte)));
  1601       list = Fcons (lisp_range, list);
  1602     }
  1603   return Fnreverse (list);
  1604 }
  1605 
  1606 DEFUN ("treesit-parser-set-included-ranges",
  1607        Ftreesit_parser_set_included_ranges,
  1608        Streesit_parser_set_included_ranges,
  1609        2, 2, 0,
  1610        doc: /* Limit PARSER to RANGES.
  1611 
  1612 RANGES is a list of (BEG . END), each (BEG . END) defines a region in
  1613 which the parser should operate.  Regions must not overlap, and the
  1614 regions should come in order in the list.  Signal
  1615 `treesit-set-range-error' if the argument is invalid, or something
  1616 else went wrong.  If RANGES is nil, the PARSER is to parse the whole
  1617 buffer.  */)
  1618   (Lisp_Object parser, Lisp_Object ranges)
  1619 {
  1620   treesit_check_parser (parser);
  1621   if (!NILP (ranges))
  1622     CHECK_CONS (ranges);
  1623   treesit_check_range_argument (ranges);
  1624 
  1625   treesit_initialize ();
  1626   /* Before we parse, catch up with narrowing/widening.  */
  1627   treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer));
  1628   treesit_sync_visible_region (parser);
  1629 
  1630   bool success;
  1631   if (NILP (ranges))
  1632     {
  1633       XTS_PARSER (parser)->has_range = false;
  1634       /* If RANGES is nil, make parser to parse the whole document.
  1635          To do that we give tree-sitter a 0 length, the range is a
  1636          dummy.  */
  1637       TSRange treesit_range = {{0, 0}, {0, 0}, 0, 0};
  1638       success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
  1639                                                &treesit_range , 0);
  1640     }
  1641   else
  1642     {
  1643       /* Set ranges for PARSER.  */
  1644       XTS_PARSER (parser)->has_range = true;
  1645 
  1646       if (list_length (ranges) > UINT32_MAX)
  1647         xsignal (Qargs_out_of_range, list2 (ranges, Flength (ranges)));
  1648       uint32_t len = (uint32_t) list_length (ranges);
  1649       TSRange *treesit_ranges = xmalloc (sizeof (TSRange) * len);
  1650       struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
  1651 
  1652       /* We can use XFIXNUM, XCAR, XCDR freely because we have checked
  1653          the input by treesit_check_range_argument.  */
  1654 
  1655       for (int idx = 0; !NILP (ranges); idx++, ranges = XCDR (ranges))
  1656         {
  1657           Lisp_Object range = XCAR (ranges);
  1658           ptrdiff_t beg_byte = buf_charpos_to_bytepos (buffer,
  1659                                                        XFIXNUM (XCAR (range)));
  1660           ptrdiff_t end_byte = buf_charpos_to_bytepos (buffer,
  1661                                                        XFIXNUM (XCDR (range)));
  1662           /* Shouldn't violate assertion since we just checked for
  1663              buffer size at the beginning of this function.  */
  1664           eassert (beg_byte - BUF_BEGV_BYTE (buffer) <= UINT32_MAX);
  1665           eassert (end_byte - BUF_BEGV_BYTE (buffer) <= UINT32_MAX);
  1666           /* We don't care about start and end points, put in dummy
  1667              values.  */
  1668           TSRange rg = {{0, 0}, {0, 0},
  1669                         (uint32_t) beg_byte - BUF_BEGV_BYTE (buffer),
  1670                         (uint32_t) end_byte - BUF_BEGV_BYTE (buffer)};
  1671           treesit_ranges[idx] = rg;
  1672         }
  1673       success = ts_parser_set_included_ranges (XTS_PARSER (parser)->parser,
  1674                                                treesit_ranges, len);
  1675       xfree (treesit_ranges);
  1676     }
  1677 
  1678   if (!success)
  1679     xsignal2 (Qtreesit_range_invalid,
  1680               build_string ("Something went wrong when setting ranges"),
  1681               ranges);
  1682 
  1683   XTS_PARSER (parser)->need_reparse = true;
  1684   return Qnil;
  1685 }
  1686 
  1687 DEFUN ("treesit-parser-included-ranges",
  1688        Ftreesit_parser_included_ranges,
  1689        Streesit_parser_included_ranges,
  1690        1, 1, 0,
  1691        doc: /* Return the ranges set for PARSER.
  1692 If no ranges are set for PARSER, return nil.
  1693 See also `treesit-parser-set-included-ranges'.  */)
  1694   (Lisp_Object parser)
  1695 {
  1696   treesit_check_parser (parser);
  1697   treesit_initialize ();
  1698 
  1699   /* When the parser doesn't have a range set and we call
  1700      ts_parser_included_ranges on it, it doesn't return an empty list,
  1701      but rather return some garbled data. (A single range where
  1702      start_byte = 0, end_byte = UINT32_MAX).  So we need to track
  1703      whether the parser is ranged ourselves.  */
  1704   if (!XTS_PARSER (parser)->has_range)
  1705     return Qnil;
  1706 
  1707   uint32_t len;
  1708   const TSRange *ranges
  1709     = ts_parser_included_ranges (XTS_PARSER (parser)->parser, &len);
  1710 
  1711   /* Our return value depends on the buffer state (BUF_BEGV_BYTE,
  1712      etc), so we need to sync up.  */
  1713   treesit_check_buffer_size (XBUFFER (XTS_PARSER (parser)->buffer));
  1714   treesit_sync_visible_region (parser);
  1715 
  1716   struct buffer *buffer = XBUFFER (XTS_PARSER (parser)->buffer);
  1717   return treesit_make_ranges (ranges, len, buffer);
  1718 }
  1719 
  1720 DEFUN ("treesit-parser-notifiers", Ftreesit_parser_notifiers,
  1721        Streesit_parser_notifiers,
  1722        1, 1, 0,
  1723        doc: /* Return the list of after-change notifier functions for PARSER.  */)
  1724   (Lisp_Object parser)
  1725 {
  1726   treesit_check_parser (parser);
  1727 
  1728   Lisp_Object return_list = Qnil;
  1729   Lisp_Object tail = XTS_PARSER (parser)->after_change_functions;
  1730   FOR_EACH_TAIL (tail)
  1731     return_list = Fcons (XCAR (tail), return_list);
  1732 
  1733   return return_list;
  1734 }
  1735 
  1736 DEFUN ("treesit-parser-add-notifier", Ftreesit_parser_add_notifier,
  1737        Streesit_parser_add_notifier,
  1738        2, 2, 0,
  1739        doc: /* Add FUNCTION to the list of PARSER's after-change notifiers.
  1740 FUNCTION must be a function symbol, rather than a lambda form.
  1741 FUNCTION should take 2 arguments, RANGES and PARSER.  RANGES is a list
  1742 of cons cells of the form (START . END), where START and END are buffer
  1743 positions.  PARSER is the parser issuing the notification.  */)
  1744   (Lisp_Object parser, Lisp_Object function)
  1745 {
  1746   treesit_check_parser (parser);
  1747   /* For simplicity we don't accept lambda functions.  */
  1748   CHECK_SYMBOL (function);
  1749 
  1750   Lisp_Object functions = XTS_PARSER (parser)->after_change_functions;
  1751   if (NILP (Fmemq (function, functions)))
  1752     XTS_PARSER (parser)->after_change_functions = Fcons (function, functions);
  1753   return Qnil;
  1754 }
  1755 
  1756 DEFUN ("treesit-parser-remove-notifier", Ftreesit_parser_remove_notifier,
  1757        Streesit_parser_remove_notifier,
  1758        2, 2, 0,
  1759        doc: /* Remove FUNCTION from the list of PARSER's after-change notifiers.
  1760   FUNCTION must be a function symbol, rather than a lambda form.
  1761 FUNCTION should take 2 arguments, RANGES and PARSER.  RANGES is a list
  1762 of cons of the form (START . END), where START and END are buffer
  1763 positions.  PARSER is the parser issuing the notification.   */)
  1764   (Lisp_Object parser, Lisp_Object function)
  1765 {
  1766   treesit_check_parser (parser);
  1767   /* For simplicity we don't accept lambda functions.  */
  1768   CHECK_SYMBOL (function);
  1769 
  1770   Lisp_Object functions = XTS_PARSER (parser)->after_change_functions;
  1771   if (!NILP (Fmemq (function, functions)))
  1772     XTS_PARSER (parser)->after_change_functions = Fdelq (function, functions);
  1773   return Qnil;
  1774 }
  1775 
  1776 
  1777 /*** Node API  */
  1778 
  1779 /* Check that OBJ is a positive integer and signal an error if
  1780    otherwise.  */
  1781 static void
  1782 treesit_check_positive_integer (Lisp_Object obj)
  1783 {
  1784   CHECK_INTEGER (obj);
  1785   if (XFIXNUM (obj) < 0)
  1786     xsignal1 (Qargs_out_of_range, obj);
  1787 }
  1788 
  1789 static void
  1790 treesit_check_node (Lisp_Object obj)
  1791 {
  1792   CHECK_TS_NODE (obj);
  1793   if (!treesit_node_uptodate_p (obj))
  1794     xsignal1 (Qtreesit_node_outdated, obj);
  1795 }
  1796 
  1797 /* Checks that OBJ is a positive integer and it is within the visible
  1798    portion of BUF. */
  1799 static void
  1800 treesit_check_position (Lisp_Object obj, struct buffer *buf)
  1801 {
  1802   treesit_check_positive_integer (obj);
  1803   ptrdiff_t pos = XFIXNUM (obj);
  1804   if (pos < BUF_BEGV (buf) || pos > BUF_ZV (buf))
  1805     xsignal1 (Qargs_out_of_range, obj);
  1806 }
  1807 
  1808 bool
  1809 treesit_node_uptodate_p (Lisp_Object obj)
  1810 {
  1811   Lisp_Object lisp_parser = XTS_NODE (obj)->parser;
  1812   return XTS_NODE (obj)->timestamp == XTS_PARSER (lisp_parser)->timestamp;
  1813 }
  1814 
  1815 DEFUN ("treesit-node-type",
  1816        Ftreesit_node_type, Streesit_node_type, 1, 1, 0,
  1817        doc: /* Return the NODE's type as a string.
  1818 If NODE is nil, return nil.  */)
  1819   (Lisp_Object node)
  1820 {
  1821   if (NILP (node)) return Qnil;
  1822   treesit_check_node (node);
  1823   treesit_initialize ();
  1824 
  1825   TSNode treesit_node = XTS_NODE (node)->node;
  1826   const char *type = ts_node_type (treesit_node);
  1827   return build_string (type);
  1828 }
  1829 
  1830 DEFUN ("treesit-node-start",
  1831        Ftreesit_node_start, Streesit_node_start, 1, 1, 0,
  1832        doc: /* Return the NODE's start position in its buffer.
  1833 If NODE is nil, return nil.  */)
  1834   (Lisp_Object node)
  1835 {
  1836   if (NILP (node)) return Qnil;
  1837   treesit_check_node (node);
  1838   treesit_initialize ();
  1839 
  1840   TSNode treesit_node = XTS_NODE (node)->node;
  1841   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  1842   uint32_t start_byte_offset = ts_node_start_byte (treesit_node);
  1843   struct buffer *buffer
  1844     = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
  1845   ptrdiff_t start_pos
  1846     = buf_bytepos_to_charpos (buffer,
  1847                               start_byte_offset + visible_beg);
  1848   return make_fixnum (start_pos);
  1849 }
  1850 
  1851 DEFUN ("treesit-node-end",
  1852        Ftreesit_node_end, Streesit_node_end, 1, 1, 0,
  1853        doc: /* Return the NODE's end position in its buffer.
  1854 If NODE is nil, return nil.  */)
  1855   (Lisp_Object node)
  1856 {
  1857   if (NILP (node)) return Qnil;
  1858   treesit_check_node (node);
  1859   treesit_initialize ();
  1860 
  1861   TSNode treesit_node = XTS_NODE (node)->node;
  1862   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  1863   uint32_t end_byte_offset = ts_node_end_byte (treesit_node);
  1864   struct buffer *buffer
  1865     = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
  1866   ptrdiff_t end_pos
  1867     = buf_bytepos_to_charpos (buffer, end_byte_offset + visible_beg);
  1868   return make_fixnum (end_pos);
  1869 }
  1870 
  1871 DEFUN ("treesit-node-string",
  1872        Ftreesit_node_string, Streesit_node_string, 1, 1, 0,
  1873        doc: /* Return the string representation of NODE.
  1874 If NODE is nil, return nil.  */)
  1875   (Lisp_Object node)
  1876 {
  1877   if (NILP (node)) return Qnil;
  1878   treesit_check_node (node);
  1879   treesit_initialize ();
  1880 
  1881   TSNode treesit_node = XTS_NODE (node)->node;
  1882   char *string = ts_node_string (treesit_node);
  1883   return build_string (string);
  1884 }
  1885 
  1886 static bool treesit_cursor_helper (TSTreeCursor *, TSNode, Lisp_Object);
  1887 
  1888 DEFUN ("treesit-node-parent",
  1889        Ftreesit_node_parent, Streesit_node_parent, 1, 1, 0,
  1890        doc: /* Return the immediate parent of NODE.
  1891 Return nil if NODE has no parent.  If NODE is nil, return nil.  */)
  1892   (Lisp_Object node)
  1893 {
  1894   if (NILP (node)) return Qnil;
  1895   treesit_check_node (node);
  1896   treesit_initialize ();
  1897 
  1898   Lisp_Object return_value = Qnil;
  1899 
  1900   TSNode treesit_node = XTS_NODE (node)->node;
  1901   Lisp_Object parser = XTS_NODE (node)->parser;
  1902   TSTreeCursor cursor;
  1903   if (!treesit_cursor_helper (&cursor, treesit_node, parser))
  1904     return return_value;
  1905 
  1906   if (ts_tree_cursor_goto_parent (&cursor))
  1907   {
  1908     TSNode parent = ts_tree_cursor_current_node (&cursor);
  1909     return_value = make_treesit_node (parser, parent);
  1910   }
  1911   ts_tree_cursor_delete (&cursor);
  1912   return return_value;
  1913 }
  1914 
  1915 DEFUN ("treesit-node-child",
  1916        Ftreesit_node_child, Streesit_node_child, 2, 3, 0,
  1917        doc: /* Return the Nth child of NODE.
  1918 
  1919 Return nil if there is no Nth child.  If NAMED is non-nil, look for
  1920 named child only.  NAMED defaults to nil.  If NODE is nil, return
  1921 nil.
  1922 
  1923 N could be negative, e.g., -1 represents the last child.  */)
  1924   (Lisp_Object node, Lisp_Object n, Lisp_Object named)
  1925 {
  1926   if (NILP (node)) return Qnil;
  1927   treesit_check_node (node);
  1928   CHECK_INTEGER (n);
  1929   EMACS_INT idx = XFIXNUM (n);
  1930 
  1931   treesit_initialize ();
  1932 
  1933   TSNode treesit_node = XTS_NODE (node)->node;
  1934   TSNode child;
  1935 
  1936   /* Process negative index.  */
  1937   if (idx < 0)
  1938     {
  1939       if (NILP (named))
  1940         idx = ts_node_child_count (treesit_node) + idx;
  1941       else
  1942         idx = ts_node_named_child_count (treesit_node) + idx;
  1943     }
  1944   if (idx < 0)
  1945     return Qnil;
  1946   if (idx > UINT32_MAX)
  1947     xsignal1 (Qargs_out_of_range, n);
  1948 
  1949   if (NILP (named))
  1950     child = ts_node_child (treesit_node, (uint32_t) idx);
  1951   else
  1952     child = ts_node_named_child (treesit_node, (uint32_t) idx);
  1953 
  1954   if (ts_node_is_null (child))
  1955     return Qnil;
  1956 
  1957   return make_treesit_node (XTS_NODE (node)->parser, child);
  1958 }
  1959 
  1960 DEFUN ("treesit-node-check",
  1961        Ftreesit_node_check, Streesit_node_check, 2, 2, 0,
  1962        doc: /* Return non-nil if NODE has PROPERTY, nil otherwise.
  1963 
  1964 PROPERTY could be `named', `missing', `extra', `outdated',
  1965 `has-error', or `live'.
  1966 
  1967 Named nodes correspond to named rules in the language definition,
  1968 whereas "anonymous" nodes correspond to string literals in the
  1969 language definition.
  1970 
  1971 Missing nodes are inserted by the parser in order to recover from
  1972 certain kinds of syntax errors, i.e., should be there but not there.
  1973 
  1974 Extra nodes represent things like comments, which are not required the
  1975 language definition, but can appear anywhere.
  1976 
  1977 A node is "outdated" if the parser has reparsed at least once after
  1978 the node was created.
  1979 
  1980 A node "has error" if itself is a syntax error or contains any syntax
  1981 errors.
  1982 
  1983 A node is "live" if its parser is not deleted and its buffer is
  1984 live.  */)
  1985   (Lisp_Object node, Lisp_Object property)
  1986 {
  1987   if (NILP (node)) return Qnil;
  1988   CHECK_TS_NODE (node);
  1989   CHECK_SYMBOL (property);
  1990   treesit_initialize ();
  1991 
  1992   TSNode treesit_node = XTS_NODE (node)->node;
  1993   bool result;
  1994 
  1995   if (BASE_EQ (property, Qoutdated))
  1996     return treesit_node_uptodate_p (node) ? Qnil : Qt;
  1997 
  1998   treesit_check_node (node);
  1999   if (BASE_EQ (property, Qnamed))
  2000     result = ts_node_is_named (treesit_node);
  2001   else if (BASE_EQ (property, Qmissing))
  2002     result = ts_node_is_missing (treesit_node);
  2003   else if (BASE_EQ (property, Qextra))
  2004     result = ts_node_is_extra (treesit_node);
  2005   else if (BASE_EQ (property, Qhas_error))
  2006     result = ts_node_has_error (treesit_node);
  2007   else if (BASE_EQ (property, Qlive))
  2008     result = treesit_parser_live_p (XTS_NODE (node)->parser);
  2009   else
  2010     signal_error ("Expecting `named', `missing', `extra', "
  2011                   "`outdated', `has-error', or `live', but got",
  2012                   property);
  2013   return result ? Qt : Qnil;
  2014 }
  2015 
  2016 DEFUN ("treesit-node-field-name-for-child",
  2017        Ftreesit_node_field_name_for_child,
  2018        Streesit_node_field_name_for_child, 2, 2, 0,
  2019        doc: /* Return the field name of the Nth child of NODE.
  2020 
  2021 Return nil if there's no Nth child, or if it has no field.
  2022 If NODE is nil, return nil.
  2023 
  2024 N counts all children, i.e., named ones and anonymous ones.
  2025 
  2026 N could be negative, e.g., -1 represents the last child.  */)
  2027   (Lisp_Object node, Lisp_Object n)
  2028 {
  2029   if (NILP (node))
  2030     return Qnil;
  2031   treesit_check_node (node);
  2032   CHECK_INTEGER (n);
  2033   EMACS_INT idx = XFIXNUM (n);
  2034   treesit_initialize ();
  2035 
  2036   TSNode treesit_node = XTS_NODE (node)->node;
  2037 
  2038   /* Process negative index.  */
  2039   if (idx < 0)
  2040     idx = ts_node_child_count (treesit_node) + idx;
  2041   if (idx < 0)
  2042     return Qnil;
  2043   if (idx > UINT32_MAX)
  2044     xsignal1 (Qargs_out_of_range, n);
  2045 
  2046   const char *name
  2047     = ts_node_field_name_for_child (treesit_node, (uint32_t) idx);
  2048 
  2049   if (name == NULL)
  2050     return Qnil;
  2051 
  2052   return build_string (name);
  2053 }
  2054 
  2055 DEFUN ("treesit-node-child-count",
  2056        Ftreesit_node_child_count,
  2057        Streesit_node_child_count, 1, 2, 0,
  2058        doc: /* Return the number of children of NODE.
  2059 
  2060 If NAMED is non-nil, count named children only.  NAMED defaults to
  2061 nil.  If NODE is nil, return nil.  */)
  2062   (Lisp_Object node, Lisp_Object named)
  2063 {
  2064   if (NILP (node))
  2065     return Qnil;
  2066   treesit_check_node (node);
  2067   treesit_initialize ();
  2068 
  2069   TSNode treesit_node = XTS_NODE (node)->node;
  2070   uint32_t count;
  2071   if (NILP (named))
  2072     count = ts_node_child_count (treesit_node);
  2073   else
  2074     count = ts_node_named_child_count (treesit_node);
  2075   return make_fixnum (count);
  2076 }
  2077 
  2078 DEFUN ("treesit-node-child-by-field-name",
  2079        Ftreesit_node_child_by_field_name,
  2080        Streesit_node_child_by_field_name, 2, 2, 0,
  2081        doc: /* Return the child of NODE with FIELD-NAME.
  2082 Return nil if there is no such child.  If NODE is nil, return nil.  */)
  2083   (Lisp_Object node, Lisp_Object field_name)
  2084 {
  2085   if (NILP (node))
  2086     return Qnil;
  2087   treesit_check_node (node);
  2088   CHECK_STRING (field_name);
  2089   treesit_initialize ();
  2090 
  2091   char *name_str = SSDATA (field_name);
  2092   TSNode treesit_node = XTS_NODE (node)->node;
  2093   TSNode child
  2094     = ts_node_child_by_field_name (treesit_node, name_str,
  2095                                    strlen (name_str));
  2096 
  2097   if (ts_node_is_null (child))
  2098     return Qnil;
  2099 
  2100   return make_treesit_node (XTS_NODE (node)->parser, child);
  2101 }
  2102 
  2103 DEFUN ("treesit-node-next-sibling",
  2104        Ftreesit_node_next_sibling,
  2105        Streesit_node_next_sibling, 1, 2, 0,
  2106        doc: /* Return the next sibling of NODE.
  2107 
  2108 Return nil if there is no next sibling.  If NAMED is non-nil, look for named
  2109 siblings only.  NAMED defaults to nil.  If NODE is nil, return nil.  */)
  2110   (Lisp_Object node, Lisp_Object named)
  2111 {
  2112   if (NILP (node)) return Qnil;
  2113   treesit_check_node (node);
  2114   treesit_initialize ();
  2115 
  2116   TSNode treesit_node = XTS_NODE (node)->node;
  2117   TSNode sibling;
  2118   if (NILP (named))
  2119     sibling = ts_node_next_sibling (treesit_node);
  2120   else
  2121     sibling = ts_node_next_named_sibling (treesit_node);
  2122 
  2123   if (ts_node_is_null (sibling))
  2124     return Qnil;
  2125 
  2126   return make_treesit_node (XTS_NODE (node)->parser, sibling);
  2127 }
  2128 
  2129 DEFUN ("treesit-node-prev-sibling",
  2130        Ftreesit_node_prev_sibling,
  2131        Streesit_node_prev_sibling, 1, 2, 0,
  2132        doc: /* Return the previous sibling of NODE.
  2133 
  2134 Return nil if there is no previous sibling.  If NAMED is non-nil, look
  2135 for named siblings only.  NAMED defaults to nil.  If NODE is nil,
  2136 return nil.  */)
  2137   (Lisp_Object node, Lisp_Object named)
  2138 {
  2139   if (NILP (node)) return Qnil;
  2140   treesit_check_node (node);
  2141   treesit_initialize ();
  2142 
  2143   TSNode treesit_node = XTS_NODE (node)->node;
  2144   TSNode sibling;
  2145 
  2146   if (NILP (named))
  2147     sibling = ts_node_prev_sibling (treesit_node);
  2148   else
  2149     sibling = ts_node_prev_named_sibling (treesit_node);
  2150 
  2151   if (ts_node_is_null (sibling))
  2152     return Qnil;
  2153 
  2154   return make_treesit_node (XTS_NODE (node)->parser, sibling);
  2155 }
  2156 
  2157 /* Our reimplementation of ts_node_first_child_for_byte.  The current
  2158    implementation of that function has problems (see bug#60127), so
  2159    before it's fixed upstream, we use our own reimplementation of it.
  2160    Return true if there is a valid sibling, return false otherwise.
  2161    If the return value is false, the position of the cursor is
  2162    undefined.  (We use cursor because technically we can't make a null
  2163    node for ourselves, also, using cursor is more convenient.)
  2164 
  2165    TODO: Remove this function once tree-sitter fixed the bug.  */
  2166 static bool treesit_cursor_first_child_for_byte
  2167 (TSTreeCursor *cursor, ptrdiff_t pos, bool named)
  2168 {
  2169   if (!ts_tree_cursor_goto_first_child (cursor))
  2170     return false;
  2171 
  2172   TSNode node = ts_tree_cursor_current_node (cursor);
  2173   while (ts_node_end_byte (node) <= pos)
  2174     {
  2175       if (ts_tree_cursor_goto_next_sibling (cursor))
  2176         node = ts_tree_cursor_current_node (cursor);
  2177       else
  2178         /* Reached the end and still can't find a valid sibling.  */
  2179         return false;
  2180     }
  2181   while (named && (!ts_node_is_named (node)))
  2182     {
  2183       if (ts_tree_cursor_goto_next_sibling (cursor))
  2184         node = ts_tree_cursor_current_node (cursor);
  2185       else
  2186         /* Reached the end and still can't find a named sibling.  */
  2187         return false;
  2188     }
  2189   return true;
  2190 }
  2191 
  2192 DEFUN ("treesit-node-first-child-for-pos",
  2193        Ftreesit_node_first_child_for_pos,
  2194        Streesit_node_first_child_for_pos, 2, 3, 0,
  2195        doc: /* Return the first child of NODE for buffer position POS.
  2196 
  2197 Specifically, return the first child that extends beyond POS.
  2198 Return nil if there is no such child.
  2199 If NAMED is non-nil, look for named children only.  NAMED defaults to nil.
  2200 Note that this function returns an immediate child, not the smallest
  2201 (grand)child.  If NODE is nil, return nil.  */)
  2202   (Lisp_Object node, Lisp_Object pos, Lisp_Object named)
  2203 {
  2204   if (NILP (node))
  2205     return Qnil;
  2206   treesit_check_node (node);
  2207 
  2208   struct buffer *buf = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
  2209   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  2210 
  2211   treesit_check_position (pos, buf);
  2212   treesit_initialize ();
  2213 
  2214   ptrdiff_t byte_pos = buf_charpos_to_bytepos (buf, XFIXNUM (pos));
  2215   TSNode treesit_node = XTS_NODE (node)->node;
  2216 
  2217   TSTreeCursor cursor = ts_tree_cursor_new (treesit_node);
  2218   ptrdiff_t treesit_pos = byte_pos - visible_beg;
  2219   bool success;
  2220   success = treesit_cursor_first_child_for_byte (&cursor, treesit_pos,
  2221                                                  !NILP (named));
  2222   TSNode child = ts_tree_cursor_current_node (&cursor);
  2223   ts_tree_cursor_delete (&cursor);
  2224 
  2225   if (!success)
  2226     return Qnil;
  2227   return make_treesit_node (XTS_NODE (node)->parser, child);
  2228 }
  2229 
  2230 DEFUN ("treesit-node-descendant-for-range",
  2231        Ftreesit_node_descendant_for_range,
  2232        Streesit_node_descendant_for_range, 3, 4, 0,
  2233        doc: /* Return the smallest node that covers buffer positions BEG to END.
  2234 
  2235 The returned node is a descendant of NODE.
  2236 Return nil if there is no such node.
  2237 If NAMED is non-nil, look for named child only.  NAMED defaults to nil.
  2238 If NODE is nil, return nil.  */)
  2239   (Lisp_Object node, Lisp_Object beg, Lisp_Object end, Lisp_Object named)
  2240 {
  2241   if (NILP (node)) return Qnil;
  2242   treesit_check_node (node);
  2243 
  2244   struct buffer *buf = XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer);
  2245   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  2246 
  2247   treesit_check_position (beg, buf);
  2248   treesit_check_position (end, buf);
  2249 
  2250   treesit_initialize ();
  2251 
  2252   ptrdiff_t byte_beg = buf_charpos_to_bytepos (buf, XFIXNUM (beg));
  2253   ptrdiff_t byte_end = buf_charpos_to_bytepos (buf, XFIXNUM (end));
  2254   TSNode treesit_node = XTS_NODE (node)->node;
  2255   TSNode child;
  2256   if (NILP (named))
  2257     child = ts_node_descendant_for_byte_range (treesit_node, byte_beg - visible_beg,
  2258                                                byte_end - visible_beg);
  2259   else
  2260     child = ts_node_named_descendant_for_byte_range (treesit_node,
  2261                                                      byte_beg - visible_beg,
  2262                                                      byte_end - visible_beg);
  2263 
  2264   if (ts_node_is_null (child))
  2265     return Qnil;
  2266 
  2267   return make_treesit_node (XTS_NODE (node)->parser, child);
  2268 }
  2269 
  2270 /* Return true if NODE1 and NODE2 are the same node.  Assumes they are
  2271    TS_NODE type.  */
  2272 bool treesit_node_eq (Lisp_Object node1, Lisp_Object node2)
  2273 {
  2274   treesit_initialize ();
  2275   TSNode treesit_node_1 = XTS_NODE (node1)->node;
  2276   TSNode treesit_node_2 = XTS_NODE (node2)->node;
  2277   return ts_node_eq (treesit_node_1, treesit_node_2);
  2278 }
  2279 
  2280 DEFUN ("treesit-node-eq",
  2281        Ftreesit_node_eq,
  2282        Streesit_node_eq, 2, 2, 0,
  2283        doc: /* Return non-nil if NODE1 and NODE2 refer to the same node.
  2284 If any one of NODE1 and NODE2 is nil, return nil.
  2285 This function uses the same equivalence metric as `equal', and returns
  2286 non-nil if NODE1 and NODE2 refer to the same node in a syntax tree
  2287 produced by tree-sitter.  */)
  2288   (Lisp_Object node1, Lisp_Object node2)
  2289 {
  2290   if (NILP (node1) || NILP (node2))
  2291     return Qnil;
  2292   CHECK_TS_NODE (node1);
  2293   CHECK_TS_NODE (node2);
  2294 
  2295   bool same_node = treesit_node_eq (node1, node2);
  2296   return same_node ? Qt : Qnil;
  2297 }
  2298 
  2299 
  2300 /*** Query functions */
  2301 
  2302 /* Convert a Lisp string to its printed representation in the tree-sitter
  2303    query syntax.  */
  2304 static Lisp_Object
  2305 treesit_query_string_string (Lisp_Object str)
  2306 {
  2307   /* Strings in the treesit query syntax only have the escapes
  2308      \n \r \t \0 and any other escaped char stands for that character.
  2309      Literal LF, NUL and " are forbidden.  */
  2310   ptrdiff_t nbytes = SBYTES (str);
  2311   ptrdiff_t escapes = 0;
  2312   for (ptrdiff_t i = 0; i < nbytes; i++)
  2313     {
  2314       unsigned char c = SREF (str, i);
  2315       escapes += (c == '\0' || c == '\n' || c == '\r' || c == '\t'
  2316                   || c == '"' || c == '\\');
  2317     }
  2318   ptrdiff_t nchars = SCHARS (str);
  2319   ptrdiff_t extra = escapes + 2;   /* backslashes + double quotes */
  2320   Lisp_Object dst = (STRING_MULTIBYTE (str)
  2321                      ? make_uninit_multibyte_string (nchars + extra,
  2322                                                      nbytes + extra)
  2323                      : make_uninit_string (nbytes + extra));
  2324   unsigned char *d = SDATA (dst);
  2325   *d++ = '"';
  2326   for (ptrdiff_t i = 0; i < nbytes; i++)
  2327     {
  2328       unsigned char c = SREF (str, i);
  2329       switch (c)
  2330         {
  2331         case '\0': *d++ = '\\'; *d++ = '0'; break;
  2332         case '\n': *d++ = '\\'; *d++ = 'n'; break;
  2333         case '\r': *d++ = '\\'; *d++ = 'r'; break;
  2334         case '\t': *d++ = '\\'; *d++ = 't'; break;
  2335         case '"':
  2336         case '\\': *d++ = '\\'; *d++ = c; break;
  2337         default: *d++ = c; break;
  2338         }
  2339     }
  2340   *d++ = '"';
  2341   eassert (d == SDATA (dst) + SBYTES (dst));
  2342   return dst;
  2343 }
  2344 
  2345 DEFUN ("treesit-pattern-expand",
  2346        Ftreesit_pattern_expand,
  2347        Streesit_pattern_expand, 1, 1, 0,
  2348        doc: /* Expand PATTERN to its string form.
  2349 
  2350 PATTERN can be
  2351 
  2352     :anchor
  2353     :?
  2354     :*
  2355     :+
  2356     :equal
  2357     :match
  2358     (TYPE PATTERN...)
  2359     [PATTERN...]
  2360     FIELD-NAME:
  2361     @CAPTURE-NAME
  2362     (_)
  2363     _
  2364     \"TYPE\"
  2365 
  2366 See Info node `(elisp)Pattern Matching' for detailed explanation.  */)
  2367   (Lisp_Object pattern)
  2368 {
  2369   if (BASE_EQ (pattern, QCanchor))
  2370     return Vtreesit_str_dot;
  2371   if (BASE_EQ (pattern, QCquestion))
  2372     return Vtreesit_str_question_mark;
  2373   if (BASE_EQ (pattern, QCstar))
  2374     return Vtreesit_str_star;
  2375   if (BASE_EQ (pattern, QCplus))
  2376     return Vtreesit_str_plus;
  2377   if (BASE_EQ (pattern, QCequal))
  2378     return Vtreesit_str_pound_equal;
  2379   if (BASE_EQ (pattern, QCmatch))
  2380     return Vtreesit_str_pound_match;
  2381   if (BASE_EQ (pattern, QCpred))
  2382     return Vtreesit_str_pound_pred;
  2383   Lisp_Object opening_delimeter
  2384     = VECTORP (pattern)
  2385       ? Vtreesit_str_open_bracket : Vtreesit_str_open_paren;
  2386   Lisp_Object closing_delimiter
  2387     = VECTORP (pattern)
  2388       ? Vtreesit_str_close_bracket : Vtreesit_str_close_paren;
  2389   if (VECTORP (pattern) || CONSP (pattern))
  2390     return concat3 (opening_delimeter,
  2391                     Fmapconcat (Qtreesit_pattern_expand,
  2392                                 pattern,
  2393                                 Vtreesit_str_space),
  2394                     closing_delimiter);
  2395   if (STRINGP (pattern))
  2396     return treesit_query_string_string (pattern);
  2397 
  2398   return Fprin1_to_string (pattern, Qnil, Qt);
  2399 }
  2400 
  2401 DEFUN ("treesit-query-expand",
  2402        Ftreesit_query_expand,
  2403        Streesit_query_expand, 1, 1, 0,
  2404        doc: /* Expand sexp QUERY to its string form.
  2405 
  2406 A PATTERN in QUERY can be
  2407 
  2408     :anchor
  2409     :?
  2410     :*
  2411     :+
  2412     :equal
  2413     :match
  2414     (TYPE PATTERN...)
  2415     [PATTERN...]
  2416     FIELD-NAME:
  2417     @CAPTURE-NAME
  2418     (_)
  2419     _
  2420     \"TYPE\"
  2421 
  2422 See Info node `(elisp)Pattern Matching' for detailed explanation.  */)
  2423   (Lisp_Object query)
  2424 {
  2425   return Fmapconcat (Qtreesit_pattern_expand, query, Vtreesit_str_space);
  2426 }
  2427 
  2428 /* This struct is used for passing captures to be check against
  2429    predicates.  Captures we check for are the ones in START before
  2430    END.  For example, if START and END are
  2431 
  2432    START       END
  2433     v              v
  2434    (1 . (2 . (3 . (4 . (5 . (6 . nil))))))
  2435 
  2436    We only look at captures 1 2 3.  */
  2437 struct capture_range
  2438 {
  2439   Lisp_Object start;
  2440   Lisp_Object end;
  2441 };
  2442 
  2443 /* Collect predicates for this match and return them in a list.  Each
  2444    predicate is a list of strings and symbols.  */
  2445 static Lisp_Object
  2446 treesit_predicates_for_pattern (TSQuery *query, uint32_t pattern_index)
  2447 {
  2448   uint32_t len;
  2449   const TSQueryPredicateStep *predicate_list
  2450     = ts_query_predicates_for_pattern (query, pattern_index, &len);
  2451   Lisp_Object result = Qnil;
  2452   Lisp_Object predicate = Qnil;
  2453   for (int idx = 0; idx < len; idx++)
  2454     {
  2455       TSQueryPredicateStep step = predicate_list[idx];
  2456       switch (step.type)
  2457         {
  2458         case TSQueryPredicateStepTypeCapture:
  2459           {
  2460             uint32_t str_len;
  2461             const char *str = ts_query_capture_name_for_id (query,
  2462                                                             step.value_id,
  2463                                                             &str_len);
  2464             predicate = Fcons (intern_c_string_1 (str, str_len),
  2465                                predicate);
  2466             break;
  2467           }
  2468         case TSQueryPredicateStepTypeString:
  2469           {
  2470             uint32_t str_len;
  2471             const char *str = ts_query_string_value_for_id (query,
  2472                                                             step.value_id,
  2473                                                             &str_len);
  2474             predicate = Fcons (make_string (str, str_len), predicate);
  2475             break;
  2476           }
  2477         case TSQueryPredicateStepTypeDone:
  2478           result = Fcons (Fnreverse (predicate), result);
  2479           predicate = Qnil;
  2480           break;
  2481         }
  2482     }
  2483   return Fnreverse (result);
  2484 }
  2485 
  2486 /* Translate a capture NAME (symbol) to a node.  If everything goes
  2487    fine, set NODE and return true; if error occurs (e.g., when there
  2488    is no node for the capture name), set NODE to Qnil, SIGNAL_DATA to
  2489    a suitable signal data, and return false.  */
  2490 static bool
  2491 treesit_predicate_capture_name_to_node (Lisp_Object name,
  2492                                         struct capture_range captures,
  2493                                         Lisp_Object *node,
  2494                                         Lisp_Object *signal_data)
  2495 {
  2496   *node = Qnil;
  2497   for (Lisp_Object tail = captures.start; !EQ (tail, captures.end);
  2498        tail = XCDR (tail))
  2499     {
  2500       if (EQ (XCAR (XCAR (tail)), name))
  2501         {
  2502           *node = XCDR (XCAR (tail));
  2503           break;
  2504         }
  2505     }
  2506 
  2507   if (NILP (*node))
  2508     {
  2509       *signal_data = list3 (build_string ("Cannot find captured node"),
  2510                             name, build_string ("A predicate can only refer"
  2511                                                 " to captured nodes in the "
  2512                                                 "same pattern"));
  2513       return false;
  2514     }
  2515   return true;
  2516 }
  2517 
  2518 /* Translate a capture NAME (symbol) to the text of the captured node.
  2519    If everything goes fine, set TEXT to the text and return true;
  2520    otherwise set TEXT to Qnil and set SIGNAL_DATA to a suitable signal
  2521    data.  */
  2522 static bool
  2523 treesit_predicate_capture_name_to_text (Lisp_Object name,
  2524                                         struct capture_range captures,
  2525                                         Lisp_Object *text,
  2526                                         Lisp_Object *signal_data)
  2527 {
  2528   Lisp_Object node = Qnil;
  2529   if (!treesit_predicate_capture_name_to_node (name, captures, &node, signal_data))
  2530     return false;
  2531 
  2532   struct buffer *old_buffer = current_buffer;
  2533   set_buffer_internal (XBUFFER (XTS_PARSER (XTS_NODE (node)->parser)->buffer));
  2534   *text = Fbuffer_substring (Ftreesit_node_start (node),
  2535                              Ftreesit_node_end (node));
  2536   set_buffer_internal (old_buffer);
  2537   return true;
  2538 }
  2539 
  2540 /* Handles predicate (#equal A B).  Return true if A equals B; return
  2541    false otherwise.  A and B can be either string, or a capture name.
  2542    The capture name evaluates to the text its captured node spans in
  2543    the buffer.  If everything goes fine, don't touch SIGNAL_DATA; if
  2544    error occurs, set it to a suitable signal data.  */
  2545 static bool
  2546 treesit_predicate_equal (Lisp_Object args, struct capture_range captures,
  2547                          Lisp_Object *signal_data)
  2548 {
  2549   if (list_length (args) != 2)
  2550     {
  2551       *signal_data = list2 (build_string ("Predicate `equal' requires "
  2552                                           "two arguments but got"),
  2553                             Flength (args));
  2554       return false;
  2555     }
  2556   Lisp_Object arg1 = XCAR (args);
  2557   Lisp_Object arg2 = XCAR (XCDR (args));
  2558   Lisp_Object text1 = arg1;
  2559   Lisp_Object text2 = arg2;
  2560   if (SYMBOLP (arg1))
  2561     {
  2562       if (!treesit_predicate_capture_name_to_text (arg1, captures, &text1,
  2563                                                    signal_data))
  2564         return false;
  2565     }
  2566   if (SYMBOLP (arg2))
  2567     {
  2568       if (!treesit_predicate_capture_name_to_text (arg2, captures, &text2,
  2569                                                    signal_data))
  2570         return false;
  2571     }
  2572 
  2573   return !NILP (Fstring_equal (text1, text2));
  2574 }
  2575 
  2576 /* Handles predicate (#match "regexp" @node).  Return true if "regexp"
  2577    matches the text spanned by @node; return false otherwise.
  2578    Matching is case-sensitive.  If everything goes fine, don't touch
  2579    SIGNAL_DATA; if error occurs, set it to a suitable signal data.  */
  2580 static bool
  2581 treesit_predicate_match (Lisp_Object args, struct capture_range captures,
  2582                          Lisp_Object *signal_data)
  2583 {
  2584   if (list_length (args) != 2)
  2585     {
  2586       *signal_data = list2 (build_string ("Predicate `match' requires two "
  2587                                           "arguments but got"),
  2588                             Flength (args));
  2589       return false;
  2590     }
  2591   Lisp_Object regexp = XCAR (args);
  2592   Lisp_Object capture_name = XCAR (XCDR (args));
  2593 
  2594   /* It's probably common to get the argument order backwards.  Catch
  2595      this mistake early and show helpful explanation, because Emacs
  2596      loves you.  (We put the regexp first because that's what
  2597      string-match does.)  */
  2598   if (!STRINGP (regexp))
  2599     xsignal1 (Qtreesit_query_error,
  2600               build_string ("The first argument to `match' should "
  2601                             "be a regexp string, not a capture name"));
  2602   if (!SYMBOLP (capture_name))
  2603     xsignal1 (Qtreesit_query_error,
  2604               build_string ("The second argument to `match' should "
  2605                             "be a capture name, not a string"));
  2606 
  2607   Lisp_Object node = Qnil;
  2608   if (!treesit_predicate_capture_name_to_node (capture_name, captures, &node,
  2609                                                signal_data))
  2610     return false;
  2611 
  2612   TSNode treesit_node = XTS_NODE (node)->node;
  2613   ptrdiff_t visible_beg = XTS_PARSER (XTS_NODE (node)->parser)->visible_beg;
  2614   uint32_t start_byte_offset = ts_node_start_byte (treesit_node);
  2615   uint32_t end_byte_offset = ts_node_end_byte (treesit_node);
  2616   ptrdiff_t start_byte = visible_beg + start_byte_offset;
  2617   ptrdiff_t end_byte = visible_beg + end_byte_offset;
  2618   ptrdiff_t start_pos = BYTE_TO_CHAR (start_byte);
  2619   ptrdiff_t end_pos = BYTE_TO_CHAR (end_byte);
  2620   ptrdiff_t old_begv = BEGV;
  2621   ptrdiff_t old_begv_byte = BEGV_BYTE;
  2622   ptrdiff_t old_zv = ZV;
  2623   ptrdiff_t old_zv_byte = ZV_BYTE;
  2624 
  2625   BEGV = start_pos;
  2626   BEGV_BYTE = start_byte;
  2627   ZV = end_pos;
  2628   ZV_BYTE = end_byte;
  2629 
  2630   ptrdiff_t val = search_buffer (regexp, start_pos, start_byte,
  2631                                  end_pos, end_byte, 1, true, Qnil, Qnil, false);
  2632 
  2633   BEGV = old_begv;
  2634   BEGV_BYTE = old_begv_byte;
  2635   ZV = old_zv;
  2636   ZV_BYTE = old_zv_byte;
  2637 
  2638   return (val > 0);
  2639 }
  2640 
  2641 /* Handles predicate (#pred FN ARG...).  Return true if FN returns
  2642    non-nil; return false otherwise.  The arity of FN must match the
  2643    number of ARGs.  If everything goes fine, don't touch SIGNAL_DATA;
  2644    if error occurs, set it to a suitable signal data.  */
  2645 static bool
  2646 treesit_predicate_pred (Lisp_Object args, struct capture_range captures,
  2647                         Lisp_Object *signal_data)
  2648 {
  2649   if (list_length (args) < 2)
  2650     {
  2651       *signal_data = list2 (build_string ("Predicate `pred' requires "
  2652                                           "at least two arguments, "
  2653                                           "but only got"),
  2654                             Flength (args));
  2655       return false;
  2656     }
  2657 
  2658   Lisp_Object fn = Fintern (XCAR (args), Qnil);
  2659   Lisp_Object nodes = Qnil;
  2660   Lisp_Object tail = XCDR (args);
  2661   FOR_EACH_TAIL (tail)
  2662   {
  2663     Lisp_Object node = Qnil;
  2664     if (!treesit_predicate_capture_name_to_node (XCAR (tail), captures, &node,
  2665                                                  signal_data))
  2666       return false;
  2667     nodes = Fcons (node, nodes);
  2668   }
  2669   nodes = Fnreverse (nodes);
  2670 
  2671   return !NILP (CALLN (Fapply, fn, nodes));
  2672 }
  2673 
  2674 /* If all predicates in PREDICATES pass, return true; otherwise
  2675    return false.  If everything goes fine, don't touch SIGNAL_DATA; if
  2676    error occurs, set it to a suitable signal data.  */
  2677 static bool
  2678 treesit_eval_predicates (struct capture_range captures, Lisp_Object predicates,
  2679                          Lisp_Object *signal_data)
  2680 {
  2681   bool pass = true;
  2682   /* Evaluate each predicates.  */
  2683   for (Lisp_Object tail = predicates;
  2684        pass && !NILP (tail); tail = XCDR (tail))
  2685     {
  2686       Lisp_Object predicate = XCAR (tail);
  2687       Lisp_Object fn = XCAR (predicate);
  2688       Lisp_Object args = XCDR (predicate);
  2689       if (!NILP (Fstring_equal (fn, Vtreesit_str_equal)))
  2690         pass &= treesit_predicate_equal (args, captures, signal_data);
  2691       else if (!NILP (Fstring_equal (fn, Vtreesit_str_match)))
  2692         pass &= treesit_predicate_match (args, captures, signal_data);
  2693       else if (!NILP (Fstring_equal (fn, Vtreesit_str_pred)))
  2694         pass &= treesit_predicate_pred (args, captures, signal_data);
  2695       else
  2696         {
  2697           *signal_data = list3 (build_string ("Invalid predicate"),
  2698                                 fn, build_string ("Currently Emacs only supports"
  2699                                                   " `equal', `match', and `pred'"
  2700                                                   " predicates"));
  2701           pass = false;
  2702         }
  2703     }
  2704   /* If all predicates passed, add captures to result list.  */
  2705   return pass;
  2706 }
  2707 
  2708 DEFUN ("treesit-query-compile",
  2709        Ftreesit_query_compile,
  2710        Streesit_query_compile, 2, 3, 0,
  2711        doc: /* Compile QUERY to a compiled query.
  2712 
  2713 Querying with a compiled query is much faster than an uncompiled one.
  2714 LANGUAGE is the language this query is for.
  2715 
  2716 If EAGER is non-nil, immediately load LANGUAGE and compile the query.
  2717 Otherwise defer the compilation until the query is first used.
  2718 
  2719 Signal `treesit-query-error' if QUERY is malformed or something else
  2720 goes wrong.  (This only happens if EAGER is non-nil.)
  2721 You can use `treesit-query-validate' to validate and debug a query.  */)
  2722   (Lisp_Object language, Lisp_Object query, Lisp_Object eager)
  2723 {
  2724   if (NILP (Ftreesit_query_p (query)))
  2725     wrong_type_argument (Qtreesit_query_p, query);
  2726   CHECK_SYMBOL (language);
  2727   if (TS_COMPILED_QUERY_P (query))
  2728     return query;
  2729 
  2730   treesit_initialize ();
  2731 
  2732   Lisp_Object lisp_query = make_treesit_query (query, language);
  2733 
  2734   /* Maybe actually compile.  */
  2735   if (NILP (eager))
  2736     return lisp_query;
  2737   else
  2738     {
  2739       Lisp_Object signal_symbol = Qnil;
  2740       Lisp_Object signal_data = Qnil;
  2741       TSQuery *treesit_query = treesit_ensure_query_compiled (lisp_query,
  2742                                                               &signal_symbol,
  2743                                                               &signal_data);
  2744 
  2745       if (treesit_query == NULL)
  2746         xsignal (signal_symbol, signal_data);
  2747 
  2748       return lisp_query;
  2749     }
  2750 }
  2751 
  2752 /* Resolve OBJ into a tree-sitter node Lisp_Object.  OBJ can be a
  2753    node, a parser, or a language symbol.  Note that this function can
  2754    signal.  */
  2755 static Lisp_Object treesit_resolve_node (Lisp_Object obj)
  2756 {
  2757   if (TS_NODEP (obj))
  2758     {
  2759       treesit_check_node (obj); /* Check if up-to-date.  */
  2760       return obj;
  2761     }
  2762   else if (TS_PARSERP (obj))
  2763     {
  2764       treesit_check_parser (obj); /* Check if deleted.  */
  2765       return Ftreesit_parser_root_node (obj);
  2766     }
  2767   else if (SYMBOLP (obj))
  2768     {
  2769       Lisp_Object parser
  2770         = Ftreesit_parser_create (obj, Fcurrent_buffer (), Qnil);
  2771       return Ftreesit_parser_root_node (parser);
  2772     }
  2773   else
  2774     xsignal2 (Qwrong_type_argument,
  2775               list4 (Qor, Qtreesit_node_p, Qtreesit_parser_p, Qsymbolp),
  2776               obj);
  2777 }
  2778 
  2779 /* Create and initialize QUERY.  When success, initialize TS_QUERY,
  2780    CURSOR, and NEED_FREE, and return true; if failed, initialize
  2781    SIGNAL_SYMBOL and SIGNAL_DATA, and return false.  If NEED_FREE is
  2782    initialized to true, the TS_QUERY and CURSOR needs to be freed
  2783    after use; otherwise they shouldn't be freed by hand.
  2784 
  2785    Basically this function looks at QUERY and check its type, if QUERY
  2786    is a compiled query, this function takes out its query and cursor;
  2787    if QUERY is a string or a cons, this function creates a new query
  2788    and cursor (so they need to be manually freed).
  2789 
  2790    This function assumes QUERY is either a compiled query, a string or
  2791    a cons, the caller should make sure QUERY is valid.
  2792 
  2793    LANG is the language to use if we need to create the query and
  2794    cursor.  */
  2795 static bool
  2796 treesit_initialize_query (Lisp_Object query, const TSLanguage *lang,
  2797                           TSQuery **ts_query, TSQueryCursor **cursor,
  2798                           bool *need_free, Lisp_Object *signal_symbol,
  2799                           Lisp_Object *signal_data)
  2800 {
  2801   if (TS_COMPILED_QUERY_P (query))
  2802     {
  2803       *ts_query = treesit_ensure_query_compiled (query, signal_symbol,
  2804                                                  signal_data);
  2805       *cursor = XTS_COMPILED_QUERY (query)->cursor;
  2806       /* We don't need to free ts_query and cursor because they
  2807          are stored in a lisp object, which is tracked by gc.  */
  2808       *need_free = false;
  2809       return (*ts_query != NULL);
  2810     }
  2811   else
  2812     {
  2813       /* Since query is not TS_COMPILED_QUERY, it can only be a string
  2814          or a cons.  */
  2815       if (CONSP (query))
  2816         query = Ftreesit_query_expand (query);
  2817       char *query_string = SSDATA (query);
  2818       uint32_t error_offset;
  2819       TSQueryError error_type;
  2820       *ts_query = ts_query_new (lang, query_string, strlen (query_string),
  2821                                 &error_offset, &error_type);
  2822       if (*ts_query == NULL)
  2823         {
  2824           *signal_symbol = Qtreesit_query_error;
  2825           *signal_data = treesit_compose_query_signal_data (error_offset,
  2826                                                             error_type, query);
  2827           return false;
  2828         }
  2829       else
  2830         {
  2831           *cursor = ts_query_cursor_new ();
  2832           *need_free = true;
  2833           return true;
  2834         }
  2835     }
  2836 }
  2837 
  2838 DEFUN ("treesit-query-capture",
  2839        Ftreesit_query_capture,
  2840        Streesit_query_capture, 2, 5, 0,
  2841        doc: /* Query NODE with patterns in QUERY.
  2842 
  2843 Return a list of (CAPTURE_NAME . NODE).  CAPTURE_NAME is the name
  2844 assigned to the node in PATTERN.  NODE is the captured node.
  2845 
  2846 QUERY is either a string query, a sexp query, or a compiled query.
  2847 See Info node `(elisp)Pattern Matching' for how to write a query in
  2848 either string or sexp form.  When using repeatedly, a compiled query
  2849 is much faster than a string or sexp one, so it is recommend to
  2850 compile your query if it will be used repeatedly.
  2851 
  2852 BEG and END, if both non-nil, specify the region of buffer positions
  2853 in which the query is executed.  Any matching node whose span overlaps
  2854 with the region between BEG and END are captured, it doesn't have to
  2855 be completely in the region.
  2856 
  2857 If NODE-ONLY is non-nil, return a list of nodes.
  2858 
  2859 Besides a node, NODE can also be a parser, in which case the root node
  2860 of that parser is used.
  2861 NODE can also be a language symbol, in which case the root node of a
  2862 parser for that language is used.  If such a parser doesn't exist, it
  2863 is created.
  2864 
  2865 Signal `treesit-query-error' if QUERY is malformed or something else
  2866 goes wrong.  You can use `treesit-query-validate' to validate and debug
  2867 the query.  */)
  2868   (Lisp_Object node, Lisp_Object query,
  2869    Lisp_Object beg, Lisp_Object end, Lisp_Object node_only)
  2870 {
  2871   if (!(TS_COMPILED_QUERY_P (query)
  2872         || CONSP (query) || STRINGP (query)))
  2873     wrong_type_argument (Qtreesit_query_p, query);
  2874 
  2875   treesit_initialize ();
  2876 
  2877   /* Resolve NODE into an actual node.  */
  2878   Lisp_Object lisp_node = treesit_resolve_node (node);
  2879 
  2880   /* Extract C values from Lisp objects.  */
  2881   TSNode treesit_node = XTS_NODE (lisp_node)->node;
  2882   Lisp_Object lisp_parser = XTS_NODE (lisp_node)->parser;
  2883 
  2884   const TSLanguage *lang
  2885     = ts_parser_language (XTS_PARSER (lisp_parser)->parser);
  2886 
  2887   /* Check BEG and END.  */
  2888   struct buffer *buf = XBUFFER (XTS_PARSER (lisp_parser)->buffer);
  2889   if (!NILP (beg))
  2890     treesit_check_position (beg, buf);
  2891   if (!NILP (end))
  2892     treesit_check_position (end, buf);
  2893 
  2894   /* Initialize query objects.  At the end of this block, we should
  2895      have a working TSQuery and a TSQueryCursor.  */
  2896   TSQuery *treesit_query;
  2897   TSQueryCursor *cursor;
  2898   bool needs_to_free_query_and_cursor;
  2899   Lisp_Object signal_symbol;
  2900   Lisp_Object signal_data;
  2901   if (!treesit_initialize_query (query, lang, &treesit_query, &cursor,
  2902                                  &needs_to_free_query_and_cursor,
  2903                                  &signal_symbol, &signal_data))
  2904     xsignal (signal_symbol, signal_data);
  2905 
  2906   /* WARN: After this point, free TREESIT_QUERY and CURSOR before every
  2907      signal and return if NEEDS_TO_FREE_QUERY_AND_CURSOR is true.  */
  2908 
  2909   /* Set query range.  */
  2910   if (!NILP (beg) && !NILP (end))
  2911     {
  2912       ptrdiff_t visible_beg
  2913         = XTS_PARSER (XTS_NODE (lisp_node)->parser)->visible_beg;
  2914       ptrdiff_t beg_byte = CHAR_TO_BYTE (XFIXNUM (beg));
  2915       ptrdiff_t end_byte = CHAR_TO_BYTE (XFIXNUM (end));
  2916       /* We never let tree-sitter run on buffers too large, so these
  2917          assertion should never hit.  */
  2918       eassert (beg_byte - visible_beg <= UINT32_MAX);
  2919       eassert (end_byte - visible_beg <= UINT32_MAX);
  2920       ts_query_cursor_set_byte_range (cursor,
  2921                                       (uint32_t) (beg_byte - visible_beg),
  2922                                       (uint32_t) (end_byte - visible_beg));
  2923     }
  2924 
  2925   /* Execute query.  */
  2926   ts_query_cursor_exec (cursor, treesit_query, treesit_node);
  2927   TSQueryMatch match;
  2928 
  2929   /* Go over each match, collect captures and predicates.  Include the
  2930      captures in the RESULT list unconditionally as we get them, then
  2931      test for predicates.  If predicates pass, then all good, if
  2932      predicates don't pass, revert the result back to the result
  2933      before this loop (PREV_RESULT).  (Predicates control the entire
  2934      match.) This way we don't need to create a list of captures in
  2935      every for loop and nconc it to RESULT every time.  That is indeed
  2936      the initial implementation in which Yoav found nconc being the
  2937      bottleneck (98.4% of the running time spent on nconc).  */
  2938   uint32_t patterns_count = ts_query_pattern_count (treesit_query);
  2939   Lisp_Object result = Qnil;
  2940   Lisp_Object prev_result = result;
  2941   Lisp_Object predicates_table = make_vector (patterns_count, Qt);
  2942   Lisp_Object predicate_signal_data = Qnil;
  2943 
  2944   struct buffer *old_buf = current_buffer;
  2945   set_buffer_internal (buf);
  2946 
  2947   while (ts_query_cursor_next_match (cursor, &match))
  2948     {
  2949       /* Record the checkpoint that we may roll back to.  */
  2950       prev_result = result;
  2951       /* 1. Get captured nodes.  */
  2952       const TSQueryCapture *captures = match.captures;
  2953       for (int idx = 0; idx < match.capture_count; idx++)
  2954         {
  2955           uint32_t capture_name_len;
  2956           TSQueryCapture capture = captures[idx];
  2957           Lisp_Object captured_node = make_treesit_node (lisp_parser,
  2958                                                          capture.node);
  2959 
  2960           Lisp_Object cap;
  2961           if (NILP (node_only))
  2962             {
  2963               const char *capture_name
  2964                 = ts_query_capture_name_for_id (treesit_query, capture.index,
  2965                                                 &capture_name_len);
  2966               cap = Fcons (intern_c_string_1 (capture_name, capture_name_len),
  2967                            captured_node);
  2968             }
  2969           else
  2970             cap = captured_node;
  2971 
  2972           result = Fcons (cap, result);
  2973         }
  2974       /* 2. Get predicates and check whether this match can be
  2975          included in the result list.  */
  2976       Lisp_Object predicates = AREF (predicates_table, match.pattern_index);
  2977       if (BASE_EQ (predicates, Qt))
  2978         {
  2979           predicates = treesit_predicates_for_pattern (treesit_query,
  2980                                                        match.pattern_index);
  2981           ASET (predicates_table, match.pattern_index, predicates);
  2982         }
  2983 
  2984       /* captures_lisp = Fnreverse (captures_lisp); */
  2985       struct capture_range captures_range = { result, prev_result };
  2986       bool match = treesit_eval_predicates (captures_range, predicates,
  2987                                             &predicate_signal_data);
  2988       if (!NILP (predicate_signal_data))
  2989         break;
  2990 
  2991       /* Predicates didn't pass, roll back.  */
  2992       if (!match)
  2993         result = prev_result;
  2994     }
  2995 
  2996   /* Final clean up.  */
  2997   if (needs_to_free_query_and_cursor)
  2998     {
  2999       ts_query_delete (treesit_query);
  3000       ts_query_cursor_delete (cursor);
  3001     }
  3002   set_buffer_internal (old_buf);
  3003 
  3004   /* Some capture predicate signaled an error.  */
  3005   if (!NILP (predicate_signal_data))
  3006     xsignal (Qtreesit_query_error, predicate_signal_data);
  3007 
  3008   return Fnreverse (result);
  3009 }
  3010 
  3011 
  3012 /*** Navigation */
  3013 
  3014 static inline void
  3015 treesit_assume_true (bool val)
  3016 {
  3017   eassert (val == true);
  3018 }
  3019 
  3020 /* Tries to move CURSOR to point to TARGET.  END_POS is the end of
  3021    TARGET.  If success, return true, otherwise move CURSOR back to
  3022    starting position and return false.  LIMIT is the recursion
  3023    limit.  */
  3024 static bool
  3025 treesit_cursor_helper_1 (TSTreeCursor *cursor, TSNode *target,
  3026                          uint32_t end_pos, ptrdiff_t limit)
  3027 {
  3028   if (limit <= 0)
  3029     return false;
  3030 
  3031   TSNode cursor_node = ts_tree_cursor_current_node (cursor);
  3032   if (ts_node_eq (cursor_node, *target))
  3033     return true;
  3034 
  3035   if (!ts_tree_cursor_goto_first_child (cursor))
  3036     return false;
  3037 
  3038   /* Skip nodes that definitely don't contain TARGET.  */
  3039   while (ts_node_end_byte (cursor_node) < end_pos)
  3040     {
  3041       if (!ts_tree_cursor_goto_next_sibling (cursor))
  3042         break;
  3043       cursor_node = ts_tree_cursor_current_node (cursor);
  3044     }
  3045 
  3046   /* Go through each sibling that could contain TARGET.  Because of
  3047      missing nodes (their width is 0), there could be multiple
  3048      siblings that could contain TARGET.  */
  3049   while (ts_node_start_byte (cursor_node) <= end_pos)
  3050     {
  3051       if (treesit_cursor_helper_1 (cursor, target, end_pos, limit - 1))
  3052         return true;
  3053 
  3054       if (!ts_tree_cursor_goto_next_sibling (cursor))
  3055         break;
  3056       cursor_node = ts_tree_cursor_current_node (cursor);
  3057     }
  3058 
  3059   /* Couldn't find TARGET, must be not in this subtree, move cursor
  3060      back and pray that other brothers and sisters can succeed.  */
  3061   treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
  3062   return false;
  3063 }
  3064 
  3065 /* Create a TSTreeCursor pointing at NODE.  PARSER is the lisp parser
  3066    that produced NODE.  If success, return true, otherwise return
  3067    false.  This function should almost always succeed, but if the parse
  3068    tree is strangely too deep and exceeds the recursion limit, this
  3069    function will fail and return false.
  3070 
  3071    If this function returns true, caller needs to free CURSOR; if
  3072    returns false, caller don't need to free CURSOR.
  3073 
  3074    The reason we need this instead of simply using ts_tree_cursor_new
  3075    is that we have to create the cursor on the root node and traverse
  3076    down to NODE, in order to record the correct stack of parent nodes.
  3077    Otherwise going to sibling or parent of NODE wouldn't work.
  3078 
  3079    (Wow perfect filling.)  */
  3080 static bool
  3081 treesit_cursor_helper (TSTreeCursor *cursor, TSNode node, Lisp_Object parser)
  3082 {
  3083   uint32_t end_pos = ts_node_end_byte (node);
  3084   TSNode root = ts_tree_root_node (XTS_PARSER (parser)->tree);
  3085   *cursor = ts_tree_cursor_new (root);
  3086   bool success = treesit_cursor_helper_1 (cursor, &node, end_pos,
  3087                                           TREESIT_RECURSION_LIMIT);
  3088   if (!success)
  3089     ts_tree_cursor_delete (cursor);
  3090   return success;
  3091 }
  3092 
  3093 /* Move CURSOR to the next/previous sibling.  FORWARD controls the
  3094    direction.  NAMED controls the namedness.  If there is a valid
  3095    sibling, move CURSOR to it and return true, otherwise return false.
  3096    When false is returned, CURSOR points to a sibling node of the node
  3097    we started at, but exactly which is undefined.  */
  3098 static bool
  3099 treesit_traverse_sibling_helper (TSTreeCursor *cursor,
  3100                                  bool forward, bool named)
  3101 {
  3102   if (forward)
  3103     {
  3104       if (!named)
  3105         return ts_tree_cursor_goto_next_sibling (cursor);
  3106       /* Else named...  */
  3107       while (ts_tree_cursor_goto_next_sibling (cursor))
  3108         {
  3109           if (ts_node_is_named (ts_tree_cursor_current_node (cursor)))
  3110             return true;
  3111         }
  3112       return false;
  3113     }
  3114   else /* Backward.  */
  3115     {
  3116       /* Go to first child and go through each sibling, until we find
  3117          the one just before the starting node.  */
  3118       TSNode start = ts_tree_cursor_current_node (cursor);
  3119       if (!ts_tree_cursor_goto_parent (cursor))
  3120         return false;
  3121       treesit_assume_true (ts_tree_cursor_goto_first_child (cursor));
  3122 
  3123       /* Now CURSOR is at the first child.  If we started at the first
  3124          child, then there is no further siblings.  */
  3125       TSNode first_child = ts_tree_cursor_current_node (cursor);
  3126       if (ts_node_eq (first_child, start))
  3127         return false;
  3128 
  3129       /* PROBE is always DELTA siblings ahead of CURSOR. */
  3130       TSTreeCursor probe = ts_tree_cursor_copy (cursor);
  3131       /* This is position of PROBE minus position of CURSOR.  */
  3132       ptrdiff_t delta = 0;
  3133       TSNode probe_node;
  3134       TSNode cursor_node;
  3135       while (ts_tree_cursor_goto_next_sibling (&probe))
  3136         {
  3137           /* Move PROBE forward, if it equals to the starting node,
  3138              CURSOR points to the node we want (prev valid sibling of
  3139              the starting node).  */
  3140           delta++;
  3141           probe_node = ts_tree_cursor_current_node (&probe);
  3142 
  3143           /* PROBE matched, depending on NAMED, return true/false.  */
  3144           if (ts_node_eq (probe_node, start))
  3145             {
  3146               ts_tree_cursor_delete (&probe);
  3147               cursor_node = ts_tree_cursor_current_node (cursor);
  3148               ts_tree_cursor_delete (&probe);
  3149               return (!named || (named && ts_node_is_named (cursor_node)));
  3150             }
  3151 
  3152           /* PROBE didn't match, move CURSOR forward to PROBE's
  3153              position, but if we are looking for named nodes, only
  3154              move CURSOR to PROBE if PROBE is at a named node.  */
  3155           if (!named || (named && ts_node_is_named (probe_node)))
  3156             for (; delta > 0; delta--)
  3157               treesit_assume_true (ts_tree_cursor_goto_next_sibling (cursor));
  3158         }
  3159       ts_tree_cursor_delete (&probe);
  3160       return false;
  3161     }
  3162 }
  3163 
  3164 /* Move CURSOR to the first/last child.  FORWARD controls the
  3165    direction.  NAMED controls the namedness.  If there is a valid
  3166    child, move CURSOR to it and return true, otherwise don't move
  3167    CURSOR and return false.  */
  3168 static bool
  3169 treesit_traverse_child_helper (TSTreeCursor *cursor,
  3170                                bool forward, bool named)
  3171 {
  3172   if (forward)
  3173     {
  3174       if (!named)
  3175         return ts_tree_cursor_goto_first_child (cursor);
  3176       else
  3177         {
  3178           if (!ts_tree_cursor_goto_first_child (cursor))
  3179             return false;
  3180           /* After this point, if you return false, make sure to go
  3181              back to parent.  */
  3182           TSNode first_child = ts_tree_cursor_current_node (cursor);
  3183           if (ts_node_is_named (first_child))
  3184             return true;
  3185 
  3186           if (treesit_traverse_sibling_helper (cursor, true, true))
  3187             return true;
  3188           else
  3189             {
  3190               treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
  3191               return false;
  3192             }
  3193         }
  3194     }
  3195   else /* Backward.  */
  3196     {
  3197       if (!ts_tree_cursor_goto_first_child (cursor))
  3198         return false;
  3199       /* After this point, if you return false, make sure to go
  3200          back to parent.  */
  3201 
  3202       /* First go to the last child.  */
  3203       while (ts_tree_cursor_goto_next_sibling (cursor));
  3204 
  3205       if (!named)
  3206         return true;
  3207       /* Else named... */
  3208       if (treesit_traverse_sibling_helper(cursor, false, true))
  3209         return true;
  3210       else
  3211         {
  3212           treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
  3213           return false;
  3214         }
  3215     }
  3216 }
  3217 
  3218 /* Given a symbol THING, and a language symbol LANGUAGE, find the
  3219    corresponding predicate definition in treesit-things-settings.
  3220    Don't check for the type of THING and LANGUAGE.
  3221 
  3222    If there isn't one, return Qnil.  */
  3223 static Lisp_Object
  3224 treesit_traverse_get_predicate (Lisp_Object thing, Lisp_Object language)
  3225 {
  3226   Lisp_Object cons = assq_no_quit (language, Vtreesit_thing_settings);
  3227   if (NILP (cons))
  3228     return Qnil;
  3229   Lisp_Object definitions = XCDR (cons);
  3230   Lisp_Object entry = assq_no_quit (thing, definitions);
  3231   if (NILP (entry))
  3232     return Qnil;
  3233   /* ENTRY looks like (THING PRED).  */
  3234   Lisp_Object cdr = XCDR (entry);
  3235   if (!CONSP (cdr))
  3236     return Qnil;
  3237   return XCAR (cdr);
  3238 }
  3239 
  3240 /* Validate the PRED passed to treesit_traverse_match_predicate.  If
  3241    there's an error, set SIGNAL_DATA to something signal accepts, and
  3242    return false, otherwise return true.  This function also check for
  3243    recusion levels: we place a arbitrary 100 level limit on recursive
  3244    predicates.  RECURSION_LEVEL is the current recursion level (that
  3245    starts at 0), if it goes over 99, return false and set
  3246    SIGNAL_DATA.  LANGUAGE is a LANGUAGE symbol.  */
  3247 static bool
  3248 treesit_traverse_validate_predicate (Lisp_Object pred,
  3249                                      Lisp_Object language,
  3250                                      Lisp_Object *signal_data,
  3251                                      ptrdiff_t recursion_level)
  3252 {
  3253   if (recursion_level > 99)
  3254     {
  3255       *signal_data = list1 (build_string ("Predicate recursion level "
  3256                                           "exceeded: it must not exceed "
  3257                                           "100 levels"));
  3258       return false;
  3259     }
  3260   if (STRINGP (pred))
  3261     return true;
  3262   else if (FUNCTIONP (pred))
  3263     return true;
  3264   else if (SYMBOLP (pred))
  3265     {
  3266       Lisp_Object definition = treesit_traverse_get_predicate (pred,
  3267                                                                language);
  3268       if (NILP (definition))
  3269         {
  3270           *signal_data = list2 (build_string ("Cannot find the definition "
  3271                                               "of the predicate in "
  3272                                               "`treesit-thing-settings'"),
  3273                                 pred);
  3274           return false;
  3275         }
  3276       return treesit_traverse_validate_predicate (definition,
  3277                                                   language,
  3278                                                   signal_data,
  3279                                                   recursion_level + 1);
  3280     }
  3281   else if (CONSP (pred))
  3282     {
  3283       Lisp_Object car = XCAR (pred);
  3284       Lisp_Object cdr = XCDR (pred);
  3285       if (BASE_EQ (car, Qnot))
  3286         {
  3287           if (!CONSP (cdr))
  3288             {
  3289               *signal_data = list2 (build_string ("Invalide `not' "
  3290                                                   "predicate"),
  3291                                     pred);
  3292               return false;
  3293             }
  3294           /* At this point CDR must be a cons.  */
  3295           if (XFIXNUM (Flength (cdr)) != 1)
  3296             {
  3297               *signal_data = list2 (build_string ("`not' can only "
  3298                                                   "have one argument"),
  3299                                     pred);
  3300               return false;
  3301             }
  3302           return treesit_traverse_validate_predicate (XCAR (cdr),
  3303                                                       language,
  3304                                                       signal_data,
  3305                                                       recursion_level + 1);
  3306         }
  3307       else if (BASE_EQ (car, Qor))
  3308         {
  3309           if (!CONSP (cdr) || NILP (cdr))
  3310             {
  3311               *signal_data = list2 (build_string ("`or' must have a list "
  3312                                                   "of patterns as "
  3313                                                   "arguments "),
  3314                                     pred);
  3315               return false;
  3316             }
  3317           FOR_EACH_TAIL (cdr)
  3318             {
  3319               if (!treesit_traverse_validate_predicate (XCAR (cdr),
  3320                                                         language,
  3321                                                         signal_data,
  3322                                                         recursion_level + 1))
  3323                 return false;
  3324             }
  3325           return true;
  3326         }
  3327       else if (STRINGP (car) && FUNCTIONP (cdr))
  3328         return true;
  3329     }
  3330   *signal_data = list2 (build_string ("Invalid predicate, see `treesit-thing-settings' for valid forms of predicate"),
  3331                         pred);
  3332   return false;
  3333 }
  3334 
  3335 /* Return true if the node at CURSOR matches PRED.  PRED can be a lot
  3336    of things:
  3337 
  3338    PRED := string | function | (string . function)
  3339          | (or PRED...) | (not PRED)
  3340 
  3341    See docstring of treesit-search-forward and friends for the meaning
  3342    of each shape.
  3343 
  3344    This function assumes PRED is in one of its valid forms.  If NAMED
  3345    is true, also check that the node is named.
  3346 
  3347    This function may signal if the predicate function signals.  */
  3348 static bool
  3349 treesit_traverse_match_predicate (TSTreeCursor *cursor, Lisp_Object pred,
  3350                                   Lisp_Object parser, bool named)
  3351 {
  3352   TSNode node = ts_tree_cursor_current_node (cursor);
  3353   if (named && !ts_node_is_named (node))
  3354     return false;
  3355 
  3356   if (STRINGP (pred))
  3357     {
  3358       const char *type = ts_node_type (node);
  3359       return fast_c_string_match (pred, type, strlen (type)) >= 0;
  3360     }
  3361   else if (FUNCTIONP (pred))
  3362     {
  3363       Lisp_Object lisp_node = make_treesit_node (parser, node);
  3364       return !NILP (CALLN (Ffuncall, pred, lisp_node));
  3365     }
  3366   else if (SYMBOLP (pred))
  3367     {
  3368       Lisp_Object language = XTS_PARSER (parser)->language_symbol;
  3369       Lisp_Object definition = treesit_traverse_get_predicate (pred,
  3370                                                                language);
  3371       return treesit_traverse_match_predicate (cursor, definition,
  3372                                                parser, named);
  3373     }
  3374   else if (CONSP (pred))
  3375     {
  3376       Lisp_Object car = XCAR (pred);
  3377       Lisp_Object cdr = XCDR (pred);
  3378 
  3379       if (BASE_EQ (car, Qnot))
  3380         return !treesit_traverse_match_predicate (cursor, XCAR (cdr),
  3381                                                   parser, named);
  3382       else if (BASE_EQ (car, Qor))
  3383         {
  3384           FOR_EACH_TAIL (cdr)
  3385             {
  3386               if (treesit_traverse_match_predicate (cursor, XCAR (cdr),
  3387                                                     parser, named))
  3388                 return true;
  3389             }
  3390           return false;
  3391         }
  3392       else if (STRINGP (car) && FUNCTIONP (cdr))
  3393         {
  3394           /* A bit of code duplication here, but should be fine.  */
  3395           const char *type = ts_node_type (node);
  3396           if (!(fast_c_string_match (car, type, strlen (type)) >= 0))
  3397             return false;
  3398 
  3399           Lisp_Object lisp_node = make_treesit_node (parser, node);
  3400           if (NILP (CALLN (Ffuncall, cdr, lisp_node)))
  3401             return false;
  3402 
  3403           return true;
  3404         }
  3405     }
  3406   /* Returning false is better than UB. */
  3407   return false;
  3408 }
  3409 
  3410 /* Traverse the parse tree starting from CURSOR.  See
  3411    `treesit-thing-settings' for the shapes PRED can have.  If the
  3412    node satisfies PRED, leave CURSOR on that node and return true.  If
  3413    no node satisfies PRED, move CURSOR back to starting position and
  3414    return false.
  3415 
  3416    LIMIT is the number of levels we descend in the tree.  FORWARD
  3417    controls the direction in which we traverse the tree, true means
  3418    forward, false backward.  If SKIP_ROOT is true, don't match ROOT.
  3419 
  3420    This function may signal if the predicate function signals.  */
  3421 
  3422 static bool
  3423 treesit_search_dfs (TSTreeCursor *cursor,
  3424                     Lisp_Object pred, Lisp_Object parser,
  3425                     bool forward, bool named, ptrdiff_t limit,
  3426                     bool skip_root)
  3427 {
  3428   if (!skip_root
  3429       && treesit_traverse_match_predicate (cursor, pred, parser, named))
  3430     return true;
  3431 
  3432   if (limit == 0)
  3433     return false;
  3434 
  3435   if (!treesit_traverse_child_helper (cursor, forward, named))
  3436     return false;
  3437   /* After this point, if you return false, make sure to go back to
  3438      parent.  */
  3439 
  3440   do /* Iterate through each child.  */
  3441     {
  3442       if (treesit_search_dfs (cursor, pred, parser, forward,
  3443                               named, limit - 1, false))
  3444         return true;
  3445     }
  3446   while (treesit_traverse_sibling_helper (cursor, forward, false));
  3447 
  3448   /* No match in any child's subtree, go back to starting node.  */
  3449   treesit_assume_true (ts_tree_cursor_goto_parent (cursor));
  3450   return false;
  3451 }
  3452 
  3453 /* Go through the whole tree linearly, leaf-first, starting from
  3454    START.  PRED, PARSER, NAMED, FORWARD are the same as in
  3455    ts_search_subtree.  If a match is found, leave CURSOR at that node,
  3456    and return true, if no match is found, return false, and CURSOR's
  3457    position is undefined.
  3458 
  3459    This function may signal if the predicate function signals.  */
  3460 
  3461 static bool
  3462 treesit_search_forward (TSTreeCursor *cursor,
  3463                         Lisp_Object pred, Lisp_Object parser,
  3464                         bool forward, bool named)
  3465 {
  3466   /* We don't search for subtree and always search from the leaf
  3467      nodes.  This way repeated call of this function traverses each
  3468      node in the tree once and only once:
  3469 
  3470      (while node (setq node (treesit-search-forward node)))  */
  3471   bool initial = true;
  3472   while (true)
  3473     {
  3474       if (!initial /* We don't match the starting node.  */
  3475           && treesit_traverse_match_predicate (cursor, pred, parser, named))
  3476         return true;
  3477       initial = false;
  3478 
  3479       /* Try going to the next sibling, if there is no next sibling,
  3480          go to parent and try again.  */
  3481       while (!treesit_traverse_sibling_helper (cursor, forward, named))
  3482         {
  3483           /* There is no next sibling, go to parent.  */
  3484           if (!ts_tree_cursor_goto_parent (cursor))
  3485             return false;
  3486 
  3487           if (treesit_traverse_match_predicate (cursor, pred, parser, named))
  3488               return true;
  3489         }
  3490       /* We are at the next sibling, deep dive into the first leaf
  3491          node.  */
  3492       while (treesit_traverse_child_helper (cursor, forward, false));
  3493       /* At this point CURSOR is at a leaf node.  */
  3494     }
  3495 }
  3496 
  3497 /* Clean up the given tree cursor CURSOR.  */
  3498 
  3499 static void
  3500 treesit_traverse_cleanup_cursor (void *cursor)
  3501 {
  3502   ts_tree_cursor_delete (cursor);
  3503 }
  3504 
  3505 DEFUN ("treesit-search-subtree",
  3506        Ftreesit_search_subtree,
  3507        Streesit_search_subtree, 2, 5, 0,
  3508        doc: /* Traverse the parse tree of NODE depth-first using PREDICATE.
  3509 
  3510 Traverse the subtree of NODE, and match PREDICATE with each node along
  3511 the way.  PREDICATE is a regexp string that matches against each
  3512 node's type, or a function that takes a node and returns nil/non-nil.
  3513 
  3514 By default, only traverse named nodes, but if ALL is non-nil, traverse
  3515 all nodes.  If BACKWARD is non-nil, traverse backwards.  If DEPTH is
  3516 non-nil, only traverse nodes up to that number of levels down in the
  3517 tree.  If DEPTH is nil, default to 1000.
  3518 
  3519 Return the first matched node, or nil if none matches.  */)
  3520   (Lisp_Object node, Lisp_Object predicate, Lisp_Object backward,
  3521    Lisp_Object all, Lisp_Object depth)
  3522 {
  3523   CHECK_TS_NODE (node);
  3524   CHECK_SYMBOL (all);
  3525   CHECK_SYMBOL (backward);
  3526 
  3527   /* We use a default limit of 1000.  See bug#59426 for the
  3528      discussion.  */
  3529   ptrdiff_t the_limit = TREESIT_RECURSION_LIMIT;
  3530   if (!NILP (depth))
  3531     {
  3532       CHECK_FIXNUM (depth);
  3533       the_limit = XFIXNUM (depth);
  3534     }
  3535 
  3536   treesit_initialize ();
  3537 
  3538   Lisp_Object parser = XTS_NODE (node)->parser;
  3539   Lisp_Object language = XTS_PARSER (parser)->language_symbol;
  3540 
  3541   Lisp_Object signal_data = Qnil;
  3542   if (!treesit_traverse_validate_predicate (predicate, language,
  3543                                             &signal_data, 0))
  3544     xsignal1 (Qtreesit_invalid_predicate, signal_data);
  3545 
  3546   Lisp_Object return_value = Qnil;
  3547   TSTreeCursor cursor;
  3548   if (!treesit_cursor_helper (&cursor, XTS_NODE (node)->node, parser))
  3549     return return_value;
  3550 
  3551   specpdl_ref count = SPECPDL_INDEX ();
  3552   record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
  3553 
  3554   if (treesit_search_dfs (&cursor, predicate, parser, NILP (backward),
  3555                           NILP (all), the_limit, false))
  3556     {
  3557       TSNode node = ts_tree_cursor_current_node (&cursor);
  3558       return_value = make_treesit_node (parser, node);
  3559     }
  3560 
  3561   return unbind_to (count, return_value);
  3562 }
  3563 
  3564 DEFUN ("treesit-search-forward",
  3565        Ftreesit_search_forward,
  3566        Streesit_search_forward, 2, 4, 0,
  3567        doc: /* Search for node matching PREDICATE in the parse tree of START.
  3568 
  3569 Start traversing the tree from node START, and match PREDICATE with
  3570 each node (except START itself) along the way.  PREDICATE is a regexp
  3571 string that matches against each node's type, or a function that takes
  3572 a node and returns non-nil if it matches.
  3573 
  3574 By default, only search for named nodes, but if ALL is non-nil, search
  3575 for all nodes.  If BACKWARD is non-nil, search backwards.
  3576 
  3577 Return the first matched node, or nil if none matches.
  3578 
  3579 For a tree like below, where START is marked by S, traverse as
  3580 numbered from 1 to 12:
  3581 
  3582                 12
  3583                 |
  3584        S--------3----------11
  3585        |        |          |
  3586   o--o-+--o  1--+--2    6--+-----10
  3587   |  |                  |        |
  3588   o  o                +-+-+   +--+--+
  3589                       |   |   |  |  |
  3590                       4   5   7  8  9
  3591 
  3592 Note that this function doesn't traverse the subtree of START, and it
  3593 always traverse leaf nodes first, then upwards.  */)
  3594   (Lisp_Object start, Lisp_Object predicate, Lisp_Object backward,
  3595    Lisp_Object all)
  3596 {
  3597   CHECK_TS_NODE (start);
  3598   CHECK_SYMBOL (all);
  3599   CHECK_SYMBOL (backward);
  3600 
  3601   treesit_initialize ();
  3602 
  3603   Lisp_Object parser = XTS_NODE (start)->parser;
  3604   Lisp_Object language = XTS_PARSER (parser)->language_symbol;
  3605 
  3606   Lisp_Object signal_data = Qnil;
  3607   if (!treesit_traverse_validate_predicate (predicate, language,
  3608                                             &signal_data, 0))
  3609     xsignal1 (Qtreesit_invalid_predicate, signal_data);
  3610 
  3611   Lisp_Object return_value = Qnil;
  3612   TSTreeCursor cursor;
  3613   if (!treesit_cursor_helper (&cursor, XTS_NODE (start)->node, parser))
  3614     return return_value;
  3615 
  3616   specpdl_ref count = SPECPDL_INDEX ();
  3617   record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
  3618 
  3619   if (treesit_search_forward (&cursor, predicate, parser,
  3620                               NILP (backward), NILP (all)))
  3621     {
  3622       TSNode node = ts_tree_cursor_current_node (&cursor);
  3623       return_value = make_treesit_node (parser, node);
  3624     }
  3625 
  3626   return unbind_to (count, return_value);
  3627 }
  3628 
  3629 /* Recursively traverse the tree under CURSOR, and append the result
  3630    subtree to PARENT's cdr.  See more in Ftreesit_induce_sparse_tree.
  3631    Note that the top-level children list is reversed, because
  3632    reasons.
  3633 
  3634    This function may signal if the predicate function signals.  */
  3635 static void
  3636 treesit_build_sparse_tree (TSTreeCursor *cursor, Lisp_Object parent,
  3637                            Lisp_Object pred, Lisp_Object process_fn,
  3638                            ptrdiff_t limit, Lisp_Object parser)
  3639 {
  3640   bool match = treesit_traverse_match_predicate (cursor, pred, parser, false);
  3641   if (match)
  3642     {
  3643       /* If this node matches pred, add a new node to the parent's
  3644          children list.  */
  3645       TSNode node = ts_tree_cursor_current_node (cursor);
  3646       Lisp_Object lisp_node = make_treesit_node (parser, node);
  3647       if (!NILP (process_fn))
  3648         lisp_node = CALLN (Ffuncall, process_fn, lisp_node);
  3649 
  3650       Lisp_Object this = Fcons (lisp_node, Qnil);
  3651       Fsetcdr (parent, Fcons (this, Fcdr (parent)));
  3652       /* Now for children nodes, this is the new parent.  */
  3653       parent = this;
  3654     }
  3655   /* Go through each child.  */
  3656   if (limit > 0 && ts_tree_cursor_goto_first_child (cursor))
  3657     {
  3658       do
  3659         {
  3660           /* Make sure not to use node after the recursive funcall.
  3661              Then C compilers should be smart enough not to copy NODE
  3662              to stack.  */
  3663           treesit_build_sparse_tree (cursor, parent, pred, process_fn,
  3664                                      limit - 1, parser);
  3665         }
  3666       while (ts_tree_cursor_goto_next_sibling (cursor));
  3667       /* Don't forget to come back to this node.  */
  3668       ts_tree_cursor_goto_parent (cursor);
  3669     }
  3670   /* Before we go, reverse children in the sparse tree.  */
  3671   if (match)
  3672     /* When match == true, "parent" is actually the node we added in
  3673        this layer (parent = this).  */
  3674     Fsetcdr (parent, Fnreverse (Fcdr (parent)));
  3675 }
  3676 
  3677 DEFUN ("treesit-induce-sparse-tree",
  3678        Ftreesit_induce_sparse_tree,
  3679        Streesit_induce_sparse_tree, 2, 4, 0,
  3680        doc: /* Create a sparse tree of ROOT's subtree.
  3681 
  3682 This takes the subtree under ROOT, and combs it so only the nodes
  3683 that match PREDICATE are left, like picking out grapes on the vine.
  3684 PREDICATE is a regexp string that matches against each node's type.
  3685 
  3686 For a subtree on the left that consist of both numbers and letters, if
  3687 PREDICATE is "is letter", the returned tree is the one on the right.
  3688 
  3689         a                 a              a
  3690         |                 |              |
  3691     +---+---+         +---+---+      +---+---+
  3692     |   |   |         |   |   |      |   |   |
  3693     b   1   2         b   |   |      b   c   d
  3694         |   |     =>      |   |  =>      |
  3695         c   +--+          c   +          e
  3696         |   |  |          |   |
  3697      +--+   d  4       +--+   d
  3698      |  |              |
  3699      e  5              e
  3700 
  3701 If PROCESS-FN is non-nil, it should be a function of one argument.  In
  3702 that case, instead of returning the matched nodes, pass each node to
  3703 PROCESS-FN, and use its return value instead.
  3704 
  3705 If non-nil, DEPTH is the number of levels to go down the tree from
  3706 ROOT.  If DEPTH is nil or omitted, it defaults to 1000.
  3707 
  3708 Each node in the returned tree looks like (NODE . (CHILD ...)).  The
  3709 root of this tree might be nil, if ROOT doesn't match PREDICATE.
  3710 
  3711 If no node matches PREDICATE, return nil.
  3712 
  3713 PREDICATE can also be a function that takes a node and returns
  3714 nil/non-nil, but it is slower and more memory consuming than using
  3715 a regexp.  */)
  3716   (Lisp_Object root, Lisp_Object predicate, Lisp_Object process_fn,
  3717    Lisp_Object depth)
  3718 {
  3719   CHECK_TS_NODE (root);
  3720 
  3721   if (!NILP (process_fn))
  3722     CHECK_TYPE (FUNCTIONP (process_fn), Qfunctionp, process_fn);
  3723 
  3724   /* We use a default limit of 1000.  See bug#59426 for the
  3725      discussion.  */
  3726   ptrdiff_t the_limit = TREESIT_RECURSION_LIMIT;
  3727   if (!NILP (depth))
  3728     {
  3729       CHECK_FIXNUM (depth);
  3730       the_limit = XFIXNUM (depth);
  3731     }
  3732 
  3733   treesit_initialize ();
  3734 
  3735   Lisp_Object parser = XTS_NODE (root)->parser;
  3736   Lisp_Object language = XTS_PARSER (parser)->language_symbol;
  3737 
  3738   Lisp_Object signal_data = Qnil;
  3739   if (!treesit_traverse_validate_predicate (predicate, language,
  3740                                             &signal_data, 0))
  3741     xsignal1 (Qtreesit_invalid_predicate, signal_data);
  3742 
  3743   Lisp_Object parent = Fcons (Qnil, Qnil);
  3744   /* In this function we never traverse above NODE, so we don't need
  3745      to use treesit_cursor_helper.  */
  3746   TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (root)->node);
  3747 
  3748   specpdl_ref count = SPECPDL_INDEX ();
  3749   record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
  3750 
  3751   treesit_build_sparse_tree (&cursor, parent, predicate, process_fn,
  3752                              the_limit, parser);
  3753 
  3754   unbind_to (count, Qnil);
  3755 
  3756   Fsetcdr (parent, Fnreverse (Fcdr (parent)));
  3757 
  3758   if (NILP (Fcdr (parent)))
  3759     return Qnil;
  3760   else
  3761     return parent;
  3762 }
  3763 
  3764 DEFUN ("treesit-node-match-p",
  3765        Ftreesit_node_match_p,
  3766        Streesit_node_match_p, 2, 2, 0,
  3767        doc: /* Check whether NODE matches PREDICATE.
  3768 
  3769 PREDICATE can be a regexp matching node type, a predicate function,
  3770 and more, see `treesit-thing-settings' for detail.  Return non-nil
  3771 if NODE matches PRED, nil otherwise.  */)
  3772   (Lisp_Object node, Lisp_Object predicate)
  3773 {
  3774   CHECK_TS_NODE (node);
  3775 
  3776   Lisp_Object parser = XTS_NODE (node)->parser;
  3777   Lisp_Object language = XTS_PARSER (parser)->language_symbol;
  3778 
  3779   Lisp_Object signal_data = Qnil;
  3780   if (!treesit_traverse_validate_predicate (predicate, language,
  3781                                             &signal_data, 0))
  3782     xsignal1 (Qtreesit_invalid_predicate, signal_data);
  3783 
  3784   TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (node)->node);
  3785 
  3786   specpdl_ref count = SPECPDL_INDEX ();
  3787   record_unwind_protect_ptr (treesit_traverse_cleanup_cursor, &cursor);
  3788 
  3789   bool match = false;
  3790   match = treesit_traverse_match_predicate (&cursor, predicate,
  3791                                             parser, false);
  3792 
  3793   unbind_to (count, Qnil);
  3794 
  3795   return match ? Qt : Qnil;
  3796 }
  3797 
  3798 DEFUN ("treesit-subtree-stat",
  3799        Ftreesit_subtree_stat,
  3800        Streesit_subtree_stat, 1, 1, 0,
  3801        doc: /* Return information about the subtree of NODE.
  3802 
  3803 Return a list (MAX-DEPTH MAX-WIDTH COUNT), where MAX-DEPTH is the
  3804 maximum depth of the subtree, MAX-WIDTH is the maximum number of
  3805 direct children of nodes in the subtree, and COUNT is the number of
  3806 nodes in the subtree, including NODE.  */)
  3807   (Lisp_Object node)
  3808 {
  3809   /* Having a limit on the depth to traverse doesn't have much impact
  3810      on the time it takes, so I left that out.  */
  3811   CHECK_TS_NODE (node);
  3812 
  3813   treesit_initialize ();
  3814 
  3815   TSTreeCursor cursor = ts_tree_cursor_new (XTS_NODE (node)->node);
  3816   ptrdiff_t max_depth = 1;
  3817   ptrdiff_t max_width = 0;
  3818   ptrdiff_t count = 0;
  3819   ptrdiff_t current_depth = 0;
  3820 
  3821   /* Traverse the subtree depth-first.  */
  3822   while (true)
  3823     {
  3824       count++;
  3825 
  3826       /* Go down depth-first.  */
  3827       while (ts_tree_cursor_goto_first_child (&cursor))
  3828         {
  3829           current_depth++;
  3830           count++;
  3831           /* While we're at here, measure the number of siblings.  */
  3832           ptrdiff_t width_count = 1;
  3833           while (ts_tree_cursor_goto_next_sibling (&cursor))
  3834             width_count++;
  3835           max_width = max (max_width, width_count);
  3836           /* Go back to the first sibling.  */
  3837           treesit_assume_true (ts_tree_cursor_goto_parent (&cursor));
  3838           treesit_assume_true (ts_tree_cursor_goto_first_child (&cursor));
  3839         }
  3840       max_depth = max (max_depth, current_depth);
  3841 
  3842       /* Go to next sibling.  If there is no next sibling, go to
  3843          parent's next sibling, and so on.  If there is no more
  3844          parent, we've traversed the whole subtree, stop.  */
  3845       while (!ts_tree_cursor_goto_next_sibling (&cursor))
  3846         {
  3847           if (ts_tree_cursor_goto_parent (&cursor))
  3848             current_depth--;
  3849           else
  3850             {
  3851               ts_tree_cursor_delete (&cursor);
  3852               return list3 (make_fixnum (max_depth),
  3853                             make_fixnum (max_width),
  3854                             make_fixnum (count));
  3855             }
  3856         }
  3857     }
  3858 }
  3859 
  3860 #endif  /* HAVE_TREE_SITTER */
  3861 
  3862 DEFUN ("treesit-available-p", Ftreesit_available_p,
  3863        Streesit_available_p, 0, 0, 0,
  3864        doc: /* Return non-nil if tree-sitter support is built-in and available.  */)
  3865   (void)
  3866 {
  3867 #if HAVE_TREE_SITTER
  3868   return load_tree_sitter_if_necessary (false) ? Qt : Qnil;
  3869 #else
  3870   return Qnil;
  3871 #endif
  3872 }
  3873 
  3874 
  3875 /*** Initialization */
  3876 
  3877 /* Initialize the tree-sitter routines.  */
  3878 void
  3879 syms_of_treesit (void)
  3880 {
  3881 #if HAVE_TREE_SITTER
  3882   DEFSYM (Qtreesit_parser_p, "treesit-parser-p");
  3883   DEFSYM (Qtreesit_node_p, "treesit-node-p");
  3884   DEFSYM (Qtreesit_compiled_query_p, "treesit-compiled-query-p");
  3885   DEFSYM (Qtreesit_query_p, "treesit-query-p");
  3886   DEFSYM (Qnamed, "named");
  3887   DEFSYM (Qmissing, "missing");
  3888   DEFSYM (Qextra, "extra");
  3889   DEFSYM (Qoutdated, "outdated");
  3890   DEFSYM (Qhas_error, "has-error");
  3891   DEFSYM (Qlive, "live");
  3892   DEFSYM (Qnot, "not");
  3893 
  3894   DEFSYM (QCanchor, ":anchor");
  3895   DEFSYM (QCquestion, ":?");
  3896   DEFSYM (QCstar, ":*");
  3897   DEFSYM (QCplus, ":+");
  3898   DEFSYM (QCequal, ":equal");
  3899   DEFSYM (QCmatch, ":match");
  3900   DEFSYM (QCpred, ":pred");
  3901 
  3902   DEFSYM (Qnot_found, "not-found");
  3903   DEFSYM (Qsymbol_error, "symbol-error");
  3904   DEFSYM (Qversion_mismatch, "version-mismatch");
  3905 
  3906   DEFSYM (Qtreesit_error, "treesit-error");
  3907   DEFSYM (Qtreesit_query_error, "treesit-query-error");
  3908   DEFSYM (Qtreesit_parse_error, "treesit-parse-error");
  3909   DEFSYM (Qtreesit_range_invalid, "treesit-range-invalid");
  3910   DEFSYM (Qtreesit_buffer_too_large,
  3911           "treesit-buffer-too-large");
  3912   DEFSYM (Qtreesit_load_language_error,
  3913           "treesit-load-language-error");
  3914   DEFSYM (Qtreesit_node_outdated,
  3915           "treesit-node-outdated");
  3916   DEFSYM (Quser_emacs_directory,
  3917           "user-emacs-directory");
  3918   DEFSYM (Qtreesit_parser_deleted, "treesit-parser-deleted");
  3919   DEFSYM (Qtreesit_pattern_expand, "treesit-pattern-expand");
  3920   DEFSYM (Qtreesit_invalid_predicate, "treesit-invalid-predicate");
  3921 
  3922   DEFSYM (Qor, "or");
  3923 
  3924 #ifdef WINDOWSNT
  3925   DEFSYM (Qtree_sitter, "tree-sitter");
  3926 #endif
  3927 
  3928   define_error (Qtreesit_error, "Generic tree-sitter error", Qerror);
  3929   define_error (Qtreesit_query_error, "Query pattern is malformed",
  3930                 Qtreesit_error);
  3931   /* Should be impossible, no need to document this error.  */
  3932   define_error (Qtreesit_parse_error, "Parse failed",
  3933                 Qtreesit_error);
  3934   define_error (Qtreesit_range_invalid,
  3935                 "RANGES are invalid: they have to be ordered and should not overlap",
  3936                 Qtreesit_error);
  3937   define_error (Qtreesit_buffer_too_large, "Buffer too large (> 4GiB)",
  3938                 Qtreesit_error);
  3939   define_error (Qtreesit_load_language_error,
  3940                 "Cannot load language definition",
  3941                 Qtreesit_error);
  3942   define_error (Qtreesit_node_outdated,
  3943                 "This node is outdated, please retrieve a new one",
  3944                 Qtreesit_error);
  3945   define_error (Qtreesit_parser_deleted,
  3946                 "This parser is deleted and cannot be used",
  3947                 Qtreesit_error);
  3948   define_error (Qtreesit_invalid_predicate,
  3949                 "Invalid predicate, see `treesit-thing-settings' "
  3950                 "for valid forms for a predicate",
  3951                 Qtreesit_error);
  3952 
  3953   DEFVAR_LISP ("treesit-load-name-override-list",
  3954                Vtreesit_load_name_override_list,
  3955                doc:
  3956                /* An override list for unconventional tree-sitter libraries.
  3957 
  3958 By default, Emacs assumes the dynamic library for LANG is
  3959 libtree-sitter-LANG.EXT, where EXT is the OS specific extension for
  3960 dynamic libraries.  Emacs also assumes that the name of the C function
  3961 the library provides is tree_sitter_LANG.  If that is not the case,
  3962 you can add an entry
  3963 
  3964     (LANG LIBRARY-BASE-NAME FUNCTION-NAME)
  3965 
  3966 to this list, where LIBRARY-BASE-NAME is the filename of the dynamic
  3967 library without the file-name extension, and FUNCTION-NAME is the
  3968 function provided by the library.  */);
  3969   Vtreesit_load_name_override_list = Qnil;
  3970 
  3971   DEFVAR_LISP ("treesit-extra-load-path",
  3972                Vtreesit_extra_load_path,
  3973                doc:
  3974                /* Additional directories to look for tree-sitter language definitions.
  3975 The value should be a list of directories.
  3976 When trying to load a tree-sitter language definition,
  3977 Emacs first looks in the directories mentioned in this variable,
  3978 then in the `tree-sitter' subdirectory of `user-emacs-directory', and
  3979 then in the system default locations for dynamic libraries, in that order.  */);
  3980   Vtreesit_extra_load_path = Qnil;
  3981 
  3982   DEFVAR_LISP ("treesit-thing-settings",
  3983                Vtreesit_thing_settings,
  3984                doc:
  3985                /* A list defining things.
  3986 
  3987 The value should be an alist of (LANGUAGE . DEFINITIONS), where
  3988 LANGUAGE is a language symbol, and DEFINITIONS is a list of
  3989 
  3990     (THING PRED)
  3991 
  3992 THING is a symbol representing the thing, like `defun', `sexp', or
  3993 `block'; PRED defines what kind of node can be qualified as THING.
  3994 
  3995 PRED can be a regexp string that matches the type of the node; it can
  3996 be a predicate function that takes the node as the sole argument and
  3997 returns t if the node is the thing; it can be a cons (REGEXP . FN),
  3998 which is a combination of a regexp and a predicate function, and the
  3999 node has to match both to qualify as the thing.
  4000 
  4001 PRED can also be recursively defined.  It can be (or PRED...), meaning
  4002 satisfying anyone of the inner PREDs qualifies the node; or (not
  4003 PRED), meaning not satisfying the inner PRED qualifies the node.
  4004 
  4005 Finally, PRED can refer to other THINGs defined in this list by using
  4006 the symbol of that THING.  For example, (or block sexp).  */);
  4007   Vtreesit_thing_settings = Qnil;
  4008 
  4009   staticpro (&Vtreesit_str_libtree_sitter);
  4010   Vtreesit_str_libtree_sitter = build_pure_c_string ("libtree-sitter-");
  4011   staticpro (&Vtreesit_str_tree_sitter);
  4012   Vtreesit_str_tree_sitter = build_pure_c_string ("tree-sitter-");
  4013 #ifndef WINDOWSNT
  4014   staticpro (&Vtreesit_str_dot_0);
  4015   Vtreesit_str_dot_0 = build_pure_c_string (".0");
  4016 #endif
  4017   staticpro (&Vtreesit_str_dot);
  4018   Vtreesit_str_dot = build_pure_c_string (".");
  4019   staticpro (&Vtreesit_str_question_mark);
  4020   Vtreesit_str_question_mark = build_pure_c_string ("?");
  4021   staticpro (&Vtreesit_str_star);
  4022   Vtreesit_str_star = build_pure_c_string ("*");
  4023   staticpro (&Vtreesit_str_plus);
  4024   Vtreesit_str_plus = build_pure_c_string ("+");
  4025   staticpro (&Vtreesit_str_pound_equal);
  4026   Vtreesit_str_pound_equal = build_pure_c_string ("#equal");
  4027   staticpro (&Vtreesit_str_pound_match);
  4028   Vtreesit_str_pound_match = build_pure_c_string ("#match");
  4029   staticpro (&Vtreesit_str_pound_pred);
  4030   Vtreesit_str_pound_pred = build_pure_c_string ("#pred");
  4031   staticpro (&Vtreesit_str_open_bracket);
  4032   Vtreesit_str_open_bracket = build_pure_c_string ("[");
  4033   staticpro (&Vtreesit_str_close_bracket);
  4034   Vtreesit_str_close_bracket = build_pure_c_string ("]");
  4035   staticpro (&Vtreesit_str_open_paren);
  4036   Vtreesit_str_open_paren = build_pure_c_string ("(");
  4037   staticpro (&Vtreesit_str_close_paren);
  4038   Vtreesit_str_close_paren = build_pure_c_string (")");
  4039   staticpro (&Vtreesit_str_space);
  4040   Vtreesit_str_space = build_pure_c_string (" ");
  4041   staticpro (&Vtreesit_str_equal);
  4042   Vtreesit_str_equal = build_pure_c_string ("equal");
  4043   staticpro (&Vtreesit_str_match);
  4044   Vtreesit_str_match = build_pure_c_string ("match");
  4045   staticpro (&Vtreesit_str_pred);
  4046   Vtreesit_str_pred = build_pure_c_string ("pred");
  4047 
  4048   defsubr (&Streesit_language_available_p);
  4049   defsubr (&Streesit_library_abi_version);
  4050   defsubr (&Streesit_language_abi_version);
  4051 
  4052   defsubr (&Streesit_parser_p);
  4053   defsubr (&Streesit_node_p);
  4054   defsubr (&Streesit_compiled_query_p);
  4055   defsubr (&Streesit_query_p);
  4056   defsubr (&Streesit_query_language);
  4057 
  4058   defsubr (&Streesit_node_parser);
  4059 
  4060   defsubr (&Streesit_parser_create);
  4061   defsubr (&Streesit_parser_delete);
  4062   defsubr (&Streesit_parser_list);
  4063   defsubr (&Streesit_parser_buffer);
  4064   defsubr (&Streesit_parser_language);
  4065 
  4066   defsubr (&Streesit_parser_root_node);
  4067   /* defsubr (&Streesit_parse_string); */
  4068 
  4069   defsubr (&Streesit_parser_set_included_ranges);
  4070   defsubr (&Streesit_parser_included_ranges);
  4071 
  4072   defsubr (&Streesit_parser_notifiers);
  4073   defsubr (&Streesit_parser_add_notifier);
  4074   defsubr (&Streesit_parser_remove_notifier);
  4075 
  4076   defsubr (&Streesit_node_type);
  4077   defsubr (&Streesit_node_start);
  4078   defsubr (&Streesit_node_end);
  4079   defsubr (&Streesit_node_string);
  4080   defsubr (&Streesit_node_parent);
  4081   defsubr (&Streesit_node_child);
  4082   defsubr (&Streesit_node_check);
  4083   defsubr (&Streesit_node_field_name_for_child);
  4084   defsubr (&Streesit_node_child_count);
  4085   defsubr (&Streesit_node_child_by_field_name);
  4086   defsubr (&Streesit_node_next_sibling);
  4087   defsubr (&Streesit_node_prev_sibling);
  4088   defsubr (&Streesit_node_first_child_for_pos);
  4089   defsubr (&Streesit_node_descendant_for_range);
  4090   defsubr (&Streesit_node_eq);
  4091 
  4092   defsubr (&Streesit_pattern_expand);
  4093   defsubr (&Streesit_query_expand);
  4094   defsubr (&Streesit_query_compile);
  4095   defsubr (&Streesit_query_capture);
  4096 
  4097   defsubr (&Streesit_search_subtree);
  4098   defsubr (&Streesit_search_forward);
  4099   defsubr (&Streesit_induce_sparse_tree);
  4100   defsubr (&Streesit_node_match_p);
  4101   defsubr (&Streesit_subtree_stat);
  4102 #endif /* HAVE_TREE_SITTER */
  4103   defsubr (&Streesit_available_p);
  4104 }

/* [<][>][^][v][top][bottom][index][help] */