root/src/syntax.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. SYNTAX_FLAGS_COMSTART_FIRST
  2. SYNTAX_FLAGS_COMSTART_SECOND
  3. SYNTAX_FLAGS_COMEND_FIRST
  4. SYNTAX_FLAGS_COMEND_SECOND
  5. SYNTAX_FLAGS_COMSTARTEND_FIRST
  6. SYNTAX_FLAGS_PREFIX
  7. SYNTAX_FLAGS_COMMENT_STYLEB
  8. SYNTAX_FLAGS_COMMENT_STYLEC
  9. SYNTAX_FLAGS_COMMENT_STYLEC2
  10. SYNTAX_FLAGS_COMMENT_NESTED
  11. SYNTAX_FLAGS_COMMENT_STYLE
  12. SYNTAX_COMEND_FIRST
  13. bset_syntax_table
  14. syntax_prefix_flag_p
  15. SET_RAW_SYNTAX_ENTRY
  16. SET_RAW_SYNTAX_ENTRY_RANGE
  17. SYNTAX_MATCH
  18. SETUP_SYNTAX_TABLE
  19. SETUP_SYNTAX_TABLE_FOR_OBJECT
  20. update_syntax_table
  21. parse_sexp_propertize
  22. update_syntax_table_forward
  23. char_quoted
  24. dec_bytepos
  25. find_defun_start
  26. prev_char_comend_first
  27. back_comment
  28. DEFUN
  29. check_syntax_table
  30. DEFUN
  31. DEFUN
  32. DEFUN
  33. DEFUN
  34. DEFUN
  35. DEFUN
  36. DEFUN
  37. DEFUN
  38. DEFUN
  39. scan_words
  40. DEFUN
  41. skip_chars
  42. skip_syntaxes
  43. in_classes
  44. forw_comment
  45. DEFUN
  46. syntax_multibyte
  47. scan_lists
  48. DEFUN
  49. in_2char_comment_start
  50. scan_sexps_forward
  51. internalize_parse_state
  52. init_syntax_once
  53. syms_of_syntax

     1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
     2    Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2023 Free
     3    Software Foundation, Inc.
     4 
     5 This file is part of GNU Emacs.
     6 
     7 GNU Emacs is free software: you can redistribute it and/or modify
     8 it under the terms of the GNU General Public License as published by
     9 the Free Software Foundation, either version 3 of the License, or (at
    10 your option) any later version.
    11 
    12 GNU Emacs is distributed in the hope that it will be useful,
    13 but WITHOUT ANY WARRANTY; without even the implied warranty of
    14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    15 GNU General Public License for more details.
    16 
    17 You should have received a copy of the GNU General Public License
    18 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    19 
    20 #include <config.h>
    21 
    22 #include "lisp.h"
    23 #include "dispextern.h"
    24 #include "character.h"
    25 #include "buffer.h"
    26 #include "regex-emacs.h"
    27 #include "syntax.h"
    28 #include "intervals.h"
    29 #include "category.h"
    30 
    31 /* Make syntax table lookup grant data in gl_state.  */
    32 #define SYNTAX(c) syntax_property (c, 1)
    33 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
    34 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
    35 
    36 /* Eight single-bit flags have the following meanings:
    37   1. This character is the first of a two-character comment-start sequence.
    38   2. This character is the second of a two-character comment-start sequence.
    39   3. This character is the first of a two-character comment-end sequence.
    40   4. This character is the second of a two-character comment-end sequence.
    41   5. This character is a prefix, for backward-prefix-chars.
    42   6. The char is part of a delimiter for comments of style "b".
    43   7. This character is part of a nestable comment sequence.
    44   8. The char is part of a delimiter for comments of style "c".
    45   Note that any two-character sequence whose first character has flag 1
    46   and whose second character has flag 2 will be interpreted as a comment start.
    47 
    48   Bits 6 and 8 discriminate among different comment styles.
    49   Languages such as C++ allow two orthogonal syntax start/end pairs
    50   and bit 6 determines whether a comment-end or Scommentend
    51   ends style a or b.  Comment markers can start style a, b, c, or bc.
    52   Style a is always the default.
    53   For 2-char comment markers, the style b flag is looked up only on the second
    54   char of the comment marker and on the first char of the comment ender.
    55   For style c (like the nested flag), the flag can be placed on any of
    56   the chars.  */
    57 
    58 /* These functions extract specific flags from an integer
    59    that holds the syntax code and the flags.  */
    60 
    61 static bool
    62 SYNTAX_FLAGS_COMSTART_FIRST (int flags)
    63 {
    64   return (flags >> 16) & 1;
    65 }
    66 static bool
    67 SYNTAX_FLAGS_COMSTART_SECOND (int flags)
    68 {
    69   return (flags >> 17) & 1;
    70 }
    71 static bool
    72 SYNTAX_FLAGS_COMEND_FIRST (int flags)
    73 {
    74   return (flags >> 18) & 1;
    75 }
    76 static bool
    77 SYNTAX_FLAGS_COMEND_SECOND (int flags)
    78 {
    79   return (flags >> 19) & 1;
    80 }
    81 static bool
    82 SYNTAX_FLAGS_COMSTARTEND_FIRST (int flags)
    83 {
    84   return (flags & 0x50000) != 0;
    85 }
    86 static bool
    87 SYNTAX_FLAGS_PREFIX (int flags)
    88 {
    89   return (flags >> 20) & 1;
    90 }
    91 static bool
    92 SYNTAX_FLAGS_COMMENT_STYLEB (int flags)
    93 {
    94   return (flags >> 21) & 1;
    95 }
    96 static bool
    97 SYNTAX_FLAGS_COMMENT_STYLEC (int flags)
    98 {
    99   return (flags >> 23) & 1;
   100 }
   101 static int
   102 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags)
   103 {
   104   return (flags >> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
   105 }
   106 static bool
   107 SYNTAX_FLAGS_COMMENT_NESTED (int flags)
   108 {
   109   return (flags >> 22) & 1;
   110 }
   111 
   112 /* FLAGS should be the flags of the main char of the comment marker, e.g.
   113    the second for comstart and the first for comend.  */
   114 static int
   115 SYNTAX_FLAGS_COMMENT_STYLE (int flags, int other_flags)
   116 {
   117   return (SYNTAX_FLAGS_COMMENT_STYLEB (flags)
   118           | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags)
   119           | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags));
   120 }
   121 
   122 /* Extract a particular flag for a given character.  */
   123 
   124 static bool
   125 SYNTAX_COMEND_FIRST (int c)
   126 {
   127   return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c));
   128 }
   129 
   130 /* We use these constants in place for comment-style and
   131    string-ender-char to distinguish comments/strings started by
   132    comment_fence and string_fence codes.  */
   133 
   134 enum
   135   {
   136     ST_COMMENT_STYLE = 256 + 1,
   137     ST_STRING_STYLE = 256 + 2
   138   };
   139 
   140 /* This is the internal form of the parse state used in parse-partial-sexp.  */
   141 
   142 struct lisp_parse_state
   143   {
   144     EMACS_INT depth;    /* Depth at end of parsing.  */
   145     int instring;  /* -1 if not within string, else desired terminator.  */
   146     EMACS_INT incomment; /* -1 if in unnestable comment else comment nesting */
   147     int comstyle;  /* comment style a=0, or b=1, or ST_COMMENT_STYLE.  */
   148     bool quoted;   /* True if just after an escape char at end of parsing.  */
   149     EMACS_INT mindepth; /* Minimum depth seen while scanning.  */
   150     /* Char number of most recent start-of-expression at current level */
   151     ptrdiff_t thislevelstart;
   152     /* Char number of start of containing expression */
   153     ptrdiff_t prevlevelstart;
   154     ptrdiff_t location;      /* Char number at which parsing stopped.  */
   155     ptrdiff_t location_byte; /* Corresponding byte position.  */
   156     ptrdiff_t comstr_start;  /* Position of last comment/string starter.  */
   157     Lisp_Object levelstarts; /* Char numbers of starts-of-expression
   158                                 of levels (starting from outermost).  */
   159     int prev_syntax; /* Syntax of previous position scanned, when
   160                         that position (potentially) holds the first char
   161                         of a 2-char construct, i.e. comment delimiter
   162                         or Sescape, etc.  Smax otherwise. */
   163   };
   164 
   165 /* These variables are a cache for finding the start of a defun.
   166    find_start_pos is the place for which the defun start was found.
   167    find_start_value is the defun start position found for it.
   168    find_start_value_byte is the corresponding byte position.
   169    find_start_buffer is the buffer it was found in.
   170    find_start_begv is the BEGV value when it was found.
   171    find_start_modiff is the value of MODIFF when it was found.  */
   172 
   173 static ptrdiff_t find_start_pos;
   174 static ptrdiff_t find_start_value;
   175 static ptrdiff_t find_start_value_byte;
   176 static struct buffer *find_start_buffer;
   177 static ptrdiff_t find_start_begv;
   178 static modiff_count find_start_modiff;
   179 
   180 
   181 static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
   182 static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
   183 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
   184 static void scan_sexps_forward (struct lisp_parse_state *,
   185                                 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
   186                                 bool, int);
   187 static void internalize_parse_state (Lisp_Object, struct lisp_parse_state *);
   188 static bool in_classes (int, Lisp_Object);
   189 static void parse_sexp_propertize (ptrdiff_t charpos);
   190 
   191 /* This setter is used only in this file, so it can be private.  */
   192 static void
   193 bset_syntax_table (struct buffer *b, Lisp_Object val)
   194 {
   195   b->syntax_table_ = val;
   196 }
   197 
   198 /* Whether the syntax of the character C has the prefix flag set.  */
   199 bool
   200 syntax_prefix_flag_p (int c)
   201 {
   202   return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
   203 }
   204 
   205 struct gl_state_s gl_state;             /* Global state of syntax parser.  */
   206 
   207 enum { INTERVALS_AT_ONCE = 10 };        /* 1 + max-number of intervals
   208                                            to scan to property-change.  */
   209 
   210 /* Set the syntax entry VAL for char C in table TABLE.  */
   211 
   212 static void
   213 SET_RAW_SYNTAX_ENTRY (Lisp_Object table, int c, Lisp_Object val)
   214 {
   215   CHAR_TABLE_SET (table, c, val);
   216 }
   217 
   218 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
   219    RANGE is a cons (FROM . TO) specifying the range of characters.  */
   220 
   221 static void
   222 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table, Lisp_Object range,
   223                             Lisp_Object val)
   224 {
   225   Fset_char_table_range (table, range, val);
   226 }
   227 
   228 /* Extract the information from the entry for character C
   229    in the current syntax table.  */
   230 
   231 static Lisp_Object
   232 SYNTAX_MATCH (int c)
   233 {
   234   Lisp_Object ent = SYNTAX_ENTRY (c);
   235   return CONSP (ent) ? XCDR (ent) : Qnil;
   236 }
   237 
   238 /* This should be called with FROM at the start of forward
   239    search, or after the last position of the backward search.  It
   240    makes sure that the first char is picked up with correct table, so
   241    one does not need to call UPDATE_SYNTAX_TABLE immediately after the
   242    call.
   243    Sign of COUNT gives the direction of the search.
   244  */
   245 
   246 static void
   247 SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
   248 {
   249   SETUP_BUFFER_SYNTAX_TABLE ();
   250   gl_state.b_property = BEGV;
   251   gl_state.e_property = ZV + 1;
   252   gl_state.object = Qnil;
   253   gl_state.offset = 0;
   254   if (parse_sexp_lookup_properties)
   255     {
   256       if (count > 0)
   257         update_syntax_table_forward (from, true, Qnil);
   258       else if (from > BEGV)
   259         {
   260           update_syntax_table (from - 1, count, true, Qnil);
   261           parse_sexp_propertize (from - 1);
   262         }
   263     }
   264 }
   265 
   266 /* Same as above, but in OBJECT.  If OBJECT is nil, use current buffer.
   267    If it is t (which is only used in fast_c_string_match_ignore_case),
   268    ignore properties altogether.
   269 
   270    This is meant for regex-emacs.c to use.  For buffers, regex-emacs.c
   271    passes arguments to the UPDATE_SYNTAX_TABLE functions which are
   272    relative to BEGV.  So if it is a buffer, we set the offset field to
   273    BEGV.  */
   274 
   275 void
   276 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
   277                                ptrdiff_t from, ptrdiff_t count)
   278 {
   279   SETUP_BUFFER_SYNTAX_TABLE ();
   280   gl_state.object = object;
   281   if (BUFFERP (gl_state.object))
   282     {
   283       struct buffer *buf = XBUFFER (gl_state.object);
   284       gl_state.b_property = 1;
   285       gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;
   286       gl_state.offset = BUF_BEGV (buf) - 1;
   287     }
   288   else if (NILP (gl_state.object))
   289     {
   290       gl_state.b_property = 1;
   291       gl_state.e_property = ZV - BEGV + 1;
   292       gl_state.offset = BEGV - 1;
   293     }
   294   else if (EQ (gl_state.object, Qt))
   295     {
   296       gl_state.b_property = 0;
   297       gl_state.e_property = PTRDIFF_MAX;
   298       gl_state.offset = 0;
   299     }
   300   else
   301     {
   302       gl_state.b_property = 0;
   303       gl_state.e_property = 1 + SCHARS (gl_state.object);
   304       gl_state.offset = 0;
   305     }
   306   if (parse_sexp_lookup_properties)
   307     update_syntax_table (from + gl_state.offset - (count <= 0),
   308                          count, 1, gl_state.object);
   309 }
   310 
   311 /* Update gl_state to an appropriate interval which contains CHARPOS.  The
   312    sign of COUNT gives the relative position of CHARPOS wrt the previously
   313    valid interval.  If INIT, only [be]_property fields of gl_state are
   314    valid at start, the rest is filled basing on OBJECT.
   315 
   316    `gl_state.*_i' are the intervals, and CHARPOS is further in the search
   317    direction than the intervals - or in an interval.  We update the
   318    current syntax-table basing on the property of this interval, and
   319    update the interval to start further than CHARPOS - or be
   320    NULL.  We also update lim_property to be the next value of
   321    charpos to call this subroutine again - or be before/after the
   322    start/end of OBJECT.  */
   323 
   324 void
   325 update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
   326                      Lisp_Object object)
   327 {
   328   Lisp_Object tmp_table;
   329   int cnt = 0;
   330   bool invalidate = true;
   331   INTERVAL i;
   332 
   333   if (init)
   334     {
   335       gl_state.old_prop = Qnil;
   336       gl_state.start = gl_state.b_property;
   337       gl_state.stop = gl_state.e_property;
   338       i = interval_of (charpos, object);
   339       gl_state.backward_i = gl_state.forward_i = i;
   340       invalidate = false;
   341       if (!i)
   342         return;
   343       i = gl_state.forward_i;
   344       gl_state.b_property = i->position - gl_state.offset;
   345       gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
   346     }
   347   else
   348     {
   349       i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
   350 
   351       /* We are guaranteed to be called with CHARPOS either in i,
   352          or further off.  */
   353       if (!i)
   354         error ("Error in syntax_table logic for to-the-end intervals");
   355       else if (charpos < i->position)           /* Move left.  */
   356         {
   357           if (count > 0)
   358             error ("Error in syntax_table logic for intervals <-");
   359           /* Update the interval.  */
   360           i = update_interval (i, charpos);
   361           if (INTERVAL_LAST_POS (i) != gl_state.b_property)
   362             {
   363               invalidate = false;
   364               gl_state.forward_i = i;
   365               gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
   366             }
   367         }
   368       else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right.  */
   369         {
   370           if (count < 0)
   371             error ("Error in syntax_table logic for intervals ->");
   372           /* Update the interval.  */
   373           i = update_interval (i, charpos);
   374           if (i->position != gl_state.e_property)
   375             {
   376               invalidate = false;
   377               gl_state.backward_i = i;
   378               gl_state.b_property = i->position - gl_state.offset;
   379             }
   380         }
   381     }
   382 
   383   tmp_table = textget (i->plist, Qsyntax_table);
   384 
   385   if (invalidate)
   386     invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
   387 
   388   if (invalidate)               /* Did not get to adjacent interval.  */
   389     {                           /* with the same table => */
   390                                 /* invalidate the old range.  */
   391       if (count > 0)
   392         {
   393           gl_state.backward_i = i;
   394           gl_state.b_property = i->position - gl_state.offset;
   395         }
   396       else
   397         {
   398           gl_state.forward_i = i;
   399           gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
   400         }
   401     }
   402 
   403   if (!EQ (tmp_table, gl_state.old_prop))
   404     {
   405       gl_state.current_syntax_table = tmp_table;
   406       gl_state.old_prop = tmp_table;
   407       if (EQ (Fsyntax_table_p (tmp_table), Qt))
   408         {
   409           gl_state.use_global = 0;
   410         }
   411       else if (CONSP (tmp_table))
   412         {
   413           gl_state.use_global = 1;
   414           gl_state.global_code = tmp_table;
   415         }
   416       else
   417         {
   418           gl_state.use_global = 0;
   419           gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
   420         }
   421     }
   422 
   423   while (i)
   424     {
   425       if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
   426         {
   427           if (count > 0)
   428             {
   429               gl_state.e_property = i->position - gl_state.offset;
   430               gl_state.forward_i = i;
   431             }
   432           else
   433             {
   434               gl_state.b_property
   435                 = i->position + LENGTH (i) - gl_state.offset;
   436               gl_state.backward_i = i;
   437             }
   438           return;
   439         }
   440       else if (cnt == INTERVALS_AT_ONCE)
   441         {
   442           if (count > 0)
   443             {
   444               gl_state.e_property
   445                 = i->position + LENGTH (i) - gl_state.offset
   446                 /* e_property at EOB is not set to ZV but to ZV+1, so that
   447                    we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
   448                    having to check eob between the two.  */
   449                 + (next_interval (i) ? 0 : 1);
   450               gl_state.forward_i = i;
   451             }
   452           else
   453             {
   454               gl_state.b_property = i->position - gl_state.offset;
   455               gl_state.backward_i = i;
   456             }
   457           return;
   458         }
   459       cnt++;
   460       i = count > 0 ? next_interval (i) : previous_interval (i);
   461     }
   462   eassert (i == NULL); /* This property goes to the end.  */
   463   if (count > 0)
   464     {
   465       gl_state.e_property = gl_state.stop;
   466       gl_state.forward_i = i;
   467     }
   468   else
   469     gl_state.b_property = gl_state.start;
   470 }
   471 
   472 static void
   473 parse_sexp_propertize (ptrdiff_t charpos)
   474 {
   475   EMACS_INT zv = ZV;
   476   if (syntax_propertize__done <= charpos
   477       && syntax_propertize__done < zv)
   478     {
   479       modiff_count modiffs = CHARS_MODIFF;
   480       safe_call1 (Qinternal__syntax_propertize,
   481                   make_fixnum (min (zv, 1 + charpos)));
   482       if (modiffs != CHARS_MODIFF)
   483         error ("internal--syntax-propertize modified the buffer!");
   484       if (syntax_propertize__done <= charpos
   485           && syntax_propertize__done < zv)
   486         error ("internal--syntax-propertize did not move"
   487                " syntax-propertize--done");
   488       SETUP_SYNTAX_TABLE (charpos, 1);
   489     }
   490   else if (gl_state.e_property > syntax_propertize__done)
   491     {
   492       gl_state.e_property = syntax_propertize__done;
   493       gl_state.e_property_truncated = true;
   494     }
   495   else if (gl_state.e_property_truncated
   496            && gl_state.e_property < syntax_propertize__done)
   497     { /* When moving backward, e_property might be set without resetting
   498          e_property_truncated, so the e_property_truncated flag may
   499          occasionally be left raised spuriously.  This should be rare.  */
   500       gl_state.e_property_truncated = false;
   501       update_syntax_table_forward (charpos, false, Qnil);
   502     }
   503 }
   504 
   505 void
   506 update_syntax_table_forward (ptrdiff_t charpos, bool init,
   507                              Lisp_Object object)
   508 {
   509   if (gl_state.e_property_truncated)
   510     {
   511       eassert (NILP (object));
   512       eassert (charpos >= gl_state.e_property);
   513       parse_sexp_propertize (charpos);
   514     }
   515   else
   516     {
   517       update_syntax_table (charpos, 1, init, object);
   518       if (NILP (object) && gl_state.e_property > syntax_propertize__done)
   519         parse_sexp_propertize (charpos);
   520     }
   521 }
   522 
   523 /* Returns true if char at CHARPOS is quoted.
   524    Global syntax-table data should be set up already to be good at CHARPOS
   525    or after.  On return global syntax data is good for lookup at CHARPOS.  */
   526 
   527 static bool
   528 char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
   529 {
   530   enum syntaxcode code;
   531   ptrdiff_t beg = BEGV;
   532   bool quoted = 0;
   533   ptrdiff_t orig = charpos;
   534 
   535   while (charpos > beg)
   536     {
   537       int c;
   538       dec_both (&charpos, &bytepos);
   539 
   540       UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
   541       c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
   542       code = SYNTAX (c);
   543       if (! (code == Scharquote || code == Sescape))
   544         break;
   545 
   546       quoted = !quoted;
   547     }
   548 
   549   UPDATE_SYNTAX_TABLE (orig);
   550   return quoted;
   551 }
   552 
   553 /* Return the bytepos one character before BYTEPOS.
   554    We assume that BYTEPOS is not at the start of the buffer.  */
   555 
   556 static ptrdiff_t
   557 dec_bytepos (ptrdiff_t bytepos)
   558 {
   559   return (bytepos
   560           - (!NILP (BVAR (current_buffer, enable_multibyte_characters))
   561              ? prev_char_len (bytepos) : 1));
   562 }
   563 
   564 /* Return a defun-start position before POS and not too far before.
   565    It should be the last one before POS, or nearly the last.
   566 
   567    When open_paren_in_column_0_is_defun_start is nonzero,
   568    only the beginning of the buffer is treated as a defun-start.
   569 
   570    We record the information about where the scan started
   571    and what its result was, so that another call in the same area
   572    can return the same value very quickly.
   573 
   574    There is no promise at which position the global syntax data is
   575    valid on return from the subroutine, so the caller should explicitly
   576    update the global data.  */
   577 
   578 static ptrdiff_t
   579 find_defun_start (ptrdiff_t pos, ptrdiff_t pos_byte)
   580 {
   581   ptrdiff_t opoint = PT, opoint_byte = PT_BYTE;
   582 
   583   /* Use previous finding, if it's valid and applies to this inquiry.  */
   584   if (current_buffer == find_start_buffer
   585       /* Reuse the defun-start even if POS is a little farther on.
   586          POS might be in the next defun, but that's ok.
   587          Our value may not be the best possible, but will still be usable.  */
   588       && pos <= find_start_pos + 1000
   589       && pos >= find_start_value
   590       && BEGV == find_start_begv
   591       && MODIFF == find_start_modiff)
   592     return find_start_value;
   593 
   594   if (!NILP (Vcomment_use_syntax_ppss))
   595     {
   596       modiff_count modiffs = CHARS_MODIFF;
   597       Lisp_Object ppss = call1 (Qsyntax_ppss, make_fixnum (pos));
   598       if (modiffs != CHARS_MODIFF)
   599         error ("syntax-ppss modified the buffer!");
   600       TEMP_SET_PT_BOTH (opoint, opoint_byte);
   601       Lisp_Object boc = Fnth (make_fixnum (8), ppss);
   602       if (FIXNUMP (boc))
   603         {
   604           find_start_value = XFIXNUM (boc);
   605           find_start_value_byte = CHAR_TO_BYTE (find_start_value);
   606         }
   607       else
   608         {
   609           find_start_value = pos;
   610           find_start_value_byte = pos_byte;
   611         }
   612       goto found;
   613     }
   614   if (!open_paren_in_column_0_is_defun_start)
   615     {
   616       find_start_value = BEGV;
   617       find_start_value_byte = BEGV_BYTE;
   618       goto found;
   619     }
   620 
   621   /* Back up to start of line.  */
   622   scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
   623 
   624   /* We optimize syntax-table lookup for rare updates.  Thus we accept
   625      only those `^\s(' which are good in global _and_ text-property
   626      syntax-tables.  */
   627   SETUP_BUFFER_SYNTAX_TABLE ();
   628   while (PT > BEGV)
   629     {
   630       /* Open-paren at start of line means we may have found our
   631          defun-start.  */
   632       int c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
   633       if (SYNTAX (c) == Sopen)
   634         {
   635           SETUP_SYNTAX_TABLE (PT + 1, -1);      /* Try again... */
   636           c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
   637           if (SYNTAX (c) == Sopen)
   638             break;
   639           /* Now fallback to the default value.  */
   640           SETUP_BUFFER_SYNTAX_TABLE ();
   641         }
   642       /* Move to beg of previous line.  */
   643       scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
   644     }
   645 
   646   /* Record what we found, for the next try.  */
   647   find_start_value = PT;
   648   find_start_value_byte = PT_BYTE;
   649   TEMP_SET_PT_BOTH (opoint, opoint_byte);
   650 
   651  found:
   652   find_start_buffer = current_buffer;
   653   find_start_modiff = MODIFF;
   654   find_start_begv = BEGV;
   655   find_start_pos = pos;
   656 
   657   return find_start_value;
   658 }
   659 
   660 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE.  */
   661 
   662 static bool
   663 prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
   664 {
   665   int c;
   666   bool val;
   667 
   668   dec_both (&pos, &pos_byte);
   669   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
   670   c = FETCH_CHAR (pos_byte);
   671   val = SYNTAX_COMEND_FIRST (c);
   672   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
   673   return val;
   674 }
   675 
   676 /* Check whether charpos FROM is at the end of a comment.
   677    FROM_BYTE is the bytepos corresponding to FROM.
   678    Do not move back before STOP.
   679 
   680    Return true if we find a comment ending at FROM/FROM_BYTE.
   681 
   682    If successful, store the charpos of the comment's beginning
   683    into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
   684 
   685    Global syntax data remains valid for backward search starting at
   686    the returned value (or at FROM, if the search was not successful).  */
   687 
   688 static bool
   689 back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
   690               bool comnested, int comstyle, ptrdiff_t *charpos_ptr,
   691               ptrdiff_t *bytepos_ptr)
   692 {
   693   /* Look back, counting the parity of string-quotes,
   694      and recording the comment-starters seen.
   695      When we reach a safe place, assume that's not in a string;
   696      then step the main scan to the earliest comment-starter seen
   697      an even number of string quotes away from the safe place.
   698 
   699      OFROM[I] is position of the earliest comment-starter seen
   700      which is I+2X quotes from the comment-end.
   701      PARITY is current parity of quotes from the comment end.  */
   702   int string_style = -1;        /* Presumed outside of any string.  */
   703   bool string_lossage = 0;
   704   /* Not a real lossage: indicates that we have passed a matching comment
   705      starter plus a non-matching comment-ender, meaning that any matching
   706      comment-starter we might see later could be a false positive (hidden
   707      inside another comment).
   708      Test case:  { a (* b } c (* d *) */
   709   bool comment_lossage = 0;
   710   ptrdiff_t comment_end = from;
   711   ptrdiff_t comment_end_byte = from_byte;
   712   ptrdiff_t comstart_pos = 0;
   713   ptrdiff_t comstart_byte;
   714   /* Place where the containing defun starts,
   715      or 0 if we didn't come across it yet.  */
   716   ptrdiff_t defun_start = 0;
   717   ptrdiff_t defun_start_byte = 0;
   718   enum syntaxcode code;
   719   ptrdiff_t nesting = 1;                /* Current comment nesting.  */
   720   int c;
   721   int syntax = 0;
   722   unsigned short int quit_count = 0;
   723 
   724   /* FIXME: A }} comment-ender style leads to incorrect behavior
   725      in the case of {{ c }}} because we ignore the last two chars which are
   726      assumed to be comment-enders although they aren't.  */
   727 
   728   /* At beginning of range to scan, we're outside of strings;
   729      that determines quote parity to the comment-end.  */
   730   while (from != stop)
   731     {
   732       rarely_quit (++quit_count);
   733 
   734       ptrdiff_t temp_byte;
   735       int prev_syntax;
   736       bool com2start, com2end, comstart;
   737 
   738       /* Move back and examine a character.  */
   739       dec_both (&from, &from_byte);
   740       UPDATE_SYNTAX_TABLE_BACKWARD (from);
   741 
   742       prev_syntax = syntax;
   743       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
   744       syntax = SYNTAX_WITH_FLAGS (c);
   745       code = SYNTAX (c);
   746 
   747       /* Check for 2-char comment markers.  */
   748       com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
   749                    && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
   750                    && (comstyle
   751                        == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
   752                    && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
   753                        || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
   754       com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
   755                  && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
   756       comstart = (com2start || code == Scomment);
   757 
   758       /* Nasty cases with overlapping 2-char comment markers:
   759          - snmp-mode: -- c -- foo -- c --
   760                       --- c --
   761                       ------ c --
   762          - c-mode:    *||*
   763                       |* *|* *|
   764                       |*| |* |*|
   765                       ///   */
   766 
   767       /* If a 2-char comment sequence partly overlaps with another,
   768          we don't try to be clever.  E.g. |*| in C, or }% in modes that
   769          have %..\n and %{..}%.  */
   770       if (from > stop && (com2end || comstart))
   771         {
   772           ptrdiff_t next = from, next_byte = from_byte;
   773           int next_c, next_syntax;
   774           dec_both (&next, &next_byte);
   775           UPDATE_SYNTAX_TABLE_BACKWARD (next);
   776           next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
   777           next_syntax = SYNTAX_WITH_FLAGS (next_c);
   778           if (((comstart || comnested)
   779                && SYNTAX_FLAGS_COMEND_SECOND (syntax)
   780                && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
   781               || ((com2end || comnested)
   782                   && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
   783                   && (comstyle
   784                       == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
   785                   && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
   786             goto lossage;
   787           /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
   788         }
   789 
   790       if (com2start && comstart_pos == 0)
   791         /* We're looking at a comment starter.  But it might be a comment
   792            ender as well (see snmp-mode).  The first time we see one, we
   793            need to consider it as a comment starter,
   794            and the subsequent times as a comment ender.  */
   795         com2end = 0;
   796 
   797       /* Turn a 2-char comment sequences into the appropriate syntax.  */
   798       if (com2end)
   799         code = Sendcomment;
   800       else if (com2start)
   801         code = Scomment;
   802       /* Ignore comment starters of a different style.  */
   803       else if (code == Scomment
   804                && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
   805                    || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
   806         continue;
   807 
   808       /* Ignore escaped characters, except comment-enders which cannot
   809          be escaped.  */
   810       if ((comment_end_can_be_escaped || code != Sendcomment)
   811           && char_quoted (from, from_byte))
   812         continue;
   813 
   814       switch (code)
   815         {
   816         case Sstring_fence:
   817         case Scomment_fence:
   818           c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
   819           FALLTHROUGH;
   820         case Sstring:
   821           /* Track parity of quotes.  */
   822           if (string_style == -1)
   823             /* Entering a string.  */
   824             string_style = c;
   825           else if (string_style == c)
   826             /* Leaving the string.  */
   827             string_style = -1;
   828           else
   829             /* If we have two kinds of string delimiters.
   830                There's no way to grok this scanning backwards.  */
   831             string_lossage = 1;
   832           break;
   833 
   834         case Scomment:
   835           /* We've already checked that it is the relevant comstyle.  */
   836           if (string_style != -1 || comment_lossage || string_lossage)
   837             /* There are odd string quotes involved, so let's be careful.
   838                Test case in Pascal: " { " a { " } */
   839             goto lossage;
   840 
   841           if (!comnested)
   842             {
   843               /* Record best comment-starter so far.  */
   844               comstart_pos = from;
   845               comstart_byte = from_byte;
   846             }
   847           else if (--nesting <= 0)
   848             /* nested comments have to be balanced, so we don't need to
   849                keep looking for earlier ones.  We use here the same (slightly
   850                incorrect) reasoning as below:  since it is followed by uniform
   851                paired string quotes, this comment-start has to be outside of
   852                strings, else the comment-end itself would be inside a string. */
   853             goto done;
   854           break;
   855 
   856         case Sendcomment:
   857           if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
   858               && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
   859                   || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
   860             /* This is the same style of comment ender as ours. */
   861             {
   862               if (comnested)
   863                 nesting++;
   864               else
   865                 /* Anything before that can't count because it would match
   866                    this comment-ender rather than ours.  */
   867                 from = stop;    /* Break out of the loop.  */
   868             }
   869           else if (comstart_pos != 0 || c != '\n')
   870             /* We're mixing comment styles here, so we'd better be careful.
   871                The (comstart_pos != 0 || c != '\n') check is not quite correct
   872                (we should just always set comment_lossage), but removing it
   873                would imply that any multiline comment in C would go through
   874                lossage, which seems overkill.
   875                The failure should only happen in the rare cases such as
   876                  { (* } *)   */
   877             comment_lossage = 1;
   878           break;
   879 
   880         case Sopen:
   881           /* Assume a defun-start point is outside of strings.  */
   882           if (open_paren_in_column_0_is_defun_start
   883               && NILP (Vcomment_use_syntax_ppss)
   884               && (from == stop
   885                   || (temp_byte = dec_bytepos (from_byte),
   886                       FETCH_CHAR (temp_byte) == '\n')))
   887             {
   888               defun_start = from;
   889               defun_start_byte = from_byte;
   890               from = stop;      /* Break out of the loop.  */
   891             }
   892           break;
   893 
   894         default:
   895           break;
   896         }
   897     }
   898 
   899   if (comstart_pos == 0)
   900     {
   901       from = comment_end;
   902       from_byte = comment_end_byte;
   903       UPDATE_SYNTAX_TABLE_FORWARD (comment_end);
   904     }
   905   /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
   906      or `done'), then we've found the beginning of the non-nested comment.  */
   907   else if (1)   /* !comnested */
   908     {
   909       from = comstart_pos;
   910       from_byte = comstart_byte;
   911       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
   912     }
   913   else lossage:
   914     {
   915       struct lisp_parse_state state;
   916       bool adjusted = true;
   917       /* We had two kinds of string delimiters mixed up
   918          together.  Decode this going forwards.
   919          Scan fwd from a known safe place (beginning-of-defun)
   920          to the one in question; this records where we
   921          last passed a comment starter.  */
   922       /* If we did not already find the defun start, find it now.  */
   923       if (defun_start == 0)
   924         {
   925           defun_start = find_defun_start (comment_end, comment_end_byte);
   926           defun_start_byte = find_start_value_byte;
   927           adjusted = (defun_start > BEGV);
   928         }
   929       do
   930         {
   931           internalize_parse_state (Qnil, &state);
   932           scan_sexps_forward (&state,
   933                               defun_start, defun_start_byte,
   934                               comment_end, TYPE_MINIMUM (EMACS_INT),
   935                               0, 0);
   936           defun_start = comment_end;
   937           if (!adjusted)
   938             {
   939               adjusted = true;
   940               find_start_value
   941                 = CONSP (state.levelstarts) ? XFIXNUM (XCAR (state.levelstarts))
   942                 : state.thislevelstart >= 0 ? state.thislevelstart
   943                 : find_start_value;
   944               find_start_value_byte = CHAR_TO_BYTE (find_start_value);
   945             }
   946 
   947           if (state.incomment == (comnested ? 1 : -1)
   948               && state.comstyle == comstyle)
   949             from = state.comstr_start;
   950           else
   951             {
   952               from = comment_end;
   953               if (state.incomment)
   954                 /* If comment_end is inside some other comment, maybe ours
   955                    is nested, so we need to try again from within the
   956                    surrounding comment.  Example: { a (* " *)  */
   957                 {
   958                   /* FIXME: We should advance by one or two chars.  */
   959                   defun_start = state.comstr_start + 2;
   960                   defun_start_byte = CHAR_TO_BYTE (defun_start);
   961                 }
   962             }
   963           rarely_quit (++quit_count);
   964         }
   965       while (defun_start < comment_end);
   966 
   967       from_byte = CHAR_TO_BYTE (from);
   968       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
   969     }
   970 
   971  done:
   972   *charpos_ptr = from;
   973   *bytepos_ptr = from_byte;
   974 
   975   return from != comment_end;
   976 }
   977 
   978 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
   979        doc: /* Return t if OBJECT is a syntax table.
   980 Currently, any char-table counts as a syntax table.  */)
   981   (Lisp_Object object)
   982 {
   983   if (CHAR_TABLE_P (object)
   984       && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
   985     return Qt;
   986   return Qnil;
   987 }
   988 
   989 static void
   990 check_syntax_table (Lisp_Object obj)
   991 {
   992   CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
   993               Qsyntax_table_p, obj);
   994 }
   995 
   996 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
   997        doc: /* Return the current syntax table.
   998 This is the one specified by the current buffer.  */)
   999   (void)
  1000 {
  1001   return BVAR (current_buffer, syntax_table);
  1002 }
  1003 
  1004 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
  1005    Sstandard_syntax_table, 0, 0, 0,
  1006        doc: /* Return the standard syntax table.
  1007 This is the one used for new buffers.  */)
  1008   (void)
  1009 {
  1010   return Vstandard_syntax_table;
  1011 }
  1012 
  1013 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
  1014        doc: /* Construct a new syntax table and return it.
  1015 It is a copy of the TABLE, which defaults to the standard syntax table.  */)
  1016   (Lisp_Object table)
  1017 {
  1018   Lisp_Object copy;
  1019 
  1020   if (!NILP (table))
  1021     check_syntax_table (table);
  1022   else
  1023     table = Vstandard_syntax_table;
  1024 
  1025   copy = Fcopy_sequence (table);
  1026 
  1027   /* Only the standard syntax table should have a default element.
  1028      Other syntax tables should inherit from parents instead.  */
  1029   set_char_table_defalt (copy, Qnil);
  1030 
  1031   /* Copied syntax tables should all have parents.
  1032      If we copied one with no parent, such as the standard syntax table,
  1033      use the standard syntax table as the copy's parent.  */
  1034   if (NILP (XCHAR_TABLE (copy)->parent))
  1035     Fset_char_table_parent (copy, Vstandard_syntax_table);
  1036   return copy;
  1037 }
  1038 
  1039 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
  1040        doc: /* Select a new syntax table for the current buffer.
  1041 One argument, a syntax table.  */)
  1042   (Lisp_Object table)
  1043 {
  1044   int idx;
  1045   check_syntax_table (table);
  1046   bset_syntax_table (current_buffer, table);
  1047   /* Indicate that this buffer now has a specified syntax table.  */
  1048   idx = PER_BUFFER_VAR_IDX (syntax_table);
  1049   SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
  1050   return table;
  1051 }
  1052 
  1053 /* Convert a letter which signifies a syntax code
  1054  into the code it signifies.
  1055  This is used by modify-syntax-entry, and other things.  */
  1056 
  1057 unsigned char const syntax_spec_code[0400] =
  1058   { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
  1059     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
  1060     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
  1061     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
  1062     Swhitespace, Scomment_fence, Sstring, 0377, Smath, 0377, 0377, Squote,
  1063     Sopen, Sclose, 0377, 0377, 0377, Swhitespace, Spunct, Scharquote,
  1064     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
  1065     0377, 0377, 0377, 0377, Scomment, 0377, Sendcomment, 0377,
  1066     Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* @, A ... */
  1067     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
  1068     0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
  1069     0377, 0377, 0377, 0377, Sescape, 0377, 0377, Ssymbol,
  1070     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* `, a, ... */
  1071     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
  1072     0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
  1073     0377, 0377, 0377, 0377, Sstring_fence, 0377, 0377, 0377
  1074   };
  1075 
  1076 /* Indexed by syntax code, give the letter that describes it.  */
  1077 
  1078 static char const syntax_code_spec[16] =
  1079   {
  1080     ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
  1081     '!', '|'
  1082   };
  1083 
  1084 /* Indexed by syntax code, give the object (cons of syntax code and
  1085    nil) to be stored in syntax table.  Since these objects can be
  1086    shared among syntax tables, we generate them in advance.  By
  1087    sharing objects, the function `describe-syntax' can give a more
  1088    compact listing.  */
  1089 static Lisp_Object Vsyntax_code_object;
  1090 
  1091 
  1092 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
  1093        doc: /* Return the syntax code of CHARACTER, described by a character.
  1094 For example, if CHARACTER is a word constituent, the
  1095 character `w' (119) is returned.
  1096 The characters that correspond to various syntax codes
  1097 are listed in the documentation of `modify-syntax-entry'.
  1098 
  1099 If you're trying to determine the syntax of characters in the buffer,
  1100 this is probably the wrong function to use, because it can't take
  1101 `syntax-table' text properties into account.  Consider using
  1102 `syntax-after' instead.  */)
  1103   (Lisp_Object character)
  1104 {
  1105   CHECK_CHARACTER (character);
  1106   int char_int = XFIXNAT (character);
  1107   SETUP_BUFFER_SYNTAX_TABLE ();
  1108   if (NILP (BVAR (current_buffer, enable_multibyte_characters)))
  1109     char_int = make_char_multibyte (char_int);
  1110   return make_fixnum (syntax_code_spec[SYNTAX (char_int)]);
  1111 }
  1112 
  1113 DEFUN ("syntax-class-to-char", Fsyntax_class_to_char,
  1114        Ssyntax_class_to_char, 1, 1, 0,
  1115        doc: /* Return the syntax char of CLASS, described by an integer.
  1116 For example, if SYNTAX is word constituent (the integer 2), the
  1117 character `w' (119) is returned.  */)
  1118   (Lisp_Object syntax)
  1119 {
  1120   int syn;
  1121   CHECK_FIXNUM (syntax);
  1122   syn = XFIXNUM (syntax);
  1123 
  1124   if (syn < 0 || syn >= sizeof syntax_code_spec)
  1125     args_out_of_range (make_fixnum (sizeof syntax_code_spec - 1),
  1126                        syntax);
  1127   return make_fixnum (syntax_code_spec[syn]);
  1128 }
  1129 
  1130 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
  1131        doc: /* Return the matching parenthesis of CHARACTER, or nil if none.  */)
  1132   (Lisp_Object character)
  1133 {
  1134   int char_int;
  1135   enum syntaxcode code;
  1136   CHECK_CHARACTER (character);
  1137   char_int = XFIXNUM (character);
  1138   SETUP_BUFFER_SYNTAX_TABLE ();
  1139   code = SYNTAX (char_int);
  1140   if (code == Sopen || code == Sclose)
  1141     return SYNTAX_MATCH (char_int);
  1142   return Qnil;
  1143 }
  1144 
  1145 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
  1146        doc: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
  1147 STRING should be a string of the form allowed as argument of
  1148 `modify-syntax-entry'.  The return value is a raw syntax descriptor: a
  1149 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
  1150 the value of a `syntax-table' text property.  */)
  1151   (Lisp_Object string)
  1152 {
  1153   const unsigned char *p;
  1154   int val;
  1155   Lisp_Object match;
  1156 
  1157   CHECK_STRING (string);
  1158 
  1159   p = SDATA (string);
  1160   val = syntax_spec_code[*p++];
  1161   if (val == 0377)
  1162     error ("Invalid syntax description letter: %c", p[-1]);
  1163 
  1164   if (val == Sinherit)
  1165     return Qnil;
  1166 
  1167   if (*p)
  1168     {
  1169       int len, character = string_char_and_length (p, &len);
  1170       XSETINT (match, character);
  1171       if (XFIXNAT (match) == ' ')
  1172         match = Qnil;
  1173       p += len;
  1174     }
  1175   else
  1176     match = Qnil;
  1177 
  1178   while (*p)
  1179     switch (*p++)
  1180       {
  1181       case '1':
  1182         val |= 1 << 16;
  1183         break;
  1184 
  1185       case '2':
  1186         val |= 1 << 17;
  1187         break;
  1188 
  1189       case '3':
  1190         val |= 1 << 18;
  1191         break;
  1192 
  1193       case '4':
  1194         val |= 1 << 19;
  1195         break;
  1196 
  1197       case 'p':
  1198         val |= 1 << 20;
  1199         break;
  1200 
  1201       case 'b':
  1202         val |= 1 << 21;
  1203         break;
  1204 
  1205       case 'n':
  1206         val |= 1 << 22;
  1207         break;
  1208 
  1209       case 'c':
  1210         val |= 1 << 23;
  1211         break;
  1212       }
  1213 
  1214   if (val < ASIZE (Vsyntax_code_object) && NILP (match))
  1215     return AREF (Vsyntax_code_object, val);
  1216   else
  1217     /* Since we can't use a shared object, let's make a new one.  */
  1218     return Fcons (make_fixnum (val), match);
  1219 }
  1220 
  1221 /* I really don't know why this is interactive
  1222    help-form should at least be made useful whilst reading the second arg.  */
  1223 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
  1224   "cSet syntax for character: \nsSet syntax for %s to: ",
  1225        doc: /* Set syntax for character CHAR according to string NEWENTRY.
  1226 The syntax is changed only for table SYNTAX-TABLE, which defaults to
  1227  the current buffer's syntax table.
  1228 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
  1229 in the range MIN to MAX are changed.
  1230 The first character of NEWENTRY should be one of the following:
  1231   Space or -  whitespace syntax.    w   word constituent.
  1232   _           symbol constituent.   .   punctuation.
  1233   (           open-parenthesis.     )   close-parenthesis.
  1234   "           string quote.         \\   escape.
  1235   $           paired delimiter.     \\='   expression quote or prefix operator.
  1236   <           comment starter.      >   comment ender.
  1237   /           character-quote.      @   inherit from parent table.
  1238   |           generic string fence. !   generic comment fence.
  1239 
  1240 Only single-character comment start and end sequences are represented thus.
  1241 Two-character sequences are represented as described below.
  1242 The second character of NEWENTRY is the matching parenthesis,
  1243  used only if the first character is `(' or `)'.
  1244 Any additional characters are flags.
  1245 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
  1246  1 means CHAR is the start of a two-char comment start sequence.
  1247  2 means CHAR is the second character of such a sequence.
  1248  3 means CHAR is the start of a two-char comment end sequence.
  1249  4 means CHAR is the second character of such a sequence.
  1250 
  1251 There can be several orthogonal comment sequences.  This is to support
  1252 language modes such as C++.  By default, all comment sequences are of style
  1253 a, but you can set the comment sequence style to b (on the second character
  1254 of a comment-start, and the first character of a comment-end sequence) and/or
  1255 c (on any of its chars) using this flag:
  1256  b means CHAR is part of comment sequence b.
  1257  c means CHAR is part of comment sequence c.
  1258  n means CHAR is part of a nestable comment sequence.
  1259 
  1260  p means CHAR is a prefix character for `backward-prefix-chars';
  1261    such characters are treated as whitespace when they occur
  1262    between expressions.
  1263 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE)  */)
  1264   (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
  1265 {
  1266   if (CONSP (c))
  1267     {
  1268       CHECK_CHARACTER_CAR (c);
  1269       CHECK_CHARACTER_CDR (c);
  1270     }
  1271   else
  1272     CHECK_CHARACTER (c);
  1273 
  1274   if (NILP (syntax_table))
  1275     syntax_table = BVAR (current_buffer, syntax_table);
  1276   else
  1277     check_syntax_table (syntax_table);
  1278 
  1279   newentry = Fstring_to_syntax (newentry);
  1280   if (CONSP (c))
  1281     SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
  1282   else
  1283     SET_RAW_SYNTAX_ENTRY (syntax_table, XFIXNUM (c), newentry);
  1284 
  1285   /* We clear the regexp cache, since character classes can now have
  1286      different values from those in the compiled regexps.*/
  1287   clear_regexp_cache ();
  1288 
  1289   return Qnil;
  1290 }
  1291 
  1292 /* Dump syntax table to buffer in human-readable format */
  1293 
  1294 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
  1295        Sinternal_describe_syntax_value, 1, 1, 0,
  1296        doc: /* Insert a description of the internal syntax description SYNTAX at point.  */)
  1297   (Lisp_Object syntax)
  1298 {
  1299   int code, syntax_code;
  1300   bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
  1301   char str[2];
  1302   Lisp_Object first, match_lisp, value = syntax;
  1303 
  1304   if (NILP (value))
  1305     {
  1306       insert_string ("default");
  1307       return syntax;
  1308     }
  1309 
  1310   if (CHAR_TABLE_P (value))
  1311     {
  1312       insert_string ("deeper char-table ...");
  1313       return syntax;
  1314     }
  1315 
  1316   if (!CONSP (value))
  1317     {
  1318       insert_string ("invalid");
  1319       return syntax;
  1320     }
  1321 
  1322   first = XCAR (value);
  1323   match_lisp = XCDR (value);
  1324 
  1325   if (!FIXNUMP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
  1326     {
  1327       insert_string ("invalid");
  1328       return syntax;
  1329     }
  1330 
  1331   syntax_code = XFIXNUM (first) & INT_MAX;
  1332   code = syntax_code & 0377;
  1333   start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
  1334   start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);
  1335   end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
  1336   end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
  1337   prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
  1338   comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
  1339   comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
  1340   comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
  1341 
  1342   if (Smax <= code)
  1343     {
  1344       insert_string ("invalid");
  1345       return syntax;
  1346     }
  1347 
  1348   str[0] = syntax_code_spec[code], str[1] = 0;
  1349   insert (str, 1);
  1350 
  1351   if (NILP (match_lisp))
  1352     insert (" ", 1);
  1353   else
  1354     insert_char (XFIXNUM (match_lisp));
  1355 
  1356   if (start1)
  1357     insert ("1", 1);
  1358   if (start2)
  1359     insert ("2", 1);
  1360 
  1361   if (end1)
  1362     insert ("3", 1);
  1363   if (end2)
  1364     insert ("4", 1);
  1365 
  1366   if (prefix)
  1367     insert ("p", 1);
  1368   if (comstyleb)
  1369     insert ("b", 1);
  1370   if (comstylec)
  1371     insert ("c", 1);
  1372   if (comnested)
  1373     insert ("n", 1);
  1374 
  1375   insert_string ("\twhich means: ");
  1376 
  1377   switch (code)
  1378     {
  1379     case Swhitespace:
  1380       insert_string ("whitespace"); break;
  1381     case Spunct:
  1382       insert_string ("punctuation"); break;
  1383     case Sword:
  1384       insert_string ("word"); break;
  1385     case Ssymbol:
  1386       insert_string ("symbol"); break;
  1387     case Sopen:
  1388       insert_string ("open"); break;
  1389     case Sclose:
  1390       insert_string ("close"); break;
  1391     case Squote:
  1392       insert_string ("prefix"); break;
  1393     case Sstring:
  1394       insert_string ("string"); break;
  1395     case Smath:
  1396       insert_string ("math"); break;
  1397     case Sescape:
  1398       insert_string ("escape"); break;
  1399     case Scharquote:
  1400       insert_string ("charquote"); break;
  1401     case Scomment:
  1402       insert_string ("comment"); break;
  1403     case Sendcomment:
  1404       insert_string ("endcomment"); break;
  1405     case Sinherit:
  1406       insert_string ("inherit"); break;
  1407     case Scomment_fence:
  1408       insert_string ("comment fence"); break;
  1409     case Sstring_fence:
  1410       insert_string ("string fence"); break;
  1411     default:
  1412       insert_string ("invalid");
  1413       return syntax;
  1414     }
  1415 
  1416   if (!NILP (match_lisp))
  1417     {
  1418       insert_string (", matches ");
  1419       insert_char (XFIXNUM (match_lisp));
  1420     }
  1421 
  1422   if (start1)
  1423     insert_string (",\n\t  is the first character of a comment-start sequence");
  1424   if (start2)
  1425     insert_string (",\n\t  is the second character of a comment-start sequence");
  1426 
  1427   if (end1)
  1428     insert_string (",\n\t  is the first character of a comment-end sequence");
  1429   if (end2)
  1430     insert_string (",\n\t  is the second character of a comment-end sequence");
  1431   if (comstyleb)
  1432     insert_string (" (comment style b)");
  1433   if (comstylec)
  1434     insert_string (" (comment style c)");
  1435   if (comnested)
  1436     insert_string (" (nestable)");
  1437 
  1438   if (prefix)
  1439     {
  1440       AUTO_STRING (prefixdoc,
  1441                    ",\n\t  is a prefix character for `backward-prefix-chars'");
  1442       insert1 (call1 (Qsubstitute_command_keys, prefixdoc));
  1443     }
  1444 
  1445   return syntax;
  1446 }
  1447 
  1448 /* Return the position across COUNT words from FROM.
  1449    If that many words cannot be found before the end of the buffer, return 0.
  1450    COUNT negative means scan backward and stop at word beginning.  */
  1451 
  1452 ptrdiff_t
  1453 scan_words (ptrdiff_t from, EMACS_INT count)
  1454 {
  1455   ptrdiff_t beg = BEGV;
  1456   ptrdiff_t end = ZV;
  1457   ptrdiff_t from_byte = CHAR_TO_BYTE (from);
  1458   enum syntaxcode code;
  1459   int ch0, ch1;
  1460   Lisp_Object func, pos;
  1461 
  1462   SETUP_SYNTAX_TABLE (from, clip_to_bounds (PTRDIFF_MIN, count, PTRDIFF_MAX));
  1463 
  1464   while (count > 0)
  1465     {
  1466       while (true)
  1467         {
  1468           if (from == end)
  1469             return 0;
  1470           UPDATE_SYNTAX_TABLE_FORWARD (from);
  1471           ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  1472           code = SYNTAX (ch0);
  1473           inc_both (&from, &from_byte);
  1474           if (words_include_escapes
  1475               && (code == Sescape || code == Scharquote))
  1476             break;
  1477           if (code == Sword)
  1478             break;
  1479           rarely_quit (from);
  1480         }
  1481       /* Now CH0 is a character which begins a word and FROM is the
  1482          position of the next character.  */
  1483       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
  1484       if (! NILP (Ffboundp (func)))
  1485         {
  1486           pos = call2 (func, make_fixnum (from - 1), make_fixnum (end));
  1487           if (FIXNUMP (pos) && from < XFIXNUM (pos) && XFIXNUM (pos) <= ZV)
  1488             {
  1489               from = XFIXNUM (pos);
  1490               from_byte = CHAR_TO_BYTE (from);
  1491             }
  1492         }
  1493       else
  1494         {
  1495           while (1)
  1496             {
  1497               if (from == end) break;
  1498               UPDATE_SYNTAX_TABLE_FORWARD (from);
  1499               ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  1500               code = SYNTAX (ch1);
  1501               if ((code != Sword
  1502                    && (! words_include_escapes
  1503                        || (code != Sescape && code != Scharquote)))
  1504                   || word_boundary_p (ch0, ch1))
  1505                 break;
  1506               inc_both (&from, &from_byte);
  1507               ch0 = ch1;
  1508               rarely_quit (from);
  1509             }
  1510         }
  1511       count--;
  1512     }
  1513   while (count < 0)
  1514     {
  1515       while (true)
  1516         {
  1517           if (from == beg)
  1518             return 0;
  1519           dec_both (&from, &from_byte);
  1520           UPDATE_SYNTAX_TABLE_BACKWARD (from);
  1521           ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  1522           code = SYNTAX (ch1);
  1523           if (words_include_escapes
  1524               && (code == Sescape || code == Scharquote))
  1525             break;
  1526           if (code == Sword)
  1527             break;
  1528           rarely_quit (from);
  1529         }
  1530       /* Now CH1 is a character which ends a word and FROM is the
  1531          position of it.  */
  1532       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
  1533       if (! NILP (Ffboundp (func)))
  1534         {
  1535           pos = call2 (func, make_fixnum (from), make_fixnum (beg));
  1536           if (FIXNUMP (pos) && BEGV <= XFIXNUM (pos) && XFIXNUM (pos) < from)
  1537             {
  1538               from = XFIXNUM (pos);
  1539               from_byte = CHAR_TO_BYTE (from);
  1540             }
  1541         }
  1542       else
  1543         {
  1544           while (1)
  1545             {
  1546               if (from == beg)
  1547                 break;
  1548               dec_both (&from, &from_byte);
  1549               UPDATE_SYNTAX_TABLE_BACKWARD (from);
  1550               ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  1551               code = SYNTAX (ch0);
  1552               if ((code != Sword
  1553                    && (! words_include_escapes
  1554                        || (code != Sescape && code != Scharquote)))
  1555                   || word_boundary_p (ch0, ch1))
  1556                 {
  1557                   inc_both (&from, &from_byte);
  1558                   break;
  1559                 }
  1560               ch1 = ch0;
  1561               rarely_quit (from);
  1562             }
  1563         }
  1564       count++;
  1565     }
  1566 
  1567   return from;
  1568 }
  1569 
  1570 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
  1571        doc: /* Move point forward ARG words (backward if ARG is negative).
  1572 If ARG is omitted or nil, move point forward one word.
  1573 Normally returns t.
  1574 If an edge of the buffer or a field boundary is reached, point is
  1575 left there and the function returns nil.  Field boundaries are not
  1576 noticed if `inhibit-field-text-motion' is non-nil.
  1577 
  1578 The word boundaries are normally determined by the buffer's syntax
  1579 table and character script (according to `char-script-table'), but
  1580 `find-word-boundary-function-table', such as set up by `subword-mode',
  1581 can change that.  If a Lisp program needs to move by words determined
  1582 strictly by the syntax table, it should use `forward-word-strictly'
  1583 instead.  See Info node `(elisp) Word Motion' for details.  */)
  1584   (Lisp_Object arg)
  1585 {
  1586   Lisp_Object tmp;
  1587   ptrdiff_t orig_val, val;
  1588 
  1589   if (NILP (arg))
  1590     XSETFASTINT (arg, 1);
  1591   else
  1592     CHECK_FIXNUM (arg);
  1593 
  1594   val = orig_val = scan_words (PT, XFIXNUM (arg));
  1595   if (! orig_val)
  1596     val = XFIXNUM (arg) > 0 ? ZV : BEGV;
  1597 
  1598   /* Avoid jumping out of an input field.  */
  1599   tmp = Fconstrain_to_field (make_fixnum (val), make_fixnum (PT),
  1600                              Qnil, Qnil, Qnil);
  1601   val = XFIXNAT (tmp);
  1602 
  1603   SET_PT (val);
  1604   return val == orig_val ? Qt : Qnil;
  1605 }
  1606 
  1607 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
  1608        doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
  1609 STRING is like the inside of a `[...]' in a regular expression
  1610 except that `]' is never special and `\\' quotes `^', `-' or `\\'
  1611  (but not at the end of a range; quoting is never needed there).
  1612 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
  1613 With arg "^a-zA-Z", skips nonletters stopping before first letter.
  1614 Char classes, e.g. `[:alpha:]', are supported.
  1615 
  1616 Returns the distance traveled, either zero or positive.  */)
  1617   (Lisp_Object string, Lisp_Object lim)
  1618 {
  1619   return skip_chars (1, string, lim, 1);
  1620 }
  1621 
  1622 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
  1623        doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
  1624 See `skip-chars-forward' for details.
  1625 Returns the distance traveled, either zero or negative.  */)
  1626   (Lisp_Object string, Lisp_Object lim)
  1627 {
  1628   return skip_chars (0, string, lim, 1);
  1629 }
  1630 
  1631 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
  1632        doc: /* Move point forward across chars in specified syntax classes.
  1633 SYNTAX is a string of syntax code characters.
  1634 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
  1635 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
  1636 This function returns the distance traveled, either zero or positive.  */)
  1637   (Lisp_Object syntax, Lisp_Object lim)
  1638 {
  1639   return skip_syntaxes (1, syntax, lim);
  1640 }
  1641 
  1642 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
  1643        doc: /* Move point backward across chars in specified syntax classes.
  1644 SYNTAX is a string of syntax code characters.
  1645 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
  1646 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
  1647 This function returns either zero or a negative number, and the absolute value
  1648 of this is the distance traveled.  */)
  1649   (Lisp_Object syntax, Lisp_Object lim)
  1650 {
  1651   return skip_syntaxes (0, syntax, lim);
  1652 }
  1653 
  1654 static Lisp_Object
  1655 skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
  1656             bool handle_iso_classes)
  1657 {
  1658   int c;
  1659   char fastmap[0400];
  1660   /* Store the ranges of non-ASCII characters.  */
  1661   int *char_ranges UNINIT;
  1662   int n_char_ranges = 0;
  1663   bool negate = 0;
  1664   ptrdiff_t i, i_byte;
  1665   /* True if the current buffer is multibyte and the region contains
  1666      non-ASCII chars.  */
  1667   bool multibyte;
  1668   /* True if STRING is multibyte and it contains non-ASCII chars.  */
  1669   bool string_multibyte;
  1670   ptrdiff_t size_byte;
  1671   const unsigned char *str;
  1672   int len;
  1673   Lisp_Object iso_classes;
  1674   USE_SAFE_ALLOCA;
  1675 
  1676   CHECK_STRING (string);
  1677   iso_classes = Qnil;
  1678 
  1679   if (NILP (lim))
  1680     XSETINT (lim, forwardp ? ZV : BEGV);
  1681   else
  1682     CHECK_FIXNUM_COERCE_MARKER (lim);
  1683 
  1684   /* In any case, don't allow scan outside bounds of buffer.  */
  1685   if (XFIXNUM (lim) > ZV)
  1686     XSETFASTINT (lim, ZV);
  1687   if (XFIXNUM (lim) < BEGV)
  1688     XSETFASTINT (lim, BEGV);
  1689 
  1690   multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
  1691                && (XFIXNUM (lim) - PT != CHAR_TO_BYTE (XFIXNUM (lim)) - PT_BYTE));
  1692   string_multibyte = SBYTES (string) > SCHARS (string);
  1693 
  1694   memset (fastmap, 0, sizeof fastmap);
  1695 
  1696   str = SDATA (string);
  1697   size_byte = SBYTES (string);
  1698 
  1699   i_byte = 0;
  1700   if (i_byte < size_byte
  1701       && SREF (string, 0) == '^')
  1702     {
  1703       negate = 1; i_byte++;
  1704     }
  1705 
  1706   /* Find the characters specified and set their elements of fastmap.
  1707      Handle backslashes and ranges specially.
  1708 
  1709      If STRING contains non-ASCII characters, setup char_ranges for
  1710      them and use fastmap only for their leading codes.  */
  1711 
  1712   if (! string_multibyte)
  1713     {
  1714       bool string_has_eight_bit = 0;
  1715 
  1716       /* At first setup fastmap.  */
  1717       while (i_byte < size_byte)
  1718         {
  1719           if (handle_iso_classes)
  1720             {
  1721               const unsigned char *ch = str + i_byte;
  1722               re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
  1723               if (cc == 0)
  1724                 error ("Invalid ISO C character class");
  1725               if (cc != -1)
  1726                 {
  1727                   iso_classes = Fcons (make_fixnum (cc), iso_classes);
  1728                   i_byte = ch - str;
  1729                   continue;
  1730                 }
  1731             }
  1732 
  1733           c = str[i_byte++];
  1734 
  1735           if (c == '\\')
  1736             {
  1737               if (i_byte == size_byte)
  1738                 break;
  1739 
  1740               c = str[i_byte++];
  1741             }
  1742           /* Treat `-' as range character only if another character
  1743              follows.  */
  1744           if (i_byte + 1 < size_byte
  1745               && str[i_byte] == '-')
  1746             {
  1747               int c2;
  1748 
  1749               /* Skip over the dash.  */
  1750               i_byte++;
  1751 
  1752               /* Get the end of the range.  */
  1753               c2 = str[i_byte++];
  1754               if (c2 == '\\'
  1755                   && i_byte < size_byte)
  1756                 c2 = str[i_byte++];
  1757 
  1758               if (c <= c2)
  1759                 {
  1760                   int lim2 = c2 + 1;
  1761                   while (c < lim2)
  1762                     fastmap[c++] = 1;
  1763                   if (! ASCII_CHAR_P (c2))
  1764                     string_has_eight_bit = 1;
  1765                 }
  1766             }
  1767           else
  1768             {
  1769               fastmap[c] = 1;
  1770               if (! ASCII_CHAR_P (c))
  1771                 string_has_eight_bit = 1;
  1772             }
  1773         }
  1774 
  1775       /* If the current range is multibyte and STRING contains
  1776          eight-bit chars, arrange fastmap and setup char_ranges for
  1777          the corresponding multibyte chars.  */
  1778       if (multibyte && string_has_eight_bit)
  1779         {
  1780           char *p1;
  1781           char himap[0200 + 1];
  1782           memcpy (himap, fastmap + 0200, 0200);
  1783           himap[0200] = 0;
  1784           memset (fastmap + 0200, 0, 0200);
  1785           SAFE_NALLOCA (char_ranges, 2, 128);
  1786           i = 0;
  1787 
  1788           while ((p1 = memchr (himap + i, 1, 0200 - i)))
  1789             {
  1790               /* Deduce the next range C..C2 from the next clump of 1s
  1791                  in HIMAP starting with &HIMAP[I].  HIMAP is the high
  1792                  order half of the old FASTMAP.  */
  1793               int c2, leading_code;
  1794               i = p1 - himap;
  1795               c = BYTE8_TO_CHAR (i + 0200);
  1796               i += strlen (p1);
  1797               c2 = BYTE8_TO_CHAR (i + 0200 - 1);
  1798 
  1799               char_ranges[n_char_ranges++] = c;
  1800               char_ranges[n_char_ranges++] = c2;
  1801               leading_code = CHAR_LEADING_CODE (c);
  1802               memset (fastmap + leading_code, 1,
  1803                       CHAR_LEADING_CODE (c2) - leading_code + 1);
  1804             }
  1805         }
  1806     }
  1807   else                          /* STRING is multibyte */
  1808     {
  1809       SAFE_NALLOCA (char_ranges, 2, SCHARS (string));
  1810 
  1811       while (i_byte < size_byte)
  1812         {
  1813           int leading_code = str[i_byte];
  1814 
  1815           if (handle_iso_classes)
  1816             {
  1817               const unsigned char *ch = str + i_byte;
  1818               re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
  1819               if (cc == 0)
  1820                 error ("Invalid ISO C character class");
  1821               if (cc != -1)
  1822                 {
  1823                   iso_classes = Fcons (make_fixnum (cc), iso_classes);
  1824                   i_byte = ch - str;
  1825                   continue;
  1826                 }
  1827             }
  1828 
  1829           if (leading_code== '\\')
  1830             {
  1831               if (++i_byte == size_byte)
  1832                 break;
  1833 
  1834               leading_code = str[i_byte];
  1835             }
  1836           c = string_char_and_length (str + i_byte, &len);
  1837           i_byte += len;
  1838 
  1839 
  1840           /* Treat `-' as range character only if another character
  1841              follows.  */
  1842           if (i_byte + 1 < size_byte
  1843               && str[i_byte] == '-')
  1844             {
  1845               int c2, leading_code2;
  1846 
  1847               /* Skip over the dash.  */
  1848               i_byte++;
  1849 
  1850               /* Get the end of the range.  */
  1851               leading_code2 = str[i_byte];
  1852               c2 = string_char_and_length (str + i_byte, &len);
  1853               i_byte += len;
  1854 
  1855               if (c2 == '\\'
  1856                   && i_byte < size_byte)
  1857                 {
  1858                   leading_code2 = str[i_byte];
  1859                   c2 = string_char_and_length (str + i_byte, &len);
  1860                   i_byte += len;
  1861                 }
  1862 
  1863               if (c > c2)
  1864                 continue;
  1865               if (ASCII_CHAR_P (c))
  1866                 {
  1867                   while (c <= c2 && c < 0x80)
  1868                     fastmap[c++] = 1;
  1869                   leading_code = CHAR_LEADING_CODE (c);
  1870                 }
  1871               if (! ASCII_CHAR_P (c))
  1872                 {
  1873                   int lim2 = leading_code2 + 1;
  1874                   while (leading_code < lim2)
  1875                     fastmap[leading_code++] = 1;
  1876                   if (c <= c2)
  1877                     {
  1878                       char_ranges[n_char_ranges++] = c;
  1879                       char_ranges[n_char_ranges++] = c2;
  1880                     }
  1881                 }
  1882             }
  1883           else
  1884             {
  1885               if (ASCII_CHAR_P (c))
  1886                 fastmap[c] = 1;
  1887               else
  1888                 {
  1889                   fastmap[leading_code] = 1;
  1890                   char_ranges[n_char_ranges++] = c;
  1891                   char_ranges[n_char_ranges++] = c;
  1892                 }
  1893             }
  1894         }
  1895 
  1896       /* If the current range is unibyte and STRING contains non-ASCII
  1897          chars, arrange fastmap for the corresponding unibyte
  1898          chars.  */
  1899 
  1900       if (! multibyte && n_char_ranges > 0)
  1901         {
  1902           memset (fastmap + 0200, 0, 0200);
  1903           for (i = 0; i < n_char_ranges; i += 2)
  1904             {
  1905               int c1 = char_ranges[i];
  1906               int lim2 = char_ranges[i + 1] + 1;
  1907 
  1908               for (; c1 < lim2; c1++)
  1909                 {
  1910                   int b = CHAR_TO_BYTE_SAFE (c1);
  1911                   if (b >= 0)
  1912                     fastmap[b] = 1;
  1913                 }
  1914             }
  1915         }
  1916     }
  1917 
  1918   /* If ^ was the first character, complement the fastmap.  */
  1919   if (negate)
  1920     {
  1921       if (! multibyte)
  1922         for (i = 0; i < sizeof fastmap; i++)
  1923           fastmap[i] ^= 1;
  1924       else
  1925         {
  1926           for (i = 0; i < 0200; i++)
  1927             fastmap[i] ^= 1;
  1928           /* All non-ASCII chars possibly match.  */
  1929           for (; i < sizeof fastmap; i++)
  1930             fastmap[i] = 1;
  1931         }
  1932     }
  1933 
  1934   {
  1935     ptrdiff_t start_point = PT;
  1936     ptrdiff_t pos = PT;
  1937     ptrdiff_t pos_byte = PT_BYTE;
  1938     unsigned char *p = PT_ADDR, *endp, *stop;
  1939 
  1940     if (forwardp)
  1941       {
  1942         endp = (XFIXNUM (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XFIXNUM (lim));
  1943         stop = (pos < GPT && GPT < XFIXNUM (lim)) ? GPT_ADDR : endp;
  1944       }
  1945     else
  1946       {
  1947         endp = CHAR_POS_ADDR (XFIXNUM (lim));
  1948         stop = (pos >= GPT && GPT > XFIXNUM (lim)) ? GAP_END_ADDR : endp;
  1949       }
  1950 
  1951     /* This code may look up syntax tables using functions that rely on the
  1952        gl_state object.  To make sure this object is not out of date,
  1953        let's initialize it manually.
  1954        We ignore syntax-table text-properties for now, since that's
  1955        what we've done in the past.  */
  1956     SETUP_BUFFER_SYNTAX_TABLE ();
  1957     if (forwardp)
  1958       {
  1959         if (multibyte)
  1960           while (1)
  1961             {
  1962               int nbytes;
  1963 
  1964               if (p >= stop)
  1965                 {
  1966                   if (p >= endp)
  1967                     break;
  1968                   p = GAP_END_ADDR;
  1969                   stop = endp;
  1970                 }
  1971               c = string_char_and_length (p, &nbytes);
  1972               if (! NILP (iso_classes) && in_classes (c, iso_classes))
  1973                 {
  1974                   if (negate)
  1975                     break;
  1976                   else
  1977                     goto fwd_ok;
  1978                 }
  1979 
  1980               if (! fastmap[*p])
  1981                 break;
  1982               if (! ASCII_CHAR_P (c))
  1983                 {
  1984                   /* As we are looking at a multibyte character, we
  1985                      must look up the character in the table
  1986                      CHAR_RANGES.  If there's no data in the table,
  1987                      that character is not what we want to skip.  */
  1988 
  1989                   /* The following code do the right thing even if
  1990                      n_char_ranges is zero (i.e. no data in
  1991                      CHAR_RANGES).  */
  1992                   for (i = 0; i < n_char_ranges; i += 2)
  1993                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
  1994                       break;
  1995                   if (!(negate ^ (i < n_char_ranges)))
  1996                     break;
  1997                 }
  1998             fwd_ok:
  1999               p += nbytes, pos++, pos_byte += nbytes;
  2000               rarely_quit (pos);
  2001             }
  2002         else
  2003           while (true)
  2004             {
  2005               if (p >= stop)
  2006                 {
  2007                   if (p >= endp)
  2008                     break;
  2009                   p = GAP_END_ADDR;
  2010                   stop = endp;
  2011                 }
  2012 
  2013               if (!NILP (iso_classes) && in_classes (*p, iso_classes))
  2014                 {
  2015                   if (negate)
  2016                     break;
  2017                   else
  2018                     goto fwd_unibyte_ok;
  2019                 }
  2020 
  2021               if (!fastmap[*p])
  2022                 break;
  2023             fwd_unibyte_ok:
  2024               p++, pos++, pos_byte++;
  2025               rarely_quit (pos);
  2026             }
  2027       }
  2028     else
  2029       {
  2030         if (multibyte)
  2031           while (true)
  2032             {
  2033               if (p <= stop)
  2034                 {
  2035                   if (p <= endp)
  2036                     break;
  2037                   p = GPT_ADDR;
  2038                   stop = endp;
  2039                 }
  2040               unsigned char *prev_p = p;
  2041               do
  2042                 p--;
  2043               while (stop <= p && ! CHAR_HEAD_P (*p));
  2044 
  2045               c = STRING_CHAR (p);
  2046 
  2047               if (! NILP (iso_classes) && in_classes (c, iso_classes))
  2048                 {
  2049                   if (negate)
  2050                     break;
  2051                   else
  2052                     goto back_ok;
  2053                 }
  2054 
  2055               if (! fastmap[*p])
  2056                 break;
  2057               if (! ASCII_CHAR_P (c))
  2058                 {
  2059                   /* See the comment in the previous similar code.  */
  2060                   for (i = 0; i < n_char_ranges; i += 2)
  2061                     if (c >= char_ranges[i] && c <= char_ranges[i + 1])
  2062                       break;
  2063                   if (!(negate ^ (i < n_char_ranges)))
  2064                     break;
  2065                 }
  2066             back_ok:
  2067               pos--, pos_byte -= prev_p - p;
  2068               rarely_quit (pos);
  2069             }
  2070         else
  2071           while (true)
  2072             {
  2073               if (p <= stop)
  2074                 {
  2075                   if (p <= endp)
  2076                     break;
  2077                   p = GPT_ADDR;
  2078                   stop = endp;
  2079                 }
  2080 
  2081               if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
  2082                 {
  2083                   if (negate)
  2084                     break;
  2085                   else
  2086                     goto back_unibyte_ok;
  2087                 }
  2088 
  2089               if (!fastmap[p[-1]])
  2090                 break;
  2091             back_unibyte_ok:
  2092               p--, pos--, pos_byte--;
  2093               rarely_quit (pos);
  2094             }
  2095       }
  2096 
  2097     SET_PT_BOTH (pos, pos_byte);
  2098 
  2099     SAFE_FREE ();
  2100     return make_fixnum (PT - start_point);
  2101   }
  2102 }
  2103 
  2104 
  2105 static Lisp_Object
  2106 skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
  2107 {
  2108   int c;
  2109   unsigned char fastmap[0400];
  2110   bool negate = 0;
  2111   ptrdiff_t i, i_byte;
  2112   bool multibyte;
  2113   ptrdiff_t size_byte;
  2114   unsigned char *str;
  2115 
  2116   CHECK_STRING (string);
  2117 
  2118   if (NILP (lim))
  2119     XSETINT (lim, forwardp ? ZV : BEGV);
  2120   else
  2121     CHECK_FIXNUM_COERCE_MARKER (lim);
  2122 
  2123   /* In any case, don't allow scan outside bounds of buffer.  */
  2124   if (XFIXNUM (lim) > ZV)
  2125     XSETFASTINT (lim, ZV);
  2126   if (XFIXNUM (lim) < BEGV)
  2127     XSETFASTINT (lim, BEGV);
  2128 
  2129   if (forwardp ? (PT >= XFIXNAT (lim)) : (PT <= XFIXNAT (lim)))
  2130     return make_fixnum (0);
  2131 
  2132   multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
  2133                && (XFIXNUM (lim) - PT != CHAR_TO_BYTE (XFIXNUM (lim)) - PT_BYTE));
  2134 
  2135   memset (fastmap, 0, sizeof fastmap);
  2136 
  2137   if (SBYTES (string) > SCHARS (string))
  2138     /* As this is very rare case (syntax spec is ASCII only), don't
  2139        consider efficiency.  */
  2140     string = string_make_unibyte (string);
  2141 
  2142   str = SDATA (string);
  2143   size_byte = SBYTES (string);
  2144 
  2145   i_byte = 0;
  2146   if (i_byte < size_byte
  2147       && SREF (string, 0) == '^')
  2148     {
  2149       negate = 1; i_byte++;
  2150     }
  2151 
  2152   /* Find the syntaxes specified and set their elements of fastmap.  */
  2153 
  2154   while (i_byte < size_byte)
  2155     {
  2156       c = str[i_byte++];
  2157       fastmap[syntax_spec_code[c]] = 1;
  2158     }
  2159 
  2160   /* If ^ was the first character, complement the fastmap.  */
  2161   if (negate)
  2162     for (i = 0; i < sizeof fastmap; i++)
  2163       fastmap[i] ^= 1;
  2164 
  2165   {
  2166     ptrdiff_t start_point = PT;
  2167     ptrdiff_t pos = PT;
  2168     ptrdiff_t pos_byte = PT_BYTE;
  2169     unsigned char *p, *endp, *stop;
  2170 
  2171     SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
  2172 
  2173     if (forwardp)
  2174       {
  2175         while (true)
  2176           {
  2177             p = BYTE_POS_ADDR (pos_byte);
  2178             endp = XFIXNUM (lim) == GPT ? GPT_ADDR : CHAR_POS_ADDR (XFIXNUM (lim));
  2179             stop = pos < GPT && GPT < XFIXNUM (lim) ? GPT_ADDR : endp;
  2180 
  2181             do
  2182               {
  2183                 int nbytes;
  2184 
  2185                 if (p >= stop)
  2186                   {
  2187                     if (p >= endp)
  2188                       goto done;
  2189                     p = GAP_END_ADDR;
  2190                     stop = endp;
  2191                   }
  2192                 if (multibyte)
  2193                   c = string_char_and_length (p, &nbytes);
  2194                 else
  2195                   c = *p, nbytes = 1;
  2196                 if (! fastmap[SYNTAX (c)])
  2197                   goto done;
  2198                 p += nbytes, pos++, pos_byte += nbytes;
  2199                 rarely_quit (pos);
  2200               }
  2201             while (!parse_sexp_lookup_properties
  2202                    || pos < gl_state.e_property);
  2203 
  2204             update_syntax_table_forward (pos + gl_state.offset,
  2205                                          false, gl_state.object);
  2206           }
  2207       }
  2208     else
  2209       {
  2210         p = BYTE_POS_ADDR (pos_byte);
  2211         endp = CHAR_POS_ADDR (XFIXNUM (lim));
  2212         stop = pos >= GPT && GPT > XFIXNUM (lim) ? GAP_END_ADDR : endp;
  2213 
  2214         if (multibyte)
  2215           {
  2216             while (true)
  2217               {
  2218                 if (p <= stop)
  2219                   {
  2220                     if (p <= endp)
  2221                       break;
  2222                     p = GPT_ADDR;
  2223                     stop = endp;
  2224                   }
  2225                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
  2226 
  2227                 unsigned char *prev_p = p;
  2228                 do
  2229                   p--;
  2230                 while (stop <= p && ! CHAR_HEAD_P (*p));
  2231 
  2232                 c = STRING_CHAR (p);
  2233                 if (! fastmap[SYNTAX (c)])
  2234                   break;
  2235                 pos--, pos_byte -= prev_p - p;
  2236                 rarely_quit (pos);
  2237               }
  2238           }
  2239         else
  2240           {
  2241             while (true)
  2242               {
  2243                 if (p <= stop)
  2244                   {
  2245                     if (p <= endp)
  2246                       break;
  2247                     p = GPT_ADDR;
  2248                     stop = endp;
  2249                   }
  2250                 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
  2251                 if (! fastmap[SYNTAX (p[-1])])
  2252                   break;
  2253                 p--, pos--, pos_byte--;
  2254                 rarely_quit (pos);
  2255               }
  2256           }
  2257       }
  2258 
  2259   done:
  2260     SET_PT_BOTH (pos, pos_byte);
  2261 
  2262     return make_fixnum (PT - start_point);
  2263   }
  2264 }
  2265 
  2266 /* Return true if character C belongs to one of the ISO classes
  2267    in the list ISO_CLASSES.  Each class is represented by an
  2268    integer which is its type according to re_wctype.  */
  2269 
  2270 static bool
  2271 in_classes (int c, Lisp_Object iso_classes)
  2272 {
  2273   bool fits_class = 0;
  2274 
  2275   while (CONSP (iso_classes))
  2276     {
  2277       Lisp_Object elt;
  2278       elt = XCAR (iso_classes);
  2279       iso_classes = XCDR (iso_classes);
  2280 
  2281       if (re_iswctype (c, XFIXNAT (elt)))
  2282         fits_class = 1;
  2283     }
  2284 
  2285   return fits_class;
  2286 }
  2287 
  2288 /* Jump over a comment, assuming we are at the beginning of one.
  2289    FROM is the current position.
  2290    FROM_BYTE is the bytepos corresponding to FROM.
  2291    Do not move past STOP (a charpos).
  2292    The comment over which we have to jump is of style STYLE
  2293      (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
  2294    NESTING should be positive to indicate the nesting at the beginning
  2295      for nested comments and should be zero or negative else.
  2296      ST_COMMENT_STYLE cannot be nested.
  2297    PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
  2298      (or 0 if the search cannot start in the middle of a two-character).
  2299 
  2300    If successful, return true and store the charpos of the comment's
  2301    end into *CHARPOS_PTR and the corresponding bytepos into
  2302    *BYTEPOS_PTR.  Else, return false and store the charpos STOP into
  2303    *CHARPOS_PTR, the corresponding bytepos into *BYTEPOS_PTR and the
  2304    current nesting (as defined for state->incomment) in
  2305    *INCOMMENT_PTR.  Should the last character scanned in an incomplete
  2306    comment be a possible first character of a two character construct,
  2307    we store its SYNTAX_WITH_FLAGS into *last_syntax_ptr.  Otherwise,
  2308    we store Smax into *last_syntax_ptr.
  2309 
  2310    The comment end is the last character of the comment rather than the
  2311    character just after the comment.
  2312 
  2313    Global syntax data is assumed to initially be valid for FROM and
  2314    remains valid for forward search starting at the returned position. */
  2315 
  2316 static bool
  2317 forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
  2318               EMACS_INT nesting, int style, int prev_syntax,
  2319               ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
  2320               EMACS_INT *incomment_ptr, int *last_syntax_ptr)
  2321 {
  2322   unsigned short int quit_count = 0;
  2323   int c, c1;
  2324   enum syntaxcode code;
  2325   int syntax, other_syntax;
  2326 
  2327   if (nesting <= 0) nesting = -1;
  2328 
  2329   /* Enter the loop in the middle so that we find
  2330      a 2-char comment ender if we start in the middle of it.  */
  2331   syntax = prev_syntax;
  2332   code = syntax & 0xff;
  2333   if (syntax != 0 && from < stop) goto forw_incomment;
  2334 
  2335   while (1)
  2336     {
  2337       if (from == stop)
  2338         {
  2339           *incomment_ptr = nesting;
  2340           *charpos_ptr = from;
  2341           *bytepos_ptr = from_byte;
  2342           *last_syntax_ptr =
  2343             (code == Sescape || code == Scharquote
  2344              || SYNTAX_FLAGS_COMEND_FIRST (syntax)
  2345              || (nesting > 0
  2346                  && SYNTAX_FLAGS_COMSTART_FIRST (syntax)))
  2347             ? syntax : Smax ;
  2348           return 0;
  2349         }
  2350       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  2351       syntax = SYNTAX_WITH_FLAGS (c);
  2352       code = syntax & 0xff;
  2353       if (code == Sendcomment
  2354           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
  2355           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
  2356               (nesting > 0 && --nesting == 0) : nesting < 0)
  2357           && !(comment_end_can_be_escaped && char_quoted (from, from_byte)))
  2358         /* We have encountered a comment end of the same style
  2359            as the comment sequence which began this comment
  2360            section.  */
  2361         break;
  2362       if (code == Scomment_fence
  2363           && style == ST_COMMENT_STYLE)
  2364         /* We have encountered a comment end of the same style
  2365            as the comment sequence which began this comment
  2366            section.  */
  2367         break;
  2368       if (nesting > 0
  2369           && code == Scomment
  2370           && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
  2371           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
  2372         /* We have encountered a nested comment of the same style
  2373            as the comment sequence which began this comment section.  */
  2374         nesting++;
  2375       if (comment_end_can_be_escaped
  2376           && (code == Sescape || code == Scharquote))
  2377         {
  2378           inc_both (&from, &from_byte);
  2379           UPDATE_SYNTAX_TABLE_FORWARD (from);
  2380           if (from == stop) continue; /* Failure */
  2381         }
  2382       inc_both (&from, &from_byte);
  2383       UPDATE_SYNTAX_TABLE_FORWARD (from);
  2384 
  2385     forw_incomment:
  2386       if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
  2387           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
  2388               other_syntax = SYNTAX_WITH_FLAGS (c1),
  2389               SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
  2390           && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
  2391           && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
  2392                SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
  2393               ? nesting > 0 : nesting < 0))
  2394         {
  2395           syntax = Smax;        /* So that "|#" (lisp) can not return
  2396                                    the syntax of "#" in *last_syntax_ptr. */
  2397           if (--nesting <= 0)
  2398             /* We have encountered a comment end of the same style
  2399                as the comment sequence which began this comment section.  */
  2400             break;
  2401           else
  2402             {
  2403               inc_both (&from, &from_byte);
  2404               UPDATE_SYNTAX_TABLE_FORWARD (from);
  2405             }
  2406         }
  2407       if (nesting > 0
  2408           && from < stop
  2409           && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
  2410           && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
  2411               other_syntax = SYNTAX_WITH_FLAGS (c1),
  2412               SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
  2413               && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
  2414           && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
  2415               SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
  2416         /* We have encountered a nested comment of the same style
  2417            as the comment sequence which began this comment section.  */
  2418         {
  2419           syntax = Smax; /* So that "#|#" isn't also a comment ender. */
  2420           inc_both (&from, &from_byte);
  2421           UPDATE_SYNTAX_TABLE_FORWARD (from);
  2422           nesting++;
  2423         }
  2424 
  2425       rarely_quit (++quit_count);
  2426     }
  2427   *charpos_ptr = from;
  2428   *bytepos_ptr = from_byte;
  2429   *last_syntax_ptr = Smax; /* Any syntactic power the last byte had is
  2430                               used up. */
  2431   return 1;
  2432 }
  2433 
  2434 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
  2435        doc: /*
  2436 Move forward across up to COUNT comments.  If COUNT is negative, move backward.
  2437 Stop scanning if we find something other than a comment or whitespace.
  2438 Set point to where scanning stops.
  2439 If COUNT comments are found as expected, with nothing except whitespace
  2440 between them, return t; otherwise return nil.  */)
  2441   (Lisp_Object count)
  2442 {
  2443   ptrdiff_t from, from_byte, stop;
  2444   int c, c1;
  2445   enum syntaxcode code;
  2446   int comstyle = 0;         /* style of comment encountered */
  2447   bool comnested = 0;       /* whether the comment is nestable or not */
  2448   bool found;
  2449   EMACS_INT count1;
  2450   ptrdiff_t out_charpos, out_bytepos;
  2451   EMACS_INT dummy;
  2452   int dummy2;
  2453   unsigned short int quit_count = 0;
  2454 
  2455   CHECK_FIXNUM (count);
  2456   count1 = XFIXNUM (count);
  2457   stop = count1 > 0 ? ZV : BEGV;
  2458 
  2459   from = PT;
  2460   from_byte = PT_BYTE;
  2461 
  2462   SETUP_SYNTAX_TABLE (from, clip_to_bounds (PTRDIFF_MIN, count1, PTRDIFF_MAX));
  2463   while (count1 > 0)
  2464     {
  2465       do
  2466         {
  2467           bool comstart_first;
  2468           int syntax, other_syntax;
  2469 
  2470           if (from == stop)
  2471             {
  2472               SET_PT_BOTH (from, from_byte);
  2473               return Qnil;
  2474             }
  2475           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  2476           syntax = SYNTAX_WITH_FLAGS (c);
  2477           code = SYNTAX (c);
  2478           comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
  2479           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
  2480           comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
  2481           inc_both (&from, &from_byte);
  2482           UPDATE_SYNTAX_TABLE_FORWARD (from);
  2483           if (from < stop && comstart_first
  2484               && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
  2485                   other_syntax = SYNTAX_WITH_FLAGS (c1),
  2486                   SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
  2487             {
  2488               /* We have encountered a comment start sequence and we
  2489                  are ignoring all text inside comments.  We must record
  2490                  the comment style this sequence begins so that later,
  2491                  only a comment end of the same style actually ends
  2492                  the comment section.  */
  2493               code = Scomment;
  2494               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
  2495               comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
  2496               inc_both (&from, &from_byte);
  2497               UPDATE_SYNTAX_TABLE_FORWARD (from);
  2498             }
  2499           rarely_quit (++quit_count);
  2500         }
  2501       while (code == Swhitespace || (code == Sendcomment && c == '\n'));
  2502 
  2503       if (code == Scomment_fence)
  2504         comstyle = ST_COMMENT_STYLE;
  2505       else if (code != Scomment)
  2506         {
  2507           dec_both (&from, &from_byte);
  2508           SET_PT_BOTH (from, from_byte);
  2509           return Qnil;
  2510         }
  2511       /* We're at the start of a comment.  */
  2512       found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
  2513                             &out_charpos, &out_bytepos, &dummy, &dummy2);
  2514       from = out_charpos; from_byte = out_bytepos;
  2515       if (!found)
  2516         {
  2517           SET_PT_BOTH (from, from_byte);
  2518           return Qnil;
  2519         }
  2520       inc_both (&from, &from_byte);
  2521       UPDATE_SYNTAX_TABLE_FORWARD (from);
  2522       /* We have skipped one comment.  */
  2523       count1--;
  2524     }
  2525 
  2526   while (count1 < 0)
  2527     {
  2528       while (true)
  2529         {
  2530           if (from <= stop)
  2531             {
  2532               SET_PT_BOTH (BEGV, BEGV_BYTE);
  2533               return Qnil;
  2534             }
  2535 
  2536           dec_both (&from, &from_byte);
  2537           /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from).  */
  2538           bool quoted = char_quoted (from, from_byte);
  2539           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  2540           int syntax = SYNTAX_WITH_FLAGS (c);
  2541           code = SYNTAX (c);
  2542           comstyle = 0;
  2543           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
  2544           if (code == Sendcomment)
  2545             comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
  2546           if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
  2547               && prev_char_comend_first (from, from_byte)
  2548               && !char_quoted (from - 1, dec_bytepos (from_byte)))
  2549             {
  2550               int other_syntax;
  2551               /* We must record the comment style encountered so that
  2552                  later, we can match only the proper comment begin
  2553                  sequence of the same style.  */
  2554               dec_both (&from, &from_byte);
  2555               code = Sendcomment;
  2556               /* Calling char_quoted, above, set up global syntax position
  2557                  at the new value of FROM.  */
  2558               c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  2559               other_syntax = SYNTAX_WITH_FLAGS (c1);
  2560               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
  2561               comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
  2562             }
  2563 
  2564           if (code == Scomment_fence)
  2565             {
  2566               /* Skip until first preceding unquoted comment_fence.  */
  2567               bool fence_found = 0;
  2568               ptrdiff_t ini = from, ini_byte = from_byte;
  2569 
  2570               if (from > stop)
  2571                 {
  2572                   while (1)
  2573                     {
  2574                       dec_both (&from, &from_byte);
  2575                       UPDATE_SYNTAX_TABLE_BACKWARD (from);
  2576                       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  2577                       if (SYNTAX (c) == Scomment_fence
  2578                           && !char_quoted (from, from_byte))
  2579                         {
  2580                           fence_found = 1;
  2581                           break;
  2582                         }
  2583                       else if (from == stop)
  2584                         break;
  2585                       rarely_quit (++quit_count);
  2586                     }
  2587                 }
  2588               if (fence_found == 0)
  2589                 {
  2590                   from = ini;           /* Set point to ini + 1.  */
  2591                   from_byte = ini_byte;
  2592                   goto leave;
  2593                 }
  2594               else
  2595                 /* We have skipped one comment.  */
  2596                 break;
  2597             }
  2598           else if (code == Sendcomment)
  2599             {
  2600               found = (!quoted || !comment_end_can_be_escaped)
  2601                 && back_comment (from, from_byte, stop, comnested, comstyle,
  2602                                  &out_charpos, &out_bytepos);
  2603               if (!found)
  2604                 {
  2605                   if (c == '\n')
  2606                     /* This end-of-line is not an end-of-comment.
  2607                        Treat it like a whitespace.
  2608                        CC-mode (and maybe others) relies on this behavior.  */
  2609                     ;
  2610                   else
  2611                     {
  2612                       /* Failure: we should go back to the end of this
  2613                          not-quite-endcomment.  */
  2614                       if (SYNTAX (c) != code)
  2615                         /* It was a two-char Sendcomment.  */
  2616                         inc_both (&from, &from_byte);
  2617                       goto leave;
  2618                     }
  2619                 }
  2620               else
  2621                 {
  2622                   /* We have skipped one comment.  */
  2623                   from = out_charpos, from_byte = out_bytepos;
  2624                   break;
  2625                 }
  2626             }
  2627           else if (code != Swhitespace || quoted)
  2628             {
  2629             leave:
  2630               inc_both (&from, &from_byte);
  2631               SET_PT_BOTH (from, from_byte);
  2632               return Qnil;
  2633             }
  2634 
  2635           rarely_quit (++quit_count);
  2636         }
  2637 
  2638       count1++;
  2639     }
  2640 
  2641   SET_PT_BOTH (from, from_byte);
  2642   return Qt;
  2643 }
  2644 
  2645 /* Return syntax code of character C if C is an ASCII character
  2646    or if MULTIBYTE_SYMBOL_P is false.  Otherwise, return Ssymbol.  */
  2647 
  2648 static enum syntaxcode
  2649 syntax_multibyte (int c, bool multibyte_symbol_p)
  2650 {
  2651   return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
  2652 }
  2653 
  2654 static Lisp_Object
  2655 scan_lists (EMACS_INT from0, EMACS_INT count, EMACS_INT depth, bool sexpflag)
  2656 {
  2657   Lisp_Object val;
  2658   ptrdiff_t stop = count > 0 ? ZV : BEGV;
  2659   int c, c1;
  2660   int stringterm;
  2661   bool quoted;
  2662   bool mathexit = 0;
  2663   enum syntaxcode code;
  2664   EMACS_INT min_depth = depth;  /* Err out if depth gets less than this.  */
  2665   int comstyle = 0;             /* Style of comment encountered.  */
  2666   bool comnested = 0;           /* Whether the comment is nestable or not.  */
  2667   ptrdiff_t temp_pos;
  2668   EMACS_INT last_good = from0;
  2669   bool found;
  2670   ptrdiff_t from_byte;
  2671   ptrdiff_t out_bytepos, out_charpos;
  2672   EMACS_INT dummy;
  2673   int dummy2;
  2674   bool multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
  2675   unsigned short int quit_count = 0;
  2676 
  2677   if (depth > 0) min_depth = 0;
  2678 
  2679   ptrdiff_t from = clip_to_bounds (BEGV, from0, ZV);
  2680 
  2681   from_byte = CHAR_TO_BYTE (from);
  2682 
  2683   maybe_quit ();
  2684 
  2685   SETUP_SYNTAX_TABLE (from, clip_to_bounds (PTRDIFF_MIN, count, PTRDIFF_MAX));
  2686   while (count > 0)
  2687     {
  2688       while (from < stop)
  2689         {
  2690           rarely_quit (++quit_count);
  2691           bool comstart_first, prefix;
  2692           int syntax, other_syntax;
  2693           UPDATE_SYNTAX_TABLE_FORWARD (from);
  2694           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  2695           syntax = SYNTAX_WITH_FLAGS (c);
  2696           code = syntax_multibyte (c, multibyte_symbol_p);
  2697           comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
  2698           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
  2699           comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
  2700           prefix = SYNTAX_FLAGS_PREFIX (syntax);
  2701           if (depth == min_depth)
  2702             last_good = from;
  2703           inc_both (&from, &from_byte);
  2704           UPDATE_SYNTAX_TABLE_FORWARD (from);
  2705           if (from < stop && comstart_first
  2706               && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
  2707                   other_syntax = SYNTAX_WITH_FLAGS (c),
  2708                   SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
  2709               && parse_sexp_ignore_comments)
  2710             {
  2711               /* We have encountered a comment start sequence and we
  2712                  are ignoring all text inside comments.  We must record
  2713                  the comment style this sequence begins so that later,
  2714                  only a comment end of the same style actually ends
  2715                  the comment section.  */
  2716               code = Scomment;
  2717               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
  2718               comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
  2719               inc_both (&from, &from_byte);
  2720               UPDATE_SYNTAX_TABLE_FORWARD (from);
  2721             }
  2722 
  2723           if (prefix)
  2724             continue;
  2725 
  2726           switch (code)
  2727             {
  2728             case Sescape:
  2729             case Scharquote:
  2730               if (from == stop)
  2731                 goto lose;
  2732               inc_both (&from, &from_byte);
  2733               /* Treat following character as a word constituent.  */
  2734               FALLTHROUGH;
  2735             case Sword:
  2736             case Ssymbol:
  2737               if (depth || !sexpflag) break;
  2738               /* This word counts as a sexp; return at end of it.  */
  2739               while (from < stop)
  2740                 {
  2741                   UPDATE_SYNTAX_TABLE_FORWARD (from);
  2742 
  2743                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  2744                   switch (syntax_multibyte (c, multibyte_symbol_p))
  2745                     {
  2746                     case Scharquote:
  2747                     case Sescape:
  2748                       inc_both (&from, &from_byte);
  2749                       if (from == stop)
  2750                         goto lose;
  2751                       break;
  2752                     case Sword:
  2753                     case Ssymbol:
  2754                     case Squote:
  2755                       break;
  2756                     default:
  2757                       goto done;
  2758                     }
  2759                   inc_both (&from, &from_byte);
  2760                   rarely_quit (++quit_count);
  2761                 }
  2762               goto done;
  2763 
  2764             case Scomment_fence:
  2765               comstyle = ST_COMMENT_STYLE;
  2766               FALLTHROUGH;
  2767             case Scomment:
  2768               if (!parse_sexp_ignore_comments) break;
  2769               UPDATE_SYNTAX_TABLE_FORWARD (from);
  2770               found = forw_comment (from, from_byte, stop,
  2771                                     comnested, comstyle, 0,
  2772                                     &out_charpos, &out_bytepos, &dummy,
  2773                                     &dummy2);
  2774               from = out_charpos, from_byte = out_bytepos;
  2775               if (!found)
  2776                 {
  2777                   if (depth == 0)
  2778                     goto done;
  2779                   goto lose;
  2780                 }
  2781               inc_both (&from, &from_byte);
  2782               UPDATE_SYNTAX_TABLE_FORWARD (from);
  2783               break;
  2784 
  2785             case Smath:
  2786               if (!sexpflag)
  2787                 break;
  2788               if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
  2789                 {
  2790                   inc_both (&from, &from_byte);
  2791                 }
  2792               if (mathexit)
  2793                 {
  2794                   mathexit = 0;
  2795                   goto close1;
  2796                 }
  2797               mathexit = 1;
  2798               FALLTHROUGH;
  2799             case Sopen:
  2800               if (!++depth) goto done;
  2801               break;
  2802 
  2803             case Sclose:
  2804             close1:
  2805               if (!--depth) goto done;
  2806               if (depth < min_depth)
  2807                 xsignal3 (Qscan_error,
  2808                           build_string ("Containing expression ends prematurely"),
  2809                           make_fixnum (last_good), make_fixnum (from));
  2810               break;
  2811 
  2812             case Sstring:
  2813             case Sstring_fence:
  2814               temp_pos = dec_bytepos (from_byte);
  2815               stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
  2816               while (1)
  2817                 {
  2818                   enum syntaxcode c_code;
  2819                   if (from >= stop)
  2820                     goto lose;
  2821                   UPDATE_SYNTAX_TABLE_FORWARD (from);
  2822                   c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  2823                   c_code = syntax_multibyte (c, multibyte_symbol_p);
  2824                   if (code == Sstring
  2825                       ? c == stringterm && c_code == Sstring
  2826                       : c_code == Sstring_fence)
  2827                     break;
  2828 
  2829                   if (c_code == Scharquote || c_code == Sescape)
  2830                     inc_both (&from, &from_byte);
  2831                   inc_both (&from, &from_byte);
  2832                   rarely_quit (++quit_count);
  2833                 }
  2834               inc_both (&from, &from_byte);
  2835               if (!depth && sexpflag) goto done;
  2836               break;
  2837             default:
  2838               /* Ignore whitespace, punctuation, quote, endcomment.  */
  2839               break;
  2840             }
  2841         }
  2842 
  2843       /* Reached end of buffer.  Error if within object, return nil if between */
  2844       if (depth)
  2845         goto lose;
  2846 
  2847       return Qnil;
  2848 
  2849       /* End of object reached */
  2850     done:
  2851       count--;
  2852     }
  2853 
  2854   while (count < 0)
  2855     {
  2856       while (from > stop)
  2857         {
  2858           rarely_quit (++quit_count);
  2859           dec_both (&from, &from_byte);
  2860           UPDATE_SYNTAX_TABLE_BACKWARD (from);
  2861           c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  2862           int syntax = SYNTAX_WITH_FLAGS (c);
  2863           code = syntax_multibyte (c, multibyte_symbol_p);
  2864           if (depth == min_depth)
  2865             last_good = from;
  2866           comstyle = 0;
  2867           comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
  2868           if (code == Sendcomment)
  2869             comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
  2870           if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
  2871               && prev_char_comend_first (from, from_byte)
  2872               && parse_sexp_ignore_comments)
  2873             {
  2874               /* We must record the comment style encountered so that
  2875                  later, we can match only the proper comment begin
  2876                  sequence of the same style.  */
  2877               int c2, other_syntax;
  2878               dec_both (&from, &from_byte);
  2879               UPDATE_SYNTAX_TABLE_BACKWARD (from);
  2880               code = Sendcomment;
  2881               c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  2882               other_syntax = SYNTAX_WITH_FLAGS (c2);
  2883               comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
  2884               comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
  2885             }
  2886 
  2887           /* Quoting turns anything except a comment-ender
  2888              into a word character.  Note that this cannot be true
  2889              if we decremented FROM in the if-statement above.  */
  2890           if (code != Sendcomment && char_quoted (from, from_byte))
  2891             {
  2892               dec_both (&from, &from_byte);
  2893               code = Sword;
  2894             }
  2895           else if (SYNTAX_FLAGS_PREFIX (syntax))
  2896             continue;
  2897 
  2898           switch (code)
  2899             {
  2900             case Sword:
  2901             case Ssymbol:
  2902             case Sescape:
  2903             case Scharquote:
  2904               if (depth || !sexpflag) break;
  2905               /* This word counts as a sexp; count object finished
  2906                  after passing it.  */
  2907               while (from > stop)
  2908                 {
  2909                   temp_pos = dec_bytepos (from_byte);
  2910                   UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
  2911                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
  2912                   /* Don't allow comment-end to be quoted.  */
  2913                   if (syntax_multibyte (c1, multibyte_symbol_p) == Sendcomment)
  2914                     goto done2;
  2915                   quoted = char_quoted (from - 1, temp_pos);
  2916                   if (quoted)
  2917                     {
  2918                       dec_both (&from, &from_byte);
  2919                       temp_pos = dec_bytepos (temp_pos);
  2920                       UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
  2921                     }
  2922                   c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
  2923                   if (! quoted)
  2924                     switch (syntax_multibyte (c1, multibyte_symbol_p))
  2925                       {
  2926                       case Sword: case Ssymbol: case Squote: break;
  2927                       default: goto done2;
  2928                       }
  2929                   dec_both (&from, &from_byte);
  2930                   rarely_quit (++quit_count);
  2931                 }
  2932               goto done2;
  2933 
  2934             case Smath:
  2935               if (!sexpflag)
  2936                 break;
  2937               if (from > BEGV)
  2938                 {
  2939                   temp_pos = dec_bytepos (from_byte);
  2940                   UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
  2941                   if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
  2942                     dec_both (&from, &from_byte);
  2943                 }
  2944               if (mathexit)
  2945                 {
  2946                   mathexit = 0;
  2947                   goto open2;
  2948                 }
  2949               mathexit = 1;
  2950               FALLTHROUGH;
  2951             case Sclose:
  2952               if (!++depth) goto done2;
  2953               break;
  2954 
  2955             case Sopen:
  2956             open2:
  2957               if (!--depth) goto done2;
  2958               if (depth < min_depth)
  2959                 xsignal3 (Qscan_error,
  2960                           build_string ("Containing expression ends prematurely"),
  2961                           make_fixnum (last_good), make_fixnum (from));
  2962               break;
  2963 
  2964             case Sendcomment:
  2965               if (!parse_sexp_ignore_comments)
  2966                 break;
  2967               found = back_comment (from, from_byte, stop, comnested, comstyle,
  2968                                     &out_charpos, &out_bytepos);
  2969               /* FIXME:  if !found, it really wasn't a comment-end.
  2970                  For single-char Sendcomment, we can't do much about it apart
  2971                  from skipping the char.
  2972                  For 2-char endcomments, we could try again, taking both
  2973                  chars as separate entities, but it's a lot of trouble
  2974                  for very little gain, so we don't bother either.  -sm */
  2975               if (found)
  2976                 from = out_charpos, from_byte = out_bytepos;
  2977               break;
  2978 
  2979             case Scomment_fence:
  2980             case Sstring_fence:
  2981               while (1)
  2982                 {
  2983                   if (from == stop)
  2984                     goto lose;
  2985                   dec_both (&from, &from_byte);
  2986                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
  2987                   if (!char_quoted (from, from_byte))
  2988                     {
  2989                       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  2990                       if (syntax_multibyte (c, multibyte_symbol_p) == code)
  2991                         break;
  2992                     }
  2993                   rarely_quit (++quit_count);
  2994                 }
  2995               if (code == Sstring_fence && !depth && sexpflag) goto done2;
  2996               break;
  2997 
  2998             case Sstring:
  2999               stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  3000               while (true)
  3001                 {
  3002                   if (from == stop)
  3003                     goto lose;
  3004                   dec_both (&from, &from_byte);
  3005                   UPDATE_SYNTAX_TABLE_BACKWARD (from);
  3006                   if (!char_quoted (from, from_byte))
  3007                     {
  3008                       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  3009                       if (c == stringterm
  3010                           && (syntax_multibyte (c, multibyte_symbol_p)
  3011                               == Sstring))
  3012                         break;
  3013                     }
  3014                   rarely_quit (++quit_count);
  3015                 }
  3016               if (!depth && sexpflag) goto done2;
  3017               break;
  3018             default:
  3019               /* Ignore whitespace, punctuation, quote, endcomment.  */
  3020               break;
  3021             }
  3022         }
  3023 
  3024       /* Reached start of buffer.  Error if within object, return nil if between */
  3025       if (depth)
  3026         goto lose;
  3027 
  3028       return Qnil;
  3029 
  3030     done2:
  3031       count++;
  3032     }
  3033 
  3034 
  3035   XSETFASTINT (val, from);
  3036   return val;
  3037 
  3038  lose:
  3039   xsignal3 (Qscan_error,
  3040             build_string ("Unbalanced parentheses"),
  3041             make_fixnum (last_good), make_fixnum (from));
  3042 }
  3043 
  3044 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
  3045        doc: /* Scan from character number FROM by COUNT lists.
  3046 Scan forward if COUNT is positive, backward if COUNT is negative.
  3047 Return the character number of the position thus found.
  3048 
  3049 A \"list", in this context, refers to a balanced parenthetical
  3050 grouping, as determined by the syntax table.
  3051 
  3052 If DEPTH is nonzero, treat that as the nesting depth of the starting
  3053 point (i.e. the starting point is DEPTH parentheses deep).  This
  3054 function scans over parentheses until the depth goes to zero COUNT
  3055 times.  Hence, positive DEPTH moves out that number of levels of
  3056 parentheses, while negative DEPTH moves to a deeper level.
  3057 
  3058 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
  3059 
  3060 If we reach the beginning or end of the accessible part of the buffer
  3061 before we have scanned over COUNT lists, return nil if the depth at
  3062 that point is zero, and signal an error if the depth is nonzero.  */)
  3063   (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
  3064 {
  3065   CHECK_FIXNUM (from);
  3066   CHECK_FIXNUM (count);
  3067   CHECK_FIXNUM (depth);
  3068 
  3069   return scan_lists (XFIXNUM (from), XFIXNUM (count), XFIXNUM (depth), 0);
  3070 }
  3071 
  3072 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
  3073        doc: /* Scan from character number FROM by COUNT balanced expressions.
  3074 If COUNT is negative, scan backwards.
  3075 Returns the character number of the position thus found.
  3076 
  3077 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
  3078 
  3079 If the beginning or end of (the accessible part of) the buffer is reached
  3080 in the middle of a parenthetical grouping, an error is signaled.
  3081 If the beginning or end is reached between groupings
  3082 but before count is used up, nil is returned.  */)
  3083   (Lisp_Object from, Lisp_Object count)
  3084 {
  3085   CHECK_FIXNUM (from);
  3086   CHECK_FIXNUM (count);
  3087 
  3088   return scan_lists (XFIXNUM (from), XFIXNUM (count), 0, 1);
  3089 }
  3090 
  3091 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
  3092        0, 0, 0,
  3093        doc: /* Move point backward over any number of chars with prefix syntax.
  3094 This includes chars with expression prefix syntax class (\\=') and those with
  3095 the prefix syntax flag (p).  */)
  3096   (void)
  3097 {
  3098   ptrdiff_t beg = BEGV;
  3099   ptrdiff_t opoint = PT;
  3100   ptrdiff_t opoint_byte = PT_BYTE;
  3101   ptrdiff_t pos = PT;
  3102   ptrdiff_t pos_byte = PT_BYTE;
  3103   int c;
  3104 
  3105   if (pos <= beg)
  3106     {
  3107       SET_PT_BOTH (opoint, opoint_byte);
  3108 
  3109       return Qnil;
  3110     }
  3111 
  3112   SETUP_SYNTAX_TABLE (pos, -1);
  3113 
  3114   dec_both (&pos, &pos_byte);
  3115 
  3116   while (!char_quoted (pos, pos_byte)
  3117          /* Previous statement updates syntax table.  */
  3118          && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
  3119              || syntax_prefix_flag_p (c)))
  3120     {
  3121       opoint = pos;
  3122       opoint_byte = pos_byte;
  3123 
  3124       if (pos <= beg)
  3125         break;
  3126       dec_both (&pos, &pos_byte);
  3127       rarely_quit (pos);
  3128     }
  3129 
  3130   SET_PT_BOTH (opoint, opoint_byte);
  3131 
  3132   return Qnil;
  3133 }
  3134 
  3135 
  3136 /* If the character at FROM_BYTE is the second part of a 2-character
  3137    comment opener based on PREV_FROM_SYNTAX, update STATE and return
  3138    true.  */
  3139 static bool
  3140 in_2char_comment_start (struct lisp_parse_state *state,
  3141                         int prev_from_syntax,
  3142                         ptrdiff_t prev_from,
  3143                         ptrdiff_t from_byte)
  3144 {
  3145   int c1, syntax;
  3146   if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
  3147       && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
  3148           syntax = SYNTAX_WITH_FLAGS (c1),
  3149           SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
  3150     {
  3151       /* Record the comment style we have entered so that only
  3152          the comment-end sequence of the same style actually
  3153          terminates the comment section.  */
  3154       state->comstyle
  3155         = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
  3156       bool comnested = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax)
  3157                         | SYNTAX_FLAGS_COMMENT_NESTED (syntax));
  3158       state->incomment = comnested ? 1 : -1;
  3159       state->comstr_start = prev_from;
  3160       return true;
  3161     }
  3162   return false;
  3163 }
  3164 
  3165 /* Parse forward from FROM / FROM_BYTE to END,
  3166    assuming that FROM has state STATE,
  3167    and return a description of the state of the parse at END.
  3168    If STOPBEFORE, stop at the start of an atom.
  3169    If COMMENTSTOP is 1, stop at the start of a comment.
  3170    If COMMENTSTOP is -1, stop at the start or end of a comment,
  3171    after the beginning of a string, or after the end of a string.  */
  3172 
  3173 static void
  3174 scan_sexps_forward (struct lisp_parse_state *state,
  3175                     ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
  3176                     EMACS_INT targetdepth, bool stopbefore,
  3177                     int commentstop)
  3178 {
  3179   enum syntaxcode code;
  3180   struct level { ptrdiff_t last, prev; };
  3181   struct level levelstart[100];
  3182   struct level *curlevel = levelstart;
  3183   struct level *endlevel = levelstart + 100;
  3184   EMACS_INT depth;      /* Paren depth of current scanning location.
  3185                            level - levelstart equals this except
  3186                            when the depth becomes negative.  */
  3187   EMACS_INT mindepth;           /* Lowest DEPTH value seen.  */
  3188   bool start_quoted = 0;        /* True means starting after a char quote.  */
  3189   Lisp_Object tem;
  3190   ptrdiff_t prev_from;          /* Keep one character before FROM.  */
  3191   ptrdiff_t prev_from_byte;
  3192   int prev_from_syntax, prev_prev_from_syntax;
  3193   bool boundary_stop = commentstop == -1;
  3194   bool nofence;
  3195   bool found;
  3196   ptrdiff_t out_bytepos, out_charpos;
  3197   int temp;
  3198   unsigned short int quit_count = 0;
  3199   ptrdiff_t started_from = from;
  3200 
  3201   prev_from = from;
  3202   prev_from_byte = from_byte;
  3203   if (from != BEGV)
  3204     dec_both (&prev_from, &prev_from_byte);
  3205 
  3206   /* Use this macro instead of `from++'.  */
  3207 #define INC_FROM                                \
  3208 do { prev_from = from;                          \
  3209      prev_from_byte = from_byte;                \
  3210      temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte);   \
  3211      prev_prev_from_syntax = prev_from_syntax;  \
  3212      prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
  3213      inc_both (&from, &from_byte);              \
  3214      if (from < end)                            \
  3215        UPDATE_SYNTAX_TABLE_FORWARD (from);      \
  3216   } while (0)
  3217 
  3218   maybe_quit ();
  3219 
  3220   depth = state->depth;
  3221   start_quoted = state->quoted;
  3222   prev_prev_from_syntax = Smax;
  3223   prev_from_syntax = state->prev_syntax;
  3224 
  3225   tem = state->levelstarts;
  3226   while (!NILP (tem))           /* >= second enclosing sexps.  */
  3227     {
  3228       Lisp_Object temhd = Fcar (tem);
  3229       if (RANGED_FIXNUMP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
  3230         curlevel->last = XFIXNUM (temhd);
  3231       if (++curlevel == endlevel)
  3232         curlevel--; /* error ("Nesting too deep for parser"); */
  3233       curlevel->prev = -1;
  3234       curlevel->last = -1;
  3235       tem = Fcdr (tem);
  3236     }
  3237   curlevel->prev = -1;
  3238   curlevel->last = -1;
  3239 
  3240   state->quoted = 0;
  3241   mindepth = depth;
  3242 
  3243   SETUP_SYNTAX_TABLE (from, 1);
  3244 
  3245   /* Enter the loop at a place appropriate for initial state.  */
  3246 
  3247   if (state->incomment)
  3248     goto startincomment;
  3249   if (state->instring >= 0)
  3250     {
  3251       nofence = state->instring != ST_STRING_STYLE;
  3252       if (start_quoted)
  3253         goto startquotedinstring;
  3254       goto startinstring;
  3255     }
  3256   else if (start_quoted)
  3257     goto startquoted;
  3258   else if ((from < end)
  3259            && (in_2char_comment_start (state, prev_from_syntax,
  3260                                        prev_from, from_byte)))
  3261     {
  3262       INC_FROM;
  3263       prev_from_syntax = Smax; /* the syntax has already been "used up". */
  3264       goto atcomment;
  3265     }
  3266 
  3267   while (from < end)
  3268     {
  3269       rarely_quit (++quit_count);
  3270       INC_FROM;
  3271 
  3272       if ((from < end)
  3273           && (in_2char_comment_start (state, prev_from_syntax,
  3274                                       prev_from, from_byte)))
  3275         {
  3276           INC_FROM;
  3277           prev_from_syntax = Smax; /* the syntax has already been "used up". */
  3278           goto atcomment;
  3279         }
  3280 
  3281       if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
  3282         continue;
  3283       code = prev_from_syntax & 0xff;
  3284       switch (code)
  3285         {
  3286         case Sescape:
  3287         case Scharquote:
  3288           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
  3289           curlevel->last = prev_from;
  3290         startquoted:
  3291           if (from == end) goto endquoted;
  3292           INC_FROM;
  3293           goto symstarted;
  3294           /* treat following character as a word constituent */
  3295         case Sword:
  3296         case Ssymbol:
  3297           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
  3298           curlevel->last = prev_from;
  3299         symstarted:
  3300           while (from < end)
  3301             {
  3302               if (in_2char_comment_start (state, prev_from_syntax,
  3303                                           prev_from, from_byte))
  3304                 {
  3305                   INC_FROM;
  3306                   prev_from_syntax = Smax; /* the syntax has already been "used up". */
  3307                   goto atcomment;
  3308                 }
  3309 
  3310               int symchar = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  3311               switch (SYNTAX (symchar))
  3312                 {
  3313                 case Scharquote:
  3314                 case Sescape:
  3315                   INC_FROM;
  3316                   if (from == end) goto endquoted;
  3317                   break;
  3318                 case Sword:
  3319                 case Ssymbol:
  3320                 case Squote:
  3321                   break;
  3322                 default:
  3323                   goto symdone;
  3324                 }
  3325               INC_FROM;
  3326               rarely_quit (++quit_count);
  3327             }
  3328         symdone:
  3329           curlevel->prev = curlevel->last;
  3330           break;
  3331 
  3332         case Scomment_fence:
  3333           /* Record the comment style we have entered so that only
  3334              the comment-end sequence of the same style actually
  3335              terminates the comment section.  */
  3336           state->comstyle = ST_COMMENT_STYLE;
  3337           state->incomment = -1;
  3338           state->comstr_start = prev_from;
  3339           goto atcomment;
  3340         case Scomment:
  3341           state->comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
  3342           state->incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
  3343                               1 : -1);
  3344           state->comstr_start = prev_from;
  3345         atcomment:
  3346           if (commentstop || boundary_stop) goto done;
  3347         startincomment:
  3348           /* The (from == BEGV) test was to enter the loop in the middle so
  3349              that we find a 2-char comment ender even if we start in the
  3350              middle of it.  We don't want to do that if we're just at the
  3351              beginning of the comment (think of (*) ... (*)).  */
  3352           found = forw_comment (from, from_byte, end,
  3353                                 state->incomment, state->comstyle,
  3354                                 from == BEGV ? 0 : prev_from_syntax,
  3355                                 &out_charpos, &out_bytepos, &state->incomment,
  3356                                 &prev_from_syntax);
  3357           from = out_charpos; from_byte = out_bytepos;
  3358           /* Beware!  prev_from and friends (except prev_from_syntax)
  3359              are invalid now.  Luckily, the `done' doesn't use them
  3360              and the INC_FROM sets them to a sane value without
  3361              looking at them. */
  3362           if (!found) goto done;
  3363           INC_FROM;
  3364           state->incomment = 0;
  3365           state->comstyle = 0;  /* reset the comment style */
  3366           prev_from_syntax = Smax; /* For the comment closer */
  3367           if (boundary_stop) goto done;
  3368           break;
  3369 
  3370         case Sopen:
  3371           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
  3372           depth++;
  3373           /* curlevel++->last ran into compiler bug on Apollo */
  3374           curlevel->last = prev_from;
  3375           if (++curlevel == endlevel)
  3376             curlevel--; /* error ("Nesting too deep for parser"); */
  3377           curlevel->prev = -1;
  3378           curlevel->last = -1;
  3379           if (targetdepth == depth) goto done;
  3380           break;
  3381 
  3382         case Sclose:
  3383           depth--;
  3384           if (depth < mindepth)
  3385             mindepth = depth;
  3386           if (curlevel != levelstart)
  3387             curlevel--;
  3388           curlevel->prev = curlevel->last;
  3389           if (targetdepth == depth) goto done;
  3390           break;
  3391 
  3392         case Sstring:
  3393         case Sstring_fence:
  3394           state->comstr_start = from - 1;
  3395           if (stopbefore) goto stop;  /* this arg means stop at sexp start */
  3396           curlevel->last = prev_from;
  3397           state->instring = (code == Sstring
  3398                             ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
  3399                             : ST_STRING_STYLE);
  3400           if (boundary_stop) goto done;
  3401         startinstring:
  3402           {
  3403             nofence = state->instring != ST_STRING_STYLE;
  3404 
  3405             while (1)
  3406               {
  3407                 int c;
  3408                 enum syntaxcode c_code;
  3409 
  3410                 if (from >= end) goto done;
  3411                 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
  3412                 c_code = SYNTAX (c);
  3413 
  3414                 /* Check C_CODE here so that if the char has
  3415                    a syntax-table property which says it is NOT
  3416                    a string character, it does not end the string.  */
  3417                 if (nofence && c == state->instring && c_code == Sstring)
  3418                   break;
  3419 
  3420                 switch (c_code)
  3421                   {
  3422                   case Sstring_fence:
  3423                     if (!nofence) goto string_end;
  3424                     break;
  3425 
  3426                   case Scharquote:
  3427                   case Sescape:
  3428                     INC_FROM;
  3429                   startquotedinstring:
  3430                     if (from >= end) goto endquoted;
  3431                     break;
  3432 
  3433                   default:
  3434                     break;
  3435                   }
  3436                 INC_FROM;
  3437                 rarely_quit (++quit_count);
  3438               }
  3439           }
  3440         string_end:
  3441           state->instring = -1;
  3442           curlevel->prev = curlevel->last;
  3443           INC_FROM;
  3444           if (boundary_stop) goto done;
  3445           break;
  3446 
  3447         case Smath:
  3448           /* FIXME: We should do something with it.  */
  3449           break;
  3450         default:
  3451           /* Ignore whitespace, punctuation, quote, endcomment.  */
  3452           break;
  3453         }
  3454     }
  3455   goto done;
  3456 
  3457  stop:   /* Here if stopping before start of sexp. */
  3458   from = prev_from;    /* We have just fetched the char that starts it; */
  3459   from_byte = prev_from_byte;
  3460   prev_from_syntax = prev_prev_from_syntax;
  3461   goto done; /* but return the position before it. */
  3462 
  3463  endquoted:
  3464   state->quoted = 1;
  3465  done:
  3466   state->depth = depth;
  3467   state->mindepth = mindepth;
  3468   state->thislevelstart = curlevel->prev;
  3469   state->prevlevelstart
  3470     = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
  3471   state->location = from;
  3472   state->location_byte = from_byte;
  3473   state->levelstarts = Qnil;
  3474   while (curlevel > levelstart)
  3475     state->levelstarts = Fcons (make_fixnum ((--curlevel)->last),
  3476                                 state->levelstarts);
  3477   state->prev_syntax = (SYNTAX_FLAGS_COMSTARTEND_FIRST (prev_from_syntax)
  3478                         || state->quoted) ? prev_from_syntax : Smax;
  3479 
  3480   /* The factor of 10 below is a heuristic that needs to be tuned.  It
  3481      means we consider 10 buffer positions examined by this function
  3482      roughly equivalent to the display engine iterating over a single
  3483      buffer position.  */
  3484   if (max_redisplay_ticks > 0 && from > started_from)
  3485     update_redisplay_ticks ((from - started_from) / 10 + 1, NULL);
  3486 }
  3487 
  3488 /* Convert a (lisp) parse state to the internal form used in
  3489    scan_sexps_forward.  */
  3490 static void
  3491 internalize_parse_state (Lisp_Object external, struct lisp_parse_state *state)
  3492 {
  3493   Lisp_Object tem;
  3494 
  3495   if (NILP (external))
  3496     {
  3497       state->depth = 0;
  3498       state->instring = -1;
  3499       state->incomment = 0;
  3500       state->quoted = 0;
  3501       state->comstyle = 0;      /* comment style a by default.  */
  3502       state->comstr_start = -1; /* no comment/string seen.  */
  3503       state->levelstarts = Qnil;
  3504       state->prev_syntax = Smax;
  3505     }
  3506   else
  3507     {
  3508       tem = Fcar (external);
  3509       state->depth = FIXNUMP (tem) ? XFIXNUM (tem) : 0;
  3510 
  3511       external = Fcdr (external);
  3512       external = Fcdr (external);
  3513       external = Fcdr (external);
  3514       tem = Fcar (external);
  3515       /* Check whether we are inside string_fence-style string: */
  3516       state->instring = (!NILP (tem)
  3517                          ? (CHARACTERP (tem) ? XFIXNAT (tem) : ST_STRING_STYLE)
  3518                          : -1);
  3519 
  3520       external = Fcdr (external);
  3521       tem = Fcar (external);
  3522       state->incomment = (!NILP (tem)
  3523                           ? (FIXNUMP (tem) ? XFIXNUM (tem) : -1)
  3524                           : 0);
  3525 
  3526       external = Fcdr (external);
  3527       tem = Fcar (external);
  3528       state->quoted = !NILP (tem);
  3529 
  3530       /* if the eighth element of the list is nil, we are in comment
  3531          style a.  If it is non-nil, we are in comment style b */
  3532       external = Fcdr (external);
  3533       external = Fcdr (external);
  3534       tem = Fcar (external);
  3535       state->comstyle = (NILP (tem)
  3536                          ? 0
  3537                          : (RANGED_FIXNUMP (0, tem, ST_COMMENT_STYLE)
  3538                             ? XFIXNUM (tem)
  3539                             : ST_COMMENT_STYLE));
  3540 
  3541       external = Fcdr (external);
  3542       tem = Fcar (external);
  3543       state->comstr_start =
  3544         RANGED_FIXNUMP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XFIXNUM (tem) : -1;
  3545       external = Fcdr (external);
  3546       tem = Fcar (external);
  3547       state->levelstarts = tem;
  3548 
  3549       external = Fcdr (external);
  3550       tem = Fcar (external);
  3551       state->prev_syntax = NILP (tem) ? Smax : XFIXNUM (tem);
  3552     }
  3553 }
  3554 
  3555 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
  3556        doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
  3557 Parsing stops at TO or when certain criteria are met;
  3558  point is set to where parsing stops.
  3559 
  3560 If OLDSTATE is omitted or nil, parsing assumes that FROM is the
  3561  beginning of a function.  If not, OLDSTATE should be the state at
  3562  FROM.
  3563 
  3564 Value is a list of elements describing final state of parsing:
  3565  0. depth in parens.
  3566  1. character address of start of innermost containing list; nil if none.
  3567  2. character address of start of last complete sexp terminated.
  3568  3. non-nil if inside a string.
  3569     (it is the character that will terminate the string,
  3570      or t if the string should be terminated by a generic string delimiter.)
  3571  4. nil if outside a comment, t if inside a non-nestable comment,
  3572     else an integer (the current comment nesting).
  3573  5. t if following a quote character.
  3574  6. the minimum paren-depth encountered during this scan.
  3575  7. style of comment, if any.
  3576  8. character address of start of comment or string; nil if not in one.
  3577  9. List of positions of currently open parens, outermost first.
  3578 10. When the last position scanned holds the first character of a
  3579     (potential) two character construct, the syntax of that position,
  3580     otherwise nil.  That construct can be a two character comment
  3581     delimiter or an Escaped or Char-quoted character.
  3582 11..... Possible further internal information used by `parse-partial-sexp'.
  3583 
  3584 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
  3585 in parentheses becomes equal to TARGETDEPTH.
  3586 Fourth arg STOPBEFORE non-nil means stop when we come to
  3587  any character that starts a sexp.
  3588 Fifth arg OLDSTATE is a list like what this function returns.
  3589  It is used to initialize the state of the parse.  Elements number 1, 2, 6
  3590  are ignored.
  3591 Sixth arg COMMENTSTOP non-nil means stop after the start of a comment.
  3592  If it is the symbol `syntax-table', stop after the start of a comment or a
  3593  string, or after end of a comment or a string.  */)
  3594   (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth,
  3595    Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
  3596 {
  3597   struct lisp_parse_state state;
  3598   EMACS_INT target;
  3599 
  3600   if (!NILP (targetdepth))
  3601     {
  3602       CHECK_FIXNUM (targetdepth);
  3603       target = XFIXNUM (targetdepth);
  3604     }
  3605   else
  3606     target = TYPE_MINIMUM (EMACS_INT);  /* We won't reach this depth.  */
  3607 
  3608   if (fix_position (to) < fix_position (from))
  3609     error ("End position is smaller than start position");
  3610 
  3611   validate_region (&from, &to);
  3612   internalize_parse_state (oldstate, &state);
  3613   scan_sexps_forward (&state, XFIXNUM (from), CHAR_TO_BYTE (XFIXNUM (from)),
  3614                       XFIXNUM (to),
  3615                       target, !NILP (stopbefore),
  3616                       (NILP (commentstop)
  3617                        ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
  3618 
  3619   SET_PT_BOTH (state.location, state.location_byte);
  3620 
  3621   return
  3622     Fcons (make_fixnum (state.depth),
  3623            Fcons (state.prevlevelstart < 0
  3624                   ? Qnil : make_fixnum (state.prevlevelstart),
  3625              Fcons (state.thislevelstart < 0
  3626                     ? Qnil : make_fixnum (state.thislevelstart),
  3627                Fcons (state.instring >= 0
  3628                       ? (state.instring == ST_STRING_STYLE
  3629                          ? Qt : make_fixnum (state.instring)) : Qnil,
  3630                  Fcons (state.incomment < 0 ? Qt :
  3631                         (state.incomment == 0 ? Qnil :
  3632                          make_fixnum (state.incomment)),
  3633                    Fcons (state.quoted ? Qt : Qnil,
  3634                      Fcons (make_fixnum (state.mindepth),
  3635                        Fcons ((state.comstyle
  3636                                ? (state.comstyle == ST_COMMENT_STYLE
  3637                                   ? Qsyntax_table
  3638                                   : make_fixnum (state.comstyle))
  3639                                : Qnil),
  3640                          Fcons (((state.incomment
  3641                                   || (state.instring >= 0))
  3642                                  ? make_fixnum (state.comstr_start)
  3643                                  : Qnil),
  3644                            Fcons (state.levelstarts,
  3645                              Fcons (state.prev_syntax == Smax
  3646                                     ? Qnil
  3647                                     : make_fixnum (state.prev_syntax),
  3648                                 Qnil)))))))))));
  3649 }
  3650 
  3651 void
  3652 init_syntax_once (void)
  3653 {
  3654   register int i, c;
  3655   Lisp_Object temp;
  3656 
  3657   /* This has to be done here, before we call Fmake_char_table.  */
  3658   DEFSYM (Qsyntax_table, "syntax-table");
  3659 
  3660   /* Create objects which can be shared among syntax tables.  */
  3661   Vsyntax_code_object = make_nil_vector (Smax);
  3662   for (i = 0; i < Smax; i++)
  3663     ASET (Vsyntax_code_object, i, list1 (make_fixnum (i)));
  3664 
  3665   /* Now we are ready to set up this property, so we can
  3666      create syntax tables.  */
  3667   Fput (Qsyntax_table, Qchar_table_extra_slots, make_fixnum (0));
  3668 
  3669   temp = AREF (Vsyntax_code_object, Swhitespace);
  3670 
  3671   Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
  3672 
  3673   /* Control characters should not be whitespace.  */
  3674   temp = AREF (Vsyntax_code_object, Spunct);
  3675   for (i = 0; i <= ' ' - 1; i++)
  3676     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
  3677   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
  3678 
  3679   /* Except that a few really are whitespace.  */
  3680   temp = AREF (Vsyntax_code_object, Swhitespace);
  3681   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
  3682   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
  3683   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
  3684   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
  3685   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
  3686 
  3687   temp = AREF (Vsyntax_code_object, Sword);
  3688   for (i = 'a'; i <= 'z'; i++)
  3689     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
  3690   for (i = 'A'; i <= 'Z'; i++)
  3691     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
  3692   for (i = '0'; i <= '9'; i++)
  3693     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
  3694 
  3695   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
  3696   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
  3697 
  3698   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
  3699                         Fcons (make_fixnum (Sopen), make_fixnum (')')));
  3700   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
  3701                         Fcons (make_fixnum (Sclose), make_fixnum ('(')));
  3702   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
  3703                         Fcons (make_fixnum (Sopen), make_fixnum (']')));
  3704   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
  3705                         Fcons (make_fixnum (Sclose), make_fixnum ('[')));
  3706   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
  3707                         Fcons (make_fixnum (Sopen), make_fixnum ('}')));
  3708   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
  3709                         Fcons (make_fixnum (Sclose), make_fixnum ('{')));
  3710   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
  3711                         Fcons (make_fixnum (Sstring), Qnil));
  3712   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
  3713                         Fcons (make_fixnum (Sescape), Qnil));
  3714 
  3715   temp = AREF (Vsyntax_code_object, Ssymbol);
  3716   for (i = 0; i < 10; i++)
  3717     {
  3718       c = "_-+*/&|<>="[i];
  3719       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
  3720     }
  3721 
  3722   temp = AREF (Vsyntax_code_object, Spunct);
  3723   for (i = 0; i < 12; i++)
  3724     {
  3725       c = ".,;:?!#@~^'`"[i];
  3726       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
  3727     }
  3728 
  3729   /* All multibyte characters have syntax `word' by default.  */
  3730   temp = AREF (Vsyntax_code_object, Sword);
  3731   char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
  3732 }
  3733 
  3734 void
  3735 syms_of_syntax (void)
  3736 {
  3737   DEFSYM (Qsyntax_table_p, "syntax-table-p");
  3738   DEFSYM (Qsyntax_ppss, "syntax-ppss");
  3739   DEFVAR_LISP ("comment-use-syntax-ppss",
  3740                Vcomment_use_syntax_ppss,
  3741                doc: /* Non-nil means `forward-comment' can use `syntax-ppss' internally.  */);
  3742   Vcomment_use_syntax_ppss = Qt;
  3743 
  3744   staticpro (&Vsyntax_code_object);
  3745 
  3746   staticpro (&gl_state.object);
  3747   staticpro (&gl_state.global_code);
  3748   staticpro (&gl_state.current_syntax_table);
  3749   staticpro (&gl_state.old_prop);
  3750 
  3751   DEFSYM (Qscan_error, "scan-error");
  3752   Fput (Qscan_error, Qerror_conditions,
  3753         pure_list (Qscan_error, Qerror));
  3754   Fput (Qscan_error, Qerror_message,
  3755         build_pure_c_string ("Scan error"));
  3756 
  3757   DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
  3758                doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace.  */);
  3759 
  3760   DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
  3761                doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
  3762 Otherwise, that text property is simply ignored.
  3763 See the info node `(elisp)Syntax Properties' for a description of the
  3764 `syntax-table' property.  */);
  3765 
  3766   DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done,
  3767               doc: /* Position up to which syntax-table properties have been set.  */);
  3768   syntax_propertize__done = -1;
  3769   DEFSYM (Qinternal__syntax_propertize, "internal--syntax-propertize");
  3770   Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
  3771 
  3772   words_include_escapes = 0;
  3773   DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
  3774                doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words.  */);
  3775 
  3776   DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
  3777                doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol.  */);
  3778   multibyte_syntax_as_symbol = 0;
  3779 
  3780   DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
  3781                open_paren_in_column_0_is_defun_start,
  3782                doc: /* Non-nil means an open paren in column 0 denotes the start of a defun.  */);
  3783   open_paren_in_column_0_is_defun_start = 1;
  3784 
  3785 
  3786   DEFVAR_LISP ("find-word-boundary-function-table",
  3787                Vfind_word_boundary_function_table,
  3788                doc: /*
  3789 Char table of functions to search for the word boundary.
  3790 Each function is called with two arguments; POS and LIMIT.
  3791 POS and LIMIT are character positions in the current buffer.
  3792 
  3793 If POS is less than LIMIT, POS is at the first character of a word,
  3794 and the return value of a function should be a position after the
  3795 last character of that word.
  3796 
  3797 If POS is not less than LIMIT, POS is at the last character of a word,
  3798 and the return value of a function should be a position at the first
  3799 character of that word.
  3800 
  3801 In both cases, LIMIT bounds the search. */);
  3802   Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
  3803 
  3804   DEFVAR_BOOL ("comment-end-can-be-escaped", comment_end_can_be_escaped,
  3805                doc: /* Non-nil means an escaped ender inside a comment doesn't end the comment.  */);
  3806   comment_end_can_be_escaped = false;
  3807   DEFSYM (Qcomment_end_can_be_escaped, "comment-end-can-be-escaped");
  3808   Fmake_variable_buffer_local (Qcomment_end_can_be_escaped);
  3809 
  3810   defsubr (&Ssyntax_table_p);
  3811   defsubr (&Ssyntax_table);
  3812   defsubr (&Sstandard_syntax_table);
  3813   defsubr (&Scopy_syntax_table);
  3814   defsubr (&Sset_syntax_table);
  3815   defsubr (&Schar_syntax);
  3816   defsubr (&Ssyntax_class_to_char);
  3817   defsubr (&Smatching_paren);
  3818   defsubr (&Sstring_to_syntax);
  3819   defsubr (&Smodify_syntax_entry);
  3820   defsubr (&Sinternal_describe_syntax_value);
  3821 
  3822   defsubr (&Sforward_word);
  3823 
  3824   defsubr (&Sskip_chars_forward);
  3825   defsubr (&Sskip_chars_backward);
  3826   defsubr (&Sskip_syntax_forward);
  3827   defsubr (&Sskip_syntax_backward);
  3828 
  3829   defsubr (&Sforward_comment);
  3830   defsubr (&Sscan_lists);
  3831   defsubr (&Sscan_sexps);
  3832   defsubr (&Sbackward_prefix_chars);
  3833   defsubr (&Sparse_partial_sexp);
  3834 }

/* [<][>][^][v][top][bottom][index][help] */