src/search.c

/* [<][>][^][v][top][bottom][index][help] */
This source file includes following definitions.
matcher_overflow
freeze_buffer_relocation
compile_pattern_1
shrink_regexp_cache
clear_regexp_cache
unfreeze_pattern
freeze_pattern
compile_pattern
looking_at_1
string_match_1
fast_string_match_internal
fast_c_string_match_internal
fast_looking_at
newline_cache_on_off
find_newline
scan_newline
scan_newline_from_point
find_newline_no_quit
find_before_next_newline
search_command
trivial_regexp_p
search_buffer_re
search_buffer_non_re
search_buffer
simple_search
boyer_moore
set_search_regs
match_limit
DEFUN
DEFUN
DEFUN
save_search_regs
restore_search_regs
update_search_regs
unwind_set_match_data
record_unwind_save_match_data
DEFUN
find_newline1
DEFUN
syms_of_search
syms_of_search_for_pdumper
     1 /* String search routines for GNU Emacs.
     2 
     3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2023 Free Software
     4 Foundation, Inc.
     5 
     6 This file is part of GNU Emacs.
     7 
     8 GNU Emacs is free software: you can redistribute it and/or modify
     9 it under the terms of the GNU General Public License as published by
    10 the Free Software Foundation, either version 3 of the License, or (at
    11 your option) any later version.
    12 
    13 GNU Emacs is distributed in the hope that it will be useful,
    14 but WITHOUT ANY WARRANTY; without even the implied warranty of
    15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    16 GNU General Public License for more details.
    17 
    18 You should have received a copy of the GNU General Public License
    19 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    20 
    21 
    22 #include <config.h>
    23 
    24 #include "lisp.h"
    25 #include "character.h"
    26 #include "buffer.h"
    27 #include "syntax.h"
    28 #include "charset.h"
    29 #include "region-cache.h"
    30 #include "blockinput.h"
    31 #include "intervals.h"
    32 #include "pdumper.h"
    33 #include "composite.h"
    34 
    35 #include "regex-emacs.h"
    36 
    37 #define REGEXP_CACHE_SIZE 20
    38 
    39 /* If the regexp is non-nil, then the buffer contains the compiled form
    40    of that regexp, suitable for searching.  */
    41 struct regexp_cache
    42 {
    43   struct regexp_cache *next;
    44   Lisp_Object regexp, f_whitespace_regexp;
    45   /* Syntax table for which the regexp applies.  We need this because
    46      of character classes.  If this is t, then the compiled pattern is valid
    47      for any syntax-table.  */
    48   Lisp_Object syntax_table;
    49   struct re_pattern_buffer buf;
    50   char fastmap[0400];
    51   /* True means regexp was compiled to do full POSIX backtracking.  */
    52   bool posix;
    53   /* True means we're inside a buffer match.  */
    54   bool busy;
    55 };
    56 
    57 /* The instances of that struct.  */
    58 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
    59 
    60 /* The head of the linked list; points to the most recently used buffer.  */
    61 static struct regexp_cache *searchbuf_head;
    62 
    63 static void set_search_regs (ptrdiff_t, ptrdiff_t);
    64 static void save_search_regs (void);
    65 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
    66                                 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
    67                                 ptrdiff_t, ptrdiff_t);
    68 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
    69                               Lisp_Object, Lisp_Object, ptrdiff_t,
    70                               ptrdiff_t, int);
    71 
    72 Lisp_Object re_match_object;
    73 
    74 static AVOID
    75 matcher_overflow (void)
    76 {
    77   error ("Stack overflow in regexp matcher");
    78 }
    79 
    80 static void
    81 freeze_buffer_relocation (void)
    82 {
    83 #ifdef REL_ALLOC
    84   /* Prevent ralloc.c from relocating the current buffer while
    85      searching it.  */
    86   r_alloc_inhibit_buffer_relocation (1);
    87   record_unwind_protect_int (r_alloc_inhibit_buffer_relocation, 0);
    88 #endif
    89 }
    90 
    91 /* Compile a regexp and signal a Lisp error if anything goes wrong.
    92    PATTERN is the pattern to compile.
    93    CP is the place to put the result.
    94    TRANSLATE is a translation table for ignoring case, or nil for none.
    95    POSIX is true if we want full backtracking (POSIX style) for this pattern.
    96    False means backtrack only enough to get a valid match.
    97 
    98    The behavior also depends on Vsearch_spaces_regexp.  */
    99 
   100 static void
   101 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
   102                    Lisp_Object translate, bool posix)
   103 {
   104   const char *whitespace_regexp;
   105   char *val;
   106 
   107   eassert (!cp->busy);
   108   cp->regexp = Qnil;
   109   cp->buf.translate = translate;
   110   cp->posix = posix;
   111   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
   112   cp->buf.charset_unibyte = charset_unibyte;
   113   if (STRINGP (Vsearch_spaces_regexp))
   114     cp->f_whitespace_regexp = Vsearch_spaces_regexp;
   115   else
   116     cp->f_whitespace_regexp = Qnil;
   117 
   118   whitespace_regexp = STRINGP (Vsearch_spaces_regexp) ?
   119     SSDATA (Vsearch_spaces_regexp) : NULL;
   120 
   121   val = (char *) re_compile_pattern (SSDATA (pattern), SBYTES (pattern),
   122                                      posix, whitespace_regexp, &cp->buf);
   123 
   124   /* If the compiled pattern hard codes some of the contents of the
   125      syntax-table, it can only be reused with *this* syntax table.  */
   126   cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
   127 
   128   if (val)
   129     xsignal1 (Qinvalid_regexp, build_string (val));
   130 
   131   cp->regexp = Fcopy_sequence (pattern);
   132 }
   133 
   134 /* Shrink each compiled regexp buffer in the cache
   135    to the size actually used right now.
   136    This is called from garbage collection.  */
   137 
   138 void
   139 shrink_regexp_cache (void)
   140 {
   141   struct regexp_cache *cp;
   142 
   143   for (cp = searchbuf_head; cp != 0; cp = cp->next)
   144     if (!cp->busy)
   145       {
   146         cp->buf.allocated = cp->buf.used;
   147         cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
   148       }
   149 }
   150 
   151 /* Clear the regexp cache w.r.t. a particular syntax table,
   152    because it was changed.
   153    There is no danger of memory leak here because re_compile_pattern
   154    automagically manages the memory in each re_pattern_buffer struct,
   155    based on its `allocated' and `buffer' values.  */
   156 void
   157 clear_regexp_cache (void)
   158 {
   159   int i;
   160 
   161   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
   162     /* It's tempting to compare with the syntax-table we've actually changed,
   163        but it's not sufficient because char-table inheritance means that
   164        modifying one syntax-table can change others at the same time.  */
   165     if (!searchbufs[i].busy && !EQ (searchbufs[i].syntax_table, Qt))
   166       searchbufs[i].regexp = Qnil;
   167 }
   168 
   169 static void
   170 unfreeze_pattern (void *arg)
   171 {
   172   struct regexp_cache *searchbuf = arg;
   173   searchbuf->busy = false;
   174 }
   175 
   176 static void
   177 freeze_pattern (struct regexp_cache *searchbuf)
   178 {
   179   eassert (!searchbuf->busy);
   180   record_unwind_protect_ptr (unfreeze_pattern, searchbuf);
   181   searchbuf->busy = true;
   182 }
   183 
   184 /* Compile a regexp if necessary, but first check to see if there's one in
   185    the cache.
   186    PATTERN is the pattern to compile.
   187    TRANSLATE is a translation table for ignoring case, or nil for none.
   188    REGP is the structure that says where to store the "register"
   189    values that will result from matching this pattern.
   190    If it is 0, we should compile the pattern not to record any
   191    subexpression bounds.
   192    POSIX is true if we want full backtracking (POSIX style) for this pattern.
   193    False means backtrack only enough to get a valid match.  */
   194 
   195 static struct regexp_cache *
   196 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
   197                  Lisp_Object translate, bool posix, bool multibyte)
   198 {
   199   struct regexp_cache *cp, **cpp, **lru_nonbusy;
   200 
   201   for (cpp = &searchbuf_head, lru_nonbusy = NULL; ; cpp = &cp->next)
   202     {
   203       cp = *cpp;
   204       if (!cp->busy)
   205         lru_nonbusy = cpp;
   206       /* Entries are initialized to nil, and may be set to nil by
   207          compile_pattern_1 if the pattern isn't valid.  Don't apply
   208          string accessors in those cases.  However, compile_pattern_1
   209          is only applied to the cache entry we pick here to reuse.  So
   210          nil should never appear before a non-nil entry.  */
   211       if (NILP (cp->regexp))
   212         goto compile_it;
   213       if (SCHARS (cp->regexp) == SCHARS (pattern)
   214           && !cp->busy
   215           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
   216           && !NILP (Fstring_equal (cp->regexp, pattern))
   217           && EQ (cp->buf.translate, translate)
   218           && cp->posix == posix
   219           && (EQ (cp->syntax_table, Qt)
   220               || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
   221           && !NILP (Fequal (cp->f_whitespace_regexp, Vsearch_spaces_regexp))
   222           && cp->buf.charset_unibyte == charset_unibyte)
   223         break;
   224 
   225       /* If we're at the end of the cache, compile into the last
   226          (least recently used) non-busy cell in the cache.  */
   227       if (cp->next == 0)
   228         {
   229           if (!lru_nonbusy)
   230             error ("Too much matching reentrancy");
   231           cpp = lru_nonbusy;
   232           cp = *cpp;
   233         compile_it:
   234           eassert (!cp->busy);
   235           compile_pattern_1 (cp, pattern, translate, posix);
   236           break;
   237         }
   238     }
   239 
   240   /* When we get here, cp (aka *cpp) contains the compiled pattern,
   241      either because we found it in the cache or because we just compiled it.
   242      Move it to the front of the queue to mark it as most recently used.  */
   243   *cpp = cp->next;
   244   cp->next = searchbuf_head;
   245   searchbuf_head = cp;
   246 
   247   /* Advise the searching functions about the space we have allocated
   248      for register data.  */
   249   if (regp)
   250     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
   251 
   252   /* The compiled pattern can be used both for multibyte and unibyte
   253      target.  But, we have to tell which the pattern is used for. */
   254   cp->buf.target_multibyte = multibyte;
   255   return cp;
   256 }
   257 
   258 
   259 static Lisp_Object
   260 looking_at_1 (Lisp_Object string, bool posix, bool modify_data)
   261 {
   262   Lisp_Object val;
   263   unsigned char *p1, *p2;
   264   ptrdiff_t s1, s2;
   265   register ptrdiff_t i;
   266 
   267   if (running_asynch_code)
   268     save_search_regs ();
   269 
   270   /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
   271      table.  */
   272   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
   273                          BVAR (current_buffer, case_eqv_table));
   274 
   275   CHECK_STRING (string);
   276 
   277   /* Snapshot in case Lisp changes the value.  */
   278   bool modify_match_data = NILP (Vinhibit_changing_match_data) && modify_data;
   279 
   280   struct regexp_cache *cache_entry = compile_pattern (
   281     string,
   282     modify_match_data ? &search_regs : NULL,
   283     (!NILP (BVAR (current_buffer, case_fold_search))
   284      ? BVAR (current_buffer, case_canon_table) : Qnil),
   285     posix,
   286     !NILP (BVAR (current_buffer, enable_multibyte_characters)));
   287 
   288   /* Do a pending quit right away, to avoid paradoxical behavior */
   289   maybe_quit ();
   290 
   291   /* Get pointers and sizes of the two strings
   292      that make up the visible portion of the buffer. */
   293 
   294   p1 = BEGV_ADDR;
   295   s1 = GPT_BYTE - BEGV_BYTE;
   296   p2 = GAP_END_ADDR;
   297   s2 = ZV_BYTE - GPT_BYTE;
   298   if (s1 < 0)
   299     {
   300       p2 = p1;
   301       s2 = ZV_BYTE - BEGV_BYTE;
   302       s1 = 0;
   303     }
   304   if (s2 < 0)
   305     {
   306       s1 = ZV_BYTE - BEGV_BYTE;
   307       s2 = 0;
   308     }
   309 
   310   specpdl_ref count = SPECPDL_INDEX ();
   311   freeze_buffer_relocation ();
   312   freeze_pattern (cache_entry);
   313   re_match_object = Qnil;
   314   i = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
   315                   PT_BYTE - BEGV_BYTE,
   316                   modify_match_data ? &search_regs : NULL,
   317                   ZV_BYTE - BEGV_BYTE);
   318 
   319   if (i == -2)
   320     {
   321       unbind_to (count, Qnil);
   322       matcher_overflow ();
   323     }
   324 
   325   val = (i >= 0 ? Qt : Qnil);
   326   if (modify_match_data && i >= 0)
   327   {
   328     for (i = 0; i < search_regs.num_regs; i++)
   329       if (search_regs.start[i] >= 0)
   330         {
   331           search_regs.start[i]
   332             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
   333          search_regs.end[i]
   334            = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
   335        }
   336     /* Set last_thing_searched only when match data is changed.  */
   337     XSETBUFFER (last_thing_searched, current_buffer);
   338   }
   339 
   340   return unbind_to (count, val);
   341 }
   342 
   343 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 2, 0,
   344        doc: /* Return t if text after point matches regular expression REGEXP.
   345 By default, this function modifies the match data that
   346 `match-beginning', `match-end' and `match-data' access.  If
   347 INHIBIT-MODIFY is non-nil, don't modify the match data.  */)
   348   (Lisp_Object regexp, Lisp_Object inhibit_modify)
   349 {
   350   return looking_at_1 (regexp, 0, NILP (inhibit_modify));
   351 }
   352 
   353 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 2, 0,
   354        doc: /* Return t if text after point matches REGEXP according to Posix rules.
   355 Find the longest match, in accordance with Posix regular expression rules.
   356 
   357 By default, this function modifies the match data that
   358 `match-beginning', `match-end' and `match-data' access.  If
   359 INHIBIT-MODIFY is non-nil, don't modify the match data.  */)
   360   (Lisp_Object regexp, Lisp_Object inhibit_modify)
   361 {
   362   return looking_at_1 (regexp, 1, NILP (inhibit_modify));
   363 }
   364 
   365 static Lisp_Object
   366 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
   367                 bool posix, bool modify_data)
   368 {
   369   ptrdiff_t val;
   370   EMACS_INT pos;
   371   ptrdiff_t pos_byte, i;
   372   bool modify_match_data = NILP (Vinhibit_changing_match_data) && modify_data;
   373 
   374   if (running_asynch_code)
   375     save_search_regs ();
   376 
   377   CHECK_STRING (regexp);
   378   CHECK_STRING (string);
   379 
   380   if (NILP (start))
   381     pos = 0, pos_byte = 0;
   382   else
   383     {
   384       ptrdiff_t len = SCHARS (string);
   385 
   386       CHECK_FIXNUM (start);
   387       pos = XFIXNUM (start);
   388       if (pos < 0 && -pos <= len)
   389         pos = len + pos;
   390       else if (0 > pos || pos > len)
   391         args_out_of_range (string, start);
   392       pos_byte = string_char_to_byte (string, pos);
   393     }
   394 
   395   /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
   396      table.  */
   397   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
   398                          BVAR (current_buffer, case_eqv_table));
   399 
   400   specpdl_ref count = SPECPDL_INDEX ();
   401   struct regexp_cache *cache_entry
   402     = compile_pattern (regexp,
   403                        modify_match_data ? &search_regs : NULL,
   404                        (!NILP (BVAR (current_buffer, case_fold_search))
   405                         ? BVAR (current_buffer, case_canon_table)
   406                         : Qnil),
   407                        posix,
   408                        STRING_MULTIBYTE (string));
   409   freeze_pattern (cache_entry);
   410   re_match_object = string;
   411   val = re_search (&cache_entry->buf, SSDATA (string),
   412                    SBYTES (string), pos_byte,
   413                    SBYTES (string) - pos_byte,
   414                    (modify_match_data ? &search_regs : NULL));
   415   unbind_to (count, Qnil);
   416 
   417   /* Set last_thing_searched only when match data is changed.  */
   418   if (modify_match_data)
   419     last_thing_searched = Qt;
   420 
   421   if (val == -2)
   422     matcher_overflow ();
   423   if (val < 0) return Qnil;
   424 
   425   if (modify_match_data)
   426     for (i = 0; i < search_regs.num_regs; i++)
   427       if (search_regs.start[i] >= 0)
   428         {
   429           search_regs.start[i]
   430             = string_byte_to_char (string, search_regs.start[i]);
   431           search_regs.end[i]
   432             = string_byte_to_char (string, search_regs.end[i]);
   433         }
   434 
   435   return make_fixnum (string_byte_to_char (string, val));
   436 }
   437 
   438 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 4, 0,
   439        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
   440 Matching ignores case if `case-fold-search' is non-nil.
   441 If third arg START is non-nil, start search at that index in STRING.
   442 
   443 If INHIBIT-MODIFY is non-nil, match data is not changed.
   444 
   445 If INHIBIT-MODIFY is nil or missing, match data is changed, and
   446 `match-end' and `match-beginning' give indices of substrings matched
   447 by parenthesis constructs in the pattern.  You can use the function
   448 `match-string' to extract the substrings matched by the parenthesis
   449 constructions in REGEXP.  For index of first char beyond the match, do
   450 (match-end 0).  */)
   451   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
   452    Lisp_Object inhibit_modify)
   453 {
   454   return string_match_1 (regexp, string, start, 0, NILP (inhibit_modify));
   455 }
   456 
   457 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 4, 0,
   458        doc: /* Return index of start of first match for Posix REGEXP in STRING, or nil.
   459 Find the longest match, in accord with Posix regular expression rules.
   460 Case is ignored if `case-fold-search' is non-nil in the current buffer.
   461 
   462 If INHIBIT-MODIFY is non-nil, match data is not changed.
   463 
   464 If INHIBIT-MODIFY is nil or missing, match data is changed, and
   465 `match-end' and `match-beginning' give indices of substrings matched
   466 by parenthesis constructs in the pattern.  You can use the function
   467 `match-string' to extract the substrings matched by the parenthesis
   468 constructions in REGEXP.  For index of first char beyond the match, do
   469 (match-end 0).  */)
   470   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
   471    Lisp_Object inhibit_modify)
   472 {
   473   return string_match_1 (regexp, string, start, 1, NILP (inhibit_modify));
   474 }
   475 
   476 /* Match REGEXP against STRING using translation table TABLE,
   477    searching all of STRING, and return the index of the match,
   478    or negative on failure.  This does not clobber the match data.  */
   479 
   480 ptrdiff_t
   481 fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
   482                             Lisp_Object table)
   483 {
   484   re_match_object = string;
   485   specpdl_ref count = SPECPDL_INDEX ();
   486   struct regexp_cache *cache_entry
   487     = compile_pattern (regexp, 0, table, 0, STRING_MULTIBYTE (string));
   488   freeze_pattern (cache_entry);
   489   ptrdiff_t val = re_search (&cache_entry->buf, SSDATA (string),
   490                              SBYTES (string), 0,
   491                              SBYTES (string), 0);
   492   unbind_to (count, Qnil);
   493   return val;
   494 }
   495 
   496 /* Match REGEXP against STRING, searching all of STRING and return the
   497    index of the match, or negative on failure.  This does not clobber
   498    the match data.  Table is a canonicalize table for ignoring case,
   499    or nil for none.
   500 
   501    We assume that STRING contains single-byte characters.  */
   502 
   503 ptrdiff_t
   504 fast_c_string_match_internal (Lisp_Object regexp,
   505                               const char *string, ptrdiff_t len,
   506                               Lisp_Object table)
   507 {
   508   /* FIXME: This is expensive and not obviously correct when it makes
   509      a difference. I.e., no longer "fast", and may hide bugs.
   510      Something should be done about this.  */
   511   regexp = string_make_unibyte (regexp);
   512   /* Record specpdl index because freeze_pattern pushes an
   513      unwind-protect on the specpdl.  */
   514   specpdl_ref count = SPECPDL_INDEX ();
   515   struct regexp_cache *cache_entry
   516     = compile_pattern (regexp, 0, table, 0, 0);
   517   freeze_pattern (cache_entry);
   518   re_match_object = Qt;
   519   ptrdiff_t val = re_search (&cache_entry->buf, string, len, 0, len, 0);
   520   unbind_to (count, Qnil);
   521   return val;
   522 }
   523 
   524 /* Match REGEXP against the characters after POS to LIMIT, and return
   525    the number of matched characters.  If STRING is non-nil, match
   526    against the characters in it.  In that case, POS and LIMIT are
   527    indices into the string.  This function doesn't modify the match
   528    data.  */
   529 
   530 ptrdiff_t
   531 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
   532                  ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
   533 {
   534   bool multibyte;
   535   unsigned char *p1, *p2;
   536   ptrdiff_t s1, s2;
   537   ptrdiff_t len;
   538 
   539   if (STRINGP (string))
   540     {
   541       if (pos_byte < 0)
   542         pos_byte = string_char_to_byte (string, pos);
   543       if (limit_byte < 0)
   544         limit_byte = string_char_to_byte (string, limit);
   545       p1 = NULL;
   546       s1 = 0;
   547       p2 = SDATA (string);
   548       s2 = SBYTES (string);
   549       multibyte = STRING_MULTIBYTE (string);
   550     }
   551   else
   552     {
   553       if (pos_byte < 0)
   554         pos_byte = CHAR_TO_BYTE (pos);
   555       if (limit_byte < 0)
   556         limit_byte = CHAR_TO_BYTE (limit);
   557       pos_byte -= BEGV_BYTE;
   558       limit_byte -= BEGV_BYTE;
   559       p1 = BEGV_ADDR;
   560       s1 = GPT_BYTE - BEGV_BYTE;
   561       p2 = GAP_END_ADDR;
   562       s2 = ZV_BYTE - GPT_BYTE;
   563       if (s1 < 0)
   564         {
   565           p2 = p1;
   566           s2 = ZV_BYTE - BEGV_BYTE;
   567           s1 = 0;
   568         }
   569       if (s2 < 0)
   570         {
   571           s1 = ZV_BYTE - BEGV_BYTE;
   572           s2 = 0;
   573         }
   574       multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
   575     }
   576 
   577   struct regexp_cache *cache_entry =
   578     compile_pattern (regexp, 0, Qnil, 0, multibyte);
   579   specpdl_ref count = SPECPDL_INDEX ();
   580   freeze_buffer_relocation ();
   581   freeze_pattern (cache_entry);
   582   re_match_object = STRINGP (string) ? string : Qnil;
   583   len = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
   584                     pos_byte, NULL, limit_byte);
   585 
   586   unbind_to (count, Qnil);
   587   return len;
   588 }
   589 
   590 
   591 /* The newline cache: remembering which sections of text have no newlines.  */
   592 
   593 /* If the user has requested the long scans caching, make sure it's on.
   594    Otherwise, make sure it's off.
   595    This is our cheezy way of associating an action with the change of
   596    state of a buffer-local variable.  */
   597 static struct region_cache *
   598 newline_cache_on_off (struct buffer *buf)
   599 {
   600   struct buffer *base_buf = buf;
   601   bool indirect_p = false;
   602 
   603   if (buf->base_buffer)
   604     {
   605       base_buf = buf->base_buffer;
   606       indirect_p = true;
   607     }
   608 
   609   /* Don't turn on or off the cache in the base buffer, if the value
   610      of cache-long-scans of the base buffer is inconsistent with that.
   611      This is because doing so will just make the cache pure overhead,
   612      since if we turn it on via indirect buffer, it will be
   613      immediately turned off by its base buffer.  */
   614   if (NILP (BVAR (buf, cache_long_scans)))
   615     {
   616       if (!indirect_p
   617           || NILP (BVAR (base_buf, cache_long_scans)))
   618         {
   619           /* It should be off.  */
   620           if (base_buf->newline_cache)
   621             {
   622               free_region_cache (base_buf->newline_cache);
   623               base_buf->newline_cache = 0;
   624             }
   625         }
   626       return NULL;
   627     }
   628   else
   629     {
   630       if (!indirect_p
   631           || !NILP (BVAR (base_buf, cache_long_scans)))
   632         {
   633           /* It should be on.  */
   634           if (base_buf->newline_cache == 0)
   635             {
   636               base_buf->newline_cache = new_region_cache ();
   637               __lsan_ignore_object (base_buf->newline_cache);
   638             }
   639         }
   640       return base_buf->newline_cache;
   641     }
   642 }
   643 
   644 
   645 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
   646 
   647    If COUNT is positive, search forwards; END must be >= START.
   648    If COUNT is negative, search backwards for the -COUNTth instance;
   649       END must be <= START.
   650    If COUNT is zero, do anything you please; run rogue, for all I care.
   651 
   652    If END is zero, use BEGV or ZV instead, as appropriate for the
   653    direction indicated by COUNT.  If START_BYTE is -1 it is unknown,
   654    and similarly for END_BYTE.
   655 
   656    If we find COUNT instances, set *COUNTED to COUNT, and return the
   657    position past the COUNTth match.  Note that for reverse motion
   658    this is not the same as the usual convention for Emacs motion commands.
   659 
   660    If we don't find COUNT instances before reaching END, set *COUNTED
   661    to the number of newlines left found (negated if COUNT is negative),
   662    and return END.
   663 
   664    If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
   665    to the returned character position.
   666 
   667    If ALLOW_QUIT, check for quitting.  That's good to do
   668    except when inside redisplay.  */
   669 
   670 ptrdiff_t
   671 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
   672               ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
   673               ptrdiff_t *bytepos, bool allow_quit)
   674 {
   675   struct region_cache *newline_cache;
   676   struct buffer *cache_buffer;
   677 
   678   if (!end)
   679     {
   680       if (count > 0)
   681         end = ZV, end_byte = ZV_BYTE;
   682       else
   683         end = BEGV, end_byte = BEGV_BYTE;
   684     }
   685   if (end_byte == -1)
   686     end_byte = CHAR_TO_BYTE (end);
   687 
   688   newline_cache = newline_cache_on_off (current_buffer);
   689   if (current_buffer->base_buffer)
   690     cache_buffer = current_buffer->base_buffer;
   691   else
   692     cache_buffer = current_buffer;
   693 
   694   if (counted)
   695     *counted = count;
   696 
   697   if (count > 0)
   698     while (start != end)
   699       {
   700         /* Our innermost scanning loop is very simple; it doesn't know
   701            about gaps, buffer ends, or the newline cache.  ceiling is
   702            the position of the last character before the next such
   703            obstacle --- the last character the dumb search loop should
   704            examine.  */
   705         ptrdiff_t tem, ceiling_byte = end_byte - 1;
   706 
   707         /* If we're using the newline cache, consult it to see whether
   708            we can avoid some scanning.  */
   709         if (newline_cache)
   710           {
   711             ptrdiff_t next_change;
   712             int result = 1;
   713 
   714             while (start < end && result)
   715               {
   716                 ptrdiff_t lim1;
   717 
   718                 result = region_cache_forward (cache_buffer, newline_cache,
   719                                                start, &next_change);
   720                 if (result)
   721                   {
   722                     /* When the cache revalidation is deferred,
   723                        next-change might point beyond ZV, which will
   724                        cause assertion violation in CHAR_TO_BYTE below.
   725                        Limit next_change to ZV to avoid that.  */
   726                     if (next_change > ZV)
   727                       next_change = ZV;
   728                     start = next_change;
   729                     lim1 = next_change = end;
   730                   }
   731                 else
   732                   lim1 = min (next_change, end);
   733 
   734                 /* The cache returned zero for this region; see if
   735                    this is because the region is known and includes
   736                    only newlines.  While at that, count any newlines
   737                    we bump into, and exit if we found enough off them.  */
   738                 start_byte = CHAR_TO_BYTE (start);
   739                 while (start < lim1
   740                        && FETCH_BYTE (start_byte) == '\n')
   741                   {
   742                     start_byte++;
   743                     start++;
   744                     if (--count == 0)
   745                       {
   746                         if (bytepos)
   747                           *bytepos = start_byte;
   748                         return start;
   749                       }
   750                   }
   751                 /* If we found a non-newline character before hitting
   752                    position where the cache will again return non-zero
   753                    (i.e. no newlines beyond that position), it means
   754                    this region is not yet known to the cache, and we
   755                    must resort to the "dumb loop" method.  */
   756                 if (start < next_change && !result)
   757                   break;
   758                 result = 1;
   759               }
   760             if (start >= end)
   761               {
   762                 start = end;
   763                 start_byte = end_byte;
   764                 break;
   765               }
   766 
   767             /* START should never be after END.  */
   768             if (start_byte > ceiling_byte)
   769               start_byte = ceiling_byte;
   770 
   771             /* Now the text after start is an unknown region, and
   772                next_change is the position of the next known region. */
   773             ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
   774           }
   775         else if (start_byte == -1)
   776           start_byte = CHAR_TO_BYTE (start);
   777 
   778         /* The dumb loop can only scan text stored in contiguous
   779            bytes. BUFFER_CEILING_OF returns the last character
   780            position that is contiguous, so the ceiling is the
   781            position after that.  */
   782         tem = BUFFER_CEILING_OF (start_byte);
   783         ceiling_byte = min (tem, ceiling_byte);
   784 
   785         {
   786           /* The termination address of the dumb loop.  */
   787           unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
   788           ptrdiff_t lim_byte = ceiling_byte + 1;
   789 
   790           /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
   791              of the base, the cursor, and the next line.  */
   792           ptrdiff_t base = start_byte - lim_byte;
   793           ptrdiff_t cursor, next;
   794 
   795           for (cursor = base; cursor < 0; cursor = next)
   796             {
   797               /* The dumb loop.  */
   798               unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
   799               next = nl ? nl - lim_addr : 0;
   800 
   801               /* If we're using the newline cache, cache the fact that
   802                  the region we just traversed is free of newlines. */
   803               if (newline_cache && cursor != next)
   804                 {
   805                   know_region_cache (cache_buffer, newline_cache,
   806                                      BYTE_TO_CHAR (lim_byte + cursor),
   807                                      BYTE_TO_CHAR (lim_byte + next));
   808                   /* know_region_cache can relocate buffer text.  */
   809                   lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
   810                 }
   811 
   812               if (! nl)
   813                 break;
   814               next++;
   815 
   816               if (--count == 0)
   817                 {
   818                   if (bytepos)
   819                     *bytepos = lim_byte + next;
   820                   return BYTE_TO_CHAR (lim_byte + next);
   821                 }
   822               if (allow_quit)
   823                 maybe_quit ();
   824             }
   825 
   826           start_byte = lim_byte;
   827           start = BYTE_TO_CHAR (start_byte);
   828         }
   829       }
   830   else
   831     while (start > end)
   832       {
   833         /* The last character to check before the next obstacle.  */
   834         ptrdiff_t tem, ceiling_byte = end_byte;
   835 
   836         /* Consult the newline cache, if appropriate.  */
   837         if (newline_cache)
   838           {
   839             ptrdiff_t next_change;
   840             int result = 1;
   841 
   842             while (start > end && result)
   843               {
   844                 ptrdiff_t lim1;
   845 
   846                 result = region_cache_backward (cache_buffer, newline_cache,
   847                                                 start, &next_change);
   848                 if (result)
   849                   {
   850                     start = next_change;
   851                     lim1 = next_change = end;
   852                   }
   853                 else
   854                   lim1 = max (next_change, end);
   855                 start_byte = CHAR_TO_BYTE (start);
   856                 while (start > lim1
   857                        && FETCH_BYTE (start_byte - 1) == '\n')
   858                   {
   859                     if (++count == 0)
   860                       {
   861                         if (bytepos)
   862                           *bytepos = start_byte;
   863                         return start;
   864                       }
   865                     start_byte--;
   866                     start--;
   867                   }
   868                 if (start > next_change && !result)
   869                   break;
   870                 result = 1;
   871               }
   872             if (start <= end)
   873               {
   874                 start = end;
   875                 start_byte = end_byte;
   876                 break;
   877               }
   878 
   879             /* Start should never be at or before end.  */
   880             if (start_byte <= ceiling_byte)
   881               start_byte = ceiling_byte + 1;
   882 
   883             /* Now the text before start is an unknown region, and
   884                next_change is the position of the next known region. */
   885             ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
   886           }
   887         else if (start_byte == -1)
   888           start_byte = CHAR_TO_BYTE (start);
   889 
   890         /* Stop scanning before the gap.  */
   891         tem = BUFFER_FLOOR_OF (start_byte - 1);
   892         ceiling_byte = max (tem, ceiling_byte);
   893 
   894         {
   895           /* The termination address of the dumb loop.  */
   896           unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
   897 
   898           /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
   899              the base, the cursor, and the previous line.  These
   900              offsets are at least -1.  */
   901           ptrdiff_t base = start_byte - ceiling_byte;
   902           ptrdiff_t cursor, prev;
   903 
   904           for (cursor = base; 0 < cursor; cursor = prev)
   905             {
   906               unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
   907               prev = nl ? nl - ceiling_addr : -1;
   908 
   909               /* If we're looking for newlines, cache the fact that
   910                  this line's region is free of them. */
   911               if (newline_cache && cursor != prev + 1)
   912                 {
   913                   know_region_cache (cache_buffer, newline_cache,
   914                                      BYTE_TO_CHAR (ceiling_byte + prev + 1),
   915                                      BYTE_TO_CHAR (ceiling_byte + cursor));
   916                   /* know_region_cache can relocate buffer text.  */
   917                   ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
   918                 }
   919 
   920               if (! nl)
   921                 break;
   922 
   923               if (++count >= 0)
   924                 {
   925                   if (bytepos)
   926                     *bytepos = ceiling_byte + prev + 1;
   927                   return BYTE_TO_CHAR (ceiling_byte + prev + 1);
   928                 }
   929               if (allow_quit)
   930                 maybe_quit ();
   931             }
   932 
   933           start_byte = ceiling_byte;
   934           start = BYTE_TO_CHAR (start_byte);
   935         }
   936       }
   937 
   938   if (counted)
   939     *counted -= count;
   940   if (bytepos)
   941     {
   942       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
   943       eassert (*bytepos == CHAR_TO_BYTE (start));
   944     }
   945   return start;
   946 }
   947 
   948 /* Search for COUNT instances of a line boundary.
   949    Start at START.  If COUNT is negative, search backwards.
   950 
   951    We report the resulting position by calling TEMP_SET_PT_BOTH.
   952 
   953    If we find COUNT instances. we position after (always after,
   954    even if scanning backwards) the COUNTth match.
   955 
   956    If we don't find COUNT instances before reaching the end of the
   957    buffer (or the beginning, if scanning backwards), we position at
   958    the limit we bumped up against.
   959 
   960    If ALLOW_QUIT, check for quitting.  That's good to do
   961    except in special cases.  */
   962 
   963 void
   964 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
   965               ptrdiff_t limit, ptrdiff_t limit_byte,
   966               ptrdiff_t count, bool allow_quit)
   967 {
   968   ptrdiff_t charpos, bytepos, counted;
   969 
   970   charpos = find_newline (start, start_byte, limit, limit_byte,
   971                           count, &counted, &bytepos, allow_quit);
   972   if (counted != count)
   973     TEMP_SET_PT_BOTH (limit, limit_byte);
   974   else
   975     TEMP_SET_PT_BOTH (charpos, bytepos);
   976 }
   977 
   978 /* Like above, but always scan from point and report the
   979    resulting position in *CHARPOS and *BYTEPOS.  */
   980 
   981 ptrdiff_t
   982 scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
   983                          ptrdiff_t *bytepos)
   984 {
   985   ptrdiff_t counted;
   986 
   987   if (count <= 0)
   988     *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
   989                              &counted, bytepos, 1);
   990   else
   991     *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
   992                              &counted, bytepos, 1);
   993   return counted;
   994 }
   995 
   996 /* Like find_newline, but doesn't allow QUITting and doesn't return
   997    COUNTED.  */
   998 ptrdiff_t
   999 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
  1000                       ptrdiff_t cnt, ptrdiff_t *bytepos)
  1001 {
  1002   return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
  1003 }
  1004 
  1005 /* Like find_newline, but returns position before the newline, not
  1006    after, and only search up to TO.
  1007    This isn't just find_newline_no_quit (...)-1, because you might hit TO.  */
  1008 
  1009 ptrdiff_t
  1010 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
  1011                           ptrdiff_t cnt, ptrdiff_t *bytepos)
  1012 {
  1013   ptrdiff_t counted;
  1014   ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &counted, bytepos, 1);
  1015 
  1016   if (counted == cnt)
  1017     {
  1018       if (bytepos)
  1019         dec_both (&pos, &*bytepos);
  1020       else
  1021         pos--;
  1022     }
  1023   return pos;
  1024 }
  1025 
  1026 /* Subroutines of Lisp buffer search functions. */
  1027 
  1028 static Lisp_Object
  1029 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
  1030                 Lisp_Object count, int direction, int RE, bool posix)
  1031 {
  1032   EMACS_INT np;
  1033   EMACS_INT lim;
  1034   ptrdiff_t lim_byte;
  1035   EMACS_INT n = direction;
  1036 
  1037   if (!NILP (count))
  1038     {
  1039       CHECK_FIXNUM (count);
  1040       n *= XFIXNUM (count);
  1041     }
  1042 
  1043   CHECK_STRING (string);
  1044   if (NILP (bound))
  1045     {
  1046       if (n > 0)
  1047         lim = ZV, lim_byte = ZV_BYTE;
  1048       else
  1049         lim = BEGV, lim_byte = BEGV_BYTE;
  1050     }
  1051   else
  1052     {
  1053       lim = fix_position (bound);
  1054       if (n > 0 ? lim < PT : lim > PT)
  1055         error ("Invalid search bound (wrong side of point)");
  1056       if (lim > ZV)
  1057         lim = ZV, lim_byte = ZV_BYTE;
  1058       else if (lim < BEGV)
  1059         lim = BEGV, lim_byte = BEGV_BYTE;
  1060       else
  1061         lim_byte = CHAR_TO_BYTE (lim);
  1062     }
  1063 
  1064   /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
  1065      table.  */
  1066   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
  1067                          BVAR (current_buffer, case_eqv_table));
  1068 
  1069   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
  1070                       (!NILP (BVAR (current_buffer, case_fold_search))
  1071                        ? BVAR (current_buffer, case_canon_table)
  1072                        : Qnil),
  1073                       (!NILP (BVAR (current_buffer, case_fold_search))
  1074                        ? BVAR (current_buffer, case_eqv_table)
  1075                        : Qnil),
  1076                       posix);
  1077   if (np <= 0)
  1078     {
  1079       if (NILP (noerror))
  1080         xsignal1 (Qsearch_failed, string);
  1081 
  1082       if (!EQ (noerror, Qt))
  1083         {
  1084           eassert (BEGV <= lim && lim <= ZV);
  1085           SET_PT_BOTH (lim, lim_byte);
  1086           return Qnil;
  1087 #if 0 /* This would be clean, but maybe programs depend on
  1088          a value of nil here.  */
  1089           np = lim;
  1090 #endif
  1091         }
  1092       else
  1093         return Qnil;
  1094     }
  1095 
  1096   eassert (BEGV <= np && np <= ZV);
  1097   SET_PT (np);
  1098 
  1099   return make_fixnum (np);
  1100 }
  1101 
  1102 /* Return true if REGEXP it matches just one constant string.  */
  1103 
  1104 static bool
  1105 trivial_regexp_p (Lisp_Object regexp)
  1106 {
  1107   ptrdiff_t len = SBYTES (regexp);
  1108   unsigned char *s = SDATA (regexp);
  1109   while (--len >= 0)
  1110     {
  1111       switch (*s++)
  1112         {
  1113         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
  1114           return 0;
  1115         case '\\':
  1116           if (--len < 0)
  1117             return 0;
  1118           switch (*s++)
  1119             {
  1120             case '|': case '(': case ')': case '`': case '\'': case 'b':
  1121             case 'B': case '<': case '>': case 'w': case 'W': case 's':
  1122             case 'S': case '=': case '{': case '}': case '_':
  1123             case 'c': case 'C': /* for categoryspec and notcategoryspec */
  1124             case '1': case '2': case '3': case '4': case '5':
  1125             case '6': case '7': case '8': case '9':
  1126               return 0;
  1127             }
  1128         }
  1129     }
  1130   return 1;
  1131 }
  1132 
  1133 /* Search for the n'th occurrence of STRING in the current buffer,
  1134    starting at position POS and stopping at position LIM,
  1135    treating STRING as a literal string if RE is false or as
  1136    a regular expression if RE is true.
  1137 
  1138    If N is positive, searching is forward and LIM must be greater than POS.
  1139    If N is negative, searching is backward and LIM must be less than POS.
  1140 
  1141    Returns -x if x occurrences remain to be found (x > 0),
  1142    or else the position at the beginning of the Nth occurrence
  1143    (if searching backward) or the end (if searching forward).
  1144 
  1145    POSIX is nonzero if we want full backtracking (POSIX style)
  1146    for this pattern.  0 means backtrack only enough to get a valid match.  */
  1147 
  1148 #define TRANSLATE(out, trt, d)                  \
  1149 do                                              \
  1150   {                                             \
  1151     if (! NILP (trt))                           \
  1152       {                                         \
  1153         Lisp_Object temp;                       \
  1154         temp = Faref (trt, make_fixnum (d));    \
  1155         if (FIXNUMP (temp))                     \
  1156           out = XFIXNUM (temp);                 \
  1157         else                                    \
  1158           out = d;                              \
  1159       }                                         \
  1160     else                                        \
  1161       out = d;                                  \
  1162   }                                             \
  1163 while (0)
  1164 
  1165 /* Only used in search_buffer, to record the end position of the match
  1166    when searching regexps and SEARCH_REGS should not be changed
  1167    (i.e. Vinhibit_changing_match_data is non-nil).  */
  1168 static struct re_registers search_regs_1;
  1169 
  1170 static EMACS_INT
  1171 search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
  1172                   ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
  1173                   Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
  1174 {
  1175   unsigned char *p1, *p2;
  1176   ptrdiff_t s1, s2;
  1177 
  1178   /* Snapshot in case Lisp changes the value.  */
  1179   bool preserve_match_data = NILP (Vinhibit_changing_match_data);
  1180 
  1181   struct regexp_cache *cache_entry =
  1182     compile_pattern (string,
  1183                      preserve_match_data ? &search_regs : &search_regs_1,
  1184                      trt, posix,
  1185                      !NILP (BVAR (current_buffer, enable_multibyte_characters)));
  1186   struct re_pattern_buffer *bufp = &cache_entry->buf;
  1187 
  1188   maybe_quit ();                /* Do a pending quit right away,
  1189                                    to avoid paradoxical behavior */
  1190   /* Get pointers and sizes of the two strings
  1191      that make up the visible portion of the buffer. */
  1192 
  1193   p1 = BEGV_ADDR;
  1194   s1 = GPT_BYTE - BEGV_BYTE;
  1195   p2 = GAP_END_ADDR;
  1196   s2 = ZV_BYTE - GPT_BYTE;
  1197   if (s1 < 0)
  1198     {
  1199       p2 = p1;
  1200       s2 = ZV_BYTE - BEGV_BYTE;
  1201       s1 = 0;
  1202     }
  1203   if (s2 < 0)
  1204     {
  1205       s1 = ZV_BYTE - BEGV_BYTE;
  1206       s2 = 0;
  1207     }
  1208 
  1209   specpdl_ref count = SPECPDL_INDEX ();
  1210   freeze_buffer_relocation ();
  1211   freeze_pattern (cache_entry);
  1212 
  1213   while (n < 0)
  1214     {
  1215       ptrdiff_t val;
  1216 
  1217       re_match_object = Qnil;
  1218       val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
  1219                          pos_byte - BEGV_BYTE, lim_byte - pos_byte,
  1220                          preserve_match_data ? &search_regs : &search_regs_1,
  1221                          /* Don't allow match past current point */
  1222                          pos_byte - BEGV_BYTE);
  1223       if (val == -2)
  1224         {
  1225           unbind_to (count, Qnil);
  1226           matcher_overflow ();
  1227         }
  1228       if (val >= 0)
  1229         {
  1230           if (preserve_match_data)
  1231             {
  1232               pos_byte = search_regs.start[0] + BEGV_BYTE;
  1233               for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
  1234                 if (search_regs.start[i] >= 0)
  1235                   {
  1236                     search_regs.start[i]
  1237                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
  1238                     search_regs.end[i]
  1239                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
  1240                   }
  1241               XSETBUFFER (last_thing_searched, current_buffer);
  1242               /* Set pos to the new position. */
  1243               pos = search_regs.start[0];
  1244             }
  1245           else
  1246             {
  1247               pos_byte = search_regs_1.start[0] + BEGV_BYTE;
  1248               /* Set pos to the new position.  */
  1249               pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
  1250             }
  1251         }
  1252       else
  1253         {
  1254           unbind_to (count, Qnil);
  1255           return (n);
  1256         }
  1257       n++;
  1258       maybe_quit ();
  1259     }
  1260   while (n > 0)
  1261     {
  1262       ptrdiff_t val;
  1263 
  1264       re_match_object = Qnil;
  1265       val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
  1266                          pos_byte - BEGV_BYTE, lim_byte - pos_byte,
  1267                          preserve_match_data ? &search_regs : &search_regs_1,
  1268                          lim_byte - BEGV_BYTE);
  1269       if (val == -2)
  1270         {
  1271           unbind_to (count, Qnil);
  1272           matcher_overflow ();
  1273         }
  1274       if (val >= 0)
  1275         {
  1276           if (preserve_match_data)
  1277             {
  1278               pos_byte = search_regs.end[0] + BEGV_BYTE;
  1279               for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
  1280                 if (search_regs.start[i] >= 0)
  1281                   {
  1282                     search_regs.start[i]
  1283                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
  1284                     search_regs.end[i]
  1285                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
  1286                   }
  1287               XSETBUFFER (last_thing_searched, current_buffer);
  1288               pos = search_regs.end[0];
  1289             }
  1290           else
  1291             {
  1292               pos_byte = search_regs_1.end[0] + BEGV_BYTE;
  1293               pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
  1294             }
  1295         }
  1296       else
  1297         {
  1298           unbind_to (count, Qnil);
  1299           return (0 - n);
  1300         }
  1301       n--;
  1302       maybe_quit ();
  1303     }
  1304   unbind_to (count, Qnil);
  1305   return (pos);
  1306 }
  1307 
  1308 static EMACS_INT
  1309 search_buffer_non_re (Lisp_Object string, ptrdiff_t pos,
  1310                       ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte,
  1311                       EMACS_INT n, int RE, Lisp_Object trt, Lisp_Object inverse_trt,
  1312                       bool posix)
  1313 {
  1314   unsigned char *raw_pattern, *pat;
  1315   ptrdiff_t raw_pattern_size;
  1316   ptrdiff_t raw_pattern_size_byte;
  1317   unsigned char *patbuf;
  1318   bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
  1319   unsigned char *base_pat;
  1320   /* Set to positive if we find a non-ASCII char that need
  1321      translation.  Otherwise set to zero later.  */
  1322   int char_base = -1;
  1323   bool boyer_moore_ok = 1;
  1324   USE_SAFE_ALLOCA;
  1325 
  1326   /* MULTIBYTE says whether the text to be searched is multibyte.
  1327      We must convert PATTERN to match that, or we will not really
  1328      find things right.  */
  1329 
  1330   if (multibyte == STRING_MULTIBYTE (string))
  1331     {
  1332       raw_pattern = SDATA (string);
  1333       raw_pattern_size = SCHARS (string);
  1334       raw_pattern_size_byte = SBYTES (string);
  1335     }
  1336   else if (multibyte)
  1337     {
  1338       raw_pattern_size = SCHARS (string);
  1339       raw_pattern_size_byte
  1340         = count_size_as_multibyte (SDATA (string),
  1341                                    raw_pattern_size);
  1342       raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
  1343       copy_text (SDATA (string), raw_pattern,
  1344                  SCHARS (string), 0, 1);
  1345     }
  1346   else
  1347     {
  1348       /* Converting multibyte to single-byte.  */
  1349       raw_pattern_size = SCHARS (string);
  1350       raw_pattern_size_byte = SCHARS (string);
  1351       raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
  1352       copy_text (SDATA (string), raw_pattern,
  1353                  SBYTES (string), 1, 0);
  1354     }
  1355 
  1356   /* Copy and optionally translate the pattern.  */
  1357   ptrdiff_t len = raw_pattern_size;
  1358   ptrdiff_t len_byte = raw_pattern_size_byte;
  1359   SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
  1360   pat = patbuf;
  1361   base_pat = raw_pattern;
  1362   if (multibyte)
  1363     {
  1364       /* Fill patbuf by translated characters in STRING while
  1365          checking if we can use boyer-moore search.  If TRT is
  1366          non-nil, we can use boyer-moore search only if TRT can be
  1367          represented by the byte array of 256 elements.  For that,
  1368          all non-ASCII case-equivalents of all case-sensitive
  1369          characters in STRING must belong to the same character
  1370          group (two characters belong to the same group iff their
  1371          multibyte forms are the same except for the last byte;
  1372          i.e. every 64 characters form a group; U+0000..U+003F,
  1373          U+0040..U+007F, U+0080..U+00BF, ...).  */
  1374 
  1375       while (--len >= 0)
  1376         {
  1377           unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
  1378           int translated, inverse;
  1379           int charlen;
  1380 
  1381           /* If we got here and the RE flag is set, it's because we're
  1382              dealing with a regexp known to be trivial, so the backslash
  1383              just quotes the next character.  */
  1384           if (RE && *base_pat == '\\')
  1385             {
  1386               len--;
  1387               raw_pattern_size--;
  1388               len_byte--;
  1389               base_pat++;
  1390             }
  1391 
  1392           int in_charlen, c = string_char_and_length (base_pat, &in_charlen);
  1393 
  1394           if (NILP (trt))
  1395             {
  1396               str = base_pat;
  1397               charlen = in_charlen;
  1398             }
  1399           else
  1400             {
  1401               /* Translate the character.  */
  1402               TRANSLATE (translated, trt, c);
  1403               charlen = CHAR_STRING (translated, str_base);
  1404               str = str_base;
  1405 
  1406               /* Check if C has any other case-equivalents.  */
  1407               TRANSLATE (inverse, inverse_trt, c);
  1408               /* If so, check if we can use boyer-moore.  */
  1409               if (c != inverse && boyer_moore_ok)
  1410                 {
  1411                   /* Check if all equivalents belong to the same
  1412                      group of characters.  Note that the check of C
  1413                      itself is done by the last iteration.  */
  1414                   int this_char_base = -1;
  1415 
  1416                   while (boyer_moore_ok)
  1417                     {
  1418                       if (ASCII_CHAR_P (inverse))
  1419                         {
  1420                           if (this_char_base > 0)
  1421                             boyer_moore_ok = 0;
  1422                           else
  1423                             this_char_base = 0;
  1424                         }
  1425                       else if (CHAR_BYTE8_P (inverse))
  1426                         /* Boyer-moore search can't handle a
  1427                            translation of an eight-bit
  1428                            character.  */
  1429                         boyer_moore_ok = 0;
  1430                       else if (this_char_base < 0)
  1431                         {
  1432                           this_char_base = inverse & ~0x3F;
  1433                           if (char_base < 0)
  1434                             char_base = this_char_base;
  1435                           else if (this_char_base != char_base)
  1436                             boyer_moore_ok = 0;
  1437                         }
  1438                       else if ((inverse & ~0x3F) != this_char_base)
  1439                         boyer_moore_ok = 0;
  1440                       if (c == inverse)
  1441                         break;
  1442                       TRANSLATE (inverse, inverse_trt, inverse);
  1443                     }
  1444                 }
  1445             }
  1446 
  1447           /* Store this character into the translated pattern.  */
  1448           memcpy (pat, str, charlen);
  1449           pat += charlen;
  1450           base_pat += in_charlen;
  1451           len_byte -= in_charlen;
  1452         }
  1453 
  1454       /* If char_base is still negative we didn't find any translated
  1455          non-ASCII characters.  */
  1456       if (char_base < 0)
  1457         char_base = 0;
  1458     }
  1459   else
  1460     {
  1461       /* Unibyte buffer.  */
  1462       char_base = 0;
  1463       while (--len >= 0)
  1464         {
  1465           int c, translated, inverse;
  1466 
  1467           /* If we got here and the RE flag is set, it's because we're
  1468              dealing with a regexp known to be trivial, so the backslash
  1469              just quotes the next character.  */
  1470           if (RE && *base_pat == '\\')
  1471             {
  1472               len--;
  1473               raw_pattern_size--;
  1474               base_pat++;
  1475             }
  1476           c = *base_pat++;
  1477           TRANSLATE (translated, trt, c);
  1478           *pat++ = translated;
  1479           /* Check that none of C's equivalents violates the
  1480              assumptions of boyer_moore.  */
  1481           TRANSLATE (inverse, inverse_trt, c);
  1482           while (1)
  1483             {
  1484               if (inverse >= 0200)
  1485                 {
  1486                   boyer_moore_ok = 0;
  1487                   break;
  1488                 }
  1489               if (c == inverse)
  1490                 break;
  1491               TRANSLATE (inverse, inverse_trt, inverse);
  1492             }
  1493         }
  1494     }
  1495 
  1496   len_byte = pat - patbuf;
  1497   pat = base_pat = patbuf;
  1498 
  1499   EMACS_INT result
  1500     = (boyer_moore_ok
  1501        ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
  1502                       pos_byte, lim_byte,
  1503                       char_base)
  1504        : simple_search (n, pat, raw_pattern_size, len_byte, trt,
  1505                         pos, pos_byte, lim, lim_byte));
  1506   SAFE_FREE ();
  1507   return result;
  1508 }
  1509 
  1510 EMACS_INT
  1511 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
  1512                ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
  1513                int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
  1514 {
  1515   if (running_asynch_code)
  1516     save_search_regs ();
  1517 
  1518   /* Searching 0 times means don't move.  */
  1519   /* Null string is found at starting position.  */
  1520   if (n == 0 || SCHARS (string) == 0)
  1521     {
  1522       set_search_regs (pos_byte, 0);
  1523       return pos;
  1524     }
  1525 
  1526   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
  1527     pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte,
  1528                             n, trt, inverse_trt, posix);
  1529   else
  1530     pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte,
  1531                                 n, RE, trt, inverse_trt, posix);
  1532 
  1533   return pos;
  1534 }
  1535 
  1536 /* Do a simple string search N times for the string PAT,
  1537    whose length is LEN/LEN_BYTE,
  1538    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
  1539    TRT is the translation table.
  1540 
  1541    Return the character position where the match is found.
  1542    Otherwise, if M matches remained to be found, return -M.
  1543 
  1544    This kind of search works regardless of what is in PAT and
  1545    regardless of what is in TRT.  It is used in cases where
  1546    boyer_moore cannot work.  */
  1547 
  1548 static EMACS_INT
  1549 simple_search (EMACS_INT n, unsigned char *pat,
  1550                ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
  1551                ptrdiff_t pos, ptrdiff_t pos_byte,
  1552                ptrdiff_t lim, ptrdiff_t lim_byte)
  1553 {
  1554   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  1555   bool forward = n > 0;
  1556   /* Number of buffer bytes matched.  Note that this may be different
  1557      from len_byte in a multibyte buffer.  */
  1558   ptrdiff_t match_byte = PTRDIFF_MIN;
  1559 
  1560   if (lim > pos && multibyte)
  1561     while (n > 0)
  1562       {
  1563         while (1)
  1564           {
  1565             /* Try matching at position POS.  */
  1566             ptrdiff_t this_pos_byte = pos_byte;
  1567             ptrdiff_t this_len = len;
  1568             unsigned char *p = pat;
  1569             if (pos + len > lim || pos_byte + len_byte > lim_byte)
  1570               goto stop;
  1571 
  1572             while (this_len > 0)
  1573               {
  1574                 int charlen, pat_ch = string_char_and_length (p, &charlen);
  1575                 int buf_charlen, buf_ch
  1576                   = string_char_and_length (BYTE_POS_ADDR (this_pos_byte),
  1577                                             &buf_charlen);
  1578                 TRANSLATE (buf_ch, trt, buf_ch);
  1579 
  1580                 if (buf_ch != pat_ch)
  1581                   break;
  1582 
  1583                 this_len--;
  1584                 p += charlen;
  1585 
  1586                 this_pos_byte += buf_charlen;
  1587               }
  1588 
  1589             if (this_len == 0)
  1590               {
  1591                 match_byte = this_pos_byte - pos_byte;
  1592                 pos += len;
  1593                 pos_byte += match_byte;
  1594                 break;
  1595               }
  1596 
  1597             inc_both (&pos, &pos_byte);
  1598           }
  1599 
  1600         n--;
  1601       }
  1602   else if (lim > pos)
  1603     while (n > 0)
  1604       {
  1605         while (1)
  1606           {
  1607             /* Try matching at position POS.  */
  1608             ptrdiff_t this_pos = pos;
  1609             ptrdiff_t this_len = len;
  1610             unsigned char *p = pat;
  1611 
  1612             if (pos + len > lim)
  1613               goto stop;
  1614 
  1615             while (this_len > 0)
  1616               {
  1617                 int pat_ch = *p++;
  1618                 int buf_ch = FETCH_BYTE (this_pos);
  1619                 TRANSLATE (buf_ch, trt, buf_ch);
  1620 
  1621                 if (buf_ch != pat_ch)
  1622                   break;
  1623 
  1624                 this_len--;
  1625                 this_pos++;
  1626               }
  1627 
  1628             if (this_len == 0)
  1629               {
  1630                 match_byte = len;
  1631                 pos += len;
  1632                 break;
  1633               }
  1634 
  1635             pos++;
  1636           }
  1637 
  1638         n--;
  1639       }
  1640   /* Backwards search.  */
  1641   else if (lim < pos && multibyte)
  1642     while (n < 0)
  1643       {
  1644         while (1)
  1645           {
  1646             /* Try matching at position POS.  */
  1647             ptrdiff_t this_pos = pos;
  1648             ptrdiff_t this_pos_byte = pos_byte;
  1649             ptrdiff_t this_len = len;
  1650             const unsigned char *p = pat + len_byte;
  1651 
  1652             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
  1653               goto stop;
  1654 
  1655             while (this_len > 0)
  1656               {
  1657                 int pat_ch, buf_ch;
  1658 
  1659                 dec_both (&this_pos, &this_pos_byte);
  1660                 p -= raw_prev_char_len (p);
  1661                 pat_ch = STRING_CHAR (p);
  1662                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
  1663                 TRANSLATE (buf_ch, trt, buf_ch);
  1664 
  1665                 if (buf_ch != pat_ch)
  1666                   break;
  1667 
  1668                 this_len--;
  1669               }
  1670 
  1671             if (this_len == 0)
  1672               {
  1673                 match_byte = pos_byte - this_pos_byte;
  1674                 pos = this_pos;
  1675                 pos_byte = this_pos_byte;
  1676                 break;
  1677               }
  1678 
  1679             dec_both (&pos, &pos_byte);
  1680           }
  1681 
  1682         n++;
  1683       }
  1684   else if (lim < pos)
  1685     while (n < 0)
  1686       {
  1687         while (1)
  1688           {
  1689             /* Try matching at position POS.  */
  1690             ptrdiff_t this_pos = pos - len;
  1691             ptrdiff_t this_len = len;
  1692             unsigned char *p = pat;
  1693 
  1694             if (this_pos < lim)
  1695               goto stop;
  1696 
  1697             while (this_len > 0)
  1698               {
  1699                 int pat_ch = *p++;
  1700                 int buf_ch = FETCH_BYTE (this_pos);
  1701                 TRANSLATE (buf_ch, trt, buf_ch);
  1702 
  1703                 if (buf_ch != pat_ch)
  1704                   break;
  1705                 this_len--;
  1706                 this_pos++;
  1707               }
  1708 
  1709             if (this_len == 0)
  1710               {
  1711                 match_byte = len;
  1712                 pos -= len;
  1713                 break;
  1714               }
  1715 
  1716             pos--;
  1717           }
  1718 
  1719         n++;
  1720       }
  1721 
  1722  stop:
  1723   if (n == 0)
  1724     {
  1725       eassert (match_byte != PTRDIFF_MIN);
  1726       if (forward)
  1727         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
  1728       else
  1729         set_search_regs (multibyte ? pos_byte : pos, match_byte);
  1730 
  1731       return pos;
  1732     }
  1733   else if (n > 0)
  1734     return -n;
  1735   else
  1736     return n;
  1737 }
  1738 
  1739 /* Do Boyer-Moore search N times for the string BASE_PAT,
  1740    whose length is LEN_BYTE,
  1741    from buffer position POS_BYTE until LIM_BYTE.
  1742    DIRECTION says which direction we search in.
  1743    TRT and INVERSE_TRT are translation tables.
  1744    Characters in PAT are already translated by TRT.
  1745 
  1746    This kind of search works if all the characters in BASE_PAT that
  1747    have nontrivial translation are the same aside from the last byte.
  1748    This makes it possible to translate just the last byte of a
  1749    character, and do so after just a simple test of the context.
  1750    CHAR_BASE is nonzero if there is such a non-ASCII character.
  1751 
  1752    If that criterion is not satisfied, do not call this function.  */
  1753 
  1754 static EMACS_INT
  1755 boyer_moore (EMACS_INT n, unsigned char *base_pat,
  1756              ptrdiff_t len_byte,
  1757              Lisp_Object trt, Lisp_Object inverse_trt,
  1758              ptrdiff_t pos_byte, ptrdiff_t lim_byte,
  1759              int char_base)
  1760 {
  1761   int direction = ((n > 0) ? 1 : -1);
  1762   register ptrdiff_t dirlen;
  1763   ptrdiff_t limit;
  1764   int stride_for_teases = 0;
  1765   int BM_tab[0400];
  1766   register unsigned char *cursor, *p_limit;
  1767   register ptrdiff_t i;
  1768   register int j;
  1769   unsigned char *pat, *pat_end;
  1770   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  1771 
  1772   unsigned char simple_translate[0400];
  1773   /* These are set to the preceding bytes of a byte to be translated
  1774      if char_base is nonzero.  As the maximum byte length of a
  1775      multibyte character is 5, we have to check at most four previous
  1776      bytes.  */
  1777   int translate_prev_byte1 = 0;
  1778   int translate_prev_byte2 = 0;
  1779   int translate_prev_byte3 = 0;
  1780 
  1781   /* The general approach is that we are going to maintain that we know
  1782      the first (closest to the present position, in whatever direction
  1783      we're searching) character that could possibly be the last
  1784      (furthest from present position) character of a valid match.  We
  1785      advance the state of our knowledge by looking at that character
  1786      and seeing whether it indeed matches the last character of the
  1787      pattern.  If it does, we take a closer look.  If it does not, we
  1788      move our pointer (to putative last characters) as far as is
  1789      logically possible.  This amount of movement, which I call a
  1790      stride, will be the length of the pattern if the actual character
  1791      appears nowhere in the pattern, otherwise it will be the distance
  1792      from the last occurrence of that character to the end of the
  1793      pattern.  If the amount is zero we have a possible match.  */
  1794 
  1795   /* Here we make a "mickey mouse" BM table.  The stride of the search
  1796      is determined only by the last character of the putative match.
  1797      If that character does not match, we will stride the proper
  1798      distance to propose a match that superimposes it on the last
  1799      instance of a character that matches it (per trt), or misses
  1800      it entirely if there is none. */
  1801 
  1802   dirlen = len_byte * direction;
  1803 
  1804   /* Record position after the end of the pattern.  */
  1805   pat_end = base_pat + len_byte;
  1806   /* BASE_PAT points to a character that we start scanning from.
  1807      It is the first character in a forward search,
  1808      the last character in a backward search.  */
  1809   if (direction < 0)
  1810     base_pat = pat_end - 1;
  1811 
  1812   /* A character that does not appear in the pattern induces a
  1813      stride equal to the pattern length.  */
  1814   for (i = 0; i < 0400; i++)
  1815     BM_tab[i] = dirlen;
  1816 
  1817   /* We use this for translation, instead of TRT itself.
  1818      We fill this in to handle the characters that actually
  1819      occur in the pattern.  Others don't matter anyway!  */
  1820   for (i = 0; i < 0400; i++)
  1821     simple_translate[i] = i;
  1822 
  1823   if (char_base)
  1824     {
  1825       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
  1826          byte following them are the target of translation.  */
  1827       eassume (0x80 <= char_base && char_base <= MAX_CHAR);
  1828       unsigned char str[MAX_MULTIBYTE_LENGTH];
  1829       int cblen = CHAR_STRING (char_base, str);
  1830 
  1831       translate_prev_byte1 = str[cblen - 2];
  1832       if (cblen > 2)
  1833         {
  1834           translate_prev_byte2 = str[cblen - 3];
  1835           if (cblen > 3)
  1836             translate_prev_byte3 = str[cblen - 4];
  1837         }
  1838     }
  1839 
  1840   i = 0;
  1841   while (i != dirlen)
  1842     {
  1843       unsigned char *ptr = base_pat + i;
  1844       i += direction;
  1845       if (! NILP (trt))
  1846         {
  1847           /* If the byte currently looking at is the last of a
  1848              character to check case-equivalents, set CH to that
  1849              character.  An ASCII character and a non-ASCII character
  1850              matching with CHAR_BASE are to be checked.  */
  1851           int ch = -1;
  1852 
  1853           if (ASCII_CHAR_P (*ptr) || ! multibyte)
  1854             ch = *ptr;
  1855           else if (char_base
  1856                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
  1857             {
  1858               unsigned char *charstart = ptr - 1;
  1859 
  1860               while (! (CHAR_HEAD_P (*charstart)))
  1861                 charstart--;
  1862               ch = STRING_CHAR (charstart);
  1863               if (char_base != (ch & ~0x3F))
  1864                 ch = -1;
  1865             }
  1866 
  1867           if (ch >= 0200 && multibyte)
  1868             j = (ch & 0x3F) | 0200;
  1869           else
  1870             j = *ptr;
  1871 
  1872           if (i == dirlen)
  1873             stride_for_teases = BM_tab[j];
  1874 
  1875           BM_tab[j] = dirlen - i;
  1876           /* A translation table is accompanied by its inverse -- see
  1877              comment following downcase_table for details.  */
  1878           if (ch >= 0)
  1879             {
  1880               int starting_ch = ch;
  1881               int starting_j = j;
  1882 
  1883               while (1)
  1884                 {
  1885                   TRANSLATE (ch, inverse_trt, ch);
  1886                   if (ch >= 0200 && multibyte)
  1887                     j = (ch & 0x3F) | 0200;
  1888                   else
  1889                     j = ch;
  1890 
  1891                   /* For all the characters that map into CH,
  1892                      set up simple_translate to map the last byte
  1893                      into STARTING_J.  */
  1894                   simple_translate[j] = starting_j;
  1895                   if (ch == starting_ch)
  1896                     break;
  1897                   BM_tab[j] = dirlen - i;
  1898                 }
  1899             }
  1900         }
  1901       else
  1902         {
  1903           j = *ptr;
  1904 
  1905           if (i == dirlen)
  1906             stride_for_teases = BM_tab[j];
  1907           BM_tab[j] = dirlen - i;
  1908         }
  1909       /* stride_for_teases tells how much to stride if we get a
  1910          match on the far character but are subsequently
  1911          disappointed, by recording what the stride would have been
  1912          for that character if the last character had been
  1913          different.  */
  1914     }
  1915   pos_byte += dirlen - ((direction > 0) ? direction : 0);
  1916   /* loop invariant - POS_BYTE points at where last char (first
  1917      char if reverse) of pattern would align in a possible match.  */
  1918   while (n != 0)
  1919     {
  1920       ptrdiff_t tail_end;
  1921       unsigned char *tail_end_ptr;
  1922 
  1923       /* It's been reported that some (broken) compiler thinks that
  1924          Boolean expressions in an arithmetic context are unsigned.
  1925          Using an explicit ?1:0 prevents this.  */
  1926       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
  1927           < 0)
  1928         return (n * (0 - direction));
  1929       /* First we do the part we can by pointers (maybe nothing) */
  1930       maybe_quit ();
  1931       pat = base_pat;
  1932       limit = pos_byte - dirlen + direction;
  1933       if (direction > 0)
  1934         {
  1935           limit = BUFFER_CEILING_OF (limit);
  1936           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
  1937              can take on without hitting edge of buffer or the gap.  */
  1938           limit = min (limit, pos_byte + 20000);
  1939           limit = min (limit, lim_byte - 1);
  1940         }
  1941       else
  1942         {
  1943           limit = BUFFER_FLOOR_OF (limit);
  1944           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
  1945              can take on without hitting edge of buffer or the gap.  */
  1946           limit = max (limit, pos_byte - 20000);
  1947           limit = max (limit, lim_byte);
  1948         }
  1949       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
  1950       tail_end_ptr = BYTE_POS_ADDR (tail_end);
  1951 
  1952       if ((limit - pos_byte) * direction > 20)
  1953         {
  1954           unsigned char *p2;
  1955 
  1956           p_limit = BYTE_POS_ADDR (limit);
  1957           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
  1958           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
  1959           while (1)             /* use one cursor setting as long as i can */
  1960             {
  1961               if (direction > 0) /* worth duplicating */
  1962                 {
  1963                   while (cursor <= p_limit)
  1964                     {
  1965                       if (BM_tab[*cursor] == 0)
  1966                         goto hit;
  1967                       cursor += BM_tab[*cursor];
  1968                     }
  1969                 }
  1970               else
  1971                 {
  1972                   while (cursor >= p_limit)
  1973                     {
  1974                       if (BM_tab[*cursor] == 0)
  1975                         goto hit;
  1976                       cursor += BM_tab[*cursor];
  1977                     }
  1978                 }
  1979               /* If you are here, cursor is beyond the end of the
  1980                  searched region.  You fail to match within the
  1981                  permitted region and would otherwise try a character
  1982                  beyond that region.  */
  1983               break;
  1984 
  1985             hit:
  1986               i = dirlen - direction;
  1987               if (! NILP (trt))
  1988                 {
  1989                   while ((i -= direction) + direction != 0)
  1990                     {
  1991                       int ch;
  1992                       cursor -= direction;
  1993                       /* Translate only the last byte of a character.  */
  1994                       if (! multibyte
  1995                           || ((cursor == tail_end_ptr
  1996                                || CHAR_HEAD_P (cursor[1]))
  1997                               && (CHAR_HEAD_P (cursor[0])
  1998                                   /* Check if this is the last byte of
  1999                                      a translatable character.  */
  2000                                   || (translate_prev_byte1 == cursor[-1]
  2001                                       && (CHAR_HEAD_P (translate_prev_byte1)
  2002                                           || (translate_prev_byte2 == cursor[-2]
  2003                                               && (CHAR_HEAD_P (translate_prev_byte2)
  2004                                                   || (translate_prev_byte3 == cursor[-3]))))))))
  2005                         ch = simple_translate[*cursor];
  2006                       else
  2007                         ch = *cursor;
  2008                       if (pat[i] != ch)
  2009                         break;
  2010                     }
  2011                 }
  2012               else
  2013                 {
  2014                   while ((i -= direction) + direction != 0)
  2015                     {
  2016                       cursor -= direction;
  2017                       if (pat[i] != *cursor)
  2018                         break;
  2019                     }
  2020                 }
  2021               cursor += dirlen - i - direction; /* fix cursor */
  2022               if (i + direction == 0)
  2023                 {
  2024                   ptrdiff_t position, start, end;
  2025 #ifdef REL_ALLOC
  2026                   ptrdiff_t cursor_off;
  2027 #endif
  2028 
  2029                   cursor -= direction;
  2030 
  2031                   position = pos_byte + cursor - p2 + ((direction > 0)
  2032                                                        ? 1 - len_byte : 0);
  2033 #ifdef REL_ALLOC
  2034                   /* set_search_regs might call malloc, which could
  2035                      cause ralloc.c relocate buffer text.  We need to
  2036                      update pointers into buffer text due to that.  */
  2037                   cursor_off = cursor - p2;
  2038 #endif
  2039                   set_search_regs (position, len_byte);
  2040 #ifdef REL_ALLOC
  2041                   p_limit = BYTE_POS_ADDR (limit);
  2042                   p2 = BYTE_POS_ADDR (pos_byte);
  2043                   cursor = p2 + cursor_off;
  2044 #endif
  2045 
  2046                   if (NILP (Vinhibit_changing_match_data))
  2047                     {
  2048                       start = search_regs.start[0];
  2049                       end = search_regs.end[0];
  2050                     }
  2051                   else
  2052                     /* If Vinhibit_changing_match_data is non-nil,
  2053                        search_regs will not be changed.  So let's
  2054                        compute start and end here.  */
  2055                     {
  2056                       start = BYTE_TO_CHAR (position);
  2057                       end = BYTE_TO_CHAR (position + len_byte);
  2058                     }
  2059 
  2060                   if ((n -= direction) != 0)
  2061                     cursor += dirlen; /* to resume search */
  2062                   else
  2063                     return direction > 0 ? end : start;
  2064                 }
  2065               else
  2066                 cursor += stride_for_teases; /* <sigh> we lose -  */
  2067             }
  2068           pos_byte += cursor - p2;
  2069         }
  2070       else
  2071         /* Now we'll pick up a clump that has to be done the hard
  2072            way because it covers a discontinuity.  */
  2073         {
  2074           limit = ((direction > 0)
  2075                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
  2076                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
  2077           limit = ((direction > 0)
  2078                    ? min (limit + len_byte, lim_byte - 1)
  2079                    : max (limit - len_byte, lim_byte));
  2080           /* LIMIT is now the last value POS_BYTE can have
  2081              and still be valid for a possible match.  */
  2082           while (1)
  2083             {
  2084               /* This loop can be coded for space rather than
  2085                  speed because it will usually run only once.
  2086                  (the reach is at most len + 21, and typically
  2087                  does not exceed len).  */
  2088               while ((limit - pos_byte) * direction >= 0)
  2089                 {
  2090                   int ch = FETCH_BYTE (pos_byte);
  2091                   if (BM_tab[ch] == 0)
  2092                     goto hit2;
  2093                   pos_byte += BM_tab[ch];
  2094                 }
  2095               break;    /* ran off the end */
  2096 
  2097             hit2:
  2098               /* Found what might be a match.  */
  2099               i = dirlen - direction;
  2100               while ((i -= direction) + direction != 0)
  2101                 {
  2102                   int ch;
  2103                   unsigned char *ptr;
  2104                   pos_byte -= direction;
  2105                   ptr = BYTE_POS_ADDR (pos_byte);
  2106                   /* Translate only the last byte of a character.  */
  2107                   if (! multibyte
  2108                       || ((ptr == tail_end_ptr
  2109                            || CHAR_HEAD_P (ptr[1]))
  2110                           && (CHAR_HEAD_P (ptr[0])
  2111                               /* Check if this is the last byte of a
  2112                                  translatable character.  */
  2113                               || (translate_prev_byte1 == ptr[-1]
  2114                                   && (CHAR_HEAD_P (translate_prev_byte1)
  2115                                       || (translate_prev_byte2 == ptr[-2]
  2116                                           && (CHAR_HEAD_P (translate_prev_byte2)
  2117                                               || translate_prev_byte3 == ptr[-3])))))))
  2118                     ch = simple_translate[*ptr];
  2119                   else
  2120                     ch = *ptr;
  2121                   if (pat[i] != ch)
  2122                     break;
  2123                 }
  2124               /* Above loop has moved POS_BYTE part or all the way
  2125                  back to the first pos (last pos if reverse).
  2126                  Set it once again at the last (first if reverse) char.  */
  2127               pos_byte += dirlen - i - direction;
  2128               if (i + direction == 0)
  2129                 {
  2130                   ptrdiff_t position, start, end;
  2131                   pos_byte -= direction;
  2132 
  2133                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
  2134                   set_search_regs (position, len_byte);
  2135 
  2136                   if (NILP (Vinhibit_changing_match_data))
  2137                     {
  2138                       start = search_regs.start[0];
  2139                       end = search_regs.end[0];
  2140                     }
  2141                   else
  2142                     /* If Vinhibit_changing_match_data is non-nil,
  2143                        search_regs will not be changed.  So let's
  2144                        compute start and end here.  */
  2145                     {
  2146                       start = BYTE_TO_CHAR (position);
  2147                       end = BYTE_TO_CHAR (position + len_byte);
  2148                     }
  2149 
  2150                   if ((n -= direction) != 0)
  2151                     pos_byte += dirlen; /* to resume search */
  2152                   else
  2153                     return direction > 0 ? end : start;
  2154                 }
  2155               else
  2156                 pos_byte += stride_for_teases;
  2157             }
  2158           }
  2159       /* We have done one clump.  Can we continue? */
  2160       if ((lim_byte - pos_byte) * direction < 0)
  2161         return ((0 - n) * direction);
  2162     }
  2163   return BYTE_TO_CHAR (pos_byte);
  2164 }
  2165 
  2166 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
  2167    for the overall match just found in the current buffer.
  2168    Also clear out the match data for registers 1 and up.  */
  2169 
  2170 static void
  2171 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
  2172 {
  2173   ptrdiff_t i;
  2174 
  2175   if (!NILP (Vinhibit_changing_match_data))
  2176     return;
  2177 
  2178   /* Make sure we have registers in which to store
  2179      the match position.  */
  2180   if (search_regs.num_regs == 0)
  2181     {
  2182       search_regs.start = xmalloc (2 * sizeof *search_regs.start);
  2183       search_regs.end = xmalloc (2 * sizeof *search_regs.end);
  2184       search_regs.num_regs = 2;
  2185     }
  2186 
  2187   /* Clear out the other registers.  */
  2188   for (i = 1; i < search_regs.num_regs; i++)
  2189     {
  2190       search_regs.start[i] = -1;
  2191       search_regs.end[i] = -1;
  2192     }
  2193 
  2194   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
  2195   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
  2196   XSETBUFFER (last_thing_searched, current_buffer);
  2197 }
  2198 
  2199 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
  2200        "MSearch backward: ",
  2201        doc: /* Search backward from point for STRING.
  2202 Set point to the beginning of the occurrence found, and return point.
  2203 An optional second argument bounds the search; it is a buffer position.
  2204   The match found must not begin before that position.  A value of nil
  2205   means search to the beginning of the accessible portion of the buffer.
  2206 Optional third argument, if t, means if fail just return nil (no error).
  2207   If not nil and not t, position at limit of search and return nil.
  2208 Optional fourth argument COUNT, if a positive number, means to search
  2209   for COUNT successive occurrences.  If COUNT is negative, search
  2210   forward, instead of backward, for -COUNT occurrences.  A value of
  2211   nil means the same as 1.
  2212 With COUNT positive, the match found is the COUNTth to last one (or
  2213   last, if COUNT is 1 or nil) in the buffer located entirely before
  2214   the origin of the search; correspondingly with COUNT negative.
  2215 
  2216 Search case-sensitivity is determined by the value of the variable
  2217 `case-fold-search', which see.
  2218 
  2219 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
  2220   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2221 {
  2222   return search_command (string, bound, noerror, count, -1, 0, 0);
  2223 }
  2224 
  2225 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
  2226        doc: /* Search forward from point for STRING.
  2227 Set point to the end of the occurrence found, and return point.
  2228 An optional second argument bounds the search; it is a buffer position.
  2229   The match found must not end after that position.  A value of nil
  2230   means search to the end of the accessible portion of the buffer.
  2231 Optional third argument, if t, means if fail just return nil (no error).
  2232   If not nil and not t, move to limit of search and return nil.
  2233 Optional fourth argument COUNT, if a positive number, means to search
  2234   for COUNT successive occurrences.  If COUNT is negative, search
  2235   backward, instead of forward, for -COUNT occurrences.  A value of
  2236   nil means the same as 1.
  2237 With COUNT positive, the match found is the COUNTth one (or first,
  2238   if COUNT is 1 or nil) in the buffer located entirely after the
  2239   origin of the search; correspondingly with COUNT negative.
  2240 
  2241 Search case-sensitivity is determined by the value of the variable
  2242 `case-fold-search', which see.
  2243 
  2244 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
  2245   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2246 {
  2247   return search_command (string, bound, noerror, count, 1, 0, 0);
  2248 }
  2249 
  2250 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
  2251        "sRE search backward: ",
  2252        doc: /* Search backward from point for regular expression REGEXP.
  2253 This function is almost identical to `re-search-forward', except that
  2254 by default it searches backward instead of forward, and the sign of
  2255 COUNT also indicates exactly the opposite searching direction.
  2256 See `re-search-forward' for details.
  2257 
  2258 Note that searching backwards may give a shorter match than expected,
  2259 because REGEXP is still matched in the forward direction.  See Info
  2260 anchor `(elisp) re-search-backward' for details.  */)
  2261   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2262 {
  2263   return search_command (regexp, bound, noerror, count, -1, 1, 0);
  2264 }
  2265 
  2266 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
  2267        "sRE search: ",
  2268        doc: /* Search forward from point for regular expression REGEXP.
  2269 Set point to the end of the occurrence found, and return point.
  2270 The optional second argument BOUND is a buffer position that bounds
  2271   the search.  The match found must not end after that position.  A
  2272   value of nil means search to the end of the accessible portion of
  2273   the buffer.
  2274 The optional third argument NOERROR indicates how errors are handled
  2275   when the search fails.  If it is nil or omitted, emit an error; if
  2276   it is t, simply return nil and do nothing; if it is neither nil nor
  2277   t, move to the limit of search and return nil.
  2278 The optional fourth argument COUNT is a number that indicates the
  2279   search direction and the number of occurrences to search for.  If it
  2280   is positive, search forward for COUNT successive occurrences; if it
  2281   is negative, search backward, instead of forward, for -COUNT
  2282   occurrences.  A value of nil means the same as 1.
  2283 With COUNT positive/negative, the match found is the COUNTth/-COUNTth
  2284   one in the buffer located entirely after/before the origin of the
  2285   search.
  2286 
  2287 Search case-sensitivity is determined by the value of the variable
  2288 `case-fold-search', which see.
  2289 
  2290 See also the functions `match-beginning', `match-end', `match-string',
  2291 and `replace-match'.  */)
  2292   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2293 {
  2294   return search_command (regexp, bound, noerror, count, 1, 1, 0);
  2295 }
  2296 
  2297 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
  2298        "sPosix search backward: ",
  2299        doc: /* Search backward from point for match for REGEXP according to Posix rules.
  2300 Find the longest match in accord with Posix regular expression rules.
  2301 Set point to the beginning of the occurrence found, and return point.
  2302 An optional second argument bounds the search; it is a buffer position.
  2303   The match found must not begin before that position.  A value of nil
  2304   means search to the beginning of the accessible portion of the buffer.
  2305 Optional third argument, if t, means if fail just return nil (no error).
  2306   If not nil and not t, position at limit of search and return nil.
  2307 Optional fourth argument COUNT, if a positive number, means to search
  2308   for COUNT successive occurrences.  If COUNT is negative, search
  2309   forward, instead of backward, for -COUNT occurrences.  A value of
  2310   nil means the same as 1.
  2311 With COUNT positive, the match found is the COUNTth to last one (or
  2312   last, if COUNT is 1 or nil) in the buffer located entirely before
  2313   the origin of the search; correspondingly with COUNT negative.
  2314 
  2315 Search case-sensitivity is determined by the value of the variable
  2316 `case-fold-search', which see.
  2317 
  2318 See also the functions `match-beginning', `match-end', `match-string',
  2319 and `replace-match'.  */)
  2320   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2321 {
  2322   return search_command (regexp, bound, noerror, count, -1, 1, 1);
  2323 }
  2324 
  2325 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
  2326        "sPosix search: ",
  2327        doc: /* Search forward from point for REGEXP according to Posix rules.
  2328 Find the longest match in accord with Posix regular expression rules.
  2329 Set point to the end of the occurrence found, and return point.
  2330 An optional second argument bounds the search; it is a buffer position.
  2331   The match found must not end after that position.  A value of nil
  2332   means search to the end of the accessible portion of the buffer.
  2333 Optional third argument, if t, means if fail just return nil (no error).
  2334   If not nil and not t, move to limit of search and return nil.
  2335 Optional fourth argument COUNT, if a positive number, means to search
  2336   for COUNT successive occurrences.  If COUNT is negative, search
  2337   backward, instead of forward, for -COUNT occurrences.  A value of
  2338   nil means the same as 1.
  2339 With COUNT positive, the match found is the COUNTth one (or first,
  2340   if COUNT is 1 or nil) in the buffer located entirely after the
  2341   origin of the search; correspondingly with COUNT negative.
  2342 
  2343 Search case-sensitivity is determined by the value of the variable
  2344 `case-fold-search', which see.
  2345 
  2346 See also the functions `match-beginning', `match-end', `match-string',
  2347 and `replace-match'.  */)
  2348   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2349 {
  2350   return search_command (regexp, bound, noerror, count, 1, 1, 1);
  2351 }
  2352 
  2353 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
  2354        doc: /* Replace text matched by last search with NEWTEXT.
  2355 Leave point at the end of the replacement text.
  2356 
  2357 If optional second arg FIXEDCASE is non-nil, do not alter the case of
  2358 the replacement text.  Otherwise, maybe capitalize the whole text, or
  2359 maybe just word initials, based on the replaced text.  If the replaced
  2360 text has only capital letters and has at least one multiletter word,
  2361 convert NEWTEXT to all caps.  Otherwise if all words are capitalized
  2362 in the replaced text, capitalize each word in NEWTEXT.  Note that
  2363 what exactly is a word is determined by the syntax tables in effect
  2364 in the current buffer.
  2365 
  2366 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
  2367 Otherwise treat `\\' as special:
  2368   `\\&' in NEWTEXT means substitute original matched text.
  2369   `\\N' means substitute what matched the Nth `\\(...\\)'.
  2370        If Nth parens didn't match, substitute nothing.
  2371   `\\\\' means insert one `\\'.
  2372   `\\?' is treated literally
  2373        (for compatibility with `query-replace-regexp').
  2374   Any other character following `\\' signals an error.
  2375 Case conversion does not apply to these substitutions.
  2376 
  2377 If optional fourth argument STRING is non-nil, it should be a string
  2378 to act on; this should be the string on which the previous match was
  2379 done via `string-match'.  In this case, `replace-match' creates and
  2380 returns a new string, made by copying STRING and replacing the part of
  2381 STRING that was matched (the original STRING itself is not altered).
  2382 
  2383 The optional fifth argument SUBEXP specifies a subexpression;
  2384 it says to replace just that subexpression with NEWTEXT,
  2385 rather than replacing the entire matched text.
  2386 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
  2387 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
  2388 NEWTEXT in place of subexp N.
  2389 This is useful only after a regular expression search or match,
  2390 since only regular expressions have distinguished subexpressions.  */)
  2391   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
  2392 {
  2393   enum { nochange, all_caps, cap_initial } case_action;
  2394   ptrdiff_t pos, pos_byte;
  2395   bool some_multiletter_word;
  2396   bool some_lowercase;
  2397   bool some_uppercase;
  2398   bool some_nonuppercase_initial;
  2399   int c, prevc;
  2400   ptrdiff_t sub;
  2401   ptrdiff_t opoint, newpoint;
  2402 
  2403   CHECK_STRING (newtext);
  2404 
  2405   if (! NILP (string))
  2406     CHECK_STRING (string);
  2407 
  2408   /* Most replacement texts don't contain any backslash directives in
  2409      the replacements.  Check whether that's the case, which will
  2410      enable us to take the fast path later.  */
  2411   if (NILP (literal)
  2412       && !memchr (SSDATA (newtext), '\\', SBYTES (newtext)))
  2413     literal = Qt;
  2414 
  2415   case_action = nochange;       /* We tried an initialization */
  2416                                 /* but some C compilers blew it */
  2417 
  2418   ptrdiff_t num_regs = search_regs.num_regs;
  2419   if (num_regs <= 0)
  2420     error ("`replace-match' called before any match found");
  2421 
  2422   sub = !NILP (subexp) ? check_integer_range (subexp, 0, num_regs - 1) : 0;
  2423   ptrdiff_t sub_start = search_regs.start[sub];
  2424   ptrdiff_t sub_end = search_regs.end[sub];
  2425   eassert (sub_start <= sub_end);
  2426 
  2427   /* Check whether the text to replace is present in the buffer/string.  */
  2428   if (! (NILP (string)
  2429          ? BEGV <= sub_start && sub_end <= ZV
  2430          : 0 <= sub_start && sub_end <= SCHARS (string)))
  2431     {
  2432       if (sub_start < 0)
  2433         xsignal2 (Qerror,
  2434                   build_string ("replace-match subexpression does not exist"),
  2435                   subexp);
  2436       args_out_of_range (make_fixnum (sub_start), make_fixnum (sub_end));
  2437     }
  2438 
  2439   if (NILP (fixedcase))
  2440     {
  2441       /* Decide how to casify by examining the matched text. */
  2442       ptrdiff_t last;
  2443 
  2444       pos = sub_start;
  2445       last = sub_end;
  2446 
  2447       if (NILP (string))
  2448         pos_byte = CHAR_TO_BYTE (pos);
  2449       else
  2450         pos_byte = string_char_to_byte (string, pos);
  2451 
  2452       prevc = '\n';
  2453       case_action = all_caps;
  2454 
  2455       /* some_multiletter_word is set nonzero if any original word
  2456          is more than one letter long. */
  2457       some_multiletter_word = 0;
  2458       some_lowercase = 0;
  2459       some_nonuppercase_initial = 0;
  2460       some_uppercase = 0;
  2461 
  2462       while (pos < last)
  2463         {
  2464           if (NILP (string))
  2465             {
  2466               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
  2467               inc_both (&pos, &pos_byte);
  2468             }
  2469           else
  2470             c = fetch_string_char_as_multibyte_advance (string,
  2471                                                         &pos, &pos_byte);
  2472 
  2473           if (lowercasep (c))
  2474             {
  2475               /* Cannot be all caps if any original char is lower case */
  2476 
  2477               some_lowercase = 1;
  2478               if (SYNTAX (prevc) != Sword)
  2479                 some_nonuppercase_initial = 1;
  2480               else
  2481                 some_multiletter_word = 1;
  2482             }
  2483           else if (uppercasep (c))
  2484             {
  2485               some_uppercase = 1;
  2486               if (SYNTAX (prevc) != Sword)
  2487                 ;
  2488               else
  2489                 some_multiletter_word = 1;
  2490             }
  2491           else
  2492             {
  2493               /* If the initial is a caseless word constituent,
  2494                  treat that like a lowercase initial.  */
  2495               if (SYNTAX (prevc) != Sword)
  2496                 some_nonuppercase_initial = 1;
  2497             }
  2498 
  2499           prevc = c;
  2500         }
  2501 
  2502       /* Convert to all caps if the old text is all caps
  2503          and has at least one multiletter word.  */
  2504       if (! some_lowercase && some_multiletter_word)
  2505         case_action = all_caps;
  2506       /* Capitalize each word, if the old text has all capitalized words.  */
  2507       else if (!some_nonuppercase_initial && some_multiletter_word)
  2508         case_action = cap_initial;
  2509       else if (!some_nonuppercase_initial && some_uppercase)
  2510         /* Should x -> yz, operating on X, give Yz or YZ?
  2511            We'll assume the latter.  */
  2512         case_action = all_caps;
  2513       else
  2514         case_action = nochange;
  2515     }
  2516 
  2517   /* Do replacement in a string.  */
  2518   if (!NILP (string))
  2519     {
  2520       Lisp_Object before, after;
  2521 
  2522       before = Fsubstring (string, make_fixnum (0), make_fixnum (sub_start));
  2523       after = Fsubstring (string, make_fixnum (sub_end), Qnil);
  2524 
  2525       /* Substitute parts of the match into NEWTEXT
  2526          if desired.  */
  2527       if (NILP (literal))
  2528         {
  2529           ptrdiff_t lastpos = 0;
  2530           ptrdiff_t lastpos_byte = 0;
  2531           /* We build up the substituted string in ACCUM.  */
  2532           Lisp_Object accum;
  2533           Lisp_Object middle;
  2534           ptrdiff_t length = SBYTES (newtext);
  2535 
  2536           accum = Qnil;
  2537 
  2538           for (pos_byte = 0, pos = 0; pos_byte < length;)
  2539             {
  2540               ptrdiff_t substart = -1;
  2541               ptrdiff_t subend = 0;
  2542               bool delbackslash = 0;
  2543 
  2544               c = fetch_string_char_advance (newtext, &pos, &pos_byte);
  2545 
  2546               if (c == '\\')
  2547                 {
  2548                   c = fetch_string_char_advance (newtext, &pos, &pos_byte);
  2549 
  2550                   if (c == '&')
  2551                     {
  2552                       substart = sub_start;
  2553                       subend = sub_end;
  2554                     }
  2555                   else if (c >= '1' && c <= '9')
  2556                     {
  2557                       if (c - '0' < num_regs
  2558                           && search_regs.start[c - '0'] >= 0)
  2559                         {
  2560                           substart = search_regs.start[c - '0'];
  2561                           subend = search_regs.end[c - '0'];
  2562                         }
  2563                       else
  2564                         {
  2565                           /* If that subexp did not match,
  2566                              replace \\N with nothing.  */
  2567                           substart = 0;
  2568                           subend = 0;
  2569                         }
  2570                     }
  2571                   else if (c == '\\')
  2572                     delbackslash = 1;
  2573                   else if (c != '?')
  2574                     error ("Invalid use of `\\' in replacement text");
  2575                 }
  2576               if (substart >= 0)
  2577                 {
  2578                   if (pos - 2 != lastpos)
  2579                     middle = substring_both (newtext, lastpos,
  2580                                              lastpos_byte,
  2581                                              pos - 2, pos_byte - 2);
  2582                   else
  2583                     middle = Qnil;
  2584                   accum = concat3 (accum, middle,
  2585                                    Fsubstring (string,
  2586                                                make_fixnum (substart),
  2587                                                make_fixnum (subend)));
  2588                   lastpos = pos;
  2589                   lastpos_byte = pos_byte;
  2590                 }
  2591               else if (delbackslash)
  2592                 {
  2593                   middle = substring_both (newtext, lastpos,
  2594                                            lastpos_byte,
  2595                                            pos - 1, pos_byte - 1);
  2596 
  2597                   accum = concat2 (accum, middle);
  2598                   lastpos = pos;
  2599                   lastpos_byte = pos_byte;
  2600                 }
  2601             }
  2602 
  2603           if (pos != lastpos)
  2604             middle = substring_both (newtext, lastpos,
  2605                                      lastpos_byte,
  2606                                      pos, pos_byte);
  2607           else
  2608             middle = Qnil;
  2609 
  2610           newtext = concat2 (accum, middle);
  2611         }
  2612 
  2613       /* Do case substitution in NEWTEXT if desired.  */
  2614       if (case_action == all_caps)
  2615         newtext = Fupcase (newtext);
  2616       else if (case_action == cap_initial)
  2617         newtext = Fupcase_initials (newtext);
  2618 
  2619       return concat3 (before, newtext, after);
  2620     }
  2621 
  2622   /* Record point.  A nonpositive OPOINT is actually an offset from ZV.  */
  2623   opoint = PT <= sub_start ? PT : max (PT, sub_end) - ZV;
  2624 
  2625   /* If we want non-literal replacement,
  2626      perform substitution on the replacement string.  */
  2627   if (NILP (literal))
  2628     {
  2629       ptrdiff_t length = SBYTES (newtext);
  2630       unsigned char *substed;
  2631       ptrdiff_t substed_alloc_size, substed_len;
  2632       bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
  2633       bool str_multibyte = STRING_MULTIBYTE (newtext);
  2634       bool really_changed = 0;
  2635 
  2636       substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
  2637                             ? length * 2 + 100
  2638                             : STRING_BYTES_BOUND);
  2639       substed = xmalloc (substed_alloc_size);
  2640       substed_len = 0;
  2641 
  2642       /* Go thru NEWTEXT, producing the actual text to insert in
  2643          SUBSTED while adjusting multibyteness to that of the current
  2644          buffer.  */
  2645 
  2646       for (pos_byte = 0, pos = 0; pos_byte < length;)
  2647         {
  2648           unsigned char str[MAX_MULTIBYTE_LENGTH];
  2649           const unsigned char *add_stuff = NULL;
  2650           ptrdiff_t add_len = 0;
  2651           ptrdiff_t idx = -1;
  2652           ptrdiff_t begbyte UNINIT;
  2653 
  2654           if (str_multibyte)
  2655             {
  2656               c = fetch_string_char_advance_no_check (newtext,
  2657                                                       &pos, &pos_byte);
  2658               if (!buf_multibyte)
  2659                 c = CHAR_TO_BYTE8 (c);
  2660             }
  2661           else
  2662             {
  2663               /* Note that we don't have to increment POS.  */
  2664               c = SREF (newtext, pos_byte++);
  2665               if (buf_multibyte)
  2666                 c = make_char_multibyte (c);
  2667             }
  2668 
  2669           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
  2670              or set IDX to a match index, which means put that part
  2671              of the buffer text into SUBSTED.  */
  2672 
  2673           if (c == '\\')
  2674             {
  2675               really_changed = 1;
  2676 
  2677               if (str_multibyte)
  2678                 {
  2679                   c = fetch_string_char_advance_no_check (newtext,
  2680                                                           &pos, &pos_byte);
  2681                   if (!buf_multibyte && !ASCII_CHAR_P (c))
  2682                     c = CHAR_TO_BYTE8 (c);
  2683                 }
  2684               else
  2685                 {
  2686                   c = SREF (newtext, pos_byte++);
  2687                   if (buf_multibyte)
  2688                     c = make_char_multibyte (c);
  2689                 }
  2690 
  2691               if (c == '&')
  2692                 idx = sub;
  2693               else if ('1' <= c && c <= '9' && c - '0' < num_regs)
  2694                 {
  2695                   if (search_regs.start[c - '0'] >= 1)
  2696                     idx = c - '0';
  2697                 }
  2698               else if (c == '\\')
  2699                 add_len = 1, add_stuff = (unsigned char *) "\\";
  2700               else
  2701                 {
  2702                   xfree (substed);
  2703                   error ("Invalid use of `\\' in replacement text");
  2704                 }
  2705             }
  2706           else
  2707             {
  2708               add_len = CHAR_STRING (c, str);
  2709               add_stuff = str;
  2710             }
  2711 
  2712           /* If we want to copy part of a previous match,
  2713              set up ADD_STUFF and ADD_LEN to point to it.  */
  2714           if (idx >= 0)
  2715             {
  2716               begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
  2717               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
  2718               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
  2719                 move_gap_both (search_regs.start[idx], begbyte);
  2720             }
  2721 
  2722           /* Now the stuff we want to add to SUBSTED
  2723              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
  2724 
  2725           /* Make sure SUBSTED is big enough.  */
  2726           if (substed_alloc_size - substed_len < add_len)
  2727             substed =
  2728               xpalloc (substed, &substed_alloc_size,
  2729                        add_len - (substed_alloc_size - substed_len),
  2730                        STRING_BYTES_BOUND, 1);
  2731 
  2732           /* We compute this after the call to xpalloc, because that
  2733              could cause buffer text be relocated when ralloc.c is used.  */
  2734           if (idx >= 0)
  2735             add_stuff = BYTE_POS_ADDR (begbyte);
  2736 
  2737           /* Now add to the end of SUBSTED.  */
  2738           if (add_stuff)
  2739             {
  2740               memcpy (substed + substed_len, add_stuff, add_len);
  2741               substed_len += add_len;
  2742             }
  2743         }
  2744 
  2745       if (really_changed)
  2746         newtext = make_specified_string ((const char *) substed, -1,
  2747                                          substed_len, buf_multibyte);
  2748       xfree (substed);
  2749     }
  2750 
  2751   newpoint = sub_start + SCHARS (newtext);
  2752 
  2753   /* Replace the old text with the new in the cleanest possible way.  */
  2754   replace_range (sub_start, sub_end, newtext, 1, 0, 1, true, true);
  2755 
  2756   if (case_action == all_caps)
  2757     Fupcase_region (make_fixnum (search_regs.start[sub]),
  2758                     make_fixnum (newpoint),
  2759                     Qnil);
  2760   else if (case_action == cap_initial)
  2761     Fupcase_initials_region (make_fixnum (search_regs.start[sub]),
  2762                              make_fixnum (newpoint), Qnil);
  2763 
  2764   /* The replace_range etc. functions can trigger modification hooks
  2765      (see signal_before_change and signal_after_change).  Try to error
  2766      out if these hooks clobber the match data since clobbering can
  2767      result in confusing bugs.  We used to check for changes in
  2768      search_regs start and end, but that fails if modification hooks
  2769      remove or add text earlier in the buffer, so just check num_regs
  2770      now. */
  2771   if (search_regs.num_regs != num_regs)
  2772     error ("Match data clobbered by buffer modification hooks");
  2773 
  2774   /* Put point back where it was in the text, if possible.  */
  2775   TEMP_SET_PT (clip_to_bounds (BEGV, opoint + (opoint <= 0 ? ZV : 0), ZV));
  2776   /* Now move point "officially" to the end of the inserted replacement.  */
  2777   move_if_not_intangible (newpoint);
  2778 
  2779   signal_after_change (sub_start, sub_end - sub_start, SCHARS (newtext));
  2780   update_compositions (sub_start, newpoint, CHECK_BORDER);
  2781 
  2782   return Qnil;
  2783 }
  2784 
  2785 static Lisp_Object
  2786 match_limit (Lisp_Object num, bool beginningp)
  2787 {
  2788   EMACS_INT n;
  2789 
  2790   CHECK_FIXNUM (num);
  2791   n = XFIXNUM (num);
  2792   if (n < 0)
  2793     args_out_of_range (num, make_fixnum (0));
  2794   if (search_regs.num_regs <= 0)
  2795     error ("No match data, because no search succeeded");
  2796   if (n >= search_regs.num_regs
  2797       || search_regs.start[n] < 0)
  2798     return Qnil;
  2799   return (make_fixnum ((beginningp) ? search_regs.start[n]
  2800                                     : search_regs.end[n]));
  2801 }
  2802 
  2803 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
  2804        doc: /* Return position of start of text matched by last search.
  2805 SUBEXP, a number, specifies which parenthesized expression in the last
  2806   regexp.
  2807 Value is nil if SUBEXPth pair didn't match, or there were less than
  2808   SUBEXP pairs.
  2809 Zero means the entire text matched by the whole regexp or whole string.
  2810 
  2811 Return value is undefined if the last search failed.  */)
  2812   (Lisp_Object subexp)
  2813 {
  2814   return match_limit (subexp, 1);
  2815 }
  2816 
  2817 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
  2818        doc: /* Return position of end of text matched by last search.
  2819 SUBEXP, a number, specifies which parenthesized expression in the last
  2820   regexp.
  2821 Value is nil if SUBEXPth pair didn't match, or there were less than
  2822   SUBEXP pairs.
  2823 Zero means the entire text matched by the whole regexp or whole string.
  2824 
  2825 Return value is undefined if the last search failed.  */)
  2826   (Lisp_Object subexp)
  2827 {
  2828   return match_limit (subexp, 0);
  2829 }
  2830 
  2831 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
  2832        doc: /* Return a list of positions that record text matched by the last search.
  2833 Element 2N of the returned list is the position of the beginning of the
  2834 match of the Nth subexpression; it corresponds to `(match-beginning N)';
  2835 element 2N + 1 is the position of the end of the match of the Nth
  2836 subexpression; it corresponds to `(match-end N)'.  See `match-beginning'
  2837 and `match-end'.
  2838 If the last search was on a buffer, all the elements are by default
  2839 markers or nil (nil when the Nth pair didn't match); they are integers
  2840 or nil if the search was on a string.  But if the optional argument
  2841 INTEGERS is non-nil, the elements that represent buffer positions are
  2842 always integers, not markers, and (if the search was on a buffer) the
  2843 buffer itself is appended to the list as one additional element.
  2844 
  2845 Use `set-match-data' to reinstate the match data from the elements of
  2846 this list.
  2847 
  2848 Note that non-matching optional groups at the end of the regexp are
  2849 elided instead of being represented with two `nil's each.  For instance:
  2850 
  2851   (progn
  2852     (string-match "^\\(a\\)?\\(b\\)\\(c\\)?$" "b")
  2853     (match-data))
  2854   => (0 1 nil nil 0 1)
  2855 
  2856 If REUSE is a list, store the value in REUSE by destructively modifying it.
  2857 If REUSE is long enough to hold all the values, its length remains the
  2858 same, and any unused elements are set to nil.  If REUSE is not long
  2859 enough, it is extended.  Note that if REUSE is long enough and INTEGERS
  2860 is non-nil, no consing is done to make the return value; this minimizes GC.
  2861 
  2862 If optional third argument RESEAT is non-nil, any previous markers on the
  2863 REUSE list will be modified to point to nowhere.
  2864 
  2865 Return value is undefined if the last search failed.  */)
  2866   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
  2867 {
  2868   Lisp_Object tail, prev;
  2869   Lisp_Object *data;
  2870   ptrdiff_t i, len;
  2871 
  2872   if (!NILP (reseat))
  2873     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
  2874       if (MARKERP (XCAR (tail)))
  2875         {
  2876           unchain_marker (XMARKER (XCAR (tail)));
  2877           XSETCAR (tail, Qnil);
  2878         }
  2879 
  2880   if (NILP (last_thing_searched))
  2881     return Qnil;
  2882 
  2883   prev = Qnil;
  2884 
  2885   USE_SAFE_ALLOCA;
  2886   SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
  2887 
  2888   len = 0;
  2889   for (i = 0; i < search_regs.num_regs; i++)
  2890     {
  2891       ptrdiff_t start = search_regs.start[i];
  2892       if (start >= 0)
  2893         {
  2894           if (EQ (last_thing_searched, Qt)
  2895               || ! NILP (integers))
  2896             {
  2897               XSETFASTINT (data[2 * i], start);
  2898               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
  2899             }
  2900           else if (BUFFERP (last_thing_searched))
  2901             {
  2902               data[2 * i] = Fmake_marker ();
  2903               Fset_marker (data[2 * i],
  2904                            make_fixnum (start),
  2905                            last_thing_searched);
  2906               data[2 * i + 1] = Fmake_marker ();
  2907               Fset_marker (data[2 * i + 1],
  2908                            make_fixnum (search_regs.end[i]),
  2909                            last_thing_searched);
  2910             }
  2911           else
  2912             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
  2913             emacs_abort ();
  2914 
  2915           len = 2 * i + 2;
  2916         }
  2917       else
  2918         data[2 * i] = data[2 * i + 1] = Qnil;
  2919     }
  2920 
  2921   if (BUFFERP (last_thing_searched) && !NILP (integers))
  2922     {
  2923       data[len] = last_thing_searched;
  2924       len++;
  2925     }
  2926 
  2927   /* If REUSE is not usable, cons up the values and return them.  */
  2928   if (! CONSP (reuse))
  2929     reuse = Flist (len, data);
  2930   else
  2931     {
  2932       /* If REUSE is a list, store as many value elements as will fit
  2933          into the elements of REUSE.  */
  2934       for (i = 0, tail = reuse; CONSP (tail);
  2935            i++, tail = XCDR (tail))
  2936         {
  2937           if (i < len)
  2938             XSETCAR (tail, data[i]);
  2939           else
  2940             XSETCAR (tail, Qnil);
  2941           prev = tail;
  2942         }
  2943 
  2944       /* If we couldn't fit all value elements into REUSE,
  2945          cons up the rest of them and add them to the end of REUSE.  */
  2946       if (i < len)
  2947         XSETCDR (prev, Flist (len - i, data + i));
  2948     }
  2949 
  2950   SAFE_FREE ();
  2951   return reuse;
  2952 }
  2953 
  2954 /* We used to have an internal use variant of `reseat' described as:
  2955 
  2956       If RESEAT is `evaporate', put the markers back on the free list
  2957       immediately.  No other references to the markers must exist in this
  2958       case, so it is used only internally on the unwind stack and
  2959       save-match-data from Lisp.
  2960 
  2961    But it was ill-conceived: those supposedly-internal markers get exposed via
  2962    the undo-list, so freeing them here is unsafe.  */
  2963 
  2964 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
  2965        doc: /* Set internal data on last search match from elements of LIST.
  2966 LIST should have been created by calling `match-data' previously.
  2967 
  2968 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
  2969   (register Lisp_Object list, Lisp_Object reseat)
  2970 {
  2971   ptrdiff_t i;
  2972   register Lisp_Object marker;
  2973 
  2974   if (running_asynch_code)
  2975     save_search_regs ();
  2976 
  2977   CHECK_LIST (list);
  2978 
  2979   /* Unless we find a marker with a buffer or an explicit buffer
  2980      in LIST, assume that this match data came from a string.  */
  2981   last_thing_searched = Qt;
  2982 
  2983   /* Allocate registers if they don't already exist.  */
  2984   {
  2985     ptrdiff_t length = list_length (list) / 2;
  2986 
  2987     if (length > search_regs.num_regs)
  2988       {
  2989         ptrdiff_t num_regs = search_regs.num_regs;
  2990         search_regs.start =
  2991           xpalloc (search_regs.start, &num_regs, length - num_regs,
  2992                    min (PTRDIFF_MAX, UINT_MAX), sizeof *search_regs.start);
  2993         search_regs.end =
  2994           xrealloc (search_regs.end, num_regs * sizeof *search_regs.end);
  2995 
  2996         for (i = search_regs.num_regs; i < num_regs; i++)
  2997           search_regs.start[i] = -1;
  2998 
  2999         search_regs.num_regs = num_regs;
  3000       }
  3001 
  3002     for (i = 0; CONSP (list); i++)
  3003       {
  3004         marker = XCAR (list);
  3005         if (BUFFERP (marker))
  3006           {
  3007             last_thing_searched = marker;
  3008             break;
  3009           }
  3010         if (i >= length)
  3011           break;
  3012         if (NILP (marker))
  3013           {
  3014             search_regs.start[i] = -1;
  3015             list = XCDR (list);
  3016           }
  3017         else
  3018           {
  3019             Lisp_Object from;
  3020             Lisp_Object m;
  3021 
  3022             m = marker;
  3023             if (MARKERP (marker))
  3024               {
  3025                 if (XMARKER (marker)->buffer == 0)
  3026                   XSETFASTINT (marker, 0);
  3027                 else
  3028                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
  3029               }
  3030 
  3031             CHECK_FIXNUM_COERCE_MARKER (marker);
  3032             from = marker;
  3033 
  3034             if (!NILP (reseat) && MARKERP (m))
  3035               {
  3036                 unchain_marker (XMARKER (m));
  3037                 XSETCAR (list, Qnil);
  3038               }
  3039 
  3040             if ((list = XCDR (list), !CONSP (list)))
  3041               break;
  3042 
  3043             m = marker = XCAR (list);
  3044 
  3045             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
  3046               XSETFASTINT (marker, 0);
  3047 
  3048             CHECK_FIXNUM_COERCE_MARKER (marker);
  3049             if (PTRDIFF_MIN <= XFIXNUM (from) && XFIXNUM (from) <= PTRDIFF_MAX
  3050                 && PTRDIFF_MIN <= XFIXNUM (marker)
  3051                 && XFIXNUM (marker) <= PTRDIFF_MAX)
  3052               {
  3053                 search_regs.start[i] = XFIXNUM (from);
  3054                 search_regs.end[i] = XFIXNUM (marker);
  3055               }
  3056             else
  3057               {
  3058                 search_regs.start[i] = -1;
  3059               }
  3060 
  3061             if (!NILP (reseat) && MARKERP (m))
  3062               {
  3063                 unchain_marker (XMARKER (m));
  3064                 XSETCAR (list, Qnil);
  3065               }
  3066           }
  3067         list = XCDR (list);
  3068       }
  3069 
  3070     for (; i < search_regs.num_regs; i++)
  3071       search_regs.start[i] = -1;
  3072   }
  3073 
  3074   return Qnil;
  3075 }
  3076 
  3077 DEFUN ("match-data--translate", Fmatch_data__translate, Smatch_data__translate,
  3078        1, 1, 0,
  3079        doc: /* Add N to all positions in the match data.  Internal.  */)
  3080   (Lisp_Object n)
  3081 {
  3082   CHECK_FIXNUM (n);
  3083   EMACS_INT delta = XFIXNUM (n);
  3084   if (!NILP (last_thing_searched))
  3085     for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
  3086       if (search_regs.start[i] >= 0)
  3087         {
  3088           search_regs.start[i] = max (0, search_regs.start[i] + delta);
  3089           search_regs.end[i] = max (0, search_regs.end[i] + delta);
  3090         }
  3091   return Qnil;
  3092 }
  3093 
  3094 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
  3095    if asynchronous code (filter or sentinel) is running. */
  3096 static void
  3097 save_search_regs (void)
  3098 {
  3099   if (saved_search_regs.num_regs == 0)
  3100     {
  3101       saved_search_regs = search_regs;
  3102       saved_last_thing_searched = last_thing_searched;
  3103       last_thing_searched = Qnil;
  3104       search_regs.num_regs = 0;
  3105       search_regs.start = 0;
  3106       search_regs.end = 0;
  3107     }
  3108 }
  3109 
  3110 /* Called upon exit from filters and sentinels. */
  3111 void
  3112 restore_search_regs (void)
  3113 {
  3114   if (saved_search_regs.num_regs != 0)
  3115     {
  3116       if (search_regs.num_regs > 0)
  3117         {
  3118           xfree (search_regs.start);
  3119           xfree (search_regs.end);
  3120         }
  3121       search_regs = saved_search_regs;
  3122       last_thing_searched = saved_last_thing_searched;
  3123       saved_last_thing_searched = Qnil;
  3124       saved_search_regs.num_regs = 0;
  3125     }
  3126 }
  3127 
  3128 /* Called from replace-match via replace_range.  */
  3129 void
  3130 update_search_regs (ptrdiff_t oldstart, ptrdiff_t oldend, ptrdiff_t newend)
  3131 {
  3132   /* Adjust search data for this change.  */
  3133   ptrdiff_t change = newend - oldend;
  3134   ptrdiff_t i;
  3135 
  3136   for (i = 0; i < search_regs.num_regs; i++)
  3137     {
  3138       if (search_regs.start[i] >= oldend)
  3139         search_regs.start[i] += change;
  3140       else if (search_regs.start[i] > oldstart)
  3141         search_regs.start[i] = oldstart;
  3142       if (search_regs.end[i] >= oldend)
  3143         search_regs.end[i] += change;
  3144       else if (search_regs.end[i] > oldstart)
  3145         search_regs.end[i] = oldstart;
  3146     }
  3147 }
  3148 
  3149 static void
  3150 unwind_set_match_data (Lisp_Object list)
  3151 {
  3152   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
  3153   Fset_match_data (list, Qt);
  3154 }
  3155 
  3156 /* Called to unwind protect the match data.  */
  3157 void
  3158 record_unwind_save_match_data (void)
  3159 {
  3160   record_unwind_protect (unwind_set_match_data,
  3161                          Fmatch_data (Qnil, Qnil, Qnil));
  3162 }
  3163 
  3164 /* Quote a string to deactivate reg-expr chars */
  3165 
  3166 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
  3167        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
  3168   (Lisp_Object string)
  3169 {
  3170   char *in, *out, *end;
  3171   char *temp;
  3172   ptrdiff_t backslashes_added = 0;
  3173 
  3174   CHECK_STRING (string);
  3175 
  3176   USE_SAFE_ALLOCA;
  3177   SAFE_NALLOCA (temp, 2, SBYTES (string));
  3178 
  3179   /* Now copy the data into the new string, inserting escapes. */
  3180 
  3181   in = SSDATA (string);
  3182   end = in + SBYTES (string);
  3183   out = temp;
  3184 
  3185   for (; in != end; in++)
  3186     {
  3187       if (*in == '['
  3188           || *in == '*' || *in == '.' || *in == '\\'
  3189           || *in == '?' || *in == '+'
  3190           || *in == '^' || *in == '$')
  3191         *out++ = '\\', backslashes_added++;
  3192       *out++ = *in;
  3193     }
  3194 
  3195   Lisp_Object result
  3196     = (backslashes_added > 0
  3197        ? make_specified_string (temp,
  3198                                 SCHARS (string) + backslashes_added,
  3199                                 out - temp,
  3200                                 STRING_MULTIBYTE (string))
  3201        : string);
  3202   SAFE_FREE ();
  3203   return result;
  3204 }
  3205 
  3206 /* Like find_newline, but doesn't use the cache, and only searches forward.  */
  3207 ptrdiff_t
  3208 find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
  3209                ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
  3210                ptrdiff_t *bytepos, bool allow_quit)
  3211 {
  3212   if (count > 0)
  3213     {
  3214       if (!end)
  3215         end = ZV, end_byte = ZV_BYTE;
  3216     }
  3217   else
  3218     {
  3219       if (!end)
  3220         end = BEGV, end_byte = BEGV_BYTE;
  3221     }
  3222   if (end_byte == -1)
  3223     end_byte = CHAR_TO_BYTE (end);
  3224 
  3225   if (counted)
  3226     *counted = count;
  3227 
  3228   if (count > 0)
  3229     while (start != end)
  3230       {
  3231         /* Our innermost scanning loop is very simple; it doesn't know
  3232            about gaps, buffer ends, or the newline cache.  ceiling is
  3233            the position of the last character before the next such
  3234            obstacle --- the last character the dumb search loop should
  3235            examine.  */
  3236         ptrdiff_t tem, ceiling_byte = end_byte - 1;
  3237 
  3238         if (start_byte == -1)
  3239           start_byte = CHAR_TO_BYTE (start);
  3240 
  3241         /* The dumb loop can only scan text stored in contiguous
  3242            bytes. BUFFER_CEILING_OF returns the last character
  3243            position that is contiguous, so the ceiling is the
  3244            position after that.  */
  3245         tem = BUFFER_CEILING_OF (start_byte);
  3246         ceiling_byte = min (tem, ceiling_byte);
  3247 
  3248         {
  3249           /* The termination address of the dumb loop.  */
  3250           unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
  3251           ptrdiff_t lim_byte = ceiling_byte + 1;
  3252 
  3253           /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
  3254              of the base, the cursor, and the next line.  */
  3255           ptrdiff_t base = start_byte - lim_byte;
  3256           ptrdiff_t cursor, next;
  3257 
  3258           for (cursor = base; cursor < 0; cursor = next)
  3259             {
  3260               /* The dumb loop.  */
  3261               unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
  3262               next = nl ? nl - lim_addr : 0;
  3263 
  3264               if (! nl)
  3265                 break;
  3266               next++;
  3267 
  3268               if (--count == 0)
  3269                 {
  3270                   if (bytepos)
  3271                     *bytepos = lim_byte + next;
  3272                   return BYTE_TO_CHAR (lim_byte + next);
  3273                 }
  3274               if (allow_quit)
  3275                 maybe_quit ();
  3276             }
  3277 
  3278           start_byte = lim_byte;
  3279           start = BYTE_TO_CHAR (start_byte);
  3280         }
  3281       }
  3282 
  3283   if (counted)
  3284     *counted -= count;
  3285   if (bytepos)
  3286     {
  3287       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
  3288       eassert (*bytepos == CHAR_TO_BYTE (start));
  3289     }
  3290   return start;
  3291 }
  3292 
  3293 DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
  3294        0, 1, 0,
  3295        doc: /* Check the newline cache of BUFFER against buffer contents.
  3296 
  3297 BUFFER defaults to the current buffer.
  3298 
  3299 Value is an array of 2 sub-arrays of buffer positions for newlines,
  3300 the first based on the cache, the second based on actually scanning
  3301 the buffer.  If the buffer doesn't have a cache, the value is nil.  */)
  3302   (Lisp_Object buffer)
  3303 {
  3304   struct buffer *buf, *old = NULL;
  3305   ptrdiff_t nl_count_cache, nl_count_buf;
  3306   Lisp_Object cache_newlines, buf_newlines, val;
  3307   ptrdiff_t from, found, i;
  3308 
  3309   if (NILP (buffer))
  3310     buf = current_buffer;
  3311   else
  3312     {
  3313       CHECK_BUFFER (buffer);
  3314       buf = XBUFFER (buffer);
  3315       old = current_buffer;
  3316     }
  3317   if (buf->base_buffer)
  3318     buf = buf->base_buffer;
  3319 
  3320   /* If the buffer doesn't have a newline cache, return nil.  */
  3321   if (NILP (BVAR (buf, cache_long_scans))
  3322       || buf->newline_cache == NULL)
  3323     return Qnil;
  3324 
  3325   /* find_newline can only work on the current buffer.  */
  3326   if (old != NULL)
  3327     set_buffer_internal_1 (buf);
  3328 
  3329   /* How many newlines are there according to the cache?  */
  3330   find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
  3331                 TYPE_MAXIMUM (ptrdiff_t), &nl_count_cache, NULL, true);
  3332 
  3333   /* Create vector and populate it.  */
  3334   cache_newlines = make_vector (nl_count_cache, make_fixnum (-1));
  3335 
  3336   if (nl_count_cache)
  3337     {
  3338       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
  3339         {
  3340           ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
  3341 
  3342           found = find_newline (from, from_byte, 0, -1, 1, &counted,
  3343                                 NULL, true);
  3344           if (counted == 0 || i >= nl_count_cache)
  3345             break;
  3346           ASET (cache_newlines, i, make_fixnum (found - 1));
  3347         }
  3348     }
  3349 
  3350   /* Now do the same, but without using the cache.  */
  3351   find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
  3352                  TYPE_MAXIMUM (ptrdiff_t), &nl_count_buf, NULL, true);
  3353   buf_newlines = make_vector (nl_count_buf, make_fixnum (-1));
  3354   if (nl_count_buf)
  3355     {
  3356       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
  3357         {
  3358           ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
  3359 
  3360           found = find_newline1 (from, from_byte, 0, -1, 1, &counted,
  3361                                  NULL, true);
  3362           if (counted == 0 || i >= nl_count_buf)
  3363             break;
  3364           ASET (buf_newlines, i, make_fixnum (found - 1));
  3365         }
  3366     }
  3367 
  3368   /* Construct the value and return it.  */
  3369   val = CALLN (Fvector, cache_newlines, buf_newlines);
  3370 
  3371   if (old != NULL)
  3372     set_buffer_internal_1 (old);
  3373   return val;
  3374 }
  3375 
  3376 
  3377 static void syms_of_search_for_pdumper (void);
  3378 
  3379 void
  3380 syms_of_search (void)
  3381 {
  3382   for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
  3383     {
  3384       staticpro (&searchbufs[i].regexp);
  3385       staticpro (&searchbufs[i].f_whitespace_regexp);
  3386       staticpro (&searchbufs[i].syntax_table);
  3387     }
  3388 
  3389   /* Error condition used for failing searches.  */
  3390   DEFSYM (Qsearch_failed, "search-failed");
  3391 
  3392   /* Error condition used for failing searches started by user, i.e.,
  3393      where failure should not invoke the debugger.  */
  3394   DEFSYM (Quser_search_failed, "user-search-failed");
  3395 
  3396   /* Error condition signaled when regexp compile_pattern fails.  */
  3397   DEFSYM (Qinvalid_regexp, "invalid-regexp");
  3398 
  3399   Fput (Qsearch_failed, Qerror_conditions,
  3400         pure_list (Qsearch_failed, Qerror));
  3401   Fput (Qsearch_failed, Qerror_message,
  3402         build_pure_c_string ("Search failed"));
  3403 
  3404   Fput (Quser_search_failed, Qerror_conditions,
  3405         pure_list (Quser_search_failed, Quser_error, Qsearch_failed, Qerror));
  3406   Fput (Quser_search_failed, Qerror_message,
  3407         build_pure_c_string ("Search failed"));
  3408 
  3409   Fput (Qinvalid_regexp, Qerror_conditions,
  3410         pure_list (Qinvalid_regexp, Qerror));
  3411   Fput (Qinvalid_regexp, Qerror_message,
  3412         build_pure_c_string ("Invalid regexp"));
  3413 
  3414   re_match_object = Qnil;
  3415   staticpro (&re_match_object);
  3416 
  3417   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
  3418       doc: /* Regexp to substitute for bunches of spaces in regexp search.
  3419 Some commands use this for user-specified regexps.
  3420 Spaces that occur inside character classes or repetition operators
  3421 or other such regexp constructs are not replaced with this.
  3422 A value of nil (which is the normal value) means treat spaces
  3423 literally.  Note that a value with capturing groups can change the
  3424 numbering of existing capture groups in unexpected ways.  */);
  3425   Vsearch_spaces_regexp = Qnil;
  3426 
  3427   DEFSYM (Qinhibit_changing_match_data, "inhibit-changing-match-data");
  3428   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
  3429       doc: /* Internal use only.
  3430 If non-nil, the primitive searching and matching functions
  3431 such as `looking-at', `string-match', `re-search-forward', etc.,
  3432 do not set the match data.  The proper way to use this variable
  3433 is to bind it with `let' around a small expression.  */);
  3434   Vinhibit_changing_match_data = Qnil;
  3435 
  3436   defsubr (&Slooking_at);
  3437   defsubr (&Sposix_looking_at);
  3438   defsubr (&Sstring_match);
  3439   defsubr (&Sposix_string_match);
  3440   defsubr (&Ssearch_forward);
  3441   defsubr (&Ssearch_backward);
  3442   defsubr (&Sre_search_forward);
  3443   defsubr (&Sre_search_backward);
  3444   defsubr (&Sposix_search_forward);
  3445   defsubr (&Sposix_search_backward);
  3446   defsubr (&Sreplace_match);
  3447   defsubr (&Smatch_beginning);
  3448   defsubr (&Smatch_end);
  3449   defsubr (&Smatch_data);
  3450   defsubr (&Sset_match_data);
  3451   defsubr (&Smatch_data__translate);
  3452   defsubr (&Sregexp_quote);
  3453   defsubr (&Snewline_cache_check);
  3454 
  3455   pdumper_do_now_and_after_load (syms_of_search_for_pdumper);
  3456 }
  3457 
  3458 static void
  3459 syms_of_search_for_pdumper (void)
  3460 {
  3461   for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
  3462     {
  3463       searchbufs[i].buf.allocated = 100;
  3464       searchbufs[i].buf.buffer = xmalloc (100);
  3465       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
  3466       searchbufs[i].regexp = Qnil;
  3467       searchbufs[i].f_whitespace_regexp = Qnil;
  3468       searchbufs[i].busy = false;
  3469       searchbufs[i].syntax_table = Qnil;
  3470       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
  3471     }
  3472   searchbuf_head = &searchbufs[0];
  3473 }
/* [<][>][^][v][top][bottom][index][help] */
root/src/search.c

DEFINITIONS