src/search.c

/* [<][>][^][v][top][bottom][index][help] */
This source file includes following definitions.
matcher_overflow
freeze_buffer_relocation
compile_pattern_1
shrink_regexp_cache
clear_regexp_cache
unfreeze_pattern
freeze_pattern
compile_pattern
looking_at_1
string_match_1
fast_string_match_internal
fast_c_string_match_internal
fast_looking_at
newline_cache_on_off
find_newline
scan_newline
scan_newline_from_point
find_newline_no_quit
find_before_next_newline
search_command
trivial_regexp_p
search_buffer_re
search_buffer_non_re
search_buffer
simple_search
boyer_moore
set_search_regs
match_limit
DEFUN
DEFUN
DEFUN
save_search_regs
restore_search_regs
update_search_regs
unwind_set_match_data
record_unwind_save_match_data
DEFUN
find_newline1
DEFUN
syms_of_search
syms_of_search_for_pdumper
     1 /* String search routines for GNU Emacs.
     2 
     3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2023 Free Software
     4 Foundation, Inc.
     5 
     6 This file is part of GNU Emacs.
     7 
     8 GNU Emacs is free software: you can redistribute it and/or modify
     9 it under the terms of the GNU General Public License as published by
    10 the Free Software Foundation, either version 3 of the License, or (at
    11 your option) any later version.
    12 
    13 GNU Emacs is distributed in the hope that it will be useful,
    14 but WITHOUT ANY WARRANTY; without even the implied warranty of
    15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    16 GNU General Public License for more details.
    17 
    18 You should have received a copy of the GNU General Public License
    19 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    20 
    21 
    22 #include <config.h>
    23 
    24 #include "lisp.h"
    25 #include "character.h"
    26 #include "buffer.h"
    27 #include "syntax.h"
    28 #include "charset.h"
    29 #include "region-cache.h"
    30 #include "blockinput.h"
    31 #include "intervals.h"
    32 #include "pdumper.h"
    33 #include "composite.h"
    34 
    35 #include "regex-emacs.h"
    36 
    37 #define REGEXP_CACHE_SIZE 20
    38 
    39 /* If the regexp is non-nil, then the buffer contains the compiled form
    40    of that regexp, suitable for searching.  */
    41 struct regexp_cache
    42 {
    43   struct regexp_cache *next;
    44   Lisp_Object regexp, f_whitespace_regexp;
    45   /* Syntax table for which the regexp applies.  We need this because
    46      of character classes.  If this is t, then the compiled pattern is valid
    47      for any syntax-table.  */
    48   Lisp_Object syntax_table;
    49   struct re_pattern_buffer buf;
    50   char fastmap[0400];
    51   /* True means regexp was compiled to do full POSIX backtracking.  */
    52   bool posix;
    53   /* True means we're inside a buffer match.  */
    54   bool busy;
    55 };
    56 
    57 /* The instances of that struct.  */
    58 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
    59 
    60 /* The head of the linked list; points to the most recently used buffer.  */
    61 static struct regexp_cache *searchbuf_head;
    62 
    63 static void set_search_regs (ptrdiff_t, ptrdiff_t);
    64 static void save_search_regs (void);
    65 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
    66                                 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
    67                                 ptrdiff_t, ptrdiff_t);
    68 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
    69                               Lisp_Object, Lisp_Object, ptrdiff_t,
    70                               ptrdiff_t, int);
    71 
    72 Lisp_Object re_match_object;
    73 
    74 static AVOID
    75 matcher_overflow (void)
    76 {
    77   error ("Stack overflow in regexp matcher");
    78 }
    79 
    80 static void
    81 freeze_buffer_relocation (void)
    82 {
    83 #ifdef REL_ALLOC
    84   /* Prevent ralloc.c from relocating the current buffer while
    85      searching it.  */
    86   r_alloc_inhibit_buffer_relocation (1);
    87   record_unwind_protect_int (r_alloc_inhibit_buffer_relocation, 0);
    88 #endif
    89 }
    90 
    91 /* Compile a regexp and signal a Lisp error if anything goes wrong.
    92    PATTERN is the pattern to compile.
    93    CP is the place to put the result.
    94    TRANSLATE is a translation table for ignoring case, or nil for none.
    95    POSIX is true if we want full backtracking (POSIX style) for this pattern.
    96    False means backtrack only enough to get a valid match.
    97 
    98    The behavior also depends on Vsearch_spaces_regexp.  */
    99 
   100 static void
   101 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
   102                    Lisp_Object translate, bool posix)
   103 {
   104   const char *whitespace_regexp;
   105   char *val;
   106 
   107   eassert (!cp->busy);
   108   cp->regexp = Qnil;
   109   cp->buf.translate = translate;
   110   cp->posix = posix;
   111   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
   112   cp->buf.charset_unibyte = charset_unibyte;
   113   if (STRINGP (Vsearch_spaces_regexp))
   114     cp->f_whitespace_regexp = Vsearch_spaces_regexp;
   115   else
   116     cp->f_whitespace_regexp = Qnil;
   117 
   118   whitespace_regexp = STRINGP (Vsearch_spaces_regexp) ?
   119     SSDATA (Vsearch_spaces_regexp) : NULL;
   120 
   121   val = (char *) re_compile_pattern (SSDATA (pattern), SBYTES (pattern),
   122                                      posix, whitespace_regexp, &cp->buf);
   123 
   124   /* If the compiled pattern hard codes some of the contents of the
   125      syntax-table, it can only be reused with *this* syntax table.  */
   126   cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
   127 
   128   if (val)
   129     xsignal1 (Qinvalid_regexp, build_string (val));
   130 
   131   cp->regexp = Fcopy_sequence (pattern);
   132 }
   133 
   134 /* Shrink each compiled regexp buffer in the cache
   135    to the size actually used right now.
   136    This is called from garbage collection.  */
   137 
   138 void
   139 shrink_regexp_cache (void)
   140 {
   141   struct regexp_cache *cp;
   142 
   143   for (cp = searchbuf_head; cp != 0; cp = cp->next)
   144     if (!cp->busy)
   145       {
   146         cp->buf.allocated = cp->buf.used;
   147         cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
   148       }
   149 }
   150 
   151 /* Clear the regexp cache w.r.t. a particular syntax table,
   152    because it was changed.
   153    There is no danger of memory leak here because re_compile_pattern
   154    automagically manages the memory in each re_pattern_buffer struct,
   155    based on its `allocated' and `buffer' values.  */
   156 void
   157 clear_regexp_cache (void)
   158 {
   159   int i;
   160 
   161   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
   162     /* It's tempting to compare with the syntax-table we've actually changed,
   163        but it's not sufficient because char-table inheritance means that
   164        modifying one syntax-table can change others at the same time.  */
   165     if (!searchbufs[i].busy && !BASE_EQ (searchbufs[i].syntax_table, Qt))
   166       searchbufs[i].regexp = Qnil;
   167 }
   168 
   169 static void
   170 unfreeze_pattern (void *arg)
   171 {
   172   struct regexp_cache *searchbuf = arg;
   173   searchbuf->busy = false;
   174 }
   175 
   176 static void
   177 freeze_pattern (struct regexp_cache *searchbuf)
   178 {
   179   eassert (!searchbuf->busy);
   180   record_unwind_protect_ptr (unfreeze_pattern, searchbuf);
   181   searchbuf->busy = true;
   182 }
   183 
   184 /* Compile a regexp if necessary, but first check to see if there's one in
   185    the cache.
   186    PATTERN is the pattern to compile.
   187    TRANSLATE is a translation table for ignoring case, or nil for none.
   188    REGP is the structure that says where to store the "register"
   189    values that will result from matching this pattern.
   190    If it is 0, we should compile the pattern not to record any
   191    subexpression bounds.
   192    POSIX is true if we want full backtracking (POSIX style) for this pattern.
   193    False means backtrack only enough to get a valid match.  */
   194 
   195 static struct regexp_cache *
   196 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
   197                  Lisp_Object translate, bool posix, bool multibyte)
   198 {
   199   struct regexp_cache *cp, **cpp, **lru_nonbusy;
   200 
   201   for (cpp = &searchbuf_head, lru_nonbusy = NULL; ; cpp = &cp->next)
   202     {
   203       cp = *cpp;
   204       if (!cp->busy)
   205         lru_nonbusy = cpp;
   206       /* Entries are initialized to nil, and may be set to nil by
   207          compile_pattern_1 if the pattern isn't valid.  Don't apply
   208          string accessors in those cases.  However, compile_pattern_1
   209          is only applied to the cache entry we pick here to reuse.  So
   210          nil should never appear before a non-nil entry.  */
   211       if (NILP (cp->regexp))
   212         goto compile_it;
   213       if (SCHARS (cp->regexp) == SCHARS (pattern)
   214           && !cp->busy
   215           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
   216           && !NILP (Fstring_equal (cp->regexp, pattern))
   217           && BASE_EQ (cp->buf.translate, translate)
   218           && cp->posix == posix
   219           && (BASE_EQ (cp->syntax_table, Qt)
   220               || BASE_EQ (cp->syntax_table,
   221                           BVAR (current_buffer, syntax_table)))
   222           && !NILP (Fequal (cp->f_whitespace_regexp, Vsearch_spaces_regexp))
   223           && cp->buf.charset_unibyte == charset_unibyte)
   224         break;
   225 
   226       /* If we're at the end of the cache, compile into the last
   227          (least recently used) non-busy cell in the cache.  */
   228       if (cp->next == 0)
   229         {
   230           if (!lru_nonbusy)
   231             error ("Too much matching reentrancy");
   232           cpp = lru_nonbusy;
   233           cp = *cpp;
   234         compile_it:
   235           eassert (!cp->busy);
   236           compile_pattern_1 (cp, pattern, translate, posix);
   237           break;
   238         }
   239     }
   240 
   241   /* When we get here, cp (aka *cpp) contains the compiled pattern,
   242      either because we found it in the cache or because we just compiled it.
   243      Move it to the front of the queue to mark it as most recently used.  */
   244   *cpp = cp->next;
   245   cp->next = searchbuf_head;
   246   searchbuf_head = cp;
   247 
   248   /* Advise the searching functions about the space we have allocated
   249      for register data.  */
   250   if (regp)
   251     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
   252 
   253   /* The compiled pattern can be used both for multibyte and unibyte
   254      target.  But, we have to tell which the pattern is used for. */
   255   cp->buf.target_multibyte = multibyte;
   256   return cp;
   257 }
   258 
   259 
   260 static Lisp_Object
   261 looking_at_1 (Lisp_Object string, bool posix, bool modify_data)
   262 {
   263   Lisp_Object val;
   264   unsigned char *p1, *p2;
   265   ptrdiff_t s1, s2;
   266   register ptrdiff_t i;
   267 
   268   if (running_asynch_code)
   269     save_search_regs ();
   270 
   271   /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
   272      table.  */
   273   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
   274                          BVAR (current_buffer, case_eqv_table));
   275 
   276   CHECK_STRING (string);
   277 
   278   /* Snapshot in case Lisp changes the value.  */
   279   bool modify_match_data = NILP (Vinhibit_changing_match_data) && modify_data;
   280 
   281   struct regexp_cache *cache_entry = compile_pattern (
   282     string,
   283     modify_match_data ? &search_regs : NULL,
   284     (!NILP (BVAR (current_buffer, case_fold_search))
   285      ? BVAR (current_buffer, case_canon_table) : Qnil),
   286     posix,
   287     !NILP (BVAR (current_buffer, enable_multibyte_characters)));
   288 
   289   /* Do a pending quit right away, to avoid paradoxical behavior */
   290   maybe_quit ();
   291 
   292   /* Get pointers and sizes of the two strings
   293      that make up the visible portion of the buffer. */
   294 
   295   p1 = BEGV_ADDR;
   296   s1 = GPT_BYTE - BEGV_BYTE;
   297   p2 = GAP_END_ADDR;
   298   s2 = ZV_BYTE - GPT_BYTE;
   299   if (s1 < 0)
   300     {
   301       p2 = p1;
   302       s2 = ZV_BYTE - BEGV_BYTE;
   303       s1 = 0;
   304     }
   305   if (s2 < 0)
   306     {
   307       s1 = ZV_BYTE - BEGV_BYTE;
   308       s2 = 0;
   309     }
   310 
   311   specpdl_ref count = SPECPDL_INDEX ();
   312   freeze_buffer_relocation ();
   313   freeze_pattern (cache_entry);
   314   re_match_object = Qnil;
   315   i = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
   316                   PT_BYTE - BEGV_BYTE,
   317                   modify_match_data ? &search_regs : NULL,
   318                   ZV_BYTE - BEGV_BYTE);
   319 
   320   if (i == -2)
   321     {
   322       unbind_to (count, Qnil);
   323       matcher_overflow ();
   324     }
   325 
   326   val = (i >= 0 ? Qt : Qnil);
   327   if (modify_match_data && i >= 0)
   328   {
   329     for (i = 0; i < search_regs.num_regs; i++)
   330       if (search_regs.start[i] >= 0)
   331         {
   332           search_regs.start[i]
   333             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
   334          search_regs.end[i]
   335            = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
   336        }
   337     /* Set last_thing_searched only when match data is changed.  */
   338     XSETBUFFER (last_thing_searched, current_buffer);
   339   }
   340 
   341   return unbind_to (count, val);
   342 }
   343 
   344 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 2, 0,
   345        doc: /* Return t if text after point matches regular expression REGEXP.
   346 By default, this function modifies the match data that
   347 `match-beginning', `match-end' and `match-data' access.  If
   348 INHIBIT-MODIFY is non-nil, don't modify the match data.  */)
   349   (Lisp_Object regexp, Lisp_Object inhibit_modify)
   350 {
   351   return looking_at_1 (regexp, 0, NILP (inhibit_modify));
   352 }
   353 
   354 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 2, 0,
   355        doc: /* Return t if text after point matches REGEXP according to Posix rules.
   356 Find the longest match, in accordance with Posix regular expression rules.
   357 
   358 By default, this function modifies the match data that
   359 `match-beginning', `match-end' and `match-data' access.  If
   360 INHIBIT-MODIFY is non-nil, don't modify the match data.  */)
   361   (Lisp_Object regexp, Lisp_Object inhibit_modify)
   362 {
   363   return looking_at_1 (regexp, 1, NILP (inhibit_modify));
   364 }
   365 
   366 static Lisp_Object
   367 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
   368                 bool posix, bool modify_data)
   369 {
   370   ptrdiff_t val;
   371   EMACS_INT pos;
   372   ptrdiff_t pos_byte, i;
   373   bool modify_match_data = NILP (Vinhibit_changing_match_data) && modify_data;
   374 
   375   if (running_asynch_code)
   376     save_search_regs ();
   377 
   378   CHECK_STRING (regexp);
   379   CHECK_STRING (string);
   380 
   381   if (NILP (start))
   382     pos = 0, pos_byte = 0;
   383   else
   384     {
   385       ptrdiff_t len = SCHARS (string);
   386 
   387       CHECK_FIXNUM (start);
   388       pos = XFIXNUM (start);
   389       if (pos < 0 && -pos <= len)
   390         pos = len + pos;
   391       else if (0 > pos || pos > len)
   392         args_out_of_range (string, start);
   393       pos_byte = string_char_to_byte (string, pos);
   394     }
   395 
   396   /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
   397      table.  */
   398   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
   399                          BVAR (current_buffer, case_eqv_table));
   400 
   401   specpdl_ref count = SPECPDL_INDEX ();
   402   struct regexp_cache *cache_entry
   403     = compile_pattern (regexp,
   404                        modify_match_data ? &search_regs : NULL,
   405                        (!NILP (BVAR (current_buffer, case_fold_search))
   406                         ? BVAR (current_buffer, case_canon_table)
   407                         : Qnil),
   408                        posix,
   409                        STRING_MULTIBYTE (string));
   410   freeze_pattern (cache_entry);
   411   re_match_object = string;
   412   val = re_search (&cache_entry->buf, SSDATA (string),
   413                    SBYTES (string), pos_byte,
   414                    SBYTES (string) - pos_byte,
   415                    (modify_match_data ? &search_regs : NULL));
   416   unbind_to (count, Qnil);
   417 
   418   /* Set last_thing_searched only when match data is changed.  */
   419   if (modify_match_data)
   420     last_thing_searched = Qt;
   421 
   422   if (val == -2)
   423     matcher_overflow ();
   424   if (val < 0) return Qnil;
   425 
   426   if (modify_match_data)
   427     for (i = 0; i < search_regs.num_regs; i++)
   428       if (search_regs.start[i] >= 0)
   429         {
   430           search_regs.start[i]
   431             = string_byte_to_char (string, search_regs.start[i]);
   432           search_regs.end[i]
   433             = string_byte_to_char (string, search_regs.end[i]);
   434         }
   435 
   436   return make_fixnum (string_byte_to_char (string, val));
   437 }
   438 
   439 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 4, 0,
   440        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
   441 Matching ignores case if `case-fold-search' is non-nil.
   442 If third arg START is non-nil, start search at that index in STRING.
   443 
   444 If INHIBIT-MODIFY is non-nil, match data is not changed.
   445 
   446 If INHIBIT-MODIFY is nil or missing, match data is changed, and
   447 `match-end' and `match-beginning' give indices of substrings matched
   448 by parenthesis constructs in the pattern.  You can use the function
   449 `match-string' to extract the substrings matched by the parenthesis
   450 constructions in REGEXP.  For index of first char beyond the match, do
   451 (match-end 0).  */)
   452   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
   453    Lisp_Object inhibit_modify)
   454 {
   455   return string_match_1 (regexp, string, start, 0, NILP (inhibit_modify));
   456 }
   457 
   458 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 4, 0,
   459        doc: /* Return index of start of first match for Posix REGEXP in STRING, or nil.
   460 Find the longest match, in accord with Posix regular expression rules.
   461 Case is ignored if `case-fold-search' is non-nil in the current buffer.
   462 
   463 If INHIBIT-MODIFY is non-nil, match data is not changed.
   464 
   465 If INHIBIT-MODIFY is nil or missing, match data is changed, and
   466 `match-end' and `match-beginning' give indices of substrings matched
   467 by parenthesis constructs in the pattern.  You can use the function
   468 `match-string' to extract the substrings matched by the parenthesis
   469 constructions in REGEXP.  For index of first char beyond the match, do
   470 (match-end 0).  */)
   471   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
   472    Lisp_Object inhibit_modify)
   473 {
   474   return string_match_1 (regexp, string, start, 1, NILP (inhibit_modify));
   475 }
   476 
   477 /* Match REGEXP against STRING using translation table TABLE,
   478    searching all of STRING, and return the index of the match,
   479    or negative on failure.  This does not clobber the match data.  */
   480 
   481 ptrdiff_t
   482 fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
   483                             Lisp_Object table)
   484 {
   485   re_match_object = string;
   486   specpdl_ref count = SPECPDL_INDEX ();
   487   struct regexp_cache *cache_entry
   488     = compile_pattern (regexp, 0, table, 0, STRING_MULTIBYTE (string));
   489   freeze_pattern (cache_entry);
   490   ptrdiff_t val = re_search (&cache_entry->buf, SSDATA (string),
   491                              SBYTES (string), 0,
   492                              SBYTES (string), 0);
   493   unbind_to (count, Qnil);
   494   return val;
   495 }
   496 
   497 /* Match REGEXP against STRING, searching all of STRING and return the
   498    index of the match, or negative on failure.  This does not clobber
   499    the match data.  Table is a canonicalize table for ignoring case,
   500    or nil for none.
   501 
   502    We assume that STRING contains single-byte characters.  */
   503 
   504 ptrdiff_t
   505 fast_c_string_match_internal (Lisp_Object regexp,
   506                               const char *string, ptrdiff_t len,
   507                               Lisp_Object table)
   508 {
   509   /* FIXME: This is expensive and not obviously correct when it makes
   510      a difference. I.e., no longer "fast", and may hide bugs.
   511      Something should be done about this.  */
   512   regexp = string_make_unibyte (regexp);
   513   /* Record specpdl index because freeze_pattern pushes an
   514      unwind-protect on the specpdl.  */
   515   specpdl_ref count = SPECPDL_INDEX ();
   516   struct regexp_cache *cache_entry
   517     = compile_pattern (regexp, 0, table, 0, 0);
   518   freeze_pattern (cache_entry);
   519   re_match_object = Qt;
   520   ptrdiff_t val = re_search (&cache_entry->buf, string, len, 0, len, 0);
   521   unbind_to (count, Qnil);
   522   return val;
   523 }
   524 
   525 /* Match REGEXP against the characters after POS to LIMIT, and return
   526    the number of matched characters.  If STRING is non-nil, match
   527    against the characters in it.  In that case, POS and LIMIT are
   528    indices into the string.  This function doesn't modify the match
   529    data.  */
   530 
   531 ptrdiff_t
   532 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
   533                  ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
   534 {
   535   bool multibyte;
   536   unsigned char *p1, *p2;
   537   ptrdiff_t s1, s2;
   538   ptrdiff_t len;
   539 
   540   if (STRINGP (string))
   541     {
   542       if (pos_byte < 0)
   543         pos_byte = string_char_to_byte (string, pos);
   544       if (limit_byte < 0)
   545         limit_byte = string_char_to_byte (string, limit);
   546       p1 = NULL;
   547       s1 = 0;
   548       p2 = SDATA (string);
   549       s2 = SBYTES (string);
   550       multibyte = STRING_MULTIBYTE (string);
   551     }
   552   else
   553     {
   554       if (pos_byte < 0)
   555         pos_byte = CHAR_TO_BYTE (pos);
   556       if (limit_byte < 0)
   557         limit_byte = CHAR_TO_BYTE (limit);
   558       pos_byte -= BEGV_BYTE;
   559       limit_byte -= BEGV_BYTE;
   560       p1 = BEGV_ADDR;
   561       s1 = GPT_BYTE - BEGV_BYTE;
   562       p2 = GAP_END_ADDR;
   563       s2 = ZV_BYTE - GPT_BYTE;
   564       if (s1 < 0)
   565         {
   566           p2 = p1;
   567           s2 = ZV_BYTE - BEGV_BYTE;
   568           s1 = 0;
   569         }
   570       if (s2 < 0)
   571         {
   572           s1 = ZV_BYTE - BEGV_BYTE;
   573           s2 = 0;
   574         }
   575       multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
   576     }
   577 
   578   struct regexp_cache *cache_entry =
   579     compile_pattern (regexp, 0, Qnil, 0, multibyte);
   580   specpdl_ref count = SPECPDL_INDEX ();
   581   freeze_buffer_relocation ();
   582   freeze_pattern (cache_entry);
   583   re_match_object = STRINGP (string) ? string : Qnil;
   584   len = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
   585                     pos_byte, NULL, limit_byte);
   586 
   587   unbind_to (count, Qnil);
   588   return len;
   589 }
   590 
   591 
   592 /* The newline cache: remembering which sections of text have no newlines.  */
   593 
   594 /* If the user has requested the long scans caching, make sure it's on.
   595    Otherwise, make sure it's off.
   596    This is our cheezy way of associating an action with the change of
   597    state of a buffer-local variable.  */
   598 static struct region_cache *
   599 newline_cache_on_off (struct buffer *buf)
   600 {
   601   struct buffer *base_buf = buf;
   602   bool indirect_p = false;
   603 
   604   if (buf->base_buffer)
   605     {
   606       base_buf = buf->base_buffer;
   607       indirect_p = true;
   608     }
   609 
   610   /* Don't turn on or off the cache in the base buffer, if the value
   611      of cache-long-scans of the base buffer is inconsistent with that.
   612      This is because doing so will just make the cache pure overhead,
   613      since if we turn it on via indirect buffer, it will be
   614      immediately turned off by its base buffer.  */
   615   if (NILP (BVAR (buf, cache_long_scans)))
   616     {
   617       if (!indirect_p
   618           || NILP (BVAR (base_buf, cache_long_scans)))
   619         {
   620           /* It should be off.  */
   621           if (base_buf->newline_cache)
   622             {
   623               free_region_cache (base_buf->newline_cache);
   624               base_buf->newline_cache = 0;
   625             }
   626         }
   627       return NULL;
   628     }
   629   else
   630     {
   631       if (!indirect_p
   632           || !NILP (BVAR (base_buf, cache_long_scans)))
   633         {
   634           /* It should be on.  */
   635           if (base_buf->newline_cache == 0)
   636             {
   637               base_buf->newline_cache = new_region_cache ();
   638               __lsan_ignore_object (base_buf->newline_cache);
   639             }
   640         }
   641       return base_buf->newline_cache;
   642     }
   643 }
   644 
   645 
   646 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
   647 
   648    If COUNT is positive, search forwards; END must be >= START.
   649    If COUNT is negative, search backwards for the -COUNTth instance;
   650       END must be <= START.
   651    If COUNT is zero, do anything you please; run rogue, for all I care.
   652 
   653    If END is zero, use BEGV or ZV instead, as appropriate for the
   654    direction indicated by COUNT.  If START_BYTE is -1 it is unknown,
   655    and similarly for END_BYTE.
   656 
   657    If we find COUNT instances, set *COUNTED to COUNT, and return the
   658    position past the COUNTth match.  Note that for reverse motion
   659    this is not the same as the usual convention for Emacs motion commands.
   660 
   661    If we don't find COUNT instances before reaching END, set *COUNTED
   662    to the number of newlines left found (negated if COUNT is negative),
   663    and return END.
   664 
   665    If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
   666    to the returned character position.
   667 
   668    If ALLOW_QUIT, check for quitting.  That's good to do
   669    except when inside redisplay.  */
   670 
   671 ptrdiff_t
   672 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
   673               ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
   674               ptrdiff_t *bytepos, bool allow_quit)
   675 {
   676   struct region_cache *newline_cache;
   677   struct buffer *cache_buffer;
   678 
   679   if (!end)
   680     {
   681       if (count > 0)
   682         end = ZV, end_byte = ZV_BYTE;
   683       else
   684         end = BEGV, end_byte = BEGV_BYTE;
   685     }
   686   if (end_byte == -1)
   687     end_byte = CHAR_TO_BYTE (end);
   688 
   689   newline_cache = newline_cache_on_off (current_buffer);
   690   if (current_buffer->base_buffer)
   691     cache_buffer = current_buffer->base_buffer;
   692   else
   693     cache_buffer = current_buffer;
   694 
   695   if (counted)
   696     *counted = count;
   697 
   698   if (count > 0)
   699     while (start != end)
   700       {
   701         /* Our innermost scanning loop is very simple; it doesn't know
   702            about gaps, buffer ends, or the newline cache.  ceiling is
   703            the position of the last character before the next such
   704            obstacle --- the last character the dumb search loop should
   705            examine.  */
   706         ptrdiff_t tem, ceiling_byte = end_byte - 1;
   707 
   708         /* If we're using the newline cache, consult it to see whether
   709            we can avoid some scanning.  */
   710         if (newline_cache)
   711           {
   712             ptrdiff_t next_change;
   713             int result = 1;
   714 
   715             while (start < end && result)
   716               {
   717                 ptrdiff_t lim1;
   718 
   719                 result = region_cache_forward (cache_buffer, newline_cache,
   720                                                start, &next_change);
   721                 if (result)
   722                   {
   723                     /* When the cache revalidation is deferred,
   724                        next-change might point beyond ZV, which will
   725                        cause assertion violation in CHAR_TO_BYTE below.
   726                        Limit next_change to ZV to avoid that.  */
   727                     if (next_change > ZV)
   728                       next_change = ZV;
   729                     start = next_change;
   730                     lim1 = next_change = end;
   731                   }
   732                 else
   733                   lim1 = min (next_change, end);
   734 
   735                 /* The cache returned zero for this region; see if
   736                    this is because the region is known and includes
   737                    only newlines.  While at that, count any newlines
   738                    we bump into, and exit if we found enough off them.  */
   739                 start_byte = CHAR_TO_BYTE (start);
   740                 while (start < lim1
   741                        && FETCH_BYTE (start_byte) == '\n')
   742                   {
   743                     start_byte++;
   744                     start++;
   745                     if (--count == 0)
   746                       {
   747                         if (bytepos)
   748                           *bytepos = start_byte;
   749                         return start;
   750                       }
   751                   }
   752                 /* If we found a non-newline character before hitting
   753                    position where the cache will again return non-zero
   754                    (i.e. no newlines beyond that position), it means
   755                    this region is not yet known to the cache, and we
   756                    must resort to the "dumb loop" method.  */
   757                 if (start < next_change && !result)
   758                   break;
   759                 result = 1;
   760               }
   761             if (start >= end)
   762               {
   763                 start = end;
   764                 start_byte = end_byte;
   765                 break;
   766               }
   767 
   768             /* START should never be after END.  */
   769             if (start_byte > ceiling_byte)
   770               start_byte = ceiling_byte;
   771 
   772             /* Now the text after start is an unknown region, and
   773                next_change is the position of the next known region. */
   774             ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
   775           }
   776         else if (start_byte == -1)
   777           start_byte = CHAR_TO_BYTE (start);
   778 
   779         /* The dumb loop can only scan text stored in contiguous
   780            bytes. BUFFER_CEILING_OF returns the last character
   781            position that is contiguous, so the ceiling is the
   782            position after that.  */
   783         tem = BUFFER_CEILING_OF (start_byte);
   784         ceiling_byte = min (tem, ceiling_byte);
   785 
   786         {
   787           /* The termination address of the dumb loop.  */
   788           unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
   789           ptrdiff_t lim_byte = ceiling_byte + 1;
   790 
   791           /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
   792              of the base, the cursor, and the next line.  */
   793           ptrdiff_t base = start_byte - lim_byte;
   794           ptrdiff_t cursor, next;
   795 
   796           for (cursor = base; cursor < 0; cursor = next)
   797             {
   798               /* The dumb loop.  */
   799               unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
   800               next = nl ? nl - lim_addr : 0;
   801 
   802               /* If we're using the newline cache, cache the fact that
   803                  the region we just traversed is free of newlines. */
   804               if (newline_cache && cursor != next)
   805                 {
   806                   know_region_cache (cache_buffer, newline_cache,
   807                                      BYTE_TO_CHAR (lim_byte + cursor),
   808                                      BYTE_TO_CHAR (lim_byte + next));
   809                   /* know_region_cache can relocate buffer text.  */
   810                   lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
   811                 }
   812 
   813               if (! nl)
   814                 break;
   815               next++;
   816 
   817               if (--count == 0)
   818                 {
   819                   if (bytepos)
   820                     *bytepos = lim_byte + next;
   821                   return BYTE_TO_CHAR (lim_byte + next);
   822                 }
   823               if (allow_quit)
   824                 maybe_quit ();
   825             }
   826 
   827           start_byte = lim_byte;
   828           start = BYTE_TO_CHAR (start_byte);
   829         }
   830       }
   831   else
   832     while (start > end)
   833       {
   834         /* The last character to check before the next obstacle.  */
   835         ptrdiff_t tem, ceiling_byte = end_byte;
   836 
   837         /* Consult the newline cache, if appropriate.  */
   838         if (newline_cache)
   839           {
   840             ptrdiff_t next_change;
   841             int result = 1;
   842 
   843             while (start > end && result)
   844               {
   845                 ptrdiff_t lim1;
   846 
   847                 result = region_cache_backward (cache_buffer, newline_cache,
   848                                                 start, &next_change);
   849                 if (result)
   850                   {
   851                     start = next_change;
   852                     lim1 = next_change = end;
   853                   }
   854                 else
   855                   lim1 = max (next_change, end);
   856                 start_byte = CHAR_TO_BYTE (start);
   857                 while (start > lim1
   858                        && FETCH_BYTE (start_byte - 1) == '\n')
   859                   {
   860                     if (++count == 0)
   861                       {
   862                         if (bytepos)
   863                           *bytepos = start_byte;
   864                         return start;
   865                       }
   866                     start_byte--;
   867                     start--;
   868                   }
   869                 if (start > next_change && !result)
   870                   break;
   871                 result = 1;
   872               }
   873             if (start <= end)
   874               {
   875                 start = end;
   876                 start_byte = end_byte;
   877                 break;
   878               }
   879 
   880             /* Start should never be at or before end.  */
   881             if (start_byte <= ceiling_byte)
   882               start_byte = ceiling_byte + 1;
   883 
   884             /* Now the text before start is an unknown region, and
   885                next_change is the position of the next known region. */
   886             ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
   887           }
   888         else if (start_byte == -1)
   889           start_byte = CHAR_TO_BYTE (start);
   890 
   891         /* Stop scanning before the gap.  */
   892         tem = BUFFER_FLOOR_OF (start_byte - 1);
   893         ceiling_byte = max (tem, ceiling_byte);
   894 
   895         {
   896           /* The termination address of the dumb loop.  */
   897           unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
   898 
   899           /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
   900              the base, the cursor, and the previous line.  These
   901              offsets are at least -1.  */
   902           ptrdiff_t base = start_byte - ceiling_byte;
   903           ptrdiff_t cursor, prev;
   904 
   905           for (cursor = base; 0 < cursor; cursor = prev)
   906             {
   907               unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
   908               prev = nl ? nl - ceiling_addr : -1;
   909 
   910               /* If we're looking for newlines, cache the fact that
   911                  this line's region is free of them. */
   912               if (newline_cache && cursor != prev + 1)
   913                 {
   914                   know_region_cache (cache_buffer, newline_cache,
   915                                      BYTE_TO_CHAR (ceiling_byte + prev + 1),
   916                                      BYTE_TO_CHAR (ceiling_byte + cursor));
   917                   /* know_region_cache can relocate buffer text.  */
   918                   ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
   919                 }
   920 
   921               if (! nl)
   922                 break;
   923 
   924               if (++count >= 0)
   925                 {
   926                   if (bytepos)
   927                     *bytepos = ceiling_byte + prev + 1;
   928                   return BYTE_TO_CHAR (ceiling_byte + prev + 1);
   929                 }
   930               if (allow_quit)
   931                 maybe_quit ();
   932             }
   933 
   934           start_byte = ceiling_byte;
   935           start = BYTE_TO_CHAR (start_byte);
   936         }
   937       }
   938 
   939   if (counted)
   940     *counted -= count;
   941   if (bytepos)
   942     {
   943       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
   944       eassert (*bytepos == CHAR_TO_BYTE (start));
   945     }
   946   return start;
   947 }
   948 
   949 /* Search for COUNT instances of a line boundary.
   950    Start at START.  If COUNT is negative, search backwards.
   951 
   952    We report the resulting position by calling TEMP_SET_PT_BOTH.
   953 
   954    If we find COUNT instances. we position after (always after,
   955    even if scanning backwards) the COUNTth match.
   956 
   957    If we don't find COUNT instances before reaching the end of the
   958    buffer (or the beginning, if scanning backwards), we position at
   959    the limit we bumped up against.
   960 
   961    If ALLOW_QUIT, check for quitting.  That's good to do
   962    except in special cases.  */
   963 
   964 void
   965 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
   966               ptrdiff_t limit, ptrdiff_t limit_byte,
   967               ptrdiff_t count, bool allow_quit)
   968 {
   969   ptrdiff_t charpos, bytepos, counted;
   970 
   971   charpos = find_newline (start, start_byte, limit, limit_byte,
   972                           count, &counted, &bytepos, allow_quit);
   973   if (counted != count)
   974     TEMP_SET_PT_BOTH (limit, limit_byte);
   975   else
   976     TEMP_SET_PT_BOTH (charpos, bytepos);
   977 }
   978 
   979 /* Like above, but always scan from point and report the
   980    resulting position in *CHARPOS and *BYTEPOS.  */
   981 
   982 ptrdiff_t
   983 scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
   984                          ptrdiff_t *bytepos)
   985 {
   986   ptrdiff_t counted;
   987 
   988   if (count <= 0)
   989     *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
   990                              &counted, bytepos, 1);
   991   else
   992     *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
   993                              &counted, bytepos, 1);
   994   return counted;
   995 }
   996 
   997 /* Like find_newline, but doesn't allow QUITting and doesn't return
   998    COUNTED.  */
   999 ptrdiff_t
  1000 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
  1001                       ptrdiff_t cnt, ptrdiff_t *bytepos)
  1002 {
  1003   return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
  1004 }
  1005 
  1006 /* Like find_newline, but returns position before the newline, not
  1007    after, and only search up to TO.
  1008    This isn't just find_newline_no_quit (...)-1, because you might hit TO.  */
  1009 
  1010 ptrdiff_t
  1011 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
  1012                           ptrdiff_t cnt, ptrdiff_t *bytepos)
  1013 {
  1014   ptrdiff_t counted;
  1015   ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &counted, bytepos, 1);
  1016 
  1017   if (counted == cnt)
  1018     {
  1019       if (bytepos)
  1020         dec_both (&pos, &*bytepos);
  1021       else
  1022         pos--;
  1023     }
  1024   return pos;
  1025 }
  1026 
  1027 /* Subroutines of Lisp buffer search functions. */
  1028 
  1029 static Lisp_Object
  1030 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
  1031                 Lisp_Object count, int direction, bool RE, bool posix)
  1032 {
  1033   EMACS_INT np;
  1034   EMACS_INT lim;
  1035   ptrdiff_t lim_byte;
  1036   EMACS_INT n = direction;
  1037 
  1038   if (!NILP (count))
  1039     {
  1040       CHECK_FIXNUM (count);
  1041       n *= XFIXNUM (count);
  1042     }
  1043 
  1044   CHECK_STRING (string);
  1045   if (NILP (bound))
  1046     {
  1047       if (n > 0)
  1048         lim = ZV, lim_byte = ZV_BYTE;
  1049       else
  1050         lim = BEGV, lim_byte = BEGV_BYTE;
  1051     }
  1052   else
  1053     {
  1054       lim = fix_position (bound);
  1055       if (n > 0 ? lim < PT : lim > PT)
  1056         error ("Invalid search bound (wrong side of point)");
  1057       if (lim > ZV)
  1058         lim = ZV, lim_byte = ZV_BYTE;
  1059       else if (lim < BEGV)
  1060         lim = BEGV, lim_byte = BEGV_BYTE;
  1061       else
  1062         lim_byte = CHAR_TO_BYTE (lim);
  1063     }
  1064 
  1065   /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
  1066      table.  */
  1067   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
  1068                          BVAR (current_buffer, case_eqv_table));
  1069 
  1070   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
  1071                       (!NILP (BVAR (current_buffer, case_fold_search))
  1072                        ? BVAR (current_buffer, case_canon_table)
  1073                        : Qnil),
  1074                       (!NILP (BVAR (current_buffer, case_fold_search))
  1075                        ? BVAR (current_buffer, case_eqv_table)
  1076                        : Qnil),
  1077                       posix);
  1078   if (np <= 0)
  1079     {
  1080       if (NILP (noerror))
  1081         xsignal1 (Qsearch_failed, string);
  1082 
  1083       if (!EQ (noerror, Qt))
  1084         {
  1085           eassert (BEGV <= lim && lim <= ZV);
  1086           SET_PT_BOTH (lim, lim_byte);
  1087           return Qnil;
  1088 #if 0 /* This would be clean, but maybe programs depend on
  1089          a value of nil here.  */
  1090           np = lim;
  1091 #endif
  1092         }
  1093       else
  1094         return Qnil;
  1095     }
  1096 
  1097   eassert (BEGV <= np && np <= ZV);
  1098   SET_PT (np);
  1099 
  1100   return make_fixnum (np);
  1101 }
  1102 
  1103 /* Return true if REGEXP it matches just one constant string.  */
  1104 
  1105 static bool
  1106 trivial_regexp_p (Lisp_Object regexp)
  1107 {
  1108   ptrdiff_t len = SBYTES (regexp);
  1109   unsigned char *s = SDATA (regexp);
  1110   while (--len >= 0)
  1111     {
  1112       switch (*s++)
  1113         {
  1114         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
  1115           return 0;
  1116         case '\\':
  1117           if (--len < 0)
  1118             return 0;
  1119           switch (*s++)
  1120             {
  1121             case '|': case '(': case ')': case '`': case '\'': case 'b':
  1122             case 'B': case '<': case '>': case 'w': case 'W': case 's':
  1123             case 'S': case '=': case '{': case '}': case '_':
  1124             case 'c': case 'C': /* for categoryspec and notcategoryspec */
  1125             case '1': case '2': case '3': case '4': case '5':
  1126             case '6': case '7': case '8': case '9':
  1127               return 0;
  1128             }
  1129         }
  1130     }
  1131   return 1;
  1132 }
  1133 
  1134 #define TRANSLATE(out, trt, d)                  \
  1135 do                                              \
  1136   {                                             \
  1137     if (! NILP (trt))                           \
  1138       {                                         \
  1139         Lisp_Object temp;                       \
  1140         temp = Faref (trt, make_fixnum (d));    \
  1141         if (FIXNUMP (temp))                     \
  1142           out = XFIXNUM (temp);                 \
  1143         else                                    \
  1144           out = d;                              \
  1145       }                                         \
  1146     else                                        \
  1147       out = d;                                  \
  1148   }                                             \
  1149 while (0)
  1150 
  1151 /* Only used in search_buffer, to record the end position of the match
  1152    when searching regexps and SEARCH_REGS should not be changed
  1153    (i.e. Vinhibit_changing_match_data is non-nil).  */
  1154 static struct re_registers search_regs_1;
  1155 
  1156 static EMACS_INT
  1157 search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
  1158                   ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
  1159                   Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
  1160 {
  1161   unsigned char *p1, *p2;
  1162   ptrdiff_t s1, s2;
  1163 
  1164   /* Snapshot in case Lisp changes the value.  */
  1165   bool preserve_match_data = NILP (Vinhibit_changing_match_data);
  1166 
  1167   struct regexp_cache *cache_entry =
  1168     compile_pattern (string,
  1169                      preserve_match_data ? &search_regs : &search_regs_1,
  1170                      trt, posix,
  1171                      !NILP (BVAR (current_buffer, enable_multibyte_characters)));
  1172   struct re_pattern_buffer *bufp = &cache_entry->buf;
  1173 
  1174   maybe_quit ();                /* Do a pending quit right away,
  1175                                    to avoid paradoxical behavior */
  1176   /* Get pointers and sizes of the two strings
  1177      that make up the visible portion of the buffer. */
  1178 
  1179   p1 = BEGV_ADDR;
  1180   s1 = GPT_BYTE - BEGV_BYTE;
  1181   p2 = GAP_END_ADDR;
  1182   s2 = ZV_BYTE - GPT_BYTE;
  1183   if (s1 < 0)
  1184     {
  1185       p2 = p1;
  1186       s2 = ZV_BYTE - BEGV_BYTE;
  1187       s1 = 0;
  1188     }
  1189   if (s2 < 0)
  1190     {
  1191       s1 = ZV_BYTE - BEGV_BYTE;
  1192       s2 = 0;
  1193     }
  1194 
  1195   specpdl_ref count = SPECPDL_INDEX ();
  1196   freeze_buffer_relocation ();
  1197   freeze_pattern (cache_entry);
  1198 
  1199   while (n < 0)
  1200     {
  1201       ptrdiff_t val;
  1202 
  1203       re_match_object = Qnil;
  1204       val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
  1205                          pos_byte - BEGV_BYTE, lim_byte - pos_byte,
  1206                          preserve_match_data ? &search_regs : &search_regs_1,
  1207                          /* Don't allow match past current point */
  1208                          pos_byte - BEGV_BYTE);
  1209       if (val == -2)
  1210         {
  1211           unbind_to (count, Qnil);
  1212           matcher_overflow ();
  1213         }
  1214       if (val >= 0)
  1215         {
  1216           if (preserve_match_data)
  1217             {
  1218               pos_byte = search_regs.start[0] + BEGV_BYTE;
  1219               for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
  1220                 if (search_regs.start[i] >= 0)
  1221                   {
  1222                     search_regs.start[i]
  1223                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
  1224                     search_regs.end[i]
  1225                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
  1226                   }
  1227               XSETBUFFER (last_thing_searched, current_buffer);
  1228               /* Set pos to the new position. */
  1229               pos = search_regs.start[0];
  1230             }
  1231           else
  1232             {
  1233               pos_byte = search_regs_1.start[0] + BEGV_BYTE;
  1234               /* Set pos to the new position.  */
  1235               pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
  1236             }
  1237         }
  1238       else
  1239         {
  1240           unbind_to (count, Qnil);
  1241           return (n);
  1242         }
  1243       n++;
  1244       maybe_quit ();
  1245     }
  1246   while (n > 0)
  1247     {
  1248       ptrdiff_t val;
  1249 
  1250       re_match_object = Qnil;
  1251       val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
  1252                          pos_byte - BEGV_BYTE, lim_byte - pos_byte,
  1253                          preserve_match_data ? &search_regs : &search_regs_1,
  1254                          lim_byte - BEGV_BYTE);
  1255       if (val == -2)
  1256         {
  1257           unbind_to (count, Qnil);
  1258           matcher_overflow ();
  1259         }
  1260       if (val >= 0)
  1261         {
  1262           if (preserve_match_data)
  1263             {
  1264               pos_byte = search_regs.end[0] + BEGV_BYTE;
  1265               for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
  1266                 if (search_regs.start[i] >= 0)
  1267                   {
  1268                     search_regs.start[i]
  1269                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
  1270                     search_regs.end[i]
  1271                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
  1272                   }
  1273               XSETBUFFER (last_thing_searched, current_buffer);
  1274               pos = search_regs.end[0];
  1275             }
  1276           else
  1277             {
  1278               pos_byte = search_regs_1.end[0] + BEGV_BYTE;
  1279               pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
  1280             }
  1281         }
  1282       else
  1283         {
  1284           unbind_to (count, Qnil);
  1285           return (0 - n);
  1286         }
  1287       n--;
  1288       maybe_quit ();
  1289     }
  1290   unbind_to (count, Qnil);
  1291   return (pos);
  1292 }
  1293 
  1294 static EMACS_INT
  1295 search_buffer_non_re (Lisp_Object string, ptrdiff_t pos,
  1296                       ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte,
  1297                       EMACS_INT n, bool RE, Lisp_Object trt, Lisp_Object inverse_trt,
  1298                       bool posix)
  1299 {
  1300   unsigned char *raw_pattern, *pat;
  1301   ptrdiff_t raw_pattern_size;
  1302   ptrdiff_t raw_pattern_size_byte;
  1303   unsigned char *patbuf;
  1304   bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
  1305   unsigned char *base_pat;
  1306   /* Set to positive if we find a non-ASCII char that need
  1307      translation.  Otherwise set to zero later.  */
  1308   int char_base = -1;
  1309   bool boyer_moore_ok = 1;
  1310   USE_SAFE_ALLOCA;
  1311 
  1312   /* MULTIBYTE says whether the text to be searched is multibyte.
  1313      We must convert PATTERN to match that, or we will not really
  1314      find things right.  */
  1315 
  1316   if (multibyte == STRING_MULTIBYTE (string))
  1317     {
  1318       raw_pattern = SDATA (string);
  1319       raw_pattern_size = SCHARS (string);
  1320       raw_pattern_size_byte = SBYTES (string);
  1321     }
  1322   else if (multibyte)
  1323     {
  1324       raw_pattern_size = SCHARS (string);
  1325       raw_pattern_size_byte
  1326         = count_size_as_multibyte (SDATA (string),
  1327                                    raw_pattern_size);
  1328       raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
  1329       copy_text (SDATA (string), raw_pattern,
  1330                  SCHARS (string), 0, 1);
  1331     }
  1332   else
  1333     {
  1334       /* Converting multibyte to single-byte.  */
  1335       raw_pattern_size = SCHARS (string);
  1336       raw_pattern_size_byte = SCHARS (string);
  1337       raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
  1338       copy_text (SDATA (string), raw_pattern,
  1339                  SBYTES (string), 1, 0);
  1340     }
  1341 
  1342   /* Copy and optionally translate the pattern.  */
  1343   ptrdiff_t len = raw_pattern_size;
  1344   ptrdiff_t len_byte = raw_pattern_size_byte;
  1345   SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
  1346   pat = patbuf;
  1347   base_pat = raw_pattern;
  1348   if (multibyte)
  1349     {
  1350       /* Fill patbuf by translated characters in STRING while
  1351          checking if we can use boyer-moore search.  If TRT is
  1352          non-nil, we can use boyer-moore search only if TRT can be
  1353          represented by the byte array of 256 elements.  For that,
  1354          all non-ASCII case-equivalents of all case-sensitive
  1355          characters in STRING must belong to the same character
  1356          group (two characters belong to the same group iff their
  1357          multibyte forms are the same except for the last byte;
  1358          i.e. every 64 characters form a group; U+0000..U+003F,
  1359          U+0040..U+007F, U+0080..U+00BF, ...).  */
  1360 
  1361       while (--len >= 0)
  1362         {
  1363           unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
  1364           int translated, inverse;
  1365           int charlen;
  1366 
  1367           /* If we got here and the RE flag is set, it's because we're
  1368              dealing with a regexp known to be trivial, so the backslash
  1369              just quotes the next character.  */
  1370           if (RE && *base_pat == '\\')
  1371             {
  1372               len--;
  1373               raw_pattern_size--;
  1374               len_byte--;
  1375               base_pat++;
  1376             }
  1377 
  1378           int in_charlen, c = string_char_and_length (base_pat, &in_charlen);
  1379 
  1380           if (NILP (trt))
  1381             {
  1382               str = base_pat;
  1383               charlen = in_charlen;
  1384             }
  1385           else
  1386             {
  1387               /* Translate the character.  */
  1388               TRANSLATE (translated, trt, c);
  1389               charlen = CHAR_STRING (translated, str_base);
  1390               str = str_base;
  1391 
  1392               /* Check if C has any other case-equivalents.  */
  1393               TRANSLATE (inverse, inverse_trt, c);
  1394               /* If so, check if we can use boyer-moore.  */
  1395               if (c != inverse && boyer_moore_ok)
  1396                 {
  1397                   /* Check if all equivalents belong to the same
  1398                      group of characters.  Note that the check of C
  1399                      itself is done by the last iteration.  */
  1400                   int this_char_base = -1;
  1401 
  1402                   while (boyer_moore_ok)
  1403                     {
  1404                       if (ASCII_CHAR_P (inverse))
  1405                         {
  1406                           if (this_char_base > 0)
  1407                             boyer_moore_ok = 0;
  1408                           else
  1409                             this_char_base = 0;
  1410                         }
  1411                       else if (CHAR_BYTE8_P (inverse))
  1412                         /* Boyer-moore search can't handle a
  1413                            translation of an eight-bit
  1414                            character.  */
  1415                         boyer_moore_ok = 0;
  1416                       else if (this_char_base < 0)
  1417                         {
  1418                           this_char_base = inverse & ~0x3F;
  1419                           if (char_base < 0)
  1420                             char_base = this_char_base;
  1421                           else if (this_char_base != char_base)
  1422                             boyer_moore_ok = 0;
  1423                         }
  1424                       else if ((inverse & ~0x3F) != this_char_base)
  1425                         boyer_moore_ok = 0;
  1426                       if (c == inverse)
  1427                         break;
  1428                       TRANSLATE (inverse, inverse_trt, inverse);
  1429                     }
  1430                 }
  1431             }
  1432 
  1433           /* Store this character into the translated pattern.  */
  1434           memcpy (pat, str, charlen);
  1435           pat += charlen;
  1436           base_pat += in_charlen;
  1437           len_byte -= in_charlen;
  1438         }
  1439 
  1440       /* If char_base is still negative we didn't find any translated
  1441          non-ASCII characters.  */
  1442       if (char_base < 0)
  1443         char_base = 0;
  1444     }
  1445   else
  1446     {
  1447       /* Unibyte buffer.  */
  1448       char_base = 0;
  1449       while (--len >= 0)
  1450         {
  1451           int c, translated, inverse;
  1452 
  1453           /* If we got here and the RE flag is set, it's because we're
  1454              dealing with a regexp known to be trivial, so the backslash
  1455              just quotes the next character.  */
  1456           if (RE && *base_pat == '\\')
  1457             {
  1458               len--;
  1459               raw_pattern_size--;
  1460               base_pat++;
  1461             }
  1462           c = *base_pat++;
  1463           TRANSLATE (translated, trt, c);
  1464           *pat++ = translated;
  1465           /* Check that none of C's equivalents violates the
  1466              assumptions of boyer_moore.  */
  1467           TRANSLATE (inverse, inverse_trt, c);
  1468           while (1)
  1469             {
  1470               if (inverse >= 0200)
  1471                 {
  1472                   boyer_moore_ok = 0;
  1473                   break;
  1474                 }
  1475               if (c == inverse)
  1476                 break;
  1477               TRANSLATE (inverse, inverse_trt, inverse);
  1478             }
  1479         }
  1480     }
  1481 
  1482   len_byte = pat - patbuf;
  1483   pat = base_pat = patbuf;
  1484 
  1485   EMACS_INT result
  1486     = (boyer_moore_ok
  1487        ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
  1488                       pos_byte, lim_byte,
  1489                       char_base)
  1490        : simple_search (n, pat, raw_pattern_size, len_byte, trt,
  1491                         pos, pos_byte, lim, lim_byte));
  1492   SAFE_FREE ();
  1493   return result;
  1494 }
  1495 
  1496 /* Search for the Nth occurrence of STRING in the current buffer,
  1497    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
  1498 
  1499    If RE, look for matches against the regular expression STRING instead;
  1500    if POSIX, enable POSIX style backtracking within that regular
  1501    expression.
  1502 
  1503    If N is positive, search forward; in this case, LIM must be greater
  1504    than POS.
  1505 
  1506    If N is negative, search backward; LIM must be less than POS.
  1507 
  1508    Return -X if there are X remaining occurrences or matches,
  1509    or else the position at the beginning (if N is negative) or the end
  1510    (if N is positive) of the Nth occurrence or match against STRING.
  1511 
  1512    Use TRT and INVERSE_TRT as character translation tables.  */
  1513 
  1514 EMACS_INT
  1515 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
  1516                ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
  1517                bool RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
  1518 {
  1519   if (running_asynch_code)
  1520     save_search_regs ();
  1521 
  1522   /* Searching 0 times means don't move.  */
  1523   /* Null string is found at starting position.  */
  1524   if (n == 0 || SCHARS (string) == 0)
  1525     {
  1526       set_search_regs (pos_byte, 0);
  1527       return pos;
  1528     }
  1529 
  1530   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
  1531     pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte,
  1532                             n, trt, inverse_trt, posix);
  1533   else
  1534     pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte,
  1535                                 n, RE, trt, inverse_trt, posix);
  1536 
  1537   return pos;
  1538 }
  1539 
  1540 /* Do a simple string search N times for the string PAT,
  1541    whose length is LEN/LEN_BYTE,
  1542    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
  1543    TRT is the translation table.
  1544 
  1545    Return the character position where the match is found.
  1546    Otherwise, if M matches remained to be found, return -M.
  1547 
  1548    This kind of search works regardless of what is in PAT and
  1549    regardless of what is in TRT.  It is used in cases where
  1550    boyer_moore cannot work.  */
  1551 
  1552 static EMACS_INT
  1553 simple_search (EMACS_INT n, unsigned char *pat,
  1554                ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
  1555                ptrdiff_t pos, ptrdiff_t pos_byte,
  1556                ptrdiff_t lim, ptrdiff_t lim_byte)
  1557 {
  1558   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  1559   bool forward = n > 0;
  1560   /* Number of buffer bytes matched.  Note that this may be different
  1561      from len_byte in a multibyte buffer.  */
  1562   ptrdiff_t match_byte = PTRDIFF_MIN;
  1563 
  1564   if (lim > pos && multibyte)
  1565     while (n > 0)
  1566       {
  1567         while (1)
  1568           {
  1569             /* Try matching at position POS.  */
  1570             ptrdiff_t this_pos_byte = pos_byte;
  1571             ptrdiff_t this_len = len;
  1572             unsigned char *p = pat;
  1573             if (pos + len > lim || pos_byte + len_byte > lim_byte)
  1574               goto stop;
  1575 
  1576             while (this_len > 0)
  1577               {
  1578                 int charlen, pat_ch = string_char_and_length (p, &charlen);
  1579                 int buf_charlen, buf_ch
  1580                   = string_char_and_length (BYTE_POS_ADDR (this_pos_byte),
  1581                                             &buf_charlen);
  1582                 TRANSLATE (buf_ch, trt, buf_ch);
  1583 
  1584                 if (buf_ch != pat_ch)
  1585                   break;
  1586 
  1587                 this_len--;
  1588                 p += charlen;
  1589 
  1590                 this_pos_byte += buf_charlen;
  1591               }
  1592 
  1593             if (this_len == 0)
  1594               {
  1595                 match_byte = this_pos_byte - pos_byte;
  1596                 pos += len;
  1597                 pos_byte += match_byte;
  1598                 break;
  1599               }
  1600 
  1601             inc_both (&pos, &pos_byte);
  1602           }
  1603 
  1604         n--;
  1605       }
  1606   else if (lim > pos)
  1607     while (n > 0)
  1608       {
  1609         while (1)
  1610           {
  1611             /* Try matching at position POS.  */
  1612             ptrdiff_t this_pos = pos;
  1613             ptrdiff_t this_len = len;
  1614             unsigned char *p = pat;
  1615 
  1616             if (pos + len > lim)
  1617               goto stop;
  1618 
  1619             while (this_len > 0)
  1620               {
  1621                 int pat_ch = *p++;
  1622                 int buf_ch = FETCH_BYTE (this_pos);
  1623                 TRANSLATE (buf_ch, trt, buf_ch);
  1624 
  1625                 if (buf_ch != pat_ch)
  1626                   break;
  1627 
  1628                 this_len--;
  1629                 this_pos++;
  1630               }
  1631 
  1632             if (this_len == 0)
  1633               {
  1634                 match_byte = len;
  1635                 pos += len;
  1636                 break;
  1637               }
  1638 
  1639             pos++;
  1640           }
  1641 
  1642         n--;
  1643       }
  1644   /* Backwards search.  */
  1645   else if (lim < pos && multibyte)
  1646     while (n < 0)
  1647       {
  1648         while (1)
  1649           {
  1650             /* Try matching at position POS.  */
  1651             ptrdiff_t this_pos = pos;
  1652             ptrdiff_t this_pos_byte = pos_byte;
  1653             ptrdiff_t this_len = len;
  1654             const unsigned char *p = pat + len_byte;
  1655 
  1656             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
  1657               goto stop;
  1658 
  1659             while (this_len > 0)
  1660               {
  1661                 int pat_ch, buf_ch;
  1662 
  1663                 dec_both (&this_pos, &this_pos_byte);
  1664                 p -= raw_prev_char_len (p);
  1665                 pat_ch = STRING_CHAR (p);
  1666                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
  1667                 TRANSLATE (buf_ch, trt, buf_ch);
  1668 
  1669                 if (buf_ch != pat_ch)
  1670                   break;
  1671 
  1672                 this_len--;
  1673               }
  1674 
  1675             if (this_len == 0)
  1676               {
  1677                 match_byte = pos_byte - this_pos_byte;
  1678                 pos = this_pos;
  1679                 pos_byte = this_pos_byte;
  1680                 break;
  1681               }
  1682 
  1683             dec_both (&pos, &pos_byte);
  1684           }
  1685 
  1686         n++;
  1687       }
  1688   else if (lim < pos)
  1689     while (n < 0)
  1690       {
  1691         while (1)
  1692           {
  1693             /* Try matching at position POS.  */
  1694             ptrdiff_t this_pos = pos - len;
  1695             ptrdiff_t this_len = len;
  1696             unsigned char *p = pat;
  1697 
  1698             if (this_pos < lim)
  1699               goto stop;
  1700 
  1701             while (this_len > 0)
  1702               {
  1703                 int pat_ch = *p++;
  1704                 int buf_ch = FETCH_BYTE (this_pos);
  1705                 TRANSLATE (buf_ch, trt, buf_ch);
  1706 
  1707                 if (buf_ch != pat_ch)
  1708                   break;
  1709                 this_len--;
  1710                 this_pos++;
  1711               }
  1712 
  1713             if (this_len == 0)
  1714               {
  1715                 match_byte = len;
  1716                 pos -= len;
  1717                 break;
  1718               }
  1719 
  1720             pos--;
  1721           }
  1722 
  1723         n++;
  1724       }
  1725 
  1726  stop:
  1727   if (n == 0)
  1728     {
  1729       eassert (match_byte != PTRDIFF_MIN);
  1730       if (forward)
  1731         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
  1732       else
  1733         set_search_regs (multibyte ? pos_byte : pos, match_byte);
  1734 
  1735       return pos;
  1736     }
  1737   else if (n > 0)
  1738     return -n;
  1739   else
  1740     return n;
  1741 }
  1742 
  1743 /* Do Boyer-Moore search N times for the string BASE_PAT,
  1744    whose length is LEN_BYTE,
  1745    from buffer position POS_BYTE until LIM_BYTE.
  1746    DIRECTION says which direction we search in.
  1747    TRT and INVERSE_TRT are translation tables.
  1748    Characters in PAT are already translated by TRT.
  1749 
  1750    This kind of search works if all the characters in BASE_PAT that
  1751    have nontrivial translation are the same aside from the last byte.
  1752    This makes it possible to translate just the last byte of a
  1753    character, and do so after just a simple test of the context.
  1754    CHAR_BASE is nonzero if there is such a non-ASCII character.
  1755 
  1756    If that criterion is not satisfied, do not call this function.  */
  1757 
  1758 static EMACS_INT
  1759 boyer_moore (EMACS_INT n, unsigned char *base_pat,
  1760              ptrdiff_t len_byte,
  1761              Lisp_Object trt, Lisp_Object inverse_trt,
  1762              ptrdiff_t pos_byte, ptrdiff_t lim_byte,
  1763              int char_base)
  1764 {
  1765   int direction = ((n > 0) ? 1 : -1);
  1766   register ptrdiff_t dirlen;
  1767   ptrdiff_t limit;
  1768   int stride_for_teases = 0;
  1769   int BM_tab[0400];
  1770   register unsigned char *cursor, *p_limit;
  1771   register ptrdiff_t i;
  1772   register int j;
  1773   unsigned char *pat, *pat_end;
  1774   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  1775 
  1776   unsigned char simple_translate[0400];
  1777   /* These are set to the preceding bytes of a byte to be translated
  1778      if char_base is nonzero.  As the maximum byte length of a
  1779      multibyte character is 5, we have to check at most four previous
  1780      bytes.  */
  1781   int translate_prev_byte1 = 0;
  1782   int translate_prev_byte2 = 0;
  1783   int translate_prev_byte3 = 0;
  1784 
  1785   /* The general approach is that we are going to maintain that we know
  1786      the first (closest to the present position, in whatever direction
  1787      we're searching) character that could possibly be the last
  1788      (furthest from present position) character of a valid match.  We
  1789      advance the state of our knowledge by looking at that character
  1790      and seeing whether it indeed matches the last character of the
  1791      pattern.  If it does, we take a closer look.  If it does not, we
  1792      move our pointer (to putative last characters) as far as is
  1793      logically possible.  This amount of movement, which I call a
  1794      stride, will be the length of the pattern if the actual character
  1795      appears nowhere in the pattern, otherwise it will be the distance
  1796      from the last occurrence of that character to the end of the
  1797      pattern.  If the amount is zero we have a possible match.  */
  1798 
  1799   /* Here we make a "mickey mouse" BM table.  The stride of the search
  1800      is determined only by the last character of the putative match.
  1801      If that character does not match, we will stride the proper
  1802      distance to propose a match that superimposes it on the last
  1803      instance of a character that matches it (per trt), or misses
  1804      it entirely if there is none. */
  1805 
  1806   dirlen = len_byte * direction;
  1807 
  1808   /* Record position after the end of the pattern.  */
  1809   pat_end = base_pat + len_byte;
  1810   /* BASE_PAT points to a character that we start scanning from.
  1811      It is the first character in a forward search,
  1812      the last character in a backward search.  */
  1813   if (direction < 0)
  1814     base_pat = pat_end - 1;
  1815 
  1816   /* A character that does not appear in the pattern induces a
  1817      stride equal to the pattern length.  */
  1818   for (i = 0; i < 0400; i++)
  1819     BM_tab[i] = dirlen;
  1820 
  1821   /* We use this for translation, instead of TRT itself.
  1822      We fill this in to handle the characters that actually
  1823      occur in the pattern.  Others don't matter anyway!  */
  1824   for (i = 0; i < 0400; i++)
  1825     simple_translate[i] = i;
  1826 
  1827   if (char_base)
  1828     {
  1829       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
  1830          byte following them are the target of translation.  */
  1831       eassume (0x80 <= char_base && char_base <= MAX_CHAR);
  1832       unsigned char str[MAX_MULTIBYTE_LENGTH];
  1833       int cblen = CHAR_STRING (char_base, str);
  1834 
  1835       translate_prev_byte1 = str[cblen - 2];
  1836       if (cblen > 2)
  1837         {
  1838           translate_prev_byte2 = str[cblen - 3];
  1839           if (cblen > 3)
  1840             translate_prev_byte3 = str[cblen - 4];
  1841         }
  1842     }
  1843 
  1844   i = 0;
  1845   while (i != dirlen)
  1846     {
  1847       unsigned char *ptr = base_pat + i;
  1848       i += direction;
  1849       if (! NILP (trt))
  1850         {
  1851           /* If the byte currently looking at is the last of a
  1852              character to check case-equivalents, set CH to that
  1853              character.  An ASCII character and a non-ASCII character
  1854              matching with CHAR_BASE are to be checked.  */
  1855           int ch = -1;
  1856 
  1857           if (ASCII_CHAR_P (*ptr) || ! multibyte)
  1858             ch = *ptr;
  1859           else if (char_base
  1860                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
  1861             {
  1862               unsigned char *charstart = ptr - 1;
  1863 
  1864               while (! (CHAR_HEAD_P (*charstart)))
  1865                 charstart--;
  1866               ch = STRING_CHAR (charstart);
  1867               if (char_base != (ch & ~0x3F))
  1868                 ch = -1;
  1869             }
  1870 
  1871           if (ch >= 0200 && multibyte)
  1872             j = (ch & 0x3F) | 0200;
  1873           else
  1874             j = *ptr;
  1875 
  1876           if (i == dirlen)
  1877             stride_for_teases = BM_tab[j];
  1878 
  1879           BM_tab[j] = dirlen - i;
  1880           /* A translation table is accompanied by its inverse -- see
  1881              comment following downcase_table for details.  */
  1882           if (ch >= 0)
  1883             {
  1884               int starting_ch = ch;
  1885               int starting_j = j;
  1886 
  1887               while (1)
  1888                 {
  1889                   TRANSLATE (ch, inverse_trt, ch);
  1890                   if (ch >= 0200 && multibyte)
  1891                     j = (ch & 0x3F) | 0200;
  1892                   else
  1893                     j = ch;
  1894 
  1895                   /* For all the characters that map into CH,
  1896                      set up simple_translate to map the last byte
  1897                      into STARTING_J.  */
  1898                   simple_translate[j] = starting_j;
  1899                   if (ch == starting_ch)
  1900                     break;
  1901                   BM_tab[j] = dirlen - i;
  1902                 }
  1903             }
  1904         }
  1905       else
  1906         {
  1907           j = *ptr;
  1908 
  1909           if (i == dirlen)
  1910             stride_for_teases = BM_tab[j];
  1911           BM_tab[j] = dirlen - i;
  1912         }
  1913       /* stride_for_teases tells how much to stride if we get a
  1914          match on the far character but are subsequently
  1915          disappointed, by recording what the stride would have been
  1916          for that character if the last character had been
  1917          different.  */
  1918     }
  1919   pos_byte += dirlen - ((direction > 0) ? direction : 0);
  1920   /* loop invariant - POS_BYTE points at where last char (first
  1921      char if reverse) of pattern would align in a possible match.  */
  1922   while (n != 0)
  1923     {
  1924       ptrdiff_t tail_end;
  1925       unsigned char *tail_end_ptr;
  1926 
  1927       /* It's been reported that some (broken) compiler thinks that
  1928          Boolean expressions in an arithmetic context are unsigned.
  1929          Using an explicit ?1:0 prevents this.  */
  1930       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
  1931           < 0)
  1932         return (n * (0 - direction));
  1933       /* First we do the part we can by pointers (maybe nothing) */
  1934       maybe_quit ();
  1935       pat = base_pat;
  1936       limit = pos_byte - dirlen + direction;
  1937       if (direction > 0)
  1938         {
  1939           limit = BUFFER_CEILING_OF (limit);
  1940           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
  1941              can take on without hitting edge of buffer or the gap.  */
  1942           limit = min (limit, pos_byte + 20000);
  1943           limit = min (limit, lim_byte - 1);
  1944         }
  1945       else
  1946         {
  1947           limit = BUFFER_FLOOR_OF (limit);
  1948           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
  1949              can take on without hitting edge of buffer or the gap.  */
  1950           limit = max (limit, pos_byte - 20000);
  1951           limit = max (limit, lim_byte);
  1952         }
  1953       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
  1954       tail_end_ptr = BYTE_POS_ADDR (tail_end);
  1955 
  1956       if ((limit - pos_byte) * direction > 20)
  1957         {
  1958           unsigned char *p2;
  1959 
  1960           p_limit = BYTE_POS_ADDR (limit);
  1961           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
  1962           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
  1963           while (1)             /* use one cursor setting as long as i can */
  1964             {
  1965               if (direction > 0) /* worth duplicating */
  1966                 {
  1967                   while (cursor <= p_limit)
  1968                     {
  1969                       if (BM_tab[*cursor] == 0)
  1970                         goto hit;
  1971                       cursor += BM_tab[*cursor];
  1972                     }
  1973                 }
  1974               else
  1975                 {
  1976                   while (cursor >= p_limit)
  1977                     {
  1978                       if (BM_tab[*cursor] == 0)
  1979                         goto hit;
  1980                       cursor += BM_tab[*cursor];
  1981                     }
  1982                 }
  1983               /* If you are here, cursor is beyond the end of the
  1984                  searched region.  You fail to match within the
  1985                  permitted region and would otherwise try a character
  1986                  beyond that region.  */
  1987               break;
  1988 
  1989             hit:
  1990               i = dirlen - direction;
  1991               if (! NILP (trt))
  1992                 {
  1993                   while ((i -= direction) + direction != 0)
  1994                     {
  1995                       int ch;
  1996                       cursor -= direction;
  1997                       /* Translate only the last byte of a character.  */
  1998                       if (! multibyte
  1999                           || ((cursor == tail_end_ptr
  2000                                || CHAR_HEAD_P (cursor[1]))
  2001                               && (CHAR_HEAD_P (cursor[0])
  2002                                   /* Check if this is the last byte of
  2003                                      a translatable character.  */
  2004                                   || (translate_prev_byte1 == cursor[-1]
  2005                                       && (CHAR_HEAD_P (translate_prev_byte1)
  2006                                           || (translate_prev_byte2 == cursor[-2]
  2007                                               && (CHAR_HEAD_P (translate_prev_byte2)
  2008                                                   || (translate_prev_byte3 == cursor[-3]))))))))
  2009                         ch = simple_translate[*cursor];
  2010                       else
  2011                         ch = *cursor;
  2012                       if (pat[i] != ch)
  2013                         break;
  2014                     }
  2015                 }
  2016               else
  2017                 {
  2018                   while ((i -= direction) + direction != 0)
  2019                     {
  2020                       cursor -= direction;
  2021                       if (pat[i] != *cursor)
  2022                         break;
  2023                     }
  2024                 }
  2025               cursor += dirlen - i - direction; /* fix cursor */
  2026               if (i + direction == 0)
  2027                 {
  2028                   ptrdiff_t position, start, end;
  2029 #ifdef REL_ALLOC
  2030                   ptrdiff_t cursor_off;
  2031 #endif
  2032 
  2033                   cursor -= direction;
  2034 
  2035                   position = pos_byte + cursor - p2 + ((direction > 0)
  2036                                                        ? 1 - len_byte : 0);
  2037 #ifdef REL_ALLOC
  2038                   /* set_search_regs might call malloc, which could
  2039                      cause ralloc.c relocate buffer text.  We need to
  2040                      update pointers into buffer text due to that.  */
  2041                   cursor_off = cursor - p2;
  2042 #endif
  2043                   set_search_regs (position, len_byte);
  2044 #ifdef REL_ALLOC
  2045                   p_limit = BYTE_POS_ADDR (limit);
  2046                   p2 = BYTE_POS_ADDR (pos_byte);
  2047                   cursor = p2 + cursor_off;
  2048 #endif
  2049 
  2050                   if (NILP (Vinhibit_changing_match_data))
  2051                     {
  2052                       start = search_regs.start[0];
  2053                       end = search_regs.end[0];
  2054                     }
  2055                   else
  2056                     /* If Vinhibit_changing_match_data is non-nil,
  2057                        search_regs will not be changed.  So let's
  2058                        compute start and end here.  */
  2059                     {
  2060                       start = BYTE_TO_CHAR (position);
  2061                       end = BYTE_TO_CHAR (position + len_byte);
  2062                     }
  2063 
  2064                   if ((n -= direction) != 0)
  2065                     cursor += dirlen; /* to resume search */
  2066                   else
  2067                     return direction > 0 ? end : start;
  2068                 }
  2069               else
  2070                 cursor += stride_for_teases; /* <sigh> we lose -  */
  2071             }
  2072           pos_byte += cursor - p2;
  2073         }
  2074       else
  2075         /* Now we'll pick up a clump that has to be done the hard
  2076            way because it covers a discontinuity.  */
  2077         {
  2078           limit = ((direction > 0)
  2079                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
  2080                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
  2081           limit = ((direction > 0)
  2082                    ? min (limit + len_byte, lim_byte - 1)
  2083                    : max (limit - len_byte, lim_byte));
  2084           /* LIMIT is now the last value POS_BYTE can have
  2085              and still be valid for a possible match.  */
  2086           while (1)
  2087             {
  2088               /* This loop can be coded for space rather than
  2089                  speed because it will usually run only once.
  2090                  (the reach is at most len + 21, and typically
  2091                  does not exceed len).  */
  2092               while ((limit - pos_byte) * direction >= 0)
  2093                 {
  2094                   int ch = FETCH_BYTE (pos_byte);
  2095                   if (BM_tab[ch] == 0)
  2096                     goto hit2;
  2097                   pos_byte += BM_tab[ch];
  2098                 }
  2099               break;    /* ran off the end */
  2100 
  2101             hit2:
  2102               /* Found what might be a match.  */
  2103               i = dirlen - direction;
  2104               while ((i -= direction) + direction != 0)
  2105                 {
  2106                   int ch;
  2107                   unsigned char *ptr;
  2108                   pos_byte -= direction;
  2109                   ptr = BYTE_POS_ADDR (pos_byte);
  2110                   /* Translate only the last byte of a character.  */
  2111                   if (! multibyte
  2112                       || ((ptr == tail_end_ptr
  2113                            || CHAR_HEAD_P (ptr[1]))
  2114                           && (CHAR_HEAD_P (ptr[0])
  2115                               /* Check if this is the last byte of a
  2116                                  translatable character.  */
  2117                               || (translate_prev_byte1 == ptr[-1]
  2118                                   && (CHAR_HEAD_P (translate_prev_byte1)
  2119                                       || (translate_prev_byte2 == ptr[-2]
  2120                                           && (CHAR_HEAD_P (translate_prev_byte2)
  2121                                               || translate_prev_byte3 == ptr[-3])))))))
  2122                     ch = simple_translate[*ptr];
  2123                   else
  2124                     ch = *ptr;
  2125                   if (pat[i] != ch)
  2126                     break;
  2127                 }
  2128               /* Above loop has moved POS_BYTE part or all the way
  2129                  back to the first pos (last pos if reverse).
  2130                  Set it once again at the last (first if reverse) char.  */
  2131               pos_byte += dirlen - i - direction;
  2132               if (i + direction == 0)
  2133                 {
  2134                   ptrdiff_t position, start, end;
  2135                   pos_byte -= direction;
  2136 
  2137                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
  2138                   set_search_regs (position, len_byte);
  2139 
  2140                   if (NILP (Vinhibit_changing_match_data))
  2141                     {
  2142                       start = search_regs.start[0];
  2143                       end = search_regs.end[0];
  2144                     }
  2145                   else
  2146                     /* If Vinhibit_changing_match_data is non-nil,
  2147                        search_regs will not be changed.  So let's
  2148                        compute start and end here.  */
  2149                     {
  2150                       start = BYTE_TO_CHAR (position);
  2151                       end = BYTE_TO_CHAR (position + len_byte);
  2152                     }
  2153 
  2154                   if ((n -= direction) != 0)
  2155                     pos_byte += dirlen; /* to resume search */
  2156                   else
  2157                     return direction > 0 ? end : start;
  2158                 }
  2159               else
  2160                 pos_byte += stride_for_teases;
  2161             }
  2162           }
  2163       /* We have done one clump.  Can we continue? */
  2164       if ((lim_byte - pos_byte) * direction < 0)
  2165         return ((0 - n) * direction);
  2166     }
  2167   return BYTE_TO_CHAR (pos_byte);
  2168 }
  2169 
  2170 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
  2171    for the overall match just found in the current buffer.
  2172    Also clear out the match data for registers 1 and up.  */
  2173 
  2174 static void
  2175 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
  2176 {
  2177   ptrdiff_t i;
  2178 
  2179   if (!NILP (Vinhibit_changing_match_data))
  2180     return;
  2181 
  2182   /* Make sure we have registers in which to store
  2183      the match position.  */
  2184   if (search_regs.num_regs == 0)
  2185     {
  2186       search_regs.start = xmalloc (2 * sizeof *search_regs.start);
  2187       search_regs.end = xmalloc (2 * sizeof *search_regs.end);
  2188       search_regs.num_regs = 2;
  2189     }
  2190 
  2191   /* Clear out the other registers.  */
  2192   for (i = 1; i < search_regs.num_regs; i++)
  2193     {
  2194       search_regs.start[i] = -1;
  2195       search_regs.end[i] = -1;
  2196     }
  2197 
  2198   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
  2199   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
  2200   XSETBUFFER (last_thing_searched, current_buffer);
  2201 }
  2202 
  2203 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
  2204        "MSearch backward: ",
  2205        doc: /* Search backward from point for STRING.
  2206 Set point to the beginning of the occurrence found, and return point.
  2207 An optional second argument bounds the search; it is a buffer position.
  2208   The match found must not begin before that position.  A value of nil
  2209   means search to the beginning of the accessible portion of the buffer.
  2210 Optional third argument, if t, means if fail just return nil (no error).
  2211   If not nil and not t, position at limit of search and return nil.
  2212 Optional fourth argument COUNT, if a positive number, means to search
  2213   for COUNT successive occurrences.  If COUNT is negative, search
  2214   forward, instead of backward, for -COUNT occurrences.  A value of
  2215   nil means the same as 1.
  2216 With COUNT positive, the match found is the COUNTth to last one (or
  2217   last, if COUNT is 1 or nil) in the buffer located entirely before
  2218   the origin of the search; correspondingly with COUNT negative.
  2219 
  2220 Search case-sensitivity is determined by the value of the variable
  2221 `case-fold-search', which see.
  2222 
  2223 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
  2224   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2225 {
  2226   return search_command (string, bound, noerror, count, -1, false, false);
  2227 }
  2228 
  2229 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
  2230        doc: /* Search forward from point for STRING.
  2231 Set point to the end of the occurrence found, and return point.
  2232 An optional second argument bounds the search; it is a buffer position.
  2233   The match found must not end after that position.  A value of nil
  2234   means search to the end of the accessible portion of the buffer.
  2235 Optional third argument, if t, means if fail just return nil (no error).
  2236   If not nil and not t, move to limit of search and return nil.
  2237 Optional fourth argument COUNT, if a positive number, means to search
  2238   for COUNT successive occurrences.  If COUNT is negative, search
  2239   backward, instead of forward, for -COUNT occurrences.  A value of
  2240   nil means the same as 1.
  2241 With COUNT positive, the match found is the COUNTth one (or first,
  2242   if COUNT is 1 or nil) in the buffer located entirely after the
  2243   origin of the search; correspondingly with COUNT negative.
  2244 
  2245 Search case-sensitivity is determined by the value of the variable
  2246 `case-fold-search', which see.
  2247 
  2248 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
  2249   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2250 {
  2251   return search_command (string, bound, noerror, count, 1, false, false);
  2252 }
  2253 
  2254 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
  2255        "sRE search backward: ",
  2256        doc: /* Search backward from point for regular expression REGEXP.
  2257 This function is almost identical to `re-search-forward', except that
  2258 by default it searches backward instead of forward, and the sign of
  2259 COUNT also indicates exactly the opposite searching direction.
  2260 See `re-search-forward' for details.
  2261 
  2262 Note that searching backwards may give a shorter match than expected,
  2263 because REGEXP is still matched in the forward direction.  See Info
  2264 anchor `(elisp) re-search-backward' for details.  */)
  2265   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2266 {
  2267   return search_command (regexp, bound, noerror, count, -1, true, false);
  2268 }
  2269 
  2270 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
  2271        "sRE search: ",
  2272        doc: /* Search forward from point for regular expression REGEXP.
  2273 Set point to the end of the occurrence found, and return point.
  2274 The optional second argument BOUND is a buffer position that bounds
  2275   the search.  The match found must not end after that position.  A
  2276   value of nil means search to the end of the accessible portion of
  2277   the buffer.
  2278 The optional third argument NOERROR indicates how errors are handled
  2279   when the search fails.  If it is nil or omitted, emit an error; if
  2280   it is t, simply return nil and do nothing; if it is neither nil nor
  2281   t, move to the limit of search and return nil.
  2282 The optional fourth argument COUNT is a number that indicates the
  2283   search direction and the number of occurrences to search for.  If it
  2284   is positive, search forward for COUNT successive occurrences; if it
  2285   is negative, search backward, instead of forward, for -COUNT
  2286   occurrences.  A value of nil means the same as 1.
  2287 With COUNT positive/negative, the match found is the COUNTth/-COUNTth
  2288   one in the buffer located entirely after/before the origin of the
  2289   search.
  2290 
  2291 Search case-sensitivity is determined by the value of the variable
  2292 `case-fold-search', which see.
  2293 
  2294 See also the functions `match-beginning', `match-end', `match-string',
  2295 and `replace-match'.  */)
  2296   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2297 {
  2298   return search_command (regexp, bound, noerror, count, 1, true, false);
  2299 }
  2300 
  2301 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
  2302        "sPosix search backward: ",
  2303        doc: /* Search backward from point for match for REGEXP according to Posix rules.
  2304 Find the longest match in accord with Posix regular expression rules.
  2305 Set point to the beginning of the occurrence found, and return point.
  2306 An optional second argument bounds the search; it is a buffer position.
  2307   The match found must not begin before that position.  A value of nil
  2308   means search to the beginning of the accessible portion of the buffer.
  2309 Optional third argument, if t, means if fail just return nil (no error).
  2310   If not nil and not t, position at limit of search and return nil.
  2311 Optional fourth argument COUNT, if a positive number, means to search
  2312   for COUNT successive occurrences.  If COUNT is negative, search
  2313   forward, instead of backward, for -COUNT occurrences.  A value of
  2314   nil means the same as 1.
  2315 With COUNT positive, the match found is the COUNTth to last one (or
  2316   last, if COUNT is 1 or nil) in the buffer located entirely before
  2317   the origin of the search; correspondingly with COUNT negative.
  2318 
  2319 Search case-sensitivity is determined by the value of the variable
  2320 `case-fold-search', which see.
  2321 
  2322 See also the functions `match-beginning', `match-end', `match-string',
  2323 and `replace-match'.  */)
  2324   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2325 {
  2326   return search_command (regexp, bound, noerror, count, -1, true, true);
  2327 }
  2328 
  2329 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
  2330        "sPosix search: ",
  2331        doc: /* Search forward from point for REGEXP according to Posix rules.
  2332 Find the longest match in accord with Posix regular expression rules.
  2333 Set point to the end of the occurrence found, and return point.
  2334 An optional second argument bounds the search; it is a buffer position.
  2335   The match found must not end after that position.  A value of nil
  2336   means search to the end of the accessible portion of the buffer.
  2337 Optional third argument, if t, means if fail just return nil (no error).
  2338   If not nil and not t, move to limit of search and return nil.
  2339 Optional fourth argument COUNT, if a positive number, means to search
  2340   for COUNT successive occurrences.  If COUNT is negative, search
  2341   backward, instead of forward, for -COUNT occurrences.  A value of
  2342   nil means the same as 1.
  2343 With COUNT positive, the match found is the COUNTth one (or first,
  2344   if COUNT is 1 or nil) in the buffer located entirely after the
  2345   origin of the search; correspondingly with COUNT negative.
  2346 
  2347 Search case-sensitivity is determined by the value of the variable
  2348 `case-fold-search', which see.
  2349 
  2350 See also the functions `match-beginning', `match-end', `match-string',
  2351 and `replace-match'.  */)
  2352   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2353 {
  2354   return search_command (regexp, bound, noerror, count, 1, true, true);
  2355 }
  2356 
  2357 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
  2358        doc: /* Replace text matched by last search with NEWTEXT.
  2359 Leave point at the end of the replacement text.
  2360 
  2361 If optional second arg FIXEDCASE is non-nil, do not alter the case of
  2362 the replacement text.  Otherwise, maybe capitalize the whole text, or
  2363 maybe just word initials, based on the replaced text.  If the replaced
  2364 text has only capital letters and has at least one multiletter word,
  2365 convert NEWTEXT to all caps.  Otherwise if all words are capitalized
  2366 in the replaced text, capitalize each word in NEWTEXT.  Note that
  2367 what exactly is a word is determined by the syntax tables in effect
  2368 in the current buffer.
  2369 
  2370 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
  2371 Otherwise treat `\\' as special:
  2372   `\\&' in NEWTEXT means substitute original matched text.
  2373   `\\N' means substitute what matched the Nth `\\(...\\)'.
  2374        If Nth parens didn't match, substitute nothing.
  2375   `\\\\' means insert one `\\'.
  2376   `\\?' is treated literally
  2377        (for compatibility with `query-replace-regexp').
  2378   Any other character following `\\' signals an error.
  2379 Case conversion does not apply to these substitutions.
  2380 
  2381 If optional fourth argument STRING is non-nil, it should be a string
  2382 to act on; this should be the string on which the previous match was
  2383 done via `string-match'.  In this case, `replace-match' creates and
  2384 returns a new string, made by copying STRING and replacing the part of
  2385 STRING that was matched (the original STRING itself is not altered).
  2386 
  2387 The optional fifth argument SUBEXP specifies a subexpression;
  2388 it says to replace just that subexpression with NEWTEXT,
  2389 rather than replacing the entire matched text.
  2390 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
  2391 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
  2392 NEWTEXT in place of subexp N.
  2393 This is useful only after a regular expression search or match,
  2394 since only regular expressions have distinguished subexpressions.  */)
  2395   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
  2396 {
  2397   enum { nochange, all_caps, cap_initial } case_action;
  2398   ptrdiff_t pos, pos_byte;
  2399   bool some_multiletter_word;
  2400   bool some_lowercase;
  2401   bool some_uppercase;
  2402   bool some_nonuppercase_initial;
  2403   int c, prevc;
  2404   ptrdiff_t sub;
  2405   ptrdiff_t opoint, newpoint;
  2406 
  2407   CHECK_STRING (newtext);
  2408 
  2409   if (! NILP (string))
  2410     CHECK_STRING (string);
  2411 
  2412   /* Most replacement texts don't contain any backslash directives in
  2413      the replacements.  Check whether that's the case, which will
  2414      enable us to take the fast path later.  */
  2415   if (NILP (literal)
  2416       && !memchr (SSDATA (newtext), '\\', SBYTES (newtext)))
  2417     literal = Qt;
  2418 
  2419   case_action = nochange;       /* We tried an initialization */
  2420                                 /* but some C compilers blew it */
  2421 
  2422   ptrdiff_t num_regs = search_regs.num_regs;
  2423   if (num_regs <= 0)
  2424     error ("`replace-match' called before any match found");
  2425 
  2426   sub = !NILP (subexp) ? check_integer_range (subexp, 0, num_regs - 1) : 0;
  2427   ptrdiff_t sub_start = search_regs.start[sub];
  2428   ptrdiff_t sub_end = search_regs.end[sub];
  2429   eassert (sub_start <= sub_end);
  2430 
  2431   /* Check whether the text to replace is present in the buffer/string.  */
  2432   if (! (NILP (string)
  2433          ? BEGV <= sub_start && sub_end <= ZV
  2434          : 0 <= sub_start && sub_end <= SCHARS (string)))
  2435     {
  2436       if (sub_start < 0)
  2437         xsignal2 (Qerror,
  2438                   build_string ("replace-match subexpression does not exist"),
  2439                   subexp);
  2440       args_out_of_range (make_fixnum (sub_start), make_fixnum (sub_end));
  2441     }
  2442 
  2443   if (NILP (fixedcase))
  2444     {
  2445       /* Decide how to casify by examining the matched text. */
  2446       ptrdiff_t last;
  2447 
  2448       pos = sub_start;
  2449       last = sub_end;
  2450 
  2451       if (NILP (string))
  2452         pos_byte = CHAR_TO_BYTE (pos);
  2453       else
  2454         pos_byte = string_char_to_byte (string, pos);
  2455 
  2456       prevc = '\n';
  2457       case_action = all_caps;
  2458 
  2459       /* some_multiletter_word is set nonzero if any original word
  2460          is more than one letter long. */
  2461       some_multiletter_word = 0;
  2462       some_lowercase = 0;
  2463       some_nonuppercase_initial = 0;
  2464       some_uppercase = 0;
  2465 
  2466       while (pos < last)
  2467         {
  2468           if (NILP (string))
  2469             {
  2470               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
  2471               inc_both (&pos, &pos_byte);
  2472             }
  2473           else
  2474             c = fetch_string_char_as_multibyte_advance (string,
  2475                                                         &pos, &pos_byte);
  2476 
  2477           if (lowercasep (c))
  2478             {
  2479               /* Cannot be all caps if any original char is lower case */
  2480 
  2481               some_lowercase = 1;
  2482               if (SYNTAX (prevc) != Sword)
  2483                 some_nonuppercase_initial = 1;
  2484               else
  2485                 some_multiletter_word = 1;
  2486             }
  2487           else if (uppercasep (c))
  2488             {
  2489               some_uppercase = 1;
  2490               if (SYNTAX (prevc) != Sword)
  2491                 ;
  2492               else
  2493                 some_multiletter_word = 1;
  2494             }
  2495           else
  2496             {
  2497               /* If the initial is a caseless word constituent,
  2498                  treat that like a lowercase initial.  */
  2499               if (SYNTAX (prevc) != Sword)
  2500                 some_nonuppercase_initial = 1;
  2501             }
  2502 
  2503           prevc = c;
  2504         }
  2505 
  2506       /* Convert to all caps if the old text is all caps
  2507          and has at least one multiletter word.  */
  2508       if (! some_lowercase && some_multiletter_word)
  2509         case_action = all_caps;
  2510       /* Capitalize each word, if the old text has all capitalized words.  */
  2511       else if (!some_nonuppercase_initial && some_multiletter_word)
  2512         case_action = cap_initial;
  2513       else if (!some_nonuppercase_initial && some_uppercase)
  2514         /* Should x -> yz, operating on X, give Yz or YZ?
  2515            We'll assume the latter.  */
  2516         case_action = all_caps;
  2517       else
  2518         case_action = nochange;
  2519     }
  2520 
  2521   /* Do replacement in a string.  */
  2522   if (!NILP (string))
  2523     {
  2524       Lisp_Object before, after;
  2525 
  2526       before = Fsubstring (string, make_fixnum (0), make_fixnum (sub_start));
  2527       after = Fsubstring (string, make_fixnum (sub_end), Qnil);
  2528 
  2529       /* Substitute parts of the match into NEWTEXT
  2530          if desired.  */
  2531       if (NILP (literal))
  2532         {
  2533           ptrdiff_t lastpos = 0;
  2534           ptrdiff_t lastpos_byte = 0;
  2535           /* We build up the substituted string in ACCUM.  */
  2536           Lisp_Object accum;
  2537           Lisp_Object middle;
  2538           ptrdiff_t length = SBYTES (newtext);
  2539 
  2540           accum = Qnil;
  2541 
  2542           for (pos_byte = 0, pos = 0; pos_byte < length;)
  2543             {
  2544               ptrdiff_t substart = -1;
  2545               ptrdiff_t subend = 0;
  2546               bool delbackslash = 0;
  2547 
  2548               c = fetch_string_char_advance (newtext, &pos, &pos_byte);
  2549 
  2550               if (c == '\\')
  2551                 {
  2552                   c = fetch_string_char_advance (newtext, &pos, &pos_byte);
  2553 
  2554                   if (c == '&')
  2555                     {
  2556                       substart = sub_start;
  2557                       subend = sub_end;
  2558                     }
  2559                   else if (c >= '1' && c <= '9')
  2560                     {
  2561                       if (c - '0' < num_regs
  2562                           && search_regs.start[c - '0'] >= 0)
  2563                         {
  2564                           substart = search_regs.start[c - '0'];
  2565                           subend = search_regs.end[c - '0'];
  2566                         }
  2567                       else
  2568                         {
  2569                           /* If that subexp did not match,
  2570                              replace \\N with nothing.  */
  2571                           substart = 0;
  2572                           subend = 0;
  2573                         }
  2574                     }
  2575                   else if (c == '\\')
  2576                     delbackslash = 1;
  2577                   else if (c != '?')
  2578                     error ("Invalid use of `\\' in replacement text");
  2579                 }
  2580               if (substart >= 0)
  2581                 {
  2582                   if (pos - 2 != lastpos)
  2583                     middle = substring_both (newtext, lastpos,
  2584                                              lastpos_byte,
  2585                                              pos - 2, pos_byte - 2);
  2586                   else
  2587                     middle = Qnil;
  2588                   accum = concat3 (accum, middle,
  2589                                    Fsubstring (string,
  2590                                                make_fixnum (substart),
  2591                                                make_fixnum (subend)));
  2592                   lastpos = pos;
  2593                   lastpos_byte = pos_byte;
  2594                 }
  2595               else if (delbackslash)
  2596                 {
  2597                   middle = substring_both (newtext, lastpos,
  2598                                            lastpos_byte,
  2599                                            pos - 1, pos_byte - 1);
  2600 
  2601                   accum = concat2 (accum, middle);
  2602                   lastpos = pos;
  2603                   lastpos_byte = pos_byte;
  2604                 }
  2605             }
  2606 
  2607           if (pos != lastpos)
  2608             middle = substring_both (newtext, lastpos,
  2609                                      lastpos_byte,
  2610                                      pos, pos_byte);
  2611           else
  2612             middle = Qnil;
  2613 
  2614           newtext = concat2 (accum, middle);
  2615         }
  2616 
  2617       /* Do case substitution in NEWTEXT if desired.  */
  2618       if (case_action == all_caps)
  2619         newtext = Fupcase (newtext);
  2620       else if (case_action == cap_initial)
  2621         newtext = Fupcase_initials (newtext);
  2622 
  2623       return concat3 (before, newtext, after);
  2624     }
  2625 
  2626   /* Record point.  A nonpositive OPOINT is actually an offset from ZV.  */
  2627   opoint = PT <= sub_start ? PT : max (PT, sub_end) - ZV;
  2628 
  2629   /* If we want non-literal replacement,
  2630      perform substitution on the replacement string.  */
  2631   if (NILP (literal))
  2632     {
  2633       ptrdiff_t length = SBYTES (newtext);
  2634       unsigned char *substed;
  2635       ptrdiff_t substed_alloc_size, substed_len;
  2636       bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
  2637       bool str_multibyte = STRING_MULTIBYTE (newtext);
  2638       bool really_changed = 0;
  2639 
  2640       substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
  2641                             ? length * 2 + 100
  2642                             : STRING_BYTES_BOUND);
  2643       substed = xmalloc (substed_alloc_size);
  2644       substed_len = 0;
  2645 
  2646       /* Go thru NEWTEXT, producing the actual text to insert in
  2647          SUBSTED while adjusting multibyteness to that of the current
  2648          buffer.  */
  2649 
  2650       for (pos_byte = 0, pos = 0; pos_byte < length;)
  2651         {
  2652           unsigned char str[MAX_MULTIBYTE_LENGTH];
  2653           const unsigned char *add_stuff = NULL;
  2654           ptrdiff_t add_len = 0;
  2655           ptrdiff_t idx = -1;
  2656           ptrdiff_t begbyte UNINIT;
  2657 
  2658           if (str_multibyte)
  2659             {
  2660               c = fetch_string_char_advance_no_check (newtext,
  2661                                                       &pos, &pos_byte);
  2662               if (!buf_multibyte)
  2663                 c = CHAR_TO_BYTE8 (c);
  2664             }
  2665           else
  2666             {
  2667               /* Note that we don't have to increment POS.  */
  2668               c = SREF (newtext, pos_byte++);
  2669               if (buf_multibyte)
  2670                 c = make_char_multibyte (c);
  2671             }
  2672 
  2673           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
  2674              or set IDX to a match index, which means put that part
  2675              of the buffer text into SUBSTED.  */
  2676 
  2677           if (c == '\\')
  2678             {
  2679               really_changed = 1;
  2680 
  2681               if (str_multibyte)
  2682                 {
  2683                   c = fetch_string_char_advance_no_check (newtext,
  2684                                                           &pos, &pos_byte);
  2685                   if (!buf_multibyte && !ASCII_CHAR_P (c))
  2686                     c = CHAR_TO_BYTE8 (c);
  2687                 }
  2688               else
  2689                 {
  2690                   c = SREF (newtext, pos_byte++);
  2691                   if (buf_multibyte)
  2692                     c = make_char_multibyte (c);
  2693                 }
  2694 
  2695               if (c == '&')
  2696                 idx = sub;
  2697               else if ('1' <= c && c <= '9' && c - '0' < num_regs)
  2698                 {
  2699                   if (search_regs.start[c - '0'] >= 1)
  2700                     idx = c - '0';
  2701                 }
  2702               else if (c == '\\')
  2703                 add_len = 1, add_stuff = (unsigned char *) "\\";
  2704               else
  2705                 {
  2706                   xfree (substed);
  2707                   error ("Invalid use of `\\' in replacement text");
  2708                 }
  2709             }
  2710           else
  2711             {
  2712               add_len = CHAR_STRING (c, str);
  2713               add_stuff = str;
  2714             }
  2715 
  2716           /* If we want to copy part of a previous match,
  2717              set up ADD_STUFF and ADD_LEN to point to it.  */
  2718           if (idx >= 0)
  2719             {
  2720               begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
  2721               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
  2722               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
  2723                 move_gap_both (search_regs.start[idx], begbyte);
  2724             }
  2725 
  2726           /* Now the stuff we want to add to SUBSTED
  2727              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
  2728 
  2729           /* Make sure SUBSTED is big enough.  */
  2730           if (substed_alloc_size - substed_len < add_len)
  2731             substed =
  2732               xpalloc (substed, &substed_alloc_size,
  2733                        add_len - (substed_alloc_size - substed_len),
  2734                        STRING_BYTES_BOUND, 1);
  2735 
  2736           /* We compute this after the call to xpalloc, because that
  2737              could cause buffer text be relocated when ralloc.c is used.  */
  2738           if (idx >= 0)
  2739             add_stuff = BYTE_POS_ADDR (begbyte);
  2740 
  2741           /* Now add to the end of SUBSTED.  */
  2742           if (add_stuff)
  2743             {
  2744               memcpy (substed + substed_len, add_stuff, add_len);
  2745               substed_len += add_len;
  2746             }
  2747         }
  2748 
  2749       if (really_changed)
  2750         newtext = make_specified_string ((const char *) substed, -1,
  2751                                          substed_len, buf_multibyte);
  2752       xfree (substed);
  2753     }
  2754 
  2755   newpoint = sub_start + SCHARS (newtext);
  2756 
  2757   /* Replace the old text with the new in the cleanest possible way.  */
  2758   replace_range (sub_start, sub_end, newtext, 1, 0, 1, true, true);
  2759 
  2760   if (case_action == all_caps)
  2761     Fupcase_region (make_fixnum (search_regs.start[sub]),
  2762                     make_fixnum (newpoint),
  2763                     Qnil);
  2764   else if (case_action == cap_initial)
  2765     Fupcase_initials_region (make_fixnum (search_regs.start[sub]),
  2766                              make_fixnum (newpoint), Qnil);
  2767 
  2768   /* The replace_range etc. functions can trigger modification hooks
  2769      (see signal_before_change and signal_after_change).  Try to error
  2770      out if these hooks clobber the match data since clobbering can
  2771      result in confusing bugs.  We used to check for changes in
  2772      search_regs start and end, but that fails if modification hooks
  2773      remove or add text earlier in the buffer, so just check num_regs
  2774      now. */
  2775   if (search_regs.num_regs != num_regs)
  2776     error ("Match data clobbered by buffer modification hooks");
  2777 
  2778   /* Put point back where it was in the text, if possible.  */
  2779   TEMP_SET_PT (clip_to_bounds (BEGV, opoint + (opoint <= 0 ? ZV : 0), ZV));
  2780   /* Now move point "officially" to the end of the inserted replacement.  */
  2781   move_if_not_intangible (newpoint);
  2782 
  2783   signal_after_change (sub_start, sub_end - sub_start, SCHARS (newtext));
  2784   update_compositions (sub_start, newpoint, CHECK_BORDER);
  2785 
  2786   return Qnil;
  2787 }
  2788 
  2789 static Lisp_Object
  2790 match_limit (Lisp_Object num, bool beginningp)
  2791 {
  2792   EMACS_INT n;
  2793 
  2794   CHECK_FIXNUM (num);
  2795   n = XFIXNUM (num);
  2796   if (n < 0)
  2797     args_out_of_range (num, make_fixnum (0));
  2798   if (search_regs.num_regs <= 0)
  2799     error ("No match data, because no search succeeded");
  2800   if (n >= search_regs.num_regs
  2801       || search_regs.start[n] < 0)
  2802     return Qnil;
  2803   return (make_fixnum ((beginningp) ? search_regs.start[n]
  2804                                     : search_regs.end[n]));
  2805 }
  2806 
  2807 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
  2808        doc: /* Return position of start of text matched by last search.
  2809 SUBEXP, a number, specifies which parenthesized expression in the last
  2810   regexp.
  2811 Value is nil if SUBEXPth pair didn't match, or there were less than
  2812   SUBEXP pairs.
  2813 Zero means the entire text matched by the whole regexp or whole string.
  2814 
  2815 Return value is undefined if the last search failed.  */)
  2816   (Lisp_Object subexp)
  2817 {
  2818   return match_limit (subexp, 1);
  2819 }
  2820 
  2821 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
  2822        doc: /* Return position of end of text matched by last search.
  2823 SUBEXP, a number, specifies which parenthesized expression in the last
  2824   regexp.
  2825 Value is nil if SUBEXPth pair didn't match, or there were less than
  2826   SUBEXP pairs.
  2827 Zero means the entire text matched by the whole regexp or whole string.
  2828 
  2829 Return value is undefined if the last search failed.  */)
  2830   (Lisp_Object subexp)
  2831 {
  2832   return match_limit (subexp, 0);
  2833 }
  2834 
  2835 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
  2836        doc: /* Return a list of positions that record text matched by the last search.
  2837 Element 2N of the returned list is the position of the beginning of the
  2838 match of the Nth subexpression; it corresponds to `(match-beginning N)';
  2839 element 2N + 1 is the position of the end of the match of the Nth
  2840 subexpression; it corresponds to `(match-end N)'.  See `match-beginning'
  2841 and `match-end'.
  2842 If the last search was on a buffer, all the elements are by default
  2843 markers or nil (nil when the Nth pair didn't match); they are integers
  2844 or nil if the search was on a string.  But if the optional argument
  2845 INTEGERS is non-nil, the elements that represent buffer positions are
  2846 always integers, not markers, and (if the search was on a buffer) the
  2847 buffer itself is appended to the list as one additional element.
  2848 
  2849 Use `set-match-data' to reinstate the match data from the elements of
  2850 this list.
  2851 
  2852 Note that non-matching optional groups at the end of the regexp are
  2853 elided instead of being represented with two `nil's each.  For instance:
  2854 
  2855   (progn
  2856     (string-match "^\\(a\\)?\\(b\\)\\(c\\)?$" "b")
  2857     (match-data))
  2858   => (0 1 nil nil 0 1)
  2859 
  2860 If REUSE is a list, store the value in REUSE by destructively modifying it.
  2861 If REUSE is long enough to hold all the values, its length remains the
  2862 same, and any unused elements are set to nil.  If REUSE is not long
  2863 enough, it is extended.  Note that if REUSE is long enough and INTEGERS
  2864 is non-nil, no consing is done to make the return value; this minimizes GC.
  2865 
  2866 If optional third argument RESEAT is non-nil, any previous markers on the
  2867 REUSE list will be modified to point to nowhere.
  2868 
  2869 Return value is undefined if the last search failed.  */)
  2870   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
  2871 {
  2872   Lisp_Object tail, prev;
  2873   Lisp_Object *data;
  2874   ptrdiff_t i, len;
  2875 
  2876   if (!NILP (reseat))
  2877     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
  2878       if (MARKERP (XCAR (tail)))
  2879         {
  2880           unchain_marker (XMARKER (XCAR (tail)));
  2881           XSETCAR (tail, Qnil);
  2882         }
  2883 
  2884   if (NILP (last_thing_searched))
  2885     return Qnil;
  2886 
  2887   prev = Qnil;
  2888 
  2889   USE_SAFE_ALLOCA;
  2890   SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
  2891 
  2892   len = 0;
  2893   for (i = 0; i < search_regs.num_regs; i++)
  2894     {
  2895       ptrdiff_t start = search_regs.start[i];
  2896       if (start >= 0)
  2897         {
  2898           if (BASE_EQ (last_thing_searched, Qt)
  2899               || ! NILP (integers))
  2900             {
  2901               XSETFASTINT (data[2 * i], start);
  2902               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
  2903             }
  2904           else if (BUFFERP (last_thing_searched))
  2905             {
  2906               data[2 * i] = Fmake_marker ();
  2907               Fset_marker (data[2 * i],
  2908                            make_fixnum (start),
  2909                            last_thing_searched);
  2910               data[2 * i + 1] = Fmake_marker ();
  2911               Fset_marker (data[2 * i + 1],
  2912                            make_fixnum (search_regs.end[i]),
  2913                            last_thing_searched);
  2914             }
  2915           else
  2916             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
  2917             emacs_abort ();
  2918 
  2919           len = 2 * i + 2;
  2920         }
  2921       else
  2922         data[2 * i] = data[2 * i + 1] = Qnil;
  2923     }
  2924 
  2925   if (BUFFERP (last_thing_searched) && !NILP (integers))
  2926     {
  2927       data[len] = last_thing_searched;
  2928       len++;
  2929     }
  2930 
  2931   /* If REUSE is not usable, cons up the values and return them.  */
  2932   if (! CONSP (reuse))
  2933     reuse = Flist (len, data);
  2934   else
  2935     {
  2936       /* If REUSE is a list, store as many value elements as will fit
  2937          into the elements of REUSE.  */
  2938       for (i = 0, tail = reuse; CONSP (tail);
  2939            i++, tail = XCDR (tail))
  2940         {
  2941           if (i < len)
  2942             XSETCAR (tail, data[i]);
  2943           else
  2944             XSETCAR (tail, Qnil);
  2945           prev = tail;
  2946         }
  2947 
  2948       /* If we couldn't fit all value elements into REUSE,
  2949          cons up the rest of them and add them to the end of REUSE.  */
  2950       if (i < len)
  2951         XSETCDR (prev, Flist (len - i, data + i));
  2952     }
  2953 
  2954   SAFE_FREE ();
  2955   return reuse;
  2956 }
  2957 
  2958 /* We used to have an internal use variant of `reseat' described as:
  2959 
  2960       If RESEAT is `evaporate', put the markers back on the free list
  2961       immediately.  No other references to the markers must exist in this
  2962       case, so it is used only internally on the unwind stack and
  2963       save-match-data from Lisp.
  2964 
  2965    But it was ill-conceived: those supposedly-internal markers get exposed via
  2966    the undo-list, so freeing them here is unsafe.  */
  2967 
  2968 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
  2969        doc: /* Set internal data on last search match from elements of LIST.
  2970 LIST should have been created by calling `match-data' previously.
  2971 
  2972 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
  2973   (register Lisp_Object list, Lisp_Object reseat)
  2974 {
  2975   ptrdiff_t i;
  2976   register Lisp_Object marker;
  2977 
  2978   if (running_asynch_code)
  2979     save_search_regs ();
  2980 
  2981   CHECK_LIST (list);
  2982 
  2983   /* Unless we find a marker with a buffer or an explicit buffer
  2984      in LIST, assume that this match data came from a string.  */
  2985   last_thing_searched = Qt;
  2986 
  2987   /* Allocate registers if they don't already exist.  */
  2988   {
  2989     ptrdiff_t length = list_length (list) / 2;
  2990 
  2991     if (length > search_regs.num_regs)
  2992       {
  2993         ptrdiff_t num_regs = search_regs.num_regs;
  2994         search_regs.start =
  2995           xpalloc (search_regs.start, &num_regs, length - num_regs,
  2996                    min (PTRDIFF_MAX, UINT_MAX), sizeof *search_regs.start);
  2997         search_regs.end =
  2998           xrealloc (search_regs.end, num_regs * sizeof *search_regs.end);
  2999 
  3000         for (i = search_regs.num_regs; i < num_regs; i++)
  3001           search_regs.start[i] = -1;
  3002 
  3003         search_regs.num_regs = num_regs;
  3004       }
  3005 
  3006     for (i = 0; CONSP (list); i++)
  3007       {
  3008         marker = XCAR (list);
  3009         if (BUFFERP (marker))
  3010           {
  3011             last_thing_searched = marker;
  3012             break;
  3013           }
  3014         if (i >= length)
  3015           break;
  3016         if (NILP (marker))
  3017           {
  3018             search_regs.start[i] = -1;
  3019             list = XCDR (list);
  3020           }
  3021         else
  3022           {
  3023             Lisp_Object from;
  3024             Lisp_Object m;
  3025 
  3026             m = marker;
  3027             if (MARKERP (marker))
  3028               {
  3029                 if (XMARKER (marker)->buffer == 0)
  3030                   XSETFASTINT (marker, 0);
  3031                 else
  3032                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
  3033               }
  3034 
  3035             CHECK_FIXNUM_COERCE_MARKER (marker);
  3036             from = marker;
  3037 
  3038             if (!NILP (reseat) && MARKERP (m))
  3039               {
  3040                 unchain_marker (XMARKER (m));
  3041                 XSETCAR (list, Qnil);
  3042               }
  3043 
  3044             if ((list = XCDR (list), !CONSP (list)))
  3045               break;
  3046 
  3047             m = marker = XCAR (list);
  3048 
  3049             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
  3050               XSETFASTINT (marker, 0);
  3051 
  3052             CHECK_FIXNUM_COERCE_MARKER (marker);
  3053             if (PTRDIFF_MIN <= XFIXNUM (from) && XFIXNUM (from) <= PTRDIFF_MAX
  3054                 && PTRDIFF_MIN <= XFIXNUM (marker)
  3055                 && XFIXNUM (marker) <= PTRDIFF_MAX)
  3056               {
  3057                 search_regs.start[i] = XFIXNUM (from);
  3058                 search_regs.end[i] = XFIXNUM (marker);
  3059               }
  3060             else
  3061               {
  3062                 search_regs.start[i] = -1;
  3063               }
  3064 
  3065             if (!NILP (reseat) && MARKERP (m))
  3066               {
  3067                 unchain_marker (XMARKER (m));
  3068                 XSETCAR (list, Qnil);
  3069               }
  3070           }
  3071         list = XCDR (list);
  3072       }
  3073 
  3074     for (; i < search_regs.num_regs; i++)
  3075       search_regs.start[i] = -1;
  3076   }
  3077 
  3078   return Qnil;
  3079 }
  3080 
  3081 DEFUN ("match-data--translate", Fmatch_data__translate, Smatch_data__translate,
  3082        1, 1, 0,
  3083        doc: /* Add N to all positions in the match data.  Internal.  */)
  3084   (Lisp_Object n)
  3085 {
  3086   CHECK_FIXNUM (n);
  3087   EMACS_INT delta = XFIXNUM (n);
  3088   if (!NILP (last_thing_searched))
  3089     for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
  3090       if (search_regs.start[i] >= 0)
  3091         {
  3092           search_regs.start[i] = max (0, search_regs.start[i] + delta);
  3093           search_regs.end[i] = max (0, search_regs.end[i] + delta);
  3094         }
  3095   return Qnil;
  3096 }
  3097 
  3098 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
  3099    if asynchronous code (filter or sentinel) is running. */
  3100 static void
  3101 save_search_regs (void)
  3102 {
  3103   if (saved_search_regs.num_regs == 0)
  3104     {
  3105       saved_search_regs = search_regs;
  3106       saved_last_thing_searched = last_thing_searched;
  3107       last_thing_searched = Qnil;
  3108       search_regs.num_regs = 0;
  3109       search_regs.start = 0;
  3110       search_regs.end = 0;
  3111     }
  3112 }
  3113 
  3114 /* Called upon exit from filters and sentinels. */
  3115 void
  3116 restore_search_regs (void)
  3117 {
  3118   if (saved_search_regs.num_regs != 0)
  3119     {
  3120       if (search_regs.num_regs > 0)
  3121         {
  3122           xfree (search_regs.start);
  3123           xfree (search_regs.end);
  3124         }
  3125       search_regs = saved_search_regs;
  3126       last_thing_searched = saved_last_thing_searched;
  3127       saved_last_thing_searched = Qnil;
  3128       saved_search_regs.num_regs = 0;
  3129     }
  3130 }
  3131 
  3132 /* Called from replace-match via replace_range.  */
  3133 void
  3134 update_search_regs (ptrdiff_t oldstart, ptrdiff_t oldend, ptrdiff_t newend)
  3135 {
  3136   /* Adjust search data for this change.  */
  3137   ptrdiff_t change = newend - oldend;
  3138   ptrdiff_t i;
  3139 
  3140   for (i = 0; i < search_regs.num_regs; i++)
  3141     {
  3142       if (search_regs.start[i] >= oldend)
  3143         search_regs.start[i] += change;
  3144       else if (search_regs.start[i] > oldstart)
  3145         search_regs.start[i] = oldstart;
  3146       if (search_regs.end[i] >= oldend)
  3147         search_regs.end[i] += change;
  3148       else if (search_regs.end[i] > oldstart)
  3149         search_regs.end[i] = oldstart;
  3150     }
  3151 }
  3152 
  3153 static void
  3154 unwind_set_match_data (Lisp_Object list)
  3155 {
  3156   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
  3157   Fset_match_data (list, Qt);
  3158 }
  3159 
  3160 /* Called to unwind protect the match data.  */
  3161 void
  3162 record_unwind_save_match_data (void)
  3163 {
  3164   record_unwind_protect (unwind_set_match_data,
  3165                          Fmatch_data (Qnil, Qnil, Qnil));
  3166 }
  3167 
  3168 /* Quote a string to deactivate reg-expr chars */
  3169 
  3170 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
  3171        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
  3172   (Lisp_Object string)
  3173 {
  3174   char *in, *out, *end;
  3175   char *temp;
  3176   ptrdiff_t backslashes_added = 0;
  3177 
  3178   CHECK_STRING (string);
  3179 
  3180   USE_SAFE_ALLOCA;
  3181   SAFE_NALLOCA (temp, 2, SBYTES (string));
  3182 
  3183   /* Now copy the data into the new string, inserting escapes. */
  3184 
  3185   in = SSDATA (string);
  3186   end = in + SBYTES (string);
  3187   out = temp;
  3188 
  3189   for (; in != end; in++)
  3190     {
  3191       if (*in == '['
  3192           || *in == '*' || *in == '.' || *in == '\\'
  3193           || *in == '?' || *in == '+'
  3194           || *in == '^' || *in == '$')
  3195         *out++ = '\\', backslashes_added++;
  3196       *out++ = *in;
  3197     }
  3198 
  3199   Lisp_Object result
  3200     = (backslashes_added > 0
  3201        ? make_specified_string (temp,
  3202                                 SCHARS (string) + backslashes_added,
  3203                                 out - temp,
  3204                                 STRING_MULTIBYTE (string))
  3205        : string);
  3206   SAFE_FREE ();
  3207   return result;
  3208 }
  3209 
  3210 /* Like find_newline, but doesn't use the cache, and only searches forward.  */
  3211 ptrdiff_t
  3212 find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
  3213                ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
  3214                ptrdiff_t *bytepos, bool allow_quit)
  3215 {
  3216   if (count > 0)
  3217     {
  3218       if (!end)
  3219         end = ZV, end_byte = ZV_BYTE;
  3220     }
  3221   else
  3222     {
  3223       if (!end)
  3224         end = BEGV, end_byte = BEGV_BYTE;
  3225     }
  3226   if (end_byte == -1)
  3227     end_byte = CHAR_TO_BYTE (end);
  3228 
  3229   if (counted)
  3230     *counted = count;
  3231 
  3232   if (count > 0)
  3233     while (start != end)
  3234       {
  3235         /* Our innermost scanning loop is very simple; it doesn't know
  3236            about gaps, buffer ends, or the newline cache.  ceiling is
  3237            the position of the last character before the next such
  3238            obstacle --- the last character the dumb search loop should
  3239            examine.  */
  3240         ptrdiff_t tem, ceiling_byte = end_byte - 1;
  3241 
  3242         if (start_byte == -1)
  3243           start_byte = CHAR_TO_BYTE (start);
  3244 
  3245         /* The dumb loop can only scan text stored in contiguous
  3246            bytes. BUFFER_CEILING_OF returns the last character
  3247            position that is contiguous, so the ceiling is the
  3248            position after that.  */
  3249         tem = BUFFER_CEILING_OF (start_byte);
  3250         ceiling_byte = min (tem, ceiling_byte);
  3251 
  3252         {
  3253           /* The termination address of the dumb loop.  */
  3254           unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
  3255           ptrdiff_t lim_byte = ceiling_byte + 1;
  3256 
  3257           /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
  3258              of the base, the cursor, and the next line.  */
  3259           ptrdiff_t base = start_byte - lim_byte;
  3260           ptrdiff_t cursor, next;
  3261 
  3262           for (cursor = base; cursor < 0; cursor = next)
  3263             {
  3264               /* The dumb loop.  */
  3265               unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
  3266               next = nl ? nl - lim_addr : 0;
  3267 
  3268               if (! nl)
  3269                 break;
  3270               next++;
  3271 
  3272               if (--count == 0)
  3273                 {
  3274                   if (bytepos)
  3275                     *bytepos = lim_byte + next;
  3276                   return BYTE_TO_CHAR (lim_byte + next);
  3277                 }
  3278               if (allow_quit)
  3279                 maybe_quit ();
  3280             }
  3281 
  3282           start_byte = lim_byte;
  3283           start = BYTE_TO_CHAR (start_byte);
  3284         }
  3285       }
  3286 
  3287   if (counted)
  3288     *counted -= count;
  3289   if (bytepos)
  3290     {
  3291       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
  3292       eassert (*bytepos == CHAR_TO_BYTE (start));
  3293     }
  3294   return start;
  3295 }
  3296 
  3297 DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
  3298        0, 1, 0,
  3299        doc: /* Check the newline cache of BUFFER against buffer contents.
  3300 
  3301 BUFFER defaults to the current buffer.
  3302 
  3303 Value is an array of 2 sub-arrays of buffer positions for newlines,
  3304 the first based on the cache, the second based on actually scanning
  3305 the buffer.  If the buffer doesn't have a cache, the value is nil.  */)
  3306   (Lisp_Object buffer)
  3307 {
  3308   struct buffer *buf, *old = NULL;
  3309   ptrdiff_t nl_count_cache, nl_count_buf;
  3310   Lisp_Object cache_newlines, buf_newlines, val;
  3311   ptrdiff_t from, found, i;
  3312 
  3313   if (NILP (buffer))
  3314     buf = current_buffer;
  3315   else
  3316     {
  3317       CHECK_BUFFER (buffer);
  3318       buf = XBUFFER (buffer);
  3319       old = current_buffer;
  3320     }
  3321   if (buf->base_buffer)
  3322     buf = buf->base_buffer;
  3323 
  3324   /* If the buffer doesn't have a newline cache, return nil.  */
  3325   if (NILP (BVAR (buf, cache_long_scans))
  3326       || buf->newline_cache == NULL)
  3327     return Qnil;
  3328 
  3329   /* find_newline can only work on the current buffer.  */
  3330   if (old != NULL)
  3331     set_buffer_internal_1 (buf);
  3332 
  3333   /* How many newlines are there according to the cache?  */
  3334   find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
  3335                 TYPE_MAXIMUM (ptrdiff_t), &nl_count_cache, NULL, true);
  3336 
  3337   /* Create vector and populate it.  */
  3338   cache_newlines = make_vector (nl_count_cache, make_fixnum (-1));
  3339 
  3340   if (nl_count_cache)
  3341     {
  3342       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
  3343         {
  3344           ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
  3345 
  3346           found = find_newline (from, from_byte, 0, -1, 1, &counted,
  3347                                 NULL, true);
  3348           if (counted == 0 || i >= nl_count_cache)
  3349             break;
  3350           ASET (cache_newlines, i, make_fixnum (found - 1));
  3351         }
  3352     }
  3353 
  3354   /* Now do the same, but without using the cache.  */
  3355   find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
  3356                  TYPE_MAXIMUM (ptrdiff_t), &nl_count_buf, NULL, true);
  3357   buf_newlines = make_vector (nl_count_buf, make_fixnum (-1));
  3358   if (nl_count_buf)
  3359     {
  3360       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
  3361         {
  3362           ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
  3363 
  3364           found = find_newline1 (from, from_byte, 0, -1, 1, &counted,
  3365                                  NULL, true);
  3366           if (counted == 0 || i >= nl_count_buf)
  3367             break;
  3368           ASET (buf_newlines, i, make_fixnum (found - 1));
  3369         }
  3370     }
  3371 
  3372   /* Construct the value and return it.  */
  3373   val = CALLN (Fvector, cache_newlines, buf_newlines);
  3374 
  3375   if (old != NULL)
  3376     set_buffer_internal_1 (old);
  3377   return val;
  3378 }
  3379 
  3380 
  3381 static void syms_of_search_for_pdumper (void);
  3382 
  3383 void
  3384 syms_of_search (void)
  3385 {
  3386   for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
  3387     {
  3388       staticpro (&searchbufs[i].regexp);
  3389       staticpro (&searchbufs[i].f_whitespace_regexp);
  3390       staticpro (&searchbufs[i].syntax_table);
  3391     }
  3392 
  3393   /* Error condition used for failing searches.  */
  3394   DEFSYM (Qsearch_failed, "search-failed");
  3395 
  3396   /* Error condition used for failing searches started by user, i.e.,
  3397      where failure should not invoke the debugger.  */
  3398   DEFSYM (Quser_search_failed, "user-search-failed");
  3399 
  3400   /* Error condition signaled when regexp compile_pattern fails.  */
  3401   DEFSYM (Qinvalid_regexp, "invalid-regexp");
  3402 
  3403   Fput (Qsearch_failed, Qerror_conditions,
  3404         pure_list (Qsearch_failed, Qerror));
  3405   Fput (Qsearch_failed, Qerror_message,
  3406         build_pure_c_string ("Search failed"));
  3407 
  3408   Fput (Quser_search_failed, Qerror_conditions,
  3409         pure_list (Quser_search_failed, Quser_error, Qsearch_failed, Qerror));
  3410   Fput (Quser_search_failed, Qerror_message,
  3411         build_pure_c_string ("Search failed"));
  3412 
  3413   Fput (Qinvalid_regexp, Qerror_conditions,
  3414         pure_list (Qinvalid_regexp, Qerror));
  3415   Fput (Qinvalid_regexp, Qerror_message,
  3416         build_pure_c_string ("Invalid regexp"));
  3417 
  3418   re_match_object = Qnil;
  3419   staticpro (&re_match_object);
  3420 
  3421   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
  3422       doc: /* Regexp to substitute for bunches of spaces in regexp search.
  3423 Some commands use this for user-specified regexps.
  3424 Spaces that occur inside character classes or repetition operators
  3425 or other such regexp constructs are not replaced with this.
  3426 A value of nil (which is the normal value) means treat spaces
  3427 literally.  Note that a value with capturing groups can change the
  3428 numbering of existing capture groups in unexpected ways.  */);
  3429   Vsearch_spaces_regexp = Qnil;
  3430 
  3431   DEFSYM (Qinhibit_changing_match_data, "inhibit-changing-match-data");
  3432   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
  3433       doc: /* Internal use only.
  3434 If non-nil, the primitive searching and matching functions
  3435 such as `looking-at', `string-match', `re-search-forward', etc.,
  3436 do not set the match data.  The proper way to use this variable
  3437 is to bind it with `let' around a small expression.  */);
  3438   Vinhibit_changing_match_data = Qnil;
  3439 
  3440   defsubr (&Slooking_at);
  3441   defsubr (&Sposix_looking_at);
  3442   defsubr (&Sstring_match);
  3443   defsubr (&Sposix_string_match);
  3444   defsubr (&Ssearch_forward);
  3445   defsubr (&Ssearch_backward);
  3446   defsubr (&Sre_search_forward);
  3447   defsubr (&Sre_search_backward);
  3448   defsubr (&Sposix_search_forward);
  3449   defsubr (&Sposix_search_backward);
  3450   defsubr (&Sreplace_match);
  3451   defsubr (&Smatch_beginning);
  3452   defsubr (&Smatch_end);
  3453   defsubr (&Smatch_data);
  3454   defsubr (&Sset_match_data);
  3455   defsubr (&Smatch_data__translate);
  3456   defsubr (&Sregexp_quote);
  3457   defsubr (&Snewline_cache_check);
  3458 
  3459   pdumper_do_now_and_after_load (syms_of_search_for_pdumper);
  3460 }
  3461 
  3462 static void
  3463 syms_of_search_for_pdumper (void)
  3464 {
  3465   for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
  3466     {
  3467       searchbufs[i].buf.allocated = 100;
  3468       searchbufs[i].buf.buffer = xmalloc (100);
  3469       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
  3470       searchbufs[i].regexp = Qnil;
  3471       searchbufs[i].f_whitespace_regexp = Qnil;
  3472       searchbufs[i].busy = false;
  3473       searchbufs[i].syntax_table = Qnil;
  3474       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
  3475     }
  3476   searchbuf_head = &searchbufs[0];
  3477 }
/* [<][>][^][v][top][bottom][index][help] */
root/src/search.c

DEFINITIONS