src/search.c

/* [<][>][^][v][top][bottom][index][help] */
This source file includes following definitions.
matcher_overflow
freeze_buffer_relocation
compile_pattern_1
shrink_regexp_cache
clear_regexp_cache
unfreeze_pattern
freeze_pattern
compile_pattern
looking_at_1
string_match_1
fast_string_match_internal
fast_c_string_match_internal
fast_looking_at
newline_cache_on_off
find_newline
scan_newline
scan_newline_from_point
find_newline_no_quit
find_before_next_newline
search_command
trivial_regexp_p
search_buffer_re
search_buffer_non_re
search_buffer
simple_search
boyer_moore
set_search_regs
match_limit
DEFUN
DEFUN
DEFUN
save_search_regs
restore_search_regs
update_search_regs
unwind_set_match_data
record_unwind_save_match_data
DEFUN
find_newline1
DEFUN
syms_of_search
syms_of_search_for_pdumper
     1 /* String search routines for GNU Emacs.
     2 
     3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2023 Free Software
     4 Foundation, Inc.
     5 
     6 This file is part of GNU Emacs.
     7 
     8 GNU Emacs is free software: you can redistribute it and/or modify
     9 it under the terms of the GNU General Public License as published by
    10 the Free Software Foundation, either version 3 of the License, or (at
    11 your option) any later version.
    12 
    13 GNU Emacs is distributed in the hope that it will be useful,
    14 but WITHOUT ANY WARRANTY; without even the implied warranty of
    15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    16 GNU General Public License for more details.
    17 
    18 You should have received a copy of the GNU General Public License
    19 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
    20 
    21 
    22 #include <config.h>
    23 
    24 #include "lisp.h"
    25 #include "character.h"
    26 #include "buffer.h"
    27 #include "syntax.h"
    28 #include "charset.h"
    29 #include "region-cache.h"
    30 #include "blockinput.h"
    31 #include "intervals.h"
    32 #include "pdumper.h"
    33 #include "composite.h"
    34 
    35 #include "regex-emacs.h"
    36 
    37 #define REGEXP_CACHE_SIZE 20
    38 
    39 /* If the regexp is non-nil, then the buffer contains the compiled form
    40    of that regexp, suitable for searching.  */
    41 struct regexp_cache
    42 {
    43   struct regexp_cache *next;
    44   Lisp_Object regexp, f_whitespace_regexp;
    45   /* Syntax table for which the regexp applies.  We need this because
    46      of character classes.  If this is t, then the compiled pattern is valid
    47      for any syntax-table.  */
    48   Lisp_Object syntax_table;
    49   struct re_pattern_buffer buf;
    50   char fastmap[0400];
    51   /* True means regexp was compiled to do full POSIX backtracking.  */
    52   bool posix;
    53   /* True means we're inside a buffer match.  */
    54   bool busy;
    55 };
    56 
    57 /* The instances of that struct.  */
    58 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
    59 
    60 /* The head of the linked list; points to the most recently used buffer.  */
    61 static struct regexp_cache *searchbuf_head;
    62 
    63 static void set_search_regs (ptrdiff_t, ptrdiff_t);
    64 static void save_search_regs (void);
    65 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
    66                                 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
    67                                 ptrdiff_t, ptrdiff_t);
    68 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
    69                               Lisp_Object, Lisp_Object, ptrdiff_t,
    70                               ptrdiff_t, int);
    71 
    72 Lisp_Object re_match_object;
    73 
    74 static AVOID
    75 matcher_overflow (void)
    76 {
    77   error ("Stack overflow in regexp matcher");
    78 }
    79 
    80 static void
    81 freeze_buffer_relocation (void)
    82 {
    83 #ifdef REL_ALLOC
    84   /* Prevent ralloc.c from relocating the current buffer while
    85      searching it.  */
    86   r_alloc_inhibit_buffer_relocation (1);
    87   record_unwind_protect_int (r_alloc_inhibit_buffer_relocation, 0);
    88 #endif
    89 }
    90 
    91 /* Compile a regexp and signal a Lisp error if anything goes wrong.
    92    PATTERN is the pattern to compile.
    93    CP is the place to put the result.
    94    TRANSLATE is a translation table for ignoring case, or nil for none.
    95    POSIX is true if we want full backtracking (POSIX style) for this pattern.
    96    False means backtrack only enough to get a valid match.
    97 
    98    The behavior also depends on Vsearch_spaces_regexp.  */
    99 
   100 static void
   101 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
   102                    Lisp_Object translate, bool posix)
   103 {
   104   const char *whitespace_regexp;
   105   char *val;
   106 
   107   eassert (!cp->busy);
   108   cp->regexp = Qnil;
   109   cp->buf.translate = translate;
   110   cp->posix = posix;
   111   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
   112   cp->buf.charset_unibyte = charset_unibyte;
   113   if (STRINGP (Vsearch_spaces_regexp))
   114     cp->f_whitespace_regexp = Vsearch_spaces_regexp;
   115   else
   116     cp->f_whitespace_regexp = Qnil;
   117 
   118   whitespace_regexp = STRINGP (Vsearch_spaces_regexp) ?
   119     SSDATA (Vsearch_spaces_regexp) : NULL;
   120 
   121   val = (char *) re_compile_pattern (SSDATA (pattern), SBYTES (pattern),
   122                                      posix, whitespace_regexp, &cp->buf);
   123 
   124   /* If the compiled pattern hard codes some of the contents of the
   125      syntax-table, it can only be reused with *this* syntax table.  */
   126   cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
   127 
   128   if (val)
   129     xsignal1 (Qinvalid_regexp, build_string (val));
   130 
   131   cp->regexp = Fcopy_sequence (pattern);
   132 }
   133 
   134 /* Shrink each compiled regexp buffer in the cache
   135    to the size actually used right now.
   136    This is called from garbage collection.  */
   137 
   138 void
   139 shrink_regexp_cache (void)
   140 {
   141   struct regexp_cache *cp;
   142 
   143   for (cp = searchbuf_head; cp != 0; cp = cp->next)
   144     if (!cp->busy)
   145       {
   146         cp->buf.allocated = cp->buf.used;
   147         cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
   148       }
   149 }
   150 
   151 /* Clear the regexp cache w.r.t. a particular syntax table,
   152    because it was changed.
   153    There is no danger of memory leak here because re_compile_pattern
   154    automagically manages the memory in each re_pattern_buffer struct,
   155    based on its `allocated' and `buffer' values.  */
   156 void
   157 clear_regexp_cache (void)
   158 {
   159   int i;
   160 
   161   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
   162     /* It's tempting to compare with the syntax-table we've actually changed,
   163        but it's not sufficient because char-table inheritance means that
   164        modifying one syntax-table can change others at the same time.  */
   165     if (!searchbufs[i].busy && !BASE_EQ (searchbufs[i].syntax_table, Qt))
   166       searchbufs[i].regexp = Qnil;
   167 }
   168 
   169 static void
   170 unfreeze_pattern (void *arg)
   171 {
   172   struct regexp_cache *searchbuf = arg;
   173   searchbuf->busy = false;
   174 }
   175 
   176 static void
   177 freeze_pattern (struct regexp_cache *searchbuf)
   178 {
   179   eassert (!searchbuf->busy);
   180   record_unwind_protect_ptr (unfreeze_pattern, searchbuf);
   181   searchbuf->busy = true;
   182 }
   183 
   184 /* Compile a regexp if necessary, but first check to see if there's one in
   185    the cache.
   186    PATTERN is the pattern to compile.
   187    TRANSLATE is a translation table for ignoring case, or nil for none.
   188    REGP is the structure that says where to store the "register"
   189    values that will result from matching this pattern.
   190    If it is 0, we should compile the pattern not to record any
   191    subexpression bounds.
   192    POSIX is true if we want full backtracking (POSIX style) for this pattern.
   193    False means backtrack only enough to get a valid match.  */
   194 
   195 static struct regexp_cache *
   196 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
   197                  Lisp_Object translate, bool posix, bool multibyte)
   198 {
   199   struct regexp_cache *cp, **cpp, **lru_nonbusy;
   200 
   201   for (cpp = &searchbuf_head, lru_nonbusy = NULL; ; cpp = &cp->next)
   202     {
   203       cp = *cpp;
   204       if (!cp->busy)
   205         lru_nonbusy = cpp;
   206       /* Entries are initialized to nil, and may be set to nil by
   207          compile_pattern_1 if the pattern isn't valid.  Don't apply
   208          string accessors in those cases.  However, compile_pattern_1
   209          is only applied to the cache entry we pick here to reuse.  So
   210          nil should never appear before a non-nil entry.  */
   211       if (NILP (cp->regexp))
   212         goto compile_it;
   213       if (SCHARS (cp->regexp) == SCHARS (pattern)
   214           && !cp->busy
   215           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
   216           && !NILP (Fstring_equal (cp->regexp, pattern))
   217           && BASE_EQ (cp->buf.translate, translate)
   218           && cp->posix == posix
   219           && (BASE_EQ (cp->syntax_table, Qt)
   220               || BASE_EQ (cp->syntax_table,
   221                           BVAR (current_buffer, syntax_table)))
   222           && !NILP (Fequal (cp->f_whitespace_regexp, Vsearch_spaces_regexp))
   223           && cp->buf.charset_unibyte == charset_unibyte)
   224         break;
   225 
   226       /* If we're at the end of the cache, compile into the last
   227          (least recently used) non-busy cell in the cache.  */
   228       if (cp->next == 0)
   229         {
   230           if (!lru_nonbusy)
   231             error ("Too much matching reentrancy");
   232           cpp = lru_nonbusy;
   233           cp = *cpp;
   234         compile_it:
   235           eassert (!cp->busy);
   236           compile_pattern_1 (cp, pattern, translate, posix);
   237           break;
   238         }
   239     }
   240 
   241   /* When we get here, cp (aka *cpp) contains the compiled pattern,
   242      either because we found it in the cache or because we just compiled it.
   243      Move it to the front of the queue to mark it as most recently used.  */
   244   *cpp = cp->next;
   245   cp->next = searchbuf_head;
   246   searchbuf_head = cp;
   247 
   248   /* Advise the searching functions about the space we have allocated
   249      for register data.  */
   250   if (regp)
   251     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
   252 
   253   /* The compiled pattern can be used both for multibyte and unibyte
   254      target.  But, we have to tell which the pattern is used for. */
   255   cp->buf.target_multibyte = multibyte;
   256   return cp;
   257 }
   258 
   259 
   260 static Lisp_Object
   261 looking_at_1 (Lisp_Object string, bool posix, bool modify_data)
   262 {
   263   Lisp_Object val;
   264   unsigned char *p1, *p2;
   265   ptrdiff_t s1, s2;
   266   register ptrdiff_t i;
   267 
   268   if (running_asynch_code)
   269     save_search_regs ();
   270 
   271   /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
   272      table.  */
   273   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
   274                          BVAR (current_buffer, case_eqv_table));
   275 
   276   CHECK_STRING (string);
   277 
   278   /* Snapshot in case Lisp changes the value.  */
   279   bool modify_match_data = NILP (Vinhibit_changing_match_data) && modify_data;
   280 
   281   struct regexp_cache *cache_entry = compile_pattern (
   282     string,
   283     modify_match_data ? &search_regs : NULL,
   284     (!NILP (BVAR (current_buffer, case_fold_search))
   285      ? BVAR (current_buffer, case_canon_table) : Qnil),
   286     posix,
   287     !NILP (BVAR (current_buffer, enable_multibyte_characters)));
   288 
   289   /* Do a pending quit right away, to avoid paradoxical behavior */
   290   maybe_quit ();
   291 
   292   /* Get pointers and sizes of the two strings
   293      that make up the visible portion of the buffer. */
   294 
   295   p1 = BEGV_ADDR;
   296   s1 = GPT_BYTE - BEGV_BYTE;
   297   p2 = GAP_END_ADDR;
   298   s2 = ZV_BYTE - GPT_BYTE;
   299   if (s1 < 0)
   300     {
   301       p2 = p1;
   302       s2 = ZV_BYTE - BEGV_BYTE;
   303       s1 = 0;
   304     }
   305   if (s2 < 0)
   306     {
   307       s1 = ZV_BYTE - BEGV_BYTE;
   308       s2 = 0;
   309     }
   310 
   311   specpdl_ref count = SPECPDL_INDEX ();
   312   freeze_buffer_relocation ();
   313   freeze_pattern (cache_entry);
   314   re_match_object = Qnil;
   315   i = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
   316                   PT_BYTE - BEGV_BYTE,
   317                   modify_match_data ? &search_regs : NULL,
   318                   ZV_BYTE - BEGV_BYTE);
   319 
   320   if (i == -2)
   321     {
   322       unbind_to (count, Qnil);
   323       matcher_overflow ();
   324     }
   325 
   326   val = (i >= 0 ? Qt : Qnil);
   327   if (modify_match_data && i >= 0)
   328   {
   329     for (i = 0; i < search_regs.num_regs; i++)
   330       if (search_regs.start[i] >= 0)
   331         {
   332           search_regs.start[i]
   333             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
   334          search_regs.end[i]
   335            = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
   336        }
   337     /* Set last_thing_searched only when match data is changed.  */
   338     XSETBUFFER (last_thing_searched, current_buffer);
   339   }
   340 
   341   return unbind_to (count, val);
   342 }
   343 
   344 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 2, 0,
   345        doc: /* Return t if text after point matches regular expression REGEXP.
   346 By default, this function modifies the match data that
   347 `match-beginning', `match-end' and `match-data' access.  If
   348 INHIBIT-MODIFY is non-nil, don't modify the match data.  */)
   349   (Lisp_Object regexp, Lisp_Object inhibit_modify)
   350 {
   351   return looking_at_1 (regexp, 0, NILP (inhibit_modify));
   352 }
   353 
   354 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 2, 0,
   355        doc: /* Return t if text after point matches REGEXP according to Posix rules.
   356 Find the longest match, in accordance with Posix regular expression rules.
   357 
   358 By default, this function modifies the match data that
   359 `match-beginning', `match-end' and `match-data' access.  If
   360 INHIBIT-MODIFY is non-nil, don't modify the match data.  */)
   361   (Lisp_Object regexp, Lisp_Object inhibit_modify)
   362 {
   363   return looking_at_1 (regexp, 1, NILP (inhibit_modify));
   364 }
   365 
   366 static Lisp_Object
   367 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
   368                 bool posix, bool modify_data)
   369 {
   370   ptrdiff_t val;
   371   EMACS_INT pos;
   372   ptrdiff_t pos_byte, i;
   373   bool modify_match_data = NILP (Vinhibit_changing_match_data) && modify_data;
   374 
   375   if (running_asynch_code)
   376     save_search_regs ();
   377 
   378   CHECK_STRING (regexp);
   379   CHECK_STRING (string);
   380 
   381   if (NILP (start))
   382     pos = 0, pos_byte = 0;
   383   else
   384     {
   385       ptrdiff_t len = SCHARS (string);
   386 
   387       CHECK_FIXNUM (start);
   388       pos = XFIXNUM (start);
   389       if (pos < 0 && -pos <= len)
   390         pos = len + pos;
   391       else if (0 > pos || pos > len)
   392         args_out_of_range (string, start);
   393       pos_byte = string_char_to_byte (string, pos);
   394     }
   395 
   396   /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
   397      table.  */
   398   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
   399                          BVAR (current_buffer, case_eqv_table));
   400 
   401   specpdl_ref count = SPECPDL_INDEX ();
   402   struct regexp_cache *cache_entry
   403     = compile_pattern (regexp,
   404                        modify_match_data ? &search_regs : NULL,
   405                        (!NILP (BVAR (current_buffer, case_fold_search))
   406                         ? BVAR (current_buffer, case_canon_table)
   407                         : Qnil),
   408                        posix,
   409                        STRING_MULTIBYTE (string));
   410   freeze_pattern (cache_entry);
   411   re_match_object = string;
   412   val = re_search (&cache_entry->buf, SSDATA (string),
   413                    SBYTES (string), pos_byte,
   414                    SBYTES (string) - pos_byte,
   415                    (modify_match_data ? &search_regs : NULL));
   416   unbind_to (count, Qnil);
   417 
   418   /* Set last_thing_searched only when match data is changed.  */
   419   if (modify_match_data)
   420     last_thing_searched = Qt;
   421 
   422   if (val == -2)
   423     matcher_overflow ();
   424   if (val < 0) return Qnil;
   425 
   426   if (modify_match_data)
   427     for (i = 0; i < search_regs.num_regs; i++)
   428       if (search_regs.start[i] >= 0)
   429         {
   430           search_regs.start[i]
   431             = string_byte_to_char (string, search_regs.start[i]);
   432           search_regs.end[i]
   433             = string_byte_to_char (string, search_regs.end[i]);
   434         }
   435 
   436   return make_fixnum (string_byte_to_char (string, val));
   437 }
   438 
   439 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 4, 0,
   440        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
   441 Matching ignores case if `case-fold-search' is non-nil.
   442 If third arg START is non-nil, start search at that index in STRING.
   443 
   444 If INHIBIT-MODIFY is non-nil, match data is not changed.
   445 
   446 If INHIBIT-MODIFY is nil or missing, match data is changed, and
   447 `match-end' and `match-beginning' give indices of substrings matched
   448 by parenthesis constructs in the pattern.  You can use the function
   449 `match-string' to extract the substrings matched by the parenthesis
   450 constructions in REGEXP.  For index of first char beyond the match, do
   451 (match-end 0).  */)
   452   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
   453    Lisp_Object inhibit_modify)
   454 {
   455   return string_match_1 (regexp, string, start, 0, NILP (inhibit_modify));
   456 }
   457 
   458 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 4, 0,
   459        doc: /* Return index of start of first match for Posix REGEXP in STRING, or nil.
   460 Find the longest match, in accord with Posix regular expression rules.
   461 Case is ignored if `case-fold-search' is non-nil in the current buffer.
   462 
   463 If INHIBIT-MODIFY is non-nil, match data is not changed.
   464 
   465 If INHIBIT-MODIFY is nil or missing, match data is changed, and
   466 `match-end' and `match-beginning' give indices of substrings matched
   467 by parenthesis constructs in the pattern.  You can use the function
   468 `match-string' to extract the substrings matched by the parenthesis
   469 constructions in REGEXP.  For index of first char beyond the match, do
   470 (match-end 0).  */)
   471   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
   472    Lisp_Object inhibit_modify)
   473 {
   474   return string_match_1 (regexp, string, start, 1, NILP (inhibit_modify));
   475 }
   476 
   477 /* Match REGEXP against STRING using translation table TABLE,
   478    searching all of STRING, and return the index of the match,
   479    or negative on failure.  This does not clobber the match data.  */
   480 
   481 ptrdiff_t
   482 fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
   483                             Lisp_Object table)
   484 {
   485   re_match_object = string;
   486   specpdl_ref count = SPECPDL_INDEX ();
   487   struct regexp_cache *cache_entry
   488     = compile_pattern (regexp, 0, table, 0, STRING_MULTIBYTE (string));
   489   freeze_pattern (cache_entry);
   490   ptrdiff_t val = re_search (&cache_entry->buf, SSDATA (string),
   491                              SBYTES (string), 0,
   492                              SBYTES (string), 0);
   493   unbind_to (count, Qnil);
   494   return val;
   495 }
   496 
   497 /* Match REGEXP against STRING, searching all of STRING and return the
   498    index of the match, or negative on failure.  This does not clobber
   499    the match data.  Table is a canonicalize table for ignoring case,
   500    or nil for none.
   501 
   502    We assume that STRING contains single-byte characters.  */
   503 
   504 ptrdiff_t
   505 fast_c_string_match_internal (Lisp_Object regexp,
   506                               const char *string, ptrdiff_t len,
   507                               Lisp_Object table)
   508 {
   509   /* FIXME: This is expensive and not obviously correct when it makes
   510      a difference. I.e., no longer "fast", and may hide bugs.
   511      Something should be done about this.  */
   512   regexp = string_make_unibyte (regexp);
   513   /* Record specpdl index because freeze_pattern pushes an
   514      unwind-protect on the specpdl.  */
   515   specpdl_ref count = SPECPDL_INDEX ();
   516   struct regexp_cache *cache_entry
   517     = compile_pattern (regexp, 0, table, 0, 0);
   518   freeze_pattern (cache_entry);
   519   re_match_object = Qt;
   520   ptrdiff_t val = re_search (&cache_entry->buf, string, len, 0, len, 0);
   521   unbind_to (count, Qnil);
   522   return val;
   523 }
   524 
   525 /* Match REGEXP against the characters after POS to LIMIT, and return
   526    the number of matched characters.  If STRING is non-nil, match
   527    against the characters in it.  In that case, POS and LIMIT are
   528    indices into the string.  This function doesn't modify the match
   529    data.  */
   530 
   531 ptrdiff_t
   532 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
   533                  ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
   534 {
   535   bool multibyte;
   536   unsigned char *p1, *p2;
   537   ptrdiff_t s1, s2;
   538   ptrdiff_t len;
   539 
   540   if (STRINGP (string))
   541     {
   542       if (pos_byte < 0)
   543         pos_byte = string_char_to_byte (string, pos);
   544       if (limit_byte < 0)
   545         limit_byte = string_char_to_byte (string, limit);
   546       p1 = NULL;
   547       s1 = 0;
   548       p2 = SDATA (string);
   549       s2 = SBYTES (string);
   550       multibyte = STRING_MULTIBYTE (string);
   551     }
   552   else
   553     {
   554       if (pos_byte < 0)
   555         pos_byte = CHAR_TO_BYTE (pos);
   556       if (limit_byte < 0)
   557         limit_byte = CHAR_TO_BYTE (limit);
   558       pos_byte -= BEGV_BYTE;
   559       limit_byte -= BEGV_BYTE;
   560       p1 = BEGV_ADDR;
   561       s1 = GPT_BYTE - BEGV_BYTE;
   562       p2 = GAP_END_ADDR;
   563       s2 = ZV_BYTE - GPT_BYTE;
   564       if (s1 < 0)
   565         {
   566           p2 = p1;
   567           s2 = ZV_BYTE - BEGV_BYTE;
   568           s1 = 0;
   569         }
   570       if (s2 < 0)
   571         {
   572           s1 = ZV_BYTE - BEGV_BYTE;
   573           s2 = 0;
   574         }
   575       multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
   576     }
   577 
   578   struct regexp_cache *cache_entry =
   579     compile_pattern (regexp, 0, Qnil, 0, multibyte);
   580   specpdl_ref count = SPECPDL_INDEX ();
   581   freeze_buffer_relocation ();
   582   freeze_pattern (cache_entry);
   583   re_match_object = STRINGP (string) ? string : Qnil;
   584   len = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
   585                     pos_byte, NULL, limit_byte);
   586 
   587   unbind_to (count, Qnil);
   588   return len;
   589 }
   590 
   591 
   592 /* The newline cache: remembering which sections of text have no newlines.  */
   593 
   594 /* If the user has requested the long scans caching, make sure it's on.
   595    Otherwise, make sure it's off.
   596    This is our cheezy way of associating an action with the change of
   597    state of a buffer-local variable.  */
   598 static struct region_cache *
   599 newline_cache_on_off (struct buffer *buf)
   600 {
   601   struct buffer *base_buf = buf;
   602   bool indirect_p = false;
   603 
   604   if (buf->base_buffer)
   605     {
   606       base_buf = buf->base_buffer;
   607       indirect_p = true;
   608     }
   609 
   610   /* Don't turn on or off the cache in the base buffer, if the value
   611      of cache-long-scans of the base buffer is inconsistent with that.
   612      This is because doing so will just make the cache pure overhead,
   613      since if we turn it on via indirect buffer, it will be
   614      immediately turned off by its base buffer.  */
   615   if (NILP (BVAR (buf, cache_long_scans)))
   616     {
   617       if (!indirect_p
   618           || NILP (BVAR (base_buf, cache_long_scans)))
   619         {
   620           /* It should be off.  */
   621           if (base_buf->newline_cache)
   622             {
   623               free_region_cache (base_buf->newline_cache);
   624               base_buf->newline_cache = 0;
   625             }
   626         }
   627       return NULL;
   628     }
   629   else
   630     {
   631       if (!indirect_p
   632           || !NILP (BVAR (base_buf, cache_long_scans)))
   633         {
   634           /* It should be on.  */
   635           if (base_buf->newline_cache == 0)
   636             {
   637               base_buf->newline_cache = new_region_cache ();
   638               __lsan_ignore_object (base_buf->newline_cache);
   639             }
   640         }
   641       return base_buf->newline_cache;
   642     }
   643 }
   644 
   645 
   646 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
   647 
   648    If COUNT is positive, search forwards; END must be >= START.
   649    If COUNT is negative, search backwards for the -COUNTth instance;
   650       END must be <= START.
   651    If COUNT is zero, do anything you please; run rogue, for all I care.
   652 
   653    If END is zero, use BEGV or ZV instead, as appropriate for the
   654    direction indicated by COUNT.  If START_BYTE is -1 it is unknown,
   655    and similarly for END_BYTE.
   656 
   657    If we find COUNT instances, set *COUNTED to COUNT, and return the
   658    position past the COUNTth match.  Note that for reverse motion
   659    this is not the same as the usual convention for Emacs motion commands.
   660 
   661    If we don't find COUNT instances before reaching END, set *COUNTED
   662    to the number of newlines left found (negated if COUNT is negative),
   663    and return END.
   664 
   665    If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
   666    to the returned character position.
   667 
   668    If ALLOW_QUIT, check for quitting.  That's good to do
   669    except when inside redisplay.  */
   670 
   671 ptrdiff_t
   672 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
   673               ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
   674               ptrdiff_t *bytepos, bool allow_quit)
   675 {
   676   struct region_cache *newline_cache;
   677   struct buffer *cache_buffer;
   678 
   679   if (!end)
   680     {
   681       if (count > 0)
   682         end = ZV, end_byte = ZV_BYTE;
   683       else
   684         end = BEGV, end_byte = BEGV_BYTE;
   685     }
   686   if (end_byte == -1)
   687     end_byte = CHAR_TO_BYTE (end);
   688 
   689   newline_cache = newline_cache_on_off (current_buffer);
   690   if (current_buffer->base_buffer)
   691     cache_buffer = current_buffer->base_buffer;
   692   else
   693     cache_buffer = current_buffer;
   694 
   695   if (counted)
   696     *counted = count;
   697 
   698   if (count > 0)
   699     while (start != end)
   700       {
   701         /* Our innermost scanning loop is very simple; it doesn't know
   702            about gaps, buffer ends, or the newline cache.  ceiling is
   703            the position of the last character before the next such
   704            obstacle --- the last character the dumb search loop should
   705            examine.  */
   706         ptrdiff_t tem, ceiling_byte = end_byte - 1;
   707 
   708         /* If we're using the newline cache, consult it to see whether
   709            we can avoid some scanning.  */
   710         if (newline_cache)
   711           {
   712             ptrdiff_t next_change;
   713             int result = 1;
   714 
   715             while (start < end && result)
   716               {
   717                 ptrdiff_t lim1;
   718 
   719                 result = region_cache_forward (cache_buffer, newline_cache,
   720                                                start, &next_change);
   721                 if (result)
   722                   {
   723                     /* When the cache revalidation is deferred,
   724                        next-change might point beyond ZV, which will
   725                        cause assertion violation in CHAR_TO_BYTE below.
   726                        Limit next_change to ZV to avoid that.  */
   727                     if (next_change > ZV)
   728                       next_change = ZV;
   729                     start = next_change;
   730                     lim1 = next_change = end;
   731                   }
   732                 else
   733                   lim1 = min (next_change, end);
   734 
   735                 /* The cache returned zero for this region; see if
   736                    this is because the region is known and includes
   737                    only newlines.  While at that, count any newlines
   738                    we bump into, and exit if we found enough off them.  */
   739                 start_byte = CHAR_TO_BYTE (start);
   740                 while (start < lim1
   741                        && FETCH_BYTE (start_byte) == '\n')
   742                   {
   743                     start_byte++;
   744                     start++;
   745                     if (--count == 0)
   746                       {
   747                         if (bytepos)
   748                           *bytepos = start_byte;
   749                         return start;
   750                       }
   751                   }
   752                 /* If we found a non-newline character before hitting
   753                    position where the cache will again return non-zero
   754                    (i.e. no newlines beyond that position), it means
   755                    this region is not yet known to the cache, and we
   756                    must resort to the "dumb loop" method.  */
   757                 if (start < next_change && !result)
   758                   break;
   759                 result = 1;
   760               }
   761             if (start >= end)
   762               {
   763                 start = end;
   764                 start_byte = end_byte;
   765                 break;
   766               }
   767 
   768             /* START should never be after END.  */
   769             if (start_byte > ceiling_byte)
   770               start_byte = ceiling_byte;
   771 
   772             /* Now the text after start is an unknown region, and
   773                next_change is the position of the next known region. */
   774             ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
   775           }
   776         else if (start_byte == -1)
   777           start_byte = CHAR_TO_BYTE (start);
   778 
   779         /* The dumb loop can only scan text stored in contiguous
   780            bytes. BUFFER_CEILING_OF returns the last character
   781            position that is contiguous, so the ceiling is the
   782            position after that.  */
   783         tem = BUFFER_CEILING_OF (start_byte);
   784         ceiling_byte = min (tem, ceiling_byte);
   785 
   786         {
   787           /* The termination address of the dumb loop.  */
   788           unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
   789           ptrdiff_t lim_byte = ceiling_byte + 1;
   790 
   791           /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
   792              of the base, the cursor, and the next line.  */
   793           ptrdiff_t base = start_byte - lim_byte;
   794           ptrdiff_t cursor, next;
   795 
   796           for (cursor = base; cursor < 0; cursor = next)
   797             {
   798               /* The dumb loop.  */
   799               unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
   800               next = nl ? nl - lim_addr : 0;
   801 
   802               /* If we're using the newline cache, cache the fact that
   803                  the region we just traversed is free of newlines. */
   804               if (newline_cache && cursor != next)
   805                 {
   806                   know_region_cache (cache_buffer, newline_cache,
   807                                      BYTE_TO_CHAR (lim_byte + cursor),
   808                                      BYTE_TO_CHAR (lim_byte + next));
   809                   /* know_region_cache can relocate buffer text.  */
   810                   lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
   811                 }
   812 
   813               if (! nl)
   814                 break;
   815               next++;
   816 
   817               if (--count == 0)
   818                 {
   819                   if (bytepos)
   820                     *bytepos = lim_byte + next;
   821                   return BYTE_TO_CHAR (lim_byte + next);
   822                 }
   823               if (allow_quit)
   824                 maybe_quit ();
   825             }
   826 
   827           start_byte = lim_byte;
   828           start = BYTE_TO_CHAR (start_byte);
   829         }
   830       }
   831   else
   832     while (start > end)
   833       {
   834         /* The last character to check before the next obstacle.  */
   835         ptrdiff_t tem, ceiling_byte = end_byte;
   836 
   837         /* Consult the newline cache, if appropriate.  */
   838         if (newline_cache)
   839           {
   840             ptrdiff_t next_change;
   841             int result = 1;
   842 
   843             while (start > end && result)
   844               {
   845                 ptrdiff_t lim1;
   846 
   847                 result = region_cache_backward (cache_buffer, newline_cache,
   848                                                 start, &next_change);
   849                 if (result)
   850                   {
   851                     start = next_change;
   852                     lim1 = next_change = end;
   853                   }
   854                 else
   855                   lim1 = max (next_change, end);
   856                 start_byte = CHAR_TO_BYTE (start);
   857                 while (start > lim1
   858                        && FETCH_BYTE (start_byte - 1) == '\n')
   859                   {
   860                     if (++count == 0)
   861                       {
   862                         if (bytepos)
   863                           *bytepos = start_byte;
   864                         return start;
   865                       }
   866                     start_byte--;
   867                     start--;
   868                   }
   869                 if (start > next_change && !result)
   870                   break;
   871                 result = 1;
   872               }
   873             if (start <= end)
   874               {
   875                 start = end;
   876                 start_byte = end_byte;
   877                 break;
   878               }
   879 
   880             /* Start should never be at or before end.  */
   881             if (start_byte <= ceiling_byte)
   882               start_byte = ceiling_byte + 1;
   883 
   884             /* Now the text before start is an unknown region, and
   885                next_change is the position of the next known region. */
   886             ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
   887           }
   888         else if (start_byte == -1)
   889           start_byte = CHAR_TO_BYTE (start);
   890 
   891         /* Stop scanning before the gap.  */
   892         tem = BUFFER_FLOOR_OF (start_byte - 1);
   893         ceiling_byte = max (tem, ceiling_byte);
   894 
   895         {
   896           /* The termination address of the dumb loop.  */
   897           unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
   898 
   899           /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
   900              the base, the cursor, and the previous line.  These
   901              offsets are at least -1.  */
   902           ptrdiff_t base = start_byte - ceiling_byte;
   903           ptrdiff_t cursor, prev;
   904 
   905           for (cursor = base; 0 < cursor; cursor = prev)
   906             {
   907               unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
   908               prev = nl ? nl - ceiling_addr : -1;
   909 
   910               /* If we're looking for newlines, cache the fact that
   911                  this line's region is free of them. */
   912               if (newline_cache && cursor != prev + 1)
   913                 {
   914                   know_region_cache (cache_buffer, newline_cache,
   915                                      BYTE_TO_CHAR (ceiling_byte + prev + 1),
   916                                      BYTE_TO_CHAR (ceiling_byte + cursor));
   917                   /* know_region_cache can relocate buffer text.  */
   918                   ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
   919                 }
   920 
   921               if (! nl)
   922                 break;
   923 
   924               if (++count >= 0)
   925                 {
   926                   if (bytepos)
   927                     *bytepos = ceiling_byte + prev + 1;
   928                   return BYTE_TO_CHAR (ceiling_byte + prev + 1);
   929                 }
   930               if (allow_quit)
   931                 maybe_quit ();
   932             }
   933 
   934           start_byte = ceiling_byte;
   935           start = BYTE_TO_CHAR (start_byte);
   936         }
   937       }
   938 
   939   if (counted)
   940     *counted -= count;
   941   if (bytepos)
   942     {
   943       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
   944       eassert (*bytepos == CHAR_TO_BYTE (start));
   945     }
   946   return start;
   947 }
   948 
   949 /* Search for COUNT instances of a line boundary.
   950    Start at START.  If COUNT is negative, search backwards.
   951 
   952    We report the resulting position by calling TEMP_SET_PT_BOTH.
   953 
   954    If we find COUNT instances. we position after (always after,
   955    even if scanning backwards) the COUNTth match.
   956 
   957    If we don't find COUNT instances before reaching the end of the
   958    buffer (or the beginning, if scanning backwards), we position at
   959    the limit we bumped up against.
   960 
   961    If ALLOW_QUIT, check for quitting.  That's good to do
   962    except in special cases.  */
   963 
   964 void
   965 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
   966               ptrdiff_t limit, ptrdiff_t limit_byte,
   967               ptrdiff_t count, bool allow_quit)
   968 {
   969   ptrdiff_t charpos, bytepos, counted;
   970 
   971   charpos = find_newline (start, start_byte, limit, limit_byte,
   972                           count, &counted, &bytepos, allow_quit);
   973   if (counted != count)
   974     TEMP_SET_PT_BOTH (limit, limit_byte);
   975   else
   976     TEMP_SET_PT_BOTH (charpos, bytepos);
   977 }
   978 
   979 /* Like above, but always scan from point and report the
   980    resulting position in *CHARPOS and *BYTEPOS.  */
   981 
   982 ptrdiff_t
   983 scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
   984                          ptrdiff_t *bytepos)
   985 {
   986   ptrdiff_t counted;
   987 
   988   if (count <= 0)
   989     *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
   990                              &counted, bytepos, 1);
   991   else
   992     *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
   993                              &counted, bytepos, 1);
   994   return counted;
   995 }
   996 
   997 /* Like find_newline, but doesn't allow QUITting and doesn't return
   998    COUNTED.  */
   999 ptrdiff_t
  1000 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
  1001                       ptrdiff_t cnt, ptrdiff_t *bytepos)
  1002 {
  1003   return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
  1004 }
  1005 
  1006 /* Like find_newline, but returns position before the newline, not
  1007    after, and only search up to TO.
  1008    This isn't just find_newline_no_quit (...)-1, because you might hit TO.  */
  1009 
  1010 ptrdiff_t
  1011 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
  1012                           ptrdiff_t cnt, ptrdiff_t *bytepos)
  1013 {
  1014   ptrdiff_t counted;
  1015   ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &counted, bytepos, 1);
  1016 
  1017   if (counted == cnt)
  1018     {
  1019       if (bytepos)
  1020         dec_both (&pos, &*bytepos);
  1021       else
  1022         pos--;
  1023     }
  1024   return pos;
  1025 }
  1026 
  1027 /* Subroutines of Lisp buffer search functions. */
  1028 
  1029 static Lisp_Object
  1030 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
  1031                 Lisp_Object count, int direction, bool RE, bool posix)
  1032 {
  1033   EMACS_INT np;
  1034   EMACS_INT lim;
  1035   ptrdiff_t lim_byte;
  1036   EMACS_INT n = direction;
  1037 
  1038   if (!NILP (count))
  1039     {
  1040       CHECK_FIXNUM (count);
  1041       n *= XFIXNUM (count);
  1042     }
  1043 
  1044   CHECK_STRING (string);
  1045   if (NILP (bound))
  1046     {
  1047       if (n > 0)
  1048         lim = ZV, lim_byte = ZV_BYTE;
  1049       else
  1050         lim = BEGV, lim_byte = BEGV_BYTE;
  1051     }
  1052   else
  1053     {
  1054       lim = fix_position (bound);
  1055       if (n > 0 ? lim < PT : lim > PT)
  1056         error ("Invalid search bound (wrong side of point)");
  1057       if (lim > ZV)
  1058         lim = ZV, lim_byte = ZV_BYTE;
  1059       else if (lim < BEGV)
  1060         lim = BEGV, lim_byte = BEGV_BYTE;
  1061       else
  1062         lim_byte = CHAR_TO_BYTE (lim);
  1063     }
  1064 
  1065   /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
  1066      table.  */
  1067   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
  1068                          BVAR (current_buffer, case_eqv_table));
  1069 
  1070   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
  1071                       (!NILP (BVAR (current_buffer, case_fold_search))
  1072                        ? BVAR (current_buffer, case_canon_table)
  1073                        : Qnil),
  1074                       (!NILP (BVAR (current_buffer, case_fold_search))
  1075                        ? BVAR (current_buffer, case_eqv_table)
  1076                        : Qnil),
  1077                       posix);
  1078   if (np <= 0)
  1079     {
  1080       if (NILP (noerror))
  1081         xsignal1 (Qsearch_failed, string);
  1082 
  1083       if (!EQ (noerror, Qt))
  1084         {
  1085           eassert (BEGV <= lim && lim <= ZV);
  1086           SET_PT_BOTH (lim, lim_byte);
  1087           return Qnil;
  1088 #if 0 /* This would be clean, but maybe programs depend on
  1089          a value of nil here.  */
  1090           np = lim;
  1091 #endif
  1092         }
  1093       else
  1094         return Qnil;
  1095     }
  1096 
  1097   eassert (BEGV <= np && np <= ZV);
  1098   SET_PT (np);
  1099 
  1100   return make_fixnum (np);
  1101 }
  1102 
  1103 /* Return true if REGEXP it matches just one constant string.  */
  1104 
  1105 static bool
  1106 trivial_regexp_p (Lisp_Object regexp)
  1107 {
  1108   ptrdiff_t len = SBYTES (regexp);
  1109   unsigned char *s = SDATA (regexp);
  1110   while (--len >= 0)
  1111     {
  1112       switch (*s++)
  1113         {
  1114         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
  1115           return 0;
  1116         case '\\':
  1117           if (--len < 0)
  1118             return 0;
  1119           switch (*s++)
  1120             {
  1121             case '|': case '(': case ')': case '`': case '\'': case 'b':
  1122             case 'B': case '<': case '>': case 'w': case 'W': case 's':
  1123             case 'S': case '=': case '{': case '}': case '_':
  1124             case 'c': case 'C': /* for categoryspec and notcategoryspec */
  1125             case '1': case '2': case '3': case '4': case '5':
  1126             case '6': case '7': case '8': case '9':
  1127               return 0;
  1128             }
  1129         }
  1130     }
  1131   return 1;
  1132 }
  1133 
  1134 #define TRANSLATE(out, trt, d)                  \
  1135 do                                              \
  1136   {                                             \
  1137     if (! NILP (trt))                           \
  1138       {                                         \
  1139         Lisp_Object temp;                       \
  1140         temp = Faref (trt, make_fixnum (d));    \
  1141         if (FIXNUMP (temp))                     \
  1142           out = XFIXNUM (temp);                 \
  1143         else                                    \
  1144           out = d;                              \
  1145       }                                         \
  1146     else                                        \
  1147       out = d;                                  \
  1148   }                                             \
  1149 while (0)
  1150 
  1151 /* Only used in search_buffer, to record the end position of the match
  1152    when searching regexps and SEARCH_REGS should not be changed
  1153    (i.e. Vinhibit_changing_match_data is non-nil).  */
  1154 static struct re_registers search_regs_1;
  1155 
  1156 static EMACS_INT
  1157 search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
  1158                   ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
  1159                   Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
  1160 {
  1161   unsigned char *p1, *p2;
  1162   ptrdiff_t s1, s2;
  1163 
  1164   /* Snapshot in case Lisp changes the value.  */
  1165   bool preserve_match_data = NILP (Vinhibit_changing_match_data);
  1166 
  1167   struct regexp_cache *cache_entry =
  1168     compile_pattern (string,
  1169                      preserve_match_data ? &search_regs : &search_regs_1,
  1170                      trt, posix,
  1171                      !NILP (BVAR (current_buffer, enable_multibyte_characters)));
  1172   struct re_pattern_buffer *bufp = &cache_entry->buf;
  1173 
  1174   maybe_quit ();                /* Do a pending quit right away,
  1175                                    to avoid paradoxical behavior */
  1176   /* Get pointers and sizes of the two strings
  1177      that make up the visible portion of the buffer. */
  1178 
  1179   p1 = BEGV_ADDR;
  1180   s1 = GPT_BYTE - BEGV_BYTE;
  1181   p2 = GAP_END_ADDR;
  1182   s2 = ZV_BYTE - GPT_BYTE;
  1183   if (s1 < 0)
  1184     {
  1185       p2 = p1;
  1186       s2 = ZV_BYTE - BEGV_BYTE;
  1187       s1 = 0;
  1188     }
  1189   if (s2 < 0)
  1190     {
  1191       s1 = ZV_BYTE - BEGV_BYTE;
  1192       s2 = 0;
  1193     }
  1194 
  1195   specpdl_ref count = SPECPDL_INDEX ();
  1196   freeze_buffer_relocation ();
  1197   freeze_pattern (cache_entry);
  1198 
  1199   while (n < 0)
  1200     {
  1201       ptrdiff_t val;
  1202 
  1203       re_match_object = Qnil;
  1204       val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
  1205                          pos_byte - BEGV_BYTE, lim_byte - pos_byte,
  1206                          preserve_match_data ? &search_regs : &search_regs_1,
  1207                          /* Don't allow match past current point */
  1208                          pos_byte - BEGV_BYTE);
  1209       if (val == -2)
  1210         {
  1211           unbind_to (count, Qnil);
  1212           matcher_overflow ();
  1213         }
  1214       if (val >= 0)
  1215         {
  1216           if (preserve_match_data)
  1217             {
  1218               pos_byte = search_regs.start[0] + BEGV_BYTE;
  1219               for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
  1220                 if (search_regs.start[i] >= 0)
  1221                   {
  1222                     search_regs.start[i]
  1223                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
  1224                     search_regs.end[i]
  1225                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
  1226                   }
  1227               XSETBUFFER (last_thing_searched, current_buffer);
  1228               /* Set pos to the new position. */
  1229               pos = search_regs.start[0];
  1230             }
  1231           else
  1232             {
  1233               pos_byte = search_regs_1.start[0] + BEGV_BYTE;
  1234               /* Set pos to the new position.  */
  1235               pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
  1236             }
  1237         }
  1238       else
  1239         {
  1240           unbind_to (count, Qnil);
  1241           return (n);
  1242         }
  1243       n++;
  1244       maybe_quit ();
  1245     }
  1246   while (n > 0)
  1247     {
  1248       ptrdiff_t val;
  1249 
  1250       re_match_object = Qnil;
  1251       val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
  1252                          pos_byte - BEGV_BYTE, lim_byte - pos_byte,
  1253                          preserve_match_data ? &search_regs : &search_regs_1,
  1254                          lim_byte - BEGV_BYTE);
  1255       if (val == -2)
  1256         {
  1257           unbind_to (count, Qnil);
  1258           matcher_overflow ();
  1259         }
  1260       if (val >= 0)
  1261         {
  1262           if (preserve_match_data)
  1263             {
  1264               pos_byte = search_regs.end[0] + BEGV_BYTE;
  1265               for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
  1266                 if (search_regs.start[i] >= 0)
  1267                   {
  1268                     search_regs.start[i]
  1269                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
  1270                     search_regs.end[i]
  1271                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
  1272                   }
  1273               XSETBUFFER (last_thing_searched, current_buffer);
  1274               pos = search_regs.end[0];
  1275             }
  1276           else
  1277             {
  1278               pos_byte = search_regs_1.end[0] + BEGV_BYTE;
  1279               pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
  1280             }
  1281         }
  1282       else
  1283         {
  1284           unbind_to (count, Qnil);
  1285           return (0 - n);
  1286         }
  1287       n--;
  1288       maybe_quit ();
  1289     }
  1290   unbind_to (count, Qnil);
  1291   return (pos);
  1292 }
  1293 
  1294 static EMACS_INT
  1295 search_buffer_non_re (Lisp_Object string, ptrdiff_t pos,
  1296                       ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte,
  1297                       EMACS_INT n, bool RE, Lisp_Object trt, Lisp_Object inverse_trt,
  1298                       bool posix)
  1299 {
  1300   unsigned char *raw_pattern, *pat;
  1301   ptrdiff_t raw_pattern_size;
  1302   ptrdiff_t raw_pattern_size_byte;
  1303   unsigned char *patbuf;
  1304   bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
  1305   unsigned char *base_pat;
  1306   /* Set to positive if we find a non-ASCII char that need
  1307      translation.  Otherwise set to zero later.  */
  1308   int char_base = -1;
  1309   bool boyer_moore_ok = 1;
  1310   USE_SAFE_ALLOCA;
  1311 
  1312   /* MULTIBYTE says whether the text to be searched is multibyte.
  1313      We must convert PATTERN to match that, or we will not really
  1314      find things right.  */
  1315 
  1316   if (multibyte == STRING_MULTIBYTE (string))
  1317     {
  1318       raw_pattern = SDATA (string);
  1319       raw_pattern_size = SCHARS (string);
  1320       raw_pattern_size_byte = SBYTES (string);
  1321     }
  1322   else if (multibyte)
  1323     {
  1324       raw_pattern_size = SCHARS (string);
  1325       raw_pattern_size_byte
  1326         = count_size_as_multibyte (SDATA (string),
  1327                                    raw_pattern_size);
  1328       raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
  1329       copy_text (SDATA (string), raw_pattern,
  1330                  SCHARS (string), 0, 1);
  1331     }
  1332   else
  1333     {
  1334       /* Converting multibyte to single-byte.  */
  1335       raw_pattern_size = SCHARS (string);
  1336       raw_pattern_size_byte = SCHARS (string);
  1337       raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
  1338       copy_text (SDATA (string), raw_pattern,
  1339                  SBYTES (string), 1, 0);
  1340     }
  1341 
  1342   /* Copy and optionally translate the pattern.  */
  1343   ptrdiff_t len = raw_pattern_size;
  1344   ptrdiff_t len_byte = raw_pattern_size_byte;
  1345   SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
  1346   pat = patbuf;
  1347   base_pat = raw_pattern;
  1348   if (multibyte)
  1349     {
  1350       /* Fill patbuf by translated characters in STRING while
  1351          checking if we can use boyer-moore search.  If TRT is
  1352          non-nil, we can use boyer-moore search only if TRT can be
  1353          represented by the byte array of 256 elements.  For that,
  1354          all non-ASCII case-equivalents of all case-sensitive
  1355          characters in STRING must belong to the same character
  1356          group (two characters belong to the same group iff their
  1357          multibyte forms are the same except for the last byte;
  1358          i.e. every 64 characters form a group; U+0000..U+003F,
  1359          U+0040..U+007F, U+0080..U+00BF, ...).  */
  1360 
  1361       while (--len >= 0)
  1362         {
  1363           unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
  1364           int translated, inverse;
  1365           int charlen;
  1366 
  1367           /* If we got here and the RE flag is set, it's because we're
  1368              dealing with a regexp known to be trivial, so the backslash
  1369              just quotes the next character.  */
  1370           if (RE && *base_pat == '\\')
  1371             {
  1372               len--;
  1373               raw_pattern_size--;
  1374               len_byte--;
  1375               base_pat++;
  1376             }
  1377 
  1378           int in_charlen, c = string_char_and_length (base_pat, &in_charlen);
  1379 
  1380           if (NILP (trt))
  1381             {
  1382               str = base_pat;
  1383               charlen = in_charlen;
  1384             }
  1385           else
  1386             {
  1387               /* Translate the character.  */
  1388               TRANSLATE (translated, trt, c);
  1389               charlen = CHAR_STRING (translated, str_base);
  1390               str = str_base;
  1391 
  1392               /* Check if C has any other case-equivalents.  */
  1393               TRANSLATE (inverse, inverse_trt, c);
  1394               /* If so, check if we can use boyer-moore.  */
  1395               if (c != inverse && boyer_moore_ok)
  1396                 {
  1397                   /* Check if all equivalents belong to the same
  1398                      group of characters.  Note that the check of C
  1399                      itself is done by the last iteration.  */
  1400                   int this_char_base = -1;
  1401 
  1402                   while (boyer_moore_ok)
  1403                     {
  1404                       if (ASCII_CHAR_P (inverse))
  1405                         {
  1406                           if (this_char_base > 0)
  1407                             boyer_moore_ok = 0;
  1408                           else
  1409                             this_char_base = 0;
  1410                         }
  1411                       else if (CHAR_BYTE8_P (inverse))
  1412                         /* Boyer-moore search can't handle a
  1413                            translation of an eight-bit
  1414                            character.  */
  1415                         boyer_moore_ok = 0;
  1416                       else if (this_char_base < 0)
  1417                         {
  1418                           this_char_base = inverse & ~0x3F;
  1419                           if (char_base < 0)
  1420                             char_base = this_char_base;
  1421                           else if (this_char_base != char_base)
  1422                             boyer_moore_ok = 0;
  1423                         }
  1424                       else if ((inverse & ~0x3F) != this_char_base)
  1425                         boyer_moore_ok = 0;
  1426                       if (c == inverse)
  1427                         break;
  1428                       TRANSLATE (inverse, inverse_trt, inverse);
  1429                     }
  1430                 }
  1431             }
  1432 
  1433           /* Store this character into the translated pattern.  */
  1434           memcpy (pat, str, charlen);
  1435           pat += charlen;
  1436           base_pat += in_charlen;
  1437           len_byte -= in_charlen;
  1438         }
  1439 
  1440       /* If char_base is still negative we didn't find any translated
  1441          non-ASCII characters.  */
  1442       if (char_base < 0)
  1443         char_base = 0;
  1444     }
  1445   else
  1446     {
  1447       /* Unibyte buffer.  */
  1448       char_base = 0;
  1449       while (--len >= 0)
  1450         {
  1451           int c, translated, inverse;
  1452 
  1453           /* If we got here and the RE flag is set, it's because we're
  1454              dealing with a regexp known to be trivial, so the backslash
  1455              just quotes the next character.  */
  1456           if (RE && *base_pat == '\\')
  1457             {
  1458               len--;
  1459               raw_pattern_size--;
  1460               base_pat++;
  1461             }
  1462           c = *base_pat++;
  1463           TRANSLATE (translated, trt, c);
  1464           *pat++ = translated;
  1465           /* Check that none of C's equivalents violates the
  1466              assumptions of boyer_moore.  */
  1467           TRANSLATE (inverse, inverse_trt, c);
  1468           while (1)
  1469             {
  1470               if (inverse >= 0200)
  1471                 {
  1472                   boyer_moore_ok = 0;
  1473                   break;
  1474                 }
  1475               if (c == inverse)
  1476                 break;
  1477               TRANSLATE (inverse, inverse_trt, inverse);
  1478             }
  1479         }
  1480     }
  1481 
  1482   len_byte = pat - patbuf;
  1483   pat = base_pat = patbuf;
  1484 
  1485   EMACS_INT result
  1486     = (boyer_moore_ok
  1487        ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
  1488                       pos_byte, lim_byte,
  1489                       char_base)
  1490        : simple_search (n, pat, raw_pattern_size, len_byte, trt,
  1491                         pos, pos_byte, lim, lim_byte));
  1492   SAFE_FREE ();
  1493   return result;
  1494 }
  1495 
  1496 /* Search for the Nth occurrence of STRING in the current buffer,
  1497    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
  1498 
  1499    If RE, look for matches against the regular expression STRING instead;
  1500    if POSIX, enable POSIX style backtracking within that regular
  1501    expression.
  1502 
  1503    If N is positive, search forward; in this case, LIM must be greater
  1504    than POS.
  1505 
  1506    If N is negative, search backward; LIM must be less than POS.
  1507 
  1508    Return -X if there are X remaining occurrences or matches,
  1509    or else the position at the beginning (if N is negative) or the end
  1510    (if N is positive) of the Nth occurrence or match against STRING.
  1511 
  1512    Use TRT and INVERSE_TRT as character translation tables.  */
  1513 
  1514 EMACS_INT
  1515 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
  1516                ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
  1517                bool RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
  1518 {
  1519   if (running_asynch_code)
  1520     save_search_regs ();
  1521 
  1522   /* Searching 0 times means don't move.  */
  1523   /* Null string is found at starting position.  */
  1524   if (n == 0 || SCHARS (string) == 0)
  1525     {
  1526       set_search_regs (pos_byte, 0);
  1527       return pos;
  1528     }
  1529 
  1530   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
  1531     pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte,
  1532                             n, trt, inverse_trt, posix);
  1533   else
  1534     pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte,
  1535                                 n, RE, trt, inverse_trt, posix);
  1536 
  1537   return pos;
  1538 }
  1539 
  1540 /* Do a simple string search N times for the string PAT,
  1541    whose length is LEN/LEN_BYTE,
  1542    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
  1543    TRT is the translation table.
  1544 
  1545    Return the character position where the match is found.
  1546    Otherwise, if M matches remained to be found, return -M.
  1547 
  1548    This kind of search works regardless of what is in PAT and
  1549    regardless of what is in TRT.  It is used in cases where
  1550    boyer_moore cannot work.  */
  1551 
  1552 static EMACS_INT
  1553 simple_search (EMACS_INT n, unsigned char *pat,
  1554                ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
  1555                ptrdiff_t pos, ptrdiff_t pos_byte,
  1556                ptrdiff_t lim, ptrdiff_t lim_byte)
  1557 {
  1558   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  1559   bool forward = n > 0;
  1560   /* Number of buffer bytes matched.  Note that this may be different
  1561      from len_byte in a multibyte buffer.  */
  1562   ptrdiff_t match_byte = PTRDIFF_MIN;
  1563 
  1564   if (lim > pos && multibyte)
  1565     while (n > 0)
  1566       {
  1567         while (1)
  1568           {
  1569             /* Try matching at position POS.  */
  1570             ptrdiff_t this_pos_byte = pos_byte;
  1571             ptrdiff_t this_len = len;
  1572             unsigned char *p = pat;
  1573             if (pos + len > lim || pos_byte + len_byte > lim_byte)
  1574               goto stop;
  1575 
  1576             while (this_len > 0)
  1577               {
  1578                 int charlen, pat_ch = string_char_and_length (p, &charlen);
  1579                 int buf_charlen, buf_ch
  1580                   = string_char_and_length (BYTE_POS_ADDR (this_pos_byte),
  1581                                             &buf_charlen);
  1582                 TRANSLATE (buf_ch, trt, buf_ch);
  1583 
  1584                 if (buf_ch != pat_ch)
  1585                   break;
  1586 
  1587                 this_len--;
  1588                 p += charlen;
  1589 
  1590                 this_pos_byte += buf_charlen;
  1591               }
  1592 
  1593             if (this_len == 0)
  1594               {
  1595                 match_byte = this_pos_byte - pos_byte;
  1596                 pos += len;
  1597                 pos_byte += match_byte;
  1598                 break;
  1599               }
  1600 
  1601             inc_both (&pos, &pos_byte);
  1602           }
  1603 
  1604         n--;
  1605       }
  1606   else if (lim > pos)
  1607     while (n > 0)
  1608       {
  1609         while (1)
  1610           {
  1611             /* Try matching at position POS.  */
  1612             ptrdiff_t this_pos = pos;
  1613             ptrdiff_t this_len = len;
  1614             unsigned char *p = pat;
  1615 
  1616             if (pos + len > lim)
  1617               goto stop;
  1618 
  1619             while (this_len > 0)
  1620               {
  1621                 int pat_ch = *p++;
  1622                 int buf_ch = FETCH_BYTE (this_pos);
  1623                 TRANSLATE (buf_ch, trt, buf_ch);
  1624 
  1625                 if (buf_ch != pat_ch)
  1626                   break;
  1627 
  1628                 this_len--;
  1629                 this_pos++;
  1630               }
  1631 
  1632             if (this_len == 0)
  1633               {
  1634                 match_byte = len;
  1635                 pos += len;
  1636                 break;
  1637               }
  1638 
  1639             pos++;
  1640           }
  1641 
  1642         n--;
  1643       }
  1644   /* Backwards search.  */
  1645   else if (lim < pos && multibyte)
  1646     while (n < 0)
  1647       {
  1648         while (1)
  1649           {
  1650             /* Try matching at position POS.  */
  1651             ptrdiff_t this_pos = pos;
  1652             ptrdiff_t this_pos_byte = pos_byte;
  1653             ptrdiff_t this_len = len;
  1654             const unsigned char *p = pat + len_byte;
  1655 
  1656             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
  1657               goto stop;
  1658 
  1659             while (this_len > 0)
  1660               {
  1661                 int pat_ch, buf_ch;
  1662 
  1663                 dec_both (&this_pos, &this_pos_byte);
  1664                 p -= raw_prev_char_len (p);
  1665                 pat_ch = STRING_CHAR (p);
  1666                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
  1667                 TRANSLATE (buf_ch, trt, buf_ch);
  1668 
  1669                 if (buf_ch != pat_ch)
  1670                   break;
  1671 
  1672                 this_len--;
  1673               }
  1674 
  1675             if (this_len == 0)
  1676               {
  1677                 match_byte = pos_byte - this_pos_byte;
  1678                 pos = this_pos;
  1679                 pos_byte = this_pos_byte;
  1680                 break;
  1681               }
  1682 
  1683             dec_both (&pos, &pos_byte);
  1684           }
  1685 
  1686         n++;
  1687       }
  1688   else if (lim < pos)
  1689     while (n < 0)
  1690       {
  1691         while (1)
  1692           {
  1693             /* Try matching at position POS.  */
  1694             ptrdiff_t this_pos = pos - len;
  1695             ptrdiff_t this_len = len;
  1696             unsigned char *p = pat;
  1697 
  1698             if (this_pos < lim)
  1699               goto stop;
  1700 
  1701             while (this_len > 0)
  1702               {
  1703                 int pat_ch = *p++;
  1704                 int buf_ch = FETCH_BYTE (this_pos);
  1705                 TRANSLATE (buf_ch, trt, buf_ch);
  1706 
  1707                 if (buf_ch != pat_ch)
  1708                   break;
  1709                 this_len--;
  1710                 this_pos++;
  1711               }
  1712 
  1713             if (this_len == 0)
  1714               {
  1715                 match_byte = len;
  1716                 pos -= len;
  1717                 break;
  1718               }
  1719 
  1720             pos--;
  1721           }
  1722 
  1723         n++;
  1724       }
  1725 
  1726  stop:
  1727   if (n == 0)
  1728     {
  1729       eassert (match_byte != PTRDIFF_MIN);
  1730       if (forward)
  1731         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
  1732       else
  1733         set_search_regs (multibyte ? pos_byte : pos, match_byte);
  1734 
  1735       return pos;
  1736     }
  1737   else if (n > 0)
  1738     return -n;
  1739   else
  1740     return n;
  1741 }
  1742 
  1743 /* Do Boyer-Moore search N times for the string BASE_PAT,
  1744    whose length is LEN_BYTE,
  1745    from buffer position POS_BYTE until LIM_BYTE.
  1746    DIRECTION says which direction we search in.
  1747    TRT and INVERSE_TRT are translation tables.
  1748    Characters in PAT are already translated by TRT.
  1749 
  1750    This kind of search works if all the characters in BASE_PAT that
  1751    have nontrivial translation are the same aside from the last byte.
  1752    This makes it possible to translate just the last byte of a
  1753    character, and do so after just a simple test of the context.
  1754    CHAR_BASE is nonzero if there is such a non-ASCII character.
  1755 
  1756    If that criterion is not satisfied, do not call this function.  */
  1757 
  1758 static EMACS_INT
  1759 boyer_moore (EMACS_INT n, unsigned char *base_pat,
  1760              ptrdiff_t len_byte,
  1761              Lisp_Object trt, Lisp_Object inverse_trt,
  1762              ptrdiff_t pos_byte, ptrdiff_t lim_byte,
  1763              int char_base)
  1764 {
  1765   int direction = ((n > 0) ? 1 : -1);
  1766   register ptrdiff_t dirlen;
  1767   ptrdiff_t limit;
  1768   int stride_for_teases = 0;
  1769   int BM_tab[0400];
  1770   register unsigned char *cursor, *p_limit;
  1771   register ptrdiff_t i;
  1772   register int j;
  1773   unsigned char *pat, *pat_end;
  1774   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  1775 
  1776   unsigned char simple_translate[0400];
  1777   /* These are set to the preceding bytes of a byte to be translated
  1778      if char_base is nonzero.  As the maximum byte length of a
  1779      multibyte character is 5, we have to check at most four previous
  1780      bytes.  */
  1781   int translate_prev_byte1 = 0;
  1782   int translate_prev_byte2 = 0;
  1783   int translate_prev_byte3 = 0;
  1784 
  1785   /* The general approach is that we are going to maintain that we know
  1786      the first (closest to the present position, in whatever direction
  1787      we're searching) character that could possibly be the last
  1788      (furthest from present position) character of a valid match.  We
  1789      advance the state of our knowledge by looking at that character
  1790      and seeing whether it indeed matches the last character of the
  1791      pattern.  If it does, we take a closer look.  If it does not, we
  1792      move our pointer (to putative last characters) as far as is
  1793      logically possible.  This amount of movement, which I call a
  1794      stride, will be the length of the pattern if the actual character
  1795      appears nowhere in the pattern, otherwise it will be the distance
  1796      from the last occurrence of that character to the end of the
  1797      pattern.  If the amount is zero we have a possible match.  */
  1798 
  1799   /* Here we make a "mickey mouse" BM table.  The stride of the search
  1800      is determined only by the last character of the putative match.
  1801      If that character does not match, we will stride the proper
  1802      distance to propose a match that superimposes it on the last
  1803      instance of a character that matches it (per trt), or misses
  1804      it entirely if there is none. */
  1805 
  1806   dirlen = len_byte * direction;
  1807 
  1808   /* Record position after the end of the pattern.  */
  1809   pat_end = base_pat + len_byte;
  1810   /* BASE_PAT points to a character that we start scanning from.
  1811      It is the first character in a forward search,
  1812      the last character in a backward search.  */
  1813   if (direction < 0)
  1814     base_pat = pat_end - 1;
  1815 
  1816   /* A character that does not appear in the pattern induces a
  1817      stride equal to the pattern length.  */
  1818   for (i = 0; i < 0400; i++)
  1819     BM_tab[i] = dirlen;
  1820 
  1821   /* We use this for translation, instead of TRT itself.
  1822      We fill this in to handle the characters that actually
  1823      occur in the pattern.  Others don't matter anyway!  */
  1824   for (i = 0; i < 0400; i++)
  1825     simple_translate[i] = i;
  1826 
  1827   if (char_base)
  1828     {
  1829       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
  1830          byte following them are the target of translation.  */
  1831       eassume (0x80 <= char_base && char_base <= MAX_CHAR);
  1832       unsigned char str[MAX_MULTIBYTE_LENGTH];
  1833       int cblen = CHAR_STRING (char_base, str);
  1834 
  1835       translate_prev_byte1 = str[cblen - 2];
  1836       if (cblen > 2)
  1837         {
  1838           translate_prev_byte2 = str[cblen - 3];
  1839           if (cblen > 3)
  1840             translate_prev_byte3 = str[cblen - 4];
  1841         }
  1842     }
  1843 
  1844   i = 0;
  1845   while (i != dirlen)
  1846     {
  1847       unsigned char *ptr = base_pat + i;
  1848       i += direction;
  1849       if (! NILP (trt))
  1850         {
  1851           /* If the byte currently looking at is the last of a
  1852              character to check case-equivalents, set CH to that
  1853              character.  An ASCII character and a non-ASCII character
  1854              matching with CHAR_BASE are to be checked.  */
  1855           int ch = -1;
  1856 
  1857           if (ASCII_CHAR_P (*ptr) || ! multibyte)
  1858             ch = *ptr;
  1859           else if (char_base
  1860                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
  1861             {
  1862               unsigned char *charstart = ptr - 1;
  1863 
  1864               while (! (CHAR_HEAD_P (*charstart)))
  1865                 charstart--;
  1866               ch = STRING_CHAR (charstart);
  1867               if (char_base != (ch & ~0x3F))
  1868                 ch = -1;
  1869             }
  1870 
  1871           if (ch >= 0200 && multibyte)
  1872             j = (ch & 0x3F) | 0200;
  1873           else
  1874             j = *ptr;
  1875 
  1876           if (i == dirlen)
  1877             stride_for_teases = BM_tab[j];
  1878 
  1879           BM_tab[j] = dirlen - i;
  1880           /* A translation table is accompanied by its inverse -- see
  1881              comment following downcase_table for details.  */
  1882           if (ch >= 0)
  1883             {
  1884               int starting_ch = ch;
  1885               int starting_j = j;
  1886 
  1887               while (1)
  1888                 {
  1889                   TRANSLATE (ch, inverse_trt, ch);
  1890                   if (ch >= 0200 && multibyte)
  1891                     j = (ch & 0x3F) | 0200;
  1892                   else
  1893                     j = ch;
  1894 
  1895                   /* For all the characters that map into CH,
  1896                      set up simple_translate to map the last byte
  1897                      into STARTING_J.  */
  1898                   simple_translate[j] = starting_j;
  1899                   if (ch == starting_ch)
  1900                     break;
  1901                   BM_tab[j] = dirlen - i;
  1902                 }
  1903             }
  1904         }
  1905       else
  1906         {
  1907           j = *ptr;
  1908 
  1909           if (i == dirlen)
  1910             stride_for_teases = BM_tab[j];
  1911           BM_tab[j] = dirlen - i;
  1912         }
  1913       /* stride_for_teases tells how much to stride if we get a
  1914          match on the far character but are subsequently
  1915          disappointed, by recording what the stride would have been
  1916          for that character if the last character had been
  1917          different.  */
  1918     }
  1919   pos_byte += dirlen - ((direction > 0) ? direction : 0);
  1920   /* loop invariant - POS_BYTE points at where last char (first
  1921      char if reverse) of pattern would align in a possible match.  */
  1922   while (n != 0)
  1923     {
  1924       ptrdiff_t tail_end;
  1925       unsigned char *tail_end_ptr;
  1926 
  1927       /* It's been reported that some (broken) compiler thinks that
  1928          Boolean expressions in an arithmetic context are unsigned.
  1929          Using an explicit ?1:0 prevents this.  */
  1930       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
  1931           < 0)
  1932         return (n * (0 - direction));
  1933       /* First we do the part we can by pointers (maybe nothing) */
  1934       maybe_quit ();
  1935       pat = base_pat;
  1936       limit = pos_byte - dirlen + direction;
  1937       if (direction > 0)
  1938         {
  1939           limit = BUFFER_CEILING_OF (limit);
  1940           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
  1941              can take on without hitting edge of buffer or the gap.  */
  1942           limit = min (limit, pos_byte + 20000);
  1943           limit = min (limit, lim_byte - 1);
  1944         }
  1945       else
  1946         {
  1947           limit = BUFFER_FLOOR_OF (limit);
  1948           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
  1949              can take on without hitting edge of buffer or the gap.  */
  1950           limit = max (limit, pos_byte - 20000);
  1951           limit = max (limit, lim_byte);
  1952         }
  1953       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
  1954       tail_end_ptr = BYTE_POS_ADDR (tail_end);
  1955 
  1956       if ((limit - pos_byte) * direction > 20)
  1957         {
  1958           unsigned char *p2;
  1959 
  1960           p_limit = BYTE_POS_ADDR (limit);
  1961           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
  1962           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
  1963           while (1)             /* use one cursor setting as long as i can */
  1964             {
  1965               if (direction > 0) /* worth duplicating */
  1966                 {
  1967                   while (cursor <= p_limit)
  1968                     {
  1969                       if (BM_tab[*cursor] == 0)
  1970                         goto hit;
  1971                       cursor += BM_tab[*cursor];
  1972                     }
  1973                 }
  1974               else
  1975                 {
  1976                   while (cursor >= p_limit)
  1977                     {
  1978                       if (BM_tab[*cursor] == 0)
  1979                         goto hit;
  1980                       cursor += BM_tab[*cursor];
  1981                     }
  1982                 }
  1983               /* If you are here, cursor is beyond the end of the
  1984                  searched region.  You fail to match within the
  1985                  permitted region and would otherwise try a character
  1986                  beyond that region.  */
  1987               break;
  1988 
  1989             hit:
  1990               i = dirlen - direction;
  1991               if (! NILP (trt))
  1992                 {
  1993                   while ((i -= direction) + direction != 0)
  1994                     {
  1995                       int ch;
  1996                       cursor -= direction;
  1997                       /* Translate only the last byte of a character.  */
  1998                       if (! multibyte
  1999                           || ((cursor == tail_end_ptr
  2000                                || CHAR_HEAD_P (cursor[1]))
  2001                               && (CHAR_HEAD_P (cursor[0])
  2002                                   /* Check if this is the last byte of
  2003                                      a translatable character.  */
  2004                                   || (translate_prev_byte1 == cursor[-1]
  2005                                       && (CHAR_HEAD_P (translate_prev_byte1)
  2006                                           || (translate_prev_byte2 == cursor[-2]
  2007                                               && (CHAR_HEAD_P (translate_prev_byte2)
  2008                                                   || (translate_prev_byte3 == cursor[-3]))))))))
  2009                         ch = simple_translate[*cursor];
  2010                       else
  2011                         ch = *cursor;
  2012                       if (pat[i] != ch)
  2013                         break;
  2014                     }
  2015                 }
  2016               else
  2017                 {
  2018                   while ((i -= direction) + direction != 0)
  2019                     {
  2020                       cursor -= direction;
  2021                       if (pat[i] != *cursor)
  2022                         break;
  2023                     }
  2024                 }
  2025               cursor += dirlen - i - direction; /* fix cursor */
  2026               if (i + direction == 0)
  2027                 {
  2028                   ptrdiff_t position, start, end;
  2029 #ifdef REL_ALLOC
  2030                   ptrdiff_t cursor_off;
  2031 #endif
  2032 
  2033                   cursor -= direction;
  2034 
  2035                   position = pos_byte + cursor - p2 + ((direction > 0)
  2036                                                        ? 1 - len_byte : 0);
  2037 #ifdef REL_ALLOC
  2038                   /* set_search_regs might call malloc, which could
  2039                      cause ralloc.c relocate buffer text.  We need to
  2040                      update pointers into buffer text due to that.  */
  2041                   cursor_off = cursor - p2;
  2042 #endif
  2043                   set_search_regs (position, len_byte);
  2044 #ifdef REL_ALLOC
  2045                   p_limit = BYTE_POS_ADDR (limit);
  2046                   p2 = BYTE_POS_ADDR (pos_byte);
  2047                   cursor = p2 + cursor_off;
  2048 #endif
  2049 
  2050                   if (NILP (Vinhibit_changing_match_data))
  2051                     {
  2052                       start = search_regs.start[0];
  2053                       end = search_regs.end[0];
  2054                     }
  2055                   else
  2056                     /* If Vinhibit_changing_match_data is non-nil,
  2057                        search_regs will not be changed.  So let's
  2058                        compute start and end here.  */
  2059                     {
  2060                       start = BYTE_TO_CHAR (position);
  2061                       end = BYTE_TO_CHAR (position + len_byte);
  2062                     }
  2063 
  2064                   if ((n -= direction) != 0)
  2065                     cursor += dirlen; /* to resume search */
  2066                   else
  2067                     return direction > 0 ? end : start;
  2068                 }
  2069               else
  2070                 cursor += stride_for_teases; /* <sigh> we lose -  */
  2071             }
  2072           pos_byte += cursor - p2;
  2073         }
  2074       else
  2075         /* Now we'll pick up a clump that has to be done the hard
  2076            way because it covers a discontinuity.  */
  2077         {
  2078           limit = ((direction > 0)
  2079                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
  2080                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
  2081           limit = ((direction > 0)
  2082                    ? min (limit + len_byte, lim_byte - 1)
  2083                    : max (limit - len_byte, lim_byte));
  2084           /* LIMIT is now the last value POS_BYTE can have
  2085              and still be valid for a possible match.  */
  2086           while (1)
  2087             {
  2088               /* This loop can be coded for space rather than
  2089                  speed because it will usually run only once.
  2090                  (the reach is at most len + 21, and typically
  2091                  does not exceed len).  */
  2092               while ((limit - pos_byte) * direction >= 0)
  2093                 {
  2094                   int ch = FETCH_BYTE (pos_byte);
  2095                   if (BM_tab[ch] == 0)
  2096                     goto hit2;
  2097                   pos_byte += BM_tab[ch];
  2098                 }
  2099               break;    /* ran off the end */
  2100 
  2101             hit2:
  2102               /* Found what might be a match.  */
  2103               i = dirlen - direction;
  2104               while ((i -= direction) + direction != 0)
  2105                 {
  2106                   int ch;
  2107                   unsigned char *ptr;
  2108                   pos_byte -= direction;
  2109                   ptr = BYTE_POS_ADDR (pos_byte);
  2110                   /* Translate only the last byte of a character.  */
  2111                   if (! multibyte
  2112                       || ((ptr == tail_end_ptr
  2113                            || CHAR_HEAD_P (ptr[1]))
  2114                           && (CHAR_HEAD_P (ptr[0])
  2115                               /* Check if this is the last byte of a
  2116                                  translatable character.  */
  2117                               || (translate_prev_byte1 == ptr[-1]
  2118                                   && (CHAR_HEAD_P (translate_prev_byte1)
  2119                                       || (translate_prev_byte2 == ptr[-2]
  2120                                           && (CHAR_HEAD_P (translate_prev_byte2)
  2121                                               || translate_prev_byte3 == ptr[-3])))))))
  2122                     ch = simple_translate[*ptr];
  2123                   else
  2124                     ch = *ptr;
  2125                   if (pat[i] != ch)
  2126                     break;
  2127                 }
  2128               /* Above loop has moved POS_BYTE part or all the way
  2129                  back to the first pos (last pos if reverse).
  2130                  Set it once again at the last (first if reverse) char.  */
  2131               pos_byte += dirlen - i - direction;
  2132               if (i + direction == 0)
  2133                 {
  2134                   ptrdiff_t position, start, end;
  2135                   pos_byte -= direction;
  2136 
  2137                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
  2138                   set_search_regs (position, len_byte);
  2139 
  2140                   if (NILP (Vinhibit_changing_match_data))
  2141                     {
  2142                       start = search_regs.start[0];
  2143                       end = search_regs.end[0];
  2144                     }
  2145                   else
  2146                     /* If Vinhibit_changing_match_data is non-nil,
  2147                        search_regs will not be changed.  So let's
  2148                        compute start and end here.  */
  2149                     {
  2150                       start = BYTE_TO_CHAR (position);
  2151                       end = BYTE_TO_CHAR (position + len_byte);
  2152                     }
  2153 
  2154                   if ((n -= direction) != 0)
  2155                     pos_byte += dirlen; /* to resume search */
  2156                   else
  2157                     return direction > 0 ? end : start;
  2158                 }
  2159               else
  2160                 pos_byte += stride_for_teases;
  2161             }
  2162           }
  2163       /* We have done one clump.  Can we continue? */
  2164       if ((lim_byte - pos_byte) * direction < 0)
  2165         return ((0 - n) * direction);
  2166     }
  2167   return BYTE_TO_CHAR (pos_byte);
  2168 }
  2169 
  2170 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
  2171    for the overall match just found in the current buffer.
  2172    Also clear out the match data for registers 1 and up.  */
  2173 
  2174 static void
  2175 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
  2176 {
  2177   ptrdiff_t i;
  2178 
  2179   if (!NILP (Vinhibit_changing_match_data))
  2180     return;
  2181 
  2182   /* Make sure we have registers in which to store
  2183      the match position.  */
  2184   if (search_regs.num_regs == 0)
  2185     {
  2186       search_regs.start = xmalloc (2 * sizeof *search_regs.start);
  2187       search_regs.end = xmalloc (2 * sizeof *search_regs.end);
  2188       search_regs.num_regs = 2;
  2189     }
  2190 
  2191   /* Clear out the other registers.  */
  2192   for (i = 1; i < search_regs.num_regs; i++)
  2193     {
  2194       search_regs.start[i] = -1;
  2195       search_regs.end[i] = -1;
  2196     }
  2197 
  2198   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
  2199   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
  2200   XSETBUFFER (last_thing_searched, current_buffer);
  2201 }
  2202 
  2203 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
  2204        "MSearch backward: ",
  2205        doc: /* Search backward from point for STRING.
  2206 Set point to the beginning of the occurrence found, and return point.
  2207 An optional second argument bounds the search; it is a buffer position.
  2208   The match found must not begin before that position.  A value of nil
  2209   means search to the beginning of the accessible portion of the buffer.
  2210 Optional third argument, if t, means if fail just return nil (no error).
  2211   If not nil and not t, position at limit of search and return nil.
  2212 Optional fourth argument COUNT, if a positive number, means to search
  2213   for COUNT successive occurrences.  If COUNT is negative, search
  2214   forward, instead of backward, for -COUNT occurrences.  A value of
  2215   nil means the same as 1.
  2216 With COUNT positive, the match found is the COUNTth to last one (or
  2217   last, if COUNT is 1 or nil) in the buffer located entirely before
  2218   the origin of the search; correspondingly with COUNT negative.
  2219 
  2220 Search case-sensitivity is determined by the value of the variable
  2221 `case-fold-search', which see.
  2222 
  2223 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
  2224   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2225 {
  2226   return search_command (string, bound, noerror, count, -1, false, false);
  2227 }
  2228 
  2229 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
  2230        doc: /* Search forward from point for STRING.
  2231 Set point to the end of the occurrence found, and return point.
  2232 An optional second argument bounds the search; it is a buffer position.
  2233   The match found must not end after that position.  A value of nil
  2234   means search to the end of the accessible portion of the buffer.
  2235 Optional third argument, if t, means if fail just return nil (no error).
  2236   If not nil and not t, move to limit of search and return nil.
  2237 Optional fourth argument COUNT, if a positive number, means to search
  2238   for COUNT successive occurrences.  If COUNT is negative, search
  2239   backward, instead of forward, for -COUNT occurrences.  A value of
  2240   nil means the same as 1.
  2241 With COUNT positive, the match found is the COUNTth one (or first,
  2242   if COUNT is 1 or nil) in the buffer located entirely after the
  2243   origin of the search; correspondingly with COUNT negative.
  2244 
  2245 Search case-sensitivity is determined by the value of the variable
  2246 `case-fold-search', which see.
  2247 
  2248 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
  2249   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2250 {
  2251   return search_command (string, bound, noerror, count, 1, false, false);
  2252 }
  2253 
  2254 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
  2255        "sRE search backward: ",
  2256        doc: /* Search backward from point for regular expression REGEXP.
  2257 This function is almost identical to `re-search-forward', except that
  2258 by default it searches backward instead of forward, and the sign of
  2259 COUNT also indicates exactly the opposite searching direction.
  2260 See `re-search-forward' for details.
  2261 
  2262 Note that searching backwards may give a shorter match than expected,
  2263 because REGEXP is still matched in the forward direction.  See Info
  2264 anchor `(elisp) re-search-backward' for details.  */)
  2265   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2266 {
  2267   return search_command (regexp, bound, noerror, count, -1, true, false);
  2268 }
  2269 
  2270 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
  2271        "sRE search: ",
  2272        doc: /* Search forward from point for regular expression REGEXP.
  2273 Set point to the end of the occurrence found, and return point.
  2274 The optional second argument BOUND is a buffer position that bounds
  2275   the search.  The match found must not end after that position.  A
  2276   value of nil means search to the end of the accessible portion of
  2277   the buffer.
  2278 The optional third argument NOERROR indicates how errors are handled
  2279   when the search fails.  If it is nil or omitted, emit an error; if
  2280   it is t, simply return nil and do nothing; if it is neither nil nor
  2281   t, move to the limit of search and return nil.
  2282 The optional fourth argument COUNT is a number that indicates the
  2283   search direction and the number of occurrences to search for.  If it
  2284   is positive, search forward for COUNT successive occurrences; if it
  2285   is negative, search backward, instead of forward, for -COUNT
  2286   occurrences.  A value of nil means the same as 1.
  2287 With COUNT positive/negative, the match found is the COUNTth/-COUNTth
  2288   one in the buffer located entirely after/before the origin of the
  2289   search.
  2290 
  2291 Search case-sensitivity is determined by the value of the variable
  2292 `case-fold-search', which see.
  2293 
  2294 See also the functions `match-beginning', `match-end', `match-string',
  2295 and `replace-match'.  */)
  2296   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2297 {
  2298   return search_command (regexp, bound, noerror, count, 1, true, false);
  2299 }
  2300 
  2301 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
  2302        "sPosix search backward: ",
  2303        doc: /* Search backward from point for match for REGEXP according to Posix rules.
  2304 Find the longest match in accord with Posix regular expression rules.
  2305 Set point to the beginning of the occurrence found, and return point.
  2306 An optional second argument bounds the search; it is a buffer position.
  2307   The match found must not begin before that position.  A value of nil
  2308   means search to the beginning of the accessible portion of the buffer.
  2309 Optional third argument, if t, means if fail just return nil (no error).
  2310   If not nil and not t, position at limit of search and return nil.
  2311 Optional fourth argument COUNT, if a positive number, means to search
  2312   for COUNT successive occurrences.  If COUNT is negative, search
  2313   forward, instead of backward, for -COUNT occurrences.  A value of
  2314   nil means the same as 1.
  2315 With COUNT positive, the match found is the COUNTth to last one (or
  2316   last, if COUNT is 1 or nil) in the buffer located entirely before
  2317   the origin of the search; correspondingly with COUNT negative.
  2318 
  2319 Search case-sensitivity is determined by the value of the variable
  2320 `case-fold-search', which see.
  2321 
  2322 See also the functions `match-beginning', `match-end', `match-string',
  2323 and `replace-match'.  */)
  2324   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2325 {
  2326   return search_command (regexp, bound, noerror, count, -1, true, true);
  2327 }
  2328 
  2329 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
  2330        "sPosix search: ",
  2331        doc: /* Search forward from point for REGEXP according to Posix rules.
  2332 Find the longest match in accord with Posix regular expression rules.
  2333 Set point to the end of the occurrence found, and return point.
  2334 An optional second argument bounds the search; it is a buffer position.
  2335   The match found must not end after that position.  A value of nil
  2336   means search to the end of the accessible portion of the buffer.
  2337 Optional third argument, if t, means if fail just return nil (no error).
  2338   If not nil and not t, move to limit of search and return nil.
  2339 Optional fourth argument COUNT, if a positive number, means to search
  2340   for COUNT successive occurrences.  If COUNT is negative, search
  2341   backward, instead of forward, for -COUNT occurrences.  A value of
  2342   nil means the same as 1.
  2343 With COUNT positive, the match found is the COUNTth one (or first,
  2344   if COUNT is 1 or nil) in the buffer located entirely after the
  2345   origin of the search; correspondingly with COUNT negative.
  2346 
  2347 Search case-sensitivity is determined by the value of the variable
  2348 `case-fold-search', which see.
  2349 
  2350 See also the functions `match-beginning', `match-end', `match-string',
  2351 and `replace-match'.  */)
  2352   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
  2353 {
  2354   return search_command (regexp, bound, noerror, count, 1, true, true);
  2355 }
  2356 
  2357 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
  2358        doc: /* Replace text matched by last search with NEWTEXT.
  2359 Leave point at the end of the replacement text.
  2360 
  2361 If optional second arg FIXEDCASE is non-nil, do not alter the case of
  2362 the replacement text.  Otherwise, maybe capitalize the whole text, or
  2363 maybe just word initials, based on the replaced text.  If the replaced
  2364 text has only capital letters and has at least one multiletter word,
  2365 convert NEWTEXT to all caps.  Otherwise if all words are capitalized
  2366 in the replaced text, capitalize each word in NEWTEXT.
  2367 
  2368 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
  2369 Otherwise treat `\\' as special:
  2370   `\\&' in NEWTEXT means substitute original matched text.
  2371   `\\N' means substitute what matched the Nth `\\(...\\)'.
  2372        If Nth parens didn't match, substitute nothing.
  2373   `\\\\' means insert one `\\'.
  2374   `\\?' is treated literally
  2375        (for compatibility with `query-replace-regexp').
  2376   Any other character following `\\' signals an error.
  2377 Case conversion does not apply to these substitutions.
  2378 
  2379 If optional fourth argument STRING is non-nil, it should be a string
  2380 to act on; this should be the string on which the previous match was
  2381 done via `string-match'.  In this case, `replace-match' creates and
  2382 returns a new string, made by copying STRING and replacing the part of
  2383 STRING that was matched (the original STRING itself is not altered).
  2384 
  2385 The optional fifth argument SUBEXP specifies a subexpression;
  2386 it says to replace just that subexpression with NEWTEXT,
  2387 rather than replacing the entire matched text.
  2388 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
  2389 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
  2390 NEWTEXT in place of subexp N.
  2391 This is useful only after a regular expression search or match,
  2392 since only regular expressions have distinguished subexpressions.  */)
  2393   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
  2394 {
  2395   enum { nochange, all_caps, cap_initial } case_action;
  2396   ptrdiff_t pos, pos_byte;
  2397   bool some_multiletter_word;
  2398   bool some_lowercase;
  2399   bool some_uppercase;
  2400   bool some_nonuppercase_initial;
  2401   int c, prevc;
  2402   ptrdiff_t sub;
  2403   ptrdiff_t opoint, newpoint;
  2404 
  2405   CHECK_STRING (newtext);
  2406 
  2407   if (! NILP (string))
  2408     CHECK_STRING (string);
  2409 
  2410   /* Most replacement texts don't contain any backslash directives in
  2411      the replacements.  Check whether that's the case, which will
  2412      enable us to take the fast path later.  */
  2413   if (NILP (literal)
  2414       && !memchr (SSDATA (newtext), '\\', SBYTES (newtext)))
  2415     literal = Qt;
  2416 
  2417   case_action = nochange;       /* We tried an initialization */
  2418                                 /* but some C compilers blew it */
  2419 
  2420   ptrdiff_t num_regs = search_regs.num_regs;
  2421   if (num_regs <= 0)
  2422     error ("`replace-match' called before any match found");
  2423 
  2424   sub = !NILP (subexp) ? check_integer_range (subexp, 0, num_regs - 1) : 0;
  2425   ptrdiff_t sub_start = search_regs.start[sub];
  2426   ptrdiff_t sub_end = search_regs.end[sub];
  2427   eassert (sub_start <= sub_end);
  2428 
  2429   /* Check whether the text to replace is present in the buffer/string.  */
  2430   if (! (NILP (string)
  2431          ? BEGV <= sub_start && sub_end <= ZV
  2432          : 0 <= sub_start && sub_end <= SCHARS (string)))
  2433     {
  2434       if (sub_start < 0)
  2435         xsignal2 (Qerror,
  2436                   build_string ("replace-match subexpression does not exist"),
  2437                   subexp);
  2438       args_out_of_range (make_fixnum (sub_start), make_fixnum (sub_end));
  2439     }
  2440 
  2441   if (NILP (fixedcase))
  2442     {
  2443       /* Decide how to casify by examining the matched text. */
  2444       ptrdiff_t last;
  2445 
  2446       pos = sub_start;
  2447       last = sub_end;
  2448 
  2449       if (NILP (string))
  2450         pos_byte = CHAR_TO_BYTE (pos);
  2451       else
  2452         pos_byte = string_char_to_byte (string, pos);
  2453 
  2454       prevc = '\n';
  2455       case_action = all_caps;
  2456 
  2457       /* some_multiletter_word is set nonzero if any original word
  2458          is more than one letter long. */
  2459       some_multiletter_word = 0;
  2460       some_lowercase = 0;
  2461       some_nonuppercase_initial = 0;
  2462       some_uppercase = 0;
  2463 
  2464       while (pos < last)
  2465         {
  2466           if (NILP (string))
  2467             {
  2468               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
  2469               inc_both (&pos, &pos_byte);
  2470             }
  2471           else
  2472             c = fetch_string_char_as_multibyte_advance (string,
  2473                                                         &pos, &pos_byte);
  2474 
  2475           if (lowercasep (c))
  2476             {
  2477               /* Cannot be all caps if any original char is lower case */
  2478 
  2479               some_lowercase = 1;
  2480               if (SYNTAX (prevc) != Sword)
  2481                 some_nonuppercase_initial = 1;
  2482               else
  2483                 some_multiletter_word = 1;
  2484             }
  2485           else if (uppercasep (c))
  2486             {
  2487               some_uppercase = 1;
  2488               if (SYNTAX (prevc) != Sword)
  2489                 ;
  2490               else
  2491                 some_multiletter_word = 1;
  2492             }
  2493           else
  2494             {
  2495               /* If the initial is a caseless word constituent,
  2496                  treat that like a lowercase initial.  */
  2497               if (SYNTAX (prevc) != Sword)
  2498                 some_nonuppercase_initial = 1;
  2499             }
  2500 
  2501           prevc = c;
  2502         }
  2503 
  2504       /* Convert to all caps if the old text is all caps
  2505          and has at least one multiletter word.  */
  2506       if (! some_lowercase && some_multiletter_word)
  2507         case_action = all_caps;
  2508       /* Capitalize each word, if the old text has all capitalized words.  */
  2509       else if (!some_nonuppercase_initial && some_multiletter_word)
  2510         case_action = cap_initial;
  2511       else if (!some_nonuppercase_initial && some_uppercase)
  2512         /* Should x -> yz, operating on X, give Yz or YZ?
  2513            We'll assume the latter.  */
  2514         case_action = all_caps;
  2515       else
  2516         case_action = nochange;
  2517     }
  2518 
  2519   /* Do replacement in a string.  */
  2520   if (!NILP (string))
  2521     {
  2522       Lisp_Object before, after;
  2523 
  2524       before = Fsubstring (string, make_fixnum (0), make_fixnum (sub_start));
  2525       after = Fsubstring (string, make_fixnum (sub_end), Qnil);
  2526 
  2527       /* Substitute parts of the match into NEWTEXT
  2528          if desired.  */
  2529       if (NILP (literal))
  2530         {
  2531           ptrdiff_t lastpos = 0;
  2532           ptrdiff_t lastpos_byte = 0;
  2533           /* We build up the substituted string in ACCUM.  */
  2534           Lisp_Object accum;
  2535           Lisp_Object middle;
  2536           ptrdiff_t length = SBYTES (newtext);
  2537 
  2538           accum = Qnil;
  2539 
  2540           for (pos_byte = 0, pos = 0; pos_byte < length;)
  2541             {
  2542               ptrdiff_t substart = -1;
  2543               ptrdiff_t subend = 0;
  2544               bool delbackslash = 0;
  2545 
  2546               c = fetch_string_char_advance (newtext, &pos, &pos_byte);
  2547 
  2548               if (c == '\\')
  2549                 {
  2550                   c = fetch_string_char_advance (newtext, &pos, &pos_byte);
  2551 
  2552                   if (c == '&')
  2553                     {
  2554                       substart = sub_start;
  2555                       subend = sub_end;
  2556                     }
  2557                   else if (c >= '1' && c <= '9')
  2558                     {
  2559                       if (c - '0' < num_regs
  2560                           && search_regs.start[c - '0'] >= 0)
  2561                         {
  2562                           substart = search_regs.start[c - '0'];
  2563                           subend = search_regs.end[c - '0'];
  2564                         }
  2565                       else
  2566                         {
  2567                           /* If that subexp did not match,
  2568                              replace \\N with nothing.  */
  2569                           substart = 0;
  2570                           subend = 0;
  2571                         }
  2572                     }
  2573                   else if (c == '\\')
  2574                     delbackslash = 1;
  2575                   else if (c != '?')
  2576                     error ("Invalid use of `\\' in replacement text");
  2577                 }
  2578               if (substart >= 0)
  2579                 {
  2580                   if (pos - 2 != lastpos)
  2581                     middle = substring_both (newtext, lastpos,
  2582                                              lastpos_byte,
  2583                                              pos - 2, pos_byte - 2);
  2584                   else
  2585                     middle = Qnil;
  2586                   accum = concat3 (accum, middle,
  2587                                    Fsubstring (string,
  2588                                                make_fixnum (substart),
  2589                                                make_fixnum (subend)));
  2590                   lastpos = pos;
  2591                   lastpos_byte = pos_byte;
  2592                 }
  2593               else if (delbackslash)
  2594                 {
  2595                   middle = substring_both (newtext, lastpos,
  2596                                            lastpos_byte,
  2597                                            pos - 1, pos_byte - 1);
  2598 
  2599                   accum = concat2 (accum, middle);
  2600                   lastpos = pos;
  2601                   lastpos_byte = pos_byte;
  2602                 }
  2603             }
  2604 
  2605           if (pos != lastpos)
  2606             middle = substring_both (newtext, lastpos,
  2607                                      lastpos_byte,
  2608                                      pos, pos_byte);
  2609           else
  2610             middle = Qnil;
  2611 
  2612           newtext = concat2 (accum, middle);
  2613         }
  2614 
  2615       /* Do case substitution in NEWTEXT if desired.  */
  2616       if (case_action == all_caps)
  2617         newtext = Fupcase (newtext);
  2618       else if (case_action == cap_initial)
  2619         newtext = Fupcase_initials (newtext);
  2620 
  2621       return concat3 (before, newtext, after);
  2622     }
  2623 
  2624   /* Record point.  A nonpositive OPOINT is actually an offset from ZV.  */
  2625   opoint = PT <= sub_start ? PT : max (PT, sub_end) - ZV;
  2626 
  2627   /* If we want non-literal replacement,
  2628      perform substitution on the replacement string.  */
  2629   if (NILP (literal))
  2630     {
  2631       ptrdiff_t length = SBYTES (newtext);
  2632       unsigned char *substed;
  2633       ptrdiff_t substed_alloc_size, substed_len;
  2634       bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
  2635       bool str_multibyte = STRING_MULTIBYTE (newtext);
  2636       bool really_changed = 0;
  2637 
  2638       substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
  2639                             ? length * 2 + 100
  2640                             : STRING_BYTES_BOUND);
  2641       substed = xmalloc (substed_alloc_size);
  2642       substed_len = 0;
  2643 
  2644       /* Go thru NEWTEXT, producing the actual text to insert in
  2645          SUBSTED while adjusting multibyteness to that of the current
  2646          buffer.  */
  2647 
  2648       for (pos_byte = 0, pos = 0; pos_byte < length;)
  2649         {
  2650           unsigned char str[MAX_MULTIBYTE_LENGTH];
  2651           const unsigned char *add_stuff = NULL;
  2652           ptrdiff_t add_len = 0;
  2653           ptrdiff_t idx = -1;
  2654           ptrdiff_t begbyte UNINIT;
  2655 
  2656           if (str_multibyte)
  2657             {
  2658               c = fetch_string_char_advance_no_check (newtext,
  2659                                                       &pos, &pos_byte);
  2660               if (!buf_multibyte)
  2661                 c = CHAR_TO_BYTE8 (c);
  2662             }
  2663           else
  2664             {
  2665               /* Note that we don't have to increment POS.  */
  2666               c = SREF (newtext, pos_byte++);
  2667               if (buf_multibyte)
  2668                 c = make_char_multibyte (c);
  2669             }
  2670 
  2671           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
  2672              or set IDX to a match index, which means put that part
  2673              of the buffer text into SUBSTED.  */
  2674 
  2675           if (c == '\\')
  2676             {
  2677               really_changed = 1;
  2678 
  2679               if (str_multibyte)
  2680                 {
  2681                   c = fetch_string_char_advance_no_check (newtext,
  2682                                                           &pos, &pos_byte);
  2683                   if (!buf_multibyte && !ASCII_CHAR_P (c))
  2684                     c = CHAR_TO_BYTE8 (c);
  2685                 }
  2686               else
  2687                 {
  2688                   c = SREF (newtext, pos_byte++);
  2689                   if (buf_multibyte)
  2690                     c = make_char_multibyte (c);
  2691                 }
  2692 
  2693               if (c == '&')
  2694                 idx = sub;
  2695               else if ('1' <= c && c <= '9' && c - '0' < num_regs)
  2696                 {
  2697                   if (search_regs.start[c - '0'] >= 1)
  2698                     idx = c - '0';
  2699                 }
  2700               else if (c == '\\')
  2701                 add_len = 1, add_stuff = (unsigned char *) "\\";
  2702               else
  2703                 {
  2704                   xfree (substed);
  2705                   error ("Invalid use of `\\' in replacement text");
  2706                 }
  2707             }
  2708           else
  2709             {
  2710               add_len = CHAR_STRING (c, str);
  2711               add_stuff = str;
  2712             }
  2713 
  2714           /* If we want to copy part of a previous match,
  2715              set up ADD_STUFF and ADD_LEN to point to it.  */
  2716           if (idx >= 0)
  2717             {
  2718               begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
  2719               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
  2720               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
  2721                 move_gap_both (search_regs.start[idx], begbyte);
  2722             }
  2723 
  2724           /* Now the stuff we want to add to SUBSTED
  2725              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
  2726 
  2727           /* Make sure SUBSTED is big enough.  */
  2728           if (substed_alloc_size - substed_len < add_len)
  2729             substed =
  2730               xpalloc (substed, &substed_alloc_size,
  2731                        add_len - (substed_alloc_size - substed_len),
  2732                        STRING_BYTES_BOUND, 1);
  2733 
  2734           /* We compute this after the call to xpalloc, because that
  2735              could cause buffer text be relocated when ralloc.c is used.  */
  2736           if (idx >= 0)
  2737             add_stuff = BYTE_POS_ADDR (begbyte);
  2738 
  2739           /* Now add to the end of SUBSTED.  */
  2740           if (add_stuff)
  2741             {
  2742               memcpy (substed + substed_len, add_stuff, add_len);
  2743               substed_len += add_len;
  2744             }
  2745         }
  2746 
  2747       if (really_changed)
  2748         newtext = make_specified_string ((const char *) substed, -1,
  2749                                          substed_len, buf_multibyte);
  2750       xfree (substed);
  2751     }
  2752 
  2753   newpoint = sub_start + SCHARS (newtext);
  2754 
  2755   /* Replace the old text with the new in the cleanest possible way.  */
  2756   replace_range (sub_start, sub_end, newtext, 1, 0, 1, true, true);
  2757 
  2758   if (case_action == all_caps)
  2759     Fupcase_region (make_fixnum (search_regs.start[sub]),
  2760                     make_fixnum (newpoint),
  2761                     Qnil);
  2762   else if (case_action == cap_initial)
  2763     Fupcase_initials_region (make_fixnum (search_regs.start[sub]),
  2764                              make_fixnum (newpoint), Qnil);
  2765 
  2766   /* The replace_range etc. functions can trigger modification hooks
  2767      (see signal_before_change and signal_after_change).  Try to error
  2768      out if these hooks clobber the match data since clobbering can
  2769      result in confusing bugs.  We used to check for changes in
  2770      search_regs start and end, but that fails if modification hooks
  2771      remove or add text earlier in the buffer, so just check num_regs
  2772      now. */
  2773   if (search_regs.num_regs != num_regs)
  2774     error ("Match data clobbered by buffer modification hooks");
  2775 
  2776   /* Put point back where it was in the text, if possible.  */
  2777   TEMP_SET_PT (clip_to_bounds (BEGV, opoint + (opoint <= 0 ? ZV : 0), ZV));
  2778   /* Now move point "officially" to the end of the inserted replacement.  */
  2779   move_if_not_intangible (newpoint);
  2780 
  2781   signal_after_change (sub_start, sub_end - sub_start, SCHARS (newtext));
  2782   update_compositions (sub_start, newpoint, CHECK_BORDER);
  2783 
  2784   return Qnil;
  2785 }
  2786 
  2787 static Lisp_Object
  2788 match_limit (Lisp_Object num, bool beginningp)
  2789 {
  2790   EMACS_INT n;
  2791 
  2792   CHECK_FIXNUM (num);
  2793   n = XFIXNUM (num);
  2794   if (n < 0)
  2795     args_out_of_range (num, make_fixnum (0));
  2796   if (search_regs.num_regs <= 0)
  2797     error ("No match data, because no search succeeded");
  2798   if (n >= search_regs.num_regs
  2799       || search_regs.start[n] < 0)
  2800     return Qnil;
  2801   return (make_fixnum ((beginningp) ? search_regs.start[n]
  2802                                     : search_regs.end[n]));
  2803 }
  2804 
  2805 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
  2806        doc: /* Return position of start of text matched by last search.
  2807 SUBEXP, a number, specifies which parenthesized expression in the last
  2808   regexp.
  2809 Value is nil if SUBEXPth pair didn't match, or there were less than
  2810   SUBEXP pairs.
  2811 Zero means the entire text matched by the whole regexp or whole string.
  2812 
  2813 Return value is undefined if the last search failed.  */)
  2814   (Lisp_Object subexp)
  2815 {
  2816   return match_limit (subexp, 1);
  2817 }
  2818 
  2819 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
  2820        doc: /* Return position of end of text matched by last search.
  2821 SUBEXP, a number, specifies which parenthesized expression in the last
  2822   regexp.
  2823 Value is nil if SUBEXPth pair didn't match, or there were less than
  2824   SUBEXP pairs.
  2825 Zero means the entire text matched by the whole regexp or whole string.
  2826 
  2827 Return value is undefined if the last search failed.  */)
  2828   (Lisp_Object subexp)
  2829 {
  2830   return match_limit (subexp, 0);
  2831 }
  2832 
  2833 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
  2834        doc: /* Return a list of positions that record text matched by the last search.
  2835 Element 2N of the returned list is the position of the beginning of the
  2836 match of the Nth subexpression; it corresponds to `(match-beginning N)';
  2837 element 2N + 1 is the position of the end of the match of the Nth
  2838 subexpression; it corresponds to `(match-end N)'.  See `match-beginning'
  2839 and `match-end'.
  2840 If the last search was on a buffer, all the elements are by default
  2841 markers or nil (nil when the Nth pair didn't match); they are integers
  2842 or nil if the search was on a string.  But if the optional argument
  2843 INTEGERS is non-nil, the elements that represent buffer positions are
  2844 always integers, not markers, and (if the search was on a buffer) the
  2845 buffer itself is appended to the list as one additional element.
  2846 
  2847 Use `set-match-data' to reinstate the match data from the elements of
  2848 this list.
  2849 
  2850 Note that non-matching optional groups at the end of the regexp are
  2851 elided instead of being represented with two `nil's each.  For instance:
  2852 
  2853   (progn
  2854     (string-match "^\\(a\\)?\\(b\\)\\(c\\)?$" "b")
  2855     (match-data))
  2856   => (0 1 nil nil 0 1)
  2857 
  2858 If REUSE is a list, store the value in REUSE by destructively modifying it.
  2859 If REUSE is long enough to hold all the values, its length remains the
  2860 same, and any unused elements are set to nil.  If REUSE is not long
  2861 enough, it is extended.  Note that if REUSE is long enough and INTEGERS
  2862 is non-nil, no consing is done to make the return value; this minimizes GC.
  2863 
  2864 If optional third argument RESEAT is non-nil, any previous markers on the
  2865 REUSE list will be modified to point to nowhere.
  2866 
  2867 Return value is undefined if the last search failed.  */)
  2868   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
  2869 {
  2870   Lisp_Object tail, prev;
  2871   Lisp_Object *data;
  2872   ptrdiff_t i, len;
  2873 
  2874   if (!NILP (reseat))
  2875     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
  2876       if (MARKERP (XCAR (tail)))
  2877         {
  2878           unchain_marker (XMARKER (XCAR (tail)));
  2879           XSETCAR (tail, Qnil);
  2880         }
  2881 
  2882   if (NILP (last_thing_searched))
  2883     return Qnil;
  2884 
  2885   prev = Qnil;
  2886 
  2887   USE_SAFE_ALLOCA;
  2888   SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
  2889 
  2890   len = 0;
  2891   for (i = 0; i < search_regs.num_regs; i++)
  2892     {
  2893       ptrdiff_t start = search_regs.start[i];
  2894       if (start >= 0)
  2895         {
  2896           if (BASE_EQ (last_thing_searched, Qt)
  2897               || ! NILP (integers))
  2898             {
  2899               XSETFASTINT (data[2 * i], start);
  2900               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
  2901             }
  2902           else if (BUFFERP (last_thing_searched))
  2903             {
  2904               data[2 * i] = Fmake_marker ();
  2905               Fset_marker (data[2 * i],
  2906                            make_fixnum (start),
  2907                            last_thing_searched);
  2908               data[2 * i + 1] = Fmake_marker ();
  2909               Fset_marker (data[2 * i + 1],
  2910                            make_fixnum (search_regs.end[i]),
  2911                            last_thing_searched);
  2912             }
  2913           else
  2914             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
  2915             emacs_abort ();
  2916 
  2917           len = 2 * i + 2;
  2918         }
  2919       else
  2920         data[2 * i] = data[2 * i + 1] = Qnil;
  2921     }
  2922 
  2923   if (BUFFERP (last_thing_searched) && !NILP (integers))
  2924     {
  2925       data[len] = last_thing_searched;
  2926       len++;
  2927     }
  2928 
  2929   /* If REUSE is not usable, cons up the values and return them.  */
  2930   if (! CONSP (reuse))
  2931     reuse = Flist (len, data);
  2932   else
  2933     {
  2934       /* If REUSE is a list, store as many value elements as will fit
  2935          into the elements of REUSE.  */
  2936       for (i = 0, tail = reuse; CONSP (tail);
  2937            i++, tail = XCDR (tail))
  2938         {
  2939           if (i < len)
  2940             XSETCAR (tail, data[i]);
  2941           else
  2942             XSETCAR (tail, Qnil);
  2943           prev = tail;
  2944         }
  2945 
  2946       /* If we couldn't fit all value elements into REUSE,
  2947          cons up the rest of them and add them to the end of REUSE.  */
  2948       if (i < len)
  2949         XSETCDR (prev, Flist (len - i, data + i));
  2950     }
  2951 
  2952   SAFE_FREE ();
  2953   return reuse;
  2954 }
  2955 
  2956 /* We used to have an internal use variant of `reseat' described as:
  2957 
  2958       If RESEAT is `evaporate', put the markers back on the free list
  2959       immediately.  No other references to the markers must exist in this
  2960       case, so it is used only internally on the unwind stack and
  2961       save-match-data from Lisp.
  2962 
  2963    But it was ill-conceived: those supposedly-internal markers get exposed via
  2964    the undo-list, so freeing them here is unsafe.  */
  2965 
  2966 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
  2967        doc: /* Set internal data on last search match from elements of LIST.
  2968 LIST should have been created by calling `match-data' previously.
  2969 
  2970 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
  2971   (register Lisp_Object list, Lisp_Object reseat)
  2972 {
  2973   ptrdiff_t i;
  2974   register Lisp_Object marker;
  2975 
  2976   if (running_asynch_code)
  2977     save_search_regs ();
  2978 
  2979   CHECK_LIST (list);
  2980 
  2981   /* Unless we find a marker with a buffer or an explicit buffer
  2982      in LIST, assume that this match data came from a string.  */
  2983   last_thing_searched = Qt;
  2984 
  2985   /* Allocate registers if they don't already exist.  */
  2986   {
  2987     ptrdiff_t length = list_length (list) / 2;
  2988 
  2989     if (length > search_regs.num_regs)
  2990       {
  2991         ptrdiff_t num_regs = search_regs.num_regs;
  2992         search_regs.start =
  2993           xpalloc (search_regs.start, &num_regs, length - num_regs,
  2994                    min (PTRDIFF_MAX, UINT_MAX), sizeof *search_regs.start);
  2995         search_regs.end =
  2996           xrealloc (search_regs.end, num_regs * sizeof *search_regs.end);
  2997 
  2998         for (i = search_regs.num_regs; i < num_regs; i++)
  2999           search_regs.start[i] = -1;
  3000 
  3001         search_regs.num_regs = num_regs;
  3002       }
  3003 
  3004     for (i = 0; CONSP (list); i++)
  3005       {
  3006         marker = XCAR (list);
  3007         if (BUFFERP (marker))
  3008           {
  3009             last_thing_searched = marker;
  3010             break;
  3011           }
  3012         if (i >= length)
  3013           break;
  3014         if (NILP (marker))
  3015           {
  3016             search_regs.start[i] = -1;
  3017             list = XCDR (list);
  3018           }
  3019         else
  3020           {
  3021             Lisp_Object from;
  3022             Lisp_Object m;
  3023 
  3024             m = marker;
  3025             if (MARKERP (marker))
  3026               {
  3027                 if (XMARKER (marker)->buffer == 0)
  3028                   XSETFASTINT (marker, 0);
  3029                 else
  3030                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
  3031               }
  3032 
  3033             CHECK_FIXNUM_COERCE_MARKER (marker);
  3034             from = marker;
  3035 
  3036             if (!NILP (reseat) && MARKERP (m))
  3037               {
  3038                 unchain_marker (XMARKER (m));
  3039                 XSETCAR (list, Qnil);
  3040               }
  3041 
  3042             if ((list = XCDR (list), !CONSP (list)))
  3043               break;
  3044 
  3045             m = marker = XCAR (list);
  3046 
  3047             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
  3048               XSETFASTINT (marker, 0);
  3049 
  3050             CHECK_FIXNUM_COERCE_MARKER (marker);
  3051             if (PTRDIFF_MIN <= XFIXNUM (from) && XFIXNUM (from) <= PTRDIFF_MAX
  3052                 && PTRDIFF_MIN <= XFIXNUM (marker)
  3053                 && XFIXNUM (marker) <= PTRDIFF_MAX)
  3054               {
  3055                 search_regs.start[i] = XFIXNUM (from);
  3056                 search_regs.end[i] = XFIXNUM (marker);
  3057               }
  3058             else
  3059               {
  3060                 search_regs.start[i] = -1;
  3061               }
  3062 
  3063             if (!NILP (reseat) && MARKERP (m))
  3064               {
  3065                 unchain_marker (XMARKER (m));
  3066                 XSETCAR (list, Qnil);
  3067               }
  3068           }
  3069         list = XCDR (list);
  3070       }
  3071 
  3072     for (; i < search_regs.num_regs; i++)
  3073       search_regs.start[i] = -1;
  3074   }
  3075 
  3076   return Qnil;
  3077 }
  3078 
  3079 DEFUN ("match-data--translate", Fmatch_data__translate, Smatch_data__translate,
  3080        1, 1, 0,
  3081        doc: /* Add N to all positions in the match data.  Internal.  */)
  3082   (Lisp_Object n)
  3083 {
  3084   CHECK_FIXNUM (n);
  3085   EMACS_INT delta = XFIXNUM (n);
  3086   if (!NILP (last_thing_searched))
  3087     for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
  3088       if (search_regs.start[i] >= 0)
  3089         {
  3090           search_regs.start[i] = max (0, search_regs.start[i] + delta);
  3091           search_regs.end[i] = max (0, search_regs.end[i] + delta);
  3092         }
  3093   return Qnil;
  3094 }
  3095 
  3096 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
  3097    if asynchronous code (filter or sentinel) is running. */
  3098 static void
  3099 save_search_regs (void)
  3100 {
  3101   if (saved_search_regs.num_regs == 0)
  3102     {
  3103       saved_search_regs = search_regs;
  3104       saved_last_thing_searched = last_thing_searched;
  3105       last_thing_searched = Qnil;
  3106       search_regs.num_regs = 0;
  3107       search_regs.start = 0;
  3108       search_regs.end = 0;
  3109     }
  3110 }
  3111 
  3112 /* Called upon exit from filters and sentinels. */
  3113 void
  3114 restore_search_regs (void)
  3115 {
  3116   if (saved_search_regs.num_regs != 0)
  3117     {
  3118       if (search_regs.num_regs > 0)
  3119         {
  3120           xfree (search_regs.start);
  3121           xfree (search_regs.end);
  3122         }
  3123       search_regs = saved_search_regs;
  3124       last_thing_searched = saved_last_thing_searched;
  3125       saved_last_thing_searched = Qnil;
  3126       saved_search_regs.num_regs = 0;
  3127     }
  3128 }
  3129 
  3130 /* Called from replace-match via replace_range.  */
  3131 void
  3132 update_search_regs (ptrdiff_t oldstart, ptrdiff_t oldend, ptrdiff_t newend)
  3133 {
  3134   /* Adjust search data for this change.  */
  3135   ptrdiff_t change = newend - oldend;
  3136   ptrdiff_t i;
  3137 
  3138   for (i = 0; i < search_regs.num_regs; i++)
  3139     {
  3140       if (search_regs.start[i] >= oldend)
  3141         search_regs.start[i] += change;
  3142       else if (search_regs.start[i] > oldstart)
  3143         search_regs.start[i] = oldstart;
  3144       if (search_regs.end[i] >= oldend)
  3145         search_regs.end[i] += change;
  3146       else if (search_regs.end[i] > oldstart)
  3147         search_regs.end[i] = oldstart;
  3148     }
  3149 }
  3150 
  3151 static void
  3152 unwind_set_match_data (Lisp_Object list)
  3153 {
  3154   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
  3155   Fset_match_data (list, Qt);
  3156 }
  3157 
  3158 /* Called to unwind protect the match data.  */
  3159 void
  3160 record_unwind_save_match_data (void)
  3161 {
  3162   record_unwind_protect (unwind_set_match_data,
  3163                          Fmatch_data (Qnil, Qnil, Qnil));
  3164 }
  3165 
  3166 /* Quote a string to deactivate reg-expr chars */
  3167 
  3168 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
  3169        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
  3170   (Lisp_Object string)
  3171 {
  3172   char *in, *out, *end;
  3173   char *temp;
  3174   ptrdiff_t backslashes_added = 0;
  3175 
  3176   CHECK_STRING (string);
  3177 
  3178   USE_SAFE_ALLOCA;
  3179   SAFE_NALLOCA (temp, 2, SBYTES (string));
  3180 
  3181   /* Now copy the data into the new string, inserting escapes. */
  3182 
  3183   in = SSDATA (string);
  3184   end = in + SBYTES (string);
  3185   out = temp;
  3186 
  3187   for (; in != end; in++)
  3188     {
  3189       if (*in == '['
  3190           || *in == '*' || *in == '.' || *in == '\\'
  3191           || *in == '?' || *in == '+'
  3192           || *in == '^' || *in == '$')
  3193         *out++ = '\\', backslashes_added++;
  3194       *out++ = *in;
  3195     }
  3196 
  3197   Lisp_Object result
  3198     = (backslashes_added > 0
  3199        ? make_specified_string (temp,
  3200                                 SCHARS (string) + backslashes_added,
  3201                                 out - temp,
  3202                                 STRING_MULTIBYTE (string))
  3203        : string);
  3204   SAFE_FREE ();
  3205   return result;
  3206 }
  3207 
  3208 /* Like find_newline, but doesn't use the cache, and only searches forward.  */
  3209 ptrdiff_t
  3210 find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
  3211                ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
  3212                ptrdiff_t *bytepos, bool allow_quit)
  3213 {
  3214   if (count > 0)
  3215     {
  3216       if (!end)
  3217         end = ZV, end_byte = ZV_BYTE;
  3218     }
  3219   else
  3220     {
  3221       if (!end)
  3222         end = BEGV, end_byte = BEGV_BYTE;
  3223     }
  3224   if (end_byte == -1)
  3225     end_byte = CHAR_TO_BYTE (end);
  3226 
  3227   if (counted)
  3228     *counted = count;
  3229 
  3230   if (count > 0)
  3231     while (start != end)
  3232       {
  3233         /* Our innermost scanning loop is very simple; it doesn't know
  3234            about gaps, buffer ends, or the newline cache.  ceiling is
  3235            the position of the last character before the next such
  3236            obstacle --- the last character the dumb search loop should
  3237            examine.  */
  3238         ptrdiff_t tem, ceiling_byte = end_byte - 1;
  3239 
  3240         if (start_byte == -1)
  3241           start_byte = CHAR_TO_BYTE (start);
  3242 
  3243         /* The dumb loop can only scan text stored in contiguous
  3244            bytes. BUFFER_CEILING_OF returns the last character
  3245            position that is contiguous, so the ceiling is the
  3246            position after that.  */
  3247         tem = BUFFER_CEILING_OF (start_byte);
  3248         ceiling_byte = min (tem, ceiling_byte);
  3249 
  3250         {
  3251           /* The termination address of the dumb loop.  */
  3252           unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
  3253           ptrdiff_t lim_byte = ceiling_byte + 1;
  3254 
  3255           /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
  3256              of the base, the cursor, and the next line.  */
  3257           ptrdiff_t base = start_byte - lim_byte;
  3258           ptrdiff_t cursor, next;
  3259 
  3260           for (cursor = base; cursor < 0; cursor = next)
  3261             {
  3262               /* The dumb loop.  */
  3263               unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
  3264               next = nl ? nl - lim_addr : 0;
  3265 
  3266               if (! nl)
  3267                 break;
  3268               next++;
  3269 
  3270               if (--count == 0)
  3271                 {
  3272                   if (bytepos)
  3273                     *bytepos = lim_byte + next;
  3274                   return BYTE_TO_CHAR (lim_byte + next);
  3275                 }
  3276               if (allow_quit)
  3277                 maybe_quit ();
  3278             }
  3279 
  3280           start_byte = lim_byte;
  3281           start = BYTE_TO_CHAR (start_byte);
  3282         }
  3283       }
  3284 
  3285   if (counted)
  3286     *counted -= count;
  3287   if (bytepos)
  3288     {
  3289       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
  3290       eassert (*bytepos == CHAR_TO_BYTE (start));
  3291     }
  3292   return start;
  3293 }
  3294 
  3295 DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
  3296        0, 1, 0,
  3297        doc: /* Check the newline cache of BUFFER against buffer contents.
  3298 
  3299 BUFFER defaults to the current buffer.
  3300 
  3301 Value is an array of 2 sub-arrays of buffer positions for newlines,
  3302 the first based on the cache, the second based on actually scanning
  3303 the buffer.  If the buffer doesn't have a cache, the value is nil.  */)
  3304   (Lisp_Object buffer)
  3305 {
  3306   struct buffer *buf, *old = NULL;
  3307   ptrdiff_t nl_count_cache, nl_count_buf;
  3308   Lisp_Object cache_newlines, buf_newlines, val;
  3309   ptrdiff_t from, found, i;
  3310 
  3311   if (NILP (buffer))
  3312     buf = current_buffer;
  3313   else
  3314     {
  3315       CHECK_BUFFER (buffer);
  3316       buf = XBUFFER (buffer);
  3317       old = current_buffer;
  3318     }
  3319   if (buf->base_buffer)
  3320     buf = buf->base_buffer;
  3321 
  3322   /* If the buffer doesn't have a newline cache, return nil.  */
  3323   if (NILP (BVAR (buf, cache_long_scans))
  3324       || buf->newline_cache == NULL)
  3325     return Qnil;
  3326 
  3327   /* find_newline can only work on the current buffer.  */
  3328   if (old != NULL)
  3329     set_buffer_internal_1 (buf);
  3330 
  3331   /* How many newlines are there according to the cache?  */
  3332   find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
  3333                 TYPE_MAXIMUM (ptrdiff_t), &nl_count_cache, NULL, true);
  3334 
  3335   /* Create vector and populate it.  */
  3336   cache_newlines = make_vector (nl_count_cache, make_fixnum (-1));
  3337 
  3338   if (nl_count_cache)
  3339     {
  3340       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
  3341         {
  3342           ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
  3343 
  3344           found = find_newline (from, from_byte, 0, -1, 1, &counted,
  3345                                 NULL, true);
  3346           if (counted == 0 || i >= nl_count_cache)
  3347             break;
  3348           ASET (cache_newlines, i, make_fixnum (found - 1));
  3349         }
  3350     }
  3351 
  3352   /* Now do the same, but without using the cache.  */
  3353   find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
  3354                  TYPE_MAXIMUM (ptrdiff_t), &nl_count_buf, NULL, true);
  3355   buf_newlines = make_vector (nl_count_buf, make_fixnum (-1));
  3356   if (nl_count_buf)
  3357     {
  3358       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
  3359         {
  3360           ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
  3361 
  3362           found = find_newline1 (from, from_byte, 0, -1, 1, &counted,
  3363                                  NULL, true);
  3364           if (counted == 0 || i >= nl_count_buf)
  3365             break;
  3366           ASET (buf_newlines, i, make_fixnum (found - 1));
  3367         }
  3368     }
  3369 
  3370   /* Construct the value and return it.  */
  3371   val = CALLN (Fvector, cache_newlines, buf_newlines);
  3372 
  3373   if (old != NULL)
  3374     set_buffer_internal_1 (old);
  3375   return val;
  3376 }
  3377 
  3378 
  3379 static void syms_of_search_for_pdumper (void);
  3380 
  3381 void
  3382 syms_of_search (void)
  3383 {
  3384   for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
  3385     {
  3386       staticpro (&searchbufs[i].regexp);
  3387       staticpro (&searchbufs[i].f_whitespace_regexp);
  3388       staticpro (&searchbufs[i].syntax_table);
  3389     }
  3390 
  3391   /* Error condition used for failing searches.  */
  3392   DEFSYM (Qsearch_failed, "search-failed");
  3393 
  3394   /* Error condition used for failing searches started by user, i.e.,
  3395      where failure should not invoke the debugger.  */
  3396   DEFSYM (Quser_search_failed, "user-search-failed");
  3397 
  3398   /* Error condition signaled when regexp compile_pattern fails.  */
  3399   DEFSYM (Qinvalid_regexp, "invalid-regexp");
  3400 
  3401   Fput (Qsearch_failed, Qerror_conditions,
  3402         pure_list (Qsearch_failed, Qerror));
  3403   Fput (Qsearch_failed, Qerror_message,
  3404         build_pure_c_string ("Search failed"));
  3405 
  3406   Fput (Quser_search_failed, Qerror_conditions,
  3407         pure_list (Quser_search_failed, Quser_error, Qsearch_failed, Qerror));
  3408   Fput (Quser_search_failed, Qerror_message,
  3409         build_pure_c_string ("Search failed"));
  3410 
  3411   Fput (Qinvalid_regexp, Qerror_conditions,
  3412         pure_list (Qinvalid_regexp, Qerror));
  3413   Fput (Qinvalid_regexp, Qerror_message,
  3414         build_pure_c_string ("Invalid regexp"));
  3415 
  3416   re_match_object = Qnil;
  3417   staticpro (&re_match_object);
  3418 
  3419   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
  3420       doc: /* Regexp to substitute for bunches of spaces in regexp search.
  3421 Some commands use this for user-specified regexps.
  3422 Spaces that occur inside character classes or repetition operators
  3423 or other such regexp constructs are not replaced with this.
  3424 A value of nil (which is the normal value) means treat spaces
  3425 literally.  Note that a value with capturing groups can change the
  3426 numbering of existing capture groups in unexpected ways.  */);
  3427   Vsearch_spaces_regexp = Qnil;
  3428 
  3429   DEFSYM (Qinhibit_changing_match_data, "inhibit-changing-match-data");
  3430   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
  3431       doc: /* Internal use only.
  3432 If non-nil, the primitive searching and matching functions
  3433 such as `looking-at', `string-match', `re-search-forward', etc.,
  3434 do not set the match data.  The proper way to use this variable
  3435 is to bind it with `let' around a small expression.  */);
  3436   Vinhibit_changing_match_data = Qnil;
  3437 
  3438   defsubr (&Slooking_at);
  3439   defsubr (&Sposix_looking_at);
  3440   defsubr (&Sstring_match);
  3441   defsubr (&Sposix_string_match);
  3442   defsubr (&Ssearch_forward);
  3443   defsubr (&Ssearch_backward);
  3444   defsubr (&Sre_search_forward);
  3445   defsubr (&Sre_search_backward);
  3446   defsubr (&Sposix_search_forward);
  3447   defsubr (&Sposix_search_backward);
  3448   defsubr (&Sreplace_match);
  3449   defsubr (&Smatch_beginning);
  3450   defsubr (&Smatch_end);
  3451   defsubr (&Smatch_data);
  3452   defsubr (&Sset_match_data);
  3453   defsubr (&Smatch_data__translate);
  3454   defsubr (&Sregexp_quote);
  3455   defsubr (&Snewline_cache_check);
  3456 
  3457   pdumper_do_now_and_after_load (syms_of_search_for_pdumper);
  3458 }
  3459 
  3460 static void
  3461 syms_of_search_for_pdumper (void)
  3462 {
  3463   for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
  3464     {
  3465       searchbufs[i].buf.allocated = 100;
  3466       searchbufs[i].buf.buffer = xmalloc (100);
  3467       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
  3468       searchbufs[i].regexp = Qnil;
  3469       searchbufs[i].f_whitespace_regexp = Qnil;
  3470       searchbufs[i].busy = false;
  3471       searchbufs[i].syntax_table = Qnil;
  3472       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
  3473     }
  3474   searchbuf_head = &searchbufs[0];
  3475 }
/* [<][>][^][v][top][bottom][index][help] */
root/src/search.c

DEFINITIONS