root/src/regex-emacs.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. extract_number
  2. extract_number_and_incr
  3. debug_putchar
  4. print_fastmap
  5. print_partial_compiled_pattern
  6. print_compiled_pattern
  7. print_double_string
  8. re_wctype_parse
  9. re_iswctype
  10. re_wctype_to_bit
  11. extend_range_table_work_area
  12. regex_compile
  13. store_op1
  14. store_op2
  15. insert_op1
  16. insert_op2
  17. at_begline_loc_p
  18. at_endline_loc_p
  19. group_in_compile_stack
  20. analyze_first
  21. re_compile_fastmap
  22. re_set_registers
  23. re_search
  24. re_search_2
  25. skip_one_char
  26. skip_noops
  27. execute_charset
  28. mutually_exclusive_p
  29. re_match_2
  30. unwind_re_match
  31. re_match_2_internal
  32. bcmp_translate
  33. re_compile_pattern

     1 /* Emacs regular expression matching and search
     2 
     3    Copyright (C) 1993-2023 Free Software Foundation, Inc.
     4 
     5    This program is free software; you can redistribute it and/or modify
     6    it under the terms of the GNU General Public License as published by
     7    the Free Software Foundation; either version 3, or (at your option)
     8    any later version.
     9 
    10    This program is distributed in the hope that it will be useful,
    11    but WITHOUT ANY WARRANTY; without even the implied warranty of
    12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13    GNU General Public License for more details.
    14 
    15    You should have received a copy of the GNU General Public License
    16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
    17 
    18 /* TODO:
    19    - structure the opcode space into opcode+flag.
    20    - replace (succeed_n + jump_n + set_number_at) with something that doesn't
    21      need to modify the compiled regexp so that re_search can be reentrant.
    22    - get rid of on_failure_jump_smart by doing the optimization in re_comp
    23      rather than at run-time, so that re_search can be reentrant.
    24 */
    25 
    26 #include <config.h>
    27 
    28 #include "regex-emacs.h"
    29 
    30 #include <stdlib.h>
    31 
    32 #include "character.h"
    33 #include "buffer.h"
    34 #include "syntax.h"
    35 #include "category.h"
    36 #include "dispextern.h"
    37 
    38 /* Maximum number of duplicates an interval can allow.  Some systems
    39    define this in other header files, but we want our value, so remove
    40    any previous define.  Repeat counts are stored in opcodes as 2-byte
    41    unsigned integers.  */
    42 #ifdef RE_DUP_MAX
    43 # undef RE_DUP_MAX
    44 #endif
    45 #define RE_DUP_MAX (0xffff)
    46 
    47 /* Make syntax table lookup grant data in gl_state.  */
    48 #define SYNTAX(c) syntax_property (c, 1)
    49 
    50 /* Explicit syntax lookup using the buffer-local table.  */
    51 #define BUFFER_SYNTAX(c) syntax_property (c, 0)
    52 
    53 #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
    54 #define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
    55 #define RE_STRING_CHAR(p, multibyte) \
    56   (multibyte ? STRING_CHAR (p) : *(p))
    57 #define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
    58   (multibyte ? string_char_and_length (p, &(len)) : ((len) = 1, *(p)))
    59 
    60 #define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
    61 
    62 #define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
    63 
    64 /* Set C a (possibly converted to multibyte) character before P.  P
    65    points into a string which is the virtual concatenation of STR1
    66    (which ends at END1) or STR2 (which ends at END2).  */
    67 #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)                      \
    68   do {                                                                       \
    69     if (target_multibyte)                                                    \
    70       {                                                                      \
    71         re_char *dtemp = (p) == (str2) ? (end1) : (p);                       \
    72         re_char *dlimit = (p) > (str2) && (p) <= (end2) ? (str2) : (str1);   \
    73         while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp))                    \
    74           continue;                                                          \
    75         c = STRING_CHAR (dtemp);                                             \
    76       }                                                                      \
    77     else                                                                     \
    78       {                                                                      \
    79         (c = ((p) == (str2) ? (end1) : (p))[-1]);                            \
    80         (c) = RE_CHAR_TO_MULTIBYTE (c);                                      \
    81       }                                                                      \
    82   } while (false)
    83 
    84 /* Set C a (possibly converted to multibyte) character at P, and set
    85    LEN to the byte length of that character.  */
    86 #define GET_CHAR_AFTER(c, p, len)               \
    87   do {                                          \
    88     if (target_multibyte)                       \
    89       (c) = string_char_and_length (p, &(len)); \
    90     else                                        \
    91       {                                         \
    92         (c) = *p;                               \
    93         len = 1;                                \
    94         (c) = RE_CHAR_TO_MULTIBYTE (c);         \
    95       }                                         \
    96    } while (false)
    97 
    98 /* 1 if C is an ASCII character.  */
    99 #define IS_REAL_ASCII(c) ((c) < 0200)
   100 
   101 /* 1 if C is a unibyte character.  */
   102 #define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
   103 
   104 /* The Emacs definitions should not be directly affected by locales.  */
   105 
   106 /* In Emacs, these are only used for single-byte characters.  */
   107 #define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
   108 #define ISCNTRL(c) ((c) < ' ')
   109 #define ISXDIGIT(c) (0 <= char_hexdigit (c))
   110 
   111 /* The rest must handle multibyte characters.  */
   112 
   113 #define ISBLANK(c) (IS_REAL_ASCII (c)                   \
   114                      ? ((c) == ' ' || (c) == '\t')      \
   115                      : blankp (c))
   116 
   117 #define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c)                              \
   118                      ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240)       \
   119                      : graphicp (c))
   120 
   121 #define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c)                              \
   122                     ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237)       \
   123                      : printablep (c))
   124 
   125 #define ISALNUM(c) (IS_REAL_ASCII (c)                   \
   126                     ? (((c) >= 'a' && (c) <= 'z')       \
   127                        || ((c) >= 'A' && (c) <= 'Z')    \
   128                        || ((c) >= '0' && (c) <= '9'))   \
   129                     : alphanumericp (c))
   130 
   131 #define ISALPHA(c) (IS_REAL_ASCII (c)                   \
   132                     ? (((c) >= 'a' && (c) <= 'z')       \
   133                        || ((c) >= 'A' && (c) <= 'Z'))   \
   134                     : alphabeticp (c))
   135 
   136 #define ISLOWER(c) lowercasep (c)
   137 
   138 #define ISUPPER(c) uppercasep (c)
   139 
   140 /* The following predicates use the buffer-local syntax table and
   141    ignore syntax properties, for consistency with the up-front
   142    assumptions made at compile time.  */
   143 
   144 #define ISPUNCT(c) (IS_REAL_ASCII (c)                           \
   145                     ? ((c) > ' ' && (c) < 0177                  \
   146                        && !(((c) >= 'a' && (c) <= 'z')          \
   147                             || ((c) >= 'A' && (c) <= 'Z')       \
   148                             || ((c) >= '0' && (c) <= '9')))     \
   149                     : BUFFER_SYNTAX (c) != Sword)
   150 
   151 #define ISSPACE(c) (BUFFER_SYNTAX (c) == Swhitespace)
   152 
   153 #define ISWORD(c) (BUFFER_SYNTAX (c) == Sword)
   154 
   155 /* Use alloca instead of malloc.  This is because using malloc in
   156    re_search* or re_match* could cause memory leaks when C-g is used
   157    in Emacs (note that SAFE_ALLOCA could also call malloc, but does so
   158    via 'record_xmalloc' which uses 'unwind_protect' to ensure the
   159    memory is freed even in case of non-local exits); also, malloc is
   160    slower and causes storage fragmentation.  On the other hand, malloc
   161    is more portable, and easier to debug.
   162 
   163    Because we sometimes use alloca, some routines have to be macros,
   164    not functions -- 'alloca'-allocated space disappears at the end of the
   165    function it is called in.  */
   166 
   167 /* This may be adjusted in main(), if the stack is successfully grown.  */
   168 ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
   169 /* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca.  */
   170 #define REGEX_USE_SAFE_ALLOCA                                          \
   171   USE_SAFE_ALLOCA; sa_avail = emacs_re_safe_alloca
   172 
   173 /* Assumes a 'char *destination' variable.  */
   174 #define REGEX_REALLOCATE(source, osize, nsize)                          \
   175   (destination = SAFE_ALLOCA (nsize),                                   \
   176    memcpy (destination, source, osize))
   177 
   178 /* True if 'size1' is non-NULL and PTR is pointing anywhere inside
   179    'string1' or just past its end.  This works if PTR is NULL, which is
   180    a good thing.  */
   181 #define FIRST_STRING_P(ptr)                                     \
   182   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
   183 
   184 #define BYTEWIDTH 8 /* In bits.  */
   185 
   186 /* Type of source-pattern and string chars.  */
   187 typedef const unsigned char re_char;
   188 
   189 static void re_compile_fastmap (struct re_pattern_buffer *);
   190 static ptrdiff_t re_match_2_internal (struct re_pattern_buffer *bufp,
   191                                      re_char *string1, ptrdiff_t size1,
   192                                      re_char *string2, ptrdiff_t size2,
   193                                      ptrdiff_t pos,
   194                                      struct re_registers *regs,
   195                                      ptrdiff_t stop);
   196 
   197 /* These are the command codes that appear in compiled regular
   198    expressions.  Some opcodes are followed by argument bytes.  A
   199    command code can specify any interpretation whatsoever for its
   200    arguments.  Zero bytes may appear in the compiled regular expression.  */
   201 
   202 typedef enum
   203 {
   204   no_op = 0,
   205 
   206   /* Succeed right away--no more backtracking.  */
   207   succeed,
   208 
   209         /* Followed by one byte giving n, then by n literal bytes.  */
   210   exactn,
   211 
   212         /* Matches any (more or less) character.  */
   213   anychar,
   214 
   215         /* Matches any one char belonging to specified set.  First
   216            following byte is number of bitmap bytes.  Then come bytes
   217            for a bitmap saying which chars are in.  Bits in each byte
   218            are ordered low-bit-first.  A character is in the set if its
   219            bit is 1.  A character too large to have a bit in the map is
   220            automatically not in the set.
   221 
   222            If the length byte has the 0x80 bit set, then that stuff
   223            is followed by a range table:
   224                2 bytes of flags for character sets (low 8 bits, high 8 bits)
   225                    See RANGE_TABLE_WORK_BITS below.
   226                2 bytes, the number of pairs that follow (up to 32767)
   227                pairs, each 2 multibyte characters,
   228                    each multibyte character represented as 3 bytes.  */
   229   charset,
   230 
   231         /* Same parameters as charset, but match any character that is
   232            not one of those specified.  */
   233   charset_not,
   234 
   235         /* Start remembering the text that is matched, for storing in a
   236            register.  Followed by one byte with the register number, in
   237            the range 0 to one less than the pattern buffer's re_nsub
   238            field.  */
   239   start_memory,
   240 
   241         /* Stop remembering the text that is matched and store it in a
   242            memory register.  Followed by one byte with the register
   243            number, in the range 0 to one less than 're_nsub' in the
   244            pattern buffer.  */
   245   stop_memory,
   246 
   247         /* Match a duplicate of something remembered. Followed by one
   248            byte containing the register number.  */
   249   duplicate,
   250 
   251         /* Fail unless at beginning of line.  */
   252   begline,
   253 
   254         /* Fail unless at end of line.  */
   255   endline,
   256 
   257         /* Succeeds if at beginning of buffer.  */
   258   begbuf,
   259 
   260         /* Analogously, for end of buffer/string.  */
   261   endbuf,
   262 
   263         /* Followed by two byte relative address to which to jump.  */
   264   jump,
   265 
   266         /* Followed by two-byte relative address of place to resume at
   267            in case of failure.  */
   268   on_failure_jump,
   269 
   270         /* Like on_failure_jump, but pushes a placeholder instead of the
   271            current string position when executed.  */
   272   on_failure_keep_string_jump,
   273 
   274         /* Just like 'on_failure_jump', except that it checks that we
   275            don't get stuck in an infinite loop (matching an empty string
   276            indefinitely).  */
   277   on_failure_jump_loop,
   278 
   279         /* Just like 'on_failure_jump_loop', except that it checks for
   280            a different kind of loop (the kind that shows up with non-greedy
   281            operators).  This operation has to be immediately preceded
   282            by a 'no_op'.  */
   283   on_failure_jump_nastyloop,
   284 
   285         /* A smart 'on_failure_jump' used for greedy * and + operators.
   286            It analyzes the loop before which it is put and if the
   287            loop does not require backtracking, it changes itself to
   288            'on_failure_keep_string_jump' and short-circuits the loop,
   289            else it just defaults to changing itself into 'on_failure_jump'.
   290            It assumes that it is pointing to just past a 'jump'.  */
   291   on_failure_jump_smart,
   292 
   293         /* Followed by two-byte relative address and two-byte number n.
   294            After matching N times, jump to the address upon failure.
   295            Does not work if N starts at 0: use on_failure_jump_loop
   296            instead.  */
   297   succeed_n,
   298 
   299         /* Followed by two-byte relative address, and two-byte number n.
   300            Jump to the address N times, then fail.  */
   301   jump_n,
   302 
   303         /* Set the following two-byte relative address to the
   304            subsequent two-byte number.  The address *includes* the two
   305            bytes of number.  */
   306   set_number_at,
   307 
   308   wordbeg,      /* Succeeds if at word beginning.  */
   309   wordend,      /* Succeeds if at word end.  */
   310 
   311   wordbound,    /* Succeeds if at a word boundary.  */
   312   notwordbound, /* Succeeds if not at a word boundary.  */
   313 
   314   symbeg,       /* Succeeds if at symbol beginning.  */
   315   symend,       /* Succeeds if at symbol end.  */
   316 
   317         /* Matches any character whose syntax is specified.  Followed by
   318            a byte which contains a syntax code, e.g., Sword.  */
   319   syntaxspec,
   320 
   321         /* Matches any character whose syntax is not that specified.  */
   322   notsyntaxspec,
   323 
   324   at_dot,       /* Succeeds if at point.  */
   325 
   326   /* Matches any character whose category-set contains the specified
   327      category.  The operator is followed by a byte which contains a
   328      category code (mnemonic ASCII character).  */
   329   categoryspec,
   330 
   331   /* Matches any character whose category-set does not contain the
   332      specified category.  The operator is followed by a byte which
   333      contains the category code (mnemonic ASCII character).  */
   334   notcategoryspec
   335 } re_opcode_t;
   336 
   337 /* Common operations on the compiled pattern.  */
   338 
   339 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
   340 
   341 #define STORE_NUMBER(destination, number)                               \
   342   do {                                                                  \
   343     (destination)[0] = (number) & 0377;                                 \
   344     (destination)[1] = (number) >> 8;                                   \
   345   } while (false)
   346 
   347 /* Same as STORE_NUMBER, except increment DESTINATION to
   348    the byte after where the number is stored.  Therefore, DESTINATION
   349    must be an lvalue.  */
   350 
   351 #define STORE_NUMBER_AND_INCR(destination, number)                      \
   352   do {                                                                  \
   353     STORE_NUMBER (destination, number);                                 \
   354     (destination) += 2;                                                 \
   355   } while (false)
   356 
   357 /* Put into DESTINATION a number stored in two contiguous bytes starting
   358    at SOURCE.  */
   359 
   360 #define EXTRACT_NUMBER(destination, source)                             \
   361   ((destination) = extract_number (source))
   362 
   363 static int
   364 extract_number (re_char *source)
   365 {
   366   signed char leading_byte = source[1];
   367   return leading_byte * 256 + source[0];
   368 }
   369 
   370 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
   371    SOURCE must be an lvalue.  */
   372 
   373 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
   374   ((destination) = extract_number_and_incr (&source))
   375 
   376 static int
   377 extract_number_and_incr (re_char **source)
   378 {
   379   int num = extract_number (*source);
   380   *source += 2;
   381   return num;
   382 }
   383 
   384 /* Store a multibyte character in three contiguous bytes starting
   385    DESTINATION, and increment DESTINATION to the byte after where the
   386    character is stored.  Therefore, DESTINATION must be an lvalue.  */
   387 
   388 #define STORE_CHARACTER_AND_INCR(destination, character)        \
   389   do {                                                          \
   390     (destination)[0] = (character) & 0377;                      \
   391     (destination)[1] = ((character) >> 8) & 0377;               \
   392     (destination)[2] = (character) >> 16;                       \
   393     (destination) += 3;                                         \
   394   } while (false)
   395 
   396 /* Put into DESTINATION a character stored in three contiguous bytes
   397    starting at SOURCE.  */
   398 
   399 #define EXTRACT_CHARACTER(destination, source)  \
   400   do {                                          \
   401     (destination) = ((source)[0]                \
   402                      | ((source)[1] << 8)       \
   403                      | ((source)[2] << 16));    \
   404   } while (false)
   405 
   406 
   407 /* Macros for charset. */
   408 
   409 /* Size of bitmap of charset P in bytes.  P is a start of charset,
   410    i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
   411 #define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
   412 
   413 /* Nonzero if charset P has range table.  */
   414 #define CHARSET_RANGE_TABLE_EXISTS_P(p)  (((p)[1] & 0x80) != 0)
   415 
   416 /* Return the address of range table of charset P.  But not the start
   417    of table itself, but the before where the number of ranges is
   418    stored.  '2 +' means to skip re_opcode_t and size of bitmap,
   419    and the 2 bytes of flags at the start of the range table.  */
   420 #define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
   421 
   422 /* Extract the bit flags that start a range table.  */
   423 #define CHARSET_RANGE_TABLE_BITS(p)             \
   424   ((p)[2 + CHARSET_BITMAP_SIZE (p)]             \
   425    + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
   426 
   427 /* Return the address of end of RANGE_TABLE.  COUNT is number of
   428    ranges (which is a pair of (start, end)) in the RANGE_TABLE.  '* 2'
   429    is start of range and end of range.  '* 3' is size of each start
   430    and end.  */
   431 #define CHARSET_RANGE_TABLE_END(range_table, count)     \
   432   ((range_table) + (count) * 2 * 3)
   433 
   434 /* If REGEX_EMACS_DEBUG is defined, print many voluminous messages
   435    (if the variable regex_emacs_debug is positive).  */
   436 
   437 #ifdef REGEX_EMACS_DEBUG
   438 
   439 /* Use standard I/O for debugging.  */
   440 # include "sysstdio.h"
   441 
   442 static int regex_emacs_debug = -100000;
   443 
   444 # define DEBUG_STATEMENT(e) e
   445 # define DEBUG_PRINT(...)                                       \
   446   if (regex_emacs_debug > 0) fprintf (stderr, __VA_ARGS__)
   447 # define DEBUG_COMPILES_ARGUMENTS
   448 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                          \
   449   if (regex_emacs_debug > 0) print_partial_compiled_pattern (s, e)
   450 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                 \
   451   if (regex_emacs_debug > 0) print_double_string (w, s1, sz1, s2, sz2)
   452 
   453 static void
   454 debug_putchar (int c)
   455 {
   456   if (c >= 32 && c <= 126)
   457     putc (c, stderr);
   458   else
   459     {
   460       unsigned int uc = c;
   461       fprintf (stderr, "{%02x}", uc);
   462     }
   463 }
   464 
   465 /* Print the fastmap in human-readable form.  */
   466 
   467 static void
   468 print_fastmap (char *fastmap)
   469 {
   470   bool was_a_range = false;
   471   int i = 0;
   472 
   473   while (i < (1 << BYTEWIDTH))
   474     {
   475       if (fastmap[i++])
   476         {
   477           was_a_range = false;
   478           debug_putchar (i - 1);
   479           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
   480             {
   481               was_a_range = true;
   482               i++;
   483             }
   484           if (was_a_range)
   485             {
   486               debug_putchar ('-');
   487               debug_putchar (i - 1);
   488             }
   489         }
   490     }
   491   putc ('\n', stderr);
   492 }
   493 
   494 
   495 /* Print a compiled pattern string in human-readable form, starting at
   496    the START pointer into it and ending just before the pointer END.  */
   497 
   498 static void
   499 print_partial_compiled_pattern (re_char *start, re_char *end)
   500 {
   501   int mcnt, mcnt2;
   502   re_char *p = start;
   503   re_char *pend = end;
   504 
   505   if (start == NULL)
   506     {
   507       fputs ("(null)\n", stderr);
   508       return;
   509     }
   510 
   511   /* Loop over pattern commands.  */
   512   while (p < pend)
   513     {
   514       fprintf (stderr, "%td:\t", p - start);
   515 
   516       switch ((re_opcode_t) *p++)
   517         {
   518         case no_op:
   519           fputs ("/no_op", stderr);
   520           break;
   521 
   522         case succeed:
   523           fputs ("/succeed", stderr);
   524           break;
   525 
   526         case exactn:
   527           mcnt = *p++;
   528           fprintf (stderr, "/exactn/%d", mcnt);
   529           do
   530             {
   531               debug_putchar ('/');
   532               debug_putchar (*p++);
   533             }
   534           while (--mcnt);
   535           break;
   536 
   537         case start_memory:
   538           fprintf (stderr, "/start_memory/%d", *p++);
   539           break;
   540 
   541         case stop_memory:
   542           fprintf (stderr, "/stop_memory/%d", *p++);
   543           break;
   544 
   545         case duplicate:
   546           fprintf (stderr, "/duplicate/%d", *p++);
   547           break;
   548 
   549         case anychar:
   550           fputs ("/anychar", stderr);
   551           break;
   552 
   553         case charset:
   554         case charset_not:
   555           {
   556             int c, last = -100;
   557             bool in_range = false;
   558             int length = CHARSET_BITMAP_SIZE (p - 1);
   559             bool has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
   560 
   561             fprintf (stderr, "/charset [%s",
   562                      (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
   563 
   564             if (p + (*p & 0x7f) >= pend)
   565               fputs (" !extends past end of pattern! ", stderr);
   566 
   567             for (c = 0; c < 256; c++)
   568               if (c / 8 < length
   569                   && (p[1 + (c/8)] & (1 << (c % 8))))
   570                 {
   571                   /* Are we starting a range?  */
   572                   if (last + 1 == c && ! in_range)
   573                     {
   574                       debug_putchar ('-');
   575                       in_range = true;
   576                     }
   577                   /* Have we broken a range?  */
   578                   else if (last + 1 != c && in_range)
   579                     {
   580                       debug_putchar (last);
   581                       in_range = false;
   582                     }
   583 
   584                   if (! in_range)
   585                     debug_putchar (c);
   586 
   587                   last = c;
   588               }
   589 
   590             if (in_range)
   591               debug_putchar (last);
   592 
   593             debug_putchar (']');
   594 
   595             p += 1 + length;
   596 
   597             if (has_range_table)
   598               {
   599                 int count;
   600                 fputs ("has-range-table", stderr);
   601 
   602                 /* ??? Should print the range table; for now, just skip it.  */
   603                 p += 2;         /* skip range table bits */
   604                 EXTRACT_NUMBER_AND_INCR (count, p);
   605                 p = CHARSET_RANGE_TABLE_END (p, count);
   606               }
   607           }
   608           break;
   609 
   610         case begline:
   611           fputs ("/begline", stderr);
   612           break;
   613 
   614         case endline:
   615           fputs ("/endline", stderr);
   616           break;
   617 
   618         case on_failure_jump:
   619           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   620           fprintf (stderr, "/on_failure_jump to %td", p + mcnt - start);
   621           break;
   622 
   623         case on_failure_keep_string_jump:
   624           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   625           fprintf (stderr, "/on_failure_keep_string_jump to %td",
   626                    p + mcnt - start);
   627           break;
   628 
   629         case on_failure_jump_nastyloop:
   630           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   631           fprintf (stderr, "/on_failure_jump_nastyloop to %td",
   632                    p + mcnt - start);
   633           break;
   634 
   635         case on_failure_jump_loop:
   636           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   637           fprintf (stderr, "/on_failure_jump_loop to %td",
   638                    p + mcnt - start);
   639           break;
   640 
   641         case on_failure_jump_smart:
   642           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   643           fprintf (stderr, "/on_failure_jump_smart to %td",
   644                    p + mcnt - start);
   645           break;
   646 
   647         case jump:
   648           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   649           fprintf (stderr, "/jump to %td", p + mcnt - start);
   650           break;
   651 
   652         case succeed_n:
   653           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   654           EXTRACT_NUMBER_AND_INCR (mcnt2, p);
   655           fprintf (stderr, "/succeed_n to %td, %d times",
   656                    p - 2 + mcnt - start, mcnt2);
   657           break;
   658 
   659         case jump_n:
   660           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   661           EXTRACT_NUMBER_AND_INCR (mcnt2, p);
   662           fprintf (stderr, "/jump_n to %td, %d times",
   663                    p - 2 + mcnt - start, mcnt2);
   664           break;
   665 
   666         case set_number_at:
   667           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   668           EXTRACT_NUMBER_AND_INCR (mcnt2, p);
   669           fprintf (stderr, "/set_number_at location %td to %d",
   670                    p - 2 + mcnt - start, mcnt2);
   671           break;
   672 
   673         case wordbound:
   674           fputs ("/wordbound", stderr);
   675           break;
   676 
   677         case notwordbound:
   678           fputs ("/notwordbound", stderr);
   679           break;
   680 
   681         case wordbeg:
   682           fputs ("/wordbeg", stderr);
   683           break;
   684 
   685         case wordend:
   686           fputs ("/wordend", stderr);
   687           break;
   688 
   689         case symbeg:
   690           fputs ("/symbeg", stderr);
   691           break;
   692 
   693         case symend:
   694           fputs ("/symend", stderr);
   695           break;
   696 
   697         case syntaxspec:
   698           fputs ("/syntaxspec", stderr);
   699           mcnt = *p++;
   700           fprintf (stderr, "/%d", mcnt);
   701           break;
   702 
   703         case notsyntaxspec:
   704           fputs ("/notsyntaxspec", stderr);
   705           mcnt = *p++;
   706           fprintf (stderr, "/%d", mcnt);
   707           break;
   708 
   709         case at_dot:
   710           fputs ("/at_dot", stderr);
   711           break;
   712 
   713         case categoryspec:
   714           fputs ("/categoryspec", stderr);
   715           mcnt = *p++;
   716           fprintf (stderr, "/%d", mcnt);
   717           break;
   718 
   719         case notcategoryspec:
   720           fputs ("/notcategoryspec", stderr);
   721           mcnt = *p++;
   722           fprintf (stderr, "/%d", mcnt);
   723           break;
   724 
   725         case begbuf:
   726           fputs ("/begbuf", stderr);
   727           break;
   728 
   729         case endbuf:
   730           fputs ("/endbuf", stderr);
   731           break;
   732 
   733         default:
   734           fprintf (stderr, "?%d", *(p-1));
   735         }
   736 
   737       putc ('\n', stderr);
   738     }
   739 
   740   fprintf (stderr, "%td:\tend of pattern.\n", p - start);
   741 }
   742 
   743 
   744 static void
   745 print_compiled_pattern (struct re_pattern_buffer *bufp)
   746 {
   747   re_char *buffer = bufp->buffer;
   748 
   749   print_partial_compiled_pattern (buffer, buffer + bufp->used);
   750   fprintf (stderr, "%td bytes used/%td bytes allocated.\n",
   751            bufp->used, bufp->allocated);
   752 
   753   if (bufp->fastmap_accurate && bufp->fastmap)
   754     {
   755       fputs ("fastmap: ", stderr);
   756       print_fastmap (bufp->fastmap);
   757     }
   758 
   759   fprintf (stderr, "re_nsub: %td\t", bufp->re_nsub);
   760   fprintf (stderr, "regs_alloc: %d\t", bufp->regs_allocated);
   761   fprintf (stderr, "can_be_null: %d\n", bufp->can_be_null);
   762   /* Perhaps we should print the translate table?  */
   763 }
   764 
   765 
   766 static void
   767 print_double_string (re_char *where, re_char *string1, ptrdiff_t size1,
   768                      re_char *string2, ptrdiff_t size2)
   769 {
   770   if (where == NULL)
   771     fputs ("(null)", stderr);
   772   else
   773     {
   774       int i;
   775       if (FIRST_STRING_P (where))
   776         {
   777           for (i = 0; i < string1 + size1 - where; i++)
   778             debug_putchar (where[i]);
   779           where = string2;
   780         }
   781 
   782       for (i = 0; i < string2 + size2 - where; i++)
   783         debug_putchar (where[i]);
   784     }
   785 }
   786 
   787 #else /* not REGEX_EMACS_DEBUG */
   788 
   789 # define DEBUG_STATEMENT(e)
   790 # define DEBUG_PRINT(...)
   791 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
   792 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
   793 
   794 #endif /* not REGEX_EMACS_DEBUG */
   795 
   796 typedef enum
   797 {
   798   REG_NOERROR = 0,      /* Success.  */
   799   REG_NOMATCH,          /* Didn't find a match (for regexec).  */
   800 
   801   /* POSIX regcomp return error codes.  (In the order listed in the
   802      standard.)  An older version of this code supported the POSIX
   803      API; this version continues to use these names internally.  */
   804   REG_BADPAT,           /* Invalid pattern.  */
   805   REG_ECOLLATE,         /* Not implemented.  */
   806   REG_ECTYPE,           /* Invalid character class name.  */
   807   REG_EESCAPE,          /* Trailing backslash.  */
   808   REG_ESUBREG,          /* Invalid back reference.  */
   809   REG_EBRACK,           /* Unmatched left bracket.  */
   810   REG_EPAREN,           /* Parenthesis imbalance.  */
   811   REG_EBRACE,           /* Unmatched \{.  */
   812   REG_BADBR,            /* Invalid contents of \{\}.  */
   813   REG_ERANGE,           /* Invalid range end.  */
   814   REG_ESPACE,           /* Ran out of memory.  */
   815   REG_BADRPT,           /* No preceding re for repetition op.  */
   816 
   817   /* Error codes we've added.  */
   818   REG_EEND,             /* Premature end.  */
   819   REG_ESIZE,            /* Compiled pattern bigger than 2^16 bytes.  */
   820   REG_ERPAREN,          /* Unmatched ) or \); not returned from regcomp.  */
   821   REG_ERANGEX,          /* Range striding over charsets.  */
   822   REG_ESIZEBR           /* n or m too big in \{n,m\} */
   823 } reg_errcode_t;
   824 
   825 static const char *re_error_msgid[] =
   826   {
   827    [REG_NOERROR] = "Success",
   828    [REG_NOMATCH] = "No match",
   829    [REG_BADPAT] = "Invalid regular expression",
   830    [REG_ECOLLATE] = "Invalid collation character",
   831    [REG_ECTYPE] = "Invalid character class name",
   832    [REG_EESCAPE] = "Trailing backslash",
   833    [REG_ESUBREG] = "Invalid back reference",
   834    [REG_EBRACK] = "Unmatched [ or [^",
   835    [REG_EPAREN] = "Unmatched ( or \\(",
   836    [REG_EBRACE] = "Unmatched \\{",
   837    [REG_BADBR] = "Invalid content of \\{\\}",
   838    [REG_ERANGE] = "Invalid range end",
   839    [REG_ESPACE] = "Memory exhausted",
   840    [REG_BADRPT] = "Invalid preceding regular expression",
   841    [REG_EEND] = "Premature end of regular expression",
   842    [REG_ESIZE] = "Regular expression too big",
   843    [REG_ERPAREN] = "Unmatched ) or \\)",
   844    [REG_ERANGEX ] = "Range striding over charsets",
   845    [REG_ESIZEBR ] = "Invalid content of \\{\\}",
   846   };
   847 
   848 /* For 'regs_allocated'.  */
   849 enum { REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED };
   850 
   851 /* If 'regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
   852    're_match_2' returns information about at least this many registers
   853    the first time a 'regs' structure is passed.  */
   854 enum { RE_NREGS = 30 };
   855 
   856 /* The searching and matching functions allocate memory for the
   857    failure stack and registers.  Otherwise searching and matching
   858    routines would have much smaller memory resources at their
   859    disposal, and therefore might fail to handle complex regexps.  */
   860 
   861 /* Failure stack declarations and macros; both re_compile_fastmap and
   862    re_match_2 use a failure stack.  These have to be macros because of
   863    SAFE_ALLOCA.  */
   864 
   865 
   866 /* Approximate number of failure points for which to initially allocate space
   867    when matching.  If this number is exceeded, we allocate more
   868    space, so it is not a hard limit.  */
   869 #define INIT_FAILURE_ALLOC 20
   870 
   871 /* Roughly the maximum number of failure points on the stack.  Would be
   872    exactly that if failure always used TYPICAL_FAILURE_SIZE items.
   873    This is a variable only so users of regex can assign to it; we never
   874    change it ourselves.  We always multiply it by TYPICAL_FAILURE_SIZE
   875    before using it, so it should probably be a byte-count instead.  */
   876 /* Note that 4400 was enough to cause a crash on Alpha OSF/1,
   877    whose default stack limit is 2mb.  In order for a larger
   878    value to work reliably, you have to try to make it accord
   879    with the process stack limit.  */
   880 ptrdiff_t emacs_re_max_failures = 40000;
   881 
   882 union fail_stack_elt
   883 {
   884   re_char *pointer;
   885   intptr_t integer;
   886 };
   887 
   888 typedef union fail_stack_elt fail_stack_elt_t;
   889 
   890 typedef struct
   891 {
   892   fail_stack_elt_t *stack;
   893   ptrdiff_t size;
   894   ptrdiff_t avail;      /* Offset of next open position.  */
   895   ptrdiff_t frame;      /* Offset of the cur constructed frame.  */
   896 } fail_stack_type;
   897 
   898 #define FAIL_STACK_EMPTY()     (fail_stack.frame == 0)
   899 
   900 
   901 /* Define macros to initialize and free the failure stack.  */
   902 
   903 #define INIT_FAIL_STACK()                                               \
   904   do {                                                                  \
   905     fail_stack.stack =                                                  \
   906       SAFE_ALLOCA (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE            \
   907                    * sizeof (fail_stack_elt_t));                        \
   908     fail_stack.size = INIT_FAILURE_ALLOC;                               \
   909     fail_stack.avail = 0;                                               \
   910     fail_stack.frame = 0;                                               \
   911   } while (false)
   912 
   913 
   914 /* Double the size of FAIL_STACK, up to a limit
   915    which allows approximately 'emacs_re_max_failures' items.
   916 
   917    Return 1 if succeeds, and 0 if either ran out of memory
   918    allocating space for it or it was already too large.
   919 
   920    REGEX_REALLOCATE requires 'destination' be declared.   */
   921 
   922 /* Factor to increase the failure stack size by.
   923    This used to be 2, but 2 was too wasteful
   924    because the old discarded stacks added up to as much space
   925    were as ultimate, maximum-size stack.  */
   926 #define FAIL_STACK_GROWTH_FACTOR 4
   927 
   928 #define GROW_FAIL_STACK(fail_stack)                                     \
   929   (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE)        \
   930    ? 0                                                                  \
   931    : ((fail_stack).stack                                                \
   932       = REGEX_REALLOCATE ((fail_stack).stack,                           \
   933           (fail_stack).avail * sizeof (fail_stack_elt_t),               \
   934           min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE,                  \
   935                ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR))          \
   936           * sizeof (fail_stack_elt_t)),                                 \
   937       ((fail_stack).size                                                \
   938        = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE,            \
   939                ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)))),       \
   940       1))
   941 
   942 
   943 /* Push a pointer value onto the failure stack.
   944    Assumes the variable 'fail_stack'.  Probably should only
   945    be called from within 'PUSH_FAILURE_POINT'.  */
   946 #define PUSH_FAILURE_POINTER(item)                                      \
   947   fail_stack.stack[fail_stack.avail++].pointer = (item)
   948 
   949 /* This pushes an integer-valued item onto the failure stack.
   950    Assumes the variable 'fail_stack'.  Probably should only
   951    be called from within 'PUSH_FAILURE_POINT'.  */
   952 #define PUSH_FAILURE_INT(item)                                  \
   953   fail_stack.stack[fail_stack.avail++].integer = (item)
   954 
   955 /* These POP... operations complement the PUSH... operations.
   956    All assume that 'fail_stack' is nonempty.  */
   957 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
   958 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
   959 
   960 /* Individual items aside from the registers.  */
   961 #define NUM_NONREG_ITEMS 3
   962 
   963 /* Used to examine the stack (to detect infinite loops).  */
   964 #define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer
   965 #define FAILURE_STR(h) (fail_stack.stack[(h) - 2].pointer)
   966 #define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
   967 #define TOP_FAILURE_HANDLE() fail_stack.frame
   968 
   969 
   970 #define ENSURE_FAIL_STACK(space)                                        \
   971 while (REMAINING_AVAIL_SLOTS <= space) {                                \
   972   if (!GROW_FAIL_STACK (fail_stack))                                    \
   973     {                                                                   \
   974       unbind_to (count, Qnil);                                          \
   975       SAFE_FREE ();                                                     \
   976       return -2;                                                        \
   977     }                                                                   \
   978   DEBUG_PRINT ("\n  Doubled stack; size now: %td\n", fail_stack.size);  \
   979   DEBUG_PRINT ("         slots available: %td\n", REMAINING_AVAIL_SLOTS);\
   980 }
   981 
   982 /* Push register NUM onto the stack.  */
   983 #define PUSH_FAILURE_REG(num)                                           \
   984 do {                                                                    \
   985   char *destination;                                                    \
   986   intptr_t n = num;                                                     \
   987   eassert (0 < n && n < num_regs);                                      \
   988   eassert (REG_UNSET (regstart[n]) <= REG_UNSET (regend[n]));           \
   989   ENSURE_FAIL_STACK(3);                                                 \
   990   DEBUG_PRINT ("    Push reg %"PRIdPTR" (spanning %p -> %p)\n",         \
   991                n, regstart[n], regend[n]);                              \
   992   PUSH_FAILURE_POINTER (regstart[n]);                                   \
   993   PUSH_FAILURE_POINTER (regend[n]);                                     \
   994   PUSH_FAILURE_INT (n);                                                 \
   995 } while (false)
   996 
   997 /* Change the counter's value to VAL, but make sure that it will
   998    be reset when backtracking.  */
   999 #define PUSH_NUMBER(ptr,val)                                            \
  1000 do {                                                                    \
  1001   char *destination;                                                    \
  1002   int c;                                                                \
  1003   ENSURE_FAIL_STACK(3);                                                 \
  1004   EXTRACT_NUMBER (c, ptr);                                              \
  1005   DEBUG_PRINT ("    Push number %p = %d -> %d\n", ptr, c, val);         \
  1006   PUSH_FAILURE_INT (c);                                                 \
  1007   PUSH_FAILURE_POINTER (ptr);                                           \
  1008   PUSH_FAILURE_INT (-1);                                                \
  1009   STORE_NUMBER (ptr, val);                                              \
  1010 } while (false)
  1011 
  1012 /* Pop a saved register off the stack.  */
  1013 #define POP_FAILURE_REG_OR_COUNT()                                      \
  1014 do {                                                                    \
  1015   intptr_t pfreg = POP_FAILURE_INT ();                                  \
  1016   if (pfreg == -1)                                                      \
  1017     {                                                                   \
  1018       /* It's a counter.  */                                            \
  1019       /* Discard 'const', making re_search non-reentrant.  */           \
  1020       unsigned char *ptr = (unsigned char *) POP_FAILURE_POINTER ();    \
  1021       pfreg = POP_FAILURE_INT ();                                       \
  1022       STORE_NUMBER (ptr, pfreg);                                        \
  1023       DEBUG_PRINT ("     Pop counter %p = %"PRIdPTR"\n", ptr, pfreg);   \
  1024     }                                                                   \
  1025   else                                                                  \
  1026     {                                                                   \
  1027       eassert (0 < pfreg && pfreg < num_regs);                          \
  1028       regend[pfreg] = POP_FAILURE_POINTER ();                           \
  1029       regstart[pfreg] = POP_FAILURE_POINTER ();                         \
  1030       eassert (REG_UNSET (regstart[pfreg]) <= REG_UNSET (regend[pfreg])); \
  1031       DEBUG_PRINT ("     Pop reg %ld (spanning %p -> %p)\n",            \
  1032                    pfreg, regstart[pfreg], regend[pfreg]);              \
  1033     }                                                                   \
  1034 } while (false)
  1035 
  1036 /* Check that we are not stuck in an infinite loop.  */
  1037 #define CHECK_INFINITE_LOOP(pat_cur, string_place)                      \
  1038 do {                                                                    \
  1039   ptrdiff_t failure = TOP_FAILURE_HANDLE ();                            \
  1040   /* Check for infinite matching loops */                               \
  1041   while (failure > 0                                                    \
  1042          && (FAILURE_STR (failure) == string_place                      \
  1043              || FAILURE_STR (failure) == NULL))                         \
  1044     {                                                                   \
  1045       eassert (FAILURE_PAT (failure) >= bufp->buffer                    \
  1046                && FAILURE_PAT (failure) <= bufp->buffer + bufp->used);  \
  1047       if (FAILURE_PAT (failure) == pat_cur)                             \
  1048         {                                                               \
  1049           cycle = true;                                                 \
  1050           break;                                                        \
  1051         }                                                               \
  1052       DEBUG_PRINT ("  Other pattern: %p\n", FAILURE_PAT (failure));     \
  1053       failure = NEXT_FAILURE_HANDLE(failure);                           \
  1054     }                                                                   \
  1055   DEBUG_PRINT ("  Other string: %p\n", FAILURE_STR (failure));          \
  1056 } while (false)
  1057 
  1058 /* Push the information about the state we will need
  1059    if we ever fail back to it.
  1060 
  1061    Requires variables fail_stack, regstart, regend and
  1062    num_regs be declared.  GROW_FAIL_STACK requires 'destination' be
  1063    declared.
  1064 
  1065    Does 'return FAILURE_CODE' if runs out of memory.  */
  1066 
  1067 #define PUSH_FAILURE_POINT(pattern, string_place)                       \
  1068 do {                                                                    \
  1069   char *destination;                                                    \
  1070   DEBUG_STATEMENT (nfailure_points_pushed++);                           \
  1071   DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n");                              \
  1072   DEBUG_PRINT ("  Before push, next avail: %td\n", fail_stack.avail);   \
  1073   DEBUG_PRINT ("                        size: %td\n", fail_stack.size); \
  1074                                                                         \
  1075   ENSURE_FAIL_STACK (NUM_NONREG_ITEMS);                                 \
  1076                                                                         \
  1077   DEBUG_PRINT ("\n");                                                   \
  1078                                                                         \
  1079   DEBUG_PRINT ("  Push frame index: %td\n", fail_stack.frame);          \
  1080   PUSH_FAILURE_INT (fail_stack.frame);                                  \
  1081                                                                         \
  1082   DEBUG_PRINT ("  Push string %p: \"", string_place);                   \
  1083   DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\
  1084   DEBUG_PRINT ("\"\n");                                                 \
  1085   PUSH_FAILURE_POINTER (string_place);                                  \
  1086                                                                         \
  1087   DEBUG_PRINT ("  Push pattern %p: ", pattern);                         \
  1088   DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern, pend);                   \
  1089   PUSH_FAILURE_POINTER (pattern);                                       \
  1090                                                                         \
  1091   /* Close the frame by moving the frame pointer past it.  */           \
  1092   fail_stack.frame = fail_stack.avail;                                  \
  1093 } while (false)
  1094 
  1095 /* Estimate the size of data pushed by a typical failure stack entry.
  1096    An estimate is all we need, because all we use this for
  1097    is to choose a limit for how big to make the failure stack.  */
  1098 /* BEWARE, the value `20' is hard-coded in emacs.c:main().  */
  1099 #define TYPICAL_FAILURE_SIZE 20
  1100 
  1101 /* How many items can still be added to the stack without overflowing it.  */
  1102 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
  1103 
  1104 
  1105 /* Pop what PUSH_FAIL_STACK pushes.
  1106 
  1107    Restore into the parameters, all of which should be lvalues:
  1108      STR -- the saved data position.
  1109      PAT -- the saved pattern position.
  1110      REGSTART, REGEND -- arrays of string positions.
  1111 
  1112    Also assume the variables FAIL_STACK and (if debugging) BUFP, PEND,
  1113    STRING1, SIZE1, STRING2, and SIZE2.  */
  1114 
  1115 #define POP_FAILURE_POINT(str, pat)                                     \
  1116 do {                                                                    \
  1117   eassert (!FAIL_STACK_EMPTY ());                                       \
  1118                                                                         \
  1119   /* Remove failure points and point to how many regs pushed.  */       \
  1120   DEBUG_PRINT ("POP_FAILURE_POINT:\n");                                 \
  1121   DEBUG_PRINT ("  Before pop, next avail: %td\n", fail_stack.avail);    \
  1122   DEBUG_PRINT ("                     size: %td\n", fail_stack.size);    \
  1123                                                                         \
  1124   /* Pop the saved registers.  */                                       \
  1125   while (fail_stack.frame < fail_stack.avail)                           \
  1126     POP_FAILURE_REG_OR_COUNT ();                                        \
  1127                                                                         \
  1128   pat = POP_FAILURE_POINTER ();                                         \
  1129   DEBUG_PRINT ("  Popping pattern %p: ", pat);                          \
  1130   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
  1131                                                                         \
  1132   /* If the saved string location is NULL, it came from an              \
  1133      on_failure_keep_string_jump opcode, and we want to throw away the  \
  1134      saved NULL, thus retaining our current position in the string.  */ \
  1135   str = POP_FAILURE_POINTER ();                                         \
  1136   DEBUG_PRINT ("  Popping string %p: \"", str);                         \
  1137   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
  1138   DEBUG_PRINT ("\"\n");                                                 \
  1139                                                                         \
  1140   fail_stack.frame = POP_FAILURE_INT ();                                \
  1141   DEBUG_PRINT ("  Popping  frame index: %td\n", fail_stack.frame);      \
  1142                                                                         \
  1143   eassert (fail_stack.avail >= 0);                                      \
  1144   eassert (fail_stack.frame <= fail_stack.avail);                       \
  1145                                                                         \
  1146   DEBUG_STATEMENT (nfailure_points_popped++);                           \
  1147 } while (false) /* POP_FAILURE_POINT */
  1148 
  1149 
  1150 
  1151 /* Registers are set to a sentinel when they haven't yet matched.  */
  1152 #define REG_UNSET(e) ((e) == NULL)
  1153 
  1154 /* Subroutine declarations and macros for regex_compile.  */
  1155 
  1156 static reg_errcode_t regex_compile (re_char *pattern, ptrdiff_t size,
  1157                                     bool posix_backtracking,
  1158                                     const char *whitespace_regexp,
  1159                                     struct re_pattern_buffer *bufp);
  1160 static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
  1161 static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
  1162 static void insert_op1 (re_opcode_t op, unsigned char *loc,
  1163                         int arg, unsigned char *end);
  1164 static void insert_op2 (re_opcode_t op, unsigned char *loc,
  1165                         int arg1, int arg2, unsigned char *end);
  1166 static bool at_begline_loc_p (re_char *pattern, re_char *p);
  1167 static bool at_endline_loc_p (re_char *p, re_char *pend);
  1168 static re_char *skip_one_char (re_char *p);
  1169 static int analyze_first (re_char *p, re_char *pend,
  1170                           char *fastmap, bool multibyte);
  1171 
  1172 /* Fetch the next character in the uncompiled pattern, with no
  1173    translation.  */
  1174 #define PATFETCH(c)                                                     \
  1175   do {                                                                  \
  1176     int len;                                                            \
  1177     if (p == pend) return REG_EEND;                                     \
  1178     c = RE_STRING_CHAR_AND_LENGTH (p, len, multibyte);                  \
  1179     p += len;                                                           \
  1180   } while (false)
  1181 
  1182 
  1183 #define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C)
  1184 #define TRANSLATE(d) (!NILP (translate) ? RE_TRANSLATE (translate, d) : (d))
  1185 
  1186 /* Macros for outputting the compiled pattern into 'buffer'.  */
  1187 
  1188 /* If the buffer isn't allocated when it comes in, use this.  */
  1189 #define INIT_BUF_SIZE  32
  1190 
  1191 /* Ensure at least N more bytes of space in buffer.  */
  1192 #define GET_BUFFER_SPACE(n)                                             \
  1193     if (bufp->buffer + bufp->allocated - b < (n))                       \
  1194       EXTEND_BUFFER ((n) - (bufp->buffer + bufp->allocated - b))
  1195 
  1196 /* Ensure one more byte of buffer space and then add C to it.  */
  1197 #define BUF_PUSH(c)                                                     \
  1198   do {                                                                  \
  1199     GET_BUFFER_SPACE (1);                                               \
  1200     *b++ = (unsigned char) (c);                                         \
  1201   } while (false)
  1202 
  1203 
  1204 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
  1205 #define BUF_PUSH_2(c1, c2)                                              \
  1206   do {                                                                  \
  1207     GET_BUFFER_SPACE (2);                                               \
  1208     *b++ = (unsigned char) (c1);                                        \
  1209     *b++ = (unsigned char) (c2);                                        \
  1210   } while (false)
  1211 
  1212 
  1213 /* Store a jump with opcode OP at LOC to location TO.  Store a
  1214    relative address offset by the three bytes the jump itself occupies.  */
  1215 #define STORE_JUMP(op, loc, to) \
  1216   store_op1 (op, loc, (to) - (loc) - 3)
  1217 
  1218 /* Likewise, for a two-argument jump.  */
  1219 #define STORE_JUMP2(op, loc, to, arg) \
  1220   store_op2 (op, loc, (to) - (loc) - 3, arg)
  1221 
  1222 /* Like 'STORE_JUMP', but for inserting.  Assume B is the buffer end.  */
  1223 #define INSERT_JUMP(op, loc, to) \
  1224   insert_op1 (op, loc, (to) - (loc) - 3, b)
  1225 
  1226 /* Like 'STORE_JUMP2', but for inserting.  Assume B is the buffer end.  */
  1227 #define INSERT_JUMP2(op, loc, to, arg) \
  1228   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
  1229 
  1230 
  1231 /* This is not an arbitrary limit: the arguments which represent offsets
  1232    into the pattern are two bytes long.  So if 2^15 bytes turns out to
  1233    be too small, many things would have to change.  */
  1234 # define MAX_BUF_SIZE (1 << 15)
  1235 
  1236 /* Extend the buffer by at least N bytes via realloc and
  1237    reset the pointers that pointed into the old block to point to the
  1238    correct places in the new one.  If extending the buffer results in it
  1239    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
  1240 #define EXTEND_BUFFER(n)                                                \
  1241   do {                                                                  \
  1242     ptrdiff_t requested_extension = n;                                  \
  1243     unsigned char *old_buffer = bufp->buffer;                           \
  1244     if (MAX_BUF_SIZE - bufp->allocated < requested_extension)           \
  1245       return REG_ESIZE;                                                 \
  1246     ptrdiff_t b_off = b - old_buffer;                                   \
  1247     ptrdiff_t begalt_off = begalt - old_buffer;                         \
  1248     ptrdiff_t fixup_alt_jump_off =                                      \
  1249       fixup_alt_jump ? fixup_alt_jump - old_buffer : -1;                \
  1250     ptrdiff_t laststart_off = laststart ? laststart - old_buffer : -1;  \
  1251     ptrdiff_t pending_exact_off =                                       \
  1252       pending_exact ? pending_exact - old_buffer : -1;                  \
  1253     bufp->buffer = xpalloc (bufp->buffer, &bufp->allocated,             \
  1254                             requested_extension, MAX_BUF_SIZE, 1);      \
  1255     unsigned char *new_buffer = bufp->buffer;                           \
  1256     b = new_buffer + b_off;                                             \
  1257     begalt = new_buffer + begalt_off;                                   \
  1258     if (0 <= fixup_alt_jump_off)                                        \
  1259       fixup_alt_jump = new_buffer + fixup_alt_jump_off;                 \
  1260     if (0 <= laststart_off)                                             \
  1261       laststart = new_buffer + laststart_off;                           \
  1262     if (0 <= pending_exact_off)                                         \
  1263       pending_exact = new_buffer + pending_exact_off;                   \
  1264   } while (false)
  1265 
  1266 
  1267 /* Since we have one byte reserved for the register number argument to
  1268    {start,stop}_memory, the maximum number of groups we can report
  1269    things about is what fits in that byte.  */
  1270 #define MAX_REGNUM 255
  1271 
  1272 /* But patterns can have more than 'MAX_REGNUM' registers.  Just
  1273    ignore the excess.  */
  1274 typedef int regnum_t;
  1275 
  1276 
  1277 /* Macros for the compile stack.  */
  1278 
  1279 typedef long pattern_offset_t;
  1280 verify (LONG_MIN <= -(MAX_BUF_SIZE - 1) && MAX_BUF_SIZE - 1 <= LONG_MAX);
  1281 
  1282 typedef struct
  1283 {
  1284   pattern_offset_t begalt_offset;
  1285   pattern_offset_t fixup_alt_jump;
  1286   pattern_offset_t laststart_offset;
  1287   regnum_t regnum;
  1288 } compile_stack_elt_t;
  1289 
  1290 
  1291 typedef struct
  1292 {
  1293   compile_stack_elt_t *stack;
  1294   ptrdiff_t size;
  1295   ptrdiff_t avail;              /* Offset of next open position.  */
  1296 } compile_stack_type;
  1297 
  1298 
  1299 #define INIT_COMPILE_STACK_SIZE 32
  1300 
  1301 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
  1302 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
  1303 
  1304 /* The next available element.  */
  1305 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
  1306 
  1307 /* Structure to manage work area for range table.  */
  1308 struct range_table_work_area
  1309 {
  1310   int *table;                   /* actual work area.  */
  1311   int allocated;                /* allocated size for work area in bytes.  */
  1312   int used;                     /* actually used size in words.  */
  1313   int bits;                     /* flag to record character classes */
  1314 };
  1315 
  1316 /* Make sure that WORK_AREA can hold N more multibyte characters.
  1317    If it can't get the space, it returns from the surrounding function.  */
  1318 
  1319 #define EXTEND_RANGE_TABLE(work_area, n)                                \
  1320   do {                                                                  \
  1321     if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \
  1322       {                                                                 \
  1323         extend_range_table_work_area (&work_area);                      \
  1324         if ((work_area).table == 0)                                     \
  1325           return (REG_ESPACE);                                          \
  1326       }                                                                 \
  1327   } while (false)
  1328 
  1329 #define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit)           \
  1330   (work_area).bits |= (bit)
  1331 
  1332 /* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
  1333 #define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)    \
  1334   do {                                                                  \
  1335     EXTEND_RANGE_TABLE ((work_area), 2);                                \
  1336     (work_area).table[(work_area).used++] = (range_start);              \
  1337     (work_area).table[(work_area).used++] = (range_end);                \
  1338   } while (false)
  1339 
  1340 /* Free allocated memory for WORK_AREA.  */
  1341 #define FREE_RANGE_TABLE_WORK_AREA(work_area)   \
  1342   do {                                          \
  1343     if ((work_area).table)                      \
  1344       xfree ((work_area).table);                        \
  1345   } while (false)
  1346 
  1347 #define CLEAR_RANGE_TABLE_WORK_USED(work_area) \
  1348   ((work_area).used = 0, (work_area).bits = 0)
  1349 #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
  1350 #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
  1351 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
  1352 
  1353 /* Bits used to implement the multibyte-part of the various character classes
  1354    such as [:alnum:] in a charset's range table.  The code currently assumes
  1355    that only the low 16 bits are used.  */
  1356 #define BIT_WORD        0x1
  1357 #define BIT_LOWER       0x2
  1358 #define BIT_PUNCT       0x4
  1359 #define BIT_SPACE       0x8
  1360 #define BIT_UPPER       0x10
  1361 #define BIT_MULTIBYTE   0x20
  1362 #define BIT_ALPHA       0x40
  1363 #define BIT_ALNUM       0x80
  1364 #define BIT_GRAPH       0x100
  1365 #define BIT_PRINT       0x200
  1366 #define BIT_BLANK       0x400
  1367 
  1368 
  1369 /* Set the bit for character C in a list.  */
  1370 #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
  1371 
  1372 
  1373 /* Store characters in the range FROM to TO in the bitmap at B (for
  1374    ASCII and unibyte characters) and WORK_AREA (for multibyte
  1375    characters) while translating them and paying attention to the
  1376    continuity of translated characters.
  1377 
  1378    Implementation note: It is better to implement these fairly big
  1379    macros by a function, but it's not that easy because macros called
  1380    in this macro assume various local variables already declared.  */
  1381 
  1382 /* Both FROM and TO are ASCII characters.  */
  1383 
  1384 #define SETUP_ASCII_RANGE(work_area, FROM, TO)                  \
  1385   do {                                                          \
  1386     int C0, C1;                                                 \
  1387                                                                 \
  1388     for (C0 = (FROM); C0 <= (TO); C0++)                         \
  1389       {                                                         \
  1390         C1 = TRANSLATE (C0);                                    \
  1391         if (! ASCII_CHAR_P (C1))                                \
  1392           {                                                     \
  1393             SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1);    \
  1394             if ((C1 = RE_CHAR_TO_UNIBYTE (C1)) < 0)             \
  1395               C1 = C0;                                          \
  1396           }                                                     \
  1397         SET_LIST_BIT (C1);                                      \
  1398       }                                                         \
  1399   } while (false)
  1400 
  1401 
  1402 /* Both FROM and TO are unibyte characters (0x80..0xFF).  */
  1403 
  1404 #define SETUP_UNIBYTE_RANGE(work_area, FROM, TO)                               \
  1405   do {                                                                         \
  1406     int C0, C1, C2, I;                                                         \
  1407     int USED = RANGE_TABLE_WORK_USED (work_area);                              \
  1408                                                                                \
  1409     for (C0 = (FROM); C0 <= (TO); C0++)                                        \
  1410       {                                                                        \
  1411         C1 = RE_CHAR_TO_MULTIBYTE (C0);                                        \
  1412         if (CHAR_BYTE8_P (C1))                                                 \
  1413           SET_LIST_BIT (C0);                                                   \
  1414         else                                                                   \
  1415           {                                                                    \
  1416             C2 = TRANSLATE (C1);                                               \
  1417             if (C2 == C1                                                       \
  1418                 || (C1 = RE_CHAR_TO_UNIBYTE (C2)) < 0)                         \
  1419               C1 = C0;                                                         \
  1420             SET_LIST_BIT (C1);                                                 \
  1421             for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
  1422               {                                                                \
  1423                 int from = RANGE_TABLE_WORK_ELT (work_area, I);                \
  1424                 int to = RANGE_TABLE_WORK_ELT (work_area, I + 1);              \
  1425                                                                                \
  1426                 if (C2 >= from - 1 && C2 <= to + 1)                            \
  1427                   {                                                            \
  1428                     if (C2 == from - 1)                                        \
  1429                       RANGE_TABLE_WORK_ELT (work_area, I)--;                   \
  1430                     else if (C2 == to + 1)                                     \
  1431                       RANGE_TABLE_WORK_ELT (work_area, I + 1)++;               \
  1432                     break;                                                     \
  1433                   }                                                            \
  1434               }                                                                \
  1435             if (I < USED)                                                      \
  1436               SET_RANGE_TABLE_WORK_AREA ((work_area), C2, C2);                 \
  1437           }                                                                    \
  1438       }                                                                        \
  1439   } while (false)
  1440 
  1441 
  1442 /* Both FROM and TO are multibyte characters.  */
  1443 
  1444 #define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO)                         \
  1445   do {                                                                     \
  1446     int C0, C1, C2, I, USED = RANGE_TABLE_WORK_USED (work_area);           \
  1447                                                                            \
  1448     SET_RANGE_TABLE_WORK_AREA ((work_area), (FROM), (TO));                 \
  1449     for (C0 = (FROM); C0 <= (TO); C0++)                                    \
  1450       {                                                                    \
  1451         C1 = TRANSLATE (C0);                                               \
  1452         if ((C2 = RE_CHAR_TO_UNIBYTE (C1)) >= 0                            \
  1453             || (C1 != C0 && (C2 = RE_CHAR_TO_UNIBYTE (C0)) >= 0))          \
  1454           SET_LIST_BIT (C2);                                               \
  1455         if (C1 >= (FROM) && C1 <= (TO))                                    \
  1456           continue;                                                        \
  1457         for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
  1458           {                                                                \
  1459             int from = RANGE_TABLE_WORK_ELT (work_area, I);                \
  1460             int to = RANGE_TABLE_WORK_ELT (work_area, I + 1);              \
  1461                                                                            \
  1462             if (C1 >= from - 1 && C1 <= to + 1)                            \
  1463               {                                                            \
  1464                 if (C1 == from - 1)                                        \
  1465                   RANGE_TABLE_WORK_ELT (work_area, I)--;                   \
  1466                 else if (C1 == to + 1)                                     \
  1467                   RANGE_TABLE_WORK_ELT (work_area, I + 1)++;               \
  1468                 break;                                                     \
  1469               }                                                            \
  1470           }                                                                \
  1471         if (I < USED)                                                      \
  1472           SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1);                 \
  1473       }                                                                    \
  1474   } while (false)
  1475 
  1476 /* Get the next unsigned number in the uncompiled pattern.  */
  1477 #define GET_INTERVAL_COUNT(num)                                 \
  1478   do {                                                                  \
  1479     if (p == pend)                                                      \
  1480       FREE_STACK_RETURN (REG_EBRACE);                                   \
  1481     else                                                                \
  1482       {                                                                 \
  1483         PATFETCH (c);                                                   \
  1484         while ('0' <= c && c <= '9')                                    \
  1485           {                                                             \
  1486             if (num < 0)                                                \
  1487               num = 0;                                                  \
  1488             if (RE_DUP_MAX / 10 - (RE_DUP_MAX % 10 < c - '0') < num)    \
  1489               FREE_STACK_RETURN (REG_ESIZEBR);                          \
  1490             num = num * 10 + c - '0';                                   \
  1491             if (p == pend)                                              \
  1492               FREE_STACK_RETURN (REG_EBRACE);                           \
  1493             PATFETCH (c);                                               \
  1494           }                                                             \
  1495       }                                                                 \
  1496   } while (false)
  1497 
  1498 /* Parse a character class, i.e. string such as "[:name:]".  *strp
  1499    points to the string to be parsed and limit is length, in bytes, of
  1500    that string.
  1501 
  1502    If *strp point to a string that begins with "[:name:]", where name is
  1503    a non-empty sequence of lower case letters, *strp will be advanced past the
  1504    closing square bracket and RECC_* constant which maps to the name will be
  1505    returned.  If name is not a valid character class name zero, or RECC_ERROR,
  1506    is returned.
  1507 
  1508    Otherwise, if *strp doesn't begin with "[:name:]", -1 is returned.
  1509 
  1510    The function can be used on ASCII and multibyte (UTF-8-encoded) strings.
  1511  */
  1512 re_wctype_t
  1513 re_wctype_parse (const unsigned char **strp, ptrdiff_t limit)
  1514 {
  1515   const char *beg = (const char *)*strp, *it;
  1516 
  1517   if (limit < 4 || beg[0] != '[' || beg[1] != ':')
  1518     return -1;
  1519 
  1520   beg += 2;  /* skip opening "[:" */
  1521   limit -= 3;  /* opening "[:" and half of closing ":]"; --limit handles rest */
  1522   for (it = beg; it[0] != ':' || it[1] != ']'; ++it)
  1523     if (!--limit)
  1524       return -1;
  1525 
  1526   *strp = (const unsigned char *)(it + 2);
  1527 
  1528   /* Sort tests in the length=five case by frequency the classes to minimize
  1529      number of times we fail the comparison.  The frequencies of character class
  1530      names used in Emacs sources as of 2016-07-27:
  1531 
  1532      $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + |
  1533            sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr
  1534          213 [:alnum:]
  1535          104 [:alpha:]
  1536           62 [:space:]
  1537           39 [:digit:]
  1538           36 [:blank:]
  1539           26 [:word:]
  1540           26 [:upper:]
  1541           21 [:lower:]
  1542           10 [:xdigit:]
  1543           10 [:punct:]
  1544           10 [:ascii:]
  1545            4 [:nonascii:]
  1546            4 [:graph:]
  1547            2 [:print:]
  1548            2 [:cntrl:]
  1549            1 [:ff:]
  1550 
  1551      If you update this list, consider also updating chain of or'ed conditions
  1552      in execute_charset function.
  1553    */
  1554 
  1555   switch (it - beg) {
  1556   case 4:
  1557     if (!memcmp (beg, "word", 4))      return RECC_WORD;
  1558     break;
  1559   case 5:
  1560     if (!memcmp (beg, "alnum", 5))     return RECC_ALNUM;
  1561     if (!memcmp (beg, "alpha", 5))     return RECC_ALPHA;
  1562     if (!memcmp (beg, "space", 5))     return RECC_SPACE;
  1563     if (!memcmp (beg, "digit", 5))     return RECC_DIGIT;
  1564     if (!memcmp (beg, "blank", 5))     return RECC_BLANK;
  1565     if (!memcmp (beg, "upper", 5))     return RECC_UPPER;
  1566     if (!memcmp (beg, "lower", 5))     return RECC_LOWER;
  1567     if (!memcmp (beg, "punct", 5))     return RECC_PUNCT;
  1568     if (!memcmp (beg, "ascii", 5))     return RECC_ASCII;
  1569     if (!memcmp (beg, "graph", 5))     return RECC_GRAPH;
  1570     if (!memcmp (beg, "print", 5))     return RECC_PRINT;
  1571     if (!memcmp (beg, "cntrl", 5))     return RECC_CNTRL;
  1572     break;
  1573   case 6:
  1574     if (!memcmp (beg, "xdigit", 6))    return RECC_XDIGIT;
  1575     break;
  1576   case 7:
  1577     if (!memcmp (beg, "unibyte", 7))   return RECC_UNIBYTE;
  1578     break;
  1579   case 8:
  1580     if (!memcmp (beg, "nonascii", 8))  return RECC_NONASCII;
  1581     break;
  1582   case 9:
  1583     if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE;
  1584     break;
  1585   }
  1586 
  1587   return RECC_ERROR;
  1588 }
  1589 
  1590 /* True if CH is in the char class CC.  */
  1591 bool
  1592 re_iswctype (int ch, re_wctype_t cc)
  1593 {
  1594   switch (cc)
  1595     {
  1596     case RECC_ALNUM: return ISALNUM (ch) != 0;
  1597     case RECC_ALPHA: return ISALPHA (ch) != 0;
  1598     case RECC_BLANK: return ISBLANK (ch) != 0;
  1599     case RECC_CNTRL: return ISCNTRL (ch) != 0;
  1600     case RECC_DIGIT: return ISDIGIT (ch) != 0;
  1601     case RECC_GRAPH: return ISGRAPH (ch) != 0;
  1602     case RECC_LOWER: return ISLOWER (ch) != 0;
  1603     case RECC_PRINT: return ISPRINT (ch) != 0;
  1604     case RECC_PUNCT: return ISPUNCT (ch) != 0;
  1605     case RECC_SPACE: return ISSPACE (ch) != 0;
  1606     case RECC_UPPER: return ISUPPER (ch) != 0;
  1607     case RECC_XDIGIT: return ISXDIGIT (ch) != 0;
  1608     case RECC_ASCII: return IS_REAL_ASCII (ch) != 0;
  1609     case RECC_NONASCII: return !IS_REAL_ASCII (ch);
  1610     case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0;
  1611     case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
  1612     case RECC_WORD: return ISWORD (ch) != 0;
  1613     case RECC_ERROR: return false;
  1614     default:
  1615       abort ();
  1616     }
  1617 }
  1618 
  1619 /* Return a bit-pattern to use in the range-table bits to match multibyte
  1620    chars of class CC.  */
  1621 static int
  1622 re_wctype_to_bit (re_wctype_t cc)
  1623 {
  1624   switch (cc)
  1625     {
  1626     case RECC_NONASCII:
  1627     case RECC_MULTIBYTE: return BIT_MULTIBYTE;
  1628     case RECC_ALPHA: return BIT_ALPHA;
  1629     case RECC_ALNUM: return BIT_ALNUM;
  1630     case RECC_WORD: return BIT_WORD;
  1631     case RECC_LOWER: return BIT_LOWER;
  1632     case RECC_UPPER: return BIT_UPPER;
  1633     case RECC_PUNCT: return BIT_PUNCT;
  1634     case RECC_SPACE: return BIT_SPACE;
  1635     case RECC_GRAPH: return BIT_GRAPH;
  1636     case RECC_PRINT: return BIT_PRINT;
  1637     case RECC_BLANK: return BIT_BLANK;
  1638     case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
  1639     case RECC_UNIBYTE: case RECC_ERROR: return 0;
  1640     default:
  1641       abort ();
  1642     }
  1643 }
  1644 
  1645 /* Filling in the work area of a range.  */
  1646 
  1647 /* Actually extend the space in WORK_AREA.  */
  1648 
  1649 static void
  1650 extend_range_table_work_area (struct range_table_work_area *work_area)
  1651 {
  1652   work_area->allocated += 16 * sizeof (int);
  1653   work_area->table = xrealloc (work_area->table, work_area->allocated);
  1654 }
  1655 
  1656 /* regex_compile and helpers.  */
  1657 
  1658 static bool group_in_compile_stack (compile_stack_type, regnum_t);
  1659 
  1660 /* Insert the 'jump' from the end of last alternative to "here".
  1661    The space for the jump has already been allocated. */
  1662 #define FIXUP_ALT_JUMP()                                                \
  1663 do {                                                                    \
  1664   if (fixup_alt_jump)                                                   \
  1665     STORE_JUMP (jump, fixup_alt_jump, b);                               \
  1666 } while (false)
  1667 
  1668 
  1669 /* Return, freeing storage we allocated.  */
  1670 #define FREE_STACK_RETURN(value)                \
  1671   do {                                                  \
  1672     FREE_RANGE_TABLE_WORK_AREA (range_table_work);      \
  1673     xfree (compile_stack.stack);                        \
  1674     return value;                                       \
  1675   } while (false)
  1676 
  1677 /* Compile PATTERN (of length SIZE) according to SYNTAX.
  1678    Return a nonzero error code on failure, or zero for success.
  1679 
  1680    If WHITESPACE_REGEXP is given, use it instead of a space
  1681    character in PATTERN.
  1682 
  1683    Assume the 'allocated' (and perhaps 'buffer') and 'translate'
  1684    fields are set in BUFP on entry.
  1685 
  1686    If successful, put results in *BUFP (otherwise the
  1687    contents of *BUFP are undefined):
  1688      'buffer' is the compiled pattern;
  1689      'syntax' is set to SYNTAX;
  1690      'used' is set to the length of the compiled pattern;
  1691      'fastmap_accurate' is false;
  1692      're_nsub' is the number of subexpressions in PATTERN;
  1693 
  1694    The 'fastmap' field is neither examined nor set.  */
  1695 
  1696 static reg_errcode_t
  1697 regex_compile (re_char *pattern, ptrdiff_t size,
  1698                bool posix_backtracking,
  1699                const char *whitespace_regexp,
  1700                struct re_pattern_buffer *bufp)
  1701 {
  1702   /* Fetch characters from PATTERN here.  */
  1703   int c, c1;
  1704 
  1705   /* Points to the end of the buffer, where we should append.  */
  1706   unsigned char *b;
  1707 
  1708   /* Keeps track of unclosed groups.  */
  1709   compile_stack_type compile_stack;
  1710 
  1711   /* Points to the current (ending) position in the pattern.  */
  1712   re_char *p = pattern;
  1713   re_char *pend = pattern + size;
  1714 
  1715   /* How to translate the characters in the pattern.  */
  1716   Lisp_Object translate = bufp->translate;
  1717 
  1718   /* Address of the count-byte of the most recently inserted 'exactn'
  1719      command.  This makes it possible to tell if a new exact-match
  1720      character can be added to that command or if the character requires
  1721      a new 'exactn' command.  */
  1722   unsigned char *pending_exact = 0;
  1723 
  1724   /* Address of start of the most recently finished expression.
  1725      This tells, e.g., postfix * where to find the start of its
  1726      operand.  Reset at the beginning of groups and alternatives,
  1727      and after ^ and \` for dusty-deck compatibility.  */
  1728   unsigned char *laststart = 0;
  1729 
  1730   /* Address of beginning of regexp, or inside of last group.  */
  1731   unsigned char *begalt;
  1732 
  1733   /* Place in the uncompiled pattern (i.e., the {) to
  1734      which to go back if the interval is invalid.  */
  1735   re_char *beg_interval;
  1736 
  1737   /* Address of the place where a forward jump should go to the end of
  1738      the containing expression.  Each alternative of an 'or' -- except the
  1739      last -- ends with a forward jump of this sort.  */
  1740   unsigned char *fixup_alt_jump = 0;
  1741 
  1742   /* Work area for range table of charset.  */
  1743   struct range_table_work_area range_table_work;
  1744 
  1745   /* If the regular expression is multibyte.  */
  1746   bool multibyte = RE_MULTIBYTE_P (bufp);
  1747 
  1748   /* Nonzero if we have pushed down into a subpattern.  */
  1749   int in_subpattern = 0;
  1750 
  1751   /* These hold the values of p, pattern, and pend from the main
  1752      pattern when we have pushed into a subpattern.  */
  1753   re_char *main_p;
  1754   re_char *main_pattern;
  1755   re_char *main_pend;
  1756 
  1757 #ifdef REGEX_EMACS_DEBUG
  1758   regex_emacs_debug++;
  1759   DEBUG_PRINT ("\nCompiling pattern: ");
  1760   if (regex_emacs_debug > 0)
  1761     {
  1762       for (ptrdiff_t debug_count = 0; debug_count < size; debug_count++)
  1763         debug_putchar (pattern[debug_count]);
  1764       putc ('\n', stderr);
  1765     }
  1766 #endif
  1767 
  1768   /* Initialize the compile stack.  */
  1769   compile_stack.stack = xmalloc (INIT_COMPILE_STACK_SIZE
  1770                                  * sizeof *compile_stack.stack);
  1771   __lsan_ignore_object (compile_stack.stack);
  1772   compile_stack.size = INIT_COMPILE_STACK_SIZE;
  1773   compile_stack.avail = 0;
  1774 
  1775   range_table_work.table = 0;
  1776   range_table_work.allocated = 0;
  1777 
  1778   /* Initialize the pattern buffer.  */
  1779   bufp->fastmap_accurate = false;
  1780   bufp->used_syntax = false;
  1781 
  1782   /* Set 'used' to zero, so that if we return an error, the pattern
  1783      printer (for debugging) will think there's no pattern.  We reset it
  1784      at the end.  */
  1785   bufp->used = 0;
  1786 
  1787   bufp->re_nsub = 0;
  1788 
  1789   if (bufp->allocated == 0)
  1790     {
  1791       /* This loses if BUFP->buffer is bogus, but that is the user's
  1792          responsibility.  */
  1793       bufp->buffer = xrealloc (bufp->buffer, INIT_BUF_SIZE);
  1794       bufp->allocated = INIT_BUF_SIZE;
  1795     }
  1796 
  1797   begalt = b = bufp->buffer;
  1798 
  1799   /* Loop through the uncompiled pattern until we're at the end.  */
  1800   while (1)
  1801     {
  1802       if (p == pend)
  1803         {
  1804           /* If this is the end of an included regexp,
  1805              pop back to the main regexp and try again.  */
  1806           if (in_subpattern)
  1807             {
  1808               in_subpattern = 0;
  1809               pattern = main_pattern;
  1810               p = main_p;
  1811               pend = main_pend;
  1812               continue;
  1813             }
  1814           /* If this is the end of the main regexp, we are done.  */
  1815           break;
  1816         }
  1817 
  1818       PATFETCH (c);
  1819 
  1820       switch (c)
  1821         {
  1822         case ' ':
  1823           {
  1824             re_char *p1 = p;
  1825 
  1826             /* If there's no special whitespace regexp, treat
  1827                spaces normally.  And don't try to do this recursively.  */
  1828             if (!whitespace_regexp || in_subpattern)
  1829               goto normal_char;
  1830 
  1831             /* Peek past following spaces.  */
  1832             while (p1 != pend)
  1833               {
  1834                 if (*p1 != ' ')
  1835                   break;
  1836                 p1++;
  1837               }
  1838             /* If the spaces are followed by a repetition op,
  1839                treat them normally.  */
  1840             if (p1 != pend
  1841                 && (*p1 == '*' || *p1 == '+' || *p1 == '?'
  1842                     || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
  1843               goto normal_char;
  1844 
  1845             /* Replace the spaces with the whitespace regexp.  */
  1846             in_subpattern = 1;
  1847             main_p = p1;
  1848             main_pend = pend;
  1849             main_pattern = pattern;
  1850             p = pattern = (re_char *) whitespace_regexp;
  1851             pend = p + strlen (whitespace_regexp);
  1852             break;
  1853           }
  1854 
  1855         case '^':
  1856           if (! (p == pattern + 1 || at_begline_loc_p (pattern, p)))
  1857             goto normal_char;
  1858           /* Special case for compatibility: postfix ops after ^ become
  1859              literals.  */
  1860           laststart = 0;
  1861           BUF_PUSH (begline);
  1862           break;
  1863 
  1864         case '$':
  1865           if (! (p == pend || at_endline_loc_p (p, pend)))
  1866             goto normal_char;
  1867           laststart = b;
  1868           BUF_PUSH (endline);
  1869           break;
  1870 
  1871 
  1872         case '+':
  1873         case '?':
  1874         case '*':
  1875           /* If there is no previous pattern...  */
  1876           if (!laststart)
  1877             goto normal_char;
  1878 
  1879           {
  1880             /* 1 means zero (many) matches is allowed.  */
  1881             bool zero_times_ok = false, many_times_ok = false;
  1882             bool greedy = true;
  1883 
  1884             /* If there is a sequence of repetition chars, collapse it
  1885                down to just one (the right one).  We can't combine
  1886                interval operators with these because of, e.g., 'a{2}*',
  1887                which should only match an even number of 'a's.  */
  1888 
  1889             for (;;)
  1890               {
  1891                 if (c == '?' && (zero_times_ok || many_times_ok))
  1892                   greedy = false;
  1893                 else
  1894                   {
  1895                     zero_times_ok |= c != '+';
  1896                     many_times_ok |= c != '?';
  1897                   }
  1898 
  1899                 if (! (p < pend && (*p == '*' || *p == '+' || *p == '?')))
  1900                   break;
  1901                 /* If we get here, we found another repeat character.  */
  1902                 c = *p++;
  1903                }
  1904 
  1905             /* Star, etc. applied to an empty pattern is equivalent
  1906                to an empty pattern.  */
  1907             if (laststart == b)
  1908               break;
  1909 
  1910             /* Now we know whether or not zero matches is allowed
  1911                and also whether or not two or more matches is allowed.  */
  1912             if (greedy)
  1913               {
  1914                 if (many_times_ok)
  1915                   {
  1916                     bool simple = skip_one_char (laststart) == b;
  1917                     ptrdiff_t startoffset = 0;
  1918                     re_opcode_t ofj =
  1919                       /* Check if the loop can match the empty string.  */
  1920                       (simple || !analyze_first (laststart, b, NULL, false))
  1921                       ? on_failure_jump : on_failure_jump_loop;
  1922                     eassert (skip_one_char (laststart) <= b);
  1923 
  1924                     if (!zero_times_ok && simple)
  1925                       { /* Since simple * loops can be made faster by using
  1926                            on_failure_keep_string_jump, we turn simple P+
  1927                            into PP* if P is simple.  */
  1928                         unsigned char *p1, *p2;
  1929                         startoffset = b - laststart;
  1930                         GET_BUFFER_SPACE (startoffset);
  1931                         p1 = b; p2 = laststart;
  1932                         while (p2 < p1)
  1933                           *b++ = *p2++;
  1934                         zero_times_ok = 1;
  1935                       }
  1936 
  1937                     GET_BUFFER_SPACE (6);
  1938                     if (!zero_times_ok)
  1939                       /* A + loop.  */
  1940                       STORE_JUMP (ofj, b, b + 6);
  1941                     else
  1942                       /* Simple * loops can use on_failure_keep_string_jump
  1943                          depending on what follows.  But since we don't know
  1944                          that yet, we leave the decision up to
  1945                          on_failure_jump_smart.  */
  1946                       INSERT_JUMP (simple ? on_failure_jump_smart : ofj,
  1947                                    laststart + startoffset, b + 6);
  1948                     b += 3;
  1949                     STORE_JUMP (jump, b, laststart + startoffset);
  1950                     b += 3;
  1951                   }
  1952                 else
  1953                   {
  1954                     /* A simple ? pattern.  */
  1955                     eassert (zero_times_ok);
  1956                     GET_BUFFER_SPACE (3);
  1957                     INSERT_JUMP (on_failure_jump, laststart, b + 3);
  1958                     b += 3;
  1959                   }
  1960               }
  1961             else                /* not greedy */
  1962               { /* I wish the greedy and non-greedy cases could be merged.  */
  1963 
  1964                 GET_BUFFER_SPACE (7); /* We might use less.  */
  1965                 if (many_times_ok)
  1966                   {
  1967                     bool emptyp = !!analyze_first (laststart, b, NULL, false);
  1968 
  1969                     /* The non-greedy multiple match looks like
  1970                        a repeat..until: we only need a conditional jump
  1971                        at the end of the loop.  */
  1972                     if (emptyp) BUF_PUSH (no_op);
  1973                     STORE_JUMP (emptyp ? on_failure_jump_nastyloop
  1974                                 : on_failure_jump, b, laststart);
  1975                     b += 3;
  1976                     if (zero_times_ok)
  1977                       {
  1978                         /* The repeat...until naturally matches one or more.
  1979                            To also match zero times, we need to first jump to
  1980                            the end of the loop (its conditional jump).  */
  1981                         INSERT_JUMP (jump, laststart, b);
  1982                         b += 3;
  1983                       }
  1984                   }
  1985                 else
  1986                   {
  1987                     /* non-greedy a?? */
  1988                     INSERT_JUMP (jump, laststart, b + 3);
  1989                     b += 3;
  1990                     INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
  1991                     b += 3;
  1992                   }
  1993               }
  1994           }
  1995           pending_exact = 0;
  1996           break;
  1997 
  1998 
  1999         case '.':
  2000           laststart = b;
  2001           BUF_PUSH (anychar);
  2002           break;
  2003 
  2004 
  2005         case '[':
  2006           {
  2007             re_char *p1;
  2008 
  2009             CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
  2010 
  2011             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  2012 
  2013             /* Ensure that we have enough space to push a charset: the
  2014                opcode, the length count, and the bitset; 34 bytes in all.  */
  2015             GET_BUFFER_SPACE (34);
  2016 
  2017             laststart = b;
  2018 
  2019             /* Test '*p == '^' twice, instead of using an if
  2020                statement, so we need only one BUF_PUSH.  */
  2021             BUF_PUSH (*p == '^' ? charset_not : charset);
  2022             if (*p == '^')
  2023               p++;
  2024 
  2025             /* Remember the first position in the bracket expression.  */
  2026             p1 = p;
  2027 
  2028             /* Push the number of bytes in the bitmap.  */
  2029             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
  2030 
  2031             /* Clear the whole map.  */
  2032             memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
  2033 
  2034             /* Read in characters and ranges, setting map bits.  */
  2035             for (;;)
  2036               {
  2037                 const unsigned char *p2 = p;
  2038                 int ch;
  2039 
  2040                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  2041 
  2042                 /* See if we're at the beginning of a possible character
  2043                    class.  */
  2044                 re_wctype_t cc = re_wctype_parse (&p, pend - p);
  2045                 if (cc != -1)
  2046                   {
  2047                     if (cc == 0)
  2048                       FREE_STACK_RETURN (REG_ECTYPE);
  2049 
  2050                     if (p == pend)
  2051                       FREE_STACK_RETURN (REG_EBRACK);
  2052 
  2053                     /* Most character classes in a multibyte match just set
  2054                        a flag.  Exceptions are is_blank, is_digit, is_cntrl, and
  2055                        is_xdigit, since they can only match ASCII characters.
  2056                        We don't need to handle them for multibyte.  */
  2057 
  2058                     for (c = 0; c < 0x80; ++c)
  2059                       if (re_iswctype (c, cc))
  2060                         {
  2061                           SET_LIST_BIT (c);
  2062                           c1 = TRANSLATE (c);
  2063                           if (c1 == c)
  2064                             continue;
  2065                           if (ASCII_CHAR_P (c1))
  2066                             SET_LIST_BIT (c1);
  2067                           else if ((c1 = RE_CHAR_TO_UNIBYTE (c1)) >= 0)
  2068                             SET_LIST_BIT (c1);
  2069                         }
  2070                     SET_RANGE_TABLE_WORK_AREA_BIT
  2071                       (range_table_work, re_wctype_to_bit (cc));
  2072 
  2073                     /* In most cases the matching rule for char classes only
  2074                        uses the syntax table for multibyte chars, so that the
  2075                        content of the syntax-table is not hardcoded in the
  2076                        range_table.  SPACE and WORD are the two exceptions.  */
  2077                     if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
  2078                       bufp->used_syntax = true;
  2079 
  2080                     /* Repeat the loop. */
  2081                     continue;
  2082                   }
  2083 
  2084                 /* Don't translate yet.  The range TRANSLATE(X..Y) cannot
  2085                    always be determined from TRANSLATE(X) and TRANSLATE(Y)
  2086                    So the translation is done later in a loop.  Example:
  2087                    (let ((case-fold-search t)) (string-match "[A-_]" "A"))  */
  2088                 PATFETCH (c);
  2089 
  2090                 /* Could be the end of the bracket expression.  If it's
  2091                    not (i.e., when the bracket expression is '[]' so
  2092                    far), the ']' character bit gets set way below.  */
  2093                 if (c == ']' && p2 != p1)
  2094                   break;
  2095 
  2096                 if (p < pend && p[0] == '-' && p[1] != ']')
  2097                   {
  2098 
  2099                     /* Discard the '-'. */
  2100                     PATFETCH (c1);
  2101 
  2102                     /* Fetch the character which ends the range. */
  2103                     PATFETCH (c1);
  2104 
  2105                     if (CHAR_BYTE8_P (c1)
  2106                         && ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c))
  2107                       /* Treat the range from a multibyte character to
  2108                          raw-byte character as empty.  */
  2109                       c = c1 + 1;
  2110                   }
  2111                 else
  2112                   /* Range from C to C. */
  2113                   c1 = c;
  2114 
  2115                 if (c <= c1)
  2116                   {
  2117                     if (c < 128)
  2118                       {
  2119                         ch = min (127, c1);
  2120                         SETUP_ASCII_RANGE (range_table_work, c, ch);
  2121                         c = ch + 1;
  2122                         if (CHAR_BYTE8_P (c1))
  2123                           c = BYTE8_TO_CHAR (128);
  2124                       }
  2125                     if (c <= c1)
  2126                       {
  2127                         if (CHAR_BYTE8_P (c))
  2128                           {
  2129                             c = CHAR_TO_BYTE8 (c);
  2130                             c1 = CHAR_TO_BYTE8 (c1);
  2131                             for (; c <= c1; c++)
  2132                               SET_LIST_BIT (c);
  2133                           }
  2134                         else if (multibyte)
  2135                           SETUP_MULTIBYTE_RANGE (range_table_work, c, c1);
  2136                         else
  2137                           SETUP_UNIBYTE_RANGE (range_table_work, c, c1);
  2138                       }
  2139                   }
  2140               }
  2141 
  2142             /* Discard any (non)matching list bytes that are all 0 at the
  2143                end of the map.  Decrease the map-length byte too.  */
  2144             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
  2145               b[-1]--;
  2146             b += b[-1];
  2147 
  2148             /* Build real range table from work area.  */
  2149             if (RANGE_TABLE_WORK_USED (range_table_work)
  2150                 || RANGE_TABLE_WORK_BITS (range_table_work))
  2151               {
  2152                 int i;
  2153                 int used = RANGE_TABLE_WORK_USED (range_table_work);
  2154 
  2155                 /* Allocate space for COUNT + RANGE_TABLE.  Needs two
  2156                    bytes for flags, two for COUNT, and three bytes for
  2157                    each character.  */
  2158                 GET_BUFFER_SPACE (4 + used * 3);
  2159 
  2160                 /* Indicate the existence of range table.  */
  2161                 laststart[1] |= 0x80;
  2162 
  2163                 /* Store the character class flag bits into the range table.  */
  2164                 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
  2165                 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
  2166 
  2167                 STORE_NUMBER_AND_INCR (b, used / 2);
  2168                 for (i = 0; i < used; i++)
  2169                   STORE_CHARACTER_AND_INCR
  2170                     (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
  2171               }
  2172           }
  2173           break;
  2174 
  2175 
  2176         case '\\':
  2177           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
  2178 
  2179           /* Do not translate the character after the \, so that we can
  2180              distinguish, e.g., \B from \b, even if we normally would
  2181              translate, e.g., B to b.  */
  2182           PATFETCH (c);
  2183 
  2184           switch (c)
  2185             {
  2186             case '(':
  2187               {
  2188                 bool shy = false;
  2189                 regnum_t regnum = 0;
  2190                 if (p+1 < pend)
  2191                   {
  2192                     /* Look for a special (?...) construct */
  2193                     if (*p == '?')
  2194                       {
  2195                         PATFETCH (c); /* Gobble up the '?'.  */
  2196                         while (!shy)
  2197                           {
  2198                             PATFETCH (c);
  2199                             switch (c)
  2200                               {
  2201                               case ':': shy = true; break;
  2202                               case '0':
  2203                                 /* An explicitly specified regnum must start
  2204                                    with non-0. */
  2205                                 if (regnum == 0)
  2206                                   FREE_STACK_RETURN (REG_BADPAT);
  2207                                 FALLTHROUGH;
  2208                               case '1': case '2': case '3': case '4':
  2209                               case '5': case '6': case '7': case '8': case '9':
  2210                                 if (ckd_mul (&regnum, regnum, 10)
  2211                                     || ckd_add (&regnum, regnum, c - '0'))
  2212                                   FREE_STACK_RETURN (REG_ESIZE);
  2213                                 break;
  2214                               default:
  2215                                 /* Only (?:...) is supported right now. */
  2216                                 FREE_STACK_RETURN (REG_BADPAT);
  2217                               }
  2218                           }
  2219                       }
  2220                   }
  2221 
  2222                 if (!shy)
  2223                   regnum = ++bufp->re_nsub;
  2224                 else if (regnum)
  2225                   { /* It's actually not shy, but explicitly numbered.  */
  2226                     shy = false;
  2227                     if (regnum > bufp->re_nsub)
  2228                       bufp->re_nsub = regnum;
  2229                     else if (regnum > bufp->re_nsub
  2230                              /* Ideally, we'd want to check that the specified
  2231                                 group can't have matched (i.e. all subgroups
  2232                                 using the same regnum are in other branches of
  2233                                 OR patterns), but we don't currently keep track
  2234                                 of enough info to do that easily.  */
  2235                              || group_in_compile_stack (compile_stack, regnum))
  2236                       FREE_STACK_RETURN (REG_BADPAT);
  2237                   }
  2238                 else
  2239                   /* It's really shy.  */
  2240                   regnum = - bufp->re_nsub;
  2241 
  2242                 if (COMPILE_STACK_FULL)
  2243                   compile_stack.stack
  2244                     = xpalloc (compile_stack.stack, &compile_stack.size,
  2245                                1, -1, sizeof *compile_stack.stack);
  2246 
  2247                 /* These are the values to restore when we hit end of this
  2248                    group.  They are all relative offsets, so that if the
  2249                    whole pattern moves because of realloc, they will still
  2250                    be valid.  */
  2251                 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
  2252                 COMPILE_STACK_TOP.fixup_alt_jump
  2253                   = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
  2254                 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
  2255                 COMPILE_STACK_TOP.regnum = regnum;
  2256 
  2257                 /* Do not push a start_memory for groups beyond the last one
  2258                    we can represent in the compiled pattern.  */
  2259                 if (regnum <= MAX_REGNUM && regnum > 0)
  2260                   BUF_PUSH_2 (start_memory, regnum);
  2261 
  2262                 compile_stack.avail++;
  2263 
  2264                 fixup_alt_jump = 0;
  2265                 laststart = 0;
  2266                 begalt = b;
  2267                 /* If we've reached MAX_REGNUM groups, then this open
  2268                    won't actually generate any code, so we'll have to
  2269                    clear pending_exact explicitly.  */
  2270                 pending_exact = 0;
  2271                 break;
  2272               }
  2273 
  2274             case ')':
  2275               if (COMPILE_STACK_EMPTY)
  2276                 FREE_STACK_RETURN (REG_ERPAREN);
  2277 
  2278               FIXUP_ALT_JUMP ();
  2279 
  2280               /* See similar code for backslashed left paren above.  */
  2281               if (COMPILE_STACK_EMPTY)
  2282                 FREE_STACK_RETURN (REG_ERPAREN);
  2283 
  2284               /* Since we just checked for an empty stack above, this
  2285                  "can't happen".  */
  2286               eassert (compile_stack.avail != 0);
  2287               {
  2288                 /* We don't just want to restore into 'regnum', because
  2289                    later groups should continue to be numbered higher,
  2290                    as in '(ab)c(de)' -- the second group is #2.  */
  2291                 regnum_t regnum;
  2292 
  2293                 compile_stack.avail--;
  2294                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
  2295                 fixup_alt_jump
  2296                   = COMPILE_STACK_TOP.fixup_alt_jump
  2297                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
  2298                     : 0;
  2299                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
  2300                 regnum = COMPILE_STACK_TOP.regnum;
  2301                 /* If we've reached MAX_REGNUM groups, then this open
  2302                    won't actually generate any code, so we'll have to
  2303                    clear pending_exact explicitly.  */
  2304                 pending_exact = 0;
  2305 
  2306                 /* We're at the end of the group, so now we know how many
  2307                    groups were inside this one.  */
  2308                 if (regnum <= MAX_REGNUM && regnum > 0)
  2309                   BUF_PUSH_2 (stop_memory, regnum);
  2310               }
  2311               break;
  2312 
  2313 
  2314             case '|':                                   /* '\|'.  */
  2315               /* Insert before the previous alternative a jump which
  2316                  jumps to this alternative if the former fails.  */
  2317               GET_BUFFER_SPACE (3);
  2318               INSERT_JUMP (on_failure_jump, begalt, b + 6);
  2319               pending_exact = 0;
  2320               b += 3;
  2321 
  2322               /* The alternative before this one has a jump after it
  2323                  which gets executed if it gets matched.  Adjust that
  2324                  jump so it will jump to this alternative's analogous
  2325                  jump (put in below, which in turn will jump to the next
  2326                  (if any) alternative's such jump, etc.).  The last such
  2327                  jump jumps to the correct final destination.  A picture:
  2328                           _____ _____
  2329                           |   | |   |
  2330                           |   v |   v
  2331                         A | B   | C
  2332 
  2333                  If we are at B, then fixup_alt_jump right now points to a
  2334                  three-byte space after A.  We'll put in the jump, set
  2335                  fixup_alt_jump to right after B, and leave behind three
  2336                  bytes which we'll fill in when we get to after C.  */
  2337 
  2338               FIXUP_ALT_JUMP ();
  2339 
  2340               /* Mark and leave space for a jump after this alternative,
  2341                  to be filled in later either by next alternative or
  2342                  when know we're at the end of a series of alternatives.  */
  2343               fixup_alt_jump = b;
  2344               GET_BUFFER_SPACE (3);
  2345               b += 3;
  2346 
  2347               laststart = 0;
  2348               begalt = b;
  2349               break;
  2350 
  2351 
  2352             case '{':
  2353               {
  2354                 /* At least (most) this many matches must be made.  */
  2355                 int lower_bound = 0, upper_bound = -1;
  2356 
  2357                 beg_interval = p;
  2358 
  2359                 GET_INTERVAL_COUNT (lower_bound);
  2360 
  2361                 if (c == ',')
  2362                   GET_INTERVAL_COUNT (upper_bound);
  2363                 else
  2364                   /* Interval such as '{1}' => match exactly once. */
  2365                   upper_bound = lower_bound;
  2366 
  2367                 if (lower_bound < 0
  2368                     || (0 <= upper_bound && upper_bound < lower_bound)
  2369                     || c != '\\')
  2370                   FREE_STACK_RETURN (REG_BADBR);
  2371                 if (p == pend)
  2372                   FREE_STACK_RETURN (REG_EESCAPE);
  2373                 if (*p++ != '}')
  2374                   FREE_STACK_RETURN (REG_BADBR);
  2375 
  2376                 /* We just parsed a valid interval.  */
  2377 
  2378                 /* If it's invalid to have no preceding re.  */
  2379                 if (!laststart)
  2380                   goto unfetch_interval;
  2381 
  2382                 if (upper_bound == 0)
  2383                   /* If the upper bound is zero, just drop the sub pattern
  2384                      altogether.  */
  2385                   b = laststart;
  2386                 else if (lower_bound == 1 && upper_bound == 1)
  2387                   /* Just match it once: nothing to do here.  */
  2388                   ;
  2389 
  2390                 /* Otherwise, we have a nontrivial interval.  When
  2391                    we're all done, the pattern will look like:
  2392                    set_number_at <jump count> <upper bound>
  2393                    set_number_at <succeed_n count> <lower bound>
  2394                    succeed_n <after jump addr> <succeed_n count>
  2395                    <body of loop>
  2396                    jump_n <succeed_n addr> <jump count>
  2397                    (The upper bound and 'jump_n' are omitted if
  2398                    'upper_bound' is 1, though.)  */
  2399                 else
  2400                   { /* If the upper bound is > 1, we need to insert
  2401                        more at the end of the loop.  */
  2402                     int nbytes = upper_bound < 0 ? 3 : upper_bound > 1 ? 5 : 0;
  2403                     int startoffset = 0;
  2404 
  2405                     GET_BUFFER_SPACE (20); /* We might use less.  */
  2406 
  2407                     if (lower_bound == 0)
  2408                       {
  2409                         /* A succeed_n that starts with 0 is really
  2410                            a simple on_failure_jump_loop.  */
  2411                         INSERT_JUMP (on_failure_jump_loop, laststart,
  2412                                      b + 3 + nbytes);
  2413                         b += 3;
  2414                       }
  2415                     else
  2416                       {
  2417                         /* Initialize lower bound of the 'succeed_n', even
  2418                            though it will be set during matching by its
  2419                            attendant 'set_number_at' (inserted next),
  2420                            because 're_compile_fastmap' needs to know.
  2421                            Jump to the 'jump_n' we might insert below.  */
  2422                         INSERT_JUMP2 (succeed_n, laststart,
  2423                                       b + 5 + nbytes,
  2424                                       lower_bound);
  2425                         b += 5;
  2426 
  2427                         /* Code to initialize the lower bound.  Insert
  2428                            before the 'succeed_n'.  The '5' is the last two
  2429                            bytes of this 'set_number_at', plus 3 bytes of
  2430                            the following 'succeed_n'.  */
  2431                         insert_op2 (set_number_at, laststart, 5,
  2432                                     lower_bound, b);
  2433                         b += 5;
  2434                         startoffset += 5;
  2435                       }
  2436 
  2437                     if (upper_bound < 0)
  2438                       {
  2439                         /* A negative upper bound stands for infinity,
  2440                            in which case it degenerates to a plain jump.  */
  2441                         STORE_JUMP (jump, b, laststart + startoffset);
  2442                         b += 3;
  2443                       }
  2444                     else if (upper_bound > 1)
  2445                       { /* More than one repetition is allowed, so
  2446                            append a backward jump to the 'succeed_n'
  2447                            that starts this interval.
  2448 
  2449                            When we've reached this during matching,
  2450                            we'll have matched the interval once, so
  2451                            jump back only 'upper_bound - 1' times.  */
  2452                         STORE_JUMP2 (jump_n, b, laststart + startoffset,
  2453                                      upper_bound - 1);
  2454                         b += 5;
  2455 
  2456                         /* The location we want to set is the second
  2457                            parameter of the 'jump_n'; that is 'b-2' as
  2458                            an absolute address.  'laststart' will be
  2459                            the 'set_number_at' we're about to insert;
  2460                            'laststart+3' the number to set, the source
  2461                            for the relative address.  But we are
  2462                            inserting into the middle of the pattern --
  2463                            so everything is getting moved up by 5.
  2464                            Conclusion: (b - 2) - (laststart + 3) + 5,
  2465                            i.e., b - laststart.
  2466 
  2467                            Insert this at the beginning of the loop
  2468                            so that if we fail during matching, we'll
  2469                            reinitialize the bounds.  */
  2470                         insert_op2 (set_number_at, laststart, b - laststart,
  2471                                     upper_bound - 1, b);
  2472                         b += 5;
  2473                       }
  2474                   }
  2475                 pending_exact = 0;
  2476                 beg_interval = NULL;
  2477               }
  2478               break;
  2479 
  2480             unfetch_interval:
  2481               /* If an invalid interval, match the characters as literals.  */
  2482                eassert (beg_interval);
  2483                p = beg_interval;
  2484                beg_interval = NULL;
  2485                eassert (p > pattern && p[-1] == '\\');
  2486                c = '{';
  2487                goto normal_char;
  2488 
  2489             case '=':
  2490               laststart = b;
  2491               BUF_PUSH (at_dot);
  2492               break;
  2493 
  2494             case 's':
  2495               laststart = b;
  2496               PATFETCH (c);
  2497               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
  2498               break;
  2499 
  2500             case 'S':
  2501               laststart = b;
  2502               PATFETCH (c);
  2503               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
  2504               break;
  2505 
  2506             case 'c':
  2507               laststart = b;
  2508               PATFETCH (c);
  2509               BUF_PUSH_2 (categoryspec, c);
  2510               break;
  2511 
  2512             case 'C':
  2513               laststart = b;
  2514               PATFETCH (c);
  2515               BUF_PUSH_2 (notcategoryspec, c);
  2516               break;
  2517 
  2518             case 'w':
  2519               laststart = b;
  2520               BUF_PUSH_2 (syntaxspec, Sword);
  2521               break;
  2522 
  2523 
  2524             case 'W':
  2525               laststart = b;
  2526               BUF_PUSH_2 (notsyntaxspec, Sword);
  2527               break;
  2528 
  2529 
  2530             case '<':
  2531               laststart = b;
  2532               BUF_PUSH (wordbeg);
  2533               break;
  2534 
  2535             case '>':
  2536               laststart = b;
  2537               BUF_PUSH (wordend);
  2538               break;
  2539 
  2540             case '_':
  2541               laststart = b;
  2542               PATFETCH (c);
  2543               if (c == '<')
  2544                 BUF_PUSH (symbeg);
  2545               else if (c == '>')
  2546                 BUF_PUSH (symend);
  2547               else
  2548                 FREE_STACK_RETURN (REG_BADPAT);
  2549               break;
  2550 
  2551             case 'b':
  2552               laststart = b;
  2553               BUF_PUSH (wordbound);
  2554               break;
  2555 
  2556             case 'B':
  2557               laststart = b;
  2558               BUF_PUSH (notwordbound);
  2559               break;
  2560 
  2561             case '`':
  2562               /* Special case for compatibility: postfix ops after \` become
  2563                  literals, as for ^ (see above).  */
  2564               laststart = 0;
  2565               BUF_PUSH (begbuf);
  2566               break;
  2567 
  2568             case '\'':
  2569               laststart = b;
  2570               BUF_PUSH (endbuf);
  2571               break;
  2572 
  2573             case '1': case '2': case '3': case '4': case '5':
  2574             case '6': case '7': case '8': case '9':
  2575               {
  2576                 regnum_t reg = c - '0';
  2577 
  2578                 if (reg > bufp->re_nsub || reg < 1
  2579                     /* Can't back reference to a subexp before its end.  */
  2580                     || group_in_compile_stack (compile_stack, reg))
  2581                   FREE_STACK_RETURN (REG_ESUBREG);
  2582 
  2583                 laststart = b;
  2584                 BUF_PUSH_2 (duplicate, reg);
  2585               }
  2586               break;
  2587 
  2588             default:
  2589               /* You might think it would be useful for \ to mean
  2590                  not to translate; but if we don't translate it
  2591                  it will never match anything.  */
  2592               goto normal_char;
  2593             }
  2594           break;
  2595 
  2596 
  2597         default:
  2598         /* Expects the character in C.  */
  2599         normal_char:
  2600           /* If no exactn currently being built.  */
  2601           if (!pending_exact
  2602 
  2603               /* If last exactn not at current position.  */
  2604               || pending_exact + *pending_exact + 1 != b
  2605 
  2606               /* Only one byte follows the exactn for the count.  */
  2607               || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
  2608 
  2609               /* If followed by a repetition operator.  */
  2610               || (p != pend
  2611                   && (*p == '*' || *p == '+' || *p == '?'))
  2612               || (p + 1 < pend && p[0] == '\\' && p[1] == '{'))
  2613             {
  2614               /* Start building a new exactn.  */
  2615 
  2616               laststart = b;
  2617 
  2618               BUF_PUSH_2 (exactn, 0);
  2619               pending_exact = b - 1;
  2620             }
  2621 
  2622           GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
  2623           {
  2624             int len;
  2625 
  2626             if (multibyte)
  2627               {
  2628                 c = TRANSLATE (c);
  2629                 len = CHAR_STRING (c, b);
  2630                 b += len;
  2631               }
  2632             else
  2633               {
  2634                 c1 = RE_CHAR_TO_MULTIBYTE (c);
  2635                 if (! CHAR_BYTE8_P (c1))
  2636                   {
  2637                     int c2 = TRANSLATE (c1);
  2638 
  2639                     if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
  2640                       c = c1;
  2641                   }
  2642                 *b++ = c;
  2643                 len = 1;
  2644               }
  2645             (*pending_exact) += len;
  2646           }
  2647 
  2648           break;
  2649         } /* switch (c) */
  2650     } /* while p != pend */
  2651 
  2652 
  2653   /* Through the pattern now.  */
  2654 
  2655   FIXUP_ALT_JUMP ();
  2656 
  2657   if (!COMPILE_STACK_EMPTY)
  2658     FREE_STACK_RETURN (REG_EPAREN);
  2659 
  2660   /* If we don't want backtracking, force success
  2661      the first time we reach the end of the compiled pattern.  */
  2662   if (!posix_backtracking)
  2663     BUF_PUSH (succeed);
  2664 
  2665   /* Success; set the length of the buffer.  */
  2666   bufp->used = b - bufp->buffer;
  2667 
  2668 #ifdef REGEX_EMACS_DEBUG
  2669   if (regex_emacs_debug > 0)
  2670     {
  2671       re_compile_fastmap (bufp);
  2672       DEBUG_PRINT ("\nCompiled pattern:\n");
  2673       print_compiled_pattern (bufp);
  2674     }
  2675   regex_emacs_debug--;
  2676 #endif
  2677 
  2678   FREE_STACK_RETURN (REG_NOERROR);
  2679 
  2680 } /* regex_compile */
  2681 
  2682 /* Subroutines for 'regex_compile'.  */
  2683 
  2684 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
  2685 
  2686 static void
  2687 store_op1 (re_opcode_t op, unsigned char *loc, int arg)
  2688 {
  2689   *loc = (unsigned char) op;
  2690   STORE_NUMBER (loc + 1, arg);
  2691 }
  2692 
  2693 
  2694 /* Like 'store_op1', but for two two-byte parameters ARG1 and ARG2.  */
  2695 
  2696 static void
  2697 store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2)
  2698 {
  2699   *loc = (unsigned char) op;
  2700   STORE_NUMBER (loc + 1, arg1);
  2701   STORE_NUMBER (loc + 3, arg2);
  2702 }
  2703 
  2704 
  2705 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
  2706    for OP followed by two-byte integer parameter ARG.  */
  2707 
  2708 static void
  2709 insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
  2710 {
  2711   register unsigned char *pfrom = end;
  2712   register unsigned char *pto = end + 3;
  2713 
  2714   while (pfrom != loc)
  2715     *--pto = *--pfrom;
  2716 
  2717   store_op1 (op, loc, arg);
  2718 }
  2719 
  2720 
  2721 /* Like 'insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
  2722 
  2723 static void
  2724 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
  2725             unsigned char *end)
  2726 {
  2727   register unsigned char *pfrom = end;
  2728   register unsigned char *pto = end + 5;
  2729 
  2730   while (pfrom != loc)
  2731     *--pto = *--pfrom;
  2732 
  2733   store_op2 (op, loc, arg1, arg2);
  2734 }
  2735 
  2736 
  2737 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
  2738    after an alternative or a begin-subexpression.  Assume there is at
  2739    least one character before the ^.  */
  2740 
  2741 static bool
  2742 at_begline_loc_p (re_char *pattern, re_char *p)
  2743 {
  2744   re_char *prev = p - 2;
  2745 
  2746   switch (*prev)
  2747     {
  2748     case '(': /* After a subexpression.  */
  2749     case '|': /* After an alternative.  */
  2750       break;
  2751 
  2752     case ':': /* After a shy subexpression.  */
  2753       /* Skip over optional regnum.  */
  2754       while (prev > pattern && '0' <= prev[-1] && prev[-1] <= '9')
  2755         --prev;
  2756 
  2757       if (! (prev > pattern + 1 && prev[-1] == '?' && prev[-2] == '('))
  2758         return false;
  2759       prev -= 2;
  2760       break;
  2761 
  2762     default:
  2763       return false;
  2764     }
  2765 
  2766   /* Count the number of preceding backslashes.  */
  2767   p = prev;
  2768   while (prev > pattern && prev[-1] == '\\')
  2769     --prev;
  2770   return (p - prev) & 1;
  2771 }
  2772 
  2773 
  2774 /* The dual of at_begline_loc_p.  This one is for $.  Assume there is
  2775    at least one character after the $, i.e., 'P < PEND'.  */
  2776 
  2777 static bool
  2778 at_endline_loc_p (re_char *p, re_char *pend)
  2779 {
  2780   /* Before a subexpression or an alternative?  */
  2781   return *p == '\\' && p + 1 < pend && (p[1] == ')' || p[1] == '|');
  2782 }
  2783 
  2784 
  2785 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
  2786    false if it's not.  */
  2787 
  2788 static bool
  2789 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
  2790 {
  2791   ptrdiff_t this_element;
  2792 
  2793   for (this_element = compile_stack.avail - 1;
  2794        this_element >= 0;
  2795        this_element--)
  2796     if (compile_stack.stack[this_element].regnum == regnum)
  2797       return true;
  2798 
  2799   return false;
  2800 }
  2801 
  2802 /* analyze_first.
  2803    If fastmap is non-NULL, go through the pattern and fill fastmap
  2804    with all the possible leading chars.  If fastmap is NULL, don't
  2805    bother filling it up (obviously) and only return whether the
  2806    pattern could potentially match the empty string.
  2807 
  2808    Return 1  if p..pend might match the empty string.
  2809    Return 0  if p..pend matches at least one char.
  2810    Return -1 if fastmap was not updated accurately.  */
  2811 
  2812 static int
  2813 analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte)
  2814 {
  2815   int j, k;
  2816   int nbits;
  2817   bool not;
  2818 
  2819   /* If all elements for base leading-codes in fastmap is set, this
  2820      flag is set true.  */
  2821   bool match_any_multibyte_characters = false;
  2822 
  2823   eassert (p);
  2824 
  2825   /* The loop below works as follows:
  2826      - It has a working-list kept in the PATTERN_STACK and which basically
  2827        starts by only containing a pointer to the first operation.
  2828      - If the opcode we're looking at is a match against some set of
  2829        chars, then we add those chars to the fastmap and go on to the
  2830        next work element from the worklist (done via 'break').
  2831      - If the opcode is a control operator on the other hand, we either
  2832        ignore it (if it's meaningless at this point, such as 'start_memory')
  2833        or execute it (if it's a jump).  If the jump has several destinations
  2834        (i.e. 'on_failure_jump'), then we push the other destination onto the
  2835        worklist.
  2836      We guarantee termination by ignoring backward jumps (more or less),
  2837      so that P is monotonically increasing.  More to the point, we
  2838      never set P (or push) anything '<= p1'.  */
  2839 
  2840   while (p < pend)
  2841     {
  2842       /* P1 is used as a marker of how far back a 'on_failure_jump'
  2843          can go without being ignored.  It is normally equal to P
  2844          (which prevents any backward 'on_failure_jump') except right
  2845          after a plain 'jump', to allow patterns such as:
  2846             0: jump 10
  2847             3..9: <body>
  2848             10: on_failure_jump 3
  2849          as used for the *? operator.  */
  2850       re_char *p1 = p;
  2851 
  2852       switch (*p++)
  2853         {
  2854         case succeed:
  2855           return 1;
  2856 
  2857         case duplicate:
  2858           /* If the first character has to match a backreference, that means
  2859              that the group was empty (since it already matched).  Since this
  2860              is the only case that interests us here, we can assume that the
  2861              backreference must match the empty string.  */
  2862           p++;
  2863           continue;
  2864 
  2865 
  2866       /* Following are the cases which match a character.  These end
  2867          with 'break'.  */
  2868 
  2869         case exactn:
  2870           if (fastmap)
  2871             {
  2872               /* If multibyte is nonzero, the first byte of each
  2873                  character is an ASCII or a leading code.  Otherwise,
  2874                  each byte is a character.  Thus, this works in both
  2875                  cases. */
  2876               fastmap[p[1]] = 1;
  2877               if (multibyte)
  2878                 {
  2879                   /* Cover the case of matching a raw char in a
  2880                      multibyte regexp against unibyte.  */
  2881                   if (CHAR_BYTE8_HEAD_P (p[1]))
  2882                     fastmap[CHAR_TO_BYTE8 (STRING_CHAR (p + 1))] = 1;
  2883                 }
  2884               else
  2885                 {
  2886                   /* For the case of matching this unibyte regex
  2887                      against multibyte, we must set a leading code of
  2888                      the corresponding multibyte character.  */
  2889                   int c = RE_CHAR_TO_MULTIBYTE (p[1]);
  2890 
  2891                   fastmap[CHAR_LEADING_CODE (c)] = 1;
  2892                 }
  2893             }
  2894           break;
  2895 
  2896 
  2897         case anychar:
  2898           /* We could put all the chars except for \n (and maybe \0)
  2899              but we don't bother since it is generally not worth it.  */
  2900           if (!fastmap) break;
  2901           return -1;
  2902 
  2903 
  2904         case charset_not:
  2905           if (!fastmap) break;
  2906           {
  2907             /* Chars beyond end of bitmap are possible matches.  */
  2908             for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
  2909                  j < (1 << BYTEWIDTH); j++)
  2910               fastmap[j] = 1;
  2911           }
  2912           FALLTHROUGH;
  2913         case charset:
  2914           if (!fastmap) break;
  2915           not = (re_opcode_t) *(p - 1) == charset_not;
  2916           nbits = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
  2917           p++;
  2918           for (j = 0; j < nbits; j++)
  2919             if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
  2920               fastmap[j] = 1;
  2921 
  2922           /* To match raw bytes (in the 80..ff range) against multibyte
  2923              strings, add their leading bytes to the fastmap.  */
  2924           for (j = 0x80; j < nbits; j++)
  2925             if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
  2926               fastmap[CHAR_LEADING_CODE (BYTE8_TO_CHAR (j))] = 1;
  2927 
  2928           if (/* Any leading code can possibly start a character
  2929                  which doesn't match the specified set of characters.  */
  2930               not
  2931               ||
  2932               /* If we can match a character class, we can match any
  2933                  multibyte characters.  */
  2934               (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
  2935                && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
  2936 
  2937             {
  2938               if (match_any_multibyte_characters == false)
  2939                 {
  2940                   for (j = MIN_MULTIBYTE_LEADING_CODE;
  2941                        j <= MAX_MULTIBYTE_LEADING_CODE; j++)
  2942                     fastmap[j] = 1;
  2943                   match_any_multibyte_characters = true;
  2944                 }
  2945             }
  2946 
  2947           else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
  2948                    && match_any_multibyte_characters == false)
  2949             {
  2950               /* Set fastmap[I] to 1 where I is a leading code of each
  2951                  multibyte character in the range table. */
  2952               int c, count;
  2953               unsigned char lc1, lc2;
  2954 
  2955               /* Make P points the range table.  '+ 2' is to skip flag
  2956                  bits for a character class.  */
  2957               p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
  2958 
  2959               /* Extract the number of ranges in range table into COUNT.  */
  2960               EXTRACT_NUMBER_AND_INCR (count, p);
  2961               for (; count > 0; count--, p += 3)
  2962                 {
  2963                   /* Extract the start and end of each range.  */
  2964                   EXTRACT_CHARACTER (c, p);
  2965                   lc1 = CHAR_LEADING_CODE (c);
  2966                   p += 3;
  2967                   EXTRACT_CHARACTER (c, p);
  2968                   lc2 = CHAR_LEADING_CODE (c);
  2969                   for (j = lc1; j <= lc2; j++)
  2970                     fastmap[j] = 1;
  2971                 }
  2972             }
  2973           break;
  2974 
  2975         case syntaxspec:
  2976         case notsyntaxspec:
  2977           if (!fastmap) break;
  2978           /* This match depends on text properties.  These end with
  2979              aborting optimizations.  */
  2980           return -1;
  2981 
  2982         case categoryspec:
  2983         case notcategoryspec:
  2984           if (!fastmap) break;
  2985           not = (re_opcode_t)p[-1] == notcategoryspec;
  2986           k = *p++;
  2987           for (j = (1 << BYTEWIDTH); j >= 0; j--)
  2988             if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
  2989               fastmap[j] = 1;
  2990 
  2991           /* Any leading code can possibly start a character which
  2992              has or doesn't has the specified category.  */
  2993           if (match_any_multibyte_characters == false)
  2994             {
  2995               for (j = MIN_MULTIBYTE_LEADING_CODE;
  2996                    j <= MAX_MULTIBYTE_LEADING_CODE; j++)
  2997                 fastmap[j] = 1;
  2998               match_any_multibyte_characters = true;
  2999             }
  3000           break;
  3001 
  3002       /* All cases after this match the empty string.  These end with
  3003          'continue'.  */
  3004 
  3005         case at_dot:
  3006         case no_op:
  3007         case begline:
  3008         case endline:
  3009         case begbuf:
  3010         case endbuf:
  3011         case wordbound:
  3012         case notwordbound:
  3013         case wordbeg:
  3014         case wordend:
  3015         case symbeg:
  3016         case symend:
  3017           continue;
  3018 
  3019 
  3020         case jump:
  3021           EXTRACT_NUMBER_AND_INCR (j, p);
  3022           if (j < 0)
  3023             /* Backward jumps can only go back to code that we've already
  3024                visited.  're_compile' should make sure this is true.  */
  3025             break;
  3026           p += j;
  3027           switch (*p)
  3028             {
  3029             case on_failure_jump:
  3030             case on_failure_keep_string_jump:
  3031             case on_failure_jump_loop:
  3032             case on_failure_jump_nastyloop:
  3033             case on_failure_jump_smart:
  3034               p++;
  3035               break;
  3036             default:
  3037               continue;
  3038             };
  3039           /* Keep P1 to allow the 'on_failure_jump' we are jumping to
  3040              to jump back to "just after here".  */
  3041           FALLTHROUGH;
  3042         case on_failure_jump:
  3043         case on_failure_keep_string_jump:
  3044         case on_failure_jump_nastyloop:
  3045         case on_failure_jump_loop:
  3046         case on_failure_jump_smart:
  3047           EXTRACT_NUMBER_AND_INCR (j, p);
  3048           if (p + j <= p1)
  3049             ; /* Backward jump to be ignored.  */
  3050           else
  3051             { /* We have to look down both arms.
  3052                  We first go down the "straight" path so as to minimize
  3053                  stack usage when going through alternatives.  */
  3054               int r = analyze_first (p, pend, fastmap, multibyte);
  3055               if (r) return r;
  3056               p += j;
  3057             }
  3058           continue;
  3059 
  3060 
  3061         case jump_n:
  3062           /* This code simply does not properly handle forward jump_n.  */
  3063           DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); eassert (j < 0));
  3064           p += 4;
  3065           /* jump_n can either jump or fall through.  The (backward) jump
  3066              case has already been handled, so we only need to look at the
  3067              fallthrough case.  */
  3068           continue;
  3069 
  3070         case succeed_n:
  3071           /* If N == 0, it should be an on_failure_jump_loop instead.  */
  3072           DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); eassert (j > 0));
  3073           p += 4;
  3074           /* We only care about one iteration of the loop, so we don't
  3075              need to consider the case where this behaves like an
  3076              on_failure_jump.  */
  3077           continue;
  3078 
  3079 
  3080         case set_number_at:
  3081           p += 4;
  3082           continue;
  3083 
  3084 
  3085         case start_memory:
  3086         case stop_memory:
  3087           p += 1;
  3088           continue;
  3089 
  3090 
  3091         default:
  3092           abort (); /* We have listed all the cases.  */
  3093         } /* switch *p++ */
  3094 
  3095       /* Getting here means we have found the possible starting
  3096          characters for one path of the pattern -- and that the empty
  3097          string does not match.  We need not follow this path further.  */
  3098       return 0;
  3099     } /* while p */
  3100 
  3101   /* We reached the end without matching anything.  */
  3102   return 1;
  3103 
  3104 } /* analyze_first */
  3105 
  3106 /* Compute a fastmap for the compiled pattern in BUFP.
  3107    A fastmap records which of the (1 << BYTEWIDTH) possible
  3108    characters can start a string that matches the pattern.  This fastmap
  3109    is used by re_search to skip quickly over impossible starting points.
  3110 
  3111    Character codes above (1 << BYTEWIDTH) are not represented in the
  3112    fastmap, but the leading codes are represented.  Thus, the fastmap
  3113    indicates which character sets could start a match.
  3114 
  3115    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
  3116    area as BUFP->fastmap.
  3117 
  3118    Set the 'fastmap', 'fastmap_accurate', and 'can_be_null' fields in
  3119    the pattern buffer.  */
  3120 
  3121 static void
  3122 re_compile_fastmap (struct re_pattern_buffer *bufp)
  3123 {
  3124   char *fastmap = bufp->fastmap;
  3125   int analysis;
  3126 
  3127   eassert (fastmap && bufp->buffer);
  3128 
  3129   memset (fastmap, 0, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
  3130 
  3131   /* FIXME: Is the following assignment correct even when ANALYSIS < 0?  */
  3132   bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
  3133 
  3134   analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used,
  3135                             fastmap, RE_MULTIBYTE_P (bufp));
  3136   bufp->can_be_null = (analysis != 0);
  3137 } /* re_compile_fastmap */
  3138 
  3139 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
  3140    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
  3141    this memory for recording register information.  STARTS and ENDS
  3142    must be allocated using the malloc library routine, and must each
  3143    be at least NUM_REGS * sizeof (ptrdiff_t) bytes long.
  3144 
  3145    If NUM_REGS == 0, then subsequent matches should allocate their own
  3146    register data.
  3147 
  3148    Unless this function is called, the first search or match using
  3149    PATTERN_BUFFER will allocate its own register data, without
  3150    freeing the old data.  */
  3151 
  3152 void
  3153 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
  3154                   ptrdiff_t num_regs, ptrdiff_t *starts, ptrdiff_t *ends)
  3155 {
  3156   if (num_regs)
  3157     {
  3158       bufp->regs_allocated = REGS_REALLOCATE;
  3159       regs->num_regs = num_regs;
  3160       regs->start = starts;
  3161       regs->end = ends;
  3162     }
  3163   else
  3164     {
  3165       bufp->regs_allocated = REGS_UNALLOCATED;
  3166       regs->num_regs = 0;
  3167       regs->start = regs->end = 0;
  3168     }
  3169 }
  3170 
  3171 /* Searching routines.  */
  3172 
  3173 /* Like re_search_2, below, but only one string is specified, and
  3174    doesn't let you say where to stop matching. */
  3175 
  3176 ptrdiff_t
  3177 re_search (struct re_pattern_buffer *bufp, const char *string, ptrdiff_t size,
  3178            ptrdiff_t startpos, ptrdiff_t range, struct re_registers *regs)
  3179 {
  3180   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
  3181                       regs, size);
  3182 }
  3183 
  3184 /* Address of POS in the concatenation of virtual string. */
  3185 #define POS_ADDR_VSTRING(POS)                                   \
  3186   (((POS) >= size1 ? string2 - size1 : string1) + (POS))
  3187 
  3188 /* Using the compiled pattern in BUFP->buffer, first tries to match the
  3189    virtual concatenation of STRING1 and STRING2, starting first at index
  3190    STARTPOS, then at STARTPOS + 1, and so on.
  3191 
  3192    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
  3193 
  3194    RANGE is how far to scan while trying to match.  RANGE = 0 means try
  3195    only at STARTPOS; in general, the last start tried is STARTPOS +
  3196    RANGE.
  3197 
  3198    In REGS, return the indices of the virtual concatenation of STRING1
  3199    and STRING2 that matched the entire BUFP->buffer and its contained
  3200    subexpressions.
  3201 
  3202    Do not consider matching one past the index STOP in the virtual
  3203    concatenation of STRING1 and STRING2.
  3204 
  3205    Return either the position in the strings at which the match was
  3206    found, -1 if no match, or -2 if error (such as failure
  3207    stack overflow).  */
  3208 
  3209 ptrdiff_t
  3210 re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ptrdiff_t size1,
  3211              const char *str2, ptrdiff_t size2,
  3212              ptrdiff_t startpos, ptrdiff_t range,
  3213              struct re_registers *regs, ptrdiff_t stop)
  3214 {
  3215   ptrdiff_t val;
  3216   re_char *string1 = (re_char *) str1;
  3217   re_char *string2 = (re_char *) str2;
  3218   char *fastmap = bufp->fastmap;
  3219   Lisp_Object translate = bufp->translate;
  3220   ptrdiff_t total_size = size1 + size2;
  3221   ptrdiff_t endpos = startpos + range;
  3222   bool anchored_start;
  3223   /* Nonzero if we are searching multibyte string.  */
  3224   bool multibyte = RE_TARGET_MULTIBYTE_P (bufp);
  3225 
  3226   /* Check for out-of-range STARTPOS.  */
  3227   if (startpos < 0 || startpos > total_size)
  3228     return -1;
  3229 
  3230   /* Fix up RANGE if it might eventually take us outside
  3231      the virtual concatenation of STRING1 and STRING2.
  3232      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
  3233   if (endpos < 0)
  3234     range = 0 - startpos;
  3235   else if (endpos > total_size)
  3236     range = total_size - startpos;
  3237 
  3238   /* If the search isn't to be a backwards one, don't waste time in a
  3239      search for a pattern anchored at beginning of buffer.  */
  3240   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
  3241     {
  3242       if (startpos > 0)
  3243         return -1;
  3244       else
  3245         range = 0;
  3246     }
  3247 
  3248   /* In a forward search for something that starts with \=.
  3249      don't keep searching past point.  */
  3250   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
  3251     {
  3252       range = PT_BYTE - BEGV_BYTE - startpos;
  3253       if (range < 0)
  3254         return -1;
  3255     }
  3256 
  3257   /* Update the fastmap now if not correct already.  */
  3258   if (fastmap && !bufp->fastmap_accurate)
  3259     re_compile_fastmap (bufp);
  3260 
  3261   /* See whether the pattern is anchored.  */
  3262   anchored_start = (bufp->buffer[0] == begline);
  3263 
  3264   RE_SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, startpos);
  3265 
  3266   /* Loop through the string, looking for a place to start matching.  */
  3267   for (;;)
  3268     {
  3269       /* If the pattern is anchored,
  3270          skip quickly past places we cannot match.
  3271          Don't bother to treat startpos == 0 specially
  3272          because that case doesn't repeat.  */
  3273       if (anchored_start && startpos > 0)
  3274         {
  3275           if (! ((startpos <= size1 ? string1[startpos - 1]
  3276                   : string2[startpos - size1 - 1])
  3277                  == '\n'))
  3278             goto advance;
  3279         }
  3280 
  3281       /* If a fastmap is supplied, skip quickly over characters that
  3282          cannot be the start of a match.  If the pattern can match the
  3283          null string, however, we don't need to skip characters; we want
  3284          the first null string.  */
  3285       if (fastmap && startpos < total_size && !bufp->can_be_null)
  3286         {
  3287           re_char *d;
  3288           int buf_ch;
  3289 
  3290           d = POS_ADDR_VSTRING (startpos);
  3291 
  3292           if (range > 0)        /* Searching forwards.  */
  3293             {
  3294               ptrdiff_t irange = range, lim = 0;
  3295 
  3296               if (startpos < size1 && startpos + range >= size1)
  3297                 lim = range - (size1 - startpos);
  3298 
  3299               /* Written out as an if-else to avoid testing 'translate'
  3300                  inside the loop.  */
  3301               if (!NILP (translate))
  3302                 {
  3303                   if (multibyte)
  3304                     while (range > lim)
  3305                       {
  3306                         int buf_charlen;
  3307 
  3308                         buf_ch = string_char_and_length (d, &buf_charlen);
  3309                         buf_ch = RE_TRANSLATE (translate, buf_ch);
  3310                         if (fastmap[CHAR_LEADING_CODE (buf_ch)])
  3311                           break;
  3312 
  3313                         range -= buf_charlen;
  3314                         d += buf_charlen;
  3315                       }
  3316                   else
  3317                     while (range > lim)
  3318                       {
  3319                         buf_ch = *d;
  3320                         int ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
  3321                         int translated = RE_TRANSLATE (translate, ch);
  3322                         if (translated != ch
  3323                             && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
  3324                           buf_ch = ch;
  3325                         if (fastmap[buf_ch])
  3326                           break;
  3327                         d++;
  3328                         range--;
  3329                       }
  3330                 }
  3331               else
  3332                 {
  3333                   if (multibyte)
  3334                     while (range > lim)
  3335                       {
  3336                         int buf_charlen;
  3337 
  3338                         buf_ch = string_char_and_length (d, &buf_charlen);
  3339                         if (fastmap[CHAR_LEADING_CODE (buf_ch)])
  3340                           break;
  3341                         range -= buf_charlen;
  3342                         d += buf_charlen;
  3343                       }
  3344                   else
  3345                     while (range > lim && !fastmap[*d])
  3346                       {
  3347                         d++;
  3348                         range--;
  3349                       }
  3350                 }
  3351               startpos += irange - range;
  3352             }
  3353           else                          /* Searching backwards.  */
  3354             {
  3355               if (multibyte)
  3356                 {
  3357                   buf_ch = STRING_CHAR (d);
  3358                   buf_ch = TRANSLATE (buf_ch);
  3359                   if (! fastmap[CHAR_LEADING_CODE (buf_ch)])
  3360                     goto advance;
  3361                 }
  3362               else
  3363                 {
  3364                   buf_ch = *d;
  3365                   int ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
  3366                   int translated = TRANSLATE (ch);
  3367                   if (translated != ch
  3368                       && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
  3369                     buf_ch = ch;
  3370                   if (! fastmap[TRANSLATE (buf_ch)])
  3371                     goto advance;
  3372                 }
  3373             }
  3374         }
  3375 
  3376       /* If can't match the null string, and that's all we have left, fail.  */
  3377       if (range >= 0 && startpos == total_size && fastmap
  3378           && !bufp->can_be_null)
  3379         return -1;
  3380 
  3381       val = re_match_2_internal (bufp, string1, size1, string2, size2,
  3382                                  startpos, regs, stop);
  3383 
  3384       if (val >= 0)
  3385         return startpos;
  3386 
  3387       if (val == -2)
  3388         return -2;
  3389 
  3390     advance:
  3391       if (!range)
  3392         break;
  3393       else if (range > 0)
  3394         {
  3395           /* Update STARTPOS to the next character boundary.  */
  3396           if (multibyte)
  3397             {
  3398               re_char *p = POS_ADDR_VSTRING (startpos);
  3399               int len = BYTES_BY_CHAR_HEAD (*p);
  3400 
  3401               range -= len;
  3402               if (range < 0)
  3403                 break;
  3404               startpos += len;
  3405             }
  3406           else
  3407             {
  3408               range--;
  3409               startpos++;
  3410             }
  3411         }
  3412       else
  3413         {
  3414           range++;
  3415           startpos--;
  3416 
  3417           /* Update STARTPOS to the previous character boundary.  */
  3418           if (multibyte)
  3419             {
  3420               re_char *p = POS_ADDR_VSTRING (startpos) + 1;
  3421               int len = raw_prev_char_len (p);
  3422 
  3423               range += len - 1;
  3424               if (range > 0)
  3425                 break;
  3426               startpos -= len - 1;
  3427             }
  3428         }
  3429     }
  3430   return -1;
  3431 } /* re_search_2 */
  3432 
  3433 /* Declarations and macros for re_match_2.  */
  3434 
  3435 static bool bcmp_translate (re_char *, re_char *, ptrdiff_t,
  3436                             Lisp_Object, bool);
  3437 
  3438 /* This converts PTR, a pointer into one of the search strings 'string1'
  3439    and 'string2' into an offset from the beginning of that string.  */
  3440 #define POINTER_TO_OFFSET(ptr)                  \
  3441   (FIRST_STRING_P (ptr)                         \
  3442    ? (ptr) - string1                            \
  3443    : (ptr) - string2 + (ptrdiff_t) size1)
  3444 
  3445 /* Call before fetching a character with *d.  This switches over to
  3446    string2 if necessary.
  3447    `reset' is executed before backtracking if there are no more characters.
  3448    Check re_match_2_internal for a discussion of why end_match_2 might
  3449    not be within string2 (but be equal to end_match_1 instead).  */
  3450 #define PREFETCH(reset)                                                 \
  3451   while (d == dend)                                                     \
  3452     {                                                                   \
  3453       /* End of string2 => fail.  */                                    \
  3454       if (dend == end_match_2)                                          \
  3455         {                                                               \
  3456           reset;                                                        \
  3457           goto fail;                                                    \
  3458         }                                                               \
  3459       /* End of string1 => advance to string2.  */                      \
  3460       d = string2;                                                      \
  3461       dend = end_match_2;                                               \
  3462     }
  3463 
  3464 /* Call before fetching a char with *d if you already checked other limits.
  3465    This is meant for use in lookahead operations like wordend, etc..
  3466    where we might need to look at parts of the string that might be
  3467    outside of the LIMITs (i.e past 'stop').  */
  3468 #define PREFETCH_NOLIMIT()                                              \
  3469   if (d == end1)                                                        \
  3470      {                                                                  \
  3471        d = string2;                                                     \
  3472        dend = end_match_2;                                              \
  3473      }
  3474 
  3475 /* Test if at very beginning or at very end of the virtual concatenation
  3476    of STRING1 and STRING2.  If only one string, it's STRING2.  */
  3477 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
  3478 #define AT_STRINGS_END(d) ((d) == end2)
  3479 
  3480 /* Disabled due to a compiler bug -- see comment at case wordbound */
  3481 
  3482 /* The comment at case wordbound is following one, but we don't use
  3483    AT_WORD_BOUNDARY anymore to support multibyte form.
  3484 
  3485    The DEC Alpha C compiler 3.x generates incorrect code for the
  3486    test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
  3487    AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
  3488    macro and introducing temporary variables works around the bug.  */
  3489 
  3490 #if 0
  3491 /* Test if D points to a character which is word-constituent.  We have
  3492    two special cases to check for: if past the end of string1, look at
  3493    the first character in string2; and if before the beginning of
  3494    string2, look at the last character in string1.  */
  3495 #define WORDCHAR_P(d)                                                   \
  3496   (SYNTAX ((d) == end1 ? *string2                                       \
  3497            : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
  3498    == Sword)
  3499 
  3500 /* Test if the character before D and the one at D differ with respect
  3501    to being word-constituent.  */
  3502 #define AT_WORD_BOUNDARY(d)                                             \
  3503   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
  3504    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
  3505 #endif
  3506 
  3507 
  3508 /* Optimization routines.  */
  3509 
  3510 /* If the operation is a match against one or more chars,
  3511    return a pointer to the next operation, else return NULL.  */
  3512 static re_char *
  3513 skip_one_char (re_char *p)
  3514 {
  3515   switch (*p++)
  3516     {
  3517     case anychar:
  3518       break;
  3519 
  3520     case exactn:
  3521       p += *p + 1;
  3522       break;
  3523 
  3524     case charset_not:
  3525     case charset:
  3526       if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1))
  3527         {
  3528           int mcnt;
  3529           p = CHARSET_RANGE_TABLE (p - 1);
  3530           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  3531           p = CHARSET_RANGE_TABLE_END (p, mcnt);
  3532         }
  3533       else
  3534         p += 1 + CHARSET_BITMAP_SIZE (p - 1);
  3535       break;
  3536 
  3537     case syntaxspec:
  3538     case notsyntaxspec:
  3539     case categoryspec:
  3540     case notcategoryspec:
  3541       p++;
  3542       break;
  3543 
  3544     default:
  3545       p = NULL;
  3546     }
  3547   return p;
  3548 }
  3549 
  3550 
  3551 /* Jump over non-matching operations.  */
  3552 static re_char *
  3553 skip_noops (re_char *p, re_char *pend)
  3554 {
  3555   int mcnt;
  3556   while (p < pend)
  3557     {
  3558       switch (*p)
  3559         {
  3560         case start_memory:
  3561         case stop_memory:
  3562           p += 2; break;
  3563         case no_op:
  3564           p += 1; break;
  3565         case jump:
  3566           p += 1;
  3567           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  3568           p += mcnt;
  3569           break;
  3570         default:
  3571           return p;
  3572         }
  3573     }
  3574   eassert (p == pend);
  3575   return p;
  3576 }
  3577 
  3578 /* Test if C matches charset op.  *PP points to the charset or charset_not
  3579    opcode.  When the function finishes, *PP will be advanced past that opcode.
  3580    C is character to test (possibly after translations) and CORIG is original
  3581    character (i.e. without any translations).  UNIBYTE denotes whether c is
  3582    unibyte or multibyte character.
  3583    CANON_TABLE is the canonicalisation table for case folding or Qnil.  */
  3584 static bool
  3585 execute_charset (re_char **pp, int c, int corig, bool unibyte,
  3586                  Lisp_Object canon_table)
  3587 {
  3588   eassume (0 <= c && 0 <= corig);
  3589   re_char *p = *pp, *rtp = NULL;
  3590   bool not = (re_opcode_t) *p == charset_not;
  3591 
  3592   if (CHARSET_RANGE_TABLE_EXISTS_P (p))
  3593     {
  3594       int count;
  3595       rtp = CHARSET_RANGE_TABLE (p);
  3596       EXTRACT_NUMBER_AND_INCR (count, rtp);
  3597       *pp = CHARSET_RANGE_TABLE_END ((rtp), (count));
  3598     }
  3599   else
  3600     *pp += 2 + CHARSET_BITMAP_SIZE (p);
  3601 
  3602   if (unibyte && c < (1 << BYTEWIDTH))
  3603     {                   /* Lookup bitmap.  */
  3604       /* Cast to 'unsigned' instead of 'unsigned char' in
  3605          case the bit list is a full 32 bytes long.  */
  3606       if (c < (unsigned) (CHARSET_BITMAP_SIZE (p) * BYTEWIDTH)
  3607           && p[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
  3608         return !not;
  3609     }
  3610   else if (rtp)
  3611     {
  3612       int class_bits = CHARSET_RANGE_TABLE_BITS (p);
  3613       int range_start, range_end;
  3614 
  3615   /* Sort tests by the most commonly used classes with some adjustment to which
  3616      tests are easiest to perform.  Take a look at comment in re_wctype_parse
  3617      for table with frequencies of character class names. */
  3618 
  3619       if ((class_bits & BIT_MULTIBYTE) ||
  3620           (class_bits & BIT_ALNUM && ISALNUM (c)) ||
  3621           (class_bits & BIT_ALPHA && ISALPHA (c)) ||
  3622           (class_bits & BIT_SPACE && ISSPACE (c)) ||
  3623           (class_bits & BIT_BLANK && ISBLANK (c)) ||
  3624           (class_bits & BIT_WORD  && ISWORD  (c)) ||
  3625           ((class_bits & BIT_UPPER) &&
  3626            (ISUPPER (corig) || (!NILP (canon_table) && ISLOWER (corig)))) ||
  3627           ((class_bits & BIT_LOWER) &&
  3628            (ISLOWER (corig) || (!NILP (canon_table) && ISUPPER (corig)))) ||
  3629           (class_bits & BIT_PUNCT && ISPUNCT (c)) ||
  3630           (class_bits & BIT_GRAPH && ISGRAPH (c)) ||
  3631           (class_bits & BIT_PRINT && ISPRINT (c)))
  3632         return !not;
  3633 
  3634       for (p = *pp; rtp < p; rtp += 2 * 3)
  3635         {
  3636           EXTRACT_CHARACTER (range_start, rtp);
  3637           EXTRACT_CHARACTER (range_end, rtp + 3);
  3638           if (range_start <= c && c <= range_end)
  3639             return !not;
  3640         }
  3641     }
  3642 
  3643   return not;
  3644 }
  3645 
  3646 /* True if "p1 matches something" implies "p2 fails".  */
  3647 static bool
  3648 mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
  3649                       re_char *p2)
  3650 {
  3651   re_opcode_t op2;
  3652   bool multibyte = RE_MULTIBYTE_P (bufp);
  3653   unsigned char *pend = bufp->buffer + bufp->used;
  3654   re_char *p2_orig = p2;
  3655 
  3656   eassert (p1 >= bufp->buffer && p1 < pend
  3657            && p2 >= bufp->buffer && p2 <= pend);
  3658 
  3659   /* Skip over open/close-group commands.
  3660      If what follows this loop is a ...+ construct,
  3661      look at what begins its body, since we will have to
  3662      match at least one of that.  */
  3663   p2 = skip_noops (p2, pend);
  3664   /* The same skip can be done for p1, except that this function
  3665      is only used in the case where p1 is a simple match operator.  */
  3666   /* p1 = skip_noops (p1, pend); */
  3667 
  3668   eassert (p1 >= bufp->buffer && p1 < pend
  3669            && p2 >= bufp->buffer && p2 <= pend);
  3670 
  3671   op2 = p2 == pend ? succeed : *p2;
  3672 
  3673   switch (op2)
  3674     {
  3675     case succeed:
  3676     case endbuf:
  3677       /* If we're at the end of the pattern, we can change.  */
  3678       if (skip_one_char (p1))
  3679         {
  3680           DEBUG_PRINT ("  End of pattern: fast loop.\n");
  3681           return true;
  3682         }
  3683       break;
  3684 
  3685     case endline:
  3686     case exactn:
  3687       {
  3688         int c
  3689           = (re_opcode_t) *p2 == endline ? '\n'
  3690           : RE_STRING_CHAR (p2 + 2, multibyte);
  3691 
  3692         if ((re_opcode_t) *p1 == exactn)
  3693           {
  3694             if (c != RE_STRING_CHAR (p1 + 2, multibyte))
  3695               {
  3696                 DEBUG_PRINT ("  '%c' != '%c' => fast loop.\n", c, p1[2]);
  3697                 return true;
  3698               }
  3699           }
  3700 
  3701         else if ((re_opcode_t) *p1 == charset
  3702                  || (re_opcode_t) *p1 == charset_not)
  3703           {
  3704             if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c),
  3705                                   Qnil))
  3706               {
  3707                 DEBUG_PRINT ("   No match => fast loop.\n");
  3708                 return true;
  3709               }
  3710           }
  3711         else if ((re_opcode_t) *p1 == anychar
  3712                  && c == '\n')
  3713           {
  3714             DEBUG_PRINT ("   . != \\n => fast loop.\n");
  3715             return true;
  3716           }
  3717       }
  3718       break;
  3719 
  3720     case charset:
  3721       {
  3722         if ((re_opcode_t) *p1 == exactn)
  3723           /* Reuse the code above.  */
  3724           return mutually_exclusive_p (bufp, p2, p1);
  3725 
  3726       /* It is hard to list up all the character in charset
  3727          P2 if it includes multibyte character.  Give up in
  3728          such case.  */
  3729       else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
  3730         {
  3731           /* Now, we are sure that P2 has no range table.
  3732              So, for the size of bitmap in P2, 'p2[1]' is
  3733              enough.  But P1 may have range table, so the
  3734              size of bitmap table of P1 is extracted by
  3735              using macro 'CHARSET_BITMAP_SIZE'.
  3736 
  3737              In a multibyte case, we know that all the character
  3738              listed in P2 is ASCII.  In a unibyte case, P1 has only a
  3739              bitmap table.  So, in both cases, it is enough to test
  3740              only the bitmap table of P1.  */
  3741 
  3742           if ((re_opcode_t) *p1 == charset)
  3743             {
  3744               int idx;
  3745               /* We win if the charset inside the loop
  3746                  has no overlap with the one after the loop.  */
  3747               for (idx = 0;
  3748                    (idx < (int) p2[1]
  3749                     && idx < CHARSET_BITMAP_SIZE (p1));
  3750                    idx++)
  3751                 if ((p2[2 + idx] & p1[2 + idx]) != 0)
  3752                   break;
  3753 
  3754               if (idx == p2[1]
  3755                   || idx == CHARSET_BITMAP_SIZE (p1))
  3756                 {
  3757                   DEBUG_PRINT ("         No match => fast loop.\n");
  3758                   return true;
  3759                 }
  3760             }
  3761           else if ((re_opcode_t) *p1 == charset_not)
  3762             {
  3763               int idx;
  3764               /* We win if the charset_not inside the loop lists
  3765                  every character listed in the charset after.  */
  3766               for (idx = 0; idx < (int) p2[1]; idx++)
  3767                 if (! (p2[2 + idx] == 0
  3768                        || (idx < CHARSET_BITMAP_SIZE (p1)
  3769                            && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
  3770                   break;
  3771 
  3772               if (idx == p2[1])
  3773                 {
  3774                   DEBUG_PRINT ("         No match => fast loop.\n");
  3775                   return true;
  3776                 }
  3777               }
  3778           }
  3779       }
  3780       break;
  3781 
  3782     case charset_not:
  3783       switch (*p1)
  3784         {
  3785         case exactn:
  3786         case charset:
  3787           /* Reuse the code above.  */
  3788           return mutually_exclusive_p (bufp, p2, p1);
  3789         case charset_not:
  3790           /* When we have two charset_not, it's very unlikely that
  3791              they don't overlap.  The union of the two sets of excluded
  3792              chars should cover all possible chars, which, as a matter of
  3793              fact, is virtually impossible in multibyte buffers.  */
  3794           break;
  3795         }
  3796       break;
  3797 
  3798     case wordend:
  3799       return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
  3800     case symend:
  3801       return ((re_opcode_t) *p1 == syntaxspec
  3802               && (p1[1] == Ssymbol || p1[1] == Sword));
  3803     case notsyntaxspec:
  3804       return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
  3805 
  3806     case wordbeg:
  3807       return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
  3808     case symbeg:
  3809       return ((re_opcode_t) *p1 == notsyntaxspec
  3810               && (p1[1] == Ssymbol || p1[1] == Sword));
  3811     case syntaxspec:
  3812       return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
  3813 
  3814     case wordbound:
  3815       return (((re_opcode_t) *p1 == notsyntaxspec
  3816                || (re_opcode_t) *p1 == syntaxspec)
  3817               && p1[1] == Sword);
  3818 
  3819     case categoryspec:
  3820       return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
  3821     case notcategoryspec:
  3822       return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
  3823 
  3824     case on_failure_jump_nastyloop:
  3825     case on_failure_jump_smart:
  3826     case on_failure_jump_loop:
  3827     case on_failure_keep_string_jump:
  3828     case on_failure_jump:
  3829       {
  3830         int mcnt;
  3831         p2++;
  3832         EXTRACT_NUMBER_AND_INCR (mcnt, p2);
  3833         /* Don't just test `mcnt > 0` because non-greedy loops have
  3834            their test at the end with an unconditional jump at the start.  */
  3835         if (p2 + mcnt > p2_orig) /* Ensure forward progress.  */
  3836           return (mutually_exclusive_p (bufp, p1, p2)
  3837                   && mutually_exclusive_p (bufp, p1, p2 + mcnt));
  3838         break;
  3839       }
  3840 
  3841     default:
  3842       ;
  3843     }
  3844 
  3845   /* Safe default.  */
  3846   return false;
  3847 }
  3848 
  3849 
  3850 /* Matching routines.  */
  3851 
  3852 /* re_match_2 matches the compiled pattern in BUFP against the
  3853    (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
  3854    and SIZE2, respectively).  We start matching at POS, and stop
  3855    matching at STOP.
  3856 
  3857    If REGS is non-null, store offsets for the substring each group
  3858    matched in REGS.
  3859 
  3860    We return -1 if no match, -2 if an internal error (such as the
  3861    failure stack overflowing).  Otherwise, we return the length of the
  3862    matched substring.  */
  3863 
  3864 ptrdiff_t
  3865 re_match_2 (struct re_pattern_buffer *bufp,
  3866             char const *string1, ptrdiff_t size1,
  3867             char const *string2, ptrdiff_t size2,
  3868             ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop)
  3869 {
  3870   ptrdiff_t result;
  3871 
  3872   RE_SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, pos);
  3873 
  3874   result = re_match_2_internal (bufp, (re_char *) string1, size1,
  3875                                 (re_char *) string2, size2,
  3876                                 pos, regs, stop);
  3877   return result;
  3878 }
  3879 
  3880 static void
  3881 unwind_re_match (void *ptr)
  3882 {
  3883   struct buffer *b = (struct buffer *) ptr;
  3884   b->text->inhibit_shrinking = 0;
  3885 }
  3886 
  3887 /* This is a separate function so that we can force an alloca cleanup
  3888    afterwards.  */
  3889 static ptrdiff_t
  3890 re_match_2_internal (struct re_pattern_buffer *bufp,
  3891                      re_char *string1, ptrdiff_t size1,
  3892                      re_char *string2, ptrdiff_t size2,
  3893                      ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop)
  3894 {
  3895   eassume (0 <= size1);
  3896   eassume (0 <= size2);
  3897   eassume (0 <= pos && pos <= stop && stop <= size1 + size2);
  3898 
  3899   /* General temporaries.  */
  3900   int mcnt;
  3901 
  3902   /* Just past the end of the corresponding string.  */
  3903   re_char *end1, *end2;
  3904 
  3905   /* Pointers into string1 and string2, just past the last characters in
  3906      each to consider matching.  */
  3907   re_char *end_match_1, *end_match_2;
  3908 
  3909   /* Where we are in the data, and the end of the current string.  */
  3910   re_char *d, *dend;
  3911 
  3912   /* Used sometimes to remember where we were before starting matching
  3913      an operator so that we can go back in case of failure.  This "atomic"
  3914      behavior of matching opcodes is indispensable to the correctness
  3915      of the on_failure_keep_string_jump optimization.  */
  3916   re_char *dfail;
  3917 
  3918   /* Where we are in the pattern, and the end of the pattern.  */
  3919   re_char *p = bufp->buffer;
  3920   re_char *pend = p + bufp->used;
  3921 
  3922   /* We use this to map every character in the string.  */
  3923   Lisp_Object translate = bufp->translate;
  3924 
  3925   /* True if BUFP is setup from a multibyte regex.  */
  3926   bool multibyte = RE_MULTIBYTE_P (bufp);
  3927 
  3928   /* True if STRING1/STRING2 are multibyte.  */
  3929   bool target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
  3930 
  3931   /* Failure point stack.  Each place that can handle a failure further
  3932      down the line pushes a failure point on this stack.  It consists of
  3933      regstart, and regend for all registers corresponding to
  3934      the subexpressions we're currently inside, plus the number of such
  3935      registers, and, finally, two char *'s.  The first char * is where
  3936      to resume scanning the pattern; the second one is where to resume
  3937      scanning the strings.  */
  3938   fail_stack_type fail_stack;
  3939 #ifdef DEBUG_COMPILES_ARGUMENTS
  3940   ptrdiff_t nfailure_points_pushed = 0, nfailure_points_popped = 0;
  3941 #endif
  3942 
  3943   /* We fill all the registers internally, independent of what we
  3944      return, for use in backreferences.  The number here includes
  3945      an element for register zero.  */
  3946   ptrdiff_t num_regs = bufp->re_nsub + 1;
  3947   eassume (0 < num_regs);
  3948 
  3949   /* Information on the contents of registers. These are pointers into
  3950      the input strings; they record just what was matched (on this
  3951      attempt) by a subexpression part of the pattern, that is, the
  3952      regnum-th regstart pointer points to where in the pattern we began
  3953      matching and the regnum-th regend points to right after where we
  3954      stopped matching the regnum-th subexpression.  */
  3955   re_char **regstart UNINIT, **regend UNINIT;
  3956 
  3957   /* The following record the register info as found in the above
  3958      variables when we find a match better than any we've seen before.
  3959      This happens as we backtrack through the failure points, which in
  3960      turn happens only if we have not yet matched the entire string. */
  3961   bool best_regs_set = false;
  3962   re_char **best_regstart UNINIT, **best_regend UNINIT;
  3963 
  3964   /* Logically, this is 'best_regend[0]'.  But we don't want to have to
  3965      allocate space for that if we're not allocating space for anything
  3966      else (see below).  Also, we never need info about register 0 for
  3967      any of the other register vectors, and it seems rather a kludge to
  3968      treat 'best_regend' differently from the rest.  So we keep track of
  3969      the end of the best match so far in a separate variable.  We
  3970      initialize this to NULL so that when we backtrack the first time
  3971      and need to test it, it's not garbage.  */
  3972   re_char *match_end = NULL;
  3973 
  3974   /* This keeps track of how many buffer/string positions we examined.  */
  3975   ptrdiff_t nchars = 0;
  3976 
  3977 #ifdef DEBUG_COMPILES_ARGUMENTS
  3978   /* Counts the total number of registers pushed.  */
  3979   ptrdiff_t num_regs_pushed = 0;
  3980 #endif
  3981 
  3982   DEBUG_PRINT ("\nEntering re_match_2.\n");
  3983 
  3984   REGEX_USE_SAFE_ALLOCA;
  3985 
  3986   INIT_FAIL_STACK ();
  3987 
  3988   specpdl_ref count = SPECPDL_INDEX ();
  3989 
  3990   /* Prevent shrinking and relocation of buffer text if GC happens
  3991      while we are inside this function.  The calls to
  3992      UPDATE_SYNTAX_TABLE_* macros can call Lisp (via
  3993      `internal--syntax-propertize`); these calls are careful to defend against
  3994      buffer modifications, but even with no modifications, the buffer text may
  3995      be relocated during GC by `compact_buffer` which would invalidate
  3996      our C pointers to buffer text.  */
  3997   if (!current_buffer->text->inhibit_shrinking)
  3998     {
  3999       record_unwind_protect_ptr (unwind_re_match, current_buffer);
  4000       current_buffer->text->inhibit_shrinking = 1;
  4001     }
  4002 
  4003   /* Do not bother to initialize all the register variables if there are
  4004      no groups in the pattern, as it takes a fair amount of time.  If
  4005      there are groups, we include space for register 0 (the whole
  4006      pattern) in REGSTART[0], even though we never use it, to avoid
  4007      the undefined behavior of subtracting 1 from REGSTART.  */
  4008   ptrdiff_t re_nsub = num_regs - 1;
  4009   if (0 < re_nsub)
  4010     {
  4011       regstart = SAFE_ALLOCA ((re_nsub * 4 + 1) * sizeof *regstart);
  4012       regend = regstart + num_regs;
  4013       best_regstart = regend + re_nsub;
  4014       best_regend = best_regstart + re_nsub;
  4015 
  4016       /* Initialize subexpression text positions to unset, to mark ones
  4017          that no start_memory/stop_memory has been seen for.  */
  4018       for (re_char **apos = regstart + 1; apos < best_regstart + 1; apos++)
  4019         *apos = NULL;
  4020     }
  4021 
  4022   /* We move 'string1' into 'string2' if the latter's empty -- but not if
  4023      'string1' is null.  */
  4024   if (size2 == 0 && string1 != NULL)
  4025     {
  4026       string2 = string1;
  4027       size2 = size1;
  4028       string1 = 0;
  4029       size1 = 0;
  4030     }
  4031   end1 = string1 + size1;
  4032   end2 = string2 + size2;
  4033 
  4034   /* P scans through the pattern as D scans through the data.
  4035      DEND is the end of the input string that D points within.
  4036      Advance D into the following input string whenever necessary, but
  4037      this happens before fetching; therefore, at the beginning of the
  4038      loop, D can be pointing at the end of a string, but it cannot
  4039      equal STRING2.  */
  4040   if (pos >= size1)
  4041     {
  4042       /* Only match within string2.  */
  4043       d = string2 + pos - size1;
  4044       dend = end_match_2 = string2 + stop - size1;
  4045       end_match_1 = end1;       /* Just to give it a value.  */
  4046     }
  4047   else
  4048     {
  4049       if (stop < size1)
  4050         {
  4051           /* Only match within string1.  */
  4052           end_match_1 = string1 + stop;
  4053           /* BEWARE!
  4054              When we reach end_match_1, PREFETCH normally switches to string2.
  4055              But in the present case, this means that just doing a PREFETCH
  4056              makes us jump from 'stop' to 'gap' within the string.
  4057              What we really want here is for the search to stop as
  4058              soon as we hit end_match_1.  That's why we set end_match_2
  4059              to end_match_1 (since PREFETCH fails as soon as we hit
  4060              end_match_2).  */
  4061           end_match_2 = end_match_1;
  4062         }
  4063       else
  4064         { /* It's important to use this code when STOP == SIZE so that
  4065              moving D from end1 to string2 will not prevent the D == DEND
  4066              check from catching the end of string.  */
  4067           end_match_1 = end1;
  4068           end_match_2 = string2 + stop - size1;
  4069         }
  4070       d = string1 + pos;
  4071       dend = end_match_1;
  4072     }
  4073 
  4074   DEBUG_PRINT ("The compiled pattern is:\n");
  4075   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
  4076   DEBUG_PRINT ("The string to match is: \"");
  4077   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
  4078   DEBUG_PRINT ("\"\n");
  4079 
  4080   /* This loops over pattern commands.  It exits by returning from the
  4081      function if the match is complete, or it drops through if the match
  4082      fails at this starting point in the input data.  */
  4083   for (;;)
  4084     {
  4085       DEBUG_PRINT ("\n%p: ", p);
  4086 
  4087       if (p == pend)
  4088         {
  4089           /* End of pattern means we might have succeeded.  */
  4090           DEBUG_PRINT ("end of pattern ... ");
  4091 
  4092           /* If we haven't matched the entire string, and we want the
  4093              longest match, try backtracking.  */
  4094           if (d != end_match_2)
  4095             {
  4096               /* True if this match is the best seen so far.  */
  4097               bool best_match_p;
  4098 
  4099               {
  4100                 /* True if this match ends in the same string (string1
  4101                    or string2) as the best previous match.  */
  4102                 bool same_str_p = (FIRST_STRING_P (match_end)
  4103                                    == FIRST_STRING_P (d));
  4104 
  4105                 /* AIX compiler got confused when this was combined
  4106                    with the previous declaration.  */
  4107                 if (same_str_p)
  4108                   best_match_p = d > match_end;
  4109                 else
  4110                   best_match_p = !FIRST_STRING_P (d);
  4111               }
  4112 
  4113               DEBUG_PRINT ("backtracking.\n");
  4114 
  4115               if (!FAIL_STACK_EMPTY ())
  4116                 { /* More failure points to try.  */
  4117 
  4118                   /* If exceeds best match so far, save it.  */
  4119                   if (!best_regs_set || best_match_p)
  4120                     {
  4121                       best_regs_set = true;
  4122                       match_end = d;
  4123 
  4124                       DEBUG_PRINT ("\nSAVING match as best so far.\n");
  4125 
  4126                       for (ptrdiff_t reg = 1; reg < num_regs; reg++)
  4127                         {
  4128                           best_regstart[reg] = regstart[reg];
  4129                           best_regend[reg] = regend[reg];
  4130                         }
  4131                     }
  4132                   goto fail;
  4133                 }
  4134 
  4135               /* If no failure points, don't restore garbage.  And if
  4136                  last match is real best match, don't restore second
  4137                  best one. */
  4138               else if (best_regs_set && !best_match_p)
  4139                 {
  4140                 restore_best_regs:
  4141                   /* Restore best match.  It may happen that 'dend ==
  4142                      end_match_1' while the restored d is in string2.
  4143                      For example, the pattern 'x.*y.*z' against the
  4144                      strings 'x-' and 'y-z-', if the two strings are
  4145                      not consecutive in memory.  */
  4146                   DEBUG_PRINT ("Restoring best registers.\n");
  4147 
  4148                   d = match_end;
  4149                   dend = ((d >= string1 && d <= end1)
  4150                            ? end_match_1 : end_match_2);
  4151 
  4152                   for (ptrdiff_t reg = 1; reg < num_regs; reg++)
  4153                     {
  4154                       regstart[reg] = best_regstart[reg];
  4155                       regend[reg] = best_regend[reg];
  4156                       eassert (REG_UNSET (regstart[reg])
  4157                                <= REG_UNSET (regend[reg]));
  4158                     }
  4159                 }
  4160             } /* d != end_match_2 */
  4161 
  4162         succeed_label:
  4163           DEBUG_PRINT ("Accepting match.\n");
  4164 
  4165           /* If caller wants register contents data back, do it.  */
  4166           if (regs)
  4167             {
  4168               /* Have the register data arrays been allocated?  */
  4169               if (bufp->regs_allocated == REGS_UNALLOCATED)
  4170                 { /* No.  So allocate them with malloc.  */
  4171                   ptrdiff_t n = max (RE_NREGS, num_regs);
  4172                   regs->start = xnmalloc (n, sizeof *regs->start);
  4173                   regs->end = xnmalloc (n, sizeof *regs->end);
  4174                   regs->num_regs = n;
  4175                   bufp->regs_allocated = REGS_REALLOCATE;
  4176                 }
  4177               else if (bufp->regs_allocated == REGS_REALLOCATE)
  4178                 { /* Yes.  If we need more elements than were already
  4179                      allocated, reallocate them.  If we need fewer, just
  4180                      leave it alone.  */
  4181                   ptrdiff_t n = regs->num_regs;
  4182                   if (n < num_regs)
  4183                     {
  4184                       n = max (n + (n >> 1), num_regs);
  4185                       regs->start
  4186                         = xnrealloc (regs->start, n, sizeof *regs->start);
  4187                       regs->end = xnrealloc (regs->end, n, sizeof *regs->end);
  4188                       regs->num_regs = n;
  4189                     }
  4190                 }
  4191               else
  4192                 eassert (bufp->regs_allocated == REGS_FIXED);
  4193 
  4194               /* Convert the pointer data in 'regstart' and 'regend' to
  4195                  indices.  Register zero has to be set differently,
  4196                  since we haven't kept track of any info for it.  */
  4197               if (regs->num_regs > 0)
  4198                 {
  4199                   regs->start[0] = pos;
  4200                   regs->end[0] = POINTER_TO_OFFSET (d);
  4201                 }
  4202 
  4203               for (ptrdiff_t reg = 1; reg < num_regs; reg++)
  4204                 {
  4205                   eassert (REG_UNSET (regstart[reg])
  4206                            <= REG_UNSET (regend[reg]));
  4207                   if (REG_UNSET (regend[reg]))
  4208                     regs->start[reg] = regs->end[reg] = -1;
  4209                   else
  4210                     {
  4211                       regs->start[reg] = POINTER_TO_OFFSET (regstart[reg]);
  4212                       regs->end[reg] = POINTER_TO_OFFSET (regend[reg]);
  4213                     }
  4214                 }
  4215 
  4216               /* If the regs structure we return has more elements than
  4217                  were in the pattern, set the extra elements to -1.  */
  4218               for (ptrdiff_t reg = num_regs; reg < regs->num_regs; reg++)
  4219                 regs->start[reg] = regs->end[reg] = -1;
  4220             }
  4221 
  4222           DEBUG_PRINT ("%td failure points pushed, %td popped (%td remain).\n",
  4223                        nfailure_points_pushed, nfailure_points_popped,
  4224                        nfailure_points_pushed - nfailure_points_popped);
  4225           DEBUG_PRINT ("%td registers pushed.\n", num_regs_pushed);
  4226 
  4227           ptrdiff_t dcnt = POINTER_TO_OFFSET (d) - pos;
  4228 
  4229           DEBUG_PRINT ("Returning %td from re_match_2.\n", dcnt);
  4230 
  4231           unbind_to (count, Qnil);
  4232           SAFE_FREE ();
  4233           /* The factor of 50 below is a heuristic that needs to be tuned.  It
  4234              means we consider 50 buffer positions examined by this function
  4235              roughly equivalent to the display engine iterating over a single
  4236              buffer position.  */
  4237           if (max_redisplay_ticks > 0 && nchars > 0)
  4238             update_redisplay_ticks (nchars / 50 + 1, NULL);
  4239           return dcnt;
  4240         }
  4241 
  4242       /* Otherwise match next pattern command.  */
  4243       switch (*p++)
  4244         {
  4245         /* Ignore these.  Used to ignore the n of succeed_n's which
  4246            currently have n == 0.  */
  4247         case no_op:
  4248           DEBUG_PRINT ("EXECUTING no_op.\n");
  4249           break;
  4250 
  4251         case succeed:
  4252           DEBUG_PRINT ("EXECUTING succeed.\n");
  4253           goto succeed_label;
  4254 
  4255         /* Match the next n pattern characters exactly.  The following
  4256            byte in the pattern defines n, and the n bytes after that
  4257            are the characters to match.  */
  4258         case exactn:
  4259           mcnt = *p++;
  4260           DEBUG_PRINT ("EXECUTING exactn %d.\n", mcnt);
  4261 
  4262           /* Remember the start point to rollback upon failure.  */
  4263           dfail = d;
  4264 
  4265           /* The cost of testing 'translate' is comparatively small.  */
  4266           if (target_multibyte)
  4267             do
  4268               {
  4269                 int pat_charlen, buf_charlen;
  4270                 int pat_ch, buf_ch;
  4271 
  4272                 PREFETCH (d = dfail);
  4273                 if (multibyte)
  4274                   pat_ch = string_char_and_length (p, &pat_charlen);
  4275                 else
  4276                   {
  4277                     pat_ch = RE_CHAR_TO_MULTIBYTE (*p);
  4278                     pat_charlen = 1;
  4279                   }
  4280                 buf_ch = string_char_and_length (d, &buf_charlen);
  4281 
  4282                 if (TRANSLATE (buf_ch) != pat_ch)
  4283                   {
  4284                     d = dfail;
  4285                     goto fail;
  4286                   }
  4287 
  4288                 p += pat_charlen;
  4289                 d += buf_charlen;
  4290                 mcnt -= pat_charlen;
  4291                 nchars++;
  4292               }
  4293             while (mcnt > 0);
  4294           else
  4295             do
  4296               {
  4297                 int pat_charlen;
  4298                 int pat_ch, buf_ch;
  4299 
  4300                 PREFETCH (d = dfail);
  4301                 if (multibyte)
  4302                   {
  4303                     pat_ch = string_char_and_length (p, &pat_charlen);
  4304                     pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch);
  4305                   }
  4306                 else
  4307                   {
  4308                     pat_ch = *p;
  4309                     pat_charlen = 1;
  4310                   }
  4311                 buf_ch = RE_CHAR_TO_MULTIBYTE (*d);
  4312                 if (! CHAR_BYTE8_P (buf_ch))
  4313                   {
  4314                     buf_ch = TRANSLATE (buf_ch);
  4315                     buf_ch = RE_CHAR_TO_UNIBYTE (buf_ch);
  4316                     if (buf_ch < 0)
  4317                       buf_ch = *d;
  4318                   }
  4319                 else
  4320                   buf_ch = *d;
  4321                 if (buf_ch != pat_ch)
  4322                   {
  4323                     d = dfail;
  4324                     goto fail;
  4325                   }
  4326                 p += pat_charlen;
  4327                 d++;
  4328                 mcnt -= pat_charlen;
  4329                 nchars++;
  4330               }
  4331             while (mcnt > 0);
  4332 
  4333           break;
  4334 
  4335 
  4336         /* Match any character except newline.  */
  4337         case anychar:
  4338           {
  4339             int buf_charlen;
  4340             int buf_ch;
  4341 
  4342             DEBUG_PRINT ("EXECUTING anychar.\n");
  4343 
  4344             PREFETCH ();
  4345             buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen,
  4346                                                 target_multibyte);
  4347             buf_ch = TRANSLATE (buf_ch);
  4348             if (buf_ch == '\n')
  4349               goto fail;
  4350 
  4351             DEBUG_PRINT ("  Matched \"%d\".\n", *d);
  4352             d += buf_charlen;
  4353             nchars++;
  4354           }
  4355           break;
  4356 
  4357 
  4358         case charset:
  4359         case charset_not:
  4360           {
  4361             /* Whether matching against a unibyte character.  */
  4362             bool unibyte_char = false;
  4363 
  4364             DEBUG_PRINT ("EXECUTING charset%s.\n",
  4365                          (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
  4366 
  4367             PREFETCH ();
  4368             int len;
  4369             int corig = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
  4370             int c = corig;
  4371             if (target_multibyte)
  4372               {
  4373                 int c1;
  4374 
  4375                 c = TRANSLATE (c);
  4376                 c1 = RE_CHAR_TO_UNIBYTE (c);
  4377                 if (c1 >= 0)
  4378                   {
  4379                     unibyte_char = true;
  4380                     c = c1;
  4381                   }
  4382               }
  4383             else
  4384               {
  4385                 int c1 = RE_CHAR_TO_MULTIBYTE (c);
  4386 
  4387                 if (! CHAR_BYTE8_P (c1))
  4388                   {
  4389                     c1 = TRANSLATE (c1);
  4390                     c1 = RE_CHAR_TO_UNIBYTE (c1);
  4391                     if (c1 >= 0)
  4392                       {
  4393                         unibyte_char = true;
  4394                         c = c1;
  4395                       }
  4396                   }
  4397                 else
  4398                   unibyte_char = true;
  4399               }
  4400 
  4401             p -= 1;
  4402             if (!execute_charset (&p, c, corig, unibyte_char, translate))
  4403               goto fail;
  4404 
  4405             d += len;
  4406             nchars++;
  4407           }
  4408           break;
  4409 
  4410 
  4411         /* The beginning of a group is represented by start_memory.
  4412            The argument is the register number.  The text
  4413            matched within the group is recorded (in the internal
  4414            registers data structure) under the register number.  */
  4415         case start_memory:
  4416           DEBUG_PRINT ("EXECUTING start_memory %d:\n", *p);
  4417           eassert (0 < *p && *p < num_regs);
  4418 
  4419           /* In case we need to undo this operation (via backtracking).  */
  4420           PUSH_FAILURE_REG (*p);
  4421 
  4422           regstart[*p] = d;
  4423           DEBUG_PRINT ("  regstart: %td\n", POINTER_TO_OFFSET (regstart[*p]));
  4424 
  4425           /* Move past the register number and inner group count.  */
  4426           p += 1;
  4427           break;
  4428 
  4429 
  4430         /* The stop_memory opcode represents the end of a group.  Its
  4431            argument is the same as start_memory's: the register number.  */
  4432         case stop_memory:
  4433           DEBUG_PRINT ("EXECUTING stop_memory %d:\n", *p);
  4434 
  4435           eassert (0 < *p && *p < num_regs);
  4436           eassert (!REG_UNSET (regstart[*p]));
  4437           /* Strictly speaking, there should be code such as:
  4438 
  4439                 eassert (REG_UNSET (regend[*p]));
  4440                 PUSH_FAILURE_REGSTOP (*p);
  4441 
  4442              But the only info to be pushed is regend[*p] and it is known to
  4443              be UNSET, so there really isn't anything to push.
  4444              Not pushing anything, on the other hand deprives us from the
  4445              guarantee that regend[*p] is UNSET since undoing this operation
  4446              will not reset its value properly.  This is not important since
  4447              the value will only be read on the next start_memory or at
  4448              the very end and both events can only happen if this stop_memory
  4449              is *not* undone.  */
  4450 
  4451           regend[*p] = d;
  4452           DEBUG_PRINT ("      regend: %td\n", POINTER_TO_OFFSET (regend[*p]));
  4453 
  4454           /* Move past the register number and the inner group count.  */
  4455           p += 1;
  4456           break;
  4457 
  4458 
  4459         /* \<digit> has been turned into a 'duplicate' command which is
  4460            followed by the numeric value of <digit> as the register number.  */
  4461         case duplicate:
  4462           {
  4463             re_char *d2, *dend2;
  4464             int regno = *p++;   /* Get which register to match against.  */
  4465             DEBUG_PRINT ("EXECUTING duplicate %d.\n", regno);
  4466 
  4467             /* Can't back reference a group which we've never matched.  */
  4468             eassert (0 < regno && regno < num_regs);
  4469             eassert (REG_UNSET (regstart[regno]) <= REG_UNSET (regend[regno]));
  4470             if (REG_UNSET (regend[regno]))
  4471               goto fail;
  4472 
  4473             /* Where in input to try to start matching.  */
  4474             d2 = regstart[regno];
  4475 
  4476             /* Remember the start point to rollback upon failure.  */
  4477             dfail = d;
  4478 
  4479             /* Where to stop matching; if both the place to start and
  4480                the place to stop matching are in the same string, then
  4481                set to the place to stop, otherwise, for now have to use
  4482                the end of the first string.  */
  4483 
  4484             dend2 = ((FIRST_STRING_P (regstart[regno])
  4485                       == FIRST_STRING_P (regend[regno]))
  4486                      ? regend[regno] : end_match_1);
  4487             for (;;)
  4488               {
  4489                 ptrdiff_t dcnt;
  4490 
  4491                 /* If necessary, advance to next segment in register
  4492                    contents.  */
  4493                 while (d2 == dend2)
  4494                   {
  4495                     if (dend2 == end_match_2) break;
  4496                     if (dend2 == regend[regno]) break;
  4497 
  4498                     /* End of string1 => advance to string2. */
  4499                     d2 = string2;
  4500                     dend2 = regend[regno];
  4501                   }
  4502                 /* At end of register contents => success */
  4503                 if (d2 == dend2) break;
  4504 
  4505                 /* If necessary, advance to next segment in data.  */
  4506                 PREFETCH (d = dfail);
  4507 
  4508                 /* How many characters left in this segment to match.  */
  4509                 dcnt = dend - d;
  4510 
  4511                 /* Want how many consecutive characters we can match in
  4512                    one shot, so, if necessary, adjust the count.  */
  4513                 if (dcnt > dend2 - d2)
  4514                   dcnt = dend2 - d2;
  4515 
  4516                 /* Compare that many; failure if mismatch, else move
  4517                    past them.  */
  4518                 if (!NILP (translate)
  4519                     ? bcmp_translate (d, d2, dcnt, translate, target_multibyte)
  4520                     : memcmp (d, d2, dcnt))
  4521                   {
  4522                     d = dfail;
  4523                     goto fail;
  4524                   }
  4525                 d += dcnt, d2 += dcnt;
  4526                 nchars++;
  4527               }
  4528           }
  4529           break;
  4530 
  4531 
  4532         /* begline matches the empty string at the beginning of the string,
  4533            and after newlines.  */
  4534         case begline:
  4535           DEBUG_PRINT ("EXECUTING begline.\n");
  4536 
  4537           if (AT_STRINGS_BEG (d))
  4538             break;
  4539           else
  4540             {
  4541               unsigned c;
  4542               GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
  4543               if (c == '\n')
  4544                 break;
  4545             }
  4546           goto fail;
  4547 
  4548 
  4549         /* endline is the dual of begline.  */
  4550         case endline:
  4551           DEBUG_PRINT ("EXECUTING endline.\n");
  4552 
  4553           if (AT_STRINGS_END (d))
  4554             break;
  4555           PREFETCH_NOLIMIT ();
  4556           if (*d == '\n')
  4557             break;
  4558           goto fail;
  4559 
  4560 
  4561         /* Match at the very beginning of the data.  */
  4562         case begbuf:
  4563           DEBUG_PRINT ("EXECUTING begbuf.\n");
  4564           if (AT_STRINGS_BEG (d))
  4565             break;
  4566           goto fail;
  4567 
  4568 
  4569         /* Match at the very end of the data.  */
  4570         case endbuf:
  4571           DEBUG_PRINT ("EXECUTING endbuf.\n");
  4572           if (AT_STRINGS_END (d))
  4573             break;
  4574           goto fail;
  4575 
  4576 
  4577         /* on_failure_keep_string_jump is used to optimize '.*\n'.  It
  4578            pushes NULL as the value for the string on the stack.  Then
  4579            'POP_FAILURE_POINT' will keep the current value for the
  4580            string, instead of restoring it.  To see why, consider
  4581            matching 'foo\nbar' against '.*\n'.  The .* matches the foo;
  4582            then the . fails against the \n.  But the next thing we want
  4583            to do is match the \n against the \n; if we restored the
  4584            string value, we would be back at the foo.
  4585 
  4586            Because this is used only in specific cases, we don't need to
  4587            check all the things that 'on_failure_jump' does, to make
  4588            sure the right things get saved on the stack.  Hence we don't
  4589            share its code.  The only reason to push anything on the
  4590            stack at all is that otherwise we would have to change
  4591            'anychar's code to do something besides goto fail in this
  4592            case; that seems worse than this.  */
  4593         case on_failure_keep_string_jump:
  4594           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4595           DEBUG_PRINT ("EXECUTING on_failure_keep_string_jump %d (to %p):\n",
  4596                        mcnt, p + mcnt);
  4597 
  4598           PUSH_FAILURE_POINT (p - 3, NULL);
  4599           break;
  4600 
  4601           /* A nasty loop is introduced by the non-greedy *? and +?.
  4602              With such loops, the stack only ever contains one failure point
  4603              at a time, so that a plain on_failure_jump_loop kind of
  4604              cycle detection cannot work.  Worse yet, such a detection
  4605              can not only fail to detect a cycle, but it can also wrongly
  4606              detect a cycle (between different instantiations of the same
  4607              loop).
  4608              So the method used for those nasty loops is a little different:
  4609              We use a special cycle-detection-stack-frame which is pushed
  4610              when the on_failure_jump_nastyloop failure-point is *popped*.
  4611              This special frame thus marks the beginning of one iteration
  4612              through the loop and we can hence easily check right here
  4613              whether something matched between the beginning and the end of
  4614              the loop.  */
  4615         case on_failure_jump_nastyloop:
  4616           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4617           DEBUG_PRINT ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n",
  4618                        mcnt, p + mcnt);
  4619 
  4620           eassert ((re_opcode_t)p[-4] == no_op);
  4621           {
  4622             bool cycle = false;
  4623             CHECK_INFINITE_LOOP (p - 4, d);
  4624             if (!cycle)
  4625               /* If there's a cycle, just continue without pushing
  4626                  this failure point.  The failure point is the "try again"
  4627                  option, which shouldn't be tried.
  4628                  We want (x?)*?y\1z to match both xxyz and xxyxz.  */
  4629               PUSH_FAILURE_POINT (p - 3, d);
  4630           }
  4631           break;
  4632 
  4633           /* Simple loop detecting on_failure_jump:  just check on the
  4634              failure stack if the same spot was already hit earlier.  */
  4635         case on_failure_jump_loop:
  4636         on_failure:
  4637           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4638           DEBUG_PRINT ("EXECUTING on_failure_jump_loop %d (to %p):\n",
  4639                        mcnt, p + mcnt);
  4640           {
  4641             bool cycle = false;
  4642             CHECK_INFINITE_LOOP (p - 3, d);
  4643             if (cycle)
  4644               /* If there's a cycle, get out of the loop, as if the matching
  4645                  had failed.  We used to just 'goto fail' here, but that was
  4646                  aborting the search a bit too early: we want to keep the
  4647                  empty-loop-match and keep matching after the loop.
  4648                  We want (x?)*y\1z to match both xxyz and xxyxz.  */
  4649               p += mcnt;
  4650             else
  4651               PUSH_FAILURE_POINT (p - 3, d);
  4652           }
  4653           break;
  4654 
  4655 
  4656         /* Uses of on_failure_jump:
  4657 
  4658            Each alternative starts with an on_failure_jump that points
  4659            to the beginning of the next alternative.  Each alternative
  4660            except the last ends with a jump that in effect jumps past
  4661            the rest of the alternatives.  (They really jump to the
  4662            ending jump of the following alternative, because tensioning
  4663            these jumps is a hassle.)
  4664 
  4665            Repeats start with an on_failure_jump that points past both
  4666            the repetition text and either the following jump or
  4667            pop_failure_jump back to this on_failure_jump.  */
  4668         case on_failure_jump:
  4669           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4670           DEBUG_PRINT ("EXECUTING on_failure_jump %d (to %p):\n",
  4671                        mcnt, p + mcnt);
  4672 
  4673           PUSH_FAILURE_POINT (p -3, d);
  4674           break;
  4675 
  4676         /* This operation is used for greedy *.
  4677            Compare the beginning of the repeat with what in the
  4678            pattern follows its end. If we can establish that there
  4679            is nothing that they would both match, i.e., that we
  4680            would have to backtrack because of (as in, e.g., 'a*a')
  4681            then we can use a non-backtracking loop based on
  4682            on_failure_keep_string_jump instead of on_failure_jump.  */
  4683         case on_failure_jump_smart:
  4684           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4685           DEBUG_PRINT ("EXECUTING on_failure_jump_smart %d (to %p).\n",
  4686                        mcnt, p + mcnt);
  4687           {
  4688             re_char *p1 = p; /* Next operation.  */
  4689             /* Discard 'const', making re_search non-reentrant.  */
  4690             unsigned char *p2 = (unsigned char *) p + mcnt; /* Jump dest.  */
  4691             unsigned char *p3 = (unsigned char *) p - 3; /* opcode location.  */
  4692 
  4693             p -= 3;             /* Reset so that we will re-execute the
  4694                                    instruction once it's been changed. */
  4695 
  4696             EXTRACT_NUMBER (mcnt, p2 - 2);
  4697 
  4698             /* Ensure this is indeed the trivial kind of loop
  4699                we are expecting.  */
  4700             eassert (skip_one_char (p1) == p2 - 3);
  4701             eassert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
  4702             DEBUG_STATEMENT (regex_emacs_debug += 2);
  4703             if (mutually_exclusive_p (bufp, p1, p2))
  4704               {
  4705                 /* Use a fast 'on_failure_keep_string_jump' loop.  */
  4706                 DEBUG_PRINT ("  smart exclusive => fast loop.\n");
  4707                 *p3 = (unsigned char) on_failure_keep_string_jump;
  4708                 STORE_NUMBER (p2 - 2, mcnt + 3);
  4709               }
  4710             else
  4711               {
  4712                 /* Default to a safe 'on_failure_jump' loop.  */
  4713                 DEBUG_PRINT ("  smart default => slow loop.\n");
  4714                 *p3 = (unsigned char) on_failure_jump;
  4715               }
  4716             DEBUG_STATEMENT (regex_emacs_debug -= 2);
  4717           }
  4718           break;
  4719 
  4720         /* Unconditionally jump (without popping any failure points).  */
  4721         case jump:
  4722         unconditional_jump:
  4723           maybe_quit ();
  4724           EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
  4725           DEBUG_PRINT ("EXECUTING jump %d ", mcnt);
  4726           p += mcnt;                            /* Do the jump.  */
  4727           DEBUG_PRINT ("(to %p).\n", p);
  4728           break;
  4729 
  4730 
  4731         /* Have to succeed matching what follows at least n times.
  4732            After that, handle like 'on_failure_jump'.  */
  4733         case succeed_n:
  4734           /* Signedness doesn't matter since we only compare MCNT to 0.  */
  4735           EXTRACT_NUMBER (mcnt, p + 2);
  4736           DEBUG_PRINT ("EXECUTING succeed_n %d.\n", mcnt);
  4737 
  4738           /* Originally, mcnt is how many times we HAVE to succeed.  */
  4739           if (mcnt != 0)
  4740             {
  4741               /* Discard 'const', making re_search non-reentrant.  */
  4742               unsigned char *p2 = (unsigned char *) p + 2; /* counter loc.  */
  4743               mcnt--;
  4744               p += 4;
  4745               PUSH_NUMBER (p2, mcnt);
  4746             }
  4747           else
  4748             /* The two bytes encoding mcnt == 0 are two no_op opcodes.  */
  4749             goto on_failure;
  4750           break;
  4751 
  4752         case jump_n:
  4753           /* Signedness doesn't matter since we only compare MCNT to 0.  */
  4754           EXTRACT_NUMBER (mcnt, p + 2);
  4755           DEBUG_PRINT ("EXECUTING jump_n %d.\n", mcnt);
  4756 
  4757           /* Originally, this is how many times we CAN jump.  */
  4758           if (mcnt != 0)
  4759             {
  4760               /* Discard 'const', making re_search non-reentrant.  */
  4761               unsigned char *p2 = (unsigned char *) p + 2; /* counter loc.  */
  4762               mcnt--;
  4763               PUSH_NUMBER (p2, mcnt);
  4764               goto unconditional_jump;
  4765             }
  4766           /* If don't have to jump any more, skip over the rest of command.  */
  4767           else
  4768             p += 4;
  4769           break;
  4770 
  4771         case set_number_at:
  4772           {
  4773             unsigned char *p2;  /* Location of the counter.  */
  4774             DEBUG_PRINT ("EXECUTING set_number_at.\n");
  4775 
  4776             EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4777             /* Discard 'const', making re_search non-reentrant.  */
  4778             p2 = (unsigned char *) p + mcnt;
  4779             /* Signedness doesn't matter since we only copy MCNT's bits.  */
  4780             EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4781             DEBUG_PRINT ("  Setting %p to %d.\n", p2, mcnt);
  4782             PUSH_NUMBER (p2, mcnt);
  4783             break;
  4784           }
  4785 
  4786         case wordbound:
  4787         case notwordbound:
  4788           {
  4789             bool not = (re_opcode_t) *(p - 1) == notwordbound;
  4790             DEBUG_PRINT ("EXECUTING %swordbound.\n", not ? "not" : "");
  4791 
  4792             /* We SUCCEED (or FAIL) in one of the following cases: */
  4793 
  4794             /* Case 1: D is at the beginning or the end of string.  */
  4795             if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
  4796               not = !not;
  4797             else
  4798               {
  4799                 /* C1 is the character before D, S1 is the syntax of C1, C2
  4800                    is the character at D, and S2 is the syntax of C2.  */
  4801                 int c1, c2;
  4802                 int s1, s2;
  4803                 int dummy;
  4804                 ptrdiff_t offset = POINTER_TO_OFFSET (d);
  4805                 ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
  4806                 UPDATE_SYNTAX_TABLE (charpos);
  4807                 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
  4808                 nchars++;
  4809                 s1 = SYNTAX (c1);
  4810                 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
  4811                 PREFETCH_NOLIMIT ();
  4812                 GET_CHAR_AFTER (c2, d, dummy);
  4813                 nchars++;
  4814                 s2 = SYNTAX (c2);
  4815 
  4816                 if (/* Case 2: Only one of S1 and S2 is Sword.  */
  4817                     ((s1 == Sword) != (s2 == Sword))
  4818                     /* Case 3: Both of S1 and S2 are Sword, and macro
  4819                        WORD_BOUNDARY_P (C1, C2) returns nonzero.  */
  4820                     || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
  4821                   not = !not;
  4822               }
  4823             if (not)
  4824               break;
  4825             else
  4826               goto fail;
  4827           }
  4828 
  4829         case wordbeg:
  4830           DEBUG_PRINT ("EXECUTING wordbeg.\n");
  4831 
  4832           /* We FAIL in one of the following cases: */
  4833 
  4834           /* Case 1: D is at the end of string.  */
  4835           if (AT_STRINGS_END (d))
  4836             goto fail;
  4837           else
  4838             {
  4839               /* C1 is the character before D, S1 is the syntax of C1, C2
  4840                  is the character at D, and S2 is the syntax of C2.  */
  4841               int c1, c2;
  4842               int s1, s2;
  4843               int dummy;
  4844               ptrdiff_t offset = POINTER_TO_OFFSET (d);
  4845               ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset);
  4846               UPDATE_SYNTAX_TABLE (charpos);
  4847               PREFETCH ();
  4848               GET_CHAR_AFTER (c2, d, dummy);
  4849               nchars++;
  4850               s2 = SYNTAX (c2);
  4851 
  4852               /* Case 2: S2 is not Sword. */
  4853               if (s2 != Sword)
  4854                 goto fail;
  4855 
  4856               /* Case 3: D is not at the beginning of string ... */
  4857               if (!AT_STRINGS_BEG (d))
  4858                 {
  4859                   GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
  4860                   nchars++;
  4861                   UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
  4862                   s1 = SYNTAX (c1);
  4863 
  4864                   /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
  4865                      returns 0.  */
  4866                   if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
  4867                     goto fail;
  4868                 }
  4869             }
  4870           break;
  4871 
  4872         case wordend:
  4873           DEBUG_PRINT ("EXECUTING wordend.\n");
  4874 
  4875           /* We FAIL in one of the following cases: */
  4876 
  4877           /* Case 1: D is at the beginning of string.  */
  4878           if (AT_STRINGS_BEG (d))
  4879             goto fail;
  4880           else
  4881             {
  4882               /* C1 is the character before D, S1 is the syntax of C1, C2
  4883                  is the character at D, and S2 is the syntax of C2.  */
  4884               int c1, c2;
  4885               int s1, s2;
  4886               int dummy;
  4887               ptrdiff_t offset = POINTER_TO_OFFSET (d);
  4888               ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
  4889               UPDATE_SYNTAX_TABLE (charpos);
  4890               GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
  4891               nchars++;
  4892               s1 = SYNTAX (c1);
  4893 
  4894               /* Case 2: S1 is not Sword.  */
  4895               if (s1 != Sword)
  4896                 goto fail;
  4897 
  4898               /* Case 3: D is not at the end of string ... */
  4899               if (!AT_STRINGS_END (d))
  4900                 {
  4901                   PREFETCH_NOLIMIT ();
  4902                   GET_CHAR_AFTER (c2, d, dummy);
  4903                   nchars++;
  4904                   UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
  4905                   s2 = SYNTAX (c2);
  4906 
  4907                   /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
  4908                      returns 0.  */
  4909                   if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
  4910           goto fail;
  4911                 }
  4912             }
  4913           break;
  4914 
  4915         case symbeg:
  4916           DEBUG_PRINT ("EXECUTING symbeg.\n");
  4917 
  4918           /* We FAIL in one of the following cases: */
  4919 
  4920           /* Case 1: D is at the end of string.  */
  4921           if (AT_STRINGS_END (d))
  4922             goto fail;
  4923           else
  4924             {
  4925               /* C1 is the character before D, S1 is the syntax of C1, C2
  4926                  is the character at D, and S2 is the syntax of C2.  */
  4927               int c1, c2;
  4928               int s1, s2;
  4929               ptrdiff_t offset = POINTER_TO_OFFSET (d);
  4930               ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset);
  4931               UPDATE_SYNTAX_TABLE (charpos);
  4932               PREFETCH ();
  4933               c2 = RE_STRING_CHAR (d, target_multibyte);
  4934               nchars++;
  4935               s2 = SYNTAX (c2);
  4936 
  4937               /* Case 2: S2 is neither Sword nor Ssymbol. */
  4938               if (s2 != Sword && s2 != Ssymbol)
  4939                 goto fail;
  4940 
  4941               /* Case 3: D is not at the beginning of string ... */
  4942               if (!AT_STRINGS_BEG (d))
  4943                 {
  4944                   GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
  4945                   nchars++;
  4946                   UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
  4947                   s1 = SYNTAX (c1);
  4948 
  4949                   /* ... and S1 is Sword or Ssymbol.  */
  4950                   if (s1 == Sword || s1 == Ssymbol)
  4951                     goto fail;
  4952                 }
  4953             }
  4954           break;
  4955 
  4956         case symend:
  4957           DEBUG_PRINT ("EXECUTING symend.\n");
  4958 
  4959           /* We FAIL in one of the following cases: */
  4960 
  4961           /* Case 1: D is at the beginning of string.  */
  4962           if (AT_STRINGS_BEG (d))
  4963             goto fail;
  4964           else
  4965             {
  4966               /* C1 is the character before D, S1 is the syntax of C1, C2
  4967                  is the character at D, and S2 is the syntax of C2.  */
  4968               int c1, c2;
  4969               int s1, s2;
  4970               ptrdiff_t offset = POINTER_TO_OFFSET (d);
  4971               ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
  4972               UPDATE_SYNTAX_TABLE (charpos);
  4973               GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
  4974               nchars++;
  4975               s1 = SYNTAX (c1);
  4976 
  4977               /* Case 2: S1 is neither Ssymbol nor Sword.  */
  4978               if (s1 != Sword && s1 != Ssymbol)
  4979                 goto fail;
  4980 
  4981               /* Case 3: D is not at the end of string ... */
  4982               if (!AT_STRINGS_END (d))
  4983                 {
  4984                   PREFETCH_NOLIMIT ();
  4985                   c2 = RE_STRING_CHAR (d, target_multibyte);
  4986                   nchars++;
  4987                   UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
  4988                   s2 = SYNTAX (c2);
  4989 
  4990                   /* ... and S2 is Sword or Ssymbol.  */
  4991                   if (s2 == Sword || s2 == Ssymbol)
  4992                     goto fail;
  4993                 }
  4994             }
  4995           break;
  4996 
  4997         case syntaxspec:
  4998         case notsyntaxspec:
  4999           {
  5000             bool not = (re_opcode_t) *(p - 1) == notsyntaxspec;
  5001             mcnt = *p++;
  5002             DEBUG_PRINT ("EXECUTING %ssyntaxspec %d.\n", not ? "not" : "",
  5003                          mcnt);
  5004             PREFETCH ();
  5005             {
  5006               ptrdiff_t offset = POINTER_TO_OFFSET (d);
  5007               ptrdiff_t pos1 = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset);
  5008               UPDATE_SYNTAX_TABLE (pos1);
  5009             }
  5010             {
  5011               int len;
  5012               int c;
  5013 
  5014               GET_CHAR_AFTER (c, d, len);
  5015               if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
  5016                 goto fail;
  5017               d += len;
  5018               nchars++;
  5019             }
  5020           }
  5021           break;
  5022 
  5023         case at_dot:
  5024           DEBUG_PRINT ("EXECUTING at_dot.\n");
  5025           if (PTR_BYTE_POS (d) != PT_BYTE)
  5026             goto fail;
  5027           break;
  5028 
  5029         case categoryspec:
  5030         case notcategoryspec:
  5031           {
  5032             bool not = (re_opcode_t) *(p - 1) == notcategoryspec;
  5033             mcnt = *p++;
  5034             DEBUG_PRINT ("EXECUTING %scategoryspec %d.\n",
  5035                          not ? "not" : "", mcnt);
  5036             PREFETCH ();
  5037 
  5038             {
  5039               int len;
  5040               int c;
  5041               GET_CHAR_AFTER (c, d, len);
  5042               if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
  5043                 goto fail;
  5044               d += len;
  5045               nchars++;
  5046             }
  5047           }
  5048           break;
  5049 
  5050         default:
  5051           abort ();
  5052         }
  5053       continue;  /* Successfully executed one pattern command; keep going.  */
  5054 
  5055 
  5056     /* We goto here if a matching operation fails. */
  5057     fail:
  5058       maybe_quit ();
  5059       if (!FAIL_STACK_EMPTY ())
  5060         {
  5061           re_char *str, *pat;
  5062           /* A restart point is known.  Restore to that state.  */
  5063           DEBUG_PRINT ("\nFAIL:\n");
  5064           POP_FAILURE_POINT (str, pat);
  5065           switch (*pat++)
  5066             {
  5067             case on_failure_keep_string_jump:
  5068               eassert (str == NULL);
  5069               goto continue_failure_jump;
  5070 
  5071             case on_failure_jump_nastyloop:
  5072               eassert ((re_opcode_t)pat[-2] == no_op);
  5073               PUSH_FAILURE_POINT (pat - 2, str);
  5074               FALLTHROUGH;
  5075             case on_failure_jump_loop:
  5076             case on_failure_jump:
  5077             case succeed_n:
  5078               d = str;
  5079             continue_failure_jump:
  5080               EXTRACT_NUMBER_AND_INCR (mcnt, pat);
  5081               p = pat + mcnt;
  5082               break;
  5083 
  5084             case no_op:
  5085               /* A special frame used for nastyloops. */
  5086               goto fail;
  5087 
  5088             default:
  5089               abort ();
  5090             }
  5091 
  5092           eassert (p >= bufp->buffer && p <= pend);
  5093 
  5094           if (d >= string1 && d <= end1)
  5095             dend = end_match_1;
  5096         }
  5097       else
  5098         break;   /* Matching at this starting point really fails.  */
  5099     } /* for (;;) */
  5100 
  5101   if (best_regs_set)
  5102     goto restore_best_regs;
  5103 
  5104   unbind_to (count, Qnil);
  5105   SAFE_FREE ();
  5106 
  5107   if (max_redisplay_ticks > 0 && nchars > 0)
  5108     update_redisplay_ticks (nchars / 50 + 1, NULL);
  5109 
  5110   return -1;                            /* Failure to match.  */
  5111 }
  5112 
  5113 /* Subroutine definitions for re_match_2.  */
  5114 
  5115 /* Return true if TRANSLATE[S1] and TRANSLATE[S2] are not identical
  5116    for LEN bytes.  */
  5117 
  5118 static bool
  5119 bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len,
  5120                 Lisp_Object translate, bool target_multibyte)
  5121 {
  5122   re_char *p1 = s1, *p2 = s2;
  5123   re_char *p1_end = s1 + len;
  5124   re_char *p2_end = s2 + len;
  5125 
  5126   /* FIXME: Checking both p1 and p2 presumes that the two strings might have
  5127      different lengths, but relying on a single LEN would break this. -sm  */
  5128   while (p1 < p1_end && p2 < p2_end)
  5129     {
  5130       int p1_charlen, p2_charlen;
  5131       int p1_ch, p2_ch;
  5132 
  5133       GET_CHAR_AFTER (p1_ch, p1, p1_charlen);
  5134       GET_CHAR_AFTER (p2_ch, p2, p2_charlen);
  5135 
  5136       if (RE_TRANSLATE (translate, p1_ch)
  5137           != RE_TRANSLATE (translate, p2_ch))
  5138         return true;
  5139 
  5140       p1 += p1_charlen, p2 += p2_charlen;
  5141     }
  5142 
  5143   return p1 != p1_end || p2 != p2_end;
  5144 }
  5145 
  5146 /* Entry points for GNU code.  */
  5147 
  5148 /* re_compile_pattern is the GNU regular expression compiler: it
  5149    compiles PATTERN (of length SIZE) and puts the result in BUFP.
  5150    Returns 0 if the pattern was valid, otherwise an error string.
  5151 
  5152    Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
  5153    are set in BUFP on entry.
  5154 
  5155    We call regex_compile to do the actual compilation.  */
  5156 
  5157 const char *
  5158 re_compile_pattern (const char *pattern, ptrdiff_t length,
  5159                     bool posix_backtracking, const char *whitespace_regexp,
  5160                     struct re_pattern_buffer *bufp)
  5161 {
  5162   bufp->regs_allocated = REGS_UNALLOCATED;
  5163 
  5164   reg_errcode_t ret
  5165       = regex_compile ((re_char *) pattern, length,
  5166                        posix_backtracking,
  5167                        whitespace_regexp,
  5168                        bufp);
  5169 
  5170   if (!ret)
  5171     return NULL;
  5172   return re_error_msgid[ret];
  5173 }

/* [<][>][^][v][top][bottom][index][help] */