src/regex-emacs.c

/* [<][>][^][v][top][bottom][index][help] */
This source file includes following definitions.
extract_number
extract_number_and_incr
debug_putchar
print_fastmap
print_partial_compiled_pattern
print_compiled_pattern
print_double_string
re_wctype_parse
re_iswctype
re_wctype_to_bit
extend_range_table_work_area
regex_compile
store_op1
store_op2
insert_op1
insert_op2
at_begline_loc_p
at_endline_loc_p
group_in_compile_stack
analyze_first
re_compile_fastmap
re_set_registers
re_search
re_search_2
skip_one_char
skip_noops
execute_charset
mutually_exclusive_p
re_match_2
unwind_re_match
re_match_2_internal
bcmp_translate
re_compile_pattern
     1 /* Emacs regular expression matching and search
     2 
     3    Copyright (C) 1993-2023 Free Software Foundation, Inc.
     4 
     5    This program is free software; you can redistribute it and/or modify
     6    it under the terms of the GNU General Public License as published by
     7    the Free Software Foundation; either version 3, or (at your option)
     8    any later version.
     9 
    10    This program is distributed in the hope that it will be useful,
    11    but WITHOUT ANY WARRANTY; without even the implied warranty of
    12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
    13    GNU General Public License for more details.
    14 
    15    You should have received a copy of the GNU General Public License
    16    along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
    17 
    18 /* TODO:
    19    - structure the opcode space into opcode+flag.
    20    - replace (succeed_n + jump_n + set_number_at) with something that doesn't
    21      need to modify the compiled regexp so that re_search can be reentrant.
    22    - get rid of on_failure_jump_smart by doing the optimization in re_comp
    23      rather than at run-time, so that re_search can be reentrant.
    24 */
    25 
    26 #include <config.h>
    27 
    28 #include "regex-emacs.h"
    29 
    30 #include <stdlib.h>
    31 
    32 #include "character.h"
    33 #include "buffer.h"
    34 #include "syntax.h"
    35 #include "category.h"
    36 #include "dispextern.h"
    37 
    38 /* Maximum number of duplicates an interval can allow.  Some systems
    39    define this in other header files, but we want our value, so remove
    40    any previous define.  Repeat counts are stored in opcodes as 2-byte
    41    unsigned integers.  */
    42 #ifdef RE_DUP_MAX
    43 # undef RE_DUP_MAX
    44 #endif
    45 #define RE_DUP_MAX (0xffff)
    46 
    47 /* Make syntax table lookup grant data in gl_state.  */
    48 #define SYNTAX(c) syntax_property (c, 1)
    49 
    50 /* Convert the pointer to the char to BEG-based offset from the start.  */
    51 #define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
    52 /* Strings are 0-indexed, buffers are 1-indexed; pun on the boolean
    53    result to get the right base index.  */
    54 #define POS_AS_IN_BUFFER(p)                                    \
    55   ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object)))
    56 
    57 #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
    58 #define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
    59 #define RE_STRING_CHAR(p, multibyte) \
    60   (multibyte ? STRING_CHAR (p) : *(p))
    61 #define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
    62   (multibyte ? string_char_and_length (p, &(len)) : ((len) = 1, *(p)))
    63 
    64 #define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
    65 
    66 #define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
    67 
    68 /* Set C a (possibly converted to multibyte) character before P.  P
    69    points into a string which is the virtual concatenation of STR1
    70    (which ends at END1) or STR2 (which ends at END2).  */
    71 #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)                      \
    72   do {                                                                       \
    73     if (target_multibyte)                                                    \
    74       {                                                                      \
    75         re_char *dtemp = (p) == (str2) ? (end1) : (p);                       \
    76         re_char *dlimit = (p) > (str2) && (p) <= (end2) ? (str2) : (str1);   \
    77         while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp))                    \
    78           continue;                                                          \
    79         c = STRING_CHAR (dtemp);                                             \
    80       }                                                                      \
    81     else                                                                     \
    82       {                                                                      \
    83         (c = ((p) == (str2) ? (end1) : (p))[-1]);                            \
    84         (c) = RE_CHAR_TO_MULTIBYTE (c);                                      \
    85       }                                                                      \
    86   } while (false)
    87 
    88 /* Set C a (possibly converted to multibyte) character at P, and set
    89    LEN to the byte length of that character.  */
    90 #define GET_CHAR_AFTER(c, p, len)               \
    91   do {                                          \
    92     if (target_multibyte)                       \
    93       (c) = string_char_and_length (p, &(len)); \
    94     else                                        \
    95       {                                         \
    96         (c) = *p;                               \
    97         len = 1;                                \
    98         (c) = RE_CHAR_TO_MULTIBYTE (c);         \
    99       }                                         \
   100    } while (false)
   101 
   102 /* 1 if C is an ASCII character.  */
   103 #define IS_REAL_ASCII(c) ((c) < 0200)
   104 
   105 /* 1 if C is a unibyte character.  */
   106 #define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
   107 
   108 /* The Emacs definitions should not be directly affected by locales.  */
   109 
   110 /* In Emacs, these are only used for single-byte characters.  */
   111 #define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
   112 #define ISCNTRL(c) ((c) < ' ')
   113 #define ISXDIGIT(c) (0 <= char_hexdigit (c))
   114 
   115 /* The rest must handle multibyte characters.  */
   116 
   117 #define ISBLANK(c) (IS_REAL_ASCII (c)                   \
   118                      ? ((c) == ' ' || (c) == '\t')      \
   119                      : blankp (c))
   120 
   121 #define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c)                              \
   122                      ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240)       \
   123                      : graphicp (c))
   124 
   125 #define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c)                              \
   126                     ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237)       \
   127                      : printablep (c))
   128 
   129 #define ISALNUM(c) (IS_REAL_ASCII (c)                   \
   130                     ? (((c) >= 'a' && (c) <= 'z')       \
   131                        || ((c) >= 'A' && (c) <= 'Z')    \
   132                        || ((c) >= '0' && (c) <= '9'))   \
   133                     : alphanumericp (c))
   134 
   135 #define ISALPHA(c) (IS_REAL_ASCII (c)                   \
   136                     ? (((c) >= 'a' && (c) <= 'z')       \
   137                        || ((c) >= 'A' && (c) <= 'Z'))   \
   138                     : alphabeticp (c))
   139 
   140 #define ISLOWER(c) lowercasep (c)
   141 
   142 #define ISPUNCT(c) (IS_REAL_ASCII (c)                           \
   143                     ? ((c) > ' ' && (c) < 0177                  \
   144                        && !(((c) >= 'a' && (c) <= 'z')          \
   145                             || ((c) >= 'A' && (c) <= 'Z')       \
   146                             || ((c) >= '0' && (c) <= '9')))     \
   147                     : SYNTAX (c) != Sword)
   148 
   149 #define ISSPACE(c) (SYNTAX (c) == Swhitespace)
   150 
   151 #define ISUPPER(c) uppercasep (c)
   152 
   153 #define ISWORD(c) (SYNTAX (c) == Sword)
   154 
   155 /* Use alloca instead of malloc.  This is because using malloc in
   156    re_search* or re_match* could cause memory leaks when C-g is used
   157    in Emacs (note that SAFE_ALLOCA could also call malloc, but does so
   158    via 'record_xmalloc' which uses 'unwind_protect' to ensure the
   159    memory is freed even in case of non-local exits); also, malloc is
   160    slower and causes storage fragmentation.  On the other hand, malloc
   161    is more portable, and easier to debug.
   162 
   163    Because we sometimes use alloca, some routines have to be macros,
   164    not functions -- 'alloca'-allocated space disappears at the end of the
   165    function it is called in.  */
   166 
   167 /* This may be adjusted in main(), if the stack is successfully grown.  */
   168 ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
   169 /* Like USE_SAFE_ALLOCA, but use emacs_re_safe_alloca.  */
   170 #define REGEX_USE_SAFE_ALLOCA                                          \
   171   USE_SAFE_ALLOCA; sa_avail = emacs_re_safe_alloca
   172 
   173 /* Assumes a 'char *destination' variable.  */
   174 #define REGEX_REALLOCATE(source, osize, nsize)                          \
   175   (destination = SAFE_ALLOCA (nsize),                                   \
   176    memcpy (destination, source, osize))
   177 
   178 /* True if 'size1' is non-NULL and PTR is pointing anywhere inside
   179    'string1' or just past its end.  This works if PTR is NULL, which is
   180    a good thing.  */
   181 #define FIRST_STRING_P(ptr)                                     \
   182   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
   183 
   184 #define BYTEWIDTH 8 /* In bits.  */
   185 
   186 /* Type of source-pattern and string chars.  */
   187 typedef const unsigned char re_char;
   188 
   189 static void re_compile_fastmap (struct re_pattern_buffer *);
   190 static ptrdiff_t re_match_2_internal (struct re_pattern_buffer *bufp,
   191                                      re_char *string1, ptrdiff_t size1,
   192                                      re_char *string2, ptrdiff_t size2,
   193                                      ptrdiff_t pos,
   194                                      struct re_registers *regs,
   195                                      ptrdiff_t stop);
   196 
   197 /* These are the command codes that appear in compiled regular
   198    expressions.  Some opcodes are followed by argument bytes.  A
   199    command code can specify any interpretation whatsoever for its
   200    arguments.  Zero bytes may appear in the compiled regular expression.  */
   201 
   202 typedef enum
   203 {
   204   no_op = 0,
   205 
   206   /* Succeed right away--no more backtracking.  */
   207   succeed,
   208 
   209         /* Followed by one byte giving n, then by n literal bytes.  */
   210   exactn,
   211 
   212         /* Matches any (more or less) character.  */
   213   anychar,
   214 
   215         /* Matches any one char belonging to specified set.  First
   216            following byte is number of bitmap bytes.  Then come bytes
   217            for a bitmap saying which chars are in.  Bits in each byte
   218            are ordered low-bit-first.  A character is in the set if its
   219            bit is 1.  A character too large to have a bit in the map is
   220            automatically not in the set.
   221 
   222            If the length byte has the 0x80 bit set, then that stuff
   223            is followed by a range table:
   224                2 bytes of flags for character sets (low 8 bits, high 8 bits)
   225                    See RANGE_TABLE_WORK_BITS below.
   226                2 bytes, the number of pairs that follow (up to 32767)
   227                pairs, each 2 multibyte characters,
   228                    each multibyte character represented as 3 bytes.  */
   229   charset,
   230 
   231         /* Same parameters as charset, but match any character that is
   232            not one of those specified.  */
   233   charset_not,
   234 
   235         /* Start remembering the text that is matched, for storing in a
   236            register.  Followed by one byte with the register number, in
   237            the range 0 to one less than the pattern buffer's re_nsub
   238            field.  */
   239   start_memory,
   240 
   241         /* Stop remembering the text that is matched and store it in a
   242            memory register.  Followed by one byte with the register
   243            number, in the range 0 to one less than 're_nsub' in the
   244            pattern buffer.  */
   245   stop_memory,
   246 
   247         /* Match a duplicate of something remembered. Followed by one
   248            byte containing the register number.  */
   249   duplicate,
   250 
   251         /* Fail unless at beginning of line.  */
   252   begline,
   253 
   254         /* Fail unless at end of line.  */
   255   endline,
   256 
   257         /* Succeeds if at beginning of buffer.  */
   258   begbuf,
   259 
   260         /* Analogously, for end of buffer/string.  */
   261   endbuf,
   262 
   263         /* Followed by two byte relative address to which to jump.  */
   264   jump,
   265 
   266         /* Followed by two-byte relative address of place to resume at
   267            in case of failure.  */
   268   on_failure_jump,
   269 
   270         /* Like on_failure_jump, but pushes a placeholder instead of the
   271            current string position when executed.  */
   272   on_failure_keep_string_jump,
   273 
   274         /* Just like 'on_failure_jump', except that it checks that we
   275            don't get stuck in an infinite loop (matching an empty string
   276            indefinitely).  */
   277   on_failure_jump_loop,
   278 
   279         /* Just like 'on_failure_jump_loop', except that it checks for
   280            a different kind of loop (the kind that shows up with non-greedy
   281            operators).  This operation has to be immediately preceded
   282            by a 'no_op'.  */
   283   on_failure_jump_nastyloop,
   284 
   285         /* A smart 'on_failure_jump' used for greedy * and + operators.
   286            It analyzes the loop before which it is put and if the
   287            loop does not require backtracking, it changes itself to
   288            'on_failure_keep_string_jump' and short-circuits the loop,
   289            else it just defaults to changing itself into 'on_failure_jump'.
   290            It assumes that it is pointing to just past a 'jump'.  */
   291   on_failure_jump_smart,
   292 
   293         /* Followed by two-byte relative address and two-byte number n.
   294            After matching N times, jump to the address upon failure.
   295            Does not work if N starts at 0: use on_failure_jump_loop
   296            instead.  */
   297   succeed_n,
   298 
   299         /* Followed by two-byte relative address, and two-byte number n.
   300            Jump to the address N times, then fail.  */
   301   jump_n,
   302 
   303         /* Set the following two-byte relative address to the
   304            subsequent two-byte number.  The address *includes* the two
   305            bytes of number.  */
   306   set_number_at,
   307 
   308   wordbeg,      /* Succeeds if at word beginning.  */
   309   wordend,      /* Succeeds if at word end.  */
   310 
   311   wordbound,    /* Succeeds if at a word boundary.  */
   312   notwordbound, /* Succeeds if not at a word boundary.  */
   313 
   314   symbeg,       /* Succeeds if at symbol beginning.  */
   315   symend,       /* Succeeds if at symbol end.  */
   316 
   317         /* Matches any character whose syntax is specified.  Followed by
   318            a byte which contains a syntax code, e.g., Sword.  */
   319   syntaxspec,
   320 
   321         /* Matches any character whose syntax is not that specified.  */
   322   notsyntaxspec,
   323 
   324   at_dot,       /* Succeeds if at point.  */
   325 
   326   /* Matches any character whose category-set contains the specified
   327      category.  The operator is followed by a byte which contains a
   328      category code (mnemonic ASCII character).  */
   329   categoryspec,
   330 
   331   /* Matches any character whose category-set does not contain the
   332      specified category.  The operator is followed by a byte which
   333      contains the category code (mnemonic ASCII character).  */
   334   notcategoryspec
   335 } re_opcode_t;
   336 
   337 /* Common operations on the compiled pattern.  */
   338 
   339 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
   340 
   341 #define STORE_NUMBER(destination, number)                               \
   342   do {                                                                  \
   343     (destination)[0] = (number) & 0377;                                 \
   344     (destination)[1] = (number) >> 8;                                   \
   345   } while (false)
   346 
   347 /* Same as STORE_NUMBER, except increment DESTINATION to
   348    the byte after where the number is stored.  Therefore, DESTINATION
   349    must be an lvalue.  */
   350 
   351 #define STORE_NUMBER_AND_INCR(destination, number)                      \
   352   do {                                                                  \
   353     STORE_NUMBER (destination, number);                                 \
   354     (destination) += 2;                                                 \
   355   } while (false)
   356 
   357 /* Put into DESTINATION a number stored in two contiguous bytes starting
   358    at SOURCE.  */
   359 
   360 #define EXTRACT_NUMBER(destination, source)                             \
   361   ((destination) = extract_number (source))
   362 
   363 static int
   364 extract_number (re_char *source)
   365 {
   366   signed char leading_byte = source[1];
   367   return leading_byte * 256 + source[0];
   368 }
   369 
   370 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
   371    SOURCE must be an lvalue.  */
   372 
   373 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
   374   ((destination) = extract_number_and_incr (&source))
   375 
   376 static int
   377 extract_number_and_incr (re_char **source)
   378 {
   379   int num = extract_number (*source);
   380   *source += 2;
   381   return num;
   382 }
   383 
   384 /* Store a multibyte character in three contiguous bytes starting
   385    DESTINATION, and increment DESTINATION to the byte after where the
   386    character is stored.  Therefore, DESTINATION must be an lvalue.  */
   387 
   388 #define STORE_CHARACTER_AND_INCR(destination, character)        \
   389   do {                                                          \
   390     (destination)[0] = (character) & 0377;                      \
   391     (destination)[1] = ((character) >> 8) & 0377;               \
   392     (destination)[2] = (character) >> 16;                       \
   393     (destination) += 3;                                         \
   394   } while (false)
   395 
   396 /* Put into DESTINATION a character stored in three contiguous bytes
   397    starting at SOURCE.  */
   398 
   399 #define EXTRACT_CHARACTER(destination, source)  \
   400   do {                                          \
   401     (destination) = ((source)[0]                \
   402                      | ((source)[1] << 8)       \
   403                      | ((source)[2] << 16));    \
   404   } while (false)
   405 
   406 
   407 /* Macros for charset. */
   408 
   409 /* Size of bitmap of charset P in bytes.  P is a start of charset,
   410    i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
   411 #define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
   412 
   413 /* Nonzero if charset P has range table.  */
   414 #define CHARSET_RANGE_TABLE_EXISTS_P(p)  (((p)[1] & 0x80) != 0)
   415 
   416 /* Return the address of range table of charset P.  But not the start
   417    of table itself, but the before where the number of ranges is
   418    stored.  '2 +' means to skip re_opcode_t and size of bitmap,
   419    and the 2 bytes of flags at the start of the range table.  */
   420 #define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
   421 
   422 /* Extract the bit flags that start a range table.  */
   423 #define CHARSET_RANGE_TABLE_BITS(p)             \
   424   ((p)[2 + CHARSET_BITMAP_SIZE (p)]             \
   425    + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
   426 
   427 /* Return the address of end of RANGE_TABLE.  COUNT is number of
   428    ranges (which is a pair of (start, end)) in the RANGE_TABLE.  '* 2'
   429    is start of range and end of range.  '* 3' is size of each start
   430    and end.  */
   431 #define CHARSET_RANGE_TABLE_END(range_table, count)     \
   432   ((range_table) + (count) * 2 * 3)
   433 
   434 /* If REGEX_EMACS_DEBUG is defined, print many voluminous messages
   435    (if the variable regex_emacs_debug is positive).  */
   436 
   437 #ifdef REGEX_EMACS_DEBUG
   438 
   439 /* Use standard I/O for debugging.  */
   440 # include "sysstdio.h"
   441 
   442 static int regex_emacs_debug = -100000;
   443 
   444 # define DEBUG_STATEMENT(e) e
   445 # define DEBUG_PRINT(...)                                       \
   446   if (regex_emacs_debug > 0) fprintf (stderr, __VA_ARGS__)
   447 # define DEBUG_COMPILES_ARGUMENTS
   448 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                          \
   449   if (regex_emacs_debug > 0) print_partial_compiled_pattern (s, e)
   450 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                 \
   451   if (regex_emacs_debug > 0) print_double_string (w, s1, sz1, s2, sz2)
   452 
   453 static void
   454 debug_putchar (int c)
   455 {
   456   if (c >= 32 && c <= 126)
   457     putc (c, stderr);
   458   else
   459     {
   460       unsigned int uc = c;
   461       fprintf (stderr, "{%02x}", uc);
   462     }
   463 }
   464 
   465 /* Print the fastmap in human-readable form.  */
   466 
   467 static void
   468 print_fastmap (char *fastmap)
   469 {
   470   bool was_a_range = false;
   471   int i = 0;
   472 
   473   while (i < (1 << BYTEWIDTH))
   474     {
   475       if (fastmap[i++])
   476         {
   477           was_a_range = false;
   478           debug_putchar (i - 1);
   479           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
   480             {
   481               was_a_range = true;
   482               i++;
   483             }
   484           if (was_a_range)
   485             {
   486               debug_putchar ('-');
   487               debug_putchar (i - 1);
   488             }
   489         }
   490     }
   491   putc ('\n', stderr);
   492 }
   493 
   494 
   495 /* Print a compiled pattern string in human-readable form, starting at
   496    the START pointer into it and ending just before the pointer END.  */
   497 
   498 static void
   499 print_partial_compiled_pattern (re_char *start, re_char *end)
   500 {
   501   int mcnt, mcnt2;
   502   re_char *p = start;
   503   re_char *pend = end;
   504 
   505   if (start == NULL)
   506     {
   507       fputs ("(null)\n", stderr);
   508       return;
   509     }
   510 
   511   /* Loop over pattern commands.  */
   512   while (p < pend)
   513     {
   514       fprintf (stderr, "%td:\t", p - start);
   515 
   516       switch ((re_opcode_t) *p++)
   517         {
   518         case no_op:
   519           fputs ("/no_op", stderr);
   520           break;
   521 
   522         case succeed:
   523           fputs ("/succeed", stderr);
   524           break;
   525 
   526         case exactn:
   527           mcnt = *p++;
   528           fprintf (stderr, "/exactn/%d", mcnt);
   529           do
   530             {
   531               debug_putchar ('/');
   532               debug_putchar (*p++);
   533             }
   534           while (--mcnt);
   535           break;
   536 
   537         case start_memory:
   538           fprintf (stderr, "/start_memory/%d", *p++);
   539           break;
   540 
   541         case stop_memory:
   542           fprintf (stderr, "/stop_memory/%d", *p++);
   543           break;
   544 
   545         case duplicate:
   546           fprintf (stderr, "/duplicate/%d", *p++);
   547           break;
   548 
   549         case anychar:
   550           fputs ("/anychar", stderr);
   551           break;
   552 
   553         case charset:
   554         case charset_not:
   555           {
   556             int c, last = -100;
   557             bool in_range = false;
   558             int length = CHARSET_BITMAP_SIZE (p - 1);
   559             bool has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
   560 
   561             fprintf (stderr, "/charset [%s",
   562                      (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
   563 
   564             if (p + *p >= pend)
   565               fputs (" !extends past end of pattern! ", stderr);
   566 
   567             for (c = 0; c < 256; c++)
   568               if (c / 8 < length
   569                   && (p[1 + (c/8)] & (1 << (c % 8))))
   570                 {
   571                   /* Are we starting a range?  */
   572                   if (last + 1 == c && ! in_range)
   573                     {
   574                       debug_putchar ('-');
   575                       in_range = true;
   576                     }
   577                   /* Have we broken a range?  */
   578                   else if (last + 1 != c && in_range)
   579                     {
   580                       debug_putchar (last);
   581                       in_range = false;
   582                     }
   583 
   584                   if (! in_range)
   585                     debug_putchar (c);
   586 
   587                   last = c;
   588               }
   589 
   590             if (in_range)
   591               debug_putchar (last);
   592 
   593             debug_putchar (']');
   594 
   595             p += 1 + length;
   596 
   597             if (has_range_table)
   598               {
   599                 int count;
   600                 fputs ("has-range-table", stderr);
   601 
   602                 /* ??? Should print the range table; for now, just skip it.  */
   603                 p += 2;         /* skip range table bits */
   604                 EXTRACT_NUMBER_AND_INCR (count, p);
   605                 p = CHARSET_RANGE_TABLE_END (p, count);
   606               }
   607           }
   608           break;
   609 
   610         case begline:
   611           fputs ("/begline", stderr);
   612           break;
   613 
   614         case endline:
   615           fputs ("/endline", stderr);
   616           break;
   617 
   618         case on_failure_jump:
   619           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   620           fprintf (stderr, "/on_failure_jump to %td", p + mcnt - start);
   621           break;
   622 
   623         case on_failure_keep_string_jump:
   624           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   625           fprintf (stderr, "/on_failure_keep_string_jump to %td",
   626                    p + mcnt - start);
   627           break;
   628 
   629         case on_failure_jump_nastyloop:
   630           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   631           fprintf (stderr, "/on_failure_jump_nastyloop to %td",
   632                    p + mcnt - start);
   633           break;
   634 
   635         case on_failure_jump_loop:
   636           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   637           fprintf (stderr, "/on_failure_jump_loop to %td",
   638                    p + mcnt - start);
   639           break;
   640 
   641         case on_failure_jump_smart:
   642           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   643           fprintf (stderr, "/on_failure_jump_smart to %td",
   644                    p + mcnt - start);
   645           break;
   646 
   647         case jump:
   648           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   649           fprintf (stderr, "/jump to %td", p + mcnt - start);
   650           break;
   651 
   652         case succeed_n:
   653           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   654           EXTRACT_NUMBER_AND_INCR (mcnt2, p);
   655           fprintf (stderr, "/succeed_n to %td, %d times",
   656                    p - 2 + mcnt - start, mcnt2);
   657           break;
   658 
   659         case jump_n:
   660           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   661           EXTRACT_NUMBER_AND_INCR (mcnt2, p);
   662           fprintf (stderr, "/jump_n to %td, %d times",
   663                    p - 2 + mcnt - start, mcnt2);
   664           break;
   665 
   666         case set_number_at:
   667           EXTRACT_NUMBER_AND_INCR (mcnt, p);
   668           EXTRACT_NUMBER_AND_INCR (mcnt2, p);
   669           fprintf (stderr, "/set_number_at location %td to %d",
   670                    p - 2 + mcnt - start, mcnt2);
   671           break;
   672 
   673         case wordbound:
   674           fputs ("/wordbound", stderr);
   675           break;
   676 
   677         case notwordbound:
   678           fputs ("/notwordbound", stderr);
   679           break;
   680 
   681         case wordbeg:
   682           fputs ("/wordbeg", stderr);
   683           break;
   684 
   685         case wordend:
   686           fputs ("/wordend", stderr);
   687           break;
   688 
   689         case symbeg:
   690           fputs ("/symbeg", stderr);
   691           break;
   692 
   693         case symend:
   694           fputs ("/symend", stderr);
   695           break;
   696 
   697         case syntaxspec:
   698           fputs ("/syntaxspec", stderr);
   699           mcnt = *p++;
   700           fprintf (stderr, "/%d", mcnt);
   701           break;
   702 
   703         case notsyntaxspec:
   704           fputs ("/notsyntaxspec", stderr);
   705           mcnt = *p++;
   706           fprintf (stderr, "/%d", mcnt);
   707           break;
   708 
   709         case at_dot:
   710           fputs ("/at_dot", stderr);
   711           break;
   712 
   713         case categoryspec:
   714           fputs ("/categoryspec", stderr);
   715           mcnt = *p++;
   716           fprintf (stderr, "/%d", mcnt);
   717           break;
   718 
   719         case notcategoryspec:
   720           fputs ("/notcategoryspec", stderr);
   721           mcnt = *p++;
   722           fprintf (stderr, "/%d", mcnt);
   723           break;
   724 
   725         case begbuf:
   726           fputs ("/begbuf", stderr);
   727           break;
   728 
   729         case endbuf:
   730           fputs ("/endbuf", stderr);
   731           break;
   732 
   733         default:
   734           fprintf (stderr, "?%d", *(p-1));
   735         }
   736 
   737       putc ('\n', stderr);
   738     }
   739 
   740   fprintf (stderr, "%td:\tend of pattern.\n", p - start);
   741 }
   742 
   743 
   744 static void
   745 print_compiled_pattern (struct re_pattern_buffer *bufp)
   746 {
   747   re_char *buffer = bufp->buffer;
   748 
   749   print_partial_compiled_pattern (buffer, buffer + bufp->used);
   750   fprintf (stderr, "%td bytes used/%td bytes allocated.\n",
   751            bufp->used, bufp->allocated);
   752 
   753   if (bufp->fastmap_accurate && bufp->fastmap)
   754     {
   755       fputs ("fastmap: ", stderr);
   756       print_fastmap (bufp->fastmap);
   757     }
   758 
   759   fprintf (stderr, "re_nsub: %td\t", bufp->re_nsub);
   760   fprintf (stderr, "regs_alloc: %d\t", bufp->regs_allocated);
   761   fprintf (stderr, "can_be_null: %d\n", bufp->can_be_null);
   762   /* Perhaps we should print the translate table?  */
   763 }
   764 
   765 
   766 static void
   767 print_double_string (re_char *where, re_char *string1, ptrdiff_t size1,
   768                      re_char *string2, ptrdiff_t size2)
   769 {
   770   if (where == NULL)
   771     fputs ("(null)", stderr);
   772   else
   773     {
   774       int i;
   775       if (FIRST_STRING_P (where))
   776         {
   777           for (i = 0; i < string1 + size1 - where; i++)
   778             debug_putchar (where[i]);
   779           where = string2;
   780         }
   781 
   782       for (i = 0; i < string2 + size2 - where; i++)
   783         debug_putchar (where[i]);
   784     }
   785 }
   786 
   787 #else /* not REGEX_EMACS_DEBUG */
   788 
   789 # define DEBUG_STATEMENT(e)
   790 # define DEBUG_PRINT(...)
   791 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
   792 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
   793 
   794 #endif /* not REGEX_EMACS_DEBUG */
   795 
   796 typedef enum
   797 {
   798   REG_NOERROR = 0,      /* Success.  */
   799   REG_NOMATCH,          /* Didn't find a match (for regexec).  */
   800 
   801   /* POSIX regcomp return error codes.  (In the order listed in the
   802      standard.)  An older version of this code supported the POSIX
   803      API; this version continues to use these names internally.  */
   804   REG_BADPAT,           /* Invalid pattern.  */
   805   REG_ECOLLATE,         /* Not implemented.  */
   806   REG_ECTYPE,           /* Invalid character class name.  */
   807   REG_EESCAPE,          /* Trailing backslash.  */
   808   REG_ESUBREG,          /* Invalid back reference.  */
   809   REG_EBRACK,           /* Unmatched left bracket.  */
   810   REG_EPAREN,           /* Parenthesis imbalance.  */
   811   REG_EBRACE,           /* Unmatched \{.  */
   812   REG_BADBR,            /* Invalid contents of \{\}.  */
   813   REG_ERANGE,           /* Invalid range end.  */
   814   REG_ESPACE,           /* Ran out of memory.  */
   815   REG_BADRPT,           /* No preceding re for repetition op.  */
   816 
   817   /* Error codes we've added.  */
   818   REG_EEND,             /* Premature end.  */
   819   REG_ESIZE,            /* Compiled pattern bigger than 2^16 bytes.  */
   820   REG_ERPAREN,          /* Unmatched ) or \); not returned from regcomp.  */
   821   REG_ERANGEX,          /* Range striding over charsets.  */
   822   REG_ESIZEBR           /* n or m too big in \{n,m\} */
   823 } reg_errcode_t;
   824 
   825 static const char *re_error_msgid[] =
   826   {
   827    [REG_NOERROR] = "Success",
   828    [REG_NOMATCH] = "No match",
   829    [REG_BADPAT] = "Invalid regular expression",
   830    [REG_ECOLLATE] = "Invalid collation character",
   831    [REG_ECTYPE] = "Invalid character class name",
   832    [REG_EESCAPE] = "Trailing backslash",
   833    [REG_ESUBREG] = "Invalid back reference",
   834    [REG_EBRACK] = "Unmatched [ or [^",
   835    [REG_EPAREN] = "Unmatched ( or \\(",
   836    [REG_EBRACE] = "Unmatched \\{",
   837    [REG_BADBR] = "Invalid content of \\{\\}",
   838    [REG_ERANGE] = "Invalid range end",
   839    [REG_ESPACE] = "Memory exhausted",
   840    [REG_BADRPT] = "Invalid preceding regular expression",
   841    [REG_EEND] = "Premature end of regular expression",
   842    [REG_ESIZE] = "Regular expression too big",
   843    [REG_ERPAREN] = "Unmatched ) or \\)",
   844    [REG_ERANGEX ] = "Range striding over charsets",
   845    [REG_ESIZEBR ] = "Invalid content of \\{\\}",
   846   };
   847 
   848 /* For 'regs_allocated'.  */
   849 enum { REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED };
   850 
   851 /* If 'regs_allocated' is REGS_UNALLOCATED in the pattern buffer,
   852    're_match_2' returns information about at least this many registers
   853    the first time a 'regs' structure is passed.  */
   854 enum { RE_NREGS = 30 };
   855 
   856 /* The searching and matching functions allocate memory for the
   857    failure stack and registers.  Otherwise searching and matching
   858    routines would have much smaller memory resources at their
   859    disposal, and therefore might fail to handle complex regexps.  */
   860 
   861 /* Failure stack declarations and macros; both re_compile_fastmap and
   862    re_match_2 use a failure stack.  These have to be macros because of
   863    SAFE_ALLOCA.  */
   864 
   865 
   866 /* Approximate number of failure points for which to initially allocate space
   867    when matching.  If this number is exceeded, we allocate more
   868    space, so it is not a hard limit.  */
   869 #define INIT_FAILURE_ALLOC 20
   870 
   871 /* Roughly the maximum number of failure points on the stack.  Would be
   872    exactly that if failure always used TYPICAL_FAILURE_SIZE items.
   873    This is a variable only so users of regex can assign to it; we never
   874    change it ourselves.  We always multiply it by TYPICAL_FAILURE_SIZE
   875    before using it, so it should probably be a byte-count instead.  */
   876 /* Note that 4400 was enough to cause a crash on Alpha OSF/1,
   877    whose default stack limit is 2mb.  In order for a larger
   878    value to work reliably, you have to try to make it accord
   879    with the process stack limit.  */
   880 ptrdiff_t emacs_re_max_failures = 40000;
   881 
   882 union fail_stack_elt
   883 {
   884   re_char *pointer;
   885   intptr_t integer;
   886 };
   887 
   888 typedef union fail_stack_elt fail_stack_elt_t;
   889 
   890 typedef struct
   891 {
   892   fail_stack_elt_t *stack;
   893   ptrdiff_t size;
   894   ptrdiff_t avail;      /* Offset of next open position.  */
   895   ptrdiff_t frame;      /* Offset of the cur constructed frame.  */
   896 } fail_stack_type;
   897 
   898 #define FAIL_STACK_EMPTY()     (fail_stack.frame == 0)
   899 
   900 
   901 /* Define macros to initialize and free the failure stack.  */
   902 
   903 #define INIT_FAIL_STACK()                                               \
   904   do {                                                                  \
   905     fail_stack.stack =                                                  \
   906       SAFE_ALLOCA (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE            \
   907                    * sizeof (fail_stack_elt_t));                        \
   908     fail_stack.size = INIT_FAILURE_ALLOC;                               \
   909     fail_stack.avail = 0;                                               \
   910     fail_stack.frame = 0;                                               \
   911   } while (false)
   912 
   913 
   914 /* Double the size of FAIL_STACK, up to a limit
   915    which allows approximately 'emacs_re_max_failures' items.
   916 
   917    Return 1 if succeeds, and 0 if either ran out of memory
   918    allocating space for it or it was already too large.
   919 
   920    REGEX_REALLOCATE requires 'destination' be declared.   */
   921 
   922 /* Factor to increase the failure stack size by.
   923    This used to be 2, but 2 was too wasteful
   924    because the old discarded stacks added up to as much space
   925    were as ultimate, maximum-size stack.  */
   926 #define FAIL_STACK_GROWTH_FACTOR 4
   927 
   928 #define GROW_FAIL_STACK(fail_stack)                                     \
   929   (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE)        \
   930    ? 0                                                                  \
   931    : ((fail_stack).stack                                                \
   932       = REGEX_REALLOCATE ((fail_stack).stack,                           \
   933           (fail_stack).avail * sizeof (fail_stack_elt_t),               \
   934           min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE,                  \
   935                ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR))          \
   936           * sizeof (fail_stack_elt_t)),                                 \
   937       ((fail_stack).size                                                \
   938        = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE,            \
   939                ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)))),       \
   940       1))
   941 
   942 
   943 /* Push a pointer value onto the failure stack.
   944    Assumes the variable 'fail_stack'.  Probably should only
   945    be called from within 'PUSH_FAILURE_POINT'.  */
   946 #define PUSH_FAILURE_POINTER(item)                                      \
   947   fail_stack.stack[fail_stack.avail++].pointer = (item)
   948 
   949 /* This pushes an integer-valued item onto the failure stack.
   950    Assumes the variable 'fail_stack'.  Probably should only
   951    be called from within 'PUSH_FAILURE_POINT'.  */
   952 #define PUSH_FAILURE_INT(item)                                  \
   953   fail_stack.stack[fail_stack.avail++].integer = (item)
   954 
   955 /* These POP... operations complement the PUSH... operations.
   956    All assume that 'fail_stack' is nonempty.  */
   957 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
   958 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
   959 
   960 /* Individual items aside from the registers.  */
   961 #define NUM_NONREG_ITEMS 3
   962 
   963 /* Used to examine the stack (to detect infinite loops).  */
   964 #define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer
   965 #define FAILURE_STR(h) (fail_stack.stack[(h) - 2].pointer)
   966 #define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
   967 #define TOP_FAILURE_HANDLE() fail_stack.frame
   968 
   969 
   970 #define ENSURE_FAIL_STACK(space)                                        \
   971 while (REMAINING_AVAIL_SLOTS <= space) {                                \
   972   if (!GROW_FAIL_STACK (fail_stack))                                    \
   973     {                                                                   \
   974       unbind_to (count, Qnil);                                          \
   975       SAFE_FREE ();                                                     \
   976       return -2;                                                        \
   977     }                                                                   \
   978   DEBUG_PRINT ("\n  Doubled stack; size now: %td\n", fail_stack.size);  \
   979   DEBUG_PRINT ("         slots available: %td\n", REMAINING_AVAIL_SLOTS);\
   980 }
   981 
   982 /* Push register NUM onto the stack.  */
   983 #define PUSH_FAILURE_REG(num)                                           \
   984 do {                                                                    \
   985   char *destination;                                                    \
   986   intptr_t n = num;                                                     \
   987   eassert (0 < n && n < num_regs);                                      \
   988   eassert (REG_UNSET (regstart[n]) <= REG_UNSET (regend[n]));           \
   989   ENSURE_FAIL_STACK(3);                                                 \
   990   DEBUG_PRINT ("    Push reg %"PRIdPTR" (spanning %p -> %p)\n",         \
   991                n, regstart[n], regend[n]);                              \
   992   PUSH_FAILURE_POINTER (regstart[n]);                                   \
   993   PUSH_FAILURE_POINTER (regend[n]);                                     \
   994   PUSH_FAILURE_INT (n);                                                 \
   995 } while (false)
   996 
   997 /* Change the counter's value to VAL, but make sure that it will
   998    be reset when backtracking.  */
   999 #define PUSH_NUMBER(ptr,val)                                            \
  1000 do {                                                                    \
  1001   char *destination;                                                    \
  1002   int c;                                                                \
  1003   ENSURE_FAIL_STACK(3);                                                 \
  1004   EXTRACT_NUMBER (c, ptr);                                              \
  1005   DEBUG_PRINT ("    Push number %p = %d -> %d\n", ptr, c, val);         \
  1006   PUSH_FAILURE_INT (c);                                                 \
  1007   PUSH_FAILURE_POINTER (ptr);                                           \
  1008   PUSH_FAILURE_INT (-1);                                                \
  1009   STORE_NUMBER (ptr, val);                                              \
  1010 } while (false)
  1011 
  1012 /* Pop a saved register off the stack.  */
  1013 #define POP_FAILURE_REG_OR_COUNT()                                      \
  1014 do {                                                                    \
  1015   intptr_t pfreg = POP_FAILURE_INT ();                                  \
  1016   if (pfreg == -1)                                                      \
  1017     {                                                                   \
  1018       /* It's a counter.  */                                            \
  1019       /* Discard 'const', making re_search non-reentrant.  */           \
  1020       unsigned char *ptr = (unsigned char *) POP_FAILURE_POINTER ();    \
  1021       pfreg = POP_FAILURE_INT ();                                       \
  1022       STORE_NUMBER (ptr, pfreg);                                        \
  1023       DEBUG_PRINT ("     Pop counter %p = %"PRIdPTR"\n", ptr, pfreg);   \
  1024     }                                                                   \
  1025   else                                                                  \
  1026     {                                                                   \
  1027       eassert (0 < pfreg && pfreg < num_regs);                          \
  1028       regend[pfreg] = POP_FAILURE_POINTER ();                           \
  1029       regstart[pfreg] = POP_FAILURE_POINTER ();                         \
  1030       eassert (REG_UNSET (regstart[pfreg]) <= REG_UNSET (regend[pfreg])); \
  1031       DEBUG_PRINT ("     Pop reg %ld (spanning %p -> %p)\n",            \
  1032                    pfreg, regstart[pfreg], regend[pfreg]);              \
  1033     }                                                                   \
  1034 } while (false)
  1035 
  1036 /* Check that we are not stuck in an infinite loop.  */
  1037 #define CHECK_INFINITE_LOOP(pat_cur, string_place)                      \
  1038 do {                                                                    \
  1039   ptrdiff_t failure = TOP_FAILURE_HANDLE ();                            \
  1040   /* Check for infinite matching loops */                               \
  1041   while (failure > 0                                                    \
  1042          && (FAILURE_STR (failure) == string_place                      \
  1043              || FAILURE_STR (failure) == NULL))                         \
  1044     {                                                                   \
  1045       eassert (FAILURE_PAT (failure) >= bufp->buffer                    \
  1046                && FAILURE_PAT (failure) <= bufp->buffer + bufp->used);  \
  1047       if (FAILURE_PAT (failure) == pat_cur)                             \
  1048         {                                                               \
  1049           cycle = true;                                                 \
  1050           break;                                                        \
  1051         }                                                               \
  1052       DEBUG_PRINT ("  Other pattern: %p\n", FAILURE_PAT (failure));     \
  1053       failure = NEXT_FAILURE_HANDLE(failure);                           \
  1054     }                                                                   \
  1055   DEBUG_PRINT ("  Other string: %p\n", FAILURE_STR (failure));          \
  1056 } while (false)
  1057 
  1058 /* Push the information about the state we will need
  1059    if we ever fail back to it.
  1060 
  1061    Requires variables fail_stack, regstart, regend and
  1062    num_regs be declared.  GROW_FAIL_STACK requires 'destination' be
  1063    declared.
  1064 
  1065    Does 'return FAILURE_CODE' if runs out of memory.  */
  1066 
  1067 #define PUSH_FAILURE_POINT(pattern, string_place)                       \
  1068 do {                                                                    \
  1069   char *destination;                                                    \
  1070   DEBUG_STATEMENT (nfailure_points_pushed++);                           \
  1071   DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n");                              \
  1072   DEBUG_PRINT ("  Before push, next avail: %td\n", fail_stack.avail);   \
  1073   DEBUG_PRINT ("                        size: %td\n", fail_stack.size); \
  1074                                                                         \
  1075   ENSURE_FAIL_STACK (NUM_NONREG_ITEMS);                                 \
  1076                                                                         \
  1077   DEBUG_PRINT ("\n");                                                   \
  1078                                                                         \
  1079   DEBUG_PRINT ("  Push frame index: %td\n", fail_stack.frame);          \
  1080   PUSH_FAILURE_INT (fail_stack.frame);                                  \
  1081                                                                         \
  1082   DEBUG_PRINT ("  Push string %p: \"", string_place);                   \
  1083   DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\
  1084   DEBUG_PRINT ("\"\n");                                                 \
  1085   PUSH_FAILURE_POINTER (string_place);                                  \
  1086                                                                         \
  1087   DEBUG_PRINT ("  Push pattern %p: ", pattern);                         \
  1088   DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern, pend);                   \
  1089   PUSH_FAILURE_POINTER (pattern);                                       \
  1090                                                                         \
  1091   /* Close the frame by moving the frame pointer past it.  */           \
  1092   fail_stack.frame = fail_stack.avail;                                  \
  1093 } while (false)
  1094 
  1095 /* Estimate the size of data pushed by a typical failure stack entry.
  1096    An estimate is all we need, because all we use this for
  1097    is to choose a limit for how big to make the failure stack.  */
  1098 /* BEWARE, the value `20' is hard-coded in emacs.c:main().  */
  1099 #define TYPICAL_FAILURE_SIZE 20
  1100 
  1101 /* How many items can still be added to the stack without overflowing it.  */
  1102 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
  1103 
  1104 
  1105 /* Pop what PUSH_FAIL_STACK pushes.
  1106 
  1107    Restore into the parameters, all of which should be lvalues:
  1108      STR -- the saved data position.
  1109      PAT -- the saved pattern position.
  1110      REGSTART, REGEND -- arrays of string positions.
  1111 
  1112    Also assume the variables FAIL_STACK and (if debugging) BUFP, PEND,
  1113    STRING1, SIZE1, STRING2, and SIZE2.  */
  1114 
  1115 #define POP_FAILURE_POINT(str, pat)                                     \
  1116 do {                                                                    \
  1117   eassert (!FAIL_STACK_EMPTY ());                                       \
  1118                                                                         \
  1119   /* Remove failure points and point to how many regs pushed.  */       \
  1120   DEBUG_PRINT ("POP_FAILURE_POINT:\n");                                 \
  1121   DEBUG_PRINT ("  Before pop, next avail: %td\n", fail_stack.avail);    \
  1122   DEBUG_PRINT ("                     size: %td\n", fail_stack.size);    \
  1123                                                                         \
  1124   /* Pop the saved registers.  */                                       \
  1125   while (fail_stack.frame < fail_stack.avail)                           \
  1126     POP_FAILURE_REG_OR_COUNT ();                                        \
  1127                                                                         \
  1128   pat = POP_FAILURE_POINTER ();                                         \
  1129   DEBUG_PRINT ("  Popping pattern %p: ", pat);                          \
  1130   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
  1131                                                                         \
  1132   /* If the saved string location is NULL, it came from an              \
  1133      on_failure_keep_string_jump opcode, and we want to throw away the  \
  1134      saved NULL, thus retaining our current position in the string.  */ \
  1135   str = POP_FAILURE_POINTER ();                                         \
  1136   DEBUG_PRINT ("  Popping string %p: \"", str);                         \
  1137   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
  1138   DEBUG_PRINT ("\"\n");                                                 \
  1139                                                                         \
  1140   fail_stack.frame = POP_FAILURE_INT ();                                \
  1141   DEBUG_PRINT ("  Popping  frame index: %td\n", fail_stack.frame);      \
  1142                                                                         \
  1143   eassert (fail_stack.avail >= 0);                                      \
  1144   eassert (fail_stack.frame <= fail_stack.avail);                       \
  1145                                                                         \
  1146   DEBUG_STATEMENT (nfailure_points_popped++);                           \
  1147 } while (false) /* POP_FAILURE_POINT */
  1148 
  1149 
  1150 
  1151 /* Registers are set to a sentinel when they haven't yet matched.  */
  1152 #define REG_UNSET(e) ((e) == NULL)
  1153 
  1154 /* Subroutine declarations and macros for regex_compile.  */
  1155 
  1156 static reg_errcode_t regex_compile (re_char *pattern, ptrdiff_t size,
  1157                                     bool posix_backtracking,
  1158                                     const char *whitespace_regexp,
  1159                                     struct re_pattern_buffer *bufp);
  1160 static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
  1161 static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
  1162 static void insert_op1 (re_opcode_t op, unsigned char *loc,
  1163                         int arg, unsigned char *end);
  1164 static void insert_op2 (re_opcode_t op, unsigned char *loc,
  1165                         int arg1, int arg2, unsigned char *end);
  1166 static bool at_begline_loc_p (re_char *pattern, re_char *p);
  1167 static bool at_endline_loc_p (re_char *p, re_char *pend);
  1168 static re_char *skip_one_char (re_char *p);
  1169 static int analyze_first (re_char *p, re_char *pend,
  1170                           char *fastmap, bool multibyte);
  1171 
  1172 /* Fetch the next character in the uncompiled pattern, with no
  1173    translation.  */
  1174 #define PATFETCH(c)                                                     \
  1175   do {                                                                  \
  1176     int len;                                                            \
  1177     if (p == pend) return REG_EEND;                                     \
  1178     c = RE_STRING_CHAR_AND_LENGTH (p, len, multibyte);                  \
  1179     p += len;                                                           \
  1180   } while (false)
  1181 
  1182 
  1183 #define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C)
  1184 #define TRANSLATE(d) (!NILP (translate) ? RE_TRANSLATE (translate, d) : (d))
  1185 
  1186 /* Macros for outputting the compiled pattern into 'buffer'.  */
  1187 
  1188 /* If the buffer isn't allocated when it comes in, use this.  */
  1189 #define INIT_BUF_SIZE  32
  1190 
  1191 /* Ensure at least N more bytes of space in buffer.  */
  1192 #define GET_BUFFER_SPACE(n)                                             \
  1193     if (bufp->buffer + bufp->allocated - b < (n))                       \
  1194       EXTEND_BUFFER ((n) - (bufp->buffer + bufp->allocated - b))
  1195 
  1196 /* Ensure one more byte of buffer space and then add C to it.  */
  1197 #define BUF_PUSH(c)                                                     \
  1198   do {                                                                  \
  1199     GET_BUFFER_SPACE (1);                                               \
  1200     *b++ = (unsigned char) (c);                                         \
  1201   } while (false)
  1202 
  1203 
  1204 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
  1205 #define BUF_PUSH_2(c1, c2)                                              \
  1206   do {                                                                  \
  1207     GET_BUFFER_SPACE (2);                                               \
  1208     *b++ = (unsigned char) (c1);                                        \
  1209     *b++ = (unsigned char) (c2);                                        \
  1210   } while (false)
  1211 
  1212 
  1213 /* Store a jump with opcode OP at LOC to location TO.  Store a
  1214    relative address offset by the three bytes the jump itself occupies.  */
  1215 #define STORE_JUMP(op, loc, to) \
  1216   store_op1 (op, loc, (to) - (loc) - 3)
  1217 
  1218 /* Likewise, for a two-argument jump.  */
  1219 #define STORE_JUMP2(op, loc, to, arg) \
  1220   store_op2 (op, loc, (to) - (loc) - 3, arg)
  1221 
  1222 /* Like 'STORE_JUMP', but for inserting.  Assume B is the buffer end.  */
  1223 #define INSERT_JUMP(op, loc, to) \
  1224   insert_op1 (op, loc, (to) - (loc) - 3, b)
  1225 
  1226 /* Like 'STORE_JUMP2', but for inserting.  Assume B is the buffer end.  */
  1227 #define INSERT_JUMP2(op, loc, to, arg) \
  1228   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
  1229 
  1230 
  1231 /* This is not an arbitrary limit: the arguments which represent offsets
  1232    into the pattern are two bytes long.  So if 2^15 bytes turns out to
  1233    be too small, many things would have to change.  */
  1234 # define MAX_BUF_SIZE (1 << 15)
  1235 
  1236 /* Extend the buffer by at least N bytes via realloc and
  1237    reset the pointers that pointed into the old block to point to the
  1238    correct places in the new one.  If extending the buffer results in it
  1239    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
  1240 #define EXTEND_BUFFER(n)                                                \
  1241   do {                                                                  \
  1242     ptrdiff_t requested_extension = n;                                  \
  1243     unsigned char *old_buffer = bufp->buffer;                           \
  1244     if (MAX_BUF_SIZE - bufp->allocated < requested_extension)           \
  1245       return REG_ESIZE;                                                 \
  1246     ptrdiff_t b_off = b - old_buffer;                                   \
  1247     ptrdiff_t begalt_off = begalt - old_buffer;                         \
  1248     ptrdiff_t fixup_alt_jump_off =                                      \
  1249       fixup_alt_jump ? fixup_alt_jump - old_buffer : -1;                \
  1250     ptrdiff_t laststart_off = laststart ? laststart - old_buffer : -1;  \
  1251     ptrdiff_t pending_exact_off =                                       \
  1252       pending_exact ? pending_exact - old_buffer : -1;                  \
  1253     bufp->buffer = xpalloc (bufp->buffer, &bufp->allocated,             \
  1254                             requested_extension, MAX_BUF_SIZE, 1);      \
  1255     unsigned char *new_buffer = bufp->buffer;                           \
  1256     b = new_buffer + b_off;                                             \
  1257     begalt = new_buffer + begalt_off;                                   \
  1258     if (0 <= fixup_alt_jump_off)                                        \
  1259       fixup_alt_jump = new_buffer + fixup_alt_jump_off;                 \
  1260     if (0 <= laststart_off)                                             \
  1261       laststart = new_buffer + laststart_off;                           \
  1262     if (0 <= pending_exact_off)                                         \
  1263       pending_exact = new_buffer + pending_exact_off;                   \
  1264   } while (false)
  1265 
  1266 
  1267 /* Since we have one byte reserved for the register number argument to
  1268    {start,stop}_memory, the maximum number of groups we can report
  1269    things about is what fits in that byte.  */
  1270 #define MAX_REGNUM 255
  1271 
  1272 /* But patterns can have more than 'MAX_REGNUM' registers.  Just
  1273    ignore the excess.  */
  1274 typedef int regnum_t;
  1275 
  1276 
  1277 /* Macros for the compile stack.  */
  1278 
  1279 typedef long pattern_offset_t;
  1280 verify (LONG_MIN <= -(MAX_BUF_SIZE - 1) && MAX_BUF_SIZE - 1 <= LONG_MAX);
  1281 
  1282 typedef struct
  1283 {
  1284   pattern_offset_t begalt_offset;
  1285   pattern_offset_t fixup_alt_jump;
  1286   pattern_offset_t laststart_offset;
  1287   regnum_t regnum;
  1288 } compile_stack_elt_t;
  1289 
  1290 
  1291 typedef struct
  1292 {
  1293   compile_stack_elt_t *stack;
  1294   ptrdiff_t size;
  1295   ptrdiff_t avail;              /* Offset of next open position.  */
  1296 } compile_stack_type;
  1297 
  1298 
  1299 #define INIT_COMPILE_STACK_SIZE 32
  1300 
  1301 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
  1302 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
  1303 
  1304 /* The next available element.  */
  1305 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
  1306 
  1307 /* Structure to manage work area for range table.  */
  1308 struct range_table_work_area
  1309 {
  1310   int *table;                   /* actual work area.  */
  1311   int allocated;                /* allocated size for work area in bytes.  */
  1312   int used;                     /* actually used size in words.  */
  1313   int bits;                     /* flag to record character classes */
  1314 };
  1315 
  1316 /* Make sure that WORK_AREA can hold N more multibyte characters.
  1317    If it can't get the space, it returns from the surrounding function.  */
  1318 
  1319 #define EXTEND_RANGE_TABLE(work_area, n)                                \
  1320   do {                                                                  \
  1321     if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \
  1322       {                                                                 \
  1323         extend_range_table_work_area (&work_area);                      \
  1324         if ((work_area).table == 0)                                     \
  1325           return (REG_ESPACE);                                          \
  1326       }                                                                 \
  1327   } while (false)
  1328 
  1329 #define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit)           \
  1330   (work_area).bits |= (bit)
  1331 
  1332 /* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
  1333 #define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)    \
  1334   do {                                                                  \
  1335     EXTEND_RANGE_TABLE ((work_area), 2);                                \
  1336     (work_area).table[(work_area).used++] = (range_start);              \
  1337     (work_area).table[(work_area).used++] = (range_end);                \
  1338   } while (false)
  1339 
  1340 /* Free allocated memory for WORK_AREA.  */
  1341 #define FREE_RANGE_TABLE_WORK_AREA(work_area)   \
  1342   do {                                          \
  1343     if ((work_area).table)                      \
  1344       xfree ((work_area).table);                        \
  1345   } while (false)
  1346 
  1347 #define CLEAR_RANGE_TABLE_WORK_USED(work_area) \
  1348   ((work_area).used = 0, (work_area).bits = 0)
  1349 #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
  1350 #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
  1351 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
  1352 
  1353 /* Bits used to implement the multibyte-part of the various character classes
  1354    such as [:alnum:] in a charset's range table.  The code currently assumes
  1355    that only the low 16 bits are used.  */
  1356 #define BIT_WORD        0x1
  1357 #define BIT_LOWER       0x2
  1358 #define BIT_PUNCT       0x4
  1359 #define BIT_SPACE       0x8
  1360 #define BIT_UPPER       0x10
  1361 #define BIT_MULTIBYTE   0x20
  1362 #define BIT_ALPHA       0x40
  1363 #define BIT_ALNUM       0x80
  1364 #define BIT_GRAPH       0x100
  1365 #define BIT_PRINT       0x200
  1366 #define BIT_BLANK       0x400
  1367 
  1368 
  1369 /* Set the bit for character C in a list.  */
  1370 #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
  1371 
  1372 
  1373 /* Store characters in the range FROM to TO in the bitmap at B (for
  1374    ASCII and unibyte characters) and WORK_AREA (for multibyte
  1375    characters) while translating them and paying attention to the
  1376    continuity of translated characters.
  1377 
  1378    Implementation note: It is better to implement these fairly big
  1379    macros by a function, but it's not that easy because macros called
  1380    in this macro assume various local variables already declared.  */
  1381 
  1382 /* Both FROM and TO are ASCII characters.  */
  1383 
  1384 #define SETUP_ASCII_RANGE(work_area, FROM, TO)                  \
  1385   do {                                                          \
  1386     int C0, C1;                                                 \
  1387                                                                 \
  1388     for (C0 = (FROM); C0 <= (TO); C0++)                         \
  1389       {                                                         \
  1390         C1 = TRANSLATE (C0);                                    \
  1391         if (! ASCII_CHAR_P (C1))                                \
  1392           {                                                     \
  1393             SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1);    \
  1394             if ((C1 = RE_CHAR_TO_UNIBYTE (C1)) < 0)             \
  1395               C1 = C0;                                          \
  1396           }                                                     \
  1397         SET_LIST_BIT (C1);                                      \
  1398       }                                                         \
  1399   } while (false)
  1400 
  1401 
  1402 /* Both FROM and TO are unibyte characters (0x80..0xFF).  */
  1403 
  1404 #define SETUP_UNIBYTE_RANGE(work_area, FROM, TO)                               \
  1405   do {                                                                         \
  1406     int C0, C1, C2, I;                                                         \
  1407     int USED = RANGE_TABLE_WORK_USED (work_area);                              \
  1408                                                                                \
  1409     for (C0 = (FROM); C0 <= (TO); C0++)                                        \
  1410       {                                                                        \
  1411         C1 = RE_CHAR_TO_MULTIBYTE (C0);                                        \
  1412         if (CHAR_BYTE8_P (C1))                                                 \
  1413           SET_LIST_BIT (C0);                                                   \
  1414         else                                                                   \
  1415           {                                                                    \
  1416             C2 = TRANSLATE (C1);                                               \
  1417             if (C2 == C1                                                       \
  1418                 || (C1 = RE_CHAR_TO_UNIBYTE (C2)) < 0)                         \
  1419               C1 = C0;                                                         \
  1420             SET_LIST_BIT (C1);                                                 \
  1421             for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
  1422               {                                                                \
  1423                 int from = RANGE_TABLE_WORK_ELT (work_area, I);                \
  1424                 int to = RANGE_TABLE_WORK_ELT (work_area, I + 1);              \
  1425                                                                                \
  1426                 if (C2 >= from - 1 && C2 <= to + 1)                            \
  1427                   {                                                            \
  1428                     if (C2 == from - 1)                                        \
  1429                       RANGE_TABLE_WORK_ELT (work_area, I)--;                   \
  1430                     else if (C2 == to + 1)                                     \
  1431                       RANGE_TABLE_WORK_ELT (work_area, I + 1)++;               \
  1432                     break;                                                     \
  1433                   }                                                            \
  1434               }                                                                \
  1435             if (I < USED)                                                      \
  1436               SET_RANGE_TABLE_WORK_AREA ((work_area), C2, C2);                 \
  1437           }                                                                    \
  1438       }                                                                        \
  1439   } while (false)
  1440 
  1441 
  1442 /* Both FROM and TO are multibyte characters.  */
  1443 
  1444 #define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO)                         \
  1445   do {                                                                     \
  1446     int C0, C1, C2, I, USED = RANGE_TABLE_WORK_USED (work_area);           \
  1447                                                                            \
  1448     SET_RANGE_TABLE_WORK_AREA ((work_area), (FROM), (TO));                 \
  1449     for (C0 = (FROM); C0 <= (TO); C0++)                                    \
  1450       {                                                                    \
  1451         C1 = TRANSLATE (C0);                                               \
  1452         if ((C2 = RE_CHAR_TO_UNIBYTE (C1)) >= 0                            \
  1453             || (C1 != C0 && (C2 = RE_CHAR_TO_UNIBYTE (C0)) >= 0))          \
  1454           SET_LIST_BIT (C2);                                               \
  1455         if (C1 >= (FROM) && C1 <= (TO))                                    \
  1456           continue;                                                        \
  1457         for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
  1458           {                                                                \
  1459             int from = RANGE_TABLE_WORK_ELT (work_area, I);                \
  1460             int to = RANGE_TABLE_WORK_ELT (work_area, I + 1);              \
  1461                                                                            \
  1462             if (C1 >= from - 1 && C1 <= to + 1)                            \
  1463               {                                                            \
  1464                 if (C1 == from - 1)                                        \
  1465                   RANGE_TABLE_WORK_ELT (work_area, I)--;                   \
  1466                 else if (C1 == to + 1)                                     \
  1467                   RANGE_TABLE_WORK_ELT (work_area, I + 1)++;               \
  1468                 break;                                                     \
  1469               }                                                            \
  1470           }                                                                \
  1471         if (I < USED)                                                      \
  1472           SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1);                 \
  1473       }                                                                    \
  1474   } while (false)
  1475 
  1476 /* Get the next unsigned number in the uncompiled pattern.  */
  1477 #define GET_INTERVAL_COUNT(num)                                 \
  1478   do {                                                                  \
  1479     if (p == pend)                                                      \
  1480       FREE_STACK_RETURN (REG_EBRACE);                                   \
  1481     else                                                                \
  1482       {                                                                 \
  1483         PATFETCH (c);                                                   \
  1484         while ('0' <= c && c <= '9')                                    \
  1485           {                                                             \
  1486             if (num < 0)                                                \
  1487               num = 0;                                                  \
  1488             if (RE_DUP_MAX / 10 - (RE_DUP_MAX % 10 < c - '0') < num)    \
  1489               FREE_STACK_RETURN (REG_ESIZEBR);                          \
  1490             num = num * 10 + c - '0';                                   \
  1491             if (p == pend)                                              \
  1492               FREE_STACK_RETURN (REG_EBRACE);                           \
  1493             PATFETCH (c);                                               \
  1494           }                                                             \
  1495       }                                                                 \
  1496   } while (false)
  1497 
  1498 /* Parse a character class, i.e. string such as "[:name:]".  *strp
  1499    points to the string to be parsed and limit is length, in bytes, of
  1500    that string.
  1501 
  1502    If *strp point to a string that begins with "[:name:]", where name is
  1503    a non-empty sequence of lower case letters, *strp will be advanced past the
  1504    closing square bracket and RECC_* constant which maps to the name will be
  1505    returned.  If name is not a valid character class name zero, or RECC_ERROR,
  1506    is returned.
  1507 
  1508    Otherwise, if *strp doesn't begin with "[:name:]", -1 is returned.
  1509 
  1510    The function can be used on ASCII and multibyte (UTF-8-encoded) strings.
  1511  */
  1512 re_wctype_t
  1513 re_wctype_parse (const unsigned char **strp, ptrdiff_t limit)
  1514 {
  1515   const char *beg = (const char *)*strp, *it;
  1516 
  1517   if (limit < 4 || beg[0] != '[' || beg[1] != ':')
  1518     return -1;
  1519 
  1520   beg += 2;  /* skip opening "[:" */
  1521   limit -= 3;  /* opening "[:" and half of closing ":]"; --limit handles rest */
  1522   for (it = beg; it[0] != ':' || it[1] != ']'; ++it)
  1523     if (!--limit)
  1524       return -1;
  1525 
  1526   *strp = (const unsigned char *)(it + 2);
  1527 
  1528   /* Sort tests in the length=five case by frequency the classes to minimize
  1529      number of times we fail the comparison.  The frequencies of character class
  1530      names used in Emacs sources as of 2016-07-27:
  1531 
  1532      $ find \( -name \*.c -o -name \*.el \) -exec grep -h '\[:[a-z]*:]' {} + |
  1533            sed 's/]/]\n/g' |grep -o '\[:[a-z]*:]' |sort |uniq -c |sort -nr
  1534          213 [:alnum:]
  1535          104 [:alpha:]
  1536           62 [:space:]
  1537           39 [:digit:]
  1538           36 [:blank:]
  1539           26 [:word:]
  1540           26 [:upper:]
  1541           21 [:lower:]
  1542           10 [:xdigit:]
  1543           10 [:punct:]
  1544           10 [:ascii:]
  1545            4 [:nonascii:]
  1546            4 [:graph:]
  1547            2 [:print:]
  1548            2 [:cntrl:]
  1549            1 [:ff:]
  1550 
  1551      If you update this list, consider also updating chain of or'ed conditions
  1552      in execute_charset function.
  1553    */
  1554 
  1555   switch (it - beg) {
  1556   case 4:
  1557     if (!memcmp (beg, "word", 4))      return RECC_WORD;
  1558     break;
  1559   case 5:
  1560     if (!memcmp (beg, "alnum", 5))     return RECC_ALNUM;
  1561     if (!memcmp (beg, "alpha", 5))     return RECC_ALPHA;
  1562     if (!memcmp (beg, "space", 5))     return RECC_SPACE;
  1563     if (!memcmp (beg, "digit", 5))     return RECC_DIGIT;
  1564     if (!memcmp (beg, "blank", 5))     return RECC_BLANK;
  1565     if (!memcmp (beg, "upper", 5))     return RECC_UPPER;
  1566     if (!memcmp (beg, "lower", 5))     return RECC_LOWER;
  1567     if (!memcmp (beg, "punct", 5))     return RECC_PUNCT;
  1568     if (!memcmp (beg, "ascii", 5))     return RECC_ASCII;
  1569     if (!memcmp (beg, "graph", 5))     return RECC_GRAPH;
  1570     if (!memcmp (beg, "print", 5))     return RECC_PRINT;
  1571     if (!memcmp (beg, "cntrl", 5))     return RECC_CNTRL;
  1572     break;
  1573   case 6:
  1574     if (!memcmp (beg, "xdigit", 6))    return RECC_XDIGIT;
  1575     break;
  1576   case 7:
  1577     if (!memcmp (beg, "unibyte", 7))   return RECC_UNIBYTE;
  1578     break;
  1579   case 8:
  1580     if (!memcmp (beg, "nonascii", 8))  return RECC_NONASCII;
  1581     break;
  1582   case 9:
  1583     if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE;
  1584     break;
  1585   }
  1586 
  1587   return RECC_ERROR;
  1588 }
  1589 
  1590 /* True if CH is in the char class CC.  */
  1591 bool
  1592 re_iswctype (int ch, re_wctype_t cc)
  1593 {
  1594   switch (cc)
  1595     {
  1596     case RECC_ALNUM: return ISALNUM (ch) != 0;
  1597     case RECC_ALPHA: return ISALPHA (ch) != 0;
  1598     case RECC_BLANK: return ISBLANK (ch) != 0;
  1599     case RECC_CNTRL: return ISCNTRL (ch) != 0;
  1600     case RECC_DIGIT: return ISDIGIT (ch) != 0;
  1601     case RECC_GRAPH: return ISGRAPH (ch) != 0;
  1602     case RECC_LOWER: return ISLOWER (ch) != 0;
  1603     case RECC_PRINT: return ISPRINT (ch) != 0;
  1604     case RECC_PUNCT: return ISPUNCT (ch) != 0;
  1605     case RECC_SPACE: return ISSPACE (ch) != 0;
  1606     case RECC_UPPER: return ISUPPER (ch) != 0;
  1607     case RECC_XDIGIT: return ISXDIGIT (ch) != 0;
  1608     case RECC_ASCII: return IS_REAL_ASCII (ch) != 0;
  1609     case RECC_NONASCII: return !IS_REAL_ASCII (ch);
  1610     case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0;
  1611     case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
  1612     case RECC_WORD: return ISWORD (ch) != 0;
  1613     case RECC_ERROR: return false;
  1614     default:
  1615       abort ();
  1616     }
  1617 }
  1618 
  1619 /* Return a bit-pattern to use in the range-table bits to match multibyte
  1620    chars of class CC.  */
  1621 static int
  1622 re_wctype_to_bit (re_wctype_t cc)
  1623 {
  1624   switch (cc)
  1625     {
  1626     case RECC_NONASCII:
  1627     case RECC_MULTIBYTE: return BIT_MULTIBYTE;
  1628     case RECC_ALPHA: return BIT_ALPHA;
  1629     case RECC_ALNUM: return BIT_ALNUM;
  1630     case RECC_WORD: return BIT_WORD;
  1631     case RECC_LOWER: return BIT_LOWER;
  1632     case RECC_UPPER: return BIT_UPPER;
  1633     case RECC_PUNCT: return BIT_PUNCT;
  1634     case RECC_SPACE: return BIT_SPACE;
  1635     case RECC_GRAPH: return BIT_GRAPH;
  1636     case RECC_PRINT: return BIT_PRINT;
  1637     case RECC_BLANK: return BIT_BLANK;
  1638     case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
  1639     case RECC_UNIBYTE: case RECC_ERROR: return 0;
  1640     default:
  1641       abort ();
  1642     }
  1643 }
  1644 
  1645 /* Filling in the work area of a range.  */
  1646 
  1647 /* Actually extend the space in WORK_AREA.  */
  1648 
  1649 static void
  1650 extend_range_table_work_area (struct range_table_work_area *work_area)
  1651 {
  1652   work_area->allocated += 16 * sizeof (int);
  1653   work_area->table = xrealloc (work_area->table, work_area->allocated);
  1654 }
  1655 
  1656 /* regex_compile and helpers.  */
  1657 
  1658 static bool group_in_compile_stack (compile_stack_type, regnum_t);
  1659 
  1660 /* Insert the 'jump' from the end of last alternative to "here".
  1661    The space for the jump has already been allocated. */
  1662 #define FIXUP_ALT_JUMP()                                                \
  1663 do {                                                                    \
  1664   if (fixup_alt_jump)                                                   \
  1665     STORE_JUMP (jump, fixup_alt_jump, b);                               \
  1666 } while (false)
  1667 
  1668 
  1669 /* Return, freeing storage we allocated.  */
  1670 #define FREE_STACK_RETURN(value)                \
  1671   do {                                                  \
  1672     FREE_RANGE_TABLE_WORK_AREA (range_table_work);      \
  1673     xfree (compile_stack.stack);                        \
  1674     return value;                                       \
  1675   } while (false)
  1676 
  1677 /* Compile PATTERN (of length SIZE) according to SYNTAX.
  1678    Return a nonzero error code on failure, or zero for success.
  1679 
  1680    If WHITESPACE_REGEXP is given, use it instead of a space
  1681    character in PATTERN.
  1682 
  1683    Assume the 'allocated' (and perhaps 'buffer') and 'translate'
  1684    fields are set in BUFP on entry.
  1685 
  1686    If successful, put results in *BUFP (otherwise the
  1687    contents of *BUFP are undefined):
  1688      'buffer' is the compiled pattern;
  1689      'syntax' is set to SYNTAX;
  1690      'used' is set to the length of the compiled pattern;
  1691      'fastmap_accurate' is false;
  1692      're_nsub' is the number of subexpressions in PATTERN;
  1693 
  1694    The 'fastmap' field is neither examined nor set.  */
  1695 
  1696 static reg_errcode_t
  1697 regex_compile (re_char *pattern, ptrdiff_t size,
  1698                bool posix_backtracking,
  1699                const char *whitespace_regexp,
  1700                struct re_pattern_buffer *bufp)
  1701 {
  1702   /* Fetch characters from PATTERN here.  */
  1703   int c, c1;
  1704 
  1705   /* Points to the end of the buffer, where we should append.  */
  1706   unsigned char *b;
  1707 
  1708   /* Keeps track of unclosed groups.  */
  1709   compile_stack_type compile_stack;
  1710 
  1711   /* Points to the current (ending) position in the pattern.  */
  1712   re_char *p = pattern;
  1713   re_char *pend = pattern + size;
  1714 
  1715   /* How to translate the characters in the pattern.  */
  1716   Lisp_Object translate = bufp->translate;
  1717 
  1718   /* Address of the count-byte of the most recently inserted 'exactn'
  1719      command.  This makes it possible to tell if a new exact-match
  1720      character can be added to that command or if the character requires
  1721      a new 'exactn' command.  */
  1722   unsigned char *pending_exact = 0;
  1723 
  1724   /* Address of start of the most recently finished expression.
  1725      This tells, e.g., postfix * where to find the start of its
  1726      operand.  Reset at the beginning of groups and alternatives.  */
  1727   unsigned char *laststart = 0;
  1728 
  1729   /* Address of beginning of regexp, or inside of last group.  */
  1730   unsigned char *begalt;
  1731 
  1732   /* Place in the uncompiled pattern (i.e., the {) to
  1733      which to go back if the interval is invalid.  */
  1734   re_char *beg_interval;
  1735 
  1736   /* Address of the place where a forward jump should go to the end of
  1737      the containing expression.  Each alternative of an 'or' -- except the
  1738      last -- ends with a forward jump of this sort.  */
  1739   unsigned char *fixup_alt_jump = 0;
  1740 
  1741   /* Work area for range table of charset.  */
  1742   struct range_table_work_area range_table_work;
  1743 
  1744   /* If the regular expression is multibyte.  */
  1745   bool multibyte = RE_MULTIBYTE_P (bufp);
  1746 
  1747   /* Nonzero if we have pushed down into a subpattern.  */
  1748   int in_subpattern = 0;
  1749 
  1750   /* These hold the values of p, pattern, and pend from the main
  1751      pattern when we have pushed into a subpattern.  */
  1752   re_char *main_p;
  1753   re_char *main_pattern;
  1754   re_char *main_pend;
  1755 
  1756 #ifdef REGEX_EMACS_DEBUG
  1757   regex_emacs_debug++;
  1758   DEBUG_PRINT ("\nCompiling pattern: ");
  1759   if (regex_emacs_debug > 0)
  1760     {
  1761       for (ptrdiff_t debug_count = 0; debug_count < size; debug_count++)
  1762         debug_putchar (pattern[debug_count]);
  1763       putc ('\n', stderr);
  1764     }
  1765 #endif
  1766 
  1767   /* Initialize the compile stack.  */
  1768   compile_stack.stack = xmalloc (INIT_COMPILE_STACK_SIZE
  1769                                  * sizeof *compile_stack.stack);
  1770   __lsan_ignore_object (compile_stack.stack);
  1771   compile_stack.size = INIT_COMPILE_STACK_SIZE;
  1772   compile_stack.avail = 0;
  1773 
  1774   range_table_work.table = 0;
  1775   range_table_work.allocated = 0;
  1776 
  1777   /* Initialize the pattern buffer.  */
  1778   bufp->fastmap_accurate = false;
  1779   bufp->used_syntax = false;
  1780 
  1781   /* Set 'used' to zero, so that if we return an error, the pattern
  1782      printer (for debugging) will think there's no pattern.  We reset it
  1783      at the end.  */
  1784   bufp->used = 0;
  1785 
  1786   bufp->re_nsub = 0;
  1787 
  1788   if (bufp->allocated == 0)
  1789     {
  1790       /* This loses if BUFP->buffer is bogus, but that is the user's
  1791          responsibility.  */
  1792       bufp->buffer = xrealloc (bufp->buffer, INIT_BUF_SIZE);
  1793       bufp->allocated = INIT_BUF_SIZE;
  1794     }
  1795 
  1796   begalt = b = bufp->buffer;
  1797 
  1798   /* Loop through the uncompiled pattern until we're at the end.  */
  1799   while (1)
  1800     {
  1801       if (p == pend)
  1802         {
  1803           /* If this is the end of an included regexp,
  1804              pop back to the main regexp and try again.  */
  1805           if (in_subpattern)
  1806             {
  1807               in_subpattern = 0;
  1808               pattern = main_pattern;
  1809               p = main_p;
  1810               pend = main_pend;
  1811               continue;
  1812             }
  1813           /* If this is the end of the main regexp, we are done.  */
  1814           break;
  1815         }
  1816 
  1817       PATFETCH (c);
  1818 
  1819       switch (c)
  1820         {
  1821         case ' ':
  1822           {
  1823             re_char *p1 = p;
  1824 
  1825             /* If there's no special whitespace regexp, treat
  1826                spaces normally.  And don't try to do this recursively.  */
  1827             if (!whitespace_regexp || in_subpattern)
  1828               goto normal_char;
  1829 
  1830             /* Peek past following spaces.  */
  1831             while (p1 != pend)
  1832               {
  1833                 if (*p1 != ' ')
  1834                   break;
  1835                 p1++;
  1836               }
  1837             /* If the spaces are followed by a repetition op,
  1838                treat them normally.  */
  1839             if (p1 != pend
  1840                 && (*p1 == '*' || *p1 == '+' || *p1 == '?'
  1841                     || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
  1842               goto normal_char;
  1843 
  1844             /* Replace the spaces with the whitespace regexp.  */
  1845             in_subpattern = 1;
  1846             main_p = p1;
  1847             main_pend = pend;
  1848             main_pattern = pattern;
  1849             p = pattern = (re_char *) whitespace_regexp;
  1850             pend = p + strlen (whitespace_regexp);
  1851             break;
  1852           }
  1853 
  1854         case '^':
  1855           if (! (p == pattern + 1 || at_begline_loc_p (pattern, p)))
  1856             goto normal_char;
  1857           BUF_PUSH (begline);
  1858           break;
  1859 
  1860         case '$':
  1861           if (! (p == pend || at_endline_loc_p (p, pend)))
  1862             goto normal_char;
  1863           BUF_PUSH (endline);
  1864           break;
  1865 
  1866 
  1867         case '+':
  1868         case '?':
  1869         case '*':
  1870           /* If there is no previous pattern...  */
  1871           if (!laststart)
  1872             goto normal_char;
  1873 
  1874           {
  1875             /* 1 means zero (many) matches is allowed.  */
  1876             bool zero_times_ok = false, many_times_ok = false;
  1877             bool greedy = true;
  1878 
  1879             /* If there is a sequence of repetition chars, collapse it
  1880                down to just one (the right one).  We can't combine
  1881                interval operators with these because of, e.g., 'a{2}*',
  1882                which should only match an even number of 'a's.  */
  1883 
  1884             for (;;)
  1885               {
  1886                 if (c == '?' && (zero_times_ok || many_times_ok))
  1887                   greedy = false;
  1888                 else
  1889                   {
  1890                     zero_times_ok |= c != '+';
  1891                     many_times_ok |= c != '?';
  1892                   }
  1893 
  1894                 if (! (p < pend && (*p == '*' || *p == '+' || *p == '?')))
  1895                   break;
  1896                 /* If we get here, we found another repeat character.  */
  1897                 c = *p++;
  1898                }
  1899 
  1900             /* Star, etc. applied to an empty pattern is equivalent
  1901                to an empty pattern.  */
  1902             if (!laststart || laststart == b)
  1903               break;
  1904 
  1905             /* Now we know whether or not zero matches is allowed
  1906                and also whether or not two or more matches is allowed.  */
  1907             if (greedy)
  1908               {
  1909                 if (many_times_ok)
  1910                   {
  1911                     bool simple = skip_one_char (laststart) == b;
  1912                     ptrdiff_t startoffset = 0;
  1913                     re_opcode_t ofj =
  1914                       /* Check if the loop can match the empty string.  */
  1915                       (simple || !analyze_first (laststart, b, NULL, false))
  1916                       ? on_failure_jump : on_failure_jump_loop;
  1917                     eassert (skip_one_char (laststart) <= b);
  1918 
  1919                     if (!zero_times_ok && simple)
  1920                       { /* Since simple * loops can be made faster by using
  1921                            on_failure_keep_string_jump, we turn simple P+
  1922                            into PP* if P is simple.  */
  1923                         unsigned char *p1, *p2;
  1924                         startoffset = b - laststart;
  1925                         GET_BUFFER_SPACE (startoffset);
  1926                         p1 = b; p2 = laststart;
  1927                         while (p2 < p1)
  1928                           *b++ = *p2++;
  1929                         zero_times_ok = 1;
  1930                       }
  1931 
  1932                     GET_BUFFER_SPACE (6);
  1933                     if (!zero_times_ok)
  1934                       /* A + loop.  */
  1935                       STORE_JUMP (ofj, b, b + 6);
  1936                     else
  1937                       /* Simple * loops can use on_failure_keep_string_jump
  1938                          depending on what follows.  But since we don't know
  1939                          that yet, we leave the decision up to
  1940                          on_failure_jump_smart.  */
  1941                       INSERT_JUMP (simple ? on_failure_jump_smart : ofj,
  1942                                    laststart + startoffset, b + 6);
  1943                     b += 3;
  1944                     STORE_JUMP (jump, b, laststart + startoffset);
  1945                     b += 3;
  1946                   }
  1947                 else
  1948                   {
  1949                     /* A simple ? pattern.  */
  1950                     eassert (zero_times_ok);
  1951                     GET_BUFFER_SPACE (3);
  1952                     INSERT_JUMP (on_failure_jump, laststart, b + 3);
  1953                     b += 3;
  1954                   }
  1955               }
  1956             else                /* not greedy */
  1957               { /* I wish the greedy and non-greedy cases could be merged.  */
  1958 
  1959                 GET_BUFFER_SPACE (7); /* We might use less.  */
  1960                 if (many_times_ok)
  1961                   {
  1962                     bool emptyp = !!analyze_first (laststart, b, NULL, false);
  1963 
  1964                     /* The non-greedy multiple match looks like
  1965                        a repeat..until: we only need a conditional jump
  1966                        at the end of the loop.  */
  1967                     if (emptyp) BUF_PUSH (no_op);
  1968                     STORE_JUMP (emptyp ? on_failure_jump_nastyloop
  1969                                 : on_failure_jump, b, laststart);
  1970                     b += 3;
  1971                     if (zero_times_ok)
  1972                       {
  1973                         /* The repeat...until naturally matches one or more.
  1974                            To also match zero times, we need to first jump to
  1975                            the end of the loop (its conditional jump).  */
  1976                         INSERT_JUMP (jump, laststart, b);
  1977                         b += 3;
  1978                       }
  1979                   }
  1980                 else
  1981                   {
  1982                     /* non-greedy a?? */
  1983                     INSERT_JUMP (jump, laststart, b + 3);
  1984                     b += 3;
  1985                     INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
  1986                     b += 3;
  1987                   }
  1988               }
  1989           }
  1990           pending_exact = 0;
  1991           break;
  1992 
  1993 
  1994         case '.':
  1995           laststart = b;
  1996           BUF_PUSH (anychar);
  1997           break;
  1998 
  1999 
  2000         case '[':
  2001           {
  2002             re_char *p1;
  2003 
  2004             CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
  2005 
  2006             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  2007 
  2008             /* Ensure that we have enough space to push a charset: the
  2009                opcode, the length count, and the bitset; 34 bytes in all.  */
  2010             GET_BUFFER_SPACE (34);
  2011 
  2012             laststart = b;
  2013 
  2014             /* Test '*p == '^' twice, instead of using an if
  2015                statement, so we need only one BUF_PUSH.  */
  2016             BUF_PUSH (*p == '^' ? charset_not : charset);
  2017             if (*p == '^')
  2018               p++;
  2019 
  2020             /* Remember the first position in the bracket expression.  */
  2021             p1 = p;
  2022 
  2023             /* Push the number of bytes in the bitmap.  */
  2024             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
  2025 
  2026             /* Clear the whole map.  */
  2027             memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
  2028 
  2029             /* Read in characters and ranges, setting map bits.  */
  2030             for (;;)
  2031               {
  2032                 const unsigned char *p2 = p;
  2033                 int ch;
  2034 
  2035                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
  2036 
  2037                 /* See if we're at the beginning of a possible character
  2038                    class.  */
  2039                 re_wctype_t cc = re_wctype_parse (&p, pend - p);
  2040                 if (cc != -1)
  2041                   {
  2042                     if (cc == 0)
  2043                       FREE_STACK_RETURN (REG_ECTYPE);
  2044 
  2045                     if (p == pend)
  2046                       FREE_STACK_RETURN (REG_EBRACK);
  2047 
  2048                     /* Most character classes in a multibyte match just set
  2049                        a flag.  Exceptions are is_blank, is_digit, is_cntrl, and
  2050                        is_xdigit, since they can only match ASCII characters.
  2051                        We don't need to handle them for multibyte.  */
  2052 
  2053                     /* Setup the gl_state object to its buffer-defined value.
  2054                        This hardcodes the buffer-global syntax-table for ASCII
  2055                        chars, while the other chars will obey syntax-table
  2056                        properties.  It's not ideal, but it's the way it's been
  2057                        done until now.  */
  2058                     SETUP_BUFFER_SYNTAX_TABLE ();
  2059 
  2060                     for (c = 0; c < 0x80; ++c)
  2061                       if (re_iswctype (c, cc))
  2062                         {
  2063                           SET_LIST_BIT (c);
  2064                           c1 = TRANSLATE (c);
  2065                           if (c1 == c)
  2066                             continue;
  2067                           if (ASCII_CHAR_P (c1))
  2068                             SET_LIST_BIT (c1);
  2069                           else if ((c1 = RE_CHAR_TO_UNIBYTE (c1)) >= 0)
  2070                             SET_LIST_BIT (c1);
  2071                         }
  2072                     SET_RANGE_TABLE_WORK_AREA_BIT
  2073                       (range_table_work, re_wctype_to_bit (cc));
  2074 
  2075                     /* In most cases the matching rule for char classes only
  2076                        uses the syntax table for multibyte chars, so that the
  2077                        content of the syntax-table is not hardcoded in the
  2078                        range_table.  SPACE and WORD are the two exceptions.  */
  2079                     if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
  2080                       bufp->used_syntax = true;
  2081 
  2082                     /* Repeat the loop. */
  2083                     continue;
  2084                   }
  2085 
  2086                 /* Don't translate yet.  The range TRANSLATE(X..Y) cannot
  2087                    always be determined from TRANSLATE(X) and TRANSLATE(Y)
  2088                    So the translation is done later in a loop.  Example:
  2089                    (let ((case-fold-search t)) (string-match "[A-_]" "A"))  */
  2090                 PATFETCH (c);
  2091 
  2092                 /* Could be the end of the bracket expression.  If it's
  2093                    not (i.e., when the bracket expression is '[]' so
  2094                    far), the ']' character bit gets set way below.  */
  2095                 if (c == ']' && p2 != p1)
  2096                   break;
  2097 
  2098                 if (p < pend && p[0] == '-' && p[1] != ']')
  2099                   {
  2100 
  2101                     /* Discard the '-'. */
  2102                     PATFETCH (c1);
  2103 
  2104                     /* Fetch the character which ends the range. */
  2105                     PATFETCH (c1);
  2106 
  2107                     if (CHAR_BYTE8_P (c1)
  2108                         && ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c))
  2109                       /* Treat the range from a multibyte character to
  2110                          raw-byte character as empty.  */
  2111                       c = c1 + 1;
  2112                   }
  2113                 else
  2114                   /* Range from C to C. */
  2115                   c1 = c;
  2116 
  2117                 if (c <= c1)
  2118                   {
  2119                     if (c < 128)
  2120                       {
  2121                         ch = min (127, c1);
  2122                         SETUP_ASCII_RANGE (range_table_work, c, ch);
  2123                         c = ch + 1;
  2124                         if (CHAR_BYTE8_P (c1))
  2125                           c = BYTE8_TO_CHAR (128);
  2126                       }
  2127                     if (c <= c1)
  2128                       {
  2129                         if (CHAR_BYTE8_P (c))
  2130                           {
  2131                             c = CHAR_TO_BYTE8 (c);
  2132                             c1 = CHAR_TO_BYTE8 (c1);
  2133                             for (; c <= c1; c++)
  2134                               SET_LIST_BIT (c);
  2135                           }
  2136                         else if (multibyte)
  2137                           SETUP_MULTIBYTE_RANGE (range_table_work, c, c1);
  2138                         else
  2139                           SETUP_UNIBYTE_RANGE (range_table_work, c, c1);
  2140                       }
  2141                   }
  2142               }
  2143 
  2144             /* Discard any (non)matching list bytes that are all 0 at the
  2145                end of the map.  Decrease the map-length byte too.  */
  2146             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
  2147               b[-1]--;
  2148             b += b[-1];
  2149 
  2150             /* Build real range table from work area.  */
  2151             if (RANGE_TABLE_WORK_USED (range_table_work)
  2152                 || RANGE_TABLE_WORK_BITS (range_table_work))
  2153               {
  2154                 int i;
  2155                 int used = RANGE_TABLE_WORK_USED (range_table_work);
  2156 
  2157                 /* Allocate space for COUNT + RANGE_TABLE.  Needs two
  2158                    bytes for flags, two for COUNT, and three bytes for
  2159                    each character.  */
  2160                 GET_BUFFER_SPACE (4 + used * 3);
  2161 
  2162                 /* Indicate the existence of range table.  */
  2163                 laststart[1] |= 0x80;
  2164 
  2165                 /* Store the character class flag bits into the range table.  */
  2166                 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
  2167                 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
  2168 
  2169                 STORE_NUMBER_AND_INCR (b, used / 2);
  2170                 for (i = 0; i < used; i++)
  2171                   STORE_CHARACTER_AND_INCR
  2172                     (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
  2173               }
  2174           }
  2175           break;
  2176 
  2177 
  2178         case '\\':
  2179           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
  2180 
  2181           /* Do not translate the character after the \, so that we can
  2182              distinguish, e.g., \B from \b, even if we normally would
  2183              translate, e.g., B to b.  */
  2184           PATFETCH (c);
  2185 
  2186           switch (c)
  2187             {
  2188             case '(':
  2189               {
  2190                 bool shy = false;
  2191                 regnum_t regnum = 0;
  2192                 if (p+1 < pend)
  2193                   {
  2194                     /* Look for a special (?...) construct */
  2195                     if (*p == '?')
  2196                       {
  2197                         PATFETCH (c); /* Gobble up the '?'.  */
  2198                         while (!shy)
  2199                           {
  2200                             PATFETCH (c);
  2201                             switch (c)
  2202                               {
  2203                               case ':': shy = true; break;
  2204                               case '0':
  2205                                 /* An explicitly specified regnum must start
  2206                                    with non-0. */
  2207                                 if (regnum == 0)
  2208                                   FREE_STACK_RETURN (REG_BADPAT);
  2209                                 FALLTHROUGH;
  2210                               case '1': case '2': case '3': case '4':
  2211                               case '5': case '6': case '7': case '8': case '9':
  2212                                 if (INT_MULTIPLY_WRAPV (regnum, 10, &regnum)
  2213                                     || INT_ADD_WRAPV (regnum, c - '0',
  2214                                                       &regnum))
  2215                                   FREE_STACK_RETURN (REG_ESIZE);
  2216                                 break;
  2217                               default:
  2218                                 /* Only (?:...) is supported right now. */
  2219                                 FREE_STACK_RETURN (REG_BADPAT);
  2220                               }
  2221                           }
  2222                       }
  2223                   }
  2224 
  2225                 if (!shy)
  2226                   regnum = ++bufp->re_nsub;
  2227                 else if (regnum)
  2228                   { /* It's actually not shy, but explicitly numbered.  */
  2229                     shy = false;
  2230                     if (regnum > bufp->re_nsub)
  2231                       bufp->re_nsub = regnum;
  2232                     else if (regnum > bufp->re_nsub
  2233                              /* Ideally, we'd want to check that the specified
  2234                                 group can't have matched (i.e. all subgroups
  2235                                 using the same regnum are in other branches of
  2236                                 OR patterns), but we don't currently keep track
  2237                                 of enough info to do that easily.  */
  2238                              || group_in_compile_stack (compile_stack, regnum))
  2239                       FREE_STACK_RETURN (REG_BADPAT);
  2240                   }
  2241                 else
  2242                   /* It's really shy.  */
  2243                   regnum = - bufp->re_nsub;
  2244 
  2245                 if (COMPILE_STACK_FULL)
  2246                   compile_stack.stack
  2247                     = xpalloc (compile_stack.stack, &compile_stack.size,
  2248                                1, -1, sizeof *compile_stack.stack);
  2249 
  2250                 /* These are the values to restore when we hit end of this
  2251                    group.  They are all relative offsets, so that if the
  2252                    whole pattern moves because of realloc, they will still
  2253                    be valid.  */
  2254                 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
  2255                 COMPILE_STACK_TOP.fixup_alt_jump
  2256                   = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
  2257                 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
  2258                 COMPILE_STACK_TOP.regnum = regnum;
  2259 
  2260                 /* Do not push a start_memory for groups beyond the last one
  2261                    we can represent in the compiled pattern.  */
  2262                 if (regnum <= MAX_REGNUM && regnum > 0)
  2263                   BUF_PUSH_2 (start_memory, regnum);
  2264 
  2265                 compile_stack.avail++;
  2266 
  2267                 fixup_alt_jump = 0;
  2268                 laststart = 0;
  2269                 begalt = b;
  2270                 /* If we've reached MAX_REGNUM groups, then this open
  2271                    won't actually generate any code, so we'll have to
  2272                    clear pending_exact explicitly.  */
  2273                 pending_exact = 0;
  2274                 break;
  2275               }
  2276 
  2277             case ')':
  2278               if (COMPILE_STACK_EMPTY)
  2279                 FREE_STACK_RETURN (REG_ERPAREN);
  2280 
  2281               FIXUP_ALT_JUMP ();
  2282 
  2283               /* See similar code for backslashed left paren above.  */
  2284               if (COMPILE_STACK_EMPTY)
  2285                 FREE_STACK_RETURN (REG_ERPAREN);
  2286 
  2287               /* Since we just checked for an empty stack above, this
  2288                  "can't happen".  */
  2289               eassert (compile_stack.avail != 0);
  2290               {
  2291                 /* We don't just want to restore into 'regnum', because
  2292                    later groups should continue to be numbered higher,
  2293                    as in '(ab)c(de)' -- the second group is #2.  */
  2294                 regnum_t regnum;
  2295 
  2296                 compile_stack.avail--;
  2297                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
  2298                 fixup_alt_jump
  2299                   = COMPILE_STACK_TOP.fixup_alt_jump
  2300                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
  2301                     : 0;
  2302                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
  2303                 regnum = COMPILE_STACK_TOP.regnum;
  2304                 /* If we've reached MAX_REGNUM groups, then this open
  2305                    won't actually generate any code, so we'll have to
  2306                    clear pending_exact explicitly.  */
  2307                 pending_exact = 0;
  2308 
  2309                 /* We're at the end of the group, so now we know how many
  2310                    groups were inside this one.  */
  2311                 if (regnum <= MAX_REGNUM && regnum > 0)
  2312                   BUF_PUSH_2 (stop_memory, regnum);
  2313               }
  2314               break;
  2315 
  2316 
  2317             case '|':                                   /* '\|'.  */
  2318               /* Insert before the previous alternative a jump which
  2319                  jumps to this alternative if the former fails.  */
  2320               GET_BUFFER_SPACE (3);
  2321               INSERT_JUMP (on_failure_jump, begalt, b + 6);
  2322               pending_exact = 0;
  2323               b += 3;
  2324 
  2325               /* The alternative before this one has a jump after it
  2326                  which gets executed if it gets matched.  Adjust that
  2327                  jump so it will jump to this alternative's analogous
  2328                  jump (put in below, which in turn will jump to the next
  2329                  (if any) alternative's such jump, etc.).  The last such
  2330                  jump jumps to the correct final destination.  A picture:
  2331                           _____ _____
  2332                           |   | |   |
  2333                           |   v |   v
  2334                         A | B   | C
  2335 
  2336                  If we are at B, then fixup_alt_jump right now points to a
  2337                  three-byte space after A.  We'll put in the jump, set
  2338                  fixup_alt_jump to right after B, and leave behind three
  2339                  bytes which we'll fill in when we get to after C.  */
  2340 
  2341               FIXUP_ALT_JUMP ();
  2342 
  2343               /* Mark and leave space for a jump after this alternative,
  2344                  to be filled in later either by next alternative or
  2345                  when know we're at the end of a series of alternatives.  */
  2346               fixup_alt_jump = b;
  2347               GET_BUFFER_SPACE (3);
  2348               b += 3;
  2349 
  2350               laststart = 0;
  2351               begalt = b;
  2352               break;
  2353 
  2354 
  2355             case '{':
  2356               {
  2357                 /* At least (most) this many matches must be made.  */
  2358                 int lower_bound = 0, upper_bound = -1;
  2359 
  2360                 beg_interval = p;
  2361 
  2362                 GET_INTERVAL_COUNT (lower_bound);
  2363 
  2364                 if (c == ',')
  2365                   GET_INTERVAL_COUNT (upper_bound);
  2366                 else
  2367                   /* Interval such as '{1}' => match exactly once. */
  2368                   upper_bound = lower_bound;
  2369 
  2370                 if (lower_bound < 0
  2371                     || (0 <= upper_bound && upper_bound < lower_bound)
  2372                     || c != '\\')
  2373                   FREE_STACK_RETURN (REG_BADBR);
  2374                 if (p == pend)
  2375                   FREE_STACK_RETURN (REG_EESCAPE);
  2376                 if (*p++ != '}')
  2377                   FREE_STACK_RETURN (REG_BADBR);
  2378 
  2379                 /* We just parsed a valid interval.  */
  2380 
  2381                 /* If it's invalid to have no preceding re.  */
  2382                 if (!laststart)
  2383                   goto unfetch_interval;
  2384 
  2385                 if (upper_bound == 0)
  2386                   /* If the upper bound is zero, just drop the sub pattern
  2387                      altogether.  */
  2388                   b = laststart;
  2389                 else if (lower_bound == 1 && upper_bound == 1)
  2390                   /* Just match it once: nothing to do here.  */
  2391                   ;
  2392 
  2393                 /* Otherwise, we have a nontrivial interval.  When
  2394                    we're all done, the pattern will look like:
  2395                    set_number_at <jump count> <upper bound>
  2396                    set_number_at <succeed_n count> <lower bound>
  2397                    succeed_n <after jump addr> <succeed_n count>
  2398                    <body of loop>
  2399                    jump_n <succeed_n addr> <jump count>
  2400                    (The upper bound and 'jump_n' are omitted if
  2401                    'upper_bound' is 1, though.)  */
  2402                 else
  2403                   { /* If the upper bound is > 1, we need to insert
  2404                        more at the end of the loop.  */
  2405                     int nbytes = upper_bound < 0 ? 3 : upper_bound > 1 ? 5 : 0;
  2406                     int startoffset = 0;
  2407 
  2408                     GET_BUFFER_SPACE (20); /* We might use less.  */
  2409 
  2410                     if (lower_bound == 0)
  2411                       {
  2412                         /* A succeed_n that starts with 0 is really
  2413                            a simple on_failure_jump_loop.  */
  2414                         INSERT_JUMP (on_failure_jump_loop, laststart,
  2415                                      b + 3 + nbytes);
  2416                         b += 3;
  2417                       }
  2418                     else
  2419                       {
  2420                         /* Initialize lower bound of the 'succeed_n', even
  2421                            though it will be set during matching by its
  2422                            attendant 'set_number_at' (inserted next),
  2423                            because 're_compile_fastmap' needs to know.
  2424                            Jump to the 'jump_n' we might insert below.  */
  2425                         INSERT_JUMP2 (succeed_n, laststart,
  2426                                       b + 5 + nbytes,
  2427                                       lower_bound);
  2428                         b += 5;
  2429 
  2430                         /* Code to initialize the lower bound.  Insert
  2431                            before the 'succeed_n'.  The '5' is the last two
  2432                            bytes of this 'set_number_at', plus 3 bytes of
  2433                            the following 'succeed_n'.  */
  2434                         insert_op2 (set_number_at, laststart, 5,
  2435                                     lower_bound, b);
  2436                         b += 5;
  2437                         startoffset += 5;
  2438                       }
  2439 
  2440                     if (upper_bound < 0)
  2441                       {
  2442                         /* A negative upper bound stands for infinity,
  2443                            in which case it degenerates to a plain jump.  */
  2444                         STORE_JUMP (jump, b, laststart + startoffset);
  2445                         b += 3;
  2446                       }
  2447                     else if (upper_bound > 1)
  2448                       { /* More than one repetition is allowed, so
  2449                            append a backward jump to the 'succeed_n'
  2450                            that starts this interval.
  2451 
  2452                            When we've reached this during matching,
  2453                            we'll have matched the interval once, so
  2454                            jump back only 'upper_bound - 1' times.  */
  2455                         STORE_JUMP2 (jump_n, b, laststart + startoffset,
  2456                                      upper_bound - 1);
  2457                         b += 5;
  2458 
  2459                         /* The location we want to set is the second
  2460                            parameter of the 'jump_n'; that is 'b-2' as
  2461                            an absolute address.  'laststart' will be
  2462                            the 'set_number_at' we're about to insert;
  2463                            'laststart+3' the number to set, the source
  2464                            for the relative address.  But we are
  2465                            inserting into the middle of the pattern --
  2466                            so everything is getting moved up by 5.
  2467                            Conclusion: (b - 2) - (laststart + 3) + 5,
  2468                            i.e., b - laststart.
  2469 
  2470                            Insert this at the beginning of the loop
  2471                            so that if we fail during matching, we'll
  2472                            reinitialize the bounds.  */
  2473                         insert_op2 (set_number_at, laststart, b - laststart,
  2474                                     upper_bound - 1, b);
  2475                         b += 5;
  2476                       }
  2477                   }
  2478                 pending_exact = 0;
  2479                 beg_interval = NULL;
  2480               }
  2481               break;
  2482 
  2483             unfetch_interval:
  2484               /* If an invalid interval, match the characters as literals.  */
  2485                eassert (beg_interval);
  2486                p = beg_interval;
  2487                beg_interval = NULL;
  2488                eassert (p > pattern && p[-1] == '\\');
  2489                c = '{';
  2490                goto normal_char;
  2491 
  2492             case '=':
  2493               laststart = b;
  2494               BUF_PUSH (at_dot);
  2495               break;
  2496 
  2497             case 's':
  2498               laststart = b;
  2499               PATFETCH (c);
  2500               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
  2501               break;
  2502 
  2503             case 'S':
  2504               laststart = b;
  2505               PATFETCH (c);
  2506               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
  2507               break;
  2508 
  2509             case 'c':
  2510               laststart = b;
  2511               PATFETCH (c);
  2512               BUF_PUSH_2 (categoryspec, c);
  2513               break;
  2514 
  2515             case 'C':
  2516               laststart = b;
  2517               PATFETCH (c);
  2518               BUF_PUSH_2 (notcategoryspec, c);
  2519               break;
  2520 
  2521             case 'w':
  2522               laststart = b;
  2523               BUF_PUSH_2 (syntaxspec, Sword);
  2524               break;
  2525 
  2526 
  2527             case 'W':
  2528               laststart = b;
  2529               BUF_PUSH_2 (notsyntaxspec, Sword);
  2530               break;
  2531 
  2532 
  2533             case '<':
  2534               laststart = b;
  2535               BUF_PUSH (wordbeg);
  2536               break;
  2537 
  2538             case '>':
  2539               laststart = b;
  2540               BUF_PUSH (wordend);
  2541               break;
  2542 
  2543             case '_':
  2544               laststart = b;
  2545               PATFETCH (c);
  2546               if (c == '<')
  2547                 BUF_PUSH (symbeg);
  2548               else if (c == '>')
  2549                 BUF_PUSH (symend);
  2550               else
  2551                 FREE_STACK_RETURN (REG_BADPAT);
  2552               break;
  2553 
  2554             case 'b':
  2555               BUF_PUSH (wordbound);
  2556               break;
  2557 
  2558             case 'B':
  2559               BUF_PUSH (notwordbound);
  2560               break;
  2561 
  2562             case '`':
  2563               BUF_PUSH (begbuf);
  2564               break;
  2565 
  2566             case '\'':
  2567               BUF_PUSH (endbuf);
  2568               break;
  2569 
  2570             case '1': case '2': case '3': case '4': case '5':
  2571             case '6': case '7': case '8': case '9':
  2572               {
  2573                 regnum_t reg = c - '0';
  2574 
  2575                 if (reg > bufp->re_nsub || reg < 1
  2576                     /* Can't back reference to a subexp before its end.  */
  2577                     || group_in_compile_stack (compile_stack, reg))
  2578                   FREE_STACK_RETURN (REG_ESUBREG);
  2579 
  2580                 laststart = b;
  2581                 BUF_PUSH_2 (duplicate, reg);
  2582               }
  2583               break;
  2584 
  2585             default:
  2586               /* You might think it would be useful for \ to mean
  2587                  not to translate; but if we don't translate it
  2588                  it will never match anything.  */
  2589               goto normal_char;
  2590             }
  2591           break;
  2592 
  2593 
  2594         default:
  2595         /* Expects the character in C.  */
  2596         normal_char:
  2597           /* If no exactn currently being built.  */
  2598           if (!pending_exact
  2599 
  2600               /* If last exactn not at current position.  */
  2601               || pending_exact + *pending_exact + 1 != b
  2602 
  2603               /* Only one byte follows the exactn for the count.  */
  2604               || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
  2605 
  2606               /* If followed by a repetition operator.  */
  2607               || (p != pend
  2608                   && (*p == '*' || *p == '+' || *p == '?' || *p == '^'))
  2609               || (p + 1 < pend && p[0] == '\\' && p[1] == '{'))
  2610             {
  2611               /* Start building a new exactn.  */
  2612 
  2613               laststart = b;
  2614 
  2615               BUF_PUSH_2 (exactn, 0);
  2616               pending_exact = b - 1;
  2617             }
  2618 
  2619           GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
  2620           {
  2621             int len;
  2622 
  2623             if (multibyte)
  2624               {
  2625                 c = TRANSLATE (c);
  2626                 len = CHAR_STRING (c, b);
  2627                 b += len;
  2628               }
  2629             else
  2630               {
  2631                 c1 = RE_CHAR_TO_MULTIBYTE (c);
  2632                 if (! CHAR_BYTE8_P (c1))
  2633                   {
  2634                     int c2 = TRANSLATE (c1);
  2635 
  2636                     if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
  2637                       c = c1;
  2638                   }
  2639                 *b++ = c;
  2640                 len = 1;
  2641               }
  2642             (*pending_exact) += len;
  2643           }
  2644 
  2645           break;
  2646         } /* switch (c) */
  2647     } /* while p != pend */
  2648 
  2649 
  2650   /* Through the pattern now.  */
  2651 
  2652   FIXUP_ALT_JUMP ();
  2653 
  2654   if (!COMPILE_STACK_EMPTY)
  2655     FREE_STACK_RETURN (REG_EPAREN);
  2656 
  2657   /* If we don't want backtracking, force success
  2658      the first time we reach the end of the compiled pattern.  */
  2659   if (!posix_backtracking)
  2660     BUF_PUSH (succeed);
  2661 
  2662   /* Success; set the length of the buffer.  */
  2663   bufp->used = b - bufp->buffer;
  2664 
  2665 #ifdef REGEX_EMACS_DEBUG
  2666   if (regex_emacs_debug > 0)
  2667     {
  2668       re_compile_fastmap (bufp);
  2669       DEBUG_PRINT ("\nCompiled pattern:\n");
  2670       print_compiled_pattern (bufp);
  2671     }
  2672   regex_emacs_debug--;
  2673 #endif
  2674 
  2675   FREE_STACK_RETURN (REG_NOERROR);
  2676 
  2677 } /* regex_compile */
  2678 
  2679 /* Subroutines for 'regex_compile'.  */
  2680 
  2681 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
  2682 
  2683 static void
  2684 store_op1 (re_opcode_t op, unsigned char *loc, int arg)
  2685 {
  2686   *loc = (unsigned char) op;
  2687   STORE_NUMBER (loc + 1, arg);
  2688 }
  2689 
  2690 
  2691 /* Like 'store_op1', but for two two-byte parameters ARG1 and ARG2.  */
  2692 
  2693 static void
  2694 store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2)
  2695 {
  2696   *loc = (unsigned char) op;
  2697   STORE_NUMBER (loc + 1, arg1);
  2698   STORE_NUMBER (loc + 3, arg2);
  2699 }
  2700 
  2701 
  2702 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
  2703    for OP followed by two-byte integer parameter ARG.  */
  2704 
  2705 static void
  2706 insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
  2707 {
  2708   register unsigned char *pfrom = end;
  2709   register unsigned char *pto = end + 3;
  2710 
  2711   while (pfrom != loc)
  2712     *--pto = *--pfrom;
  2713 
  2714   store_op1 (op, loc, arg);
  2715 }
  2716 
  2717 
  2718 /* Like 'insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
  2719 
  2720 static void
  2721 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
  2722             unsigned char *end)
  2723 {
  2724   register unsigned char *pfrom = end;
  2725   register unsigned char *pto = end + 5;
  2726 
  2727   while (pfrom != loc)
  2728     *--pto = *--pfrom;
  2729 
  2730   store_op2 (op, loc, arg1, arg2);
  2731 }
  2732 
  2733 
  2734 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
  2735    after an alternative or a begin-subexpression.  Assume there is at
  2736    least one character before the ^.  */
  2737 
  2738 static bool
  2739 at_begline_loc_p (re_char *pattern, re_char *p)
  2740 {
  2741   re_char *prev = p - 2;
  2742 
  2743   switch (*prev)
  2744     {
  2745     case '(': /* After a subexpression.  */
  2746     case '|': /* After an alternative.  */
  2747       break;
  2748 
  2749     case ':': /* After a shy subexpression.  */
  2750       /* Skip over optional regnum.  */
  2751       while (prev > pattern && '0' <= prev[-1] && prev[-1] <= '9')
  2752         --prev;
  2753 
  2754       if (! (prev > pattern + 1 && prev[-1] == '?' && prev[-2] == '('))
  2755         return false;
  2756       prev -= 2;
  2757       break;
  2758 
  2759     default:
  2760       return false;
  2761     }
  2762 
  2763   /* Count the number of preceding backslashes.  */
  2764   p = prev;
  2765   while (prev > pattern && prev[-1] == '\\')
  2766     --prev;
  2767   return (p - prev) & 1;
  2768 }
  2769 
  2770 
  2771 /* The dual of at_begline_loc_p.  This one is for $.  Assume there is
  2772    at least one character after the $, i.e., 'P < PEND'.  */
  2773 
  2774 static bool
  2775 at_endline_loc_p (re_char *p, re_char *pend)
  2776 {
  2777   /* Before a subexpression or an alternative?  */
  2778   return *p == '\\' && p + 1 < pend && (p[1] == ')' || p[1] == '|');
  2779 }
  2780 
  2781 
  2782 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
  2783    false if it's not.  */
  2784 
  2785 static bool
  2786 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
  2787 {
  2788   ptrdiff_t this_element;
  2789 
  2790   for (this_element = compile_stack.avail - 1;
  2791        this_element >= 0;
  2792        this_element--)
  2793     if (compile_stack.stack[this_element].regnum == regnum)
  2794       return true;
  2795 
  2796   return false;
  2797 }
  2798 
  2799 /* analyze_first.
  2800    If fastmap is non-NULL, go through the pattern and fill fastmap
  2801    with all the possible leading chars.  If fastmap is NULL, don't
  2802    bother filling it up (obviously) and only return whether the
  2803    pattern could potentially match the empty string.
  2804 
  2805    Return 1  if p..pend might match the empty string.
  2806    Return 0  if p..pend matches at least one char.
  2807    Return -1 if fastmap was not updated accurately.  */
  2808 
  2809 static int
  2810 analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte)
  2811 {
  2812   int j, k;
  2813   int nbits;
  2814   bool not;
  2815 
  2816   /* If all elements for base leading-codes in fastmap is set, this
  2817      flag is set true.  */
  2818   bool match_any_multibyte_characters = false;
  2819 
  2820   eassert (p);
  2821 
  2822   /* The loop below works as follows:
  2823      - It has a working-list kept in the PATTERN_STACK and which basically
  2824        starts by only containing a pointer to the first operation.
  2825      - If the opcode we're looking at is a match against some set of
  2826        chars, then we add those chars to the fastmap and go on to the
  2827        next work element from the worklist (done via 'break').
  2828      - If the opcode is a control operator on the other hand, we either
  2829        ignore it (if it's meaningless at this point, such as 'start_memory')
  2830        or execute it (if it's a jump).  If the jump has several destinations
  2831        (i.e. 'on_failure_jump'), then we push the other destination onto the
  2832        worklist.
  2833      We guarantee termination by ignoring backward jumps (more or less),
  2834      so that P is monotonically increasing.  More to the point, we
  2835      never set P (or push) anything '<= p1'.  */
  2836 
  2837   while (p < pend)
  2838     {
  2839       /* P1 is used as a marker of how far back a 'on_failure_jump'
  2840          can go without being ignored.  It is normally equal to P
  2841          (which prevents any backward 'on_failure_jump') except right
  2842          after a plain 'jump', to allow patterns such as:
  2843             0: jump 10
  2844             3..9: <body>
  2845             10: on_failure_jump 3
  2846          as used for the *? operator.  */
  2847       re_char *p1 = p;
  2848 
  2849       switch (*p++)
  2850         {
  2851         case succeed:
  2852           return 1;
  2853 
  2854         case duplicate:
  2855           /* If the first character has to match a backreference, that means
  2856              that the group was empty (since it already matched).  Since this
  2857              is the only case that interests us here, we can assume that the
  2858              backreference must match the empty string.  */
  2859           p++;
  2860           continue;
  2861 
  2862 
  2863       /* Following are the cases which match a character.  These end
  2864          with 'break'.  */
  2865 
  2866         case exactn:
  2867           if (fastmap)
  2868             {
  2869               /* If multibyte is nonzero, the first byte of each
  2870                  character is an ASCII or a leading code.  Otherwise,
  2871                  each byte is a character.  Thus, this works in both
  2872                  cases. */
  2873               fastmap[p[1]] = 1;
  2874               if (multibyte)
  2875                 {
  2876                   /* Cover the case of matching a raw char in a
  2877                      multibyte regexp against unibyte.  */
  2878                   if (CHAR_BYTE8_HEAD_P (p[1]))
  2879                     fastmap[CHAR_TO_BYTE8 (STRING_CHAR (p + 1))] = 1;
  2880                 }
  2881               else
  2882                 {
  2883                   /* For the case of matching this unibyte regex
  2884                      against multibyte, we must set a leading code of
  2885                      the corresponding multibyte character.  */
  2886                   int c = RE_CHAR_TO_MULTIBYTE (p[1]);
  2887 
  2888                   fastmap[CHAR_LEADING_CODE (c)] = 1;
  2889                 }
  2890             }
  2891           break;
  2892 
  2893 
  2894         case anychar:
  2895           /* We could put all the chars except for \n (and maybe \0)
  2896              but we don't bother since it is generally not worth it.  */
  2897           if (!fastmap) break;
  2898           return -1;
  2899 
  2900 
  2901         case charset_not:
  2902           if (!fastmap) break;
  2903           {
  2904             /* Chars beyond end of bitmap are possible matches.  */
  2905             for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
  2906                  j < (1 << BYTEWIDTH); j++)
  2907               fastmap[j] = 1;
  2908           }
  2909           FALLTHROUGH;
  2910         case charset:
  2911           if (!fastmap) break;
  2912           not = (re_opcode_t) *(p - 1) == charset_not;
  2913           nbits = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
  2914           p++;
  2915           for (j = 0; j < nbits; j++)
  2916             if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
  2917               fastmap[j] = 1;
  2918 
  2919           /* To match raw bytes (in the 80..ff range) against multibyte
  2920              strings, add their leading bytes to the fastmap.  */
  2921           for (j = 0x80; j < nbits; j++)
  2922             if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
  2923               fastmap[CHAR_LEADING_CODE (BYTE8_TO_CHAR (j))] = 1;
  2924 
  2925           if (/* Any leading code can possibly start a character
  2926                  which doesn't match the specified set of characters.  */
  2927               not
  2928               ||
  2929               /* If we can match a character class, we can match any
  2930                  multibyte characters.  */
  2931               (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
  2932                && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
  2933 
  2934             {
  2935               if (match_any_multibyte_characters == false)
  2936                 {
  2937                   for (j = MIN_MULTIBYTE_LEADING_CODE;
  2938                        j <= MAX_MULTIBYTE_LEADING_CODE; j++)
  2939                     fastmap[j] = 1;
  2940                   match_any_multibyte_characters = true;
  2941                 }
  2942             }
  2943 
  2944           else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
  2945                    && match_any_multibyte_characters == false)
  2946             {
  2947               /* Set fastmap[I] to 1 where I is a leading code of each
  2948                  multibyte character in the range table. */
  2949               int c, count;
  2950               unsigned char lc1, lc2;
  2951 
  2952               /* Make P points the range table.  '+ 2' is to skip flag
  2953                  bits for a character class.  */
  2954               p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
  2955 
  2956               /* Extract the number of ranges in range table into COUNT.  */
  2957               EXTRACT_NUMBER_AND_INCR (count, p);
  2958               for (; count > 0; count--, p += 3)
  2959                 {
  2960                   /* Extract the start and end of each range.  */
  2961                   EXTRACT_CHARACTER (c, p);
  2962                   lc1 = CHAR_LEADING_CODE (c);
  2963                   p += 3;
  2964                   EXTRACT_CHARACTER (c, p);
  2965                   lc2 = CHAR_LEADING_CODE (c);
  2966                   for (j = lc1; j <= lc2; j++)
  2967                     fastmap[j] = 1;
  2968                 }
  2969             }
  2970           break;
  2971 
  2972         case syntaxspec:
  2973         case notsyntaxspec:
  2974           if (!fastmap) break;
  2975           /* This match depends on text properties.  These end with
  2976              aborting optimizations.  */
  2977           return -1;
  2978 
  2979         case categoryspec:
  2980         case notcategoryspec:
  2981           if (!fastmap) break;
  2982           not = (re_opcode_t)p[-1] == notcategoryspec;
  2983           k = *p++;
  2984           for (j = (1 << BYTEWIDTH); j >= 0; j--)
  2985             if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
  2986               fastmap[j] = 1;
  2987 
  2988           /* Any leading code can possibly start a character which
  2989              has or doesn't has the specified category.  */
  2990           if (match_any_multibyte_characters == false)
  2991             {
  2992               for (j = MIN_MULTIBYTE_LEADING_CODE;
  2993                    j <= MAX_MULTIBYTE_LEADING_CODE; j++)
  2994                 fastmap[j] = 1;
  2995               match_any_multibyte_characters = true;
  2996             }
  2997           break;
  2998 
  2999       /* All cases after this match the empty string.  These end with
  3000          'continue'.  */
  3001 
  3002         case at_dot:
  3003         case no_op:
  3004         case begline:
  3005         case endline:
  3006         case begbuf:
  3007         case endbuf:
  3008         case wordbound:
  3009         case notwordbound:
  3010         case wordbeg:
  3011         case wordend:
  3012         case symbeg:
  3013         case symend:
  3014           continue;
  3015 
  3016 
  3017         case jump:
  3018           EXTRACT_NUMBER_AND_INCR (j, p);
  3019           if (j < 0)
  3020             /* Backward jumps can only go back to code that we've already
  3021                visited.  're_compile' should make sure this is true.  */
  3022             break;
  3023           p += j;
  3024           switch (*p)
  3025             {
  3026             case on_failure_jump:
  3027             case on_failure_keep_string_jump:
  3028             case on_failure_jump_loop:
  3029             case on_failure_jump_nastyloop:
  3030             case on_failure_jump_smart:
  3031               p++;
  3032               break;
  3033             default:
  3034               continue;
  3035             };
  3036           /* Keep P1 to allow the 'on_failure_jump' we are jumping to
  3037              to jump back to "just after here".  */
  3038           FALLTHROUGH;
  3039         case on_failure_jump:
  3040         case on_failure_keep_string_jump:
  3041         case on_failure_jump_nastyloop:
  3042         case on_failure_jump_loop:
  3043         case on_failure_jump_smart:
  3044           EXTRACT_NUMBER_AND_INCR (j, p);
  3045           if (p + j <= p1)
  3046             ; /* Backward jump to be ignored.  */
  3047           else
  3048             { /* We have to look down both arms.
  3049                  We first go down the "straight" path so as to minimize
  3050                  stack usage when going through alternatives.  */
  3051               int r = analyze_first (p, pend, fastmap, multibyte);
  3052               if (r) return r;
  3053               p += j;
  3054             }
  3055           continue;
  3056 
  3057 
  3058         case jump_n:
  3059           /* This code simply does not properly handle forward jump_n.  */
  3060           DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); eassert (j < 0));
  3061           p += 4;
  3062           /* jump_n can either jump or fall through.  The (backward) jump
  3063              case has already been handled, so we only need to look at the
  3064              fallthrough case.  */
  3065           continue;
  3066 
  3067         case succeed_n:
  3068           /* If N == 0, it should be an on_failure_jump_loop instead.  */
  3069           DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); eassert (j > 0));
  3070           p += 4;
  3071           /* We only care about one iteration of the loop, so we don't
  3072              need to consider the case where this behaves like an
  3073              on_failure_jump.  */
  3074           continue;
  3075 
  3076 
  3077         case set_number_at:
  3078           p += 4;
  3079           continue;
  3080 
  3081 
  3082         case start_memory:
  3083         case stop_memory:
  3084           p += 1;
  3085           continue;
  3086 
  3087 
  3088         default:
  3089           abort (); /* We have listed all the cases.  */
  3090         } /* switch *p++ */
  3091 
  3092       /* Getting here means we have found the possible starting
  3093          characters for one path of the pattern -- and that the empty
  3094          string does not match.  We need not follow this path further.  */
  3095       return 0;
  3096     } /* while p */
  3097 
  3098   /* We reached the end without matching anything.  */
  3099   return 1;
  3100 
  3101 } /* analyze_first */
  3102 
  3103 /* Compute a fastmap for the compiled pattern in BUFP.
  3104    A fastmap records which of the (1 << BYTEWIDTH) possible
  3105    characters can start a string that matches the pattern.  This fastmap
  3106    is used by re_search to skip quickly over impossible starting points.
  3107 
  3108    Character codes above (1 << BYTEWIDTH) are not represented in the
  3109    fastmap, but the leading codes are represented.  Thus, the fastmap
  3110    indicates which character sets could start a match.
  3111 
  3112    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
  3113    area as BUFP->fastmap.
  3114 
  3115    Set the 'fastmap', 'fastmap_accurate', and 'can_be_null' fields in
  3116    the pattern buffer.  */
  3117 
  3118 static void
  3119 re_compile_fastmap (struct re_pattern_buffer *bufp)
  3120 {
  3121   char *fastmap = bufp->fastmap;
  3122   int analysis;
  3123 
  3124   eassert (fastmap && bufp->buffer);
  3125 
  3126   memset (fastmap, 0, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
  3127 
  3128   /* FIXME: Is the following assignment correct even when ANALYSIS < 0?  */
  3129   bufp->fastmap_accurate = 1;       /* It will be when we're done.  */
  3130 
  3131   analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used,
  3132                             fastmap, RE_MULTIBYTE_P (bufp));
  3133   bufp->can_be_null = (analysis != 0);
  3134 } /* re_compile_fastmap */
  3135 
  3136 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
  3137    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
  3138    this memory for recording register information.  STARTS and ENDS
  3139    must be allocated using the malloc library routine, and must each
  3140    be at least NUM_REGS * sizeof (ptrdiff_t) bytes long.
  3141 
  3142    If NUM_REGS == 0, then subsequent matches should allocate their own
  3143    register data.
  3144 
  3145    Unless this function is called, the first search or match using
  3146    PATTERN_BUFFER will allocate its own register data, without
  3147    freeing the old data.  */
  3148 
  3149 void
  3150 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
  3151                   ptrdiff_t num_regs, ptrdiff_t *starts, ptrdiff_t *ends)
  3152 {
  3153   if (num_regs)
  3154     {
  3155       bufp->regs_allocated = REGS_REALLOCATE;
  3156       regs->num_regs = num_regs;
  3157       regs->start = starts;
  3158       regs->end = ends;
  3159     }
  3160   else
  3161     {
  3162       bufp->regs_allocated = REGS_UNALLOCATED;
  3163       regs->num_regs = 0;
  3164       regs->start = regs->end = 0;
  3165     }
  3166 }
  3167 
  3168 /* Searching routines.  */
  3169 
  3170 /* Like re_search_2, below, but only one string is specified, and
  3171    doesn't let you say where to stop matching. */
  3172 
  3173 ptrdiff_t
  3174 re_search (struct re_pattern_buffer *bufp, const char *string, ptrdiff_t size,
  3175            ptrdiff_t startpos, ptrdiff_t range, struct re_registers *regs)
  3176 {
  3177   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
  3178                       regs, size);
  3179 }
  3180 
  3181 /* Address of POS in the concatenation of virtual string. */
  3182 #define POS_ADDR_VSTRING(POS)                                   \
  3183   (((POS) >= size1 ? string2 - size1 : string1) + (POS))
  3184 
  3185 /* Using the compiled pattern in BUFP->buffer, first tries to match the
  3186    virtual concatenation of STRING1 and STRING2, starting first at index
  3187    STARTPOS, then at STARTPOS + 1, and so on.
  3188 
  3189    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
  3190 
  3191    RANGE is how far to scan while trying to match.  RANGE = 0 means try
  3192    only at STARTPOS; in general, the last start tried is STARTPOS +
  3193    RANGE.
  3194 
  3195    In REGS, return the indices of the virtual concatenation of STRING1
  3196    and STRING2 that matched the entire BUFP->buffer and its contained
  3197    subexpressions.
  3198 
  3199    Do not consider matching one past the index STOP in the virtual
  3200    concatenation of STRING1 and STRING2.
  3201 
  3202    Return either the position in the strings at which the match was
  3203    found, -1 if no match, or -2 if error (such as failure
  3204    stack overflow).  */
  3205 
  3206 ptrdiff_t
  3207 re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ptrdiff_t size1,
  3208              const char *str2, ptrdiff_t size2,
  3209              ptrdiff_t startpos, ptrdiff_t range,
  3210              struct re_registers *regs, ptrdiff_t stop)
  3211 {
  3212   ptrdiff_t val;
  3213   re_char *string1 = (re_char *) str1;
  3214   re_char *string2 = (re_char *) str2;
  3215   char *fastmap = bufp->fastmap;
  3216   Lisp_Object translate = bufp->translate;
  3217   ptrdiff_t total_size = size1 + size2;
  3218   ptrdiff_t endpos = startpos + range;
  3219   bool anchored_start;
  3220   /* Nonzero if we are searching multibyte string.  */
  3221   bool multibyte = RE_TARGET_MULTIBYTE_P (bufp);
  3222 
  3223   /* Check for out-of-range STARTPOS.  */
  3224   if (startpos < 0 || startpos > total_size)
  3225     return -1;
  3226 
  3227   /* Fix up RANGE if it might eventually take us outside
  3228      the virtual concatenation of STRING1 and STRING2.
  3229      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
  3230   if (endpos < 0)
  3231     range = 0 - startpos;
  3232   else if (endpos > total_size)
  3233     range = total_size - startpos;
  3234 
  3235   /* If the search isn't to be a backwards one, don't waste time in a
  3236      search for a pattern anchored at beginning of buffer.  */
  3237   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
  3238     {
  3239       if (startpos > 0)
  3240         return -1;
  3241       else
  3242         range = 0;
  3243     }
  3244 
  3245   /* In a forward search for something that starts with \=.
  3246      don't keep searching past point.  */
  3247   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
  3248     {
  3249       range = PT_BYTE - BEGV_BYTE - startpos;
  3250       if (range < 0)
  3251         return -1;
  3252     }
  3253 
  3254   /* Update the fastmap now if not correct already.  */
  3255   if (fastmap && !bufp->fastmap_accurate)
  3256     re_compile_fastmap (bufp);
  3257 
  3258   /* See whether the pattern is anchored.  */
  3259   anchored_start = (bufp->buffer[0] == begline);
  3260 
  3261   gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
  3262   {
  3263     ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
  3264 
  3265     SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
  3266   }
  3267 
  3268   /* Loop through the string, looking for a place to start matching.  */
  3269   for (;;)
  3270     {
  3271       /* If the pattern is anchored,
  3272          skip quickly past places we cannot match.
  3273          Don't bother to treat startpos == 0 specially
  3274          because that case doesn't repeat.  */
  3275       if (anchored_start && startpos > 0)
  3276         {
  3277           if (! ((startpos <= size1 ? string1[startpos - 1]
  3278                   : string2[startpos - size1 - 1])
  3279                  == '\n'))
  3280             goto advance;
  3281         }
  3282 
  3283       /* If a fastmap is supplied, skip quickly over characters that
  3284          cannot be the start of a match.  If the pattern can match the
  3285          null string, however, we don't need to skip characters; we want
  3286          the first null string.  */
  3287       if (fastmap && startpos < total_size && !bufp->can_be_null)
  3288         {
  3289           re_char *d;
  3290           int buf_ch;
  3291 
  3292           d = POS_ADDR_VSTRING (startpos);
  3293 
  3294           if (range > 0)        /* Searching forwards.  */
  3295             {
  3296               ptrdiff_t irange = range, lim = 0;
  3297 
  3298               if (startpos < size1 && startpos + range >= size1)
  3299                 lim = range - (size1 - startpos);
  3300 
  3301               /* Written out as an if-else to avoid testing 'translate'
  3302                  inside the loop.  */
  3303               if (!NILP (translate))
  3304                 {
  3305                   if (multibyte)
  3306                     while (range > lim)
  3307                       {
  3308                         int buf_charlen;
  3309 
  3310                         buf_ch = string_char_and_length (d, &buf_charlen);
  3311                         buf_ch = RE_TRANSLATE (translate, buf_ch);
  3312                         if (fastmap[CHAR_LEADING_CODE (buf_ch)])
  3313                           break;
  3314 
  3315                         range -= buf_charlen;
  3316                         d += buf_charlen;
  3317                       }
  3318                   else
  3319                     while (range > lim)
  3320                       {
  3321                         buf_ch = *d;
  3322                         int ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
  3323                         int translated = RE_TRANSLATE (translate, ch);
  3324                         if (translated != ch
  3325                             && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
  3326                           buf_ch = ch;
  3327                         if (fastmap[buf_ch])
  3328                           break;
  3329                         d++;
  3330                         range--;
  3331                       }
  3332                 }
  3333               else
  3334                 {
  3335                   if (multibyte)
  3336                     while (range > lim)
  3337                       {
  3338                         int buf_charlen;
  3339 
  3340                         buf_ch = string_char_and_length (d, &buf_charlen);
  3341                         if (fastmap[CHAR_LEADING_CODE (buf_ch)])
  3342                           break;
  3343                         range -= buf_charlen;
  3344                         d += buf_charlen;
  3345                       }
  3346                   else
  3347                     while (range > lim && !fastmap[*d])
  3348                       {
  3349                         d++;
  3350                         range--;
  3351                       }
  3352                 }
  3353               startpos += irange - range;
  3354             }
  3355           else                          /* Searching backwards.  */
  3356             {
  3357               if (multibyte)
  3358                 {
  3359                   buf_ch = STRING_CHAR (d);
  3360                   buf_ch = TRANSLATE (buf_ch);
  3361                   if (! fastmap[CHAR_LEADING_CODE (buf_ch)])
  3362                     goto advance;
  3363                 }
  3364               else
  3365                 {
  3366                   buf_ch = *d;
  3367                   int ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
  3368                   int translated = TRANSLATE (ch);
  3369                   if (translated != ch
  3370                       && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
  3371                     buf_ch = ch;
  3372                   if (! fastmap[TRANSLATE (buf_ch)])
  3373                     goto advance;
  3374                 }
  3375             }
  3376         }
  3377 
  3378       /* If can't match the null string, and that's all we have left, fail.  */
  3379       if (range >= 0 && startpos == total_size && fastmap
  3380           && !bufp->can_be_null)
  3381         return -1;
  3382 
  3383       val = re_match_2_internal (bufp, string1, size1, string2, size2,
  3384                                  startpos, regs, stop);
  3385 
  3386       if (val >= 0)
  3387         return startpos;
  3388 
  3389       if (val == -2)
  3390         return -2;
  3391 
  3392     advance:
  3393       if (!range)
  3394         break;
  3395       else if (range > 0)
  3396         {
  3397           /* Update STARTPOS to the next character boundary.  */
  3398           if (multibyte)
  3399             {
  3400               re_char *p = POS_ADDR_VSTRING (startpos);
  3401               int len = BYTES_BY_CHAR_HEAD (*p);
  3402 
  3403               range -= len;
  3404               if (range < 0)
  3405                 break;
  3406               startpos += len;
  3407             }
  3408           else
  3409             {
  3410               range--;
  3411               startpos++;
  3412             }
  3413         }
  3414       else
  3415         {
  3416           range++;
  3417           startpos--;
  3418 
  3419           /* Update STARTPOS to the previous character boundary.  */
  3420           if (multibyte)
  3421             {
  3422               re_char *p = POS_ADDR_VSTRING (startpos) + 1;
  3423               int len = raw_prev_char_len (p);
  3424 
  3425               range += len - 1;
  3426               if (range > 0)
  3427                 break;
  3428               startpos -= len - 1;
  3429             }
  3430         }
  3431     }
  3432   return -1;
  3433 } /* re_search_2 */
  3434 
  3435 /* Declarations and macros for re_match_2.  */
  3436 
  3437 static bool bcmp_translate (re_char *, re_char *, ptrdiff_t,
  3438                             Lisp_Object, bool);
  3439 
  3440 /* This converts PTR, a pointer into one of the search strings 'string1'
  3441    and 'string2' into an offset from the beginning of that string.  */
  3442 #define POINTER_TO_OFFSET(ptr)                  \
  3443   (FIRST_STRING_P (ptr)                         \
  3444    ? (ptr) - string1                            \
  3445    : (ptr) - string2 + (ptrdiff_t) size1)
  3446 
  3447 /* Call before fetching a character with *d.  This switches over to
  3448    string2 if necessary.
  3449    `reset' is executed before backtracking if there are no more characters.
  3450    Check re_match_2_internal for a discussion of why end_match_2 might
  3451    not be within string2 (but be equal to end_match_1 instead).  */
  3452 #define PREFETCH(reset)                                                 \
  3453   while (d == dend)                                                     \
  3454     {                                                                   \
  3455       /* End of string2 => fail.  */                                    \
  3456       if (dend == end_match_2)                                          \
  3457         {                                                               \
  3458           reset;                                                        \
  3459           goto fail;                                                    \
  3460         }                                                               \
  3461       /* End of string1 => advance to string2.  */                      \
  3462       d = string2;                                                      \
  3463       dend = end_match_2;                                               \
  3464     }
  3465 
  3466 /* Call before fetching a char with *d if you already checked other limits.
  3467    This is meant for use in lookahead operations like wordend, etc..
  3468    where we might need to look at parts of the string that might be
  3469    outside of the LIMITs (i.e past 'stop').  */
  3470 #define PREFETCH_NOLIMIT()                                              \
  3471   if (d == end1)                                                        \
  3472      {                                                                  \
  3473        d = string2;                                                     \
  3474        dend = end_match_2;                                              \
  3475      }
  3476 
  3477 /* Test if at very beginning or at very end of the virtual concatenation
  3478    of STRING1 and STRING2.  If only one string, it's STRING2.  */
  3479 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
  3480 #define AT_STRINGS_END(d) ((d) == end2)
  3481 
  3482 /* Disabled due to a compiler bug -- see comment at case wordbound */
  3483 
  3484 /* The comment at case wordbound is following one, but we don't use
  3485    AT_WORD_BOUNDARY anymore to support multibyte form.
  3486 
  3487    The DEC Alpha C compiler 3.x generates incorrect code for the
  3488    test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
  3489    AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
  3490    macro and introducing temporary variables works around the bug.  */
  3491 
  3492 #if 0
  3493 /* Test if D points to a character which is word-constituent.  We have
  3494    two special cases to check for: if past the end of string1, look at
  3495    the first character in string2; and if before the beginning of
  3496    string2, look at the last character in string1.  */
  3497 #define WORDCHAR_P(d)                                                   \
  3498   (SYNTAX ((d) == end1 ? *string2                                       \
  3499            : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
  3500    == Sword)
  3501 
  3502 /* Test if the character before D and the one at D differ with respect
  3503    to being word-constituent.  */
  3504 #define AT_WORD_BOUNDARY(d)                                             \
  3505   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
  3506    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
  3507 #endif
  3508 
  3509 
  3510 /* Optimization routines.  */
  3511 
  3512 /* If the operation is a match against one or more chars,
  3513    return a pointer to the next operation, else return NULL.  */
  3514 static re_char *
  3515 skip_one_char (re_char *p)
  3516 {
  3517   switch (*p++)
  3518     {
  3519     case anychar:
  3520       break;
  3521 
  3522     case exactn:
  3523       p += *p + 1;
  3524       break;
  3525 
  3526     case charset_not:
  3527     case charset:
  3528       if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1))
  3529         {
  3530           int mcnt;
  3531           p = CHARSET_RANGE_TABLE (p - 1);
  3532           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  3533           p = CHARSET_RANGE_TABLE_END (p, mcnt);
  3534         }
  3535       else
  3536         p += 1 + CHARSET_BITMAP_SIZE (p - 1);
  3537       break;
  3538 
  3539     case syntaxspec:
  3540     case notsyntaxspec:
  3541     case categoryspec:
  3542     case notcategoryspec:
  3543       p++;
  3544       break;
  3545 
  3546     default:
  3547       p = NULL;
  3548     }
  3549   return p;
  3550 }
  3551 
  3552 
  3553 /* Jump over non-matching operations.  */
  3554 static re_char *
  3555 skip_noops (re_char *p, re_char *pend)
  3556 {
  3557   int mcnt;
  3558   while (p < pend)
  3559     {
  3560       switch (*p)
  3561         {
  3562         case start_memory:
  3563         case stop_memory:
  3564           p += 2; break;
  3565         case no_op:
  3566           p += 1; break;
  3567         case jump:
  3568           p += 1;
  3569           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  3570           p += mcnt;
  3571           break;
  3572         default:
  3573           return p;
  3574         }
  3575     }
  3576   eassert (p == pend);
  3577   return p;
  3578 }
  3579 
  3580 /* Test if C matches charset op.  *PP points to the charset or charset_not
  3581    opcode.  When the function finishes, *PP will be advanced past that opcode.
  3582    C is character to test (possibly after translations) and CORIG is original
  3583    character (i.e. without any translations).  UNIBYTE denotes whether c is
  3584    unibyte or multibyte character.
  3585    CANON_TABLE is the canonicalisation table for case folding or Qnil.  */
  3586 static bool
  3587 execute_charset (re_char **pp, int c, int corig, bool unibyte,
  3588                  Lisp_Object canon_table)
  3589 {
  3590   eassume (0 <= c && 0 <= corig);
  3591   re_char *p = *pp, *rtp = NULL;
  3592   bool not = (re_opcode_t) *p == charset_not;
  3593 
  3594   if (CHARSET_RANGE_TABLE_EXISTS_P (p))
  3595     {
  3596       int count;
  3597       rtp = CHARSET_RANGE_TABLE (p);
  3598       EXTRACT_NUMBER_AND_INCR (count, rtp);
  3599       *pp = CHARSET_RANGE_TABLE_END ((rtp), (count));
  3600     }
  3601   else
  3602     *pp += 2 + CHARSET_BITMAP_SIZE (p);
  3603 
  3604   if (unibyte && c < (1 << BYTEWIDTH))
  3605     {                   /* Lookup bitmap.  */
  3606       /* Cast to 'unsigned' instead of 'unsigned char' in
  3607          case the bit list is a full 32 bytes long.  */
  3608       if (c < (unsigned) (CHARSET_BITMAP_SIZE (p) * BYTEWIDTH)
  3609           && p[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
  3610         return !not;
  3611     }
  3612   else if (rtp)
  3613     {
  3614       int class_bits = CHARSET_RANGE_TABLE_BITS (p);
  3615       int range_start, range_end;
  3616 
  3617   /* Sort tests by the most commonly used classes with some adjustment to which
  3618      tests are easiest to perform.  Take a look at comment in re_wctype_parse
  3619      for table with frequencies of character class names. */
  3620 
  3621       if ((class_bits & BIT_MULTIBYTE) ||
  3622           (class_bits & BIT_ALNUM && ISALNUM (c)) ||
  3623           (class_bits & BIT_ALPHA && ISALPHA (c)) ||
  3624           (class_bits & BIT_SPACE && ISSPACE (c)) ||
  3625           (class_bits & BIT_BLANK && ISBLANK (c)) ||
  3626           (class_bits & BIT_WORD  && ISWORD  (c)) ||
  3627           ((class_bits & BIT_UPPER) &&
  3628            (ISUPPER (corig) || (!NILP (canon_table) && ISLOWER (corig)))) ||
  3629           ((class_bits & BIT_LOWER) &&
  3630            (ISLOWER (corig) || (!NILP (canon_table) && ISUPPER (corig)))) ||
  3631           (class_bits & BIT_PUNCT && ISPUNCT (c)) ||
  3632           (class_bits & BIT_GRAPH && ISGRAPH (c)) ||
  3633           (class_bits & BIT_PRINT && ISPRINT (c)))
  3634         return !not;
  3635 
  3636       for (p = *pp; rtp < p; rtp += 2 * 3)
  3637         {
  3638           EXTRACT_CHARACTER (range_start, rtp);
  3639           EXTRACT_CHARACTER (range_end, rtp + 3);
  3640           if (range_start <= c && c <= range_end)
  3641             return !not;
  3642         }
  3643     }
  3644 
  3645   return not;
  3646 }
  3647 
  3648 /* True if "p1 matches something" implies "p2 fails".  */
  3649 static bool
  3650 mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
  3651                       re_char *p2)
  3652 {
  3653   re_opcode_t op2;
  3654   bool multibyte = RE_MULTIBYTE_P (bufp);
  3655   unsigned char *pend = bufp->buffer + bufp->used;
  3656   re_char *p2_orig = p2;
  3657 
  3658   eassert (p1 >= bufp->buffer && p1 < pend
  3659            && p2 >= bufp->buffer && p2 <= pend);
  3660 
  3661   /* Skip over open/close-group commands.
  3662      If what follows this loop is a ...+ construct,
  3663      look at what begins its body, since we will have to
  3664      match at least one of that.  */
  3665   p2 = skip_noops (p2, pend);
  3666   /* The same skip can be done for p1, except that this function
  3667      is only used in the case where p1 is a simple match operator.  */
  3668   /* p1 = skip_noops (p1, pend); */
  3669 
  3670   eassert (p1 >= bufp->buffer && p1 < pend
  3671            && p2 >= bufp->buffer && p2 <= pend);
  3672 
  3673   op2 = p2 == pend ? succeed : *p2;
  3674 
  3675   switch (op2)
  3676     {
  3677     case succeed:
  3678     case endbuf:
  3679       /* If we're at the end of the pattern, we can change.  */
  3680       if (skip_one_char (p1))
  3681         {
  3682           DEBUG_PRINT ("  End of pattern: fast loop.\n");
  3683           return true;
  3684         }
  3685       break;
  3686 
  3687     case endline:
  3688     case exactn:
  3689       {
  3690         int c
  3691           = (re_opcode_t) *p2 == endline ? '\n'
  3692           : RE_STRING_CHAR (p2 + 2, multibyte);
  3693 
  3694         if ((re_opcode_t) *p1 == exactn)
  3695           {
  3696             if (c != RE_STRING_CHAR (p1 + 2, multibyte))
  3697               {
  3698                 DEBUG_PRINT ("  '%c' != '%c' => fast loop.\n", c, p1[2]);
  3699                 return true;
  3700               }
  3701           }
  3702 
  3703         else if ((re_opcode_t) *p1 == charset
  3704                  || (re_opcode_t) *p1 == charset_not)
  3705           {
  3706             if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c),
  3707                                   Qnil))
  3708               {
  3709                 DEBUG_PRINT ("   No match => fast loop.\n");
  3710                 return true;
  3711               }
  3712           }
  3713         else if ((re_opcode_t) *p1 == anychar
  3714                  && c == '\n')
  3715           {
  3716             DEBUG_PRINT ("   . != \\n => fast loop.\n");
  3717             return true;
  3718           }
  3719       }
  3720       break;
  3721 
  3722     case charset:
  3723       {
  3724         if ((re_opcode_t) *p1 == exactn)
  3725           /* Reuse the code above.  */
  3726           return mutually_exclusive_p (bufp, p2, p1);
  3727 
  3728       /* It is hard to list up all the character in charset
  3729          P2 if it includes multibyte character.  Give up in
  3730          such case.  */
  3731       else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
  3732         {
  3733           /* Now, we are sure that P2 has no range table.
  3734              So, for the size of bitmap in P2, 'p2[1]' is
  3735              enough.  But P1 may have range table, so the
  3736              size of bitmap table of P1 is extracted by
  3737              using macro 'CHARSET_BITMAP_SIZE'.
  3738 
  3739              In a multibyte case, we know that all the character
  3740              listed in P2 is ASCII.  In a unibyte case, P1 has only a
  3741              bitmap table.  So, in both cases, it is enough to test
  3742              only the bitmap table of P1.  */
  3743 
  3744           if ((re_opcode_t) *p1 == charset)
  3745             {
  3746               int idx;
  3747               /* We win if the charset inside the loop
  3748                  has no overlap with the one after the loop.  */
  3749               for (idx = 0;
  3750                    (idx < (int) p2[1]
  3751                     && idx < CHARSET_BITMAP_SIZE (p1));
  3752                    idx++)
  3753                 if ((p2[2 + idx] & p1[2 + idx]) != 0)
  3754                   break;
  3755 
  3756               if (idx == p2[1]
  3757                   || idx == CHARSET_BITMAP_SIZE (p1))
  3758                 {
  3759                   DEBUG_PRINT ("         No match => fast loop.\n");
  3760                   return true;
  3761                 }
  3762             }
  3763           else if ((re_opcode_t) *p1 == charset_not)
  3764             {
  3765               int idx;
  3766               /* We win if the charset_not inside the loop lists
  3767                  every character listed in the charset after.  */
  3768               for (idx = 0; idx < (int) p2[1]; idx++)
  3769                 if (! (p2[2 + idx] == 0
  3770                        || (idx < CHARSET_BITMAP_SIZE (p1)
  3771                            && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
  3772                   break;
  3773 
  3774               if (idx == p2[1])
  3775                 {
  3776                   DEBUG_PRINT ("         No match => fast loop.\n");
  3777                   return true;
  3778                 }
  3779               }
  3780           }
  3781       }
  3782       break;
  3783 
  3784     case charset_not:
  3785       switch (*p1)
  3786         {
  3787         case exactn:
  3788         case charset:
  3789           /* Reuse the code above.  */
  3790           return mutually_exclusive_p (bufp, p2, p1);
  3791         case charset_not:
  3792           /* When we have two charset_not, it's very unlikely that
  3793              they don't overlap.  The union of the two sets of excluded
  3794              chars should cover all possible chars, which, as a matter of
  3795              fact, is virtually impossible in multibyte buffers.  */
  3796           break;
  3797         }
  3798       break;
  3799 
  3800     case wordend:
  3801       return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
  3802     case symend:
  3803       return ((re_opcode_t) *p1 == syntaxspec
  3804               && (p1[1] == Ssymbol || p1[1] == Sword));
  3805     case notsyntaxspec:
  3806       return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
  3807 
  3808     case wordbeg:
  3809       return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
  3810     case symbeg:
  3811       return ((re_opcode_t) *p1 == notsyntaxspec
  3812               && (p1[1] == Ssymbol || p1[1] == Sword));
  3813     case syntaxspec:
  3814       return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
  3815 
  3816     case wordbound:
  3817       return (((re_opcode_t) *p1 == notsyntaxspec
  3818                || (re_opcode_t) *p1 == syntaxspec)
  3819               && p1[1] == Sword);
  3820 
  3821     case categoryspec:
  3822       return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
  3823     case notcategoryspec:
  3824       return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
  3825 
  3826     case on_failure_jump_nastyloop:
  3827     case on_failure_jump_smart:
  3828     case on_failure_jump_loop:
  3829     case on_failure_keep_string_jump:
  3830     case on_failure_jump:
  3831       {
  3832         int mcnt;
  3833         p2++;
  3834         EXTRACT_NUMBER_AND_INCR (mcnt, p2);
  3835         /* Don't just test `mcnt > 0` because non-greedy loops have
  3836            their test at the end with an unconditional jump at the start.  */
  3837         if (p2 + mcnt > p2_orig) /* Ensure forward progress.  */
  3838           return (mutually_exclusive_p (bufp, p1, p2)
  3839                   && mutually_exclusive_p (bufp, p1, p2 + mcnt));
  3840         break;
  3841       }
  3842 
  3843     default:
  3844       ;
  3845     }
  3846 
  3847   /* Safe default.  */
  3848   return false;
  3849 }
  3850 
  3851 
  3852 /* Matching routines.  */
  3853 
  3854 /* re_match_2 matches the compiled pattern in BUFP against the
  3855    (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
  3856    and SIZE2, respectively).  We start matching at POS, and stop
  3857    matching at STOP.
  3858 
  3859    If REGS is non-null, store offsets for the substring each group
  3860    matched in REGS.
  3861 
  3862    We return -1 if no match, -2 if an internal error (such as the
  3863    failure stack overflowing).  Otherwise, we return the length of the
  3864    matched substring.  */
  3865 
  3866 ptrdiff_t
  3867 re_match_2 (struct re_pattern_buffer *bufp,
  3868             char const *string1, ptrdiff_t size1,
  3869             char const *string2, ptrdiff_t size2,
  3870             ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop)
  3871 {
  3872   ptrdiff_t result;
  3873 
  3874   ptrdiff_t charpos;
  3875   gl_state.object = re_match_object; /* Used by SYNTAX_TABLE_BYTE_TO_CHAR. */
  3876   charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
  3877   SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
  3878 
  3879   result = re_match_2_internal (bufp, (re_char *) string1, size1,
  3880                                 (re_char *) string2, size2,
  3881                                 pos, regs, stop);
  3882   return result;
  3883 }
  3884 
  3885 static void
  3886 unwind_re_match (void *ptr)
  3887 {
  3888   struct buffer *b = (struct buffer *) ptr;
  3889   b->text->inhibit_shrinking = 0;
  3890 }
  3891 
  3892 /* This is a separate function so that we can force an alloca cleanup
  3893    afterwards.  */
  3894 static ptrdiff_t
  3895 re_match_2_internal (struct re_pattern_buffer *bufp,
  3896                      re_char *string1, ptrdiff_t size1,
  3897                      re_char *string2, ptrdiff_t size2,
  3898                      ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop)
  3899 {
  3900   eassume (0 <= size1);
  3901   eassume (0 <= size2);
  3902   eassume (0 <= pos && pos <= stop && stop <= size1 + size2);
  3903 
  3904   /* General temporaries.  */
  3905   int mcnt;
  3906 
  3907   /* Just past the end of the corresponding string.  */
  3908   re_char *end1, *end2;
  3909 
  3910   /* Pointers into string1 and string2, just past the last characters in
  3911      each to consider matching.  */
  3912   re_char *end_match_1, *end_match_2;
  3913 
  3914   /* Where we are in the data, and the end of the current string.  */
  3915   re_char *d, *dend;
  3916 
  3917   /* Used sometimes to remember where we were before starting matching
  3918      an operator so that we can go back in case of failure.  This "atomic"
  3919      behavior of matching opcodes is indispensable to the correctness
  3920      of the on_failure_keep_string_jump optimization.  */
  3921   re_char *dfail;
  3922 
  3923   /* Where we are in the pattern, and the end of the pattern.  */
  3924   re_char *p = bufp->buffer;
  3925   re_char *pend = p + bufp->used;
  3926 
  3927   /* We use this to map every character in the string.  */
  3928   Lisp_Object translate = bufp->translate;
  3929 
  3930   /* True if BUFP is setup from a multibyte regex.  */
  3931   bool multibyte = RE_MULTIBYTE_P (bufp);
  3932 
  3933   /* True if STRING1/STRING2 are multibyte.  */
  3934   bool target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
  3935 
  3936   /* Failure point stack.  Each place that can handle a failure further
  3937      down the line pushes a failure point on this stack.  It consists of
  3938      regstart, and regend for all registers corresponding to
  3939      the subexpressions we're currently inside, plus the number of such
  3940      registers, and, finally, two char *'s.  The first char * is where
  3941      to resume scanning the pattern; the second one is where to resume
  3942      scanning the strings.  */
  3943   fail_stack_type fail_stack;
  3944 #ifdef DEBUG_COMPILES_ARGUMENTS
  3945   ptrdiff_t nfailure_points_pushed = 0, nfailure_points_popped = 0;
  3946 #endif
  3947 
  3948   /* We fill all the registers internally, independent of what we
  3949      return, for use in backreferences.  The number here includes
  3950      an element for register zero.  */
  3951   ptrdiff_t num_regs = bufp->re_nsub + 1;
  3952   eassume (0 < num_regs);
  3953 
  3954   /* Information on the contents of registers. These are pointers into
  3955      the input strings; they record just what was matched (on this
  3956      attempt) by a subexpression part of the pattern, that is, the
  3957      regnum-th regstart pointer points to where in the pattern we began
  3958      matching and the regnum-th regend points to right after where we
  3959      stopped matching the regnum-th subexpression.  */
  3960   re_char **regstart UNINIT, **regend UNINIT;
  3961 
  3962   /* The following record the register info as found in the above
  3963      variables when we find a match better than any we've seen before.
  3964      This happens as we backtrack through the failure points, which in
  3965      turn happens only if we have not yet matched the entire string. */
  3966   bool best_regs_set = false;
  3967   re_char **best_regstart UNINIT, **best_regend UNINIT;
  3968 
  3969   /* Logically, this is 'best_regend[0]'.  But we don't want to have to
  3970      allocate space for that if we're not allocating space for anything
  3971      else (see below).  Also, we never need info about register 0 for
  3972      any of the other register vectors, and it seems rather a kludge to
  3973      treat 'best_regend' differently from the rest.  So we keep track of
  3974      the end of the best match so far in a separate variable.  We
  3975      initialize this to NULL so that when we backtrack the first time
  3976      and need to test it, it's not garbage.  */
  3977   re_char *match_end = NULL;
  3978 
  3979   /* This keeps track of how many buffer/string positions we examined.  */
  3980   ptrdiff_t nchars = 0;
  3981 
  3982 #ifdef DEBUG_COMPILES_ARGUMENTS
  3983   /* Counts the total number of registers pushed.  */
  3984   ptrdiff_t num_regs_pushed = 0;
  3985 #endif
  3986 
  3987   DEBUG_PRINT ("\nEntering re_match_2.\n");
  3988 
  3989   REGEX_USE_SAFE_ALLOCA;
  3990 
  3991   INIT_FAIL_STACK ();
  3992 
  3993   specpdl_ref count = SPECPDL_INDEX ();
  3994 
  3995   /* Prevent shrinking and relocation of buffer text if GC happens
  3996      while we are inside this function.  The calls to
  3997      UPDATE_SYNTAX_TABLE_* macros can call Lisp (via
  3998      `internal--syntax-propertize`); these calls are careful to defend against
  3999      buffer modifications, but even with no modifications, the buffer text may
  4000      be relocated during GC by `compact_buffer` which would invalidate
  4001      our C pointers to buffer text.  */
  4002   if (!current_buffer->text->inhibit_shrinking)
  4003     {
  4004       record_unwind_protect_ptr (unwind_re_match, current_buffer);
  4005       current_buffer->text->inhibit_shrinking = 1;
  4006     }
  4007 
  4008   /* Do not bother to initialize all the register variables if there are
  4009      no groups in the pattern, as it takes a fair amount of time.  If
  4010      there are groups, we include space for register 0 (the whole
  4011      pattern) in REGSTART[0], even though we never use it, to avoid
  4012      the undefined behavior of subtracting 1 from REGSTART.  */
  4013   ptrdiff_t re_nsub = num_regs - 1;
  4014   if (0 < re_nsub)
  4015     {
  4016       regstart = SAFE_ALLOCA ((re_nsub * 4 + 1) * sizeof *regstart);
  4017       regend = regstart + num_regs;
  4018       best_regstart = regend + re_nsub;
  4019       best_regend = best_regstart + re_nsub;
  4020 
  4021       /* Initialize subexpression text positions to unset, to mark ones
  4022          that no start_memory/stop_memory has been seen for.  */
  4023       for (re_char **apos = regstart + 1; apos < best_regstart + 1; apos++)
  4024         *apos = NULL;
  4025     }
  4026 
  4027   /* We move 'string1' into 'string2' if the latter's empty -- but not if
  4028      'string1' is null.  */
  4029   if (size2 == 0 && string1 != NULL)
  4030     {
  4031       string2 = string1;
  4032       size2 = size1;
  4033       string1 = 0;
  4034       size1 = 0;
  4035     }
  4036   end1 = string1 + size1;
  4037   end2 = string2 + size2;
  4038 
  4039   /* P scans through the pattern as D scans through the data.
  4040      DEND is the end of the input string that D points within.
  4041      Advance D into the following input string whenever necessary, but
  4042      this happens before fetching; therefore, at the beginning of the
  4043      loop, D can be pointing at the end of a string, but it cannot
  4044      equal STRING2.  */
  4045   if (pos >= size1)
  4046     {
  4047       /* Only match within string2.  */
  4048       d = string2 + pos - size1;
  4049       dend = end_match_2 = string2 + stop - size1;
  4050       end_match_1 = end1;       /* Just to give it a value.  */
  4051     }
  4052   else
  4053     {
  4054       if (stop < size1)
  4055         {
  4056           /* Only match within string1.  */
  4057           end_match_1 = string1 + stop;
  4058           /* BEWARE!
  4059              When we reach end_match_1, PREFETCH normally switches to string2.
  4060              But in the present case, this means that just doing a PREFETCH
  4061              makes us jump from 'stop' to 'gap' within the string.
  4062              What we really want here is for the search to stop as
  4063              soon as we hit end_match_1.  That's why we set end_match_2
  4064              to end_match_1 (since PREFETCH fails as soon as we hit
  4065              end_match_2).  */
  4066           end_match_2 = end_match_1;
  4067         }
  4068       else
  4069         { /* It's important to use this code when STOP == SIZE so that
  4070              moving D from end1 to string2 will not prevent the D == DEND
  4071              check from catching the end of string.  */
  4072           end_match_1 = end1;
  4073           end_match_2 = string2 + stop - size1;
  4074         }
  4075       d = string1 + pos;
  4076       dend = end_match_1;
  4077     }
  4078 
  4079   DEBUG_PRINT ("The compiled pattern is:\n");
  4080   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
  4081   DEBUG_PRINT ("The string to match is: \"");
  4082   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
  4083   DEBUG_PRINT ("\"\n");
  4084 
  4085   /* This loops over pattern commands.  It exits by returning from the
  4086      function if the match is complete, or it drops through if the match
  4087      fails at this starting point in the input data.  */
  4088   for (;;)
  4089     {
  4090       DEBUG_PRINT ("\n%p: ", p);
  4091 
  4092       if (p == pend)
  4093         {
  4094           /* End of pattern means we might have succeeded.  */
  4095           DEBUG_PRINT ("end of pattern ... ");
  4096 
  4097           /* If we haven't matched the entire string, and we want the
  4098              longest match, try backtracking.  */
  4099           if (d != end_match_2)
  4100             {
  4101               /* True if this match is the best seen so far.  */
  4102               bool best_match_p;
  4103 
  4104               {
  4105                 /* True if this match ends in the same string (string1
  4106                    or string2) as the best previous match.  */
  4107                 bool same_str_p = (FIRST_STRING_P (match_end)
  4108                                    == FIRST_STRING_P (d));
  4109 
  4110                 /* AIX compiler got confused when this was combined
  4111                    with the previous declaration.  */
  4112                 if (same_str_p)
  4113                   best_match_p = d > match_end;
  4114                 else
  4115                   best_match_p = !FIRST_STRING_P (d);
  4116               }
  4117 
  4118               DEBUG_PRINT ("backtracking.\n");
  4119 
  4120               if (!FAIL_STACK_EMPTY ())
  4121                 { /* More failure points to try.  */
  4122 
  4123                   /* If exceeds best match so far, save it.  */
  4124                   if (!best_regs_set || best_match_p)
  4125                     {
  4126                       best_regs_set = true;
  4127                       match_end = d;
  4128 
  4129                       DEBUG_PRINT ("\nSAVING match as best so far.\n");
  4130 
  4131                       for (ptrdiff_t reg = 1; reg < num_regs; reg++)
  4132                         {
  4133                           best_regstart[reg] = regstart[reg];
  4134                           best_regend[reg] = regend[reg];
  4135                         }
  4136                     }
  4137                   goto fail;
  4138                 }
  4139 
  4140               /* If no failure points, don't restore garbage.  And if
  4141                  last match is real best match, don't restore second
  4142                  best one. */
  4143               else if (best_regs_set && !best_match_p)
  4144                 {
  4145                 restore_best_regs:
  4146                   /* Restore best match.  It may happen that 'dend ==
  4147                      end_match_1' while the restored d is in string2.
  4148                      For example, the pattern 'x.*y.*z' against the
  4149                      strings 'x-' and 'y-z-', if the two strings are
  4150                      not consecutive in memory.  */
  4151                   DEBUG_PRINT ("Restoring best registers.\n");
  4152 
  4153                   d = match_end;
  4154                   dend = ((d >= string1 && d <= end1)
  4155                            ? end_match_1 : end_match_2);
  4156 
  4157                   for (ptrdiff_t reg = 1; reg < num_regs; reg++)
  4158                     {
  4159                       regstart[reg] = best_regstart[reg];
  4160                       regend[reg] = best_regend[reg];
  4161                       eassert (REG_UNSET (regstart[reg])
  4162                                <= REG_UNSET (regend[reg]));
  4163                     }
  4164                 }
  4165             } /* d != end_match_2 */
  4166 
  4167         succeed_label:
  4168           DEBUG_PRINT ("Accepting match.\n");
  4169 
  4170           /* If caller wants register contents data back, do it.  */
  4171           if (regs)
  4172             {
  4173               /* Have the register data arrays been allocated?  */
  4174               if (bufp->regs_allocated == REGS_UNALLOCATED)
  4175                 { /* No.  So allocate them with malloc.  */
  4176                   ptrdiff_t n = max (RE_NREGS, num_regs);
  4177                   regs->start = xnmalloc (n, sizeof *regs->start);
  4178                   regs->end = xnmalloc (n, sizeof *regs->end);
  4179                   regs->num_regs = n;
  4180                   bufp->regs_allocated = REGS_REALLOCATE;
  4181                 }
  4182               else if (bufp->regs_allocated == REGS_REALLOCATE)
  4183                 { /* Yes.  If we need more elements than were already
  4184                      allocated, reallocate them.  If we need fewer, just
  4185                      leave it alone.  */
  4186                   ptrdiff_t n = regs->num_regs;
  4187                   if (n < num_regs)
  4188                     {
  4189                       n = max (n + (n >> 1), num_regs);
  4190                       regs->start
  4191                         = xnrealloc (regs->start, n, sizeof *regs->start);
  4192                       regs->end = xnrealloc (regs->end, n, sizeof *regs->end);
  4193                       regs->num_regs = n;
  4194                     }
  4195                 }
  4196               else
  4197                 eassert (bufp->regs_allocated == REGS_FIXED);
  4198 
  4199               /* Convert the pointer data in 'regstart' and 'regend' to
  4200                  indices.  Register zero has to be set differently,
  4201                  since we haven't kept track of any info for it.  */
  4202               if (regs->num_regs > 0)
  4203                 {
  4204                   regs->start[0] = pos;
  4205                   regs->end[0] = POINTER_TO_OFFSET (d);
  4206                 }
  4207 
  4208               for (ptrdiff_t reg = 1; reg < num_regs; reg++)
  4209                 {
  4210                   eassert (REG_UNSET (regstart[reg])
  4211                            <= REG_UNSET (regend[reg]));
  4212                   if (REG_UNSET (regend[reg]))
  4213                     regs->start[reg] = regs->end[reg] = -1;
  4214                   else
  4215                     {
  4216                       regs->start[reg] = POINTER_TO_OFFSET (regstart[reg]);
  4217                       regs->end[reg] = POINTER_TO_OFFSET (regend[reg]);
  4218                     }
  4219                 }
  4220 
  4221               /* If the regs structure we return has more elements than
  4222                  were in the pattern, set the extra elements to -1.  */
  4223               for (ptrdiff_t reg = num_regs; reg < regs->num_regs; reg++)
  4224                 regs->start[reg] = regs->end[reg] = -1;
  4225             }
  4226 
  4227           DEBUG_PRINT ("%td failure points pushed, %td popped (%td remain).\n",
  4228                        nfailure_points_pushed, nfailure_points_popped,
  4229                        nfailure_points_pushed - nfailure_points_popped);
  4230           DEBUG_PRINT ("%td registers pushed.\n", num_regs_pushed);
  4231 
  4232           ptrdiff_t dcnt = POINTER_TO_OFFSET (d) - pos;
  4233 
  4234           DEBUG_PRINT ("Returning %td from re_match_2.\n", dcnt);
  4235 
  4236           unbind_to (count, Qnil);
  4237           SAFE_FREE ();
  4238           /* The factor of 50 below is a heuristic that needs to be tuned.  It
  4239              means we consider 50 buffer positions examined by this function
  4240              roughly equivalent to the display engine iterating over a single
  4241              buffer position.  */
  4242           if (max_redisplay_ticks > 0 && nchars > 0)
  4243             update_redisplay_ticks (nchars / 50 + 1, NULL);
  4244           return dcnt;
  4245         }
  4246 
  4247       /* Otherwise match next pattern command.  */
  4248       switch (*p++)
  4249         {
  4250         /* Ignore these.  Used to ignore the n of succeed_n's which
  4251            currently have n == 0.  */
  4252         case no_op:
  4253           DEBUG_PRINT ("EXECUTING no_op.\n");
  4254           break;
  4255 
  4256         case succeed:
  4257           DEBUG_PRINT ("EXECUTING succeed.\n");
  4258           goto succeed_label;
  4259 
  4260         /* Match the next n pattern characters exactly.  The following
  4261            byte in the pattern defines n, and the n bytes after that
  4262            are the characters to match.  */
  4263         case exactn:
  4264           mcnt = *p++;
  4265           DEBUG_PRINT ("EXECUTING exactn %d.\n", mcnt);
  4266 
  4267           /* Remember the start point to rollback upon failure.  */
  4268           dfail = d;
  4269 
  4270           /* The cost of testing 'translate' is comparatively small.  */
  4271           if (target_multibyte)
  4272             do
  4273               {
  4274                 int pat_charlen, buf_charlen;
  4275                 int pat_ch, buf_ch;
  4276 
  4277                 PREFETCH (d = dfail);
  4278                 if (multibyte)
  4279                   pat_ch = string_char_and_length (p, &pat_charlen);
  4280                 else
  4281                   {
  4282                     pat_ch = RE_CHAR_TO_MULTIBYTE (*p);
  4283                     pat_charlen = 1;
  4284                   }
  4285                 buf_ch = string_char_and_length (d, &buf_charlen);
  4286 
  4287                 if (TRANSLATE (buf_ch) != pat_ch)
  4288                   {
  4289                     d = dfail;
  4290                     goto fail;
  4291                   }
  4292 
  4293                 p += pat_charlen;
  4294                 d += buf_charlen;
  4295                 mcnt -= pat_charlen;
  4296                 nchars++;
  4297               }
  4298             while (mcnt > 0);
  4299           else
  4300             do
  4301               {
  4302                 int pat_charlen;
  4303                 int pat_ch, buf_ch;
  4304 
  4305                 PREFETCH (d = dfail);
  4306                 if (multibyte)
  4307                   {
  4308                     pat_ch = string_char_and_length (p, &pat_charlen);
  4309                     pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch);
  4310                   }
  4311                 else
  4312                   {
  4313                     pat_ch = *p;
  4314                     pat_charlen = 1;
  4315                   }
  4316                 buf_ch = RE_CHAR_TO_MULTIBYTE (*d);
  4317                 if (! CHAR_BYTE8_P (buf_ch))
  4318                   {
  4319                     buf_ch = TRANSLATE (buf_ch);
  4320                     buf_ch = RE_CHAR_TO_UNIBYTE (buf_ch);
  4321                     if (buf_ch < 0)
  4322                       buf_ch = *d;
  4323                   }
  4324                 else
  4325                   buf_ch = *d;
  4326                 if (buf_ch != pat_ch)
  4327                   {
  4328                     d = dfail;
  4329                     goto fail;
  4330                   }
  4331                 p += pat_charlen;
  4332                 d++;
  4333                 mcnt -= pat_charlen;
  4334                 nchars++;
  4335               }
  4336             while (mcnt > 0);
  4337 
  4338           break;
  4339 
  4340 
  4341         /* Match any character except newline.  */
  4342         case anychar:
  4343           {
  4344             int buf_charlen;
  4345             int buf_ch;
  4346 
  4347             DEBUG_PRINT ("EXECUTING anychar.\n");
  4348 
  4349             PREFETCH ();
  4350             buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen,
  4351                                                 target_multibyte);
  4352             buf_ch = TRANSLATE (buf_ch);
  4353             if (buf_ch == '\n')
  4354               goto fail;
  4355 
  4356             DEBUG_PRINT ("  Matched \"%d\".\n", *d);
  4357             d += buf_charlen;
  4358             nchars++;
  4359           }
  4360           break;
  4361 
  4362 
  4363         case charset:
  4364         case charset_not:
  4365           {
  4366             /* Whether matching against a unibyte character.  */
  4367             bool unibyte_char = false;
  4368 
  4369             DEBUG_PRINT ("EXECUTING charset%s.\n",
  4370                          (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
  4371 
  4372             PREFETCH ();
  4373             int len;
  4374             int corig = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
  4375             int c = corig;
  4376             if (target_multibyte)
  4377               {
  4378                 int c1;
  4379 
  4380                 c = TRANSLATE (c);
  4381                 c1 = RE_CHAR_TO_UNIBYTE (c);
  4382                 if (c1 >= 0)
  4383                   {
  4384                     unibyte_char = true;
  4385                     c = c1;
  4386                   }
  4387               }
  4388             else
  4389               {
  4390                 int c1 = RE_CHAR_TO_MULTIBYTE (c);
  4391 
  4392                 if (! CHAR_BYTE8_P (c1))
  4393                   {
  4394                     c1 = TRANSLATE (c1);
  4395                     c1 = RE_CHAR_TO_UNIBYTE (c1);
  4396                     if (c1 >= 0)
  4397                       {
  4398                         unibyte_char = true;
  4399                         c = c1;
  4400                       }
  4401                   }
  4402                 else
  4403                   unibyte_char = true;
  4404               }
  4405 
  4406             p -= 1;
  4407             if (!execute_charset (&p, c, corig, unibyte_char, translate))
  4408               goto fail;
  4409 
  4410             d += len;
  4411             nchars++;
  4412           }
  4413           break;
  4414 
  4415 
  4416         /* The beginning of a group is represented by start_memory.
  4417            The argument is the register number.  The text
  4418            matched within the group is recorded (in the internal
  4419            registers data structure) under the register number.  */
  4420         case start_memory:
  4421           DEBUG_PRINT ("EXECUTING start_memory %d:\n", *p);
  4422           eassert (0 < *p && *p < num_regs);
  4423 
  4424           /* In case we need to undo this operation (via backtracking).  */
  4425           PUSH_FAILURE_REG (*p);
  4426 
  4427           regstart[*p] = d;
  4428           DEBUG_PRINT ("  regstart: %td\n", POINTER_TO_OFFSET (regstart[*p]));
  4429 
  4430           /* Move past the register number and inner group count.  */
  4431           p += 1;
  4432           break;
  4433 
  4434 
  4435         /* The stop_memory opcode represents the end of a group.  Its
  4436            argument is the same as start_memory's: the register number.  */
  4437         case stop_memory:
  4438           DEBUG_PRINT ("EXECUTING stop_memory %d:\n", *p);
  4439 
  4440           eassert (0 < *p && *p < num_regs);
  4441           eassert (!REG_UNSET (regstart[*p]));
  4442           /* Strictly speaking, there should be code such as:
  4443 
  4444                 eassert (REG_UNSET (regend[*p]));
  4445                 PUSH_FAILURE_REGSTOP (*p);
  4446 
  4447              But the only info to be pushed is regend[*p] and it is known to
  4448              be UNSET, so there really isn't anything to push.
  4449              Not pushing anything, on the other hand deprives us from the
  4450              guarantee that regend[*p] is UNSET since undoing this operation
  4451              will not reset its value properly.  This is not important since
  4452              the value will only be read on the next start_memory or at
  4453              the very end and both events can only happen if this stop_memory
  4454              is *not* undone.  */
  4455 
  4456           regend[*p] = d;
  4457           DEBUG_PRINT ("      regend: %td\n", POINTER_TO_OFFSET (regend[*p]));
  4458 
  4459           /* Move past the register number and the inner group count.  */
  4460           p += 1;
  4461           break;
  4462 
  4463 
  4464         /* \<digit> has been turned into a 'duplicate' command which is
  4465            followed by the numeric value of <digit> as the register number.  */
  4466         case duplicate:
  4467           {
  4468             re_char *d2, *dend2;
  4469             int regno = *p++;   /* Get which register to match against.  */
  4470             DEBUG_PRINT ("EXECUTING duplicate %d.\n", regno);
  4471 
  4472             /* Can't back reference a group which we've never matched.  */
  4473             eassert (0 < regno && regno < num_regs);
  4474             eassert (REG_UNSET (regstart[regno]) <= REG_UNSET (regend[regno]));
  4475             if (REG_UNSET (regend[regno]))
  4476               goto fail;
  4477 
  4478             /* Where in input to try to start matching.  */
  4479             d2 = regstart[regno];
  4480 
  4481             /* Remember the start point to rollback upon failure.  */
  4482             dfail = d;
  4483 
  4484             /* Where to stop matching; if both the place to start and
  4485                the place to stop matching are in the same string, then
  4486                set to the place to stop, otherwise, for now have to use
  4487                the end of the first string.  */
  4488 
  4489             dend2 = ((FIRST_STRING_P (regstart[regno])
  4490                       == FIRST_STRING_P (regend[regno]))
  4491                      ? regend[regno] : end_match_1);
  4492             for (;;)
  4493               {
  4494                 ptrdiff_t dcnt;
  4495 
  4496                 /* If necessary, advance to next segment in register
  4497                    contents.  */
  4498                 while (d2 == dend2)
  4499                   {
  4500                     if (dend2 == end_match_2) break;
  4501                     if (dend2 == regend[regno]) break;
  4502 
  4503                     /* End of string1 => advance to string2. */
  4504                     d2 = string2;
  4505                     dend2 = regend[regno];
  4506                   }
  4507                 /* At end of register contents => success */
  4508                 if (d2 == dend2) break;
  4509 
  4510                 /* If necessary, advance to next segment in data.  */
  4511                 PREFETCH (d = dfail);
  4512 
  4513                 /* How many characters left in this segment to match.  */
  4514                 dcnt = dend - d;
  4515 
  4516                 /* Want how many consecutive characters we can match in
  4517                    one shot, so, if necessary, adjust the count.  */
  4518                 if (dcnt > dend2 - d2)
  4519                   dcnt = dend2 - d2;
  4520 
  4521                 /* Compare that many; failure if mismatch, else move
  4522                    past them.  */
  4523                 if (!NILP (translate)
  4524                     ? bcmp_translate (d, d2, dcnt, translate, target_multibyte)
  4525                     : memcmp (d, d2, dcnt))
  4526                   {
  4527                     d = dfail;
  4528                     goto fail;
  4529                   }
  4530                 d += dcnt, d2 += dcnt;
  4531                 nchars++;
  4532               }
  4533           }
  4534           break;
  4535 
  4536 
  4537         /* begline matches the empty string at the beginning of the string,
  4538            and after newlines.  */
  4539         case begline:
  4540           DEBUG_PRINT ("EXECUTING begline.\n");
  4541 
  4542           if (AT_STRINGS_BEG (d))
  4543             break;
  4544           else
  4545             {
  4546               unsigned c;
  4547               GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
  4548               if (c == '\n')
  4549                 break;
  4550             }
  4551           goto fail;
  4552 
  4553 
  4554         /* endline is the dual of begline.  */
  4555         case endline:
  4556           DEBUG_PRINT ("EXECUTING endline.\n");
  4557 
  4558           if (AT_STRINGS_END (d))
  4559             break;
  4560           PREFETCH_NOLIMIT ();
  4561           if (*d == '\n')
  4562             break;
  4563           goto fail;
  4564 
  4565 
  4566         /* Match at the very beginning of the data.  */
  4567         case begbuf:
  4568           DEBUG_PRINT ("EXECUTING begbuf.\n");
  4569           if (AT_STRINGS_BEG (d))
  4570             break;
  4571           goto fail;
  4572 
  4573 
  4574         /* Match at the very end of the data.  */
  4575         case endbuf:
  4576           DEBUG_PRINT ("EXECUTING endbuf.\n");
  4577           if (AT_STRINGS_END (d))
  4578             break;
  4579           goto fail;
  4580 
  4581 
  4582         /* on_failure_keep_string_jump is used to optimize '.*\n'.  It
  4583            pushes NULL as the value for the string on the stack.  Then
  4584            'POP_FAILURE_POINT' will keep the current value for the
  4585            string, instead of restoring it.  To see why, consider
  4586            matching 'foo\nbar' against '.*\n'.  The .* matches the foo;
  4587            then the . fails against the \n.  But the next thing we want
  4588            to do is match the \n against the \n; if we restored the
  4589            string value, we would be back at the foo.
  4590 
  4591            Because this is used only in specific cases, we don't need to
  4592            check all the things that 'on_failure_jump' does, to make
  4593            sure the right things get saved on the stack.  Hence we don't
  4594            share its code.  The only reason to push anything on the
  4595            stack at all is that otherwise we would have to change
  4596            'anychar's code to do something besides goto fail in this
  4597            case; that seems worse than this.  */
  4598         case on_failure_keep_string_jump:
  4599           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4600           DEBUG_PRINT ("EXECUTING on_failure_keep_string_jump %d (to %p):\n",
  4601                        mcnt, p + mcnt);
  4602 
  4603           PUSH_FAILURE_POINT (p - 3, NULL);
  4604           break;
  4605 
  4606           /* A nasty loop is introduced by the non-greedy *? and +?.
  4607              With such loops, the stack only ever contains one failure point
  4608              at a time, so that a plain on_failure_jump_loop kind of
  4609              cycle detection cannot work.  Worse yet, such a detection
  4610              can not only fail to detect a cycle, but it can also wrongly
  4611              detect a cycle (between different instantiations of the same
  4612              loop).
  4613              So the method used for those nasty loops is a little different:
  4614              We use a special cycle-detection-stack-frame which is pushed
  4615              when the on_failure_jump_nastyloop failure-point is *popped*.
  4616              This special frame thus marks the beginning of one iteration
  4617              through the loop and we can hence easily check right here
  4618              whether something matched between the beginning and the end of
  4619              the loop.  */
  4620         case on_failure_jump_nastyloop:
  4621           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4622           DEBUG_PRINT ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n",
  4623                        mcnt, p + mcnt);
  4624 
  4625           eassert ((re_opcode_t)p[-4] == no_op);
  4626           {
  4627             bool cycle = false;
  4628             CHECK_INFINITE_LOOP (p - 4, d);
  4629             if (!cycle)
  4630               /* If there's a cycle, just continue without pushing
  4631                  this failure point.  The failure point is the "try again"
  4632                  option, which shouldn't be tried.
  4633                  We want (x?)*?y\1z to match both xxyz and xxyxz.  */
  4634               PUSH_FAILURE_POINT (p - 3, d);
  4635           }
  4636           break;
  4637 
  4638           /* Simple loop detecting on_failure_jump:  just check on the
  4639              failure stack if the same spot was already hit earlier.  */
  4640         case on_failure_jump_loop:
  4641         on_failure:
  4642           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4643           DEBUG_PRINT ("EXECUTING on_failure_jump_loop %d (to %p):\n",
  4644                        mcnt, p + mcnt);
  4645           {
  4646             bool cycle = false;
  4647             CHECK_INFINITE_LOOP (p - 3, d);
  4648             if (cycle)
  4649               /* If there's a cycle, get out of the loop, as if the matching
  4650                  had failed.  We used to just 'goto fail' here, but that was
  4651                  aborting the search a bit too early: we want to keep the
  4652                  empty-loop-match and keep matching after the loop.
  4653                  We want (x?)*y\1z to match both xxyz and xxyxz.  */
  4654               p += mcnt;
  4655             else
  4656               PUSH_FAILURE_POINT (p - 3, d);
  4657           }
  4658           break;
  4659 
  4660 
  4661         /* Uses of on_failure_jump:
  4662 
  4663            Each alternative starts with an on_failure_jump that points
  4664            to the beginning of the next alternative.  Each alternative
  4665            except the last ends with a jump that in effect jumps past
  4666            the rest of the alternatives.  (They really jump to the
  4667            ending jump of the following alternative, because tensioning
  4668            these jumps is a hassle.)
  4669 
  4670            Repeats start with an on_failure_jump that points past both
  4671            the repetition text and either the following jump or
  4672            pop_failure_jump back to this on_failure_jump.  */
  4673         case on_failure_jump:
  4674           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4675           DEBUG_PRINT ("EXECUTING on_failure_jump %d (to %p):\n",
  4676                        mcnt, p + mcnt);
  4677 
  4678           PUSH_FAILURE_POINT (p -3, d);
  4679           break;
  4680 
  4681         /* This operation is used for greedy *.
  4682            Compare the beginning of the repeat with what in the
  4683            pattern follows its end. If we can establish that there
  4684            is nothing that they would both match, i.e., that we
  4685            would have to backtrack because of (as in, e.g., 'a*a')
  4686            then we can use a non-backtracking loop based on
  4687            on_failure_keep_string_jump instead of on_failure_jump.  */
  4688         case on_failure_jump_smart:
  4689           EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4690           DEBUG_PRINT ("EXECUTING on_failure_jump_smart %d (to %p).\n",
  4691                        mcnt, p + mcnt);
  4692           {
  4693             re_char *p1 = p; /* Next operation.  */
  4694             /* Discard 'const', making re_search non-reentrant.  */
  4695             unsigned char *p2 = (unsigned char *) p + mcnt; /* Jump dest.  */
  4696             unsigned char *p3 = (unsigned char *) p - 3; /* opcode location.  */
  4697 
  4698             p -= 3;             /* Reset so that we will re-execute the
  4699                                    instruction once it's been changed. */
  4700 
  4701             EXTRACT_NUMBER (mcnt, p2 - 2);
  4702 
  4703             /* Ensure this is indeed the trivial kind of loop
  4704                we are expecting.  */
  4705             eassert (skip_one_char (p1) == p2 - 3);
  4706             eassert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
  4707             DEBUG_STATEMENT (regex_emacs_debug += 2);
  4708             if (mutually_exclusive_p (bufp, p1, p2))
  4709               {
  4710                 /* Use a fast 'on_failure_keep_string_jump' loop.  */
  4711                 DEBUG_PRINT ("  smart exclusive => fast loop.\n");
  4712                 *p3 = (unsigned char) on_failure_keep_string_jump;
  4713                 STORE_NUMBER (p2 - 2, mcnt + 3);
  4714               }
  4715             else
  4716               {
  4717                 /* Default to a safe 'on_failure_jump' loop.  */
  4718                 DEBUG_PRINT ("  smart default => slow loop.\n");
  4719                 *p3 = (unsigned char) on_failure_jump;
  4720               }
  4721             DEBUG_STATEMENT (regex_emacs_debug -= 2);
  4722           }
  4723           break;
  4724 
  4725         /* Unconditionally jump (without popping any failure points).  */
  4726         case jump:
  4727         unconditional_jump:
  4728           maybe_quit ();
  4729           EXTRACT_NUMBER_AND_INCR (mcnt, p);    /* Get the amount to jump.  */
  4730           DEBUG_PRINT ("EXECUTING jump %d ", mcnt);
  4731           p += mcnt;                            /* Do the jump.  */
  4732           DEBUG_PRINT ("(to %p).\n", p);
  4733           break;
  4734 
  4735 
  4736         /* Have to succeed matching what follows at least n times.
  4737            After that, handle like 'on_failure_jump'.  */
  4738         case succeed_n:
  4739           /* Signedness doesn't matter since we only compare MCNT to 0.  */
  4740           EXTRACT_NUMBER (mcnt, p + 2);
  4741           DEBUG_PRINT ("EXECUTING succeed_n %d.\n", mcnt);
  4742 
  4743           /* Originally, mcnt is how many times we HAVE to succeed.  */
  4744           if (mcnt != 0)
  4745             {
  4746               /* Discard 'const', making re_search non-reentrant.  */
  4747               unsigned char *p2 = (unsigned char *) p + 2; /* counter loc.  */
  4748               mcnt--;
  4749               p += 4;
  4750               PUSH_NUMBER (p2, mcnt);
  4751             }
  4752           else
  4753             /* The two bytes encoding mcnt == 0 are two no_op opcodes.  */
  4754             goto on_failure;
  4755           break;
  4756 
  4757         case jump_n:
  4758           /* Signedness doesn't matter since we only compare MCNT to 0.  */
  4759           EXTRACT_NUMBER (mcnt, p + 2);
  4760           DEBUG_PRINT ("EXECUTING jump_n %d.\n", mcnt);
  4761 
  4762           /* Originally, this is how many times we CAN jump.  */
  4763           if (mcnt != 0)
  4764             {
  4765               /* Discard 'const', making re_search non-reentrant.  */
  4766               unsigned char *p2 = (unsigned char *) p + 2; /* counter loc.  */
  4767               mcnt--;
  4768               PUSH_NUMBER (p2, mcnt);
  4769               goto unconditional_jump;
  4770             }
  4771           /* If don't have to jump any more, skip over the rest of command.  */
  4772           else
  4773             p += 4;
  4774           break;
  4775 
  4776         case set_number_at:
  4777           {
  4778             unsigned char *p2;  /* Location of the counter.  */
  4779             DEBUG_PRINT ("EXECUTING set_number_at.\n");
  4780 
  4781             EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4782             /* Discard 'const', making re_search non-reentrant.  */
  4783             p2 = (unsigned char *) p + mcnt;
  4784             /* Signedness doesn't matter since we only copy MCNT's bits.  */
  4785             EXTRACT_NUMBER_AND_INCR (mcnt, p);
  4786             DEBUG_PRINT ("  Setting %p to %d.\n", p2, mcnt);
  4787             PUSH_NUMBER (p2, mcnt);
  4788             break;
  4789           }
  4790 
  4791         case wordbound:
  4792         case notwordbound:
  4793           {
  4794             bool not = (re_opcode_t) *(p - 1) == notwordbound;
  4795             DEBUG_PRINT ("EXECUTING %swordbound.\n", not ? "not" : "");
  4796 
  4797             /* We SUCCEED (or FAIL) in one of the following cases: */
  4798 
  4799             /* Case 1: D is at the beginning or the end of string.  */
  4800             if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
  4801               not = !not;
  4802             else
  4803               {
  4804                 /* C1 is the character before D, S1 is the syntax of C1, C2
  4805                    is the character at D, and S2 is the syntax of C2.  */
  4806                 int c1, c2;
  4807                 int s1, s2;
  4808                 int dummy;
  4809                 ptrdiff_t offset = PTR_TO_OFFSET (d);
  4810                 ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
  4811                 UPDATE_SYNTAX_TABLE (charpos);
  4812                 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
  4813                 nchars++;
  4814                 s1 = SYNTAX (c1);
  4815                 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
  4816                 PREFETCH_NOLIMIT ();
  4817                 GET_CHAR_AFTER (c2, d, dummy);
  4818                 nchars++;
  4819                 s2 = SYNTAX (c2);
  4820 
  4821                 if (/* Case 2: Only one of S1 and S2 is Sword.  */
  4822                     ((s1 == Sword) != (s2 == Sword))
  4823                     /* Case 3: Both of S1 and S2 are Sword, and macro
  4824                        WORD_BOUNDARY_P (C1, C2) returns nonzero.  */
  4825                     || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
  4826                   not = !not;
  4827               }
  4828             if (not)
  4829               break;
  4830             else
  4831               goto fail;
  4832           }
  4833 
  4834         case wordbeg:
  4835           DEBUG_PRINT ("EXECUTING wordbeg.\n");
  4836 
  4837           /* We FAIL in one of the following cases: */
  4838 
  4839           /* Case 1: D is at the end of string.  */
  4840           if (AT_STRINGS_END (d))
  4841             goto fail;
  4842           else
  4843             {
  4844               /* C1 is the character before D, S1 is the syntax of C1, C2
  4845                  is the character at D, and S2 is the syntax of C2.  */
  4846               int c1, c2;
  4847               int s1, s2;
  4848               int dummy;
  4849               ptrdiff_t offset = PTR_TO_OFFSET (d);
  4850               ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
  4851               UPDATE_SYNTAX_TABLE (charpos);
  4852               PREFETCH ();
  4853               GET_CHAR_AFTER (c2, d, dummy);
  4854               nchars++;
  4855               s2 = SYNTAX (c2);
  4856 
  4857               /* Case 2: S2 is not Sword. */
  4858               if (s2 != Sword)
  4859                 goto fail;
  4860 
  4861               /* Case 3: D is not at the beginning of string ... */
  4862               if (!AT_STRINGS_BEG (d))
  4863                 {
  4864                   GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
  4865                   nchars++;
  4866                   UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
  4867                   s1 = SYNTAX (c1);
  4868 
  4869                   /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
  4870                      returns 0.  */
  4871                   if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
  4872                     goto fail;
  4873                 }
  4874             }
  4875           break;
  4876 
  4877         case wordend:
  4878           DEBUG_PRINT ("EXECUTING wordend.\n");
  4879 
  4880           /* We FAIL in one of the following cases: */
  4881 
  4882           /* Case 1: D is at the beginning of string.  */
  4883           if (AT_STRINGS_BEG (d))
  4884             goto fail;
  4885           else
  4886             {
  4887               /* C1 is the character before D, S1 is the syntax of C1, C2
  4888                  is the character at D, and S2 is the syntax of C2.  */
  4889               int c1, c2;
  4890               int s1, s2;
  4891               int dummy;
  4892               ptrdiff_t offset = PTR_TO_OFFSET (d);
  4893               ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
  4894               UPDATE_SYNTAX_TABLE (charpos);
  4895               GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
  4896               nchars++;
  4897               s1 = SYNTAX (c1);
  4898 
  4899               /* Case 2: S1 is not Sword.  */
  4900               if (s1 != Sword)
  4901                 goto fail;
  4902 
  4903               /* Case 3: D is not at the end of string ... */
  4904               if (!AT_STRINGS_END (d))
  4905                 {
  4906                   PREFETCH_NOLIMIT ();
  4907                   GET_CHAR_AFTER (c2, d, dummy);
  4908                   nchars++;
  4909                   UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
  4910                   s2 = SYNTAX (c2);
  4911 
  4912                   /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
  4913                      returns 0.  */
  4914                   if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
  4915           goto fail;
  4916                 }
  4917             }
  4918           break;
  4919 
  4920         case symbeg:
  4921           DEBUG_PRINT ("EXECUTING symbeg.\n");
  4922 
  4923           /* We FAIL in one of the following cases: */
  4924 
  4925           /* Case 1: D is at the end of string.  */
  4926           if (AT_STRINGS_END (d))
  4927             goto fail;
  4928           else
  4929             {
  4930               /* C1 is the character before D, S1 is the syntax of C1, C2
  4931                  is the character at D, and S2 is the syntax of C2.  */
  4932               int c1, c2;
  4933               int s1, s2;
  4934               ptrdiff_t offset = PTR_TO_OFFSET (d);
  4935               ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
  4936               UPDATE_SYNTAX_TABLE (charpos);
  4937               PREFETCH ();
  4938               c2 = RE_STRING_CHAR (d, target_multibyte);
  4939               nchars++;
  4940               s2 = SYNTAX (c2);
  4941 
  4942               /* Case 2: S2 is neither Sword nor Ssymbol. */
  4943               if (s2 != Sword && s2 != Ssymbol)
  4944                 goto fail;
  4945 
  4946               /* Case 3: D is not at the beginning of string ... */
  4947               if (!AT_STRINGS_BEG (d))
  4948                 {
  4949                   GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
  4950                   nchars++;
  4951                   UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
  4952                   s1 = SYNTAX (c1);
  4953 
  4954                   /* ... and S1 is Sword or Ssymbol.  */
  4955                   if (s1 == Sword || s1 == Ssymbol)
  4956                     goto fail;
  4957                 }
  4958             }
  4959           break;
  4960 
  4961         case symend:
  4962           DEBUG_PRINT ("EXECUTING symend.\n");
  4963 
  4964           /* We FAIL in one of the following cases: */
  4965 
  4966           /* Case 1: D is at the beginning of string.  */
  4967           if (AT_STRINGS_BEG (d))
  4968             goto fail;
  4969           else
  4970             {
  4971               /* C1 is the character before D, S1 is the syntax of C1, C2
  4972                  is the character at D, and S2 is the syntax of C2.  */
  4973               int c1, c2;
  4974               int s1, s2;
  4975               ptrdiff_t offset = PTR_TO_OFFSET (d);
  4976               ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
  4977               UPDATE_SYNTAX_TABLE (charpos);
  4978               GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
  4979               nchars++;
  4980               s1 = SYNTAX (c1);
  4981 
  4982               /* Case 2: S1 is neither Ssymbol nor Sword.  */
  4983               if (s1 != Sword && s1 != Ssymbol)
  4984                 goto fail;
  4985 
  4986               /* Case 3: D is not at the end of string ... */
  4987               if (!AT_STRINGS_END (d))
  4988                 {
  4989                   PREFETCH_NOLIMIT ();
  4990                   c2 = RE_STRING_CHAR (d, target_multibyte);
  4991                   nchars++;
  4992                   UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
  4993                   s2 = SYNTAX (c2);
  4994 
  4995                   /* ... and S2 is Sword or Ssymbol.  */
  4996                   if (s2 == Sword || s2 == Ssymbol)
  4997                     goto fail;
  4998                 }
  4999             }
  5000           break;
  5001 
  5002         case syntaxspec:
  5003         case notsyntaxspec:
  5004           {
  5005             bool not = (re_opcode_t) *(p - 1) == notsyntaxspec;
  5006             mcnt = *p++;
  5007             DEBUG_PRINT ("EXECUTING %ssyntaxspec %d.\n", not ? "not" : "",
  5008                          mcnt);
  5009             PREFETCH ();
  5010             {
  5011               ptrdiff_t offset = PTR_TO_OFFSET (d);
  5012               ptrdiff_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
  5013               UPDATE_SYNTAX_TABLE (pos1);
  5014             }
  5015             {
  5016               int len;
  5017               int c;
  5018 
  5019               GET_CHAR_AFTER (c, d, len);
  5020               if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
  5021                 goto fail;
  5022               d += len;
  5023               nchars++;
  5024             }
  5025           }
  5026           break;
  5027 
  5028         case at_dot:
  5029           DEBUG_PRINT ("EXECUTING at_dot.\n");
  5030           if (PTR_BYTE_POS (d) != PT_BYTE)
  5031             goto fail;
  5032           break;
  5033 
  5034         case categoryspec:
  5035         case notcategoryspec:
  5036           {
  5037             bool not = (re_opcode_t) *(p - 1) == notcategoryspec;
  5038             mcnt = *p++;
  5039             DEBUG_PRINT ("EXECUTING %scategoryspec %d.\n",
  5040                          not ? "not" : "", mcnt);
  5041             PREFETCH ();
  5042 
  5043             {
  5044               int len;
  5045               int c;
  5046               GET_CHAR_AFTER (c, d, len);
  5047               if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
  5048                 goto fail;
  5049               d += len;
  5050               nchars++;
  5051             }
  5052           }
  5053           break;
  5054 
  5055         default:
  5056           abort ();
  5057         }
  5058       continue;  /* Successfully executed one pattern command; keep going.  */
  5059 
  5060 
  5061     /* We goto here if a matching operation fails. */
  5062     fail:
  5063       maybe_quit ();
  5064       if (!FAIL_STACK_EMPTY ())
  5065         {
  5066           re_char *str, *pat;
  5067           /* A restart point is known.  Restore to that state.  */
  5068           DEBUG_PRINT ("\nFAIL:\n");
  5069           POP_FAILURE_POINT (str, pat);
  5070           switch (*pat++)
  5071             {
  5072             case on_failure_keep_string_jump:
  5073               eassert (str == NULL);
  5074               goto continue_failure_jump;
  5075 
  5076             case on_failure_jump_nastyloop:
  5077               eassert ((re_opcode_t)pat[-2] == no_op);
  5078               PUSH_FAILURE_POINT (pat - 2, str);
  5079               FALLTHROUGH;
  5080             case on_failure_jump_loop:
  5081             case on_failure_jump:
  5082             case succeed_n:
  5083               d = str;
  5084             continue_failure_jump:
  5085               EXTRACT_NUMBER_AND_INCR (mcnt, pat);
  5086               p = pat + mcnt;
  5087               break;
  5088 
  5089             case no_op:
  5090               /* A special frame used for nastyloops. */
  5091               goto fail;
  5092 
  5093             default:
  5094               abort ();
  5095             }
  5096 
  5097           eassert (p >= bufp->buffer && p <= pend);
  5098 
  5099           if (d >= string1 && d <= end1)
  5100             dend = end_match_1;
  5101         }
  5102       else
  5103         break;   /* Matching at this starting point really fails.  */
  5104     } /* for (;;) */
  5105 
  5106   if (best_regs_set)
  5107     goto restore_best_regs;
  5108 
  5109   unbind_to (count, Qnil);
  5110   SAFE_FREE ();
  5111 
  5112   if (max_redisplay_ticks > 0 && nchars > 0)
  5113     update_redisplay_ticks (nchars / 50 + 1, NULL);
  5114 
  5115   return -1;                            /* Failure to match.  */
  5116 }
  5117 
  5118 /* Subroutine definitions for re_match_2.  */
  5119 
  5120 /* Return true if TRANSLATE[S1] and TRANSLATE[S2] are not identical
  5121    for LEN bytes.  */
  5122 
  5123 static bool
  5124 bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len,
  5125                 Lisp_Object translate, bool target_multibyte)
  5126 {
  5127   re_char *p1 = s1, *p2 = s2;
  5128   re_char *p1_end = s1 + len;
  5129   re_char *p2_end = s2 + len;
  5130 
  5131   /* FIXME: Checking both p1 and p2 presumes that the two strings might have
  5132      different lengths, but relying on a single LEN would break this. -sm  */
  5133   while (p1 < p1_end && p2 < p2_end)
  5134     {
  5135       int p1_charlen, p2_charlen;
  5136       int p1_ch, p2_ch;
  5137 
  5138       GET_CHAR_AFTER (p1_ch, p1, p1_charlen);
  5139       GET_CHAR_AFTER (p2_ch, p2, p2_charlen);
  5140 
  5141       if (RE_TRANSLATE (translate, p1_ch)
  5142           != RE_TRANSLATE (translate, p2_ch))
  5143         return true;
  5144 
  5145       p1 += p1_charlen, p2 += p2_charlen;
  5146     }
  5147 
  5148   return p1 != p1_end || p2 != p2_end;
  5149 }
  5150 
  5151 /* Entry points for GNU code.  */
  5152 
  5153 /* re_compile_pattern is the GNU regular expression compiler: it
  5154    compiles PATTERN (of length SIZE) and puts the result in BUFP.
  5155    Returns 0 if the pattern was valid, otherwise an error string.
  5156 
  5157    Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
  5158    are set in BUFP on entry.
  5159 
  5160    We call regex_compile to do the actual compilation.  */
  5161 
  5162 const char *
  5163 re_compile_pattern (const char *pattern, ptrdiff_t length,
  5164                     bool posix_backtracking, const char *whitespace_regexp,
  5165                     struct re_pattern_buffer *bufp)
  5166 {
  5167   bufp->regs_allocated = REGS_UNALLOCATED;
  5168 
  5169   reg_errcode_t ret
  5170       = regex_compile ((re_char *) pattern, length,
  5171                        posix_backtracking,
  5172                        whitespace_regexp,
  5173                        bufp);
  5174 
  5175   if (!ret)
  5176     return NULL;
  5177   return re_error_msgid[ret];
  5178 }
/* [<][>][^][v][top][bottom][index][help] */
root/src/regex-emacs.c

DEFINITIONS