lib/regexec.c

/* [<][>][^][v][top][bottom][index][help] */
This source file includes following definitions.
regexec
libc_hidden_def
re_match
weak_alias
weak_alias
weak_alias
weak_alias
re_search_stub
re_copy_regs
re_set_registers
weak_alias
re_search_internal
prune_impossible_nodes
acquire_init_state_context
check_matching
check_halt_node_context
check_halt_state_context
proceed_next_node
push_fail_stack
pop_fail_stack
set_regs
free_fail_stack_return
update_regs
sift_states_backward
build_sifted_states
clean_state_log_if_needed
merge_state_array
update_cur_sifted_state
add_epsilon_src_nodes
sub_epsilon_src_nodes
check_dst_limits
check_dst_limits_calc_pos_1
check_dst_limits_calc_pos
check_subexp_limits
sift_states_bkref
sift_states_iter_mb
transit_state
merge_state_with_log
find_recover_state
check_subexp_matching_top
transit_state_sb
transit_state_mb
transit_state_bkref
get_subexp
get_subexp_sub
find_subexp_node
check_arrival
check_arrival_add_next_nodes
check_arrival_expand_ecl
check_arrival_expand_ecl_sub
expand_bkref_cache
build_trtable
group_nodes_into_DFAstates
check_node_accept_bytes
find_collation_sequence_value
check_node_accept
extend_buffers
match_ctx_init
match_ctx_clean
match_ctx_free
match_ctx_add_entry
search_cur_bkref_entry
match_ctx_add_subtop
match_ctx_add_sublast
sift_ctx_init
     1 /* Extended regular expression matching and search library.
     2    Copyright (C) 2002-2023 Free Software Foundation, Inc.
     3    This file is part of the GNU C Library.
     4    Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>.
     5 
     6    The GNU C Library is free software; you can redistribute it and/or
     7    modify it under the terms of the GNU Lesser General Public
     8    License as published by the Free Software Foundation; either
     9    version 2.1 of the License, or (at your option) any later version.
    10 
    11    The GNU C Library is distributed in the hope that it will be useful,
    12    but WITHOUT ANY WARRANTY; without even the implied warranty of
    13    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
    14    Lesser General Public License for more details.
    15 
    16    You should have received a copy of the GNU Lesser General Public
    17    License along with the GNU C Library; if not, see
    18    <https://www.gnu.org/licenses/>.  */
    19 
    20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
    21                                      Idx n);
    22 static void match_ctx_clean (re_match_context_t *mctx);
    23 static void match_ctx_free (re_match_context_t *cache);
    24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
    25                                           Idx str_idx, Idx from, Idx to);
    26 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
    27 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
    28                                            Idx str_idx);
    29 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
    30                                                     Idx node, Idx str_idx);
    31 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
    32                            re_dfastate_t **limited_sts, Idx last_node,
    33                            Idx last_str_idx);
    34 static reg_errcode_t re_search_internal (const regex_t *preg,
    35                                          const char *string, Idx length,
    36                                          Idx start, Idx last_start, Idx stop,
    37                                          size_t nmatch, regmatch_t pmatch[],
    38                                          int eflags);
    39 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
    40                                   const char *string1, Idx length1,
    41                                   const char *string2, Idx length2,
    42                                   Idx start, regoff_t range,
    43                                   struct re_registers *regs,
    44                                   Idx stop, bool ret_len);
    45 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
    46                                 const char *string, Idx length, Idx start,
    47                                 regoff_t range, Idx stop,
    48                                 struct re_registers *regs,
    49                                 bool ret_len);
    50 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
    51                               Idx nregs, int regs_allocated);
    52 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
    53 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
    54                            Idx *p_match_first);
    55 static Idx check_halt_state_context (const re_match_context_t *mctx,
    56                                      const re_dfastate_t *state, Idx idx);
    57 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
    58                          regmatch_t *prev_idx_match, Idx cur_node,
    59                          Idx cur_idx, Idx nmatch);
    60 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
    61                                       Idx str_idx, Idx dest_node, Idx nregs,
    62                                       regmatch_t *regs, regmatch_t *prevregs,
    63                                       re_node_set *eps_via_nodes);
    64 static reg_errcode_t set_regs (const regex_t *preg,
    65                                const re_match_context_t *mctx,
    66                                size_t nmatch, regmatch_t *pmatch,
    67                                bool fl_backtrack);
    68 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
    69 
    70 static int sift_states_iter_mb (const re_match_context_t *mctx,
    71                                 re_sift_context_t *sctx,
    72                                 Idx node_idx, Idx str_idx, Idx max_str_idx);
    73 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
    74                                            re_sift_context_t *sctx);
    75 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
    76                                           re_sift_context_t *sctx, Idx str_idx,
    77                                           re_node_set *cur_dest);
    78 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
    79                                               re_sift_context_t *sctx,
    80                                               Idx str_idx,
    81                                               re_node_set *dest_nodes);
    82 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
    83                                             re_node_set *dest_nodes,
    84                                             const re_node_set *candidates);
    85 static bool check_dst_limits (const re_match_context_t *mctx,
    86                               const re_node_set *limits,
    87                               Idx dst_node, Idx dst_idx, Idx src_node,
    88                               Idx src_idx);
    89 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
    90                                         int boundaries, Idx subexp_idx,
    91                                         Idx from_node, Idx bkref_idx);
    92 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
    93                                       Idx limit, Idx subexp_idx,
    94                                       Idx node, Idx str_idx,
    95                                       Idx bkref_idx);
    96 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
    97                                           re_node_set *dest_nodes,
    98                                           const re_node_set *candidates,
    99                                           re_node_set *limits,
   100                                           struct re_backref_cache_entry *bkref_ents,
   101                                           Idx str_idx);
   102 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
   103                                         re_sift_context_t *sctx,
   104                                         Idx str_idx, const re_node_set *candidates);
   105 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
   106                                         re_dfastate_t **dst,
   107                                         re_dfastate_t **src, Idx num);
   108 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
   109                                          re_match_context_t *mctx);
   110 static re_dfastate_t *transit_state (reg_errcode_t *err,
   111                                      re_match_context_t *mctx,
   112                                      re_dfastate_t *state);
   113 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
   114                                             re_match_context_t *mctx,
   115                                             re_dfastate_t *next_state);
   116 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
   117                                                 re_node_set *cur_nodes,
   118                                                 Idx str_idx);
   119 #if 0
   120 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
   121                                         re_match_context_t *mctx,
   122                                         re_dfastate_t *pstate);
   123 #endif
   124 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
   125                                        re_dfastate_t *pstate);
   126 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
   127                                           const re_node_set *nodes);
   128 static reg_errcode_t get_subexp (re_match_context_t *mctx,
   129                                  Idx bkref_node, Idx bkref_str_idx);
   130 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
   131                                      const re_sub_match_top_t *sub_top,
   132                                      re_sub_match_last_t *sub_last,
   133                                      Idx bkref_node, Idx bkref_str);
   134 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
   135                              Idx subexp_idx, int type);
   136 static reg_errcode_t check_arrival (re_match_context_t *mctx,
   137                                     state_array_t *path, Idx top_node,
   138                                     Idx top_str, Idx last_node, Idx last_str,
   139                                     int type);
   140 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
   141                                                    Idx str_idx,
   142                                                    re_node_set *cur_nodes,
   143                                                    re_node_set *next_nodes);
   144 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
   145                                                re_node_set *cur_nodes,
   146                                                Idx ex_subexp, int type);
   147 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
   148                                                    re_node_set *dst_nodes,
   149                                                    Idx target, Idx ex_subexp,
   150                                                    int type);
   151 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
   152                                          re_node_set *cur_nodes, Idx cur_str,
   153                                          Idx subexp_num, int type);
   154 static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
   155 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
   156                                     const re_string_t *input, Idx idx);
   157 #ifdef _LIBC
   158 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
   159                                                    size_t name_len);
   160 #endif
   161 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
   162                                        const re_dfastate_t *state,
   163                                        re_node_set *states_node,
   164                                        bitset_t *states_ch);
   165 static bool check_node_accept (const re_match_context_t *mctx,
   166                                const re_token_t *node, Idx idx);
   167 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
   168 
   169 /* Entry point for POSIX code.  */
   170 
   171 /* regexec searches for a given pattern, specified by PREG, in the
   172    string STRING.
   173 
   174    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
   175    'regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
   176    least NMATCH elements, and we set them to the offsets of the
   177    corresponding matched substrings.
   178 
   179    EFLAGS specifies "execution flags" which affect matching: if
   180    REG_NOTBOL is set, then ^ does not match at the beginning of the
   181    string; if REG_NOTEOL is set, then $ does not match at the end.
   182 
   183    Return 0 if a match is found, REG_NOMATCH if not, REG_BADPAT if
   184    EFLAGS is invalid.  */
   185 
   186 int
   187 regexec (const regex_t *__restrict preg, const char *__restrict string,
   188          size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
   189 {
   190   reg_errcode_t err;
   191   Idx start, length;
   192   re_dfa_t *dfa = preg->buffer;
   193 
   194   if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
   195     return REG_BADPAT;
   196 
   197   if (eflags & REG_STARTEND)
   198     {
   199       start = pmatch[0].rm_so;
   200       length = pmatch[0].rm_eo;
   201     }
   202   else
   203     {
   204       start = 0;
   205       length = strlen (string);
   206     }
   207 
   208   lock_lock (dfa->lock);
   209   if (preg->no_sub)
   210     err = re_search_internal (preg, string, length, start, length,
   211                               length, 0, NULL, eflags);
   212   else
   213     err = re_search_internal (preg, string, length, start, length,
   214                               length, nmatch, pmatch, eflags);
   215   lock_unlock (dfa->lock);
   216   return err != REG_NOERROR;
   217 }
   218 
   219 #ifdef _LIBC
   220 libc_hidden_def (__regexec)
   221 
   222 # include <shlib-compat.h>
   223 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
   224 
   225 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
   226 __typeof__ (__regexec) __compat_regexec;
   227 
   228 int
   229 attribute_compat_text_section
   230 __compat_regexec (const regex_t *__restrict preg,
   231                   const char *__restrict string, size_t nmatch,
   232                   regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
   233 {
   234   return regexec (preg, string, nmatch, pmatch,
   235                   eflags & (REG_NOTBOL | REG_NOTEOL));
   236 }
   237 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
   238 # endif
   239 #endif
   240 
   241 /* Entry points for GNU code.  */
   242 
   243 /* re_match, re_search, re_match_2, re_search_2
   244 
   245    The former two functions operate on STRING with length LENGTH,
   246    while the later two operate on concatenation of STRING1 and STRING2
   247    with lengths LENGTH1 and LENGTH2, respectively.
   248 
   249    re_match() matches the compiled pattern in BUFP against the string,
   250    starting at index START.
   251 
   252    re_search() first tries matching at index START, then it tries to match
   253    starting from index START + 1, and so on.  The last start position tried
   254    is START + RANGE.  (Thus RANGE = 0 forces re_search to operate the same
   255    way as re_match().)
   256 
   257    The parameter STOP of re_{match,search}_2 specifies that no match exceeding
   258    the first STOP characters of the concatenation of the strings should be
   259    concerned.
   260 
   261    If REGS is not NULL, and BUFP->no_sub is not set, the offsets of the match
   262    and all groups is stored in REGS.  (For the "_2" variants, the offsets are
   263    computed relative to the concatenation, not relative to the individual
   264    strings.)
   265 
   266    On success, re_match* functions return the length of the match, re_search*
   267    return the position of the start of the match.  They return -1 on
   268    match failure, -2 on error.  */
   269 
   270 regoff_t
   271 re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
   272           Idx start, struct re_registers *regs)
   273 {
   274   return re_search_stub (bufp, string, length, start, 0, length, regs, true);
   275 }
   276 #ifdef _LIBC
   277 weak_alias (__re_match, re_match)
   278 #endif
   279 
   280 regoff_t
   281 re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
   282            Idx start, regoff_t range, struct re_registers *regs)
   283 {
   284   return re_search_stub (bufp, string, length, start, range, length, regs,
   285                          false);
   286 }
   287 #ifdef _LIBC
   288 weak_alias (__re_search, re_search)
   289 #endif
   290 
   291 regoff_t
   292 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
   293             const char *string2, Idx length2, Idx start,
   294             struct re_registers *regs, Idx stop)
   295 {
   296   return re_search_2_stub (bufp, string1, length1, string2, length2,
   297                            start, 0, regs, stop, true);
   298 }
   299 #ifdef _LIBC
   300 weak_alias (__re_match_2, re_match_2)
   301 #endif
   302 
   303 regoff_t
   304 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
   305              const char *string2, Idx length2, Idx start, regoff_t range,
   306              struct re_registers *regs, Idx stop)
   307 {
   308   return re_search_2_stub (bufp, string1, length1, string2, length2,
   309                            start, range, regs, stop, false);
   310 }
   311 #ifdef _LIBC
   312 weak_alias (__re_search_2, re_search_2)
   313 #endif
   314 
   315 static regoff_t
   316 re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
   317                   Idx length1, const char *string2, Idx length2, Idx start,
   318                   regoff_t range, struct re_registers *regs,
   319                   Idx stop, bool ret_len)
   320 {
   321   const char *str;
   322   regoff_t rval;
   323   Idx len;
   324   char *s = NULL;
   325 
   326   if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
   327                          || INT_ADD_WRAPV (length1, length2, &len))))
   328     return -2;
   329 
   330   /* Concatenate the strings.  */
   331   if (length2 > 0)
   332     if (length1 > 0)
   333       {
   334         s = re_malloc (char, len);
   335 
   336         if (__glibc_unlikely (s == NULL))
   337           return -2;
   338 #ifdef _LIBC
   339         memcpy (__mempcpy (s, string1, length1), string2, length2);
   340 #else
   341         memcpy (s, string1, length1);
   342         memcpy (s + length1, string2, length2);
   343 #endif
   344         str = s;
   345       }
   346     else
   347       str = string2;
   348   else
   349     str = string1;
   350 
   351   rval = re_search_stub (bufp, str, len, start, range, stop, regs,
   352                          ret_len);
   353   re_free (s);
   354   return rval;
   355 }
   356 
   357 /* The parameters have the same meaning as those of re_search.
   358    Additional parameters:
   359    If RET_LEN is true the length of the match is returned (re_match style);
   360    otherwise the position of the match is returned.  */
   361 
   362 static regoff_t
   363 re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
   364                 Idx start, regoff_t range, Idx stop, struct re_registers *regs,
   365                 bool ret_len)
   366 {
   367   reg_errcode_t result;
   368   regmatch_t *pmatch;
   369   Idx nregs;
   370   regoff_t rval;
   371   int eflags = 0;
   372   re_dfa_t *dfa = bufp->buffer;
   373   Idx last_start = start + range;
   374 
   375   /* Check for out-of-range.  */
   376   if (__glibc_unlikely (start < 0 || start > length))
   377     return -1;
   378   if (__glibc_unlikely (length < last_start
   379                         || (0 <= range && last_start < start)))
   380     last_start = length;
   381   else if (__glibc_unlikely (last_start < 0
   382                              || (range < 0 && start <= last_start)))
   383     last_start = 0;
   384 
   385   lock_lock (dfa->lock);
   386 
   387   eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
   388   eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
   389 
   390   /* Compile fastmap if we haven't yet.  */
   391   if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
   392     re_compile_fastmap (bufp);
   393 
   394   if (__glibc_unlikely (bufp->no_sub))
   395     regs = NULL;
   396 
   397   /* We need at least 1 register.  */
   398   if (regs == NULL)
   399     nregs = 1;
   400   else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
   401                              && regs->num_regs <= bufp->re_nsub))
   402     {
   403       nregs = regs->num_regs;
   404       if (__glibc_unlikely (nregs < 1))
   405         {
   406           /* Nothing can be copied to regs.  */
   407           regs = NULL;
   408           nregs = 1;
   409         }
   410     }
   411   else
   412     nregs = bufp->re_nsub + 1;
   413   pmatch = re_malloc (regmatch_t, nregs);
   414   if (__glibc_unlikely (pmatch == NULL))
   415     {
   416       rval = -2;
   417       goto out;
   418     }
   419 
   420   result = re_search_internal (bufp, string, length, start, last_start, stop,
   421                                nregs, pmatch, eflags);
   422 
   423   rval = 0;
   424 
   425   /* I hope we needn't fill their regs with -1's when no match was found.  */
   426   if (result != REG_NOERROR)
   427     rval = result == REG_NOMATCH ? -1 : -2;
   428   else if (regs != NULL)
   429     {
   430       /* If caller wants register contents data back, copy them.  */
   431       bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
   432                                            bufp->regs_allocated);
   433       if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
   434         rval = -2;
   435     }
   436 
   437   if (__glibc_likely (rval == 0))
   438     {
   439       if (ret_len)
   440         {
   441           DEBUG_ASSERT (pmatch[0].rm_so == start);
   442           rval = pmatch[0].rm_eo - start;
   443         }
   444       else
   445         rval = pmatch[0].rm_so;
   446     }
   447   re_free (pmatch);
   448  out:
   449   lock_unlock (dfa->lock);
   450   return rval;
   451 }
   452 
   453 static unsigned
   454 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
   455               int regs_allocated)
   456 {
   457   int rval = REGS_REALLOCATE;
   458   Idx i;
   459   Idx need_regs = nregs + 1;
   460   /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code
   461      uses.  */
   462 
   463   /* Have the register data arrays been allocated?  */
   464   if (regs_allocated == REGS_UNALLOCATED)
   465     { /* No.  So allocate them with malloc.  */
   466       regs->start = re_malloc (regoff_t, need_regs);
   467       if (__glibc_unlikely (regs->start == NULL))
   468         return REGS_UNALLOCATED;
   469       regs->end = re_malloc (regoff_t, need_regs);
   470       if (__glibc_unlikely (regs->end == NULL))
   471         {
   472           re_free (regs->start);
   473           return REGS_UNALLOCATED;
   474         }
   475       regs->num_regs = need_regs;
   476     }
   477   else if (regs_allocated == REGS_REALLOCATE)
   478     { /* Yes.  If we need more elements than were already
   479          allocated, reallocate them.  If we need fewer, just
   480          leave it alone.  */
   481       if (__glibc_unlikely (need_regs > regs->num_regs))
   482         {
   483           regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
   484           regoff_t *new_end;
   485           if (__glibc_unlikely (new_start == NULL))
   486             return REGS_UNALLOCATED;
   487           new_end = re_realloc (regs->end, regoff_t, need_regs);
   488           if (__glibc_unlikely (new_end == NULL))
   489             {
   490               re_free (new_start);
   491               return REGS_UNALLOCATED;
   492             }
   493           regs->start = new_start;
   494           regs->end = new_end;
   495           regs->num_regs = need_regs;
   496         }
   497     }
   498   else
   499     {
   500       DEBUG_ASSERT (regs_allocated == REGS_FIXED);
   501       /* This function may not be called with REGS_FIXED and nregs too big.  */
   502       DEBUG_ASSERT (nregs <= regs->num_regs);
   503       rval = REGS_FIXED;
   504     }
   505 
   506   /* Copy the regs.  */
   507   for (i = 0; i < nregs; ++i)
   508     {
   509       regs->start[i] = pmatch[i].rm_so;
   510       regs->end[i] = pmatch[i].rm_eo;
   511     }
   512   for ( ; i < regs->num_regs; ++i)
   513     regs->start[i] = regs->end[i] = -1;
   514 
   515   return rval;
   516 }
   517 
   518 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
   519    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
   520    this memory for recording register information.  STARTS and ENDS
   521    must be allocated using the malloc library routine, and must each
   522    be at least NUM_REGS * sizeof (regoff_t) bytes long.
   523 
   524    If NUM_REGS == 0, then subsequent matches should allocate their own
   525    register data.
   526 
   527    Unless this function is called, the first search or match using
   528    PATTERN_BUFFER will allocate its own register data, without
   529    freeing the old data.  */
   530 
   531 void
   532 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
   533                   __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
   534 {
   535   if (num_regs)
   536     {
   537       bufp->regs_allocated = REGS_REALLOCATE;
   538       regs->num_regs = num_regs;
   539       regs->start = starts;
   540       regs->end = ends;
   541     }
   542   else
   543     {
   544       bufp->regs_allocated = REGS_UNALLOCATED;
   545       regs->num_regs = 0;
   546       regs->start = regs->end = NULL;
   547     }
   548 }
   549 #ifdef _LIBC
   550 weak_alias (__re_set_registers, re_set_registers)
   551 #endif
   552 
   553 /* Entry points compatible with 4.2 BSD regex library.  We don't define
   554    them unless specifically requested.  */
   555 
   556 #if defined _REGEX_RE_COMP || defined _LIBC
   557 int
   558 # ifdef _LIBC
   559 weak_function
   560 # endif
   561 re_exec (const char *s)
   562 {
   563   return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
   564 }
   565 #endif /* _REGEX_RE_COMP */
   566 
   567 /* Internal entry point.  */
   568 
   569 /* Searches for a compiled pattern PREG in the string STRING, whose
   570    length is LENGTH.  NMATCH, PMATCH, and EFLAGS have the same
   571    meaning as with regexec.  LAST_START is START + RANGE, where
   572    START and RANGE have the same meaning as with re_search.
   573    Return REG_NOERROR if we find a match, and REG_NOMATCH if not,
   574    otherwise return the error code.
   575    Note: We assume front end functions already check ranges.
   576    (0 <= LAST_START && LAST_START <= LENGTH)  */
   577 
   578 static reg_errcode_t
   579 __attribute_warn_unused_result__
   580 re_search_internal (const regex_t *preg, const char *string, Idx length,
   581                     Idx start, Idx last_start, Idx stop, size_t nmatch,
   582                     regmatch_t pmatch[], int eflags)
   583 {
   584   reg_errcode_t err;
   585   const re_dfa_t *dfa = preg->buffer;
   586   Idx left_lim, right_lim;
   587   int incr;
   588   bool fl_longest_match;
   589   int match_kind;
   590   Idx match_first;
   591   Idx match_last = -1;
   592   Idx extra_nmatch;
   593   bool sb;
   594   int ch;
   595   re_match_context_t mctx = { .dfa = dfa };
   596   char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
   597                     && start != last_start && !preg->can_be_null)
   598                    ? preg->fastmap : NULL);
   599   RE_TRANSLATE_TYPE t = preg->translate;
   600 
   601   extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
   602   nmatch -= extra_nmatch;
   603 
   604   /* Check if the DFA haven't been compiled.  */
   605   if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
   606                         || dfa->init_state_word == NULL
   607                         || dfa->init_state_nl == NULL
   608                         || dfa->init_state_begbuf == NULL))
   609     return REG_NOMATCH;
   610 
   611   /* We assume front-end functions already check them.  */
   612   DEBUG_ASSERT (0 <= last_start && last_start <= length);
   613 
   614   /* If initial states with non-begbuf contexts have no elements,
   615      the regex must be anchored.  If preg->newline_anchor is set,
   616      we'll never use init_state_nl, so do not check it.  */
   617   if (dfa->init_state->nodes.nelem == 0
   618       && dfa->init_state_word->nodes.nelem == 0
   619       && (dfa->init_state_nl->nodes.nelem == 0
   620           || !preg->newline_anchor))
   621     {
   622       if (start != 0 && last_start != 0)
   623         return REG_NOMATCH;
   624       start = last_start = 0;
   625     }
   626 
   627   /* We must check the longest matching, if nmatch > 0.  */
   628   fl_longest_match = (nmatch != 0 || dfa->nbackref);
   629 
   630   err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
   631                             preg->translate, (preg->syntax & RE_ICASE) != 0,
   632                             dfa);
   633   if (__glibc_unlikely (err != REG_NOERROR))
   634     goto free_return;
   635   mctx.input.stop = stop;
   636   mctx.input.raw_stop = stop;
   637   mctx.input.newline_anchor = preg->newline_anchor;
   638 
   639   err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
   640   if (__glibc_unlikely (err != REG_NOERROR))
   641     goto free_return;
   642 
   643   /* We will log all the DFA states through which the dfa pass,
   644      if nmatch > 1, or this dfa has "multibyte node", which is a
   645      back-reference or a node which can accept multibyte character or
   646      multi character collating element.  */
   647   if (nmatch > 1 || dfa->has_mb_node)
   648     {
   649       /* Avoid overflow.  */
   650       if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
   651                              <= mctx.input.bufs_len)))
   652         {
   653           err = REG_ESPACE;
   654           goto free_return;
   655         }
   656 
   657       mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
   658       if (__glibc_unlikely (mctx.state_log == NULL))
   659         {
   660           err = REG_ESPACE;
   661           goto free_return;
   662         }
   663     }
   664 
   665   match_first = start;
   666   mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
   667                            : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
   668 
   669   /* Check incrementally whether the input string matches.  */
   670   incr = (last_start < start) ? -1 : 1;
   671   left_lim = (last_start < start) ? last_start : start;
   672   right_lim = (last_start < start) ? start : last_start;
   673   sb = dfa->mb_cur_max == 1;
   674   match_kind =
   675     (fastmap
   676      ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
   677         | (start <= last_start ? 2 : 0)
   678         | (t != NULL ? 1 : 0))
   679      : 8);
   680 
   681   for (;; match_first += incr)
   682     {
   683       err = REG_NOMATCH;
   684       if (match_first < left_lim || right_lim < match_first)
   685         goto free_return;
   686 
   687       /* Advance as rapidly as possible through the string, until we
   688          find a plausible place to start matching.  This may be done
   689          with varying efficiency, so there are various possibilities:
   690          only the most common of them are specialized, in order to
   691          save on code size.  We use a switch statement for speed.  */
   692       switch (match_kind)
   693         {
   694         case 8:
   695           /* No fastmap.  */
   696           break;
   697 
   698         case 7:
   699           /* Fastmap with single-byte translation, match forward.  */
   700           while (__glibc_likely (match_first < right_lim)
   701                  && !fastmap[t[(unsigned char) string[match_first]]])
   702             ++match_first;
   703           goto forward_match_found_start_or_reached_end;
   704 
   705         case 6:
   706           /* Fastmap without translation, match forward.  */
   707           while (__glibc_likely (match_first < right_lim)
   708                  && !fastmap[(unsigned char) string[match_first]])
   709             ++match_first;
   710 
   711         forward_match_found_start_or_reached_end:
   712           if (__glibc_unlikely (match_first == right_lim))
   713             {
   714               ch = match_first >= length
   715                        ? 0 : (unsigned char) string[match_first];
   716               if (!fastmap[t ? t[ch] : ch])
   717                 goto free_return;
   718             }
   719           break;
   720 
   721         case 4:
   722         case 5:
   723           /* Fastmap without multi-byte translation, match backwards.  */
   724           while (match_first >= left_lim)
   725             {
   726               ch = match_first >= length
   727                        ? 0 : (unsigned char) string[match_first];
   728               if (fastmap[t ? t[ch] : ch])
   729                 break;
   730               --match_first;
   731             }
   732           if (match_first < left_lim)
   733             goto free_return;
   734           break;
   735 
   736         default:
   737           /* In this case, we can't determine easily the current byte,
   738              since it might be a component byte of a multibyte
   739              character.  Then we use the constructed buffer instead.  */
   740           for (;;)
   741             {
   742               /* If MATCH_FIRST is out of the valid range, reconstruct the
   743                  buffers.  */
   744               __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
   745               if (__glibc_unlikely (offset
   746                                     >= (__re_size_t) mctx.input.valid_raw_len))
   747                 {
   748                   err = re_string_reconstruct (&mctx.input, match_first,
   749                                                eflags);
   750                   if (__glibc_unlikely (err != REG_NOERROR))
   751                     goto free_return;
   752 
   753                   offset = match_first - mctx.input.raw_mbs_idx;
   754                 }
   755               /* Use buffer byte if OFFSET is in buffer, otherwise '\0'.  */
   756               ch = (offset < mctx.input.valid_len
   757                     ? re_string_byte_at (&mctx.input, offset) : 0);
   758               if (fastmap[ch])
   759                 break;
   760               match_first += incr;
   761               if (match_first < left_lim || match_first > right_lim)
   762                 {
   763                   err = REG_NOMATCH;
   764                   goto free_return;
   765                 }
   766             }
   767           break;
   768         }
   769 
   770       /* Reconstruct the buffers so that the matcher can assume that
   771          the matching starts from the beginning of the buffer.  */
   772       err = re_string_reconstruct (&mctx.input, match_first, eflags);
   773       if (__glibc_unlikely (err != REG_NOERROR))
   774         goto free_return;
   775 
   776       /* Don't consider this char as a possible match start if it part,
   777          yet isn't the head, of a multibyte character.  */
   778       if (!sb && !re_string_first_byte (&mctx.input, 0))
   779         continue;
   780 
   781       /* It seems to be appropriate one, then use the matcher.  */
   782       /* We assume that the matching starts from 0.  */
   783       mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
   784       match_last = check_matching (&mctx, fl_longest_match,
   785                                    start <= last_start ? &match_first : NULL);
   786       if (match_last != -1)
   787         {
   788           if (__glibc_unlikely (match_last == -2))
   789             {
   790               err = REG_ESPACE;
   791               goto free_return;
   792             }
   793           else
   794             {
   795               mctx.match_last = match_last;
   796               if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
   797                 {
   798                   re_dfastate_t *pstate = mctx.state_log[match_last];
   799                   mctx.last_node = check_halt_state_context (&mctx, pstate,
   800                                                              match_last);
   801                 }
   802               if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
   803                   || dfa->nbackref)
   804                 {
   805                   err = prune_impossible_nodes (&mctx);
   806                   if (err == REG_NOERROR)
   807                     break;
   808                   if (__glibc_unlikely (err != REG_NOMATCH))
   809                     goto free_return;
   810                   match_last = -1;
   811                 }
   812               else
   813                 break; /* We found a match.  */
   814             }
   815         }
   816 
   817       match_ctx_clean (&mctx);
   818     }
   819 
   820   DEBUG_ASSERT (match_last != -1);
   821   DEBUG_ASSERT (err == REG_NOERROR);
   822 
   823   /* Set pmatch[] if we need.  */
   824   if (nmatch > 0)
   825     {
   826       Idx reg_idx;
   827 
   828       /* Initialize registers.  */
   829       for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
   830         pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
   831 
   832       /* Set the points where matching start/end.  */
   833       pmatch[0].rm_so = 0;
   834       pmatch[0].rm_eo = mctx.match_last;
   835       /* FIXME: This function should fail if mctx.match_last exceeds
   836          the maximum possible regoff_t value.  We need a new error
   837          code REG_OVERFLOW.  */
   838 
   839       if (!preg->no_sub && nmatch > 1)
   840         {
   841           err = set_regs (preg, &mctx, nmatch, pmatch,
   842                           dfa->has_plural_match && dfa->nbackref > 0);
   843           if (__glibc_unlikely (err != REG_NOERROR))
   844             goto free_return;
   845         }
   846 
   847       /* At last, add the offset to each register, since we slid
   848          the buffers so that we could assume that the matching starts
   849          from 0.  */
   850       for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
   851         if (pmatch[reg_idx].rm_so != -1)
   852           {
   853             if (__glibc_unlikely (mctx.input.offsets_needed != 0))
   854               {
   855                 pmatch[reg_idx].rm_so =
   856                   (pmatch[reg_idx].rm_so == mctx.input.valid_len
   857                    ? mctx.input.valid_raw_len
   858                    : mctx.input.offsets[pmatch[reg_idx].rm_so]);
   859                 pmatch[reg_idx].rm_eo =
   860                   (pmatch[reg_idx].rm_eo == mctx.input.valid_len
   861                    ? mctx.input.valid_raw_len
   862                    : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
   863               }
   864             pmatch[reg_idx].rm_so += match_first;
   865             pmatch[reg_idx].rm_eo += match_first;
   866           }
   867       for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
   868         {
   869           pmatch[nmatch + reg_idx].rm_so = -1;
   870           pmatch[nmatch + reg_idx].rm_eo = -1;
   871         }
   872 
   873       if (dfa->subexp_map)
   874         for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
   875           if (dfa->subexp_map[reg_idx] != reg_idx)
   876             {
   877               pmatch[reg_idx + 1].rm_so
   878                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
   879               pmatch[reg_idx + 1].rm_eo
   880                 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
   881             }
   882     }
   883 
   884  free_return:
   885   re_free (mctx.state_log);
   886   if (dfa->nbackref)
   887     match_ctx_free (&mctx);
   888   re_string_destruct (&mctx.input);
   889   return err;
   890 }
   891 
   892 static reg_errcode_t
   893 __attribute_warn_unused_result__
   894 prune_impossible_nodes (re_match_context_t *mctx)
   895 {
   896   const re_dfa_t *const dfa = mctx->dfa;
   897   Idx halt_node, match_last;
   898   reg_errcode_t ret;
   899   re_dfastate_t **sifted_states;
   900   re_dfastate_t **lim_states = NULL;
   901   re_sift_context_t sctx;
   902   DEBUG_ASSERT (mctx->state_log != NULL);
   903   match_last = mctx->match_last;
   904   halt_node = mctx->last_node;
   905 
   906   /* Avoid overflow.  */
   907   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
   908                         <= match_last))
   909     return REG_ESPACE;
   910 
   911   sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
   912   if (__glibc_unlikely (sifted_states == NULL))
   913     {
   914       ret = REG_ESPACE;
   915       goto free_return;
   916     }
   917   if (dfa->nbackref)
   918     {
   919       lim_states = re_malloc (re_dfastate_t *, match_last + 1);
   920       if (__glibc_unlikely (lim_states == NULL))
   921         {
   922           ret = REG_ESPACE;
   923           goto free_return;
   924         }
   925       while (1)
   926         {
   927           memset (lim_states, '\0',
   928                   sizeof (re_dfastate_t *) * (match_last + 1));
   929           sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
   930                          match_last);
   931           ret = sift_states_backward (mctx, &sctx);
   932           re_node_set_free (&sctx.limits);
   933           if (__glibc_unlikely (ret != REG_NOERROR))
   934               goto free_return;
   935           if (sifted_states[0] != NULL || lim_states[0] != NULL)
   936             break;
   937           do
   938             {
   939               --match_last;
   940               if (match_last < 0)
   941                 {
   942                   ret = REG_NOMATCH;
   943                   goto free_return;
   944                 }
   945             } while (mctx->state_log[match_last] == NULL
   946                      || !mctx->state_log[match_last]->halt);
   947           halt_node = check_halt_state_context (mctx,
   948                                                 mctx->state_log[match_last],
   949                                                 match_last);
   950         }
   951       ret = merge_state_array (dfa, sifted_states, lim_states,
   952                                match_last + 1);
   953       re_free (lim_states);
   954       lim_states = NULL;
   955       if (__glibc_unlikely (ret != REG_NOERROR))
   956         goto free_return;
   957     }
   958   else
   959     {
   960       sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
   961       ret = sift_states_backward (mctx, &sctx);
   962       re_node_set_free (&sctx.limits);
   963       if (__glibc_unlikely (ret != REG_NOERROR))
   964         goto free_return;
   965       if (sifted_states[0] == NULL)
   966         {
   967           ret = REG_NOMATCH;
   968           goto free_return;
   969         }
   970     }
   971   re_free (mctx->state_log);
   972   mctx->state_log = sifted_states;
   973   sifted_states = NULL;
   974   mctx->last_node = halt_node;
   975   mctx->match_last = match_last;
   976   ret = REG_NOERROR;
   977  free_return:
   978   re_free (sifted_states);
   979   re_free (lim_states);
   980   return ret;
   981 }
   982 
   983 /* Acquire an initial state and return it.
   984    We must select appropriate initial state depending on the context,
   985    since initial states may have constraints like "\<", "^", etc..  */
   986 
   987 static __always_inline re_dfastate_t *
   988 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
   989                             Idx idx)
   990 {
   991   const re_dfa_t *const dfa = mctx->dfa;
   992   if (dfa->init_state->has_constraint)
   993     {
   994       unsigned int context;
   995       context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
   996       if (IS_WORD_CONTEXT (context))
   997         return dfa->init_state_word;
   998       else if (IS_ORDINARY_CONTEXT (context))
   999         return dfa->init_state;
  1000       else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
  1001         return dfa->init_state_begbuf;
  1002       else if (IS_NEWLINE_CONTEXT (context))
  1003         return dfa->init_state_nl;
  1004       else if (IS_BEGBUF_CONTEXT (context))
  1005         {
  1006           /* It is relatively rare case, then calculate on demand.  */
  1007           return re_acquire_state_context (err, dfa,
  1008                                            dfa->init_state->entrance_nodes,
  1009                                            context);
  1010         }
  1011       else
  1012         /* Must not happen?  */
  1013         return dfa->init_state;
  1014     }
  1015   else
  1016     return dfa->init_state;
  1017 }
  1018 
  1019 /* Check whether the regular expression match input string INPUT or not,
  1020    and return the index where the matching end.  Return -1 if
  1021    there is no match, and return -2 in case of an error.
  1022    FL_LONGEST_MATCH means we want the POSIX longest matching.
  1023    If P_MATCH_FIRST is not NULL, and the match fails, it is set to the
  1024    next place where we may want to try matching.
  1025    Note that the matcher assumes that the matching starts from the current
  1026    index of the buffer.  */
  1027 
  1028 static Idx
  1029 __attribute_warn_unused_result__
  1030 check_matching (re_match_context_t *mctx, bool fl_longest_match,
  1031                 Idx *p_match_first)
  1032 {
  1033   const re_dfa_t *const dfa = mctx->dfa;
  1034   reg_errcode_t err;
  1035   Idx match = 0;
  1036   Idx match_last = -1;
  1037   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
  1038   re_dfastate_t *cur_state;
  1039   bool at_init_state = p_match_first != NULL;
  1040   Idx next_start_idx = cur_str_idx;
  1041 
  1042   err = REG_NOERROR;
  1043   cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
  1044   /* An initial state must not be NULL (invalid).  */
  1045   if (__glibc_unlikely (cur_state == NULL))
  1046     {
  1047       DEBUG_ASSERT (err == REG_ESPACE);
  1048       return -2;
  1049     }
  1050 
  1051   if (mctx->state_log != NULL)
  1052     {
  1053       mctx->state_log[cur_str_idx] = cur_state;
  1054 
  1055       /* Check OP_OPEN_SUBEXP in the initial state in case that we use them
  1056          later.  E.g. Processing back references.  */
  1057       if (__glibc_unlikely (dfa->nbackref))
  1058         {
  1059           at_init_state = false;
  1060           err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
  1061           if (__glibc_unlikely (err != REG_NOERROR))
  1062             return err;
  1063 
  1064           if (cur_state->has_backref)
  1065             {
  1066               err = transit_state_bkref (mctx, &cur_state->nodes);
  1067               if (__glibc_unlikely (err != REG_NOERROR))
  1068                 return err;
  1069             }
  1070         }
  1071     }
  1072 
  1073   /* If the RE accepts NULL string.  */
  1074   if (__glibc_unlikely (cur_state->halt))
  1075     {
  1076       if (!cur_state->has_constraint
  1077           || check_halt_state_context (mctx, cur_state, cur_str_idx))
  1078         {
  1079           if (!fl_longest_match)
  1080             return cur_str_idx;
  1081           else
  1082             {
  1083               match_last = cur_str_idx;
  1084               match = 1;
  1085             }
  1086         }
  1087     }
  1088 
  1089   while (!re_string_eoi (&mctx->input))
  1090     {
  1091       re_dfastate_t *old_state = cur_state;
  1092       Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
  1093 
  1094       if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
  1095            && mctx->input.bufs_len < mctx->input.len)
  1096           || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
  1097               && mctx->input.valid_len < mctx->input.len))
  1098         {
  1099           err = extend_buffers (mctx, next_char_idx + 1);
  1100           if (__glibc_unlikely (err != REG_NOERROR))
  1101             {
  1102               DEBUG_ASSERT (err == REG_ESPACE);
  1103               return -2;
  1104             }
  1105         }
  1106 
  1107       cur_state = transit_state (&err, mctx, cur_state);
  1108       if (mctx->state_log != NULL)
  1109         cur_state = merge_state_with_log (&err, mctx, cur_state);
  1110 
  1111       if (cur_state == NULL)
  1112         {
  1113           /* Reached the invalid state or an error.  Try to recover a valid
  1114              state using the state log, if available and if we have not
  1115              already found a valid (even if not the longest) match.  */
  1116           if (__glibc_unlikely (err != REG_NOERROR))
  1117             return -2;
  1118 
  1119           if (mctx->state_log == NULL
  1120               || (match && !fl_longest_match)
  1121               || (cur_state = find_recover_state (&err, mctx)) == NULL)
  1122             break;
  1123         }
  1124 
  1125       if (__glibc_unlikely (at_init_state))
  1126         {
  1127           if (old_state == cur_state)
  1128             next_start_idx = next_char_idx;
  1129           else
  1130             at_init_state = false;
  1131         }
  1132 
  1133       if (cur_state->halt)
  1134         {
  1135           /* Reached a halt state.
  1136              Check the halt state can satisfy the current context.  */
  1137           if (!cur_state->has_constraint
  1138               || check_halt_state_context (mctx, cur_state,
  1139                                            re_string_cur_idx (&mctx->input)))
  1140             {
  1141               /* We found an appropriate halt state.  */
  1142               match_last = re_string_cur_idx (&mctx->input);
  1143               match = 1;
  1144 
  1145               /* We found a match, do not modify match_first below.  */
  1146               p_match_first = NULL;
  1147               if (!fl_longest_match)
  1148                 break;
  1149             }
  1150         }
  1151     }
  1152 
  1153   if (p_match_first)
  1154     *p_match_first += next_start_idx;
  1155 
  1156   return match_last;
  1157 }
  1158 
  1159 /* Check NODE match the current context.  */
  1160 
  1161 static bool
  1162 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
  1163 {
  1164   re_token_type_t type = dfa->nodes[node].type;
  1165   unsigned int constraint = dfa->nodes[node].constraint;
  1166   if (type != END_OF_RE)
  1167     return false;
  1168   if (!constraint)
  1169     return true;
  1170   if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
  1171     return false;
  1172   return true;
  1173 }
  1174 
  1175 /* Check the halt state STATE match the current context.
  1176    Return 0 if not match, if the node, STATE has, is a halt node and
  1177    match the context, return the node.  */
  1178 
  1179 static Idx
  1180 check_halt_state_context (const re_match_context_t *mctx,
  1181                           const re_dfastate_t *state, Idx idx)
  1182 {
  1183   Idx i;
  1184   unsigned int context;
  1185   DEBUG_ASSERT (state->halt);
  1186   context = re_string_context_at (&mctx->input, idx, mctx->eflags);
  1187   for (i = 0; i < state->nodes.nelem; ++i)
  1188     if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
  1189       return state->nodes.elems[i];
  1190   return 0;
  1191 }
  1192 
  1193 /* Compute the next node to which "NFA" transit from NODE("NFA" is a NFA
  1194    corresponding to the DFA).
  1195    Return the destination node, and update EPS_VIA_NODES;
  1196    return -1 on match failure, -2 on error.  */
  1197 
  1198 static Idx
  1199 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
  1200                    regmatch_t *prevregs,
  1201                    Idx *pidx, Idx node, re_node_set *eps_via_nodes,
  1202                    struct re_fail_stack_t *fs)
  1203 {
  1204   const re_dfa_t *const dfa = mctx->dfa;
  1205   if (IS_EPSILON_NODE (dfa->nodes[node].type))
  1206     {
  1207       re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
  1208       re_node_set *edests = &dfa->edests[node];
  1209 
  1210       if (! re_node_set_contains (eps_via_nodes, node))
  1211         {
  1212           bool ok = re_node_set_insert (eps_via_nodes, node);
  1213           if (__glibc_unlikely (! ok))
  1214             return -2;
  1215         }
  1216 
  1217       /* Pick a valid destination, or return -1 if none is found.  */
  1218       Idx dest_node = -1;
  1219       for (Idx i = 0; i < edests->nelem; i++)
  1220         {
  1221           Idx candidate = edests->elems[i];
  1222           if (!re_node_set_contains (cur_nodes, candidate))
  1223             continue;
  1224           if (dest_node == -1)
  1225             dest_node = candidate;
  1226 
  1227           else
  1228             {
  1229               /* In order to avoid infinite loop like "(a*)*", return the second
  1230                  epsilon-transition if the first was already considered.  */
  1231               if (re_node_set_contains (eps_via_nodes, dest_node))
  1232                 return candidate;
  1233 
  1234               /* Otherwise, push the second epsilon-transition on the fail stack.  */
  1235               else if (fs != NULL
  1236                        && push_fail_stack (fs, *pidx, candidate, nregs, regs,
  1237                                            prevregs, eps_via_nodes))
  1238                 return -2;
  1239 
  1240               /* We know we are going to exit.  */
  1241               break;
  1242             }
  1243         }
  1244       return dest_node;
  1245     }
  1246   else
  1247     {
  1248       Idx naccepted = 0;
  1249       re_token_type_t type = dfa->nodes[node].type;
  1250 
  1251       if (dfa->nodes[node].accept_mb)
  1252         naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
  1253       else if (type == OP_BACK_REF)
  1254         {
  1255           Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
  1256           if (subexp_idx < nregs)
  1257             naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
  1258           if (fs != NULL)
  1259             {
  1260               if (subexp_idx >= nregs
  1261                   || regs[subexp_idx].rm_so == -1
  1262                   || regs[subexp_idx].rm_eo == -1)
  1263                 return -1;
  1264               else if (naccepted)
  1265                 {
  1266                   char *buf = (char *) re_string_get_buffer (&mctx->input);
  1267                   if (mctx->input.valid_len - *pidx < naccepted
  1268                       || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
  1269                                   naccepted)
  1270                           != 0))
  1271                     return -1;
  1272                 }
  1273             }
  1274 
  1275           if (naccepted == 0)
  1276             {
  1277               Idx dest_node;
  1278               bool ok = re_node_set_insert (eps_via_nodes, node);
  1279               if (__glibc_unlikely (! ok))
  1280                 return -2;
  1281               dest_node = dfa->edests[node].elems[0];
  1282               if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
  1283                                         dest_node))
  1284                 return dest_node;
  1285             }
  1286         }
  1287 
  1288       if (naccepted != 0
  1289           || check_node_accept (mctx, dfa->nodes + node, *pidx))
  1290         {
  1291           Idx dest_node = dfa->nexts[node];
  1292           *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
  1293           if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
  1294                      || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
  1295                                                dest_node)))
  1296             return -1;
  1297           re_node_set_empty (eps_via_nodes);
  1298           return dest_node;
  1299         }
  1300     }
  1301   return -1;
  1302 }
  1303 
  1304 static reg_errcode_t
  1305 __attribute_warn_unused_result__
  1306 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
  1307                  Idx nregs, regmatch_t *regs, regmatch_t *prevregs,
  1308                  re_node_set *eps_via_nodes)
  1309 {
  1310   reg_errcode_t err;
  1311   Idx num = fs->num;
  1312   if (num == fs->alloc)
  1313     {
  1314       struct re_fail_stack_ent_t *new_array;
  1315       new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
  1316                               fs->alloc * 2);
  1317       if (new_array == NULL)
  1318         return REG_ESPACE;
  1319       fs->alloc *= 2;
  1320       fs->stack = new_array;
  1321     }
  1322   fs->stack[num].idx = str_idx;
  1323   fs->stack[num].node = dest_node;
  1324   fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs);
  1325   if (fs->stack[num].regs == NULL)
  1326     return REG_ESPACE;
  1327   fs->num = num + 1;
  1328   memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
  1329   memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs);
  1330   err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
  1331   return err;
  1332 }
  1333 
  1334 static Idx
  1335 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
  1336                 regmatch_t *regs, regmatch_t *prevregs,
  1337                 re_node_set *eps_via_nodes)
  1338 {
  1339   if (fs == NULL || fs->num == 0)
  1340     return -1;
  1341   Idx num = --fs->num;
  1342   *pidx = fs->stack[num].idx;
  1343   memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
  1344   memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs);
  1345   re_node_set_free (eps_via_nodes);
  1346   re_free (fs->stack[num].regs);
  1347   *eps_via_nodes = fs->stack[num].eps_via_nodes;
  1348   DEBUG_ASSERT (0 <= fs->stack[num].node);
  1349   return fs->stack[num].node;
  1350 }
  1351 
  1352 
  1353 #define DYNARRAY_STRUCT  regmatch_list
  1354 #define DYNARRAY_ELEMENT regmatch_t
  1355 #define DYNARRAY_PREFIX  regmatch_list_
  1356 #include <malloc/dynarray-skeleton.c>
  1357 
  1358 /* Set the positions where the subexpressions are starts/ends to registers
  1359    PMATCH.
  1360    Note: We assume that pmatch[0] is already set, and
  1361    pmatch[i].rm_so == pmatch[i].rm_eo == -1 for 0 < i < nmatch.  */
  1362 
  1363 static reg_errcode_t
  1364 __attribute_warn_unused_result__
  1365 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
  1366           regmatch_t *pmatch, bool fl_backtrack)
  1367 {
  1368   const re_dfa_t *dfa = preg->buffer;
  1369   Idx idx, cur_node;
  1370   re_node_set eps_via_nodes;
  1371   struct re_fail_stack_t *fs;
  1372   struct re_fail_stack_t fs_body = { 0, 2, NULL };
  1373   struct regmatch_list prev_match;
  1374   regmatch_list_init (&prev_match);
  1375 
  1376   DEBUG_ASSERT (nmatch > 1);
  1377   DEBUG_ASSERT (mctx->state_log != NULL);
  1378   if (fl_backtrack)
  1379     {
  1380       fs = &fs_body;
  1381       fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
  1382       if (fs->stack == NULL)
  1383         return REG_ESPACE;
  1384     }
  1385   else
  1386     fs = NULL;
  1387 
  1388   cur_node = dfa->init_node;
  1389   re_node_set_init_empty (&eps_via_nodes);
  1390 
  1391   if (!regmatch_list_resize (&prev_match, nmatch))
  1392     {
  1393       regmatch_list_free (&prev_match);
  1394       free_fail_stack_return (fs);
  1395       return REG_ESPACE;
  1396     }
  1397   regmatch_t *prev_idx_match = regmatch_list_begin (&prev_match);
  1398   memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
  1399 
  1400   for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
  1401     {
  1402       update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
  1403 
  1404       if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
  1405           || (fs && re_node_set_contains (&eps_via_nodes, cur_node)))
  1406         {
  1407           Idx reg_idx;
  1408           cur_node = -1;
  1409           if (fs)
  1410             {
  1411               for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
  1412                 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
  1413                   {
  1414                     cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
  1415                                                prev_idx_match, &eps_via_nodes);
  1416                     break;
  1417                   }
  1418             }
  1419           if (cur_node < 0)
  1420             {
  1421               re_node_set_free (&eps_via_nodes);
  1422               regmatch_list_free (&prev_match);
  1423               return free_fail_stack_return (fs);
  1424             }
  1425         }
  1426 
  1427       /* Proceed to next node.  */
  1428       cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match,
  1429                                     &idx, cur_node,
  1430                                     &eps_via_nodes, fs);
  1431 
  1432       if (__glibc_unlikely (cur_node < 0))
  1433         {
  1434           if (__glibc_unlikely (cur_node == -2))
  1435             {
  1436               re_node_set_free (&eps_via_nodes);
  1437               regmatch_list_free (&prev_match);
  1438               free_fail_stack_return (fs);
  1439               return REG_ESPACE;
  1440             }
  1441           cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
  1442                                      prev_idx_match, &eps_via_nodes);
  1443           if (cur_node < 0)
  1444             {
  1445               re_node_set_free (&eps_via_nodes);
  1446               regmatch_list_free (&prev_match);
  1447               free_fail_stack_return (fs);
  1448               return REG_NOMATCH;
  1449             }
  1450         }
  1451     }
  1452   re_node_set_free (&eps_via_nodes);
  1453   regmatch_list_free (&prev_match);
  1454   return free_fail_stack_return (fs);
  1455 }
  1456 
  1457 static reg_errcode_t
  1458 free_fail_stack_return (struct re_fail_stack_t *fs)
  1459 {
  1460   if (fs)
  1461     {
  1462       Idx fs_idx;
  1463       for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
  1464         {
  1465           re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
  1466           re_free (fs->stack[fs_idx].regs);
  1467         }
  1468       re_free (fs->stack);
  1469     }
  1470   return REG_NOERROR;
  1471 }
  1472 
  1473 static void
  1474 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
  1475              regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
  1476 {
  1477   int type = dfa->nodes[cur_node].type;
  1478   if (type == OP_OPEN_SUBEXP)
  1479     {
  1480       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
  1481 
  1482       /* We are at the first node of this sub expression.  */
  1483       if (reg_num < nmatch)
  1484         {
  1485           pmatch[reg_num].rm_so = cur_idx;
  1486           pmatch[reg_num].rm_eo = -1;
  1487         }
  1488     }
  1489   else if (type == OP_CLOSE_SUBEXP)
  1490     {
  1491       /* We are at the last node of this sub expression.  */
  1492       Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
  1493       if (reg_num < nmatch)
  1494         {
  1495           if (pmatch[reg_num].rm_so < cur_idx)
  1496             {
  1497               pmatch[reg_num].rm_eo = cur_idx;
  1498               /* This is a non-empty match or we are not inside an optional
  1499                  subexpression.  Accept this right away.  */
  1500               memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
  1501             }
  1502           else
  1503             {
  1504               if (dfa->nodes[cur_node].opt_subexp
  1505                   && prev_idx_match[reg_num].rm_so != -1)
  1506                 /* We transited through an empty match for an optional
  1507                    subexpression, like (a?)*, and this is not the subexp's
  1508                    first match.  Copy back the old content of the registers
  1509                    so that matches of an inner subexpression are undone as
  1510                    well, like in ((a?))*.  */
  1511                 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
  1512               else
  1513                 /* We completed a subexpression, but it may be part of
  1514                    an optional one, so do not update PREV_IDX_MATCH.  */
  1515                 pmatch[reg_num].rm_eo = cur_idx;
  1516             }
  1517         }
  1518     }
  1519 }
  1520 
  1521 /* This function checks the STATE_LOG from the SCTX->last_str_idx to 0
  1522    and sift the nodes in each states according to the following rules.
  1523    Updated state_log will be wrote to STATE_LOG.
  1524 
  1525    Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if...
  1526      1. When STR_IDX == MATCH_LAST(the last index in the state_log):
  1527         If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to
  1528         the LAST_NODE, we throw away the node 'a'.
  1529      2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts
  1530         string 's' and transit to 'b':
  1531         i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw
  1532            away the node 'a'.
  1533         ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is
  1534             thrown away, we throw away the node 'a'.
  1535      3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b':
  1536         i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the
  1537            node 'a'.
  1538         ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away,
  1539             we throw away the node 'a'.  */
  1540 
  1541 #define STATE_NODE_CONTAINS(state,node) \
  1542   ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
  1543 
  1544 static reg_errcode_t
  1545 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
  1546 {
  1547   reg_errcode_t err;
  1548   int null_cnt = 0;
  1549   Idx str_idx = sctx->last_str_idx;
  1550   re_node_set cur_dest;
  1551 
  1552   DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
  1553 
  1554   /* Build sifted state_log[str_idx].  It has the nodes which can epsilon
  1555      transit to the last_node and the last_node itself.  */
  1556   err = re_node_set_init_1 (&cur_dest, sctx->last_node);
  1557   if (__glibc_unlikely (err != REG_NOERROR))
  1558     return err;
  1559   err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
  1560   if (__glibc_unlikely (err != REG_NOERROR))
  1561     goto free_return;
  1562 
  1563   /* Then check each states in the state_log.  */
  1564   while (str_idx > 0)
  1565     {
  1566       /* Update counters.  */
  1567       null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
  1568       if (null_cnt > mctx->max_mb_elem_len)
  1569         {
  1570           memset (sctx->sifted_states, '\0',
  1571                   sizeof (re_dfastate_t *) * str_idx);
  1572           re_node_set_free (&cur_dest);
  1573           return REG_NOERROR;
  1574         }
  1575       re_node_set_empty (&cur_dest);
  1576       --str_idx;
  1577 
  1578       if (mctx->state_log[str_idx])
  1579         {
  1580           err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
  1581           if (__glibc_unlikely (err != REG_NOERROR))
  1582             goto free_return;
  1583         }
  1584 
  1585       /* Add all the nodes which satisfy the following conditions:
  1586          - It can epsilon transit to a node in CUR_DEST.
  1587          - It is in CUR_SRC.
  1588          And update state_log.  */
  1589       err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
  1590       if (__glibc_unlikely (err != REG_NOERROR))
  1591         goto free_return;
  1592     }
  1593   err = REG_NOERROR;
  1594  free_return:
  1595   re_node_set_free (&cur_dest);
  1596   return err;
  1597 }
  1598 
  1599 static reg_errcode_t
  1600 __attribute_warn_unused_result__
  1601 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
  1602                      Idx str_idx, re_node_set *cur_dest)
  1603 {
  1604   const re_dfa_t *const dfa = mctx->dfa;
  1605   const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
  1606   Idx i;
  1607 
  1608   /* Then build the next sifted state.
  1609      We build the next sifted state on 'cur_dest', and update
  1610      'sifted_states[str_idx]' with 'cur_dest'.
  1611      Note:
  1612      'cur_dest' is the sifted state from 'state_log[str_idx + 1]'.
  1613      'cur_src' points the node_set of the old 'state_log[str_idx]'
  1614      (with the epsilon nodes pre-filtered out).  */
  1615   for (i = 0; i < cur_src->nelem; i++)
  1616     {
  1617       Idx prev_node = cur_src->elems[i];
  1618       int naccepted = 0;
  1619       bool ok;
  1620       DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
  1621 
  1622       /* If the node may accept "multi byte".  */
  1623       if (dfa->nodes[prev_node].accept_mb)
  1624         naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
  1625                                          str_idx, sctx->last_str_idx);
  1626 
  1627       /* We don't check backreferences here.
  1628          See update_cur_sifted_state().  */
  1629       if (!naccepted
  1630           && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
  1631           && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
  1632                                   dfa->nexts[prev_node]))
  1633         naccepted = 1;
  1634 
  1635       if (naccepted == 0)
  1636         continue;
  1637 
  1638       if (sctx->limits.nelem)
  1639         {
  1640           Idx to_idx = str_idx + naccepted;
  1641           if (check_dst_limits (mctx, &sctx->limits,
  1642                                 dfa->nexts[prev_node], to_idx,
  1643                                 prev_node, str_idx))
  1644             continue;
  1645         }
  1646       ok = re_node_set_insert (cur_dest, prev_node);
  1647       if (__glibc_unlikely (! ok))
  1648         return REG_ESPACE;
  1649     }
  1650 
  1651   return REG_NOERROR;
  1652 }
  1653 
  1654 /* Helper functions.  */
  1655 
  1656 static reg_errcode_t
  1657 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
  1658 {
  1659   Idx top = mctx->state_log_top;
  1660 
  1661   if ((next_state_log_idx >= mctx->input.bufs_len
  1662        && mctx->input.bufs_len < mctx->input.len)
  1663       || (next_state_log_idx >= mctx->input.valid_len
  1664           && mctx->input.valid_len < mctx->input.len))
  1665     {
  1666       reg_errcode_t err;
  1667       err = extend_buffers (mctx, next_state_log_idx + 1);
  1668       if (__glibc_unlikely (err != REG_NOERROR))
  1669         return err;
  1670     }
  1671 
  1672   if (top < next_state_log_idx)
  1673     {
  1674       DEBUG_ASSERT (mctx->state_log != NULL);
  1675       memset (mctx->state_log + top + 1, '\0',
  1676               sizeof (re_dfastate_t *) * (next_state_log_idx - top));
  1677       mctx->state_log_top = next_state_log_idx;
  1678     }
  1679   return REG_NOERROR;
  1680 }
  1681 
  1682 static reg_errcode_t
  1683 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
  1684                    re_dfastate_t **src, Idx num)
  1685 {
  1686   Idx st_idx;
  1687   reg_errcode_t err;
  1688   for (st_idx = 0; st_idx < num; ++st_idx)
  1689     {
  1690       if (dst[st_idx] == NULL)
  1691         dst[st_idx] = src[st_idx];
  1692       else if (src[st_idx] != NULL)
  1693         {
  1694           re_node_set merged_set;
  1695           err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
  1696                                         &src[st_idx]->nodes);
  1697           if (__glibc_unlikely (err != REG_NOERROR))
  1698             return err;
  1699           dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
  1700           re_node_set_free (&merged_set);
  1701           if (__glibc_unlikely (err != REG_NOERROR))
  1702             return err;
  1703         }
  1704     }
  1705   return REG_NOERROR;
  1706 }
  1707 
  1708 static reg_errcode_t
  1709 update_cur_sifted_state (const re_match_context_t *mctx,
  1710                          re_sift_context_t *sctx, Idx str_idx,
  1711                          re_node_set *dest_nodes)
  1712 {
  1713   const re_dfa_t *const dfa = mctx->dfa;
  1714   reg_errcode_t err = REG_NOERROR;
  1715   const re_node_set *candidates;
  1716   candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
  1717                 : &mctx->state_log[str_idx]->nodes);
  1718 
  1719   if (dest_nodes->nelem == 0)
  1720     sctx->sifted_states[str_idx] = NULL;
  1721   else
  1722     {
  1723       if (candidates)
  1724         {
  1725           /* At first, add the nodes which can epsilon transit to a node in
  1726              DEST_NODE.  */
  1727           err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
  1728           if (__glibc_unlikely (err != REG_NOERROR))
  1729             return err;
  1730 
  1731           /* Then, check the limitations in the current sift_context.  */
  1732           if (sctx->limits.nelem)
  1733             {
  1734               err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
  1735                                          mctx->bkref_ents, str_idx);
  1736               if (__glibc_unlikely (err != REG_NOERROR))
  1737                 return err;
  1738             }
  1739         }
  1740 
  1741       sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
  1742       if (__glibc_unlikely (err != REG_NOERROR))
  1743         return err;
  1744     }
  1745 
  1746   if (candidates && mctx->state_log[str_idx]->has_backref)
  1747     {
  1748       err = sift_states_bkref (mctx, sctx, str_idx, candidates);
  1749       if (__glibc_unlikely (err != REG_NOERROR))
  1750         return err;
  1751     }
  1752   return REG_NOERROR;
  1753 }
  1754 
  1755 static reg_errcode_t
  1756 __attribute_warn_unused_result__
  1757 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
  1758                        const re_node_set *candidates)
  1759 {
  1760   reg_errcode_t err = REG_NOERROR;
  1761   Idx i;
  1762 
  1763   re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
  1764   if (__glibc_unlikely (err != REG_NOERROR))
  1765     return err;
  1766 
  1767   if (!state->inveclosure.alloc)
  1768     {
  1769       err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
  1770       if (__glibc_unlikely (err != REG_NOERROR))
  1771         return REG_ESPACE;
  1772       for (i = 0; i < dest_nodes->nelem; i++)
  1773         {
  1774           err = re_node_set_merge (&state->inveclosure,
  1775                                    dfa->inveclosures + dest_nodes->elems[i]);
  1776           if (__glibc_unlikely (err != REG_NOERROR))
  1777             return REG_ESPACE;
  1778         }
  1779     }
  1780   return re_node_set_add_intersect (dest_nodes, candidates,
  1781                                     &state->inveclosure);
  1782 }
  1783 
  1784 static reg_errcode_t
  1785 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
  1786                        const re_node_set *candidates)
  1787 {
  1788     Idx ecl_idx;
  1789     reg_errcode_t err;
  1790     re_node_set *inv_eclosure = dfa->inveclosures + node;
  1791     re_node_set except_nodes;
  1792     re_node_set_init_empty (&except_nodes);
  1793     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
  1794       {
  1795         Idx cur_node = inv_eclosure->elems[ecl_idx];
  1796         if (cur_node == node)
  1797           continue;
  1798         if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
  1799           {
  1800             Idx edst1 = dfa->edests[cur_node].elems[0];
  1801             Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
  1802                          ? dfa->edests[cur_node].elems[1] : -1);
  1803             if ((!re_node_set_contains (inv_eclosure, edst1)
  1804                  && re_node_set_contains (dest_nodes, edst1))
  1805                 || (edst2 > 0
  1806                     && !re_node_set_contains (inv_eclosure, edst2)
  1807                     && re_node_set_contains (dest_nodes, edst2)))
  1808               {
  1809                 err = re_node_set_add_intersect (&except_nodes, candidates,
  1810                                                  dfa->inveclosures + cur_node);
  1811                 if (__glibc_unlikely (err != REG_NOERROR))
  1812                   {
  1813                     re_node_set_free (&except_nodes);
  1814                     return err;
  1815                   }
  1816               }
  1817           }
  1818       }
  1819     for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
  1820       {
  1821         Idx cur_node = inv_eclosure->elems[ecl_idx];
  1822         if (!re_node_set_contains (&except_nodes, cur_node))
  1823           {
  1824             Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
  1825             re_node_set_remove_at (dest_nodes, idx);
  1826           }
  1827       }
  1828     re_node_set_free (&except_nodes);
  1829     return REG_NOERROR;
  1830 }
  1831 
  1832 static bool
  1833 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
  1834                   Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
  1835 {
  1836   const re_dfa_t *const dfa = mctx->dfa;
  1837   Idx lim_idx, src_pos, dst_pos;
  1838 
  1839   Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
  1840   Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
  1841   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
  1842     {
  1843       Idx subexp_idx;
  1844       struct re_backref_cache_entry *ent;
  1845       ent = mctx->bkref_ents + limits->elems[lim_idx];
  1846       subexp_idx = dfa->nodes[ent->node].opr.idx;
  1847 
  1848       dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
  1849                                            subexp_idx, dst_node, dst_idx,
  1850                                            dst_bkref_idx);
  1851       src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
  1852                                            subexp_idx, src_node, src_idx,
  1853                                            src_bkref_idx);
  1854 
  1855       /* In case of:
  1856          <src> <dst> ( <subexp> )
  1857          ( <subexp> ) <src> <dst>
  1858          ( <subexp1> <src> <subexp2> <dst> <subexp3> )  */
  1859       if (src_pos == dst_pos)
  1860         continue; /* This is unrelated limitation.  */
  1861       else
  1862         return true;
  1863     }
  1864   return false;
  1865 }
  1866 
  1867 static int
  1868 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
  1869                              Idx subexp_idx, Idx from_node, Idx bkref_idx)
  1870 {
  1871   const re_dfa_t *const dfa = mctx->dfa;
  1872   const re_node_set *eclosures = dfa->eclosures + from_node;
  1873   Idx node_idx;
  1874 
  1875   /* Else, we are on the boundary: examine the nodes on the epsilon
  1876      closure.  */
  1877   for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
  1878     {
  1879       Idx node = eclosures->elems[node_idx];
  1880       switch (dfa->nodes[node].type)
  1881         {
  1882         case OP_BACK_REF:
  1883           if (bkref_idx != -1)
  1884             {
  1885               struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
  1886               do
  1887                 {
  1888                   Idx dst;
  1889                   int cpos;
  1890 
  1891                   if (ent->node != node)
  1892                     continue;
  1893 
  1894                   if (subexp_idx < BITSET_WORD_BITS
  1895                       && !(ent->eps_reachable_subexps_map
  1896                            & ((bitset_word_t) 1 << subexp_idx)))
  1897                     continue;
  1898 
  1899                   /* Recurse trying to reach the OP_OPEN_SUBEXP and
  1900                      OP_CLOSE_SUBEXP cases below.  But, if the
  1901                      destination node is the same node as the source
  1902                      node, don't recurse because it would cause an
  1903                      infinite loop: a regex that exhibits this behavior
  1904                      is ()\1*\1*  */
  1905                   dst = dfa->edests[node].elems[0];
  1906                   if (dst == from_node)
  1907                     {
  1908                       if (boundaries & 1)
  1909                         return -1;
  1910                       else /* if (boundaries & 2) */
  1911                         return 0;
  1912                     }
  1913 
  1914                   cpos =
  1915                     check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
  1916                                                  dst, bkref_idx);
  1917                   if (cpos == -1 /* && (boundaries & 1) */)
  1918                     return -1;
  1919                   if (cpos == 0 && (boundaries & 2))
  1920                     return 0;
  1921 
  1922                   if (subexp_idx < BITSET_WORD_BITS)
  1923                     ent->eps_reachable_subexps_map
  1924                       &= ~((bitset_word_t) 1 << subexp_idx);
  1925                 }
  1926               while (ent++->more);
  1927             }
  1928           break;
  1929 
  1930         case OP_OPEN_SUBEXP:
  1931           if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
  1932             return -1;
  1933           break;
  1934 
  1935         case OP_CLOSE_SUBEXP:
  1936           if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
  1937             return 0;
  1938           break;
  1939 
  1940         default:
  1941             break;
  1942         }
  1943     }
  1944 
  1945   return (boundaries & 2) ? 1 : 0;
  1946 }
  1947 
  1948 static int
  1949 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
  1950                            Idx subexp_idx, Idx from_node, Idx str_idx,
  1951                            Idx bkref_idx)
  1952 {
  1953   struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
  1954   int boundaries;
  1955 
  1956   /* If we are outside the range of the subexpression, return -1 or 1.  */
  1957   if (str_idx < lim->subexp_from)
  1958     return -1;
  1959 
  1960   if (lim->subexp_to < str_idx)
  1961     return 1;
  1962 
  1963   /* If we are within the subexpression, return 0.  */
  1964   boundaries = (str_idx == lim->subexp_from);
  1965   boundaries |= (str_idx == lim->subexp_to) << 1;
  1966   if (boundaries == 0)
  1967     return 0;
  1968 
  1969   /* Else, examine epsilon closure.  */
  1970   return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
  1971                                       from_node, bkref_idx);
  1972 }
  1973 
  1974 /* Check the limitations of sub expressions LIMITS, and remove the nodes
  1975    which are against limitations from DEST_NODES. */
  1976 
  1977 static reg_errcode_t
  1978 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
  1979                      const re_node_set *candidates, re_node_set *limits,
  1980                      struct re_backref_cache_entry *bkref_ents, Idx str_idx)
  1981 {
  1982   reg_errcode_t err;
  1983   Idx node_idx, lim_idx;
  1984 
  1985   for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
  1986     {
  1987       Idx subexp_idx;
  1988       struct re_backref_cache_entry *ent;
  1989       ent = bkref_ents + limits->elems[lim_idx];
  1990 
  1991       if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
  1992         continue; /* This is unrelated limitation.  */
  1993 
  1994       subexp_idx = dfa->nodes[ent->node].opr.idx;
  1995       if (ent->subexp_to == str_idx)
  1996         {
  1997           Idx ops_node = -1;
  1998           Idx cls_node = -1;
  1999           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
  2000             {
  2001               Idx node = dest_nodes->elems[node_idx];
  2002               re_token_type_t type = dfa->nodes[node].type;
  2003               if (type == OP_OPEN_SUBEXP
  2004                   && subexp_idx == dfa->nodes[node].opr.idx)
  2005                 ops_node = node;
  2006               else if (type == OP_CLOSE_SUBEXP
  2007                        && subexp_idx == dfa->nodes[node].opr.idx)
  2008                 cls_node = node;
  2009             }
  2010 
  2011           /* Check the limitation of the open subexpression.  */
  2012           /* Note that (ent->subexp_to = str_idx != ent->subexp_from).  */
  2013           if (ops_node >= 0)
  2014             {
  2015               err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
  2016                                            candidates);
  2017               if (__glibc_unlikely (err != REG_NOERROR))
  2018                 return err;
  2019             }
  2020 
  2021           /* Check the limitation of the close subexpression.  */
  2022           if (cls_node >= 0)
  2023             for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
  2024               {
  2025                 Idx node = dest_nodes->elems[node_idx];
  2026                 if (!re_node_set_contains (dfa->inveclosures + node,
  2027                                            cls_node)
  2028                     && !re_node_set_contains (dfa->eclosures + node,
  2029                                               cls_node))
  2030                   {
  2031                     /* It is against this limitation.
  2032                        Remove it form the current sifted state.  */
  2033                     err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
  2034                                                  candidates);
  2035                     if (__glibc_unlikely (err != REG_NOERROR))
  2036                       return err;
  2037                     --node_idx;
  2038                   }
  2039               }
  2040         }
  2041       else /* (ent->subexp_to != str_idx)  */
  2042         {
  2043           for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
  2044             {
  2045               Idx node = dest_nodes->elems[node_idx];
  2046               re_token_type_t type = dfa->nodes[node].type;
  2047               if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
  2048                 {
  2049                   if (subexp_idx != dfa->nodes[node].opr.idx)
  2050                     continue;
  2051                   /* It is against this limitation.
  2052                      Remove it form the current sifted state.  */
  2053                   err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
  2054                                                candidates);
  2055                   if (__glibc_unlikely (err != REG_NOERROR))
  2056                     return err;
  2057                 }
  2058             }
  2059         }
  2060     }
  2061   return REG_NOERROR;
  2062 }
  2063 
  2064 static reg_errcode_t
  2065 __attribute_warn_unused_result__
  2066 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
  2067                    Idx str_idx, const re_node_set *candidates)
  2068 {
  2069   const re_dfa_t *const dfa = mctx->dfa;
  2070   reg_errcode_t err;
  2071   Idx node_idx, node;
  2072   re_sift_context_t local_sctx;
  2073   Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
  2074 
  2075   if (first_idx == -1)
  2076     return REG_NOERROR;
  2077 
  2078   local_sctx.sifted_states = NULL; /* Mark that it hasn't been initialized.  */
  2079 
  2080   for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
  2081     {
  2082       Idx enabled_idx;
  2083       re_token_type_t type;
  2084       struct re_backref_cache_entry *entry;
  2085       node = candidates->elems[node_idx];
  2086       type = dfa->nodes[node].type;
  2087       /* Avoid infinite loop for the REs like "()\1+".  */
  2088       if (node == sctx->last_node && str_idx == sctx->last_str_idx)
  2089         continue;
  2090       if (type != OP_BACK_REF)
  2091         continue;
  2092 
  2093       entry = mctx->bkref_ents + first_idx;
  2094       enabled_idx = first_idx;
  2095       do
  2096         {
  2097           Idx subexp_len;
  2098           Idx to_idx;
  2099           Idx dst_node;
  2100           bool ok;
  2101           re_dfastate_t *cur_state;
  2102 
  2103           if (entry->node != node)
  2104             continue;
  2105           subexp_len = entry->subexp_to - entry->subexp_from;
  2106           to_idx = str_idx + subexp_len;
  2107           dst_node = (subexp_len ? dfa->nexts[node]
  2108                       : dfa->edests[node].elems[0]);
  2109 
  2110           if (to_idx > sctx->last_str_idx
  2111               || sctx->sifted_states[to_idx] == NULL
  2112               || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
  2113               || check_dst_limits (mctx, &sctx->limits, node,
  2114                                    str_idx, dst_node, to_idx))
  2115             continue;
  2116 
  2117           if (local_sctx.sifted_states == NULL)
  2118             {
  2119               local_sctx = *sctx;
  2120               err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
  2121               if (__glibc_unlikely (err != REG_NOERROR))
  2122                 goto free_return;
  2123             }
  2124           local_sctx.last_node = node;
  2125           local_sctx.last_str_idx = str_idx;
  2126           ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
  2127           if (__glibc_unlikely (! ok))
  2128             {
  2129               err = REG_ESPACE;
  2130               goto free_return;
  2131             }
  2132           cur_state = local_sctx.sifted_states[str_idx];
  2133           err = sift_states_backward (mctx, &local_sctx);
  2134           if (__glibc_unlikely (err != REG_NOERROR))
  2135             goto free_return;
  2136           if (sctx->limited_states != NULL)
  2137             {
  2138               err = merge_state_array (dfa, sctx->limited_states,
  2139                                        local_sctx.sifted_states,
  2140                                        str_idx + 1);
  2141               if (__glibc_unlikely (err != REG_NOERROR))
  2142                 goto free_return;
  2143             }
  2144           local_sctx.sifted_states[str_idx] = cur_state;
  2145           re_node_set_remove (&local_sctx.limits, enabled_idx);
  2146 
  2147           /* mctx->bkref_ents may have changed, reload the pointer.  */
  2148           entry = mctx->bkref_ents + enabled_idx;
  2149         }
  2150       while (enabled_idx++, entry++->more);
  2151     }
  2152   err = REG_NOERROR;
  2153  free_return:
  2154   if (local_sctx.sifted_states != NULL)
  2155     {
  2156       re_node_set_free (&local_sctx.limits);
  2157     }
  2158 
  2159   return err;
  2160 }
  2161 
  2162 
  2163 static int
  2164 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
  2165                      Idx node_idx, Idx str_idx, Idx max_str_idx)
  2166 {
  2167   const re_dfa_t *const dfa = mctx->dfa;
  2168   int naccepted;
  2169   /* Check the node can accept "multi byte".  */
  2170   naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
  2171   if (naccepted > 0 && str_idx + naccepted <= max_str_idx
  2172       && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
  2173                                dfa->nexts[node_idx]))
  2174     /* The node can't accept the "multi byte", or the
  2175        destination was already thrown away, then the node
  2176        couldn't accept the current input "multi byte".   */
  2177     naccepted = 0;
  2178   /* Otherwise, it is sure that the node could accept
  2179      'naccepted' bytes input.  */
  2180   return naccepted;
  2181 }
  2182 
  2183 /* Functions for state transition.  */
  2184 
  2185 /* Return the next state to which the current state STATE will transit by
  2186    accepting the current input byte, and update STATE_LOG if necessary.
  2187    Return NULL on failure.
  2188    If STATE can accept a multibyte char/collating element/back reference
  2189    update the destination of STATE_LOG.  */
  2190 
  2191 static re_dfastate_t *
  2192 __attribute_warn_unused_result__
  2193 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
  2194                re_dfastate_t *state)
  2195 {
  2196   re_dfastate_t **trtable;
  2197   unsigned char ch;
  2198 
  2199   /* If the current state can accept multibyte.  */
  2200   if (__glibc_unlikely (state->accept_mb))
  2201     {
  2202       *err = transit_state_mb (mctx, state);
  2203       if (__glibc_unlikely (*err != REG_NOERROR))
  2204         return NULL;
  2205     }
  2206 
  2207   /* Then decide the next state with the single byte.  */
  2208 #if 0
  2209   if (0)
  2210     /* don't use transition table  */
  2211     return transit_state_sb (err, mctx, state);
  2212 #endif
  2213 
  2214   /* Use transition table  */
  2215   ch = re_string_fetch_byte (&mctx->input);
  2216   for (;;)
  2217     {
  2218       trtable = state->trtable;
  2219       if (__glibc_likely (trtable != NULL))
  2220         return trtable[ch];
  2221 
  2222       trtable = state->word_trtable;
  2223       if (__glibc_likely (trtable != NULL))
  2224         {
  2225           unsigned int context;
  2226           context
  2227             = re_string_context_at (&mctx->input,
  2228                                     re_string_cur_idx (&mctx->input) - 1,
  2229                                     mctx->eflags);
  2230           if (IS_WORD_CONTEXT (context))
  2231             return trtable[ch + SBC_MAX];
  2232           else
  2233             return trtable[ch];
  2234         }
  2235 
  2236       if (!build_trtable (mctx->dfa, state))
  2237         {
  2238           *err = REG_ESPACE;
  2239           return NULL;
  2240         }
  2241 
  2242       /* Retry, we now have a transition table.  */
  2243     }
  2244 }
  2245 
  2246 /* Update the state_log if we need */
  2247 static re_dfastate_t *
  2248 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
  2249                       re_dfastate_t *next_state)
  2250 {
  2251   const re_dfa_t *const dfa = mctx->dfa;
  2252   Idx cur_idx = re_string_cur_idx (&mctx->input);
  2253 
  2254   if (cur_idx > mctx->state_log_top)
  2255     {
  2256       mctx->state_log[cur_idx] = next_state;
  2257       mctx->state_log_top = cur_idx;
  2258     }
  2259   else if (mctx->state_log[cur_idx] == 0)
  2260     {
  2261       mctx->state_log[cur_idx] = next_state;
  2262     }
  2263   else
  2264     {
  2265       re_dfastate_t *pstate;
  2266       unsigned int context;
  2267       re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
  2268       /* If (state_log[cur_idx] != 0), it implies that cur_idx is
  2269          the destination of a multibyte char/collating element/
  2270          back reference.  Then the next state is the union set of
  2271          these destinations and the results of the transition table.  */
  2272       pstate = mctx->state_log[cur_idx];
  2273       log_nodes = pstate->entrance_nodes;
  2274       if (next_state != NULL)
  2275         {
  2276           table_nodes = next_state->entrance_nodes;
  2277           *err = re_node_set_init_union (&next_nodes, table_nodes,
  2278                                              log_nodes);
  2279           if (__glibc_unlikely (*err != REG_NOERROR))
  2280             return NULL;
  2281         }
  2282       else
  2283         next_nodes = *log_nodes;
  2284       /* Note: We already add the nodes of the initial state,
  2285          then we don't need to add them here.  */
  2286 
  2287       context = re_string_context_at (&mctx->input,
  2288                                       re_string_cur_idx (&mctx->input) - 1,
  2289                                       mctx->eflags);
  2290       next_state = mctx->state_log[cur_idx]
  2291         = re_acquire_state_context (err, dfa, &next_nodes, context);
  2292       /* We don't need to check errors here, since the return value of
  2293          this function is next_state and ERR is already set.  */
  2294 
  2295       if (table_nodes != NULL)
  2296         re_node_set_free (&next_nodes);
  2297     }
  2298 
  2299   if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
  2300     {
  2301       /* Check OP_OPEN_SUBEXP in the current state in case that we use them
  2302          later.  We must check them here, since the back references in the
  2303          next state might use them.  */
  2304       *err = check_subexp_matching_top (mctx, &next_state->nodes,
  2305                                         cur_idx);
  2306       if (__glibc_unlikely (*err != REG_NOERROR))
  2307         return NULL;
  2308 
  2309       /* If the next state has back references.  */
  2310       if (next_state->has_backref)
  2311         {
  2312           *err = transit_state_bkref (mctx, &next_state->nodes);
  2313           if (__glibc_unlikely (*err != REG_NOERROR))
  2314             return NULL;
  2315           next_state = mctx->state_log[cur_idx];
  2316         }
  2317     }
  2318 
  2319   return next_state;
  2320 }
  2321 
  2322 /* Skip bytes in the input that correspond to part of a
  2323    multi-byte match, then look in the log for a state
  2324    from which to restart matching.  */
  2325 static re_dfastate_t *
  2326 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
  2327 {
  2328   re_dfastate_t *cur_state;
  2329   do
  2330     {
  2331       Idx max = mctx->state_log_top;
  2332       Idx cur_str_idx = re_string_cur_idx (&mctx->input);
  2333 
  2334       do
  2335         {
  2336           if (++cur_str_idx > max)
  2337             return NULL;
  2338           re_string_skip_bytes (&mctx->input, 1);
  2339         }
  2340       while (mctx->state_log[cur_str_idx] == NULL);
  2341 
  2342       cur_state = merge_state_with_log (err, mctx, NULL);
  2343     }
  2344   while (*err == REG_NOERROR && cur_state == NULL);
  2345   return cur_state;
  2346 }
  2347 
  2348 /* Helper functions for transit_state.  */
  2349 
  2350 /* From the node set CUR_NODES, pick up the nodes whose types are
  2351    OP_OPEN_SUBEXP and which have corresponding back references in the regular
  2352    expression. And register them to use them later for evaluating the
  2353    corresponding back references.  */
  2354 
  2355 static reg_errcode_t
  2356 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
  2357                            Idx str_idx)
  2358 {
  2359   const re_dfa_t *const dfa = mctx->dfa;
  2360   Idx node_idx;
  2361   reg_errcode_t err;
  2362 
  2363   /* TODO: This isn't efficient.
  2364            Because there might be more than one nodes whose types are
  2365            OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
  2366            nodes.
  2367            E.g. RE: (a){2}  */
  2368   for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
  2369     {
  2370       Idx node = cur_nodes->elems[node_idx];
  2371       if (dfa->nodes[node].type == OP_OPEN_SUBEXP
  2372           && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
  2373           && (dfa->used_bkref_map
  2374               & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
  2375         {
  2376           err = match_ctx_add_subtop (mctx, node, str_idx);
  2377           if (__glibc_unlikely (err != REG_NOERROR))
  2378             return err;
  2379         }
  2380     }
  2381   return REG_NOERROR;
  2382 }
  2383 
  2384 #if 0
  2385 /* Return the next state to which the current state STATE will transit by
  2386    accepting the current input byte.  Return NULL on failure.  */
  2387 
  2388 static re_dfastate_t *
  2389 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
  2390                   re_dfastate_t *state)
  2391 {
  2392   const re_dfa_t *const dfa = mctx->dfa;
  2393   re_node_set next_nodes;
  2394   re_dfastate_t *next_state;
  2395   Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
  2396   unsigned int context;
  2397 
  2398   *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
  2399   if (__glibc_unlikely (*err != REG_NOERROR))
  2400     return NULL;
  2401   for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
  2402     {
  2403       Idx cur_node = state->nodes.elems[node_cnt];
  2404       if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
  2405         {
  2406           *err = re_node_set_merge (&next_nodes,
  2407                                     dfa->eclosures + dfa->nexts[cur_node]);
  2408           if (__glibc_unlikely (*err != REG_NOERROR))
  2409             {
  2410               re_node_set_free (&next_nodes);
  2411               return NULL;
  2412             }
  2413         }
  2414     }
  2415   context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
  2416   next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
  2417   /* We don't need to check errors here, since the return value of
  2418      this function is next_state and ERR is already set.  */
  2419 
  2420   re_node_set_free (&next_nodes);
  2421   re_string_skip_bytes (&mctx->input, 1);
  2422   return next_state;
  2423 }
  2424 #endif
  2425 
  2426 static reg_errcode_t
  2427 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
  2428 {
  2429   const re_dfa_t *const dfa = mctx->dfa;
  2430   reg_errcode_t err;
  2431   Idx i;
  2432 
  2433   for (i = 0; i < pstate->nodes.nelem; ++i)
  2434     {
  2435       re_node_set dest_nodes, *new_nodes;
  2436       Idx cur_node_idx = pstate->nodes.elems[i];
  2437       int naccepted;
  2438       Idx dest_idx;
  2439       unsigned int context;
  2440       re_dfastate_t *dest_state;
  2441 
  2442       if (!dfa->nodes[cur_node_idx].accept_mb)
  2443         continue;
  2444 
  2445       if (dfa->nodes[cur_node_idx].constraint)
  2446         {
  2447           context = re_string_context_at (&mctx->input,
  2448                                           re_string_cur_idx (&mctx->input),
  2449                                           mctx->eflags);
  2450           if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
  2451                                            context))
  2452             continue;
  2453         }
  2454 
  2455       /* How many bytes the node can accept?  */
  2456       naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
  2457                                            re_string_cur_idx (&mctx->input));
  2458       if (naccepted == 0)
  2459         continue;
  2460 
  2461       /* The node can accepts 'naccepted' bytes.  */
  2462       dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
  2463       mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
  2464                                : mctx->max_mb_elem_len);
  2465       err = clean_state_log_if_needed (mctx, dest_idx);
  2466       if (__glibc_unlikely (err != REG_NOERROR))
  2467         return err;
  2468       DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1);
  2469       new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
  2470 
  2471       dest_state = mctx->state_log[dest_idx];
  2472       if (dest_state == NULL)
  2473         dest_nodes = *new_nodes;
  2474       else
  2475         {
  2476           err = re_node_set_init_union (&dest_nodes,
  2477                                         dest_state->entrance_nodes, new_nodes);
  2478           if (__glibc_unlikely (err != REG_NOERROR))
  2479             return err;
  2480         }
  2481       context = re_string_context_at (&mctx->input, dest_idx - 1,
  2482                                       mctx->eflags);
  2483       mctx->state_log[dest_idx]
  2484         = re_acquire_state_context (&err, dfa, &dest_nodes, context);
  2485       if (dest_state != NULL)
  2486         re_node_set_free (&dest_nodes);
  2487       if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
  2488                             && err != REG_NOERROR))
  2489         return err;
  2490     }
  2491   return REG_NOERROR;
  2492 }
  2493 
  2494 static reg_errcode_t
  2495 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
  2496 {
  2497   const re_dfa_t *const dfa = mctx->dfa;
  2498   reg_errcode_t err;
  2499   Idx i;
  2500   Idx cur_str_idx = re_string_cur_idx (&mctx->input);
  2501 
  2502   for (i = 0; i < nodes->nelem; ++i)
  2503     {
  2504       Idx dest_str_idx, prev_nelem, bkc_idx;
  2505       Idx node_idx = nodes->elems[i];
  2506       unsigned int context;
  2507       const re_token_t *node = dfa->nodes + node_idx;
  2508       re_node_set *new_dest_nodes;
  2509 
  2510       /* Check whether 'node' is a backreference or not.  */
  2511       if (node->type != OP_BACK_REF)
  2512         continue;
  2513 
  2514       if (node->constraint)
  2515         {
  2516           context = re_string_context_at (&mctx->input, cur_str_idx,
  2517                                           mctx->eflags);
  2518           if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
  2519             continue;
  2520         }
  2521 
  2522       /* 'node' is a backreference.
  2523          Check the substring which the substring matched.  */
  2524       bkc_idx = mctx->nbkref_ents;
  2525       err = get_subexp (mctx, node_idx, cur_str_idx);
  2526       if (__glibc_unlikely (err != REG_NOERROR))
  2527         goto free_return;
  2528 
  2529       /* And add the epsilon closures (which is 'new_dest_nodes') of
  2530          the backreference to appropriate state_log.  */
  2531       DEBUG_ASSERT (dfa->nexts[node_idx] != -1);
  2532       for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
  2533         {
  2534           Idx subexp_len;
  2535           re_dfastate_t *dest_state;
  2536           struct re_backref_cache_entry *bkref_ent;
  2537           bkref_ent = mctx->bkref_ents + bkc_idx;
  2538           if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
  2539             continue;
  2540           subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
  2541           new_dest_nodes = (subexp_len == 0
  2542                             ? dfa->eclosures + dfa->edests[node_idx].elems[0]
  2543                             : dfa->eclosures + dfa->nexts[node_idx]);
  2544           dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
  2545                           - bkref_ent->subexp_from);
  2546           context = re_string_context_at (&mctx->input, dest_str_idx - 1,
  2547                                           mctx->eflags);
  2548           dest_state = mctx->state_log[dest_str_idx];
  2549           prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
  2550                         : mctx->state_log[cur_str_idx]->nodes.nelem);
  2551           /* Add 'new_dest_node' to state_log.  */
  2552           if (dest_state == NULL)
  2553             {
  2554               mctx->state_log[dest_str_idx]
  2555                 = re_acquire_state_context (&err, dfa, new_dest_nodes,
  2556                                             context);
  2557               if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
  2558                                     && err != REG_NOERROR))
  2559                 goto free_return;
  2560             }
  2561           else
  2562             {
  2563               re_node_set dest_nodes;
  2564               err = re_node_set_init_union (&dest_nodes,
  2565                                             dest_state->entrance_nodes,
  2566                                             new_dest_nodes);
  2567               if (__glibc_unlikely (err != REG_NOERROR))
  2568                 {
  2569                   re_node_set_free (&dest_nodes);
  2570                   goto free_return;
  2571                 }
  2572               mctx->state_log[dest_str_idx]
  2573                 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
  2574               re_node_set_free (&dest_nodes);
  2575               if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
  2576                                     && err != REG_NOERROR))
  2577                 goto free_return;
  2578             }
  2579           /* We need to check recursively if the backreference can epsilon
  2580              transit.  */
  2581           if (subexp_len == 0
  2582               && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
  2583             {
  2584               err = check_subexp_matching_top (mctx, new_dest_nodes,
  2585                                                cur_str_idx);
  2586               if (__glibc_unlikely (err != REG_NOERROR))
  2587                 goto free_return;
  2588               err = transit_state_bkref (mctx, new_dest_nodes);
  2589               if (__glibc_unlikely (err != REG_NOERROR))
  2590                 goto free_return;
  2591             }
  2592         }
  2593     }
  2594   err = REG_NOERROR;
  2595  free_return:
  2596   return err;
  2597 }
  2598 
  2599 /* Enumerate all the candidates which the backreference BKREF_NODE can match
  2600    at BKREF_STR_IDX, and register them by match_ctx_add_entry().
  2601    Note that we might collect inappropriate candidates here.
  2602    However, the cost of checking them strictly here is too high, then we
  2603    delay these checking for prune_impossible_nodes().  */
  2604 
  2605 static reg_errcode_t
  2606 __attribute_warn_unused_result__
  2607 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
  2608 {
  2609   const re_dfa_t *const dfa = mctx->dfa;
  2610   Idx subexp_num, sub_top_idx;
  2611   const char *buf = (const char *) re_string_get_buffer (&mctx->input);
  2612   /* Return if we have already checked BKREF_NODE at BKREF_STR_IDX.  */
  2613   Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
  2614   if (cache_idx != -1)
  2615     {
  2616       const struct re_backref_cache_entry *entry
  2617         = mctx->bkref_ents + cache_idx;
  2618       do
  2619         if (entry->node == bkref_node)
  2620           return REG_NOERROR; /* We already checked it.  */
  2621       while (entry++->more);
  2622     }
  2623 
  2624   subexp_num = dfa->nodes[bkref_node].opr.idx;
  2625 
  2626   /* For each sub expression  */
  2627   for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
  2628     {
  2629       reg_errcode_t err;
  2630       re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
  2631       re_sub_match_last_t *sub_last;
  2632       Idx sub_last_idx, sl_str, bkref_str_off;
  2633 
  2634       if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
  2635         continue; /* It isn't related.  */
  2636 
  2637       sl_str = sub_top->str_idx;
  2638       bkref_str_off = bkref_str_idx;
  2639       /* At first, check the last node of sub expressions we already
  2640          evaluated.  */
  2641       for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
  2642         {
  2643           regoff_t sl_str_diff;
  2644           sub_last = sub_top->lasts[sub_last_idx];
  2645           sl_str_diff = sub_last->str_idx - sl_str;
  2646           /* The matched string by the sub expression match with the substring
  2647              at the back reference?  */
  2648           if (sl_str_diff > 0)
  2649             {
  2650               if (__glibc_unlikely (bkref_str_off + sl_str_diff
  2651                                     > mctx->input.valid_len))
  2652                 {
  2653                   /* Not enough chars for a successful match.  */
  2654                   if (bkref_str_off + sl_str_diff > mctx->input.len)
  2655                     break;
  2656 
  2657                   err = clean_state_log_if_needed (mctx,
  2658                                                    bkref_str_off
  2659                                                    + sl_str_diff);
  2660                   if (__glibc_unlikely (err != REG_NOERROR))
  2661                     return err;
  2662                   buf = (const char *) re_string_get_buffer (&mctx->input);
  2663                 }
  2664               if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
  2665                 /* We don't need to search this sub expression any more.  */
  2666                 break;
  2667             }
  2668           bkref_str_off += sl_str_diff;
  2669           sl_str += sl_str_diff;
  2670           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
  2671                                 bkref_str_idx);
  2672 
  2673           /* Reload buf, since the preceding call might have reallocated
  2674              the buffer.  */
  2675           buf = (const char *) re_string_get_buffer (&mctx->input);
  2676 
  2677           if (err == REG_NOMATCH)
  2678             continue;
  2679           if (__glibc_unlikely (err != REG_NOERROR))
  2680             return err;
  2681         }
  2682 
  2683       if (sub_last_idx < sub_top->nlasts)
  2684         continue;
  2685       if (sub_last_idx > 0)
  2686         ++sl_str;
  2687       /* Then, search for the other last nodes of the sub expression.  */
  2688       for (; sl_str <= bkref_str_idx; ++sl_str)
  2689         {
  2690           Idx cls_node;
  2691           regoff_t sl_str_off;
  2692           const re_node_set *nodes;
  2693           sl_str_off = sl_str - sub_top->str_idx;
  2694           /* The matched string by the sub expression match with the substring
  2695              at the back reference?  */
  2696           if (sl_str_off > 0)
  2697             {
  2698               if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
  2699                 {
  2700                   /* If we are at the end of the input, we cannot match.  */
  2701                   if (bkref_str_off >= mctx->input.len)
  2702                     break;
  2703 
  2704                   err = extend_buffers (mctx, bkref_str_off + 1);
  2705                   if (__glibc_unlikely (err != REG_NOERROR))
  2706                     return err;
  2707 
  2708                   buf = (const char *) re_string_get_buffer (&mctx->input);
  2709                 }
  2710               if (buf [bkref_str_off++] != buf[sl_str - 1])
  2711                 break; /* We don't need to search this sub expression
  2712                           any more.  */
  2713             }
  2714           if (mctx->state_log[sl_str] == NULL)
  2715             continue;
  2716           /* Does this state have a ')' of the sub expression?  */
  2717           nodes = &mctx->state_log[sl_str]->nodes;
  2718           cls_node = find_subexp_node (dfa, nodes, subexp_num,
  2719                                        OP_CLOSE_SUBEXP);
  2720           if (cls_node == -1)
  2721             continue; /* No.  */
  2722           if (sub_top->path == NULL)
  2723             {
  2724               sub_top->path = calloc (sizeof (state_array_t),
  2725                                       sl_str - sub_top->str_idx + 1);
  2726               if (sub_top->path == NULL)
  2727                 return REG_ESPACE;
  2728             }
  2729           /* Can the OP_OPEN_SUBEXP node arrive the OP_CLOSE_SUBEXP node
  2730              in the current context?  */
  2731           err = check_arrival (mctx, sub_top->path, sub_top->node,
  2732                                sub_top->str_idx, cls_node, sl_str,
  2733                                OP_CLOSE_SUBEXP);
  2734           if (err == REG_NOMATCH)
  2735               continue;
  2736           if (__glibc_unlikely (err != REG_NOERROR))
  2737               return err;
  2738           sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
  2739           if (__glibc_unlikely (sub_last == NULL))
  2740             return REG_ESPACE;
  2741           err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
  2742                                 bkref_str_idx);
  2743           buf = (const char *) re_string_get_buffer (&mctx->input);
  2744           if (err == REG_NOMATCH)
  2745             continue;
  2746           if (__glibc_unlikely (err != REG_NOERROR))
  2747             return err;
  2748         }
  2749     }
  2750   return REG_NOERROR;
  2751 }
  2752 
  2753 /* Helper functions for get_subexp().  */
  2754 
  2755 /* Check SUB_LAST can arrive to the back reference BKREF_NODE at BKREF_STR.
  2756    If it can arrive, register the sub expression expressed with SUB_TOP
  2757    and SUB_LAST.  */
  2758 
  2759 static reg_errcode_t
  2760 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
  2761                 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
  2762 {
  2763   reg_errcode_t err;
  2764   Idx to_idx;
  2765   /* Can the subexpression arrive the back reference?  */
  2766   err = check_arrival (mctx, &sub_last->path, sub_last->node,
  2767                        sub_last->str_idx, bkref_node, bkref_str,
  2768                        OP_OPEN_SUBEXP);
  2769   if (err != REG_NOERROR)
  2770     return err;
  2771   err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
  2772                              sub_last->str_idx);
  2773   if (__glibc_unlikely (err != REG_NOERROR))
  2774     return err;
  2775   to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
  2776   return clean_state_log_if_needed (mctx, to_idx);
  2777 }
  2778 
  2779 /* Find the first node which is '(' or ')' and whose index is SUBEXP_IDX.
  2780    Search '(' if FL_OPEN, or search ')' otherwise.
  2781    TODO: This function isn't efficient...
  2782          Because there might be more than one nodes whose types are
  2783          OP_OPEN_SUBEXP and whose index is SUBEXP_IDX, we must check all
  2784          nodes.
  2785          E.g. RE: (a){2}  */
  2786 
  2787 static Idx
  2788 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
  2789                   Idx subexp_idx, int type)
  2790 {
  2791   Idx cls_idx;
  2792   for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
  2793     {
  2794       Idx cls_node = nodes->elems[cls_idx];
  2795       const re_token_t *node = dfa->nodes + cls_node;
  2796       if (node->type == type
  2797           && node->opr.idx == subexp_idx)
  2798         return cls_node;
  2799     }
  2800   return -1;
  2801 }
  2802 
  2803 /* Check whether the node TOP_NODE at TOP_STR can arrive to the node
  2804    LAST_NODE at LAST_STR.  We record the path onto PATH since it will be
  2805    heavily reused.
  2806    Return REG_NOERROR if it can arrive, REG_NOMATCH if it cannot,
  2807    REG_ESPACE if memory is exhausted.  */
  2808 
  2809 static reg_errcode_t
  2810 __attribute_warn_unused_result__
  2811 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
  2812                Idx top_str, Idx last_node, Idx last_str, int type)
  2813 {
  2814   const re_dfa_t *const dfa = mctx->dfa;
  2815   reg_errcode_t err = REG_NOERROR;
  2816   Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
  2817   re_dfastate_t *cur_state = NULL;
  2818   re_node_set *cur_nodes, next_nodes;
  2819   re_dfastate_t **backup_state_log;
  2820   unsigned int context;
  2821 
  2822   subexp_num = dfa->nodes[top_node].opr.idx;
  2823   /* Extend the buffer if we need.  */
  2824   if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
  2825     {
  2826       re_dfastate_t **new_array;
  2827       Idx old_alloc = path->alloc;
  2828       Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
  2829       Idx new_alloc;
  2830       if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
  2831         return REG_ESPACE;
  2832       new_alloc = old_alloc + incr_alloc;
  2833       if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
  2834         return REG_ESPACE;
  2835       new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
  2836       if (__glibc_unlikely (new_array == NULL))
  2837         return REG_ESPACE;
  2838       path->array = new_array;
  2839       path->alloc = new_alloc;
  2840       memset (new_array + old_alloc, '\0',
  2841               sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
  2842     }
  2843 
  2844   str_idx = path->next_idx ? path->next_idx : top_str;
  2845 
  2846   /* Temporary modify MCTX.  */
  2847   backup_state_log = mctx->state_log;
  2848   backup_cur_idx = mctx->input.cur_idx;
  2849   mctx->state_log = path->array;
  2850   mctx->input.cur_idx = str_idx;
  2851 
  2852   /* Setup initial node set.  */
  2853   context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
  2854   if (str_idx == top_str)
  2855     {
  2856       err = re_node_set_init_1 (&next_nodes, top_node);
  2857       if (__glibc_unlikely (err != REG_NOERROR))
  2858         return err;
  2859       err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
  2860       if (__glibc_unlikely (err != REG_NOERROR))
  2861         {
  2862           re_node_set_free (&next_nodes);
  2863           return err;
  2864         }
  2865     }
  2866   else
  2867     {
  2868       cur_state = mctx->state_log[str_idx];
  2869       if (cur_state && cur_state->has_backref)
  2870         {
  2871           err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
  2872           if (__glibc_unlikely (err != REG_NOERROR))
  2873             return err;
  2874         }
  2875       else
  2876         re_node_set_init_empty (&next_nodes);
  2877     }
  2878   if (str_idx == top_str || (cur_state && cur_state->has_backref))
  2879     {
  2880       if (next_nodes.nelem)
  2881         {
  2882           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
  2883                                     subexp_num, type);
  2884           if (__glibc_unlikely (err != REG_NOERROR))
  2885             {
  2886               re_node_set_free (&next_nodes);
  2887               return err;
  2888             }
  2889         }
  2890       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
  2891       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
  2892         {
  2893           re_node_set_free (&next_nodes);
  2894           return err;
  2895         }
  2896       mctx->state_log[str_idx] = cur_state;
  2897     }
  2898 
  2899   for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
  2900     {
  2901       re_node_set_empty (&next_nodes);
  2902       if (mctx->state_log[str_idx + 1])
  2903         {
  2904           err = re_node_set_merge (&next_nodes,
  2905                                    &mctx->state_log[str_idx + 1]->nodes);
  2906           if (__glibc_unlikely (err != REG_NOERROR))
  2907             {
  2908               re_node_set_free (&next_nodes);
  2909               return err;
  2910             }
  2911         }
  2912       if (cur_state)
  2913         {
  2914           err = check_arrival_add_next_nodes (mctx, str_idx,
  2915                                               &cur_state->non_eps_nodes,
  2916                                               &next_nodes);
  2917           if (__glibc_unlikely (err != REG_NOERROR))
  2918             {
  2919               re_node_set_free (&next_nodes);
  2920               return err;
  2921             }
  2922         }
  2923       ++str_idx;
  2924       if (next_nodes.nelem)
  2925         {
  2926           err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
  2927           if (__glibc_unlikely (err != REG_NOERROR))
  2928             {
  2929               re_node_set_free (&next_nodes);
  2930               return err;
  2931             }
  2932           err = expand_bkref_cache (mctx, &next_nodes, str_idx,
  2933                                     subexp_num, type);
  2934           if (__glibc_unlikely (err != REG_NOERROR))
  2935             {
  2936               re_node_set_free (&next_nodes);
  2937               return err;
  2938             }
  2939         }
  2940       context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
  2941       cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
  2942       if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
  2943         {
  2944           re_node_set_free (&next_nodes);
  2945           return err;
  2946         }
  2947       mctx->state_log[str_idx] = cur_state;
  2948       null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
  2949     }
  2950   re_node_set_free (&next_nodes);
  2951   cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
  2952                : &mctx->state_log[last_str]->nodes);
  2953   path->next_idx = str_idx;
  2954 
  2955   /* Fix MCTX.  */
  2956   mctx->state_log = backup_state_log;
  2957   mctx->input.cur_idx = backup_cur_idx;
  2958 
  2959   /* Then check the current node set has the node LAST_NODE.  */
  2960   if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
  2961     return REG_NOERROR;
  2962 
  2963   return REG_NOMATCH;
  2964 }
  2965 
  2966 /* Helper functions for check_arrival.  */
  2967 
  2968 /* Calculate the destination nodes of CUR_NODES at STR_IDX, and append them
  2969    to NEXT_NODES.
  2970    TODO: This function is similar to the functions transit_state*(),
  2971          however this function has many additional works.
  2972          Can't we unify them?  */
  2973 
  2974 static reg_errcode_t
  2975 __attribute_warn_unused_result__
  2976 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
  2977                               re_node_set *cur_nodes, re_node_set *next_nodes)
  2978 {
  2979   const re_dfa_t *const dfa = mctx->dfa;
  2980   bool ok;
  2981   Idx cur_idx;
  2982   reg_errcode_t err = REG_NOERROR;
  2983   re_node_set union_set;
  2984   re_node_set_init_empty (&union_set);
  2985   for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
  2986     {
  2987       int naccepted = 0;
  2988       Idx cur_node = cur_nodes->elems[cur_idx];
  2989       DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
  2990 
  2991       /* If the node may accept "multi byte".  */
  2992       if (dfa->nodes[cur_node].accept_mb)
  2993         {
  2994           naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
  2995                                                str_idx);
  2996           if (naccepted > 1)
  2997             {
  2998               re_dfastate_t *dest_state;
  2999               Idx next_node = dfa->nexts[cur_node];
  3000               Idx next_idx = str_idx + naccepted;
  3001               dest_state = mctx->state_log[next_idx];
  3002               re_node_set_empty (&union_set);
  3003               if (dest_state)
  3004                 {
  3005                   err = re_node_set_merge (&union_set, &dest_state->nodes);
  3006                   if (__glibc_unlikely (err != REG_NOERROR))
  3007                     {
  3008                       re_node_set_free (&union_set);
  3009                       return err;
  3010                     }
  3011                 }
  3012               ok = re_node_set_insert (&union_set, next_node);
  3013               if (__glibc_unlikely (! ok))
  3014                 {
  3015                   re_node_set_free (&union_set);
  3016                   return REG_ESPACE;
  3017                 }
  3018               mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
  3019                                                             &union_set);
  3020               if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
  3021                                     && err != REG_NOERROR))
  3022                 {
  3023                   re_node_set_free (&union_set);
  3024                   return err;
  3025                 }
  3026             }
  3027         }
  3028 
  3029       if (naccepted
  3030           || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
  3031         {
  3032           ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
  3033           if (__glibc_unlikely (! ok))
  3034             {
  3035               re_node_set_free (&union_set);
  3036               return REG_ESPACE;
  3037             }
  3038         }
  3039     }
  3040   re_node_set_free (&union_set);
  3041   return REG_NOERROR;
  3042 }
  3043 
  3044 /* For all the nodes in CUR_NODES, add the epsilon closures of them to
  3045    CUR_NODES, however exclude the nodes which are:
  3046     - inside the sub expression whose number is EX_SUBEXP, if FL_OPEN.
  3047     - out of the sub expression whose number is EX_SUBEXP, if !FL_OPEN.
  3048 */
  3049 
  3050 static reg_errcode_t
  3051 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
  3052                           Idx ex_subexp, int type)
  3053 {
  3054   reg_errcode_t err;
  3055   Idx idx, outside_node;
  3056   re_node_set new_nodes;
  3057   DEBUG_ASSERT (cur_nodes->nelem);
  3058   err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
  3059   if (__glibc_unlikely (err != REG_NOERROR))
  3060     return err;
  3061   /* Create a new node set NEW_NODES with the nodes which are epsilon
  3062      closures of the node in CUR_NODES.  */
  3063 
  3064   for (idx = 0; idx < cur_nodes->nelem; ++idx)
  3065     {
  3066       Idx cur_node = cur_nodes->elems[idx];
  3067       const re_node_set *eclosure = dfa->eclosures + cur_node;
  3068       outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
  3069       if (outside_node == -1)
  3070         {
  3071           /* There are no problematic nodes, just merge them.  */
  3072           err = re_node_set_merge (&new_nodes, eclosure);
  3073           if (__glibc_unlikely (err != REG_NOERROR))
  3074             {
  3075               re_node_set_free (&new_nodes);
  3076               return err;
  3077             }
  3078         }
  3079       else
  3080         {
  3081           /* There are problematic nodes, re-calculate incrementally.  */
  3082           err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
  3083                                               ex_subexp, type);
  3084           if (__glibc_unlikely (err != REG_NOERROR))
  3085             {
  3086               re_node_set_free (&new_nodes);
  3087               return err;
  3088             }
  3089         }
  3090     }
  3091   re_node_set_free (cur_nodes);
  3092   *cur_nodes = new_nodes;
  3093   return REG_NOERROR;
  3094 }
  3095 
  3096 /* Helper function for check_arrival_expand_ecl.
  3097    Check incrementally the epsilon closure of TARGET, and if it isn't
  3098    problematic append it to DST_NODES.  */
  3099 
  3100 static reg_errcode_t
  3101 __attribute_warn_unused_result__
  3102 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
  3103                               Idx target, Idx ex_subexp, int type)
  3104 {
  3105   Idx cur_node;
  3106   for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
  3107     {
  3108       bool ok;
  3109 
  3110       if (dfa->nodes[cur_node].type == type
  3111           && dfa->nodes[cur_node].opr.idx == ex_subexp)
  3112         {
  3113           if (type == OP_CLOSE_SUBEXP)
  3114             {
  3115               ok = re_node_set_insert (dst_nodes, cur_node);
  3116               if (__glibc_unlikely (! ok))
  3117                 return REG_ESPACE;
  3118             }
  3119           break;
  3120         }
  3121       ok = re_node_set_insert (dst_nodes, cur_node);
  3122       if (__glibc_unlikely (! ok))
  3123         return REG_ESPACE;
  3124       if (dfa->edests[cur_node].nelem == 0)
  3125         break;
  3126       if (dfa->edests[cur_node].nelem == 2)
  3127         {
  3128           reg_errcode_t err;
  3129           err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
  3130                                               dfa->edests[cur_node].elems[1],
  3131                                               ex_subexp, type);
  3132           if (__glibc_unlikely (err != REG_NOERROR))
  3133             return err;
  3134         }
  3135       cur_node = dfa->edests[cur_node].elems[0];
  3136     }
  3137   return REG_NOERROR;
  3138 }
  3139 
  3140 
  3141 /* For all the back references in the current state, calculate the
  3142    destination of the back references by the appropriate entry
  3143    in MCTX->BKREF_ENTS.  */
  3144 
  3145 static reg_errcode_t
  3146 __attribute_warn_unused_result__
  3147 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
  3148                     Idx cur_str, Idx subexp_num, int type)
  3149 {
  3150   const re_dfa_t *const dfa = mctx->dfa;
  3151   reg_errcode_t err;
  3152   Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
  3153   struct re_backref_cache_entry *ent;
  3154 
  3155   if (cache_idx_start == -1)
  3156     return REG_NOERROR;
  3157 
  3158  restart:
  3159   ent = mctx->bkref_ents + cache_idx_start;
  3160   do
  3161     {
  3162       Idx to_idx, next_node;
  3163 
  3164       /* Is this entry ENT is appropriate?  */
  3165       if (!re_node_set_contains (cur_nodes, ent->node))
  3166         continue; /* No.  */
  3167 
  3168       to_idx = cur_str + ent->subexp_to - ent->subexp_from;
  3169       /* Calculate the destination of the back reference, and append it
  3170          to MCTX->STATE_LOG.  */
  3171       if (to_idx == cur_str)
  3172         {
  3173           /* The backreference did epsilon transit, we must re-check all the
  3174              node in the current state.  */
  3175           re_node_set new_dests;
  3176           reg_errcode_t err2, err3;
  3177           next_node = dfa->edests[ent->node].elems[0];
  3178           if (re_node_set_contains (cur_nodes, next_node))
  3179             continue;
  3180           err = re_node_set_init_1 (&new_dests, next_node);
  3181           err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
  3182           err3 = re_node_set_merge (cur_nodes, &new_dests);
  3183           re_node_set_free (&new_dests);
  3184           if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
  3185                                 || err3 != REG_NOERROR))
  3186             {
  3187               err = (err != REG_NOERROR ? err
  3188                      : (err2 != REG_NOERROR ? err2 : err3));
  3189               return err;
  3190             }
  3191           /* TODO: It is still inefficient...  */
  3192           goto restart;
  3193         }
  3194       else
  3195         {
  3196           re_node_set union_set;
  3197           next_node = dfa->nexts[ent->node];
  3198           if (mctx->state_log[to_idx])
  3199             {
  3200               bool ok;
  3201               if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
  3202                                         next_node))
  3203                 continue;
  3204               err = re_node_set_init_copy (&union_set,
  3205                                            &mctx->state_log[to_idx]->nodes);
  3206               ok = re_node_set_insert (&union_set, next_node);
  3207               if (__glibc_unlikely (err != REG_NOERROR || ! ok))
  3208                 {
  3209                   re_node_set_free (&union_set);
  3210                   err = err != REG_NOERROR ? err : REG_ESPACE;
  3211                   return err;
  3212                 }
  3213             }
  3214           else
  3215             {
  3216               err = re_node_set_init_1 (&union_set, next_node);
  3217               if (__glibc_unlikely (err != REG_NOERROR))
  3218                 return err;
  3219             }
  3220           mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
  3221           re_node_set_free (&union_set);
  3222           if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
  3223                                 && err != REG_NOERROR))
  3224             return err;
  3225         }
  3226     }
  3227   while (ent++->more);
  3228   return REG_NOERROR;
  3229 }
  3230 
  3231 /* Build transition table for the state.
  3232    Return true if successful.  */
  3233 
  3234 static bool __attribute_noinline__
  3235 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
  3236 {
  3237   reg_errcode_t err;
  3238   Idx i, j;
  3239   int ch;
  3240   bool need_word_trtable = false;
  3241   bitset_word_t elem, mask;
  3242   Idx ndests; /* Number of the destination states from 'state'.  */
  3243   re_dfastate_t **trtable;
  3244   re_dfastate_t *dest_states[SBC_MAX];
  3245   re_dfastate_t *dest_states_word[SBC_MAX];
  3246   re_dfastate_t *dest_states_nl[SBC_MAX];
  3247   re_node_set follows;
  3248   bitset_t acceptable;
  3249 
  3250   /* We build DFA states which corresponds to the destination nodes
  3251      from 'state'.  'dests_node[i]' represents the nodes which i-th
  3252      destination state contains, and 'dests_ch[i]' represents the
  3253      characters which i-th destination state accepts.  */
  3254   re_node_set dests_node[SBC_MAX];
  3255   bitset_t dests_ch[SBC_MAX];
  3256 
  3257   /* Initialize transition table.  */
  3258   state->word_trtable = state->trtable = NULL;
  3259 
  3260   /* At first, group all nodes belonging to 'state' into several
  3261      destinations.  */
  3262   ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
  3263   if (__glibc_unlikely (ndests <= 0))
  3264     {
  3265       /* Return false in case of an error, true otherwise.  */
  3266       if (ndests == 0)
  3267         {
  3268           state->trtable = (re_dfastate_t **)
  3269             calloc (sizeof (re_dfastate_t *), SBC_MAX);
  3270           if (__glibc_unlikely (state->trtable == NULL))
  3271             return false;
  3272           return true;
  3273         }
  3274       return false;
  3275     }
  3276 
  3277   err = re_node_set_alloc (&follows, ndests + 1);
  3278   if (__glibc_unlikely (err != REG_NOERROR))
  3279     {
  3280     out_free:
  3281       re_node_set_free (&follows);
  3282       for (i = 0; i < ndests; ++i)
  3283         re_node_set_free (dests_node + i);
  3284       return false;
  3285     }
  3286 
  3287   bitset_empty (acceptable);
  3288 
  3289   /* Then build the states for all destinations.  */
  3290   for (i = 0; i < ndests; ++i)
  3291     {
  3292       Idx next_node;
  3293       re_node_set_empty (&follows);
  3294       /* Merge the follows of this destination states.  */
  3295       for (j = 0; j < dests_node[i].nelem; ++j)
  3296         {
  3297           next_node = dfa->nexts[dests_node[i].elems[j]];
  3298           if (next_node != -1)
  3299             {
  3300               err = re_node_set_merge (&follows, dfa->eclosures + next_node);
  3301               if (__glibc_unlikely (err != REG_NOERROR))
  3302                 goto out_free;
  3303             }
  3304         }
  3305       dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
  3306       if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
  3307         goto out_free;
  3308       /* If the new state has context constraint,
  3309          build appropriate states for these contexts.  */
  3310       if (dest_states[i]->has_constraint)
  3311         {
  3312           dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
  3313                                                           CONTEXT_WORD);
  3314           if (__glibc_unlikely (dest_states_word[i] == NULL
  3315                                 && err != REG_NOERROR))
  3316             goto out_free;
  3317 
  3318           if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
  3319             need_word_trtable = true;
  3320 
  3321           dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
  3322                                                         CONTEXT_NEWLINE);
  3323           if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
  3324             goto out_free;
  3325         }
  3326       else
  3327         {
  3328           dest_states_word[i] = dest_states[i];
  3329           dest_states_nl[i] = dest_states[i];
  3330         }
  3331       bitset_merge (acceptable, dests_ch[i]);
  3332     }
  3333 
  3334   if (!__glibc_unlikely (need_word_trtable))
  3335     {
  3336       /* We don't care about whether the following character is a word
  3337          character, or we are in a single-byte character set so we can
  3338          discern by looking at the character code: allocate a
  3339          256-entry transition table.  */
  3340       trtable = state->trtable =
  3341         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
  3342       if (__glibc_unlikely (trtable == NULL))
  3343         goto out_free;
  3344 
  3345       /* For all characters ch...:  */
  3346       for (i = 0; i < BITSET_WORDS; ++i)
  3347         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
  3348              elem;
  3349              mask <<= 1, elem >>= 1, ++ch)
  3350           if (__glibc_unlikely (elem & 1))
  3351             {
  3352               /* There must be exactly one destination which accepts
  3353                  character ch.  See group_nodes_into_DFAstates.  */
  3354               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
  3355                 ;
  3356 
  3357               /* j-th destination accepts the word character ch.  */
  3358               if (dfa->word_char[i] & mask)
  3359                 trtable[ch] = dest_states_word[j];
  3360               else
  3361                 trtable[ch] = dest_states[j];
  3362             }
  3363     }
  3364   else
  3365     {
  3366       /* We care about whether the following character is a word
  3367          character, and we are in a multi-byte character set: discern
  3368          by looking at the character code: build two 256-entry
  3369          transition tables, one starting at trtable[0] and one
  3370          starting at trtable[SBC_MAX].  */
  3371       trtable = state->word_trtable =
  3372         (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
  3373       if (__glibc_unlikely (trtable == NULL))
  3374         goto out_free;
  3375 
  3376       /* For all characters ch...:  */
  3377       for (i = 0; i < BITSET_WORDS; ++i)
  3378         for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
  3379              elem;
  3380              mask <<= 1, elem >>= 1, ++ch)
  3381           if (__glibc_unlikely (elem & 1))
  3382             {
  3383               /* There must be exactly one destination which accepts
  3384                  character ch.  See group_nodes_into_DFAstates.  */
  3385               for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
  3386                 ;
  3387 
  3388               /* j-th destination accepts the word character ch.  */
  3389               trtable[ch] = dest_states[j];
  3390               trtable[ch + SBC_MAX] = dest_states_word[j];
  3391             }
  3392     }
  3393 
  3394   /* new line */
  3395   if (bitset_contain (acceptable, NEWLINE_CHAR))
  3396     {
  3397       /* The current state accepts newline character.  */
  3398       for (j = 0; j < ndests; ++j)
  3399         if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
  3400           {
  3401             /* k-th destination accepts newline character.  */
  3402             trtable[NEWLINE_CHAR] = dest_states_nl[j];
  3403             if (need_word_trtable)
  3404               trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
  3405             /* There must be only one destination which accepts
  3406                newline.  See group_nodes_into_DFAstates.  */
  3407             break;
  3408           }
  3409     }
  3410 
  3411   re_node_set_free (&follows);
  3412   for (i = 0; i < ndests; ++i)
  3413     re_node_set_free (dests_node + i);
  3414   return true;
  3415 }
  3416 
  3417 /* Group all nodes belonging to STATE into several destinations.
  3418    Then for all destinations, set the nodes belonging to the destination
  3419    to DESTS_NODE[i] and set the characters accepted by the destination
  3420    to DEST_CH[i].  Return the number of destinations if successful,
  3421    -1 on internal error.  */
  3422 
  3423 static Idx
  3424 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
  3425                             re_node_set *dests_node, bitset_t *dests_ch)
  3426 {
  3427   reg_errcode_t err;
  3428   bool ok;
  3429   Idx i, j, k;
  3430   Idx ndests; /* Number of the destinations from 'state'.  */
  3431   bitset_t accepts; /* Characters a node can accept.  */
  3432   const re_node_set *cur_nodes = &state->nodes;
  3433   bitset_empty (accepts);
  3434   ndests = 0;
  3435 
  3436   /* For all the nodes belonging to 'state',  */
  3437   for (i = 0; i < cur_nodes->nelem; ++i)
  3438     {
  3439       re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
  3440       re_token_type_t type = node->type;
  3441       unsigned int constraint = node->constraint;
  3442 
  3443       /* Enumerate all single byte character this node can accept.  */
  3444       if (type == CHARACTER)
  3445         bitset_set (accepts, node->opr.c);
  3446       else if (type == SIMPLE_BRACKET)
  3447         {
  3448           bitset_merge (accepts, node->opr.sbcset);
  3449         }
  3450       else if (type == OP_PERIOD)
  3451         {
  3452           if (dfa->mb_cur_max > 1)
  3453             bitset_merge (accepts, dfa->sb_char);
  3454           else
  3455             bitset_set_all (accepts);
  3456           if (!(dfa->syntax & RE_DOT_NEWLINE))
  3457             bitset_clear (accepts, '\n');
  3458           if (dfa->syntax & RE_DOT_NOT_NULL)
  3459             bitset_clear (accepts, '\0');
  3460         }
  3461       else if (type == OP_UTF8_PERIOD)
  3462         {
  3463           if (ASCII_CHARS % BITSET_WORD_BITS == 0)
  3464             memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
  3465           else
  3466             bitset_merge (accepts, utf8_sb_map);
  3467           if (!(dfa->syntax & RE_DOT_NEWLINE))
  3468             bitset_clear (accepts, '\n');
  3469           if (dfa->syntax & RE_DOT_NOT_NULL)
  3470             bitset_clear (accepts, '\0');
  3471         }
  3472       else
  3473         continue;
  3474 
  3475       /* Check the 'accepts' and sift the characters which are not
  3476          match it the context.  */
  3477       if (constraint)
  3478         {
  3479           if (constraint & NEXT_NEWLINE_CONSTRAINT)
  3480             {
  3481               bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
  3482               bitset_empty (accepts);
  3483               if (accepts_newline)
  3484                 bitset_set (accepts, NEWLINE_CHAR);
  3485               else
  3486                 continue;
  3487             }
  3488           if (constraint & NEXT_ENDBUF_CONSTRAINT)
  3489             {
  3490               bitset_empty (accepts);
  3491               continue;
  3492             }
  3493 
  3494           if (constraint & NEXT_WORD_CONSTRAINT)
  3495             {
  3496               bitset_word_t any_set = 0;
  3497               if (type == CHARACTER && !node->word_char)
  3498                 {
  3499                   bitset_empty (accepts);
  3500                   continue;
  3501                 }
  3502               if (dfa->mb_cur_max > 1)
  3503                 for (j = 0; j < BITSET_WORDS; ++j)
  3504                   any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
  3505               else
  3506                 for (j = 0; j < BITSET_WORDS; ++j)
  3507                   any_set |= (accepts[j] &= dfa->word_char[j]);
  3508               if (!any_set)
  3509                 continue;
  3510             }
  3511           if (constraint & NEXT_NOTWORD_CONSTRAINT)
  3512             {
  3513               bitset_word_t any_set = 0;
  3514               if (type == CHARACTER && node->word_char)
  3515                 {
  3516                   bitset_empty (accepts);
  3517                   continue;
  3518                 }
  3519               if (dfa->mb_cur_max > 1)
  3520                 for (j = 0; j < BITSET_WORDS; ++j)
  3521                   any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
  3522               else
  3523                 for (j = 0; j < BITSET_WORDS; ++j)
  3524                   any_set |= (accepts[j] &= ~dfa->word_char[j]);
  3525               if (!any_set)
  3526                 continue;
  3527             }
  3528         }
  3529 
  3530       /* Then divide 'accepts' into DFA states, or create a new
  3531          state.  Above, we make sure that accepts is not empty.  */
  3532       for (j = 0; j < ndests; ++j)
  3533         {
  3534           bitset_t intersec; /* Intersection sets, see below.  */
  3535           bitset_t remains;
  3536           /* Flags, see below.  */
  3537           bitset_word_t has_intersec, not_subset, not_consumed;
  3538 
  3539           /* Optimization, skip if this state doesn't accept the character.  */
  3540           if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
  3541             continue;
  3542 
  3543           /* Enumerate the intersection set of this state and 'accepts'.  */
  3544           has_intersec = 0;
  3545           for (k = 0; k < BITSET_WORDS; ++k)
  3546             has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
  3547           /* And skip if the intersection set is empty.  */
  3548           if (!has_intersec)
  3549             continue;
  3550 
  3551           /* Then check if this state is a subset of 'accepts'.  */
  3552           not_subset = not_consumed = 0;
  3553           for (k = 0; k < BITSET_WORDS; ++k)
  3554             {
  3555               not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
  3556               not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
  3557             }
  3558 
  3559           /* If this state isn't a subset of 'accepts', create a
  3560              new group state, which has the 'remains'. */
  3561           if (not_subset)
  3562             {
  3563               bitset_copy (dests_ch[ndests], remains);
  3564               bitset_copy (dests_ch[j], intersec);
  3565               err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
  3566               if (__glibc_unlikely (err != REG_NOERROR))
  3567                 goto error_return;
  3568               ++ndests;
  3569             }
  3570 
  3571           /* Put the position in the current group. */
  3572           ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
  3573           if (__glibc_unlikely (! ok))
  3574             goto error_return;
  3575 
  3576           /* If all characters are consumed, go to next node. */
  3577           if (!not_consumed)
  3578             break;
  3579         }
  3580       /* Some characters remain, create a new group. */
  3581       if (j == ndests)
  3582         {
  3583           bitset_copy (dests_ch[ndests], accepts);
  3584           err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
  3585           if (__glibc_unlikely (err != REG_NOERROR))
  3586             goto error_return;
  3587           ++ndests;
  3588           bitset_empty (accepts);
  3589         }
  3590     }
  3591   assume (ndests <= SBC_MAX);
  3592   return ndests;
  3593  error_return:
  3594   for (j = 0; j < ndests; ++j)
  3595     re_node_set_free (dests_node + j);
  3596   return -1;
  3597 }
  3598 
  3599 /* Check how many bytes the node 'dfa->nodes[node_idx]' accepts.
  3600    Return the number of the bytes the node accepts.
  3601    STR_IDX is the current index of the input string.
  3602 
  3603    This function handles the nodes which can accept one character, or
  3604    one collating element like '.', '[a-z]', opposite to the other nodes
  3605    can only accept one byte.  */
  3606 
  3607 #ifdef _LIBC
  3608 # include <locale/weight.h>
  3609 #endif
  3610 
  3611 static int
  3612 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
  3613                          const re_string_t *input, Idx str_idx)
  3614 {
  3615   const re_token_t *node = dfa->nodes + node_idx;
  3616   int char_len, elem_len;
  3617   Idx i;
  3618 
  3619   if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
  3620     {
  3621       unsigned char c = re_string_byte_at (input, str_idx), d;
  3622       if (__glibc_likely (c < 0xc2))
  3623         return 0;
  3624 
  3625       if (str_idx + 2 > input->len)
  3626         return 0;
  3627 
  3628       d = re_string_byte_at (input, str_idx + 1);
  3629       if (c < 0xe0)
  3630         return (d < 0x80 || d > 0xbf) ? 0 : 2;
  3631       else if (c < 0xf0)
  3632         {
  3633           char_len = 3;
  3634           if (c == 0xe0 && d < 0xa0)
  3635             return 0;
  3636         }
  3637       else if (c < 0xf8)
  3638         {
  3639           char_len = 4;
  3640           if (c == 0xf0 && d < 0x90)
  3641             return 0;
  3642         }
  3643       else if (c < 0xfc)
  3644         {
  3645           char_len = 5;
  3646           if (c == 0xf8 && d < 0x88)
  3647             return 0;
  3648         }
  3649       else if (c < 0xfe)
  3650         {
  3651           char_len = 6;
  3652           if (c == 0xfc && d < 0x84)
  3653             return 0;
  3654         }
  3655       else
  3656         return 0;
  3657 
  3658       if (str_idx + char_len > input->len)
  3659         return 0;
  3660 
  3661       for (i = 1; i < char_len; ++i)
  3662         {
  3663           d = re_string_byte_at (input, str_idx + i);
  3664           if (d < 0x80 || d > 0xbf)
  3665             return 0;
  3666         }
  3667       return char_len;
  3668     }
  3669 
  3670   char_len = re_string_char_size_at (input, str_idx);
  3671   if (node->type == OP_PERIOD)
  3672     {
  3673       if (char_len <= 1)
  3674         return 0;
  3675       /* FIXME: I don't think this if is needed, as both '\n'
  3676          and '\0' are char_len == 1.  */
  3677       /* '.' accepts any one character except the following two cases.  */
  3678       if ((!(dfa->syntax & RE_DOT_NEWLINE)
  3679            && re_string_byte_at (input, str_idx) == '\n')
  3680           || ((dfa->syntax & RE_DOT_NOT_NULL)
  3681               && re_string_byte_at (input, str_idx) == '\0'))
  3682         return 0;
  3683       return char_len;
  3684     }
  3685 
  3686   elem_len = re_string_elem_size_at (input, str_idx);
  3687   if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
  3688     return 0;
  3689 
  3690   if (node->type == COMPLEX_BRACKET)
  3691     {
  3692       const re_charset_t *cset = node->opr.mbcset;
  3693 #ifdef _LIBC
  3694       const unsigned char *pin
  3695         = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
  3696       Idx j;
  3697       uint32_t nrules;
  3698 #endif
  3699       int match_len = 0;
  3700       wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
  3701                     ? re_string_wchar_at (input, str_idx) : 0);
  3702 
  3703       /* match with multibyte character?  */
  3704       for (i = 0; i < cset->nmbchars; ++i)
  3705         if (wc == cset->mbchars[i])
  3706           {
  3707             match_len = char_len;
  3708             goto check_node_accept_bytes_match;
  3709           }
  3710       /* match with character_class?  */
  3711       for (i = 0; i < cset->nchar_classes; ++i)
  3712         {
  3713           wctype_t wt = cset->char_classes[i];
  3714           if (__iswctype (wc, wt))
  3715             {
  3716               match_len = char_len;
  3717               goto check_node_accept_bytes_match;
  3718             }
  3719         }
  3720 
  3721 #ifdef _LIBC
  3722       nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
  3723       if (nrules != 0)
  3724         {
  3725           unsigned int in_collseq = 0;
  3726           const int32_t *table, *indirect;
  3727           const unsigned char *weights, *extra;
  3728           const char *collseqwc;
  3729 
  3730           /* match with collating_symbol?  */
  3731           if (cset->ncoll_syms)
  3732             extra = (const unsigned char *)
  3733               _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
  3734           for (i = 0; i < cset->ncoll_syms; ++i)
  3735             {
  3736               const unsigned char *coll_sym = extra + cset->coll_syms[i];
  3737               /* Compare the length of input collating element and
  3738                  the length of current collating element.  */
  3739               if (*coll_sym != elem_len)
  3740                 continue;
  3741               /* Compare each bytes.  */
  3742               for (j = 0; j < *coll_sym; j++)
  3743                 if (pin[j] != coll_sym[1 + j])
  3744                   break;
  3745               if (j == *coll_sym)
  3746                 {
  3747                   /* Match if every bytes is equal.  */
  3748                   match_len = j;
  3749                   goto check_node_accept_bytes_match;
  3750                 }
  3751             }
  3752 
  3753           if (cset->nranges)
  3754             {
  3755               if (elem_len <= char_len)
  3756                 {
  3757                   collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
  3758                   in_collseq = __collseq_table_lookup (collseqwc, wc);
  3759                 }
  3760               else
  3761                 in_collseq = find_collation_sequence_value (pin, elem_len);
  3762             }
  3763           /* match with range expression?  */
  3764           /* FIXME: Implement rational ranges here, too.  */
  3765           for (i = 0; i < cset->nranges; ++i)
  3766             if (cset->range_starts[i] <= in_collseq
  3767                 && in_collseq <= cset->range_ends[i])
  3768               {
  3769                 match_len = elem_len;
  3770                 goto check_node_accept_bytes_match;
  3771               }
  3772 
  3773           /* match with equivalence_class?  */
  3774           if (cset->nequiv_classes)
  3775             {
  3776               const unsigned char *cp = pin;
  3777               table = (const int32_t *)
  3778                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
  3779               weights = (const unsigned char *)
  3780                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
  3781               extra = (const unsigned char *)
  3782                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
  3783               indirect = (const int32_t *)
  3784                 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
  3785               int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
  3786               int32_t rule = idx >> 24;
  3787               idx &= 0xffffff;
  3788               if (idx > 0)
  3789                 {
  3790                   size_t weight_len = weights[idx];
  3791                   for (i = 0; i < cset->nequiv_classes; ++i)
  3792                     {
  3793                       int32_t equiv_class_idx = cset->equiv_classes[i];
  3794                       int32_t equiv_class_rule = equiv_class_idx >> 24;
  3795                       equiv_class_idx &= 0xffffff;
  3796                       if (weights[equiv_class_idx] == weight_len
  3797                           && equiv_class_rule == rule
  3798                           && memcmp (weights + idx + 1,
  3799                                      weights + equiv_class_idx + 1,
  3800                                      weight_len) == 0)
  3801                         {
  3802                           match_len = elem_len;
  3803                           goto check_node_accept_bytes_match;
  3804                         }
  3805                     }
  3806                 }
  3807             }
  3808         }
  3809       else
  3810 #endif /* _LIBC */
  3811         {
  3812           /* match with range expression?  */
  3813           for (i = 0; i < cset->nranges; ++i)
  3814             {
  3815               if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
  3816                 {
  3817                   match_len = char_len;
  3818                   goto check_node_accept_bytes_match;
  3819                 }
  3820             }
  3821         }
  3822     check_node_accept_bytes_match:
  3823       if (!cset->non_match)
  3824         return match_len;
  3825       else
  3826         {
  3827           if (match_len > 0)
  3828             return 0;
  3829           else
  3830             return (elem_len > char_len) ? elem_len : char_len;
  3831         }
  3832     }
  3833   return 0;
  3834 }
  3835 
  3836 #ifdef _LIBC
  3837 static unsigned int
  3838 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
  3839 {
  3840   uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
  3841   if (nrules == 0)
  3842     {
  3843       if (mbs_len == 1)
  3844         {
  3845           /* No valid character.  Match it as a single byte character.  */
  3846           const unsigned char *collseq = (const unsigned char *)
  3847             _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
  3848           return collseq[mbs[0]];
  3849         }
  3850       return UINT_MAX;
  3851     }
  3852   else
  3853     {
  3854       int32_t idx;
  3855       const unsigned char *extra = (const unsigned char *)
  3856         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
  3857       int32_t extrasize = (const unsigned char *)
  3858         _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
  3859 
  3860       for (idx = 0; idx < extrasize;)
  3861         {
  3862           int mbs_cnt;
  3863           bool found = false;
  3864           int32_t elem_mbs_len;
  3865           /* Skip the name of collating element name.  */
  3866           idx = idx + extra[idx] + 1;
  3867           elem_mbs_len = extra[idx++];
  3868           if (mbs_len == elem_mbs_len)
  3869             {
  3870               for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
  3871                 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
  3872                   break;
  3873               if (mbs_cnt == elem_mbs_len)
  3874                 /* Found the entry.  */
  3875                 found = true;
  3876             }
  3877           /* Skip the byte sequence of the collating element.  */
  3878           idx += elem_mbs_len;
  3879           /* Adjust for the alignment.  */
  3880           idx = (idx + 3) & ~3;
  3881           /* Skip the collation sequence value.  */
  3882           idx += sizeof (uint32_t);
  3883           /* Skip the wide char sequence of the collating element.  */
  3884           idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
  3885           /* If we found the entry, return the sequence value.  */
  3886           if (found)
  3887             return *(uint32_t *) (extra + idx);
  3888           /* Skip the collation sequence value.  */
  3889           idx += sizeof (uint32_t);
  3890         }
  3891       return UINT_MAX;
  3892     }
  3893 }
  3894 #endif /* _LIBC */
  3895 
  3896 /* Check whether the node accepts the byte which is IDX-th
  3897    byte of the INPUT.  */
  3898 
  3899 static bool
  3900 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
  3901                    Idx idx)
  3902 {
  3903   unsigned char ch;
  3904   ch = re_string_byte_at (&mctx->input, idx);
  3905   switch (node->type)
  3906     {
  3907     case CHARACTER:
  3908       if (node->opr.c != ch)
  3909         return false;
  3910       break;
  3911 
  3912     case SIMPLE_BRACKET:
  3913       if (!bitset_contain (node->opr.sbcset, ch))
  3914         return false;
  3915       break;
  3916 
  3917     case OP_UTF8_PERIOD:
  3918       if (ch >= ASCII_CHARS)
  3919         return false;
  3920       FALLTHROUGH;
  3921     case OP_PERIOD:
  3922       if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
  3923           || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
  3924         return false;
  3925       break;
  3926 
  3927     default:
  3928       return false;
  3929     }
  3930 
  3931   if (node->constraint)
  3932     {
  3933       /* The node has constraints.  Check whether the current context
  3934          satisfies the constraints.  */
  3935       unsigned int context = re_string_context_at (&mctx->input, idx,
  3936                                                    mctx->eflags);
  3937       if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
  3938         return false;
  3939     }
  3940 
  3941   return true;
  3942 }
  3943 
  3944 /* Extend the buffers, if the buffers have run out.  */
  3945 
  3946 static reg_errcode_t
  3947 __attribute_warn_unused_result__
  3948 extend_buffers (re_match_context_t *mctx, int min_len)
  3949 {
  3950   reg_errcode_t ret;
  3951   re_string_t *pstr = &mctx->input;
  3952 
  3953   /* Avoid overflow.  */
  3954   if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
  3955                         <= pstr->bufs_len))
  3956     return REG_ESPACE;
  3957 
  3958   /* Double the lengths of the buffers, but allocate at least MIN_LEN.  */
  3959   ret = re_string_realloc_buffers (pstr,
  3960                                    MAX (min_len,
  3961                                         MIN (pstr->len, pstr->bufs_len * 2)));
  3962   if (__glibc_unlikely (ret != REG_NOERROR))
  3963     return ret;
  3964 
  3965   if (mctx->state_log != NULL)
  3966     {
  3967       /* And double the length of state_log.  */
  3968       /* XXX We have no indication of the size of this buffer.  If this
  3969          allocation fail we have no indication that the state_log array
  3970          does not have the right size.  */
  3971       re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
  3972                                               pstr->bufs_len + 1);
  3973       if (__glibc_unlikely (new_array == NULL))
  3974         return REG_ESPACE;
  3975       mctx->state_log = new_array;
  3976     }
  3977 
  3978   /* Then reconstruct the buffers.  */
  3979   if (pstr->icase)
  3980     {
  3981       if (pstr->mb_cur_max > 1)
  3982         {
  3983           ret = build_wcs_upper_buffer (pstr);
  3984           if (__glibc_unlikely (ret != REG_NOERROR))
  3985             return ret;
  3986         }
  3987       else
  3988         build_upper_buffer (pstr);
  3989     }
  3990   else
  3991     {
  3992       if (pstr->mb_cur_max > 1)
  3993         build_wcs_buffer (pstr);
  3994       else
  3995         {
  3996           if (pstr->trans != NULL)
  3997             re_string_translate_buffer (pstr);
  3998         }
  3999     }
  4000   return REG_NOERROR;
  4001 }
  4002 
  4003 
  4004 /* Functions for matching context.  */
  4005 
  4006 /* Initialize MCTX.  */
  4007 
  4008 static reg_errcode_t
  4009 __attribute_warn_unused_result__
  4010 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
  4011 {
  4012   mctx->eflags = eflags;
  4013   mctx->match_last = -1;
  4014   if (n > 0)
  4015     {
  4016       /* Avoid overflow.  */
  4017       size_t max_object_size =
  4018         MAX (sizeof (struct re_backref_cache_entry),
  4019              sizeof (re_sub_match_top_t *));
  4020       if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
  4021         return REG_ESPACE;
  4022 
  4023       mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
  4024       mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
  4025       if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
  4026         return REG_ESPACE;
  4027     }
  4028   /* Already zero-ed by the caller.
  4029      else
  4030        mctx->bkref_ents = NULL;
  4031      mctx->nbkref_ents = 0;
  4032      mctx->nsub_tops = 0;  */
  4033   mctx->abkref_ents = n;
  4034   mctx->max_mb_elem_len = 1;
  4035   mctx->asub_tops = n;
  4036   return REG_NOERROR;
  4037 }
  4038 
  4039 /* Clean the entries which depend on the current input in MCTX.
  4040    This function must be invoked when the matcher changes the start index
  4041    of the input, or changes the input string.  */
  4042 
  4043 static void
  4044 match_ctx_clean (re_match_context_t *mctx)
  4045 {
  4046   Idx st_idx;
  4047   for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
  4048     {
  4049       Idx sl_idx;
  4050       re_sub_match_top_t *top = mctx->sub_tops[st_idx];
  4051       for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
  4052         {
  4053           re_sub_match_last_t *last = top->lasts[sl_idx];
  4054           re_free (last->path.array);
  4055           re_free (last);
  4056         }
  4057       re_free (top->lasts);
  4058       if (top->path)
  4059         {
  4060           re_free (top->path->array);
  4061           re_free (top->path);
  4062         }
  4063       re_free (top);
  4064     }
  4065 
  4066   mctx->nsub_tops = 0;
  4067   mctx->nbkref_ents = 0;
  4068 }
  4069 
  4070 /* Free all the memory associated with MCTX.  */
  4071 
  4072 static void
  4073 match_ctx_free (re_match_context_t *mctx)
  4074 {
  4075   /* First, free all the memory associated with MCTX->SUB_TOPS.  */
  4076   match_ctx_clean (mctx);
  4077   re_free (mctx->sub_tops);
  4078   re_free (mctx->bkref_ents);
  4079 }
  4080 
  4081 /* Add a new backreference entry to MCTX.
  4082    Note that we assume that caller never call this function with duplicate
  4083    entry, and call with STR_IDX which isn't smaller than any existing entry.
  4084 */
  4085 
  4086 static reg_errcode_t
  4087 __attribute_warn_unused_result__
  4088 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
  4089                      Idx to)
  4090 {
  4091   if (mctx->nbkref_ents >= mctx->abkref_ents)
  4092     {
  4093       struct re_backref_cache_entry* new_entry;
  4094       new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
  4095                               mctx->abkref_ents * 2);
  4096       if (__glibc_unlikely (new_entry == NULL))
  4097         {
  4098           re_free (mctx->bkref_ents);
  4099           return REG_ESPACE;
  4100         }
  4101       mctx->bkref_ents = new_entry;
  4102       memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
  4103               sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
  4104       mctx->abkref_ents *= 2;
  4105     }
  4106   if (mctx->nbkref_ents > 0
  4107       && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
  4108     mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
  4109 
  4110   mctx->bkref_ents[mctx->nbkref_ents].node = node;
  4111   mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
  4112   mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
  4113   mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
  4114 
  4115   /* This is a cache that saves negative results of check_dst_limits_calc_pos.
  4116      If bit N is clear, means that this entry won't epsilon-transition to
  4117      an OP_OPEN_SUBEXP or OP_CLOSE_SUBEXP for the N+1-th subexpression.  If
  4118      it is set, check_dst_limits_calc_pos_1 will recurse and try to find one
  4119      such node.
  4120 
  4121      A backreference does not epsilon-transition unless it is empty, so set
  4122      to all zeros if FROM != TO.  */
  4123   mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
  4124     = (from == to ? -1 : 0);
  4125 
  4126   mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
  4127   if (mctx->max_mb_elem_len < to - from)
  4128     mctx->max_mb_elem_len = to - from;
  4129   return REG_NOERROR;
  4130 }
  4131 
  4132 /* Return the first entry with the same str_idx, or -1 if none is
  4133    found.  Note that MCTX->BKREF_ENTS is already sorted by MCTX->STR_IDX.  */
  4134 
  4135 static Idx
  4136 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
  4137 {
  4138   Idx left, right, mid, last;
  4139   last = right = mctx->nbkref_ents;
  4140   for (left = 0; left < right;)
  4141     {
  4142       mid = (left + right) / 2;
  4143       if (mctx->bkref_ents[mid].str_idx < str_idx)
  4144         left = mid + 1;
  4145       else
  4146         right = mid;
  4147     }
  4148   if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
  4149     return left;
  4150   else
  4151     return -1;
  4152 }
  4153 
  4154 /* Register the node NODE, whose type is OP_OPEN_SUBEXP, and which matches
  4155    at STR_IDX.  */
  4156 
  4157 static reg_errcode_t
  4158 __attribute_warn_unused_result__
  4159 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
  4160 {
  4161   DEBUG_ASSERT (mctx->sub_tops != NULL);
  4162   DEBUG_ASSERT (mctx->asub_tops > 0);
  4163   if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
  4164     {
  4165       Idx new_asub_tops = mctx->asub_tops * 2;
  4166       re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
  4167                                                    re_sub_match_top_t *,
  4168                                                    new_asub_tops);
  4169       if (__glibc_unlikely (new_array == NULL))
  4170         return REG_ESPACE;
  4171       mctx->sub_tops = new_array;
  4172       mctx->asub_tops = new_asub_tops;
  4173     }
  4174   mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
  4175   if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
  4176     return REG_ESPACE;
  4177   mctx->sub_tops[mctx->nsub_tops]->node = node;
  4178   mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
  4179   return REG_NOERROR;
  4180 }
  4181 
  4182 /* Register the node NODE, whose type is OP_CLOSE_SUBEXP, and which matches
  4183    at STR_IDX, whose corresponding OP_OPEN_SUBEXP is SUB_TOP.
  4184    Return the new entry if successful, NULL if memory is exhausted.  */
  4185 
  4186 static re_sub_match_last_t *
  4187 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
  4188 {
  4189   re_sub_match_last_t *new_entry;
  4190   if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
  4191     {
  4192       Idx new_alasts = 2 * subtop->alasts + 1;
  4193       re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
  4194                                                     re_sub_match_last_t *,
  4195                                                     new_alasts);
  4196       if (__glibc_unlikely (new_array == NULL))
  4197         return NULL;
  4198       subtop->lasts = new_array;
  4199       subtop->alasts = new_alasts;
  4200     }
  4201   new_entry = calloc (1, sizeof (re_sub_match_last_t));
  4202   if (__glibc_likely (new_entry != NULL))
  4203     {
  4204       subtop->lasts[subtop->nlasts] = new_entry;
  4205       new_entry->node = node;
  4206       new_entry->str_idx = str_idx;
  4207       ++subtop->nlasts;
  4208     }
  4209   return new_entry;
  4210 }
  4211 
  4212 static void
  4213 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
  4214                re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
  4215 {
  4216   sctx->sifted_states = sifted_sts;
  4217   sctx->limited_states = limited_sts;
  4218   sctx->last_node = last_node;
  4219   sctx->last_str_idx = last_str_idx;
  4220   re_node_set_init_empty (&sctx->limits);
  4221 }
/* [<][>][^][v][top][bottom][index][help] */
root/lib/regexec.c

DEFINITIONS