This source file includes following definitions.
- regexec
- libc_hidden_def
- re_match
- weak_alias
- weak_alias
- weak_alias
- weak_alias
- re_search_stub
- re_copy_regs
- re_set_registers
- weak_alias
- re_search_internal
- prune_impossible_nodes
- acquire_init_state_context
- check_matching
- check_halt_node_context
- check_halt_state_context
- proceed_next_node
- push_fail_stack
- pop_fail_stack
- set_regs
- free_fail_stack_return
- update_regs
- sift_states_backward
- build_sifted_states
- clean_state_log_if_needed
- merge_state_array
- update_cur_sifted_state
- add_epsilon_src_nodes
- sub_epsilon_src_nodes
- check_dst_limits
- check_dst_limits_calc_pos_1
- check_dst_limits_calc_pos
- check_subexp_limits
- sift_states_bkref
- sift_states_iter_mb
- transit_state
- merge_state_with_log
- find_recover_state
- check_subexp_matching_top
- transit_state_sb
- transit_state_mb
- transit_state_bkref
- get_subexp
- get_subexp_sub
- find_subexp_node
- check_arrival
- check_arrival_add_next_nodes
- check_arrival_expand_ecl
- check_arrival_expand_ecl_sub
- expand_bkref_cache
- build_trtable
- group_nodes_into_DFAstates
- check_node_accept_bytes
- find_collation_sequence_value
- check_node_accept
- extend_buffers
- match_ctx_init
- match_ctx_clean
- match_ctx_free
- match_ctx_add_entry
- search_cur_bkref_entry
- match_ctx_add_subtop
- match_ctx_add_sublast
- sift_ctx_init
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags,
21 Idx n);
22 static void match_ctx_clean (re_match_context_t *mctx);
23 static void match_ctx_free (re_match_context_t *cache);
24 static reg_errcode_t match_ctx_add_entry (re_match_context_t *cache, Idx node,
25 Idx str_idx, Idx from, Idx to);
26 static Idx search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx);
27 static reg_errcode_t match_ctx_add_subtop (re_match_context_t *mctx, Idx node,
28 Idx str_idx);
29 static re_sub_match_last_t * match_ctx_add_sublast (re_sub_match_top_t *subtop,
30 Idx node, Idx str_idx);
31 static void sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
32 re_dfastate_t **limited_sts, Idx last_node,
33 Idx last_str_idx);
34 static reg_errcode_t re_search_internal (const regex_t *preg,
35 const char *string, Idx length,
36 Idx start, Idx last_start, Idx stop,
37 size_t nmatch, regmatch_t pmatch[],
38 int eflags);
39 static regoff_t re_search_2_stub (struct re_pattern_buffer *bufp,
40 const char *string1, Idx length1,
41 const char *string2, Idx length2,
42 Idx start, regoff_t range,
43 struct re_registers *regs,
44 Idx stop, bool ret_len);
45 static regoff_t re_search_stub (struct re_pattern_buffer *bufp,
46 const char *string, Idx length, Idx start,
47 regoff_t range, Idx stop,
48 struct re_registers *regs,
49 bool ret_len);
50 static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch,
51 Idx nregs, int regs_allocated);
52 static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx);
53 static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match,
54 Idx *p_match_first);
55 static Idx check_halt_state_context (const re_match_context_t *mctx,
56 const re_dfastate_t *state, Idx idx);
57 static void update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
58 regmatch_t *prev_idx_match, Idx cur_node,
59 Idx cur_idx, Idx nmatch);
60 static reg_errcode_t push_fail_stack (struct re_fail_stack_t *fs,
61 Idx str_idx, Idx dest_node, Idx nregs,
62 regmatch_t *regs, regmatch_t *prevregs,
63 re_node_set *eps_via_nodes);
64 static reg_errcode_t set_regs (const regex_t *preg,
65 const re_match_context_t *mctx,
66 size_t nmatch, regmatch_t *pmatch,
67 bool fl_backtrack);
68 static reg_errcode_t free_fail_stack_return (struct re_fail_stack_t *fs);
69
70 static int sift_states_iter_mb (const re_match_context_t *mctx,
71 re_sift_context_t *sctx,
72 Idx node_idx, Idx str_idx, Idx max_str_idx);
73 static reg_errcode_t sift_states_backward (const re_match_context_t *mctx,
74 re_sift_context_t *sctx);
75 static reg_errcode_t build_sifted_states (const re_match_context_t *mctx,
76 re_sift_context_t *sctx, Idx str_idx,
77 re_node_set *cur_dest);
78 static reg_errcode_t update_cur_sifted_state (const re_match_context_t *mctx,
79 re_sift_context_t *sctx,
80 Idx str_idx,
81 re_node_set *dest_nodes);
82 static reg_errcode_t add_epsilon_src_nodes (const re_dfa_t *dfa,
83 re_node_set *dest_nodes,
84 const re_node_set *candidates);
85 static bool check_dst_limits (const re_match_context_t *mctx,
86 const re_node_set *limits,
87 Idx dst_node, Idx dst_idx, Idx src_node,
88 Idx src_idx);
89 static int check_dst_limits_calc_pos_1 (const re_match_context_t *mctx,
90 int boundaries, Idx subexp_idx,
91 Idx from_node, Idx bkref_idx);
92 static int check_dst_limits_calc_pos (const re_match_context_t *mctx,
93 Idx limit, Idx subexp_idx,
94 Idx node, Idx str_idx,
95 Idx bkref_idx);
96 static reg_errcode_t check_subexp_limits (const re_dfa_t *dfa,
97 re_node_set *dest_nodes,
98 const re_node_set *candidates,
99 re_node_set *limits,
100 struct re_backref_cache_entry *bkref_ents,
101 Idx str_idx);
102 static reg_errcode_t sift_states_bkref (const re_match_context_t *mctx,
103 re_sift_context_t *sctx,
104 Idx str_idx, const re_node_set *candidates);
105 static reg_errcode_t merge_state_array (const re_dfa_t *dfa,
106 re_dfastate_t **dst,
107 re_dfastate_t **src, Idx num);
108 static re_dfastate_t *find_recover_state (reg_errcode_t *err,
109 re_match_context_t *mctx);
110 static re_dfastate_t *transit_state (reg_errcode_t *err,
111 re_match_context_t *mctx,
112 re_dfastate_t *state);
113 static re_dfastate_t *merge_state_with_log (reg_errcode_t *err,
114 re_match_context_t *mctx,
115 re_dfastate_t *next_state);
116 static reg_errcode_t check_subexp_matching_top (re_match_context_t *mctx,
117 re_node_set *cur_nodes,
118 Idx str_idx);
119 #if 0
120 static re_dfastate_t *transit_state_sb (reg_errcode_t *err,
121 re_match_context_t *mctx,
122 re_dfastate_t *pstate);
123 #endif
124 static reg_errcode_t transit_state_mb (re_match_context_t *mctx,
125 re_dfastate_t *pstate);
126 static reg_errcode_t transit_state_bkref (re_match_context_t *mctx,
127 const re_node_set *nodes);
128 static reg_errcode_t get_subexp (re_match_context_t *mctx,
129 Idx bkref_node, Idx bkref_str_idx);
130 static reg_errcode_t get_subexp_sub (re_match_context_t *mctx,
131 const re_sub_match_top_t *sub_top,
132 re_sub_match_last_t *sub_last,
133 Idx bkref_node, Idx bkref_str);
134 static Idx find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
135 Idx subexp_idx, int type);
136 static reg_errcode_t check_arrival (re_match_context_t *mctx,
137 state_array_t *path, Idx top_node,
138 Idx top_str, Idx last_node, Idx last_str,
139 int type);
140 static reg_errcode_t check_arrival_add_next_nodes (re_match_context_t *mctx,
141 Idx str_idx,
142 re_node_set *cur_nodes,
143 re_node_set *next_nodes);
144 static reg_errcode_t check_arrival_expand_ecl (const re_dfa_t *dfa,
145 re_node_set *cur_nodes,
146 Idx ex_subexp, int type);
147 static reg_errcode_t check_arrival_expand_ecl_sub (const re_dfa_t *dfa,
148 re_node_set *dst_nodes,
149 Idx target, Idx ex_subexp,
150 int type);
151 static reg_errcode_t expand_bkref_cache (re_match_context_t *mctx,
152 re_node_set *cur_nodes, Idx cur_str,
153 Idx subexp_num, int type);
154 static bool build_trtable (const re_dfa_t *dfa, re_dfastate_t *state);
155 static int check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
156 const re_string_t *input, Idx idx);
157 #ifdef _LIBC
158 static unsigned int find_collation_sequence_value (const unsigned char *mbs,
159 size_t name_len);
160 #endif
161 static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa,
162 const re_dfastate_t *state,
163 re_node_set *states_node,
164 bitset_t *states_ch);
165 static bool check_node_accept (const re_match_context_t *mctx,
166 const re_token_t *node, Idx idx);
167 static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len);
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186 int
187 regexec (const regex_t *__restrict preg, const char *__restrict string,
188 size_t nmatch, regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
189 {
190 reg_errcode_t err;
191 Idx start, length;
192 re_dfa_t *dfa = preg->buffer;
193
194 if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND))
195 return REG_BADPAT;
196
197 if (eflags & REG_STARTEND)
198 {
199 start = pmatch[0].rm_so;
200 length = pmatch[0].rm_eo;
201 }
202 else
203 {
204 start = 0;
205 length = strlen (string);
206 }
207
208 lock_lock (dfa->lock);
209 if (preg->no_sub)
210 err = re_search_internal (preg, string, length, start, length,
211 length, 0, NULL, eflags);
212 else
213 err = re_search_internal (preg, string, length, start, length,
214 length, nmatch, pmatch, eflags);
215 lock_unlock (dfa->lock);
216 return err != REG_NOERROR;
217 }
218
219 #ifdef _LIBC
220 libc_hidden_def (__regexec)
221
222 # include <shlib-compat.h>
223 versioned_symbol (libc, __regexec, regexec, GLIBC_2_3_4);
224
225 # if SHLIB_COMPAT (libc, GLIBC_2_0, GLIBC_2_3_4)
226 __typeof__ (__regexec) __compat_regexec;
227
228 int
229 attribute_compat_text_section
230 __compat_regexec (const regex_t *__restrict preg,
231 const char *__restrict string, size_t nmatch,
232 regmatch_t pmatch[_REGEX_NELTS (nmatch)], int eflags)
233 {
234 return regexec (preg, string, nmatch, pmatch,
235 eflags & (REG_NOTBOL | REG_NOTEOL));
236 }
237 compat_symbol (libc, __compat_regexec, regexec, GLIBC_2_0);
238 # endif
239 #endif
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270 regoff_t
271 re_match (struct re_pattern_buffer *bufp, const char *string, Idx length,
272 Idx start, struct re_registers *regs)
273 {
274 return re_search_stub (bufp, string, length, start, 0, length, regs, true);
275 }
276 #ifdef _LIBC
277 weak_alias (__re_match, re_match)
278 #endif
279
280 regoff_t
281 re_search (struct re_pattern_buffer *bufp, const char *string, Idx length,
282 Idx start, regoff_t range, struct re_registers *regs)
283 {
284 return re_search_stub (bufp, string, length, start, range, length, regs,
285 false);
286 }
287 #ifdef _LIBC
288 weak_alias (__re_search, re_search)
289 #endif
290
291 regoff_t
292 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
293 const char *string2, Idx length2, Idx start,
294 struct re_registers *regs, Idx stop)
295 {
296 return re_search_2_stub (bufp, string1, length1, string2, length2,
297 start, 0, regs, stop, true);
298 }
299 #ifdef _LIBC
300 weak_alias (__re_match_2, re_match_2)
301 #endif
302
303 regoff_t
304 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, Idx length1,
305 const char *string2, Idx length2, Idx start, regoff_t range,
306 struct re_registers *regs, Idx stop)
307 {
308 return re_search_2_stub (bufp, string1, length1, string2, length2,
309 start, range, regs, stop, false);
310 }
311 #ifdef _LIBC
312 weak_alias (__re_search_2, re_search_2)
313 #endif
314
315 static regoff_t
316 re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1,
317 Idx length1, const char *string2, Idx length2, Idx start,
318 regoff_t range, struct re_registers *regs,
319 Idx stop, bool ret_len)
320 {
321 const char *str;
322 regoff_t rval;
323 Idx len;
324 char *s = NULL;
325
326 if (__glibc_unlikely ((length1 < 0 || length2 < 0 || stop < 0
327 || INT_ADD_WRAPV (length1, length2, &len))))
328 return -2;
329
330
331 if (length2 > 0)
332 if (length1 > 0)
333 {
334 s = re_malloc (char, len);
335
336 if (__glibc_unlikely (s == NULL))
337 return -2;
338 #ifdef _LIBC
339 memcpy (__mempcpy (s, string1, length1), string2, length2);
340 #else
341 memcpy (s, string1, length1);
342 memcpy (s + length1, string2, length2);
343 #endif
344 str = s;
345 }
346 else
347 str = string2;
348 else
349 str = string1;
350
351 rval = re_search_stub (bufp, str, len, start, range, stop, regs,
352 ret_len);
353 re_free (s);
354 return rval;
355 }
356
357
358
359
360
361
362 static regoff_t
363 re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length,
364 Idx start, regoff_t range, Idx stop, struct re_registers *regs,
365 bool ret_len)
366 {
367 reg_errcode_t result;
368 regmatch_t *pmatch;
369 Idx nregs;
370 regoff_t rval;
371 int eflags = 0;
372 re_dfa_t *dfa = bufp->buffer;
373 Idx last_start = start + range;
374
375
376 if (__glibc_unlikely (start < 0 || start > length))
377 return -1;
378 if (__glibc_unlikely (length < last_start
379 || (0 <= range && last_start < start)))
380 last_start = length;
381 else if (__glibc_unlikely (last_start < 0
382 || (range < 0 && start <= last_start)))
383 last_start = 0;
384
385 lock_lock (dfa->lock);
386
387 eflags |= (bufp->not_bol) ? REG_NOTBOL : 0;
388 eflags |= (bufp->not_eol) ? REG_NOTEOL : 0;
389
390
391 if (start < last_start && bufp->fastmap != NULL && !bufp->fastmap_accurate)
392 re_compile_fastmap (bufp);
393
394 if (__glibc_unlikely (bufp->no_sub))
395 regs = NULL;
396
397
398 if (regs == NULL)
399 nregs = 1;
400 else if (__glibc_unlikely (bufp->regs_allocated == REGS_FIXED
401 && regs->num_regs <= bufp->re_nsub))
402 {
403 nregs = regs->num_regs;
404 if (__glibc_unlikely (nregs < 1))
405 {
406
407 regs = NULL;
408 nregs = 1;
409 }
410 }
411 else
412 nregs = bufp->re_nsub + 1;
413 pmatch = re_malloc (regmatch_t, nregs);
414 if (__glibc_unlikely (pmatch == NULL))
415 {
416 rval = -2;
417 goto out;
418 }
419
420 result = re_search_internal (bufp, string, length, start, last_start, stop,
421 nregs, pmatch, eflags);
422
423 rval = 0;
424
425
426 if (result != REG_NOERROR)
427 rval = result == REG_NOMATCH ? -1 : -2;
428 else if (regs != NULL)
429 {
430
431 bufp->regs_allocated = re_copy_regs (regs, pmatch, nregs,
432 bufp->regs_allocated);
433 if (__glibc_unlikely (bufp->regs_allocated == REGS_UNALLOCATED))
434 rval = -2;
435 }
436
437 if (__glibc_likely (rval == 0))
438 {
439 if (ret_len)
440 {
441 DEBUG_ASSERT (pmatch[0].rm_so == start);
442 rval = pmatch[0].rm_eo - start;
443 }
444 else
445 rval = pmatch[0].rm_so;
446 }
447 re_free (pmatch);
448 out:
449 lock_unlock (dfa->lock);
450 return rval;
451 }
452
453 static unsigned
454 re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs,
455 int regs_allocated)
456 {
457 int rval = REGS_REALLOCATE;
458 Idx i;
459 Idx need_regs = nregs + 1;
460
461
462
463
464 if (regs_allocated == REGS_UNALLOCATED)
465 {
466 regs->start = re_malloc (regoff_t, need_regs);
467 if (__glibc_unlikely (regs->start == NULL))
468 return REGS_UNALLOCATED;
469 regs->end = re_malloc (regoff_t, need_regs);
470 if (__glibc_unlikely (regs->end == NULL))
471 {
472 re_free (regs->start);
473 return REGS_UNALLOCATED;
474 }
475 regs->num_regs = need_regs;
476 }
477 else if (regs_allocated == REGS_REALLOCATE)
478 {
479
480
481 if (__glibc_unlikely (need_regs > regs->num_regs))
482 {
483 regoff_t *new_start = re_realloc (regs->start, regoff_t, need_regs);
484 regoff_t *new_end;
485 if (__glibc_unlikely (new_start == NULL))
486 return REGS_UNALLOCATED;
487 new_end = re_realloc (regs->end, regoff_t, need_regs);
488 if (__glibc_unlikely (new_end == NULL))
489 {
490 re_free (new_start);
491 return REGS_UNALLOCATED;
492 }
493 regs->start = new_start;
494 regs->end = new_end;
495 regs->num_regs = need_regs;
496 }
497 }
498 else
499 {
500 DEBUG_ASSERT (regs_allocated == REGS_FIXED);
501
502 DEBUG_ASSERT (nregs <= regs->num_regs);
503 rval = REGS_FIXED;
504 }
505
506
507 for (i = 0; i < nregs; ++i)
508 {
509 regs->start[i] = pmatch[i].rm_so;
510 regs->end[i] = pmatch[i].rm_eo;
511 }
512 for ( ; i < regs->num_regs; ++i)
513 regs->start[i] = regs->end[i] = -1;
514
515 return rval;
516 }
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531 void
532 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
533 __re_size_t num_regs, regoff_t *starts, regoff_t *ends)
534 {
535 if (num_regs)
536 {
537 bufp->regs_allocated = REGS_REALLOCATE;
538 regs->num_regs = num_regs;
539 regs->start = starts;
540 regs->end = ends;
541 }
542 else
543 {
544 bufp->regs_allocated = REGS_UNALLOCATED;
545 regs->num_regs = 0;
546 regs->start = regs->end = NULL;
547 }
548 }
549 #ifdef _LIBC
550 weak_alias (__re_set_registers, re_set_registers)
551 #endif
552
553
554
555
556 #if defined _REGEX_RE_COMP || defined _LIBC
557 int
558 # ifdef _LIBC
559 weak_function
560 # endif
561 re_exec (const char *s)
562 {
563 return 0 == regexec (&re_comp_buf, s, 0, NULL, 0);
564 }
565 #endif
566
567
568
569
570
571
572
573
574
575
576
577
578 static reg_errcode_t
579 __attribute_warn_unused_result__
580 re_search_internal (const regex_t *preg, const char *string, Idx length,
581 Idx start, Idx last_start, Idx stop, size_t nmatch,
582 regmatch_t pmatch[], int eflags)
583 {
584 reg_errcode_t err;
585 const re_dfa_t *dfa = preg->buffer;
586 Idx left_lim, right_lim;
587 int incr;
588 bool fl_longest_match;
589 int match_kind;
590 Idx match_first;
591 Idx match_last = -1;
592 Idx extra_nmatch;
593 bool sb;
594 int ch;
595 re_match_context_t mctx = { .dfa = dfa };
596 char *fastmap = ((preg->fastmap != NULL && preg->fastmap_accurate
597 && start != last_start && !preg->can_be_null)
598 ? preg->fastmap : NULL);
599 RE_TRANSLATE_TYPE t = preg->translate;
600
601 extra_nmatch = (nmatch > preg->re_nsub) ? nmatch - (preg->re_nsub + 1) : 0;
602 nmatch -= extra_nmatch;
603
604
605 if (__glibc_unlikely (preg->used == 0 || dfa->init_state == NULL
606 || dfa->init_state_word == NULL
607 || dfa->init_state_nl == NULL
608 || dfa->init_state_begbuf == NULL))
609 return REG_NOMATCH;
610
611
612 DEBUG_ASSERT (0 <= last_start && last_start <= length);
613
614
615
616
617 if (dfa->init_state->nodes.nelem == 0
618 && dfa->init_state_word->nodes.nelem == 0
619 && (dfa->init_state_nl->nodes.nelem == 0
620 || !preg->newline_anchor))
621 {
622 if (start != 0 && last_start != 0)
623 return REG_NOMATCH;
624 start = last_start = 0;
625 }
626
627
628 fl_longest_match = (nmatch != 0 || dfa->nbackref);
629
630 err = re_string_allocate (&mctx.input, string, length, dfa->nodes_len + 1,
631 preg->translate, (preg->syntax & RE_ICASE) != 0,
632 dfa);
633 if (__glibc_unlikely (err != REG_NOERROR))
634 goto free_return;
635 mctx.input.stop = stop;
636 mctx.input.raw_stop = stop;
637 mctx.input.newline_anchor = preg->newline_anchor;
638
639 err = match_ctx_init (&mctx, eflags, dfa->nbackref * 2);
640 if (__glibc_unlikely (err != REG_NOERROR))
641 goto free_return;
642
643
644
645
646
647 if (nmatch > 1 || dfa->has_mb_node)
648 {
649
650 if (__glibc_unlikely ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
651 <= mctx.input.bufs_len)))
652 {
653 err = REG_ESPACE;
654 goto free_return;
655 }
656
657 mctx.state_log = re_malloc (re_dfastate_t *, mctx.input.bufs_len + 1);
658 if (__glibc_unlikely (mctx.state_log == NULL))
659 {
660 err = REG_ESPACE;
661 goto free_return;
662 }
663 }
664
665 match_first = start;
666 mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
667 : CONTEXT_NEWLINE | CONTEXT_BEGBUF;
668
669
670 incr = (last_start < start) ? -1 : 1;
671 left_lim = (last_start < start) ? last_start : start;
672 right_lim = (last_start < start) ? start : last_start;
673 sb = dfa->mb_cur_max == 1;
674 match_kind =
675 (fastmap
676 ? ((sb || !(preg->syntax & RE_ICASE || t) ? 4 : 0)
677 | (start <= last_start ? 2 : 0)
678 | (t != NULL ? 1 : 0))
679 : 8);
680
681 for (;; match_first += incr)
682 {
683 err = REG_NOMATCH;
684 if (match_first < left_lim || right_lim < match_first)
685 goto free_return;
686
687
688
689
690
691
692 switch (match_kind)
693 {
694 case 8:
695
696 break;
697
698 case 7:
699
700 while (__glibc_likely (match_first < right_lim)
701 && !fastmap[t[(unsigned char) string[match_first]]])
702 ++match_first;
703 goto forward_match_found_start_or_reached_end;
704
705 case 6:
706
707 while (__glibc_likely (match_first < right_lim)
708 && !fastmap[(unsigned char) string[match_first]])
709 ++match_first;
710
711 forward_match_found_start_or_reached_end:
712 if (__glibc_unlikely (match_first == right_lim))
713 {
714 ch = match_first >= length
715 ? 0 : (unsigned char) string[match_first];
716 if (!fastmap[t ? t[ch] : ch])
717 goto free_return;
718 }
719 break;
720
721 case 4:
722 case 5:
723
724 while (match_first >= left_lim)
725 {
726 ch = match_first >= length
727 ? 0 : (unsigned char) string[match_first];
728 if (fastmap[t ? t[ch] : ch])
729 break;
730 --match_first;
731 }
732 if (match_first < left_lim)
733 goto free_return;
734 break;
735
736 default:
737
738
739
740 for (;;)
741 {
742
743
744 __re_size_t offset = match_first - mctx.input.raw_mbs_idx;
745 if (__glibc_unlikely (offset
746 >= (__re_size_t) mctx.input.valid_raw_len))
747 {
748 err = re_string_reconstruct (&mctx.input, match_first,
749 eflags);
750 if (__glibc_unlikely (err != REG_NOERROR))
751 goto free_return;
752
753 offset = match_first - mctx.input.raw_mbs_idx;
754 }
755
756 ch = (offset < mctx.input.valid_len
757 ? re_string_byte_at (&mctx.input, offset) : 0);
758 if (fastmap[ch])
759 break;
760 match_first += incr;
761 if (match_first < left_lim || match_first > right_lim)
762 {
763 err = REG_NOMATCH;
764 goto free_return;
765 }
766 }
767 break;
768 }
769
770
771
772 err = re_string_reconstruct (&mctx.input, match_first, eflags);
773 if (__glibc_unlikely (err != REG_NOERROR))
774 goto free_return;
775
776
777
778 if (!sb && !re_string_first_byte (&mctx.input, 0))
779 continue;
780
781
782
783 mctx.state_log_top = mctx.nbkref_ents = mctx.max_mb_elem_len = 0;
784 match_last = check_matching (&mctx, fl_longest_match,
785 start <= last_start ? &match_first : NULL);
786 if (match_last != -1)
787 {
788 if (__glibc_unlikely (match_last == -2))
789 {
790 err = REG_ESPACE;
791 goto free_return;
792 }
793 else
794 {
795 mctx.match_last = match_last;
796 if ((!preg->no_sub && nmatch > 1) || dfa->nbackref)
797 {
798 re_dfastate_t *pstate = mctx.state_log[match_last];
799 mctx.last_node = check_halt_state_context (&mctx, pstate,
800 match_last);
801 }
802 if ((!preg->no_sub && nmatch > 1 && dfa->has_plural_match)
803 || dfa->nbackref)
804 {
805 err = prune_impossible_nodes (&mctx);
806 if (err == REG_NOERROR)
807 break;
808 if (__glibc_unlikely (err != REG_NOMATCH))
809 goto free_return;
810 match_last = -1;
811 }
812 else
813 break;
814 }
815 }
816
817 match_ctx_clean (&mctx);
818 }
819
820 DEBUG_ASSERT (match_last != -1);
821 DEBUG_ASSERT (err == REG_NOERROR);
822
823
824 if (nmatch > 0)
825 {
826 Idx reg_idx;
827
828
829 for (reg_idx = 1; reg_idx < nmatch; ++reg_idx)
830 pmatch[reg_idx].rm_so = pmatch[reg_idx].rm_eo = -1;
831
832
833 pmatch[0].rm_so = 0;
834 pmatch[0].rm_eo = mctx.match_last;
835
836
837
838
839 if (!preg->no_sub && nmatch > 1)
840 {
841 err = set_regs (preg, &mctx, nmatch, pmatch,
842 dfa->has_plural_match && dfa->nbackref > 0);
843 if (__glibc_unlikely (err != REG_NOERROR))
844 goto free_return;
845 }
846
847
848
849
850 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
851 if (pmatch[reg_idx].rm_so != -1)
852 {
853 if (__glibc_unlikely (mctx.input.offsets_needed != 0))
854 {
855 pmatch[reg_idx].rm_so =
856 (pmatch[reg_idx].rm_so == mctx.input.valid_len
857 ? mctx.input.valid_raw_len
858 : mctx.input.offsets[pmatch[reg_idx].rm_so]);
859 pmatch[reg_idx].rm_eo =
860 (pmatch[reg_idx].rm_eo == mctx.input.valid_len
861 ? mctx.input.valid_raw_len
862 : mctx.input.offsets[pmatch[reg_idx].rm_eo]);
863 }
864 pmatch[reg_idx].rm_so += match_first;
865 pmatch[reg_idx].rm_eo += match_first;
866 }
867 for (reg_idx = 0; reg_idx < extra_nmatch; ++reg_idx)
868 {
869 pmatch[nmatch + reg_idx].rm_so = -1;
870 pmatch[nmatch + reg_idx].rm_eo = -1;
871 }
872
873 if (dfa->subexp_map)
874 for (reg_idx = 0; reg_idx + 1 < nmatch; reg_idx++)
875 if (dfa->subexp_map[reg_idx] != reg_idx)
876 {
877 pmatch[reg_idx + 1].rm_so
878 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_so;
879 pmatch[reg_idx + 1].rm_eo
880 = pmatch[dfa->subexp_map[reg_idx] + 1].rm_eo;
881 }
882 }
883
884 free_return:
885 re_free (mctx.state_log);
886 if (dfa->nbackref)
887 match_ctx_free (&mctx);
888 re_string_destruct (&mctx.input);
889 return err;
890 }
891
892 static reg_errcode_t
893 __attribute_warn_unused_result__
894 prune_impossible_nodes (re_match_context_t *mctx)
895 {
896 const re_dfa_t *const dfa = mctx->dfa;
897 Idx halt_node, match_last;
898 reg_errcode_t ret;
899 re_dfastate_t **sifted_states;
900 re_dfastate_t **lim_states = NULL;
901 re_sift_context_t sctx;
902 DEBUG_ASSERT (mctx->state_log != NULL);
903 match_last = mctx->match_last;
904 halt_node = mctx->last_node;
905
906
907 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *))
908 <= match_last))
909 return REG_ESPACE;
910
911 sifted_states = re_malloc (re_dfastate_t *, match_last + 1);
912 if (__glibc_unlikely (sifted_states == NULL))
913 {
914 ret = REG_ESPACE;
915 goto free_return;
916 }
917 if (dfa->nbackref)
918 {
919 lim_states = re_malloc (re_dfastate_t *, match_last + 1);
920 if (__glibc_unlikely (lim_states == NULL))
921 {
922 ret = REG_ESPACE;
923 goto free_return;
924 }
925 while (1)
926 {
927 memset (lim_states, '\0',
928 sizeof (re_dfastate_t *) * (match_last + 1));
929 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node,
930 match_last);
931 ret = sift_states_backward (mctx, &sctx);
932 re_node_set_free (&sctx.limits);
933 if (__glibc_unlikely (ret != REG_NOERROR))
934 goto free_return;
935 if (sifted_states[0] != NULL || lim_states[0] != NULL)
936 break;
937 do
938 {
939 --match_last;
940 if (match_last < 0)
941 {
942 ret = REG_NOMATCH;
943 goto free_return;
944 }
945 } while (mctx->state_log[match_last] == NULL
946 || !mctx->state_log[match_last]->halt);
947 halt_node = check_halt_state_context (mctx,
948 mctx->state_log[match_last],
949 match_last);
950 }
951 ret = merge_state_array (dfa, sifted_states, lim_states,
952 match_last + 1);
953 re_free (lim_states);
954 lim_states = NULL;
955 if (__glibc_unlikely (ret != REG_NOERROR))
956 goto free_return;
957 }
958 else
959 {
960 sift_ctx_init (&sctx, sifted_states, lim_states, halt_node, match_last);
961 ret = sift_states_backward (mctx, &sctx);
962 re_node_set_free (&sctx.limits);
963 if (__glibc_unlikely (ret != REG_NOERROR))
964 goto free_return;
965 if (sifted_states[0] == NULL)
966 {
967 ret = REG_NOMATCH;
968 goto free_return;
969 }
970 }
971 re_free (mctx->state_log);
972 mctx->state_log = sifted_states;
973 sifted_states = NULL;
974 mctx->last_node = halt_node;
975 mctx->match_last = match_last;
976 ret = REG_NOERROR;
977 free_return:
978 re_free (sifted_states);
979 re_free (lim_states);
980 return ret;
981 }
982
983
984
985
986
987 static __always_inline re_dfastate_t *
988 acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx,
989 Idx idx)
990 {
991 const re_dfa_t *const dfa = mctx->dfa;
992 if (dfa->init_state->has_constraint)
993 {
994 unsigned int context;
995 context = re_string_context_at (&mctx->input, idx - 1, mctx->eflags);
996 if (IS_WORD_CONTEXT (context))
997 return dfa->init_state_word;
998 else if (IS_ORDINARY_CONTEXT (context))
999 return dfa->init_state;
1000 else if (IS_BEGBUF_CONTEXT (context) && IS_NEWLINE_CONTEXT (context))
1001 return dfa->init_state_begbuf;
1002 else if (IS_NEWLINE_CONTEXT (context))
1003 return dfa->init_state_nl;
1004 else if (IS_BEGBUF_CONTEXT (context))
1005 {
1006
1007 return re_acquire_state_context (err, dfa,
1008 dfa->init_state->entrance_nodes,
1009 context);
1010 }
1011 else
1012
1013 return dfa->init_state;
1014 }
1015 else
1016 return dfa->init_state;
1017 }
1018
1019
1020
1021
1022
1023
1024
1025
1026
1027
1028 static Idx
1029 __attribute_warn_unused_result__
1030 check_matching (re_match_context_t *mctx, bool fl_longest_match,
1031 Idx *p_match_first)
1032 {
1033 const re_dfa_t *const dfa = mctx->dfa;
1034 reg_errcode_t err;
1035 Idx match = 0;
1036 Idx match_last = -1;
1037 Idx cur_str_idx = re_string_cur_idx (&mctx->input);
1038 re_dfastate_t *cur_state;
1039 bool at_init_state = p_match_first != NULL;
1040 Idx next_start_idx = cur_str_idx;
1041
1042 err = REG_NOERROR;
1043 cur_state = acquire_init_state_context (&err, mctx, cur_str_idx);
1044
1045 if (__glibc_unlikely (cur_state == NULL))
1046 {
1047 DEBUG_ASSERT (err == REG_ESPACE);
1048 return -2;
1049 }
1050
1051 if (mctx->state_log != NULL)
1052 {
1053 mctx->state_log[cur_str_idx] = cur_state;
1054
1055
1056
1057 if (__glibc_unlikely (dfa->nbackref))
1058 {
1059 at_init_state = false;
1060 err = check_subexp_matching_top (mctx, &cur_state->nodes, 0);
1061 if (__glibc_unlikely (err != REG_NOERROR))
1062 return err;
1063
1064 if (cur_state->has_backref)
1065 {
1066 err = transit_state_bkref (mctx, &cur_state->nodes);
1067 if (__glibc_unlikely (err != REG_NOERROR))
1068 return err;
1069 }
1070 }
1071 }
1072
1073
1074 if (__glibc_unlikely (cur_state->halt))
1075 {
1076 if (!cur_state->has_constraint
1077 || check_halt_state_context (mctx, cur_state, cur_str_idx))
1078 {
1079 if (!fl_longest_match)
1080 return cur_str_idx;
1081 else
1082 {
1083 match_last = cur_str_idx;
1084 match = 1;
1085 }
1086 }
1087 }
1088
1089 while (!re_string_eoi (&mctx->input))
1090 {
1091 re_dfastate_t *old_state = cur_state;
1092 Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1;
1093
1094 if ((__glibc_unlikely (next_char_idx >= mctx->input.bufs_len)
1095 && mctx->input.bufs_len < mctx->input.len)
1096 || (__glibc_unlikely (next_char_idx >= mctx->input.valid_len)
1097 && mctx->input.valid_len < mctx->input.len))
1098 {
1099 err = extend_buffers (mctx, next_char_idx + 1);
1100 if (__glibc_unlikely (err != REG_NOERROR))
1101 {
1102 DEBUG_ASSERT (err == REG_ESPACE);
1103 return -2;
1104 }
1105 }
1106
1107 cur_state = transit_state (&err, mctx, cur_state);
1108 if (mctx->state_log != NULL)
1109 cur_state = merge_state_with_log (&err, mctx, cur_state);
1110
1111 if (cur_state == NULL)
1112 {
1113
1114
1115
1116 if (__glibc_unlikely (err != REG_NOERROR))
1117 return -2;
1118
1119 if (mctx->state_log == NULL
1120 || (match && !fl_longest_match)
1121 || (cur_state = find_recover_state (&err, mctx)) == NULL)
1122 break;
1123 }
1124
1125 if (__glibc_unlikely (at_init_state))
1126 {
1127 if (old_state == cur_state)
1128 next_start_idx = next_char_idx;
1129 else
1130 at_init_state = false;
1131 }
1132
1133 if (cur_state->halt)
1134 {
1135
1136
1137 if (!cur_state->has_constraint
1138 || check_halt_state_context (mctx, cur_state,
1139 re_string_cur_idx (&mctx->input)))
1140 {
1141
1142 match_last = re_string_cur_idx (&mctx->input);
1143 match = 1;
1144
1145
1146 p_match_first = NULL;
1147 if (!fl_longest_match)
1148 break;
1149 }
1150 }
1151 }
1152
1153 if (p_match_first)
1154 *p_match_first += next_start_idx;
1155
1156 return match_last;
1157 }
1158
1159
1160
1161 static bool
1162 check_halt_node_context (const re_dfa_t *dfa, Idx node, unsigned int context)
1163 {
1164 re_token_type_t type = dfa->nodes[node].type;
1165 unsigned int constraint = dfa->nodes[node].constraint;
1166 if (type != END_OF_RE)
1167 return false;
1168 if (!constraint)
1169 return true;
1170 if (NOT_SATISFY_NEXT_CONSTRAINT (constraint, context))
1171 return false;
1172 return true;
1173 }
1174
1175
1176
1177
1178
1179 static Idx
1180 check_halt_state_context (const re_match_context_t *mctx,
1181 const re_dfastate_t *state, Idx idx)
1182 {
1183 Idx i;
1184 unsigned int context;
1185 DEBUG_ASSERT (state->halt);
1186 context = re_string_context_at (&mctx->input, idx, mctx->eflags);
1187 for (i = 0; i < state->nodes.nelem; ++i)
1188 if (check_halt_node_context (mctx->dfa, state->nodes.elems[i], context))
1189 return state->nodes.elems[i];
1190 return 0;
1191 }
1192
1193
1194
1195
1196
1197
1198 static Idx
1199 proceed_next_node (const re_match_context_t *mctx, Idx nregs, regmatch_t *regs,
1200 regmatch_t *prevregs,
1201 Idx *pidx, Idx node, re_node_set *eps_via_nodes,
1202 struct re_fail_stack_t *fs)
1203 {
1204 const re_dfa_t *const dfa = mctx->dfa;
1205 if (IS_EPSILON_NODE (dfa->nodes[node].type))
1206 {
1207 re_node_set *cur_nodes = &mctx->state_log[*pidx]->nodes;
1208 re_node_set *edests = &dfa->edests[node];
1209
1210 if (! re_node_set_contains (eps_via_nodes, node))
1211 {
1212 bool ok = re_node_set_insert (eps_via_nodes, node);
1213 if (__glibc_unlikely (! ok))
1214 return -2;
1215 }
1216
1217
1218 Idx dest_node = -1;
1219 for (Idx i = 0; i < edests->nelem; i++)
1220 {
1221 Idx candidate = edests->elems[i];
1222 if (!re_node_set_contains (cur_nodes, candidate))
1223 continue;
1224 if (dest_node == -1)
1225 dest_node = candidate;
1226
1227 else
1228 {
1229
1230
1231 if (re_node_set_contains (eps_via_nodes, dest_node))
1232 return candidate;
1233
1234
1235 else if (fs != NULL
1236 && push_fail_stack (fs, *pidx, candidate, nregs, regs,
1237 prevregs, eps_via_nodes))
1238 return -2;
1239
1240
1241 break;
1242 }
1243 }
1244 return dest_node;
1245 }
1246 else
1247 {
1248 Idx naccepted = 0;
1249 re_token_type_t type = dfa->nodes[node].type;
1250
1251 if (dfa->nodes[node].accept_mb)
1252 naccepted = check_node_accept_bytes (dfa, node, &mctx->input, *pidx);
1253 else if (type == OP_BACK_REF)
1254 {
1255 Idx subexp_idx = dfa->nodes[node].opr.idx + 1;
1256 if (subexp_idx < nregs)
1257 naccepted = regs[subexp_idx].rm_eo - regs[subexp_idx].rm_so;
1258 if (fs != NULL)
1259 {
1260 if (subexp_idx >= nregs
1261 || regs[subexp_idx].rm_so == -1
1262 || regs[subexp_idx].rm_eo == -1)
1263 return -1;
1264 else if (naccepted)
1265 {
1266 char *buf = (char *) re_string_get_buffer (&mctx->input);
1267 if (mctx->input.valid_len - *pidx < naccepted
1268 || (memcmp (buf + regs[subexp_idx].rm_so, buf + *pidx,
1269 naccepted)
1270 != 0))
1271 return -1;
1272 }
1273 }
1274
1275 if (naccepted == 0)
1276 {
1277 Idx dest_node;
1278 bool ok = re_node_set_insert (eps_via_nodes, node);
1279 if (__glibc_unlikely (! ok))
1280 return -2;
1281 dest_node = dfa->edests[node].elems[0];
1282 if (re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1283 dest_node))
1284 return dest_node;
1285 }
1286 }
1287
1288 if (naccepted != 0
1289 || check_node_accept (mctx, dfa->nodes + node, *pidx))
1290 {
1291 Idx dest_node = dfa->nexts[node];
1292 *pidx = (naccepted == 0) ? *pidx + 1 : *pidx + naccepted;
1293 if (fs && (*pidx > mctx->match_last || mctx->state_log[*pidx] == NULL
1294 || !re_node_set_contains (&mctx->state_log[*pidx]->nodes,
1295 dest_node)))
1296 return -1;
1297 re_node_set_empty (eps_via_nodes);
1298 return dest_node;
1299 }
1300 }
1301 return -1;
1302 }
1303
1304 static reg_errcode_t
1305 __attribute_warn_unused_result__
1306 push_fail_stack (struct re_fail_stack_t *fs, Idx str_idx, Idx dest_node,
1307 Idx nregs, regmatch_t *regs, regmatch_t *prevregs,
1308 re_node_set *eps_via_nodes)
1309 {
1310 reg_errcode_t err;
1311 Idx num = fs->num;
1312 if (num == fs->alloc)
1313 {
1314 struct re_fail_stack_ent_t *new_array;
1315 new_array = re_realloc (fs->stack, struct re_fail_stack_ent_t,
1316 fs->alloc * 2);
1317 if (new_array == NULL)
1318 return REG_ESPACE;
1319 fs->alloc *= 2;
1320 fs->stack = new_array;
1321 }
1322 fs->stack[num].idx = str_idx;
1323 fs->stack[num].node = dest_node;
1324 fs->stack[num].regs = re_malloc (regmatch_t, 2 * nregs);
1325 if (fs->stack[num].regs == NULL)
1326 return REG_ESPACE;
1327 fs->num = num + 1;
1328 memcpy (fs->stack[num].regs, regs, sizeof (regmatch_t) * nregs);
1329 memcpy (fs->stack[num].regs + nregs, prevregs, sizeof (regmatch_t) * nregs);
1330 err = re_node_set_init_copy (&fs->stack[num].eps_via_nodes, eps_via_nodes);
1331 return err;
1332 }
1333
1334 static Idx
1335 pop_fail_stack (struct re_fail_stack_t *fs, Idx *pidx, Idx nregs,
1336 regmatch_t *regs, regmatch_t *prevregs,
1337 re_node_set *eps_via_nodes)
1338 {
1339 if (fs == NULL || fs->num == 0)
1340 return -1;
1341 Idx num = --fs->num;
1342 *pidx = fs->stack[num].idx;
1343 memcpy (regs, fs->stack[num].regs, sizeof (regmatch_t) * nregs);
1344 memcpy (prevregs, fs->stack[num].regs + nregs, sizeof (regmatch_t) * nregs);
1345 re_node_set_free (eps_via_nodes);
1346 re_free (fs->stack[num].regs);
1347 *eps_via_nodes = fs->stack[num].eps_via_nodes;
1348 DEBUG_ASSERT (0 <= fs->stack[num].node);
1349 return fs->stack[num].node;
1350 }
1351
1352
1353 #define DYNARRAY_STRUCT regmatch_list
1354 #define DYNARRAY_ELEMENT regmatch_t
1355 #define DYNARRAY_PREFIX regmatch_list_
1356 #include <malloc/dynarray-skeleton.c>
1357
1358
1359
1360
1361
1362
1363 static reg_errcode_t
1364 __attribute_warn_unused_result__
1365 set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch,
1366 regmatch_t *pmatch, bool fl_backtrack)
1367 {
1368 const re_dfa_t *dfa = preg->buffer;
1369 Idx idx, cur_node;
1370 re_node_set eps_via_nodes;
1371 struct re_fail_stack_t *fs;
1372 struct re_fail_stack_t fs_body = { 0, 2, NULL };
1373 struct regmatch_list prev_match;
1374 regmatch_list_init (&prev_match);
1375
1376 DEBUG_ASSERT (nmatch > 1);
1377 DEBUG_ASSERT (mctx->state_log != NULL);
1378 if (fl_backtrack)
1379 {
1380 fs = &fs_body;
1381 fs->stack = re_malloc (struct re_fail_stack_ent_t, fs->alloc);
1382 if (fs->stack == NULL)
1383 return REG_ESPACE;
1384 }
1385 else
1386 fs = NULL;
1387
1388 cur_node = dfa->init_node;
1389 re_node_set_init_empty (&eps_via_nodes);
1390
1391 if (!regmatch_list_resize (&prev_match, nmatch))
1392 {
1393 regmatch_list_free (&prev_match);
1394 free_fail_stack_return (fs);
1395 return REG_ESPACE;
1396 }
1397 regmatch_t *prev_idx_match = regmatch_list_begin (&prev_match);
1398 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1399
1400 for (idx = pmatch[0].rm_so; idx <= pmatch[0].rm_eo ;)
1401 {
1402 update_regs (dfa, pmatch, prev_idx_match, cur_node, idx, nmatch);
1403
1404 if ((idx == pmatch[0].rm_eo && cur_node == mctx->last_node)
1405 || (fs && re_node_set_contains (&eps_via_nodes, cur_node)))
1406 {
1407 Idx reg_idx;
1408 cur_node = -1;
1409 if (fs)
1410 {
1411 for (reg_idx = 0; reg_idx < nmatch; ++reg_idx)
1412 if (pmatch[reg_idx].rm_so > -1 && pmatch[reg_idx].rm_eo == -1)
1413 {
1414 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1415 prev_idx_match, &eps_via_nodes);
1416 break;
1417 }
1418 }
1419 if (cur_node < 0)
1420 {
1421 re_node_set_free (&eps_via_nodes);
1422 regmatch_list_free (&prev_match);
1423 return free_fail_stack_return (fs);
1424 }
1425 }
1426
1427
1428 cur_node = proceed_next_node (mctx, nmatch, pmatch, prev_idx_match,
1429 &idx, cur_node,
1430 &eps_via_nodes, fs);
1431
1432 if (__glibc_unlikely (cur_node < 0))
1433 {
1434 if (__glibc_unlikely (cur_node == -2))
1435 {
1436 re_node_set_free (&eps_via_nodes);
1437 regmatch_list_free (&prev_match);
1438 free_fail_stack_return (fs);
1439 return REG_ESPACE;
1440 }
1441 cur_node = pop_fail_stack (fs, &idx, nmatch, pmatch,
1442 prev_idx_match, &eps_via_nodes);
1443 if (cur_node < 0)
1444 {
1445 re_node_set_free (&eps_via_nodes);
1446 regmatch_list_free (&prev_match);
1447 free_fail_stack_return (fs);
1448 return REG_NOMATCH;
1449 }
1450 }
1451 }
1452 re_node_set_free (&eps_via_nodes);
1453 regmatch_list_free (&prev_match);
1454 return free_fail_stack_return (fs);
1455 }
1456
1457 static reg_errcode_t
1458 free_fail_stack_return (struct re_fail_stack_t *fs)
1459 {
1460 if (fs)
1461 {
1462 Idx fs_idx;
1463 for (fs_idx = 0; fs_idx < fs->num; ++fs_idx)
1464 {
1465 re_node_set_free (&fs->stack[fs_idx].eps_via_nodes);
1466 re_free (fs->stack[fs_idx].regs);
1467 }
1468 re_free (fs->stack);
1469 }
1470 return REG_NOERROR;
1471 }
1472
1473 static void
1474 update_regs (const re_dfa_t *dfa, regmatch_t *pmatch,
1475 regmatch_t *prev_idx_match, Idx cur_node, Idx cur_idx, Idx nmatch)
1476 {
1477 int type = dfa->nodes[cur_node].type;
1478 if (type == OP_OPEN_SUBEXP)
1479 {
1480 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1481
1482
1483 if (reg_num < nmatch)
1484 {
1485 pmatch[reg_num].rm_so = cur_idx;
1486 pmatch[reg_num].rm_eo = -1;
1487 }
1488 }
1489 else if (type == OP_CLOSE_SUBEXP)
1490 {
1491
1492 Idx reg_num = dfa->nodes[cur_node].opr.idx + 1;
1493 if (reg_num < nmatch)
1494 {
1495 if (pmatch[reg_num].rm_so < cur_idx)
1496 {
1497 pmatch[reg_num].rm_eo = cur_idx;
1498
1499
1500 memcpy (prev_idx_match, pmatch, sizeof (regmatch_t) * nmatch);
1501 }
1502 else
1503 {
1504 if (dfa->nodes[cur_node].opt_subexp
1505 && prev_idx_match[reg_num].rm_so != -1)
1506
1507
1508
1509
1510
1511 memcpy (pmatch, prev_idx_match, sizeof (regmatch_t) * nmatch);
1512 else
1513
1514
1515 pmatch[reg_num].rm_eo = cur_idx;
1516 }
1517 }
1518 }
1519 }
1520
1521
1522
1523
1524
1525
1526
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541 #define STATE_NODE_CONTAINS(state,node) \
1542 ((state) != NULL && re_node_set_contains (&(state)->nodes, node))
1543
1544 static reg_errcode_t
1545 sift_states_backward (const re_match_context_t *mctx, re_sift_context_t *sctx)
1546 {
1547 reg_errcode_t err;
1548 int null_cnt = 0;
1549 Idx str_idx = sctx->last_str_idx;
1550 re_node_set cur_dest;
1551
1552 DEBUG_ASSERT (mctx->state_log != NULL && mctx->state_log[str_idx] != NULL);
1553
1554
1555
1556 err = re_node_set_init_1 (&cur_dest, sctx->last_node);
1557 if (__glibc_unlikely (err != REG_NOERROR))
1558 return err;
1559 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1560 if (__glibc_unlikely (err != REG_NOERROR))
1561 goto free_return;
1562
1563
1564 while (str_idx > 0)
1565 {
1566
1567 null_cnt = (sctx->sifted_states[str_idx] == NULL) ? null_cnt + 1 : 0;
1568 if (null_cnt > mctx->max_mb_elem_len)
1569 {
1570 memset (sctx->sifted_states, '\0',
1571 sizeof (re_dfastate_t *) * str_idx);
1572 re_node_set_free (&cur_dest);
1573 return REG_NOERROR;
1574 }
1575 re_node_set_empty (&cur_dest);
1576 --str_idx;
1577
1578 if (mctx->state_log[str_idx])
1579 {
1580 err = build_sifted_states (mctx, sctx, str_idx, &cur_dest);
1581 if (__glibc_unlikely (err != REG_NOERROR))
1582 goto free_return;
1583 }
1584
1585
1586
1587
1588
1589 err = update_cur_sifted_state (mctx, sctx, str_idx, &cur_dest);
1590 if (__glibc_unlikely (err != REG_NOERROR))
1591 goto free_return;
1592 }
1593 err = REG_NOERROR;
1594 free_return:
1595 re_node_set_free (&cur_dest);
1596 return err;
1597 }
1598
1599 static reg_errcode_t
1600 __attribute_warn_unused_result__
1601 build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx,
1602 Idx str_idx, re_node_set *cur_dest)
1603 {
1604 const re_dfa_t *const dfa = mctx->dfa;
1605 const re_node_set *cur_src = &mctx->state_log[str_idx]->non_eps_nodes;
1606 Idx i;
1607
1608
1609
1610
1611
1612
1613
1614
1615 for (i = 0; i < cur_src->nelem; i++)
1616 {
1617 Idx prev_node = cur_src->elems[i];
1618 int naccepted = 0;
1619 bool ok;
1620 DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[prev_node].type));
1621
1622
1623 if (dfa->nodes[prev_node].accept_mb)
1624 naccepted = sift_states_iter_mb (mctx, sctx, prev_node,
1625 str_idx, sctx->last_str_idx);
1626
1627
1628
1629 if (!naccepted
1630 && check_node_accept (mctx, dfa->nodes + prev_node, str_idx)
1631 && STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + 1],
1632 dfa->nexts[prev_node]))
1633 naccepted = 1;
1634
1635 if (naccepted == 0)
1636 continue;
1637
1638 if (sctx->limits.nelem)
1639 {
1640 Idx to_idx = str_idx + naccepted;
1641 if (check_dst_limits (mctx, &sctx->limits,
1642 dfa->nexts[prev_node], to_idx,
1643 prev_node, str_idx))
1644 continue;
1645 }
1646 ok = re_node_set_insert (cur_dest, prev_node);
1647 if (__glibc_unlikely (! ok))
1648 return REG_ESPACE;
1649 }
1650
1651 return REG_NOERROR;
1652 }
1653
1654
1655
1656 static reg_errcode_t
1657 clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx)
1658 {
1659 Idx top = mctx->state_log_top;
1660
1661 if ((next_state_log_idx >= mctx->input.bufs_len
1662 && mctx->input.bufs_len < mctx->input.len)
1663 || (next_state_log_idx >= mctx->input.valid_len
1664 && mctx->input.valid_len < mctx->input.len))
1665 {
1666 reg_errcode_t err;
1667 err = extend_buffers (mctx, next_state_log_idx + 1);
1668 if (__glibc_unlikely (err != REG_NOERROR))
1669 return err;
1670 }
1671
1672 if (top < next_state_log_idx)
1673 {
1674 DEBUG_ASSERT (mctx->state_log != NULL);
1675 memset (mctx->state_log + top + 1, '\0',
1676 sizeof (re_dfastate_t *) * (next_state_log_idx - top));
1677 mctx->state_log_top = next_state_log_idx;
1678 }
1679 return REG_NOERROR;
1680 }
1681
1682 static reg_errcode_t
1683 merge_state_array (const re_dfa_t *dfa, re_dfastate_t **dst,
1684 re_dfastate_t **src, Idx num)
1685 {
1686 Idx st_idx;
1687 reg_errcode_t err;
1688 for (st_idx = 0; st_idx < num; ++st_idx)
1689 {
1690 if (dst[st_idx] == NULL)
1691 dst[st_idx] = src[st_idx];
1692 else if (src[st_idx] != NULL)
1693 {
1694 re_node_set merged_set;
1695 err = re_node_set_init_union (&merged_set, &dst[st_idx]->nodes,
1696 &src[st_idx]->nodes);
1697 if (__glibc_unlikely (err != REG_NOERROR))
1698 return err;
1699 dst[st_idx] = re_acquire_state (&err, dfa, &merged_set);
1700 re_node_set_free (&merged_set);
1701 if (__glibc_unlikely (err != REG_NOERROR))
1702 return err;
1703 }
1704 }
1705 return REG_NOERROR;
1706 }
1707
1708 static reg_errcode_t
1709 update_cur_sifted_state (const re_match_context_t *mctx,
1710 re_sift_context_t *sctx, Idx str_idx,
1711 re_node_set *dest_nodes)
1712 {
1713 const re_dfa_t *const dfa = mctx->dfa;
1714 reg_errcode_t err = REG_NOERROR;
1715 const re_node_set *candidates;
1716 candidates = ((mctx->state_log[str_idx] == NULL) ? NULL
1717 : &mctx->state_log[str_idx]->nodes);
1718
1719 if (dest_nodes->nelem == 0)
1720 sctx->sifted_states[str_idx] = NULL;
1721 else
1722 {
1723 if (candidates)
1724 {
1725
1726
1727 err = add_epsilon_src_nodes (dfa, dest_nodes, candidates);
1728 if (__glibc_unlikely (err != REG_NOERROR))
1729 return err;
1730
1731
1732 if (sctx->limits.nelem)
1733 {
1734 err = check_subexp_limits (dfa, dest_nodes, candidates, &sctx->limits,
1735 mctx->bkref_ents, str_idx);
1736 if (__glibc_unlikely (err != REG_NOERROR))
1737 return err;
1738 }
1739 }
1740
1741 sctx->sifted_states[str_idx] = re_acquire_state (&err, dfa, dest_nodes);
1742 if (__glibc_unlikely (err != REG_NOERROR))
1743 return err;
1744 }
1745
1746 if (candidates && mctx->state_log[str_idx]->has_backref)
1747 {
1748 err = sift_states_bkref (mctx, sctx, str_idx, candidates);
1749 if (__glibc_unlikely (err != REG_NOERROR))
1750 return err;
1751 }
1752 return REG_NOERROR;
1753 }
1754
1755 static reg_errcode_t
1756 __attribute_warn_unused_result__
1757 add_epsilon_src_nodes (const re_dfa_t *dfa, re_node_set *dest_nodes,
1758 const re_node_set *candidates)
1759 {
1760 reg_errcode_t err = REG_NOERROR;
1761 Idx i;
1762
1763 re_dfastate_t *state = re_acquire_state (&err, dfa, dest_nodes);
1764 if (__glibc_unlikely (err != REG_NOERROR))
1765 return err;
1766
1767 if (!state->inveclosure.alloc)
1768 {
1769 err = re_node_set_alloc (&state->inveclosure, dest_nodes->nelem);
1770 if (__glibc_unlikely (err != REG_NOERROR))
1771 return REG_ESPACE;
1772 for (i = 0; i < dest_nodes->nelem; i++)
1773 {
1774 err = re_node_set_merge (&state->inveclosure,
1775 dfa->inveclosures + dest_nodes->elems[i]);
1776 if (__glibc_unlikely (err != REG_NOERROR))
1777 return REG_ESPACE;
1778 }
1779 }
1780 return re_node_set_add_intersect (dest_nodes, candidates,
1781 &state->inveclosure);
1782 }
1783
1784 static reg_errcode_t
1785 sub_epsilon_src_nodes (const re_dfa_t *dfa, Idx node, re_node_set *dest_nodes,
1786 const re_node_set *candidates)
1787 {
1788 Idx ecl_idx;
1789 reg_errcode_t err;
1790 re_node_set *inv_eclosure = dfa->inveclosures + node;
1791 re_node_set except_nodes;
1792 re_node_set_init_empty (&except_nodes);
1793 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1794 {
1795 Idx cur_node = inv_eclosure->elems[ecl_idx];
1796 if (cur_node == node)
1797 continue;
1798 if (IS_EPSILON_NODE (dfa->nodes[cur_node].type))
1799 {
1800 Idx edst1 = dfa->edests[cur_node].elems[0];
1801 Idx edst2 = ((dfa->edests[cur_node].nelem > 1)
1802 ? dfa->edests[cur_node].elems[1] : -1);
1803 if ((!re_node_set_contains (inv_eclosure, edst1)
1804 && re_node_set_contains (dest_nodes, edst1))
1805 || (edst2 > 0
1806 && !re_node_set_contains (inv_eclosure, edst2)
1807 && re_node_set_contains (dest_nodes, edst2)))
1808 {
1809 err = re_node_set_add_intersect (&except_nodes, candidates,
1810 dfa->inveclosures + cur_node);
1811 if (__glibc_unlikely (err != REG_NOERROR))
1812 {
1813 re_node_set_free (&except_nodes);
1814 return err;
1815 }
1816 }
1817 }
1818 }
1819 for (ecl_idx = 0; ecl_idx < inv_eclosure->nelem; ++ecl_idx)
1820 {
1821 Idx cur_node = inv_eclosure->elems[ecl_idx];
1822 if (!re_node_set_contains (&except_nodes, cur_node))
1823 {
1824 Idx idx = re_node_set_contains (dest_nodes, cur_node) - 1;
1825 re_node_set_remove_at (dest_nodes, idx);
1826 }
1827 }
1828 re_node_set_free (&except_nodes);
1829 return REG_NOERROR;
1830 }
1831
1832 static bool
1833 check_dst_limits (const re_match_context_t *mctx, const re_node_set *limits,
1834 Idx dst_node, Idx dst_idx, Idx src_node, Idx src_idx)
1835 {
1836 const re_dfa_t *const dfa = mctx->dfa;
1837 Idx lim_idx, src_pos, dst_pos;
1838
1839 Idx dst_bkref_idx = search_cur_bkref_entry (mctx, dst_idx);
1840 Idx src_bkref_idx = search_cur_bkref_entry (mctx, src_idx);
1841 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1842 {
1843 Idx subexp_idx;
1844 struct re_backref_cache_entry *ent;
1845 ent = mctx->bkref_ents + limits->elems[lim_idx];
1846 subexp_idx = dfa->nodes[ent->node].opr.idx;
1847
1848 dst_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1849 subexp_idx, dst_node, dst_idx,
1850 dst_bkref_idx);
1851 src_pos = check_dst_limits_calc_pos (mctx, limits->elems[lim_idx],
1852 subexp_idx, src_node, src_idx,
1853 src_bkref_idx);
1854
1855
1856
1857
1858
1859 if (src_pos == dst_pos)
1860 continue;
1861 else
1862 return true;
1863 }
1864 return false;
1865 }
1866
1867 static int
1868 check_dst_limits_calc_pos_1 (const re_match_context_t *mctx, int boundaries,
1869 Idx subexp_idx, Idx from_node, Idx bkref_idx)
1870 {
1871 const re_dfa_t *const dfa = mctx->dfa;
1872 const re_node_set *eclosures = dfa->eclosures + from_node;
1873 Idx node_idx;
1874
1875
1876
1877 for (node_idx = 0; node_idx < eclosures->nelem; ++node_idx)
1878 {
1879 Idx node = eclosures->elems[node_idx];
1880 switch (dfa->nodes[node].type)
1881 {
1882 case OP_BACK_REF:
1883 if (bkref_idx != -1)
1884 {
1885 struct re_backref_cache_entry *ent = mctx->bkref_ents + bkref_idx;
1886 do
1887 {
1888 Idx dst;
1889 int cpos;
1890
1891 if (ent->node != node)
1892 continue;
1893
1894 if (subexp_idx < BITSET_WORD_BITS
1895 && !(ent->eps_reachable_subexps_map
1896 & ((bitset_word_t) 1 << subexp_idx)))
1897 continue;
1898
1899
1900
1901
1902
1903
1904
1905 dst = dfa->edests[node].elems[0];
1906 if (dst == from_node)
1907 {
1908 if (boundaries & 1)
1909 return -1;
1910 else
1911 return 0;
1912 }
1913
1914 cpos =
1915 check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1916 dst, bkref_idx);
1917 if (cpos == -1 )
1918 return -1;
1919 if (cpos == 0 && (boundaries & 2))
1920 return 0;
1921
1922 if (subexp_idx < BITSET_WORD_BITS)
1923 ent->eps_reachable_subexps_map
1924 &= ~((bitset_word_t) 1 << subexp_idx);
1925 }
1926 while (ent++->more);
1927 }
1928 break;
1929
1930 case OP_OPEN_SUBEXP:
1931 if ((boundaries & 1) && subexp_idx == dfa->nodes[node].opr.idx)
1932 return -1;
1933 break;
1934
1935 case OP_CLOSE_SUBEXP:
1936 if ((boundaries & 2) && subexp_idx == dfa->nodes[node].opr.idx)
1937 return 0;
1938 break;
1939
1940 default:
1941 break;
1942 }
1943 }
1944
1945 return (boundaries & 2) ? 1 : 0;
1946 }
1947
1948 static int
1949 check_dst_limits_calc_pos (const re_match_context_t *mctx, Idx limit,
1950 Idx subexp_idx, Idx from_node, Idx str_idx,
1951 Idx bkref_idx)
1952 {
1953 struct re_backref_cache_entry *lim = mctx->bkref_ents + limit;
1954 int boundaries;
1955
1956
1957 if (str_idx < lim->subexp_from)
1958 return -1;
1959
1960 if (lim->subexp_to < str_idx)
1961 return 1;
1962
1963
1964 boundaries = (str_idx == lim->subexp_from);
1965 boundaries |= (str_idx == lim->subexp_to) << 1;
1966 if (boundaries == 0)
1967 return 0;
1968
1969
1970 return check_dst_limits_calc_pos_1 (mctx, boundaries, subexp_idx,
1971 from_node, bkref_idx);
1972 }
1973
1974
1975
1976
1977 static reg_errcode_t
1978 check_subexp_limits (const re_dfa_t *dfa, re_node_set *dest_nodes,
1979 const re_node_set *candidates, re_node_set *limits,
1980 struct re_backref_cache_entry *bkref_ents, Idx str_idx)
1981 {
1982 reg_errcode_t err;
1983 Idx node_idx, lim_idx;
1984
1985 for (lim_idx = 0; lim_idx < limits->nelem; ++lim_idx)
1986 {
1987 Idx subexp_idx;
1988 struct re_backref_cache_entry *ent;
1989 ent = bkref_ents + limits->elems[lim_idx];
1990
1991 if (str_idx <= ent->subexp_from || ent->str_idx < str_idx)
1992 continue;
1993
1994 subexp_idx = dfa->nodes[ent->node].opr.idx;
1995 if (ent->subexp_to == str_idx)
1996 {
1997 Idx ops_node = -1;
1998 Idx cls_node = -1;
1999 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2000 {
2001 Idx node = dest_nodes->elems[node_idx];
2002 re_token_type_t type = dfa->nodes[node].type;
2003 if (type == OP_OPEN_SUBEXP
2004 && subexp_idx == dfa->nodes[node].opr.idx)
2005 ops_node = node;
2006 else if (type == OP_CLOSE_SUBEXP
2007 && subexp_idx == dfa->nodes[node].opr.idx)
2008 cls_node = node;
2009 }
2010
2011
2012
2013 if (ops_node >= 0)
2014 {
2015 err = sub_epsilon_src_nodes (dfa, ops_node, dest_nodes,
2016 candidates);
2017 if (__glibc_unlikely (err != REG_NOERROR))
2018 return err;
2019 }
2020
2021
2022 if (cls_node >= 0)
2023 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2024 {
2025 Idx node = dest_nodes->elems[node_idx];
2026 if (!re_node_set_contains (dfa->inveclosures + node,
2027 cls_node)
2028 && !re_node_set_contains (dfa->eclosures + node,
2029 cls_node))
2030 {
2031
2032
2033 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2034 candidates);
2035 if (__glibc_unlikely (err != REG_NOERROR))
2036 return err;
2037 --node_idx;
2038 }
2039 }
2040 }
2041 else
2042 {
2043 for (node_idx = 0; node_idx < dest_nodes->nelem; ++node_idx)
2044 {
2045 Idx node = dest_nodes->elems[node_idx];
2046 re_token_type_t type = dfa->nodes[node].type;
2047 if (type == OP_CLOSE_SUBEXP || type == OP_OPEN_SUBEXP)
2048 {
2049 if (subexp_idx != dfa->nodes[node].opr.idx)
2050 continue;
2051
2052
2053 err = sub_epsilon_src_nodes (dfa, node, dest_nodes,
2054 candidates);
2055 if (__glibc_unlikely (err != REG_NOERROR))
2056 return err;
2057 }
2058 }
2059 }
2060 }
2061 return REG_NOERROR;
2062 }
2063
2064 static reg_errcode_t
2065 __attribute_warn_unused_result__
2066 sift_states_bkref (const re_match_context_t *mctx, re_sift_context_t *sctx,
2067 Idx str_idx, const re_node_set *candidates)
2068 {
2069 const re_dfa_t *const dfa = mctx->dfa;
2070 reg_errcode_t err;
2071 Idx node_idx, node;
2072 re_sift_context_t local_sctx;
2073 Idx first_idx = search_cur_bkref_entry (mctx, str_idx);
2074
2075 if (first_idx == -1)
2076 return REG_NOERROR;
2077
2078 local_sctx.sifted_states = NULL;
2079
2080 for (node_idx = 0; node_idx < candidates->nelem; ++node_idx)
2081 {
2082 Idx enabled_idx;
2083 re_token_type_t type;
2084 struct re_backref_cache_entry *entry;
2085 node = candidates->elems[node_idx];
2086 type = dfa->nodes[node].type;
2087
2088 if (node == sctx->last_node && str_idx == sctx->last_str_idx)
2089 continue;
2090 if (type != OP_BACK_REF)
2091 continue;
2092
2093 entry = mctx->bkref_ents + first_idx;
2094 enabled_idx = first_idx;
2095 do
2096 {
2097 Idx subexp_len;
2098 Idx to_idx;
2099 Idx dst_node;
2100 bool ok;
2101 re_dfastate_t *cur_state;
2102
2103 if (entry->node != node)
2104 continue;
2105 subexp_len = entry->subexp_to - entry->subexp_from;
2106 to_idx = str_idx + subexp_len;
2107 dst_node = (subexp_len ? dfa->nexts[node]
2108 : dfa->edests[node].elems[0]);
2109
2110 if (to_idx > sctx->last_str_idx
2111 || sctx->sifted_states[to_idx] == NULL
2112 || !STATE_NODE_CONTAINS (sctx->sifted_states[to_idx], dst_node)
2113 || check_dst_limits (mctx, &sctx->limits, node,
2114 str_idx, dst_node, to_idx))
2115 continue;
2116
2117 if (local_sctx.sifted_states == NULL)
2118 {
2119 local_sctx = *sctx;
2120 err = re_node_set_init_copy (&local_sctx.limits, &sctx->limits);
2121 if (__glibc_unlikely (err != REG_NOERROR))
2122 goto free_return;
2123 }
2124 local_sctx.last_node = node;
2125 local_sctx.last_str_idx = str_idx;
2126 ok = re_node_set_insert (&local_sctx.limits, enabled_idx);
2127 if (__glibc_unlikely (! ok))
2128 {
2129 err = REG_ESPACE;
2130 goto free_return;
2131 }
2132 cur_state = local_sctx.sifted_states[str_idx];
2133 err = sift_states_backward (mctx, &local_sctx);
2134 if (__glibc_unlikely (err != REG_NOERROR))
2135 goto free_return;
2136 if (sctx->limited_states != NULL)
2137 {
2138 err = merge_state_array (dfa, sctx->limited_states,
2139 local_sctx.sifted_states,
2140 str_idx + 1);
2141 if (__glibc_unlikely (err != REG_NOERROR))
2142 goto free_return;
2143 }
2144 local_sctx.sifted_states[str_idx] = cur_state;
2145 re_node_set_remove (&local_sctx.limits, enabled_idx);
2146
2147
2148 entry = mctx->bkref_ents + enabled_idx;
2149 }
2150 while (enabled_idx++, entry++->more);
2151 }
2152 err = REG_NOERROR;
2153 free_return:
2154 if (local_sctx.sifted_states != NULL)
2155 {
2156 re_node_set_free (&local_sctx.limits);
2157 }
2158
2159 return err;
2160 }
2161
2162
2163 static int
2164 sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx,
2165 Idx node_idx, Idx str_idx, Idx max_str_idx)
2166 {
2167 const re_dfa_t *const dfa = mctx->dfa;
2168 int naccepted;
2169
2170 naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx);
2171 if (naccepted > 0 && str_idx + naccepted <= max_str_idx
2172 && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted],
2173 dfa->nexts[node_idx]))
2174
2175
2176
2177 naccepted = 0;
2178
2179
2180 return naccepted;
2181 }
2182
2183
2184
2185
2186
2187
2188
2189
2190
2191 static re_dfastate_t *
2192 __attribute_warn_unused_result__
2193 transit_state (reg_errcode_t *err, re_match_context_t *mctx,
2194 re_dfastate_t *state)
2195 {
2196 re_dfastate_t **trtable;
2197 unsigned char ch;
2198
2199
2200 if (__glibc_unlikely (state->accept_mb))
2201 {
2202 *err = transit_state_mb (mctx, state);
2203 if (__glibc_unlikely (*err != REG_NOERROR))
2204 return NULL;
2205 }
2206
2207
2208 #if 0
2209 if (0)
2210
2211 return transit_state_sb (err, mctx, state);
2212 #endif
2213
2214
2215 ch = re_string_fetch_byte (&mctx->input);
2216 for (;;)
2217 {
2218 trtable = state->trtable;
2219 if (__glibc_likely (trtable != NULL))
2220 return trtable[ch];
2221
2222 trtable = state->word_trtable;
2223 if (__glibc_likely (trtable != NULL))
2224 {
2225 unsigned int context;
2226 context
2227 = re_string_context_at (&mctx->input,
2228 re_string_cur_idx (&mctx->input) - 1,
2229 mctx->eflags);
2230 if (IS_WORD_CONTEXT (context))
2231 return trtable[ch + SBC_MAX];
2232 else
2233 return trtable[ch];
2234 }
2235
2236 if (!build_trtable (mctx->dfa, state))
2237 {
2238 *err = REG_ESPACE;
2239 return NULL;
2240 }
2241
2242
2243 }
2244 }
2245
2246
2247 static re_dfastate_t *
2248 merge_state_with_log (reg_errcode_t *err, re_match_context_t *mctx,
2249 re_dfastate_t *next_state)
2250 {
2251 const re_dfa_t *const dfa = mctx->dfa;
2252 Idx cur_idx = re_string_cur_idx (&mctx->input);
2253
2254 if (cur_idx > mctx->state_log_top)
2255 {
2256 mctx->state_log[cur_idx] = next_state;
2257 mctx->state_log_top = cur_idx;
2258 }
2259 else if (mctx->state_log[cur_idx] == 0)
2260 {
2261 mctx->state_log[cur_idx] = next_state;
2262 }
2263 else
2264 {
2265 re_dfastate_t *pstate;
2266 unsigned int context;
2267 re_node_set next_nodes, *log_nodes, *table_nodes = NULL;
2268
2269
2270
2271
2272 pstate = mctx->state_log[cur_idx];
2273 log_nodes = pstate->entrance_nodes;
2274 if (next_state != NULL)
2275 {
2276 table_nodes = next_state->entrance_nodes;
2277 *err = re_node_set_init_union (&next_nodes, table_nodes,
2278 log_nodes);
2279 if (__glibc_unlikely (*err != REG_NOERROR))
2280 return NULL;
2281 }
2282 else
2283 next_nodes = *log_nodes;
2284
2285
2286
2287 context = re_string_context_at (&mctx->input,
2288 re_string_cur_idx (&mctx->input) - 1,
2289 mctx->eflags);
2290 next_state = mctx->state_log[cur_idx]
2291 = re_acquire_state_context (err, dfa, &next_nodes, context);
2292
2293
2294
2295 if (table_nodes != NULL)
2296 re_node_set_free (&next_nodes);
2297 }
2298
2299 if (__glibc_unlikely (dfa->nbackref) && next_state != NULL)
2300 {
2301
2302
2303
2304 *err = check_subexp_matching_top (mctx, &next_state->nodes,
2305 cur_idx);
2306 if (__glibc_unlikely (*err != REG_NOERROR))
2307 return NULL;
2308
2309
2310 if (next_state->has_backref)
2311 {
2312 *err = transit_state_bkref (mctx, &next_state->nodes);
2313 if (__glibc_unlikely (*err != REG_NOERROR))
2314 return NULL;
2315 next_state = mctx->state_log[cur_idx];
2316 }
2317 }
2318
2319 return next_state;
2320 }
2321
2322
2323
2324
2325 static re_dfastate_t *
2326 find_recover_state (reg_errcode_t *err, re_match_context_t *mctx)
2327 {
2328 re_dfastate_t *cur_state;
2329 do
2330 {
2331 Idx max = mctx->state_log_top;
2332 Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2333
2334 do
2335 {
2336 if (++cur_str_idx > max)
2337 return NULL;
2338 re_string_skip_bytes (&mctx->input, 1);
2339 }
2340 while (mctx->state_log[cur_str_idx] == NULL);
2341
2342 cur_state = merge_state_with_log (err, mctx, NULL);
2343 }
2344 while (*err == REG_NOERROR && cur_state == NULL);
2345 return cur_state;
2346 }
2347
2348
2349
2350
2351
2352
2353
2354
2355 static reg_errcode_t
2356 check_subexp_matching_top (re_match_context_t *mctx, re_node_set *cur_nodes,
2357 Idx str_idx)
2358 {
2359 const re_dfa_t *const dfa = mctx->dfa;
2360 Idx node_idx;
2361 reg_errcode_t err;
2362
2363
2364
2365
2366
2367
2368 for (node_idx = 0; node_idx < cur_nodes->nelem; ++node_idx)
2369 {
2370 Idx node = cur_nodes->elems[node_idx];
2371 if (dfa->nodes[node].type == OP_OPEN_SUBEXP
2372 && dfa->nodes[node].opr.idx < BITSET_WORD_BITS
2373 && (dfa->used_bkref_map
2374 & ((bitset_word_t) 1 << dfa->nodes[node].opr.idx)))
2375 {
2376 err = match_ctx_add_subtop (mctx, node, str_idx);
2377 if (__glibc_unlikely (err != REG_NOERROR))
2378 return err;
2379 }
2380 }
2381 return REG_NOERROR;
2382 }
2383
2384 #if 0
2385
2386
2387
2388 static re_dfastate_t *
2389 transit_state_sb (reg_errcode_t *err, re_match_context_t *mctx,
2390 re_dfastate_t *state)
2391 {
2392 const re_dfa_t *const dfa = mctx->dfa;
2393 re_node_set next_nodes;
2394 re_dfastate_t *next_state;
2395 Idx node_cnt, cur_str_idx = re_string_cur_idx (&mctx->input);
2396 unsigned int context;
2397
2398 *err = re_node_set_alloc (&next_nodes, state->nodes.nelem + 1);
2399 if (__glibc_unlikely (*err != REG_NOERROR))
2400 return NULL;
2401 for (node_cnt = 0; node_cnt < state->nodes.nelem; ++node_cnt)
2402 {
2403 Idx cur_node = state->nodes.elems[node_cnt];
2404 if (check_node_accept (mctx, dfa->nodes + cur_node, cur_str_idx))
2405 {
2406 *err = re_node_set_merge (&next_nodes,
2407 dfa->eclosures + dfa->nexts[cur_node]);
2408 if (__glibc_unlikely (*err != REG_NOERROR))
2409 {
2410 re_node_set_free (&next_nodes);
2411 return NULL;
2412 }
2413 }
2414 }
2415 context = re_string_context_at (&mctx->input, cur_str_idx, mctx->eflags);
2416 next_state = re_acquire_state_context (err, dfa, &next_nodes, context);
2417
2418
2419
2420 re_node_set_free (&next_nodes);
2421 re_string_skip_bytes (&mctx->input, 1);
2422 return next_state;
2423 }
2424 #endif
2425
2426 static reg_errcode_t
2427 transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate)
2428 {
2429 const re_dfa_t *const dfa = mctx->dfa;
2430 reg_errcode_t err;
2431 Idx i;
2432
2433 for (i = 0; i < pstate->nodes.nelem; ++i)
2434 {
2435 re_node_set dest_nodes, *new_nodes;
2436 Idx cur_node_idx = pstate->nodes.elems[i];
2437 int naccepted;
2438 Idx dest_idx;
2439 unsigned int context;
2440 re_dfastate_t *dest_state;
2441
2442 if (!dfa->nodes[cur_node_idx].accept_mb)
2443 continue;
2444
2445 if (dfa->nodes[cur_node_idx].constraint)
2446 {
2447 context = re_string_context_at (&mctx->input,
2448 re_string_cur_idx (&mctx->input),
2449 mctx->eflags);
2450 if (NOT_SATISFY_NEXT_CONSTRAINT (dfa->nodes[cur_node_idx].constraint,
2451 context))
2452 continue;
2453 }
2454
2455
2456 naccepted = check_node_accept_bytes (dfa, cur_node_idx, &mctx->input,
2457 re_string_cur_idx (&mctx->input));
2458 if (naccepted == 0)
2459 continue;
2460
2461
2462 dest_idx = re_string_cur_idx (&mctx->input) + naccepted;
2463 mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted
2464 : mctx->max_mb_elem_len);
2465 err = clean_state_log_if_needed (mctx, dest_idx);
2466 if (__glibc_unlikely (err != REG_NOERROR))
2467 return err;
2468 DEBUG_ASSERT (dfa->nexts[cur_node_idx] != -1);
2469 new_nodes = dfa->eclosures + dfa->nexts[cur_node_idx];
2470
2471 dest_state = mctx->state_log[dest_idx];
2472 if (dest_state == NULL)
2473 dest_nodes = *new_nodes;
2474 else
2475 {
2476 err = re_node_set_init_union (&dest_nodes,
2477 dest_state->entrance_nodes, new_nodes);
2478 if (__glibc_unlikely (err != REG_NOERROR))
2479 return err;
2480 }
2481 context = re_string_context_at (&mctx->input, dest_idx - 1,
2482 mctx->eflags);
2483 mctx->state_log[dest_idx]
2484 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2485 if (dest_state != NULL)
2486 re_node_set_free (&dest_nodes);
2487 if (__glibc_unlikely (mctx->state_log[dest_idx] == NULL
2488 && err != REG_NOERROR))
2489 return err;
2490 }
2491 return REG_NOERROR;
2492 }
2493
2494 static reg_errcode_t
2495 transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes)
2496 {
2497 const re_dfa_t *const dfa = mctx->dfa;
2498 reg_errcode_t err;
2499 Idx i;
2500 Idx cur_str_idx = re_string_cur_idx (&mctx->input);
2501
2502 for (i = 0; i < nodes->nelem; ++i)
2503 {
2504 Idx dest_str_idx, prev_nelem, bkc_idx;
2505 Idx node_idx = nodes->elems[i];
2506 unsigned int context;
2507 const re_token_t *node = dfa->nodes + node_idx;
2508 re_node_set *new_dest_nodes;
2509
2510
2511 if (node->type != OP_BACK_REF)
2512 continue;
2513
2514 if (node->constraint)
2515 {
2516 context = re_string_context_at (&mctx->input, cur_str_idx,
2517 mctx->eflags);
2518 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
2519 continue;
2520 }
2521
2522
2523
2524 bkc_idx = mctx->nbkref_ents;
2525 err = get_subexp (mctx, node_idx, cur_str_idx);
2526 if (__glibc_unlikely (err != REG_NOERROR))
2527 goto free_return;
2528
2529
2530
2531 DEBUG_ASSERT (dfa->nexts[node_idx] != -1);
2532 for (; bkc_idx < mctx->nbkref_ents; ++bkc_idx)
2533 {
2534 Idx subexp_len;
2535 re_dfastate_t *dest_state;
2536 struct re_backref_cache_entry *bkref_ent;
2537 bkref_ent = mctx->bkref_ents + bkc_idx;
2538 if (bkref_ent->node != node_idx || bkref_ent->str_idx != cur_str_idx)
2539 continue;
2540 subexp_len = bkref_ent->subexp_to - bkref_ent->subexp_from;
2541 new_dest_nodes = (subexp_len == 0
2542 ? dfa->eclosures + dfa->edests[node_idx].elems[0]
2543 : dfa->eclosures + dfa->nexts[node_idx]);
2544 dest_str_idx = (cur_str_idx + bkref_ent->subexp_to
2545 - bkref_ent->subexp_from);
2546 context = re_string_context_at (&mctx->input, dest_str_idx - 1,
2547 mctx->eflags);
2548 dest_state = mctx->state_log[dest_str_idx];
2549 prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0
2550 : mctx->state_log[cur_str_idx]->nodes.nelem);
2551
2552 if (dest_state == NULL)
2553 {
2554 mctx->state_log[dest_str_idx]
2555 = re_acquire_state_context (&err, dfa, new_dest_nodes,
2556 context);
2557 if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2558 && err != REG_NOERROR))
2559 goto free_return;
2560 }
2561 else
2562 {
2563 re_node_set dest_nodes;
2564 err = re_node_set_init_union (&dest_nodes,
2565 dest_state->entrance_nodes,
2566 new_dest_nodes);
2567 if (__glibc_unlikely (err != REG_NOERROR))
2568 {
2569 re_node_set_free (&dest_nodes);
2570 goto free_return;
2571 }
2572 mctx->state_log[dest_str_idx]
2573 = re_acquire_state_context (&err, dfa, &dest_nodes, context);
2574 re_node_set_free (&dest_nodes);
2575 if (__glibc_unlikely (mctx->state_log[dest_str_idx] == NULL
2576 && err != REG_NOERROR))
2577 goto free_return;
2578 }
2579
2580
2581 if (subexp_len == 0
2582 && mctx->state_log[cur_str_idx]->nodes.nelem > prev_nelem)
2583 {
2584 err = check_subexp_matching_top (mctx, new_dest_nodes,
2585 cur_str_idx);
2586 if (__glibc_unlikely (err != REG_NOERROR))
2587 goto free_return;
2588 err = transit_state_bkref (mctx, new_dest_nodes);
2589 if (__glibc_unlikely (err != REG_NOERROR))
2590 goto free_return;
2591 }
2592 }
2593 }
2594 err = REG_NOERROR;
2595 free_return:
2596 return err;
2597 }
2598
2599
2600
2601
2602
2603
2604
2605 static reg_errcode_t
2606 __attribute_warn_unused_result__
2607 get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx)
2608 {
2609 const re_dfa_t *const dfa = mctx->dfa;
2610 Idx subexp_num, sub_top_idx;
2611 const char *buf = (const char *) re_string_get_buffer (&mctx->input);
2612
2613 Idx cache_idx = search_cur_bkref_entry (mctx, bkref_str_idx);
2614 if (cache_idx != -1)
2615 {
2616 const struct re_backref_cache_entry *entry
2617 = mctx->bkref_ents + cache_idx;
2618 do
2619 if (entry->node == bkref_node)
2620 return REG_NOERROR;
2621 while (entry++->more);
2622 }
2623
2624 subexp_num = dfa->nodes[bkref_node].opr.idx;
2625
2626
2627 for (sub_top_idx = 0; sub_top_idx < mctx->nsub_tops; ++sub_top_idx)
2628 {
2629 reg_errcode_t err;
2630 re_sub_match_top_t *sub_top = mctx->sub_tops[sub_top_idx];
2631 re_sub_match_last_t *sub_last;
2632 Idx sub_last_idx, sl_str, bkref_str_off;
2633
2634 if (dfa->nodes[sub_top->node].opr.idx != subexp_num)
2635 continue;
2636
2637 sl_str = sub_top->str_idx;
2638 bkref_str_off = bkref_str_idx;
2639
2640
2641 for (sub_last_idx = 0; sub_last_idx < sub_top->nlasts; ++sub_last_idx)
2642 {
2643 regoff_t sl_str_diff;
2644 sub_last = sub_top->lasts[sub_last_idx];
2645 sl_str_diff = sub_last->str_idx - sl_str;
2646
2647
2648 if (sl_str_diff > 0)
2649 {
2650 if (__glibc_unlikely (bkref_str_off + sl_str_diff
2651 > mctx->input.valid_len))
2652 {
2653
2654 if (bkref_str_off + sl_str_diff > mctx->input.len)
2655 break;
2656
2657 err = clean_state_log_if_needed (mctx,
2658 bkref_str_off
2659 + sl_str_diff);
2660 if (__glibc_unlikely (err != REG_NOERROR))
2661 return err;
2662 buf = (const char *) re_string_get_buffer (&mctx->input);
2663 }
2664 if (memcmp (buf + bkref_str_off, buf + sl_str, sl_str_diff) != 0)
2665
2666 break;
2667 }
2668 bkref_str_off += sl_str_diff;
2669 sl_str += sl_str_diff;
2670 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2671 bkref_str_idx);
2672
2673
2674
2675 buf = (const char *) re_string_get_buffer (&mctx->input);
2676
2677 if (err == REG_NOMATCH)
2678 continue;
2679 if (__glibc_unlikely (err != REG_NOERROR))
2680 return err;
2681 }
2682
2683 if (sub_last_idx < sub_top->nlasts)
2684 continue;
2685 if (sub_last_idx > 0)
2686 ++sl_str;
2687
2688 for (; sl_str <= bkref_str_idx; ++sl_str)
2689 {
2690 Idx cls_node;
2691 regoff_t sl_str_off;
2692 const re_node_set *nodes;
2693 sl_str_off = sl_str - sub_top->str_idx;
2694
2695
2696 if (sl_str_off > 0)
2697 {
2698 if (__glibc_unlikely (bkref_str_off >= mctx->input.valid_len))
2699 {
2700
2701 if (bkref_str_off >= mctx->input.len)
2702 break;
2703
2704 err = extend_buffers (mctx, bkref_str_off + 1);
2705 if (__glibc_unlikely (err != REG_NOERROR))
2706 return err;
2707
2708 buf = (const char *) re_string_get_buffer (&mctx->input);
2709 }
2710 if (buf [bkref_str_off++] != buf[sl_str - 1])
2711 break;
2712
2713 }
2714 if (mctx->state_log[sl_str] == NULL)
2715 continue;
2716
2717 nodes = &mctx->state_log[sl_str]->nodes;
2718 cls_node = find_subexp_node (dfa, nodes, subexp_num,
2719 OP_CLOSE_SUBEXP);
2720 if (cls_node == -1)
2721 continue;
2722 if (sub_top->path == NULL)
2723 {
2724 sub_top->path = calloc (sizeof (state_array_t),
2725 sl_str - sub_top->str_idx + 1);
2726 if (sub_top->path == NULL)
2727 return REG_ESPACE;
2728 }
2729
2730
2731 err = check_arrival (mctx, sub_top->path, sub_top->node,
2732 sub_top->str_idx, cls_node, sl_str,
2733 OP_CLOSE_SUBEXP);
2734 if (err == REG_NOMATCH)
2735 continue;
2736 if (__glibc_unlikely (err != REG_NOERROR))
2737 return err;
2738 sub_last = match_ctx_add_sublast (sub_top, cls_node, sl_str);
2739 if (__glibc_unlikely (sub_last == NULL))
2740 return REG_ESPACE;
2741 err = get_subexp_sub (mctx, sub_top, sub_last, bkref_node,
2742 bkref_str_idx);
2743 buf = (const char *) re_string_get_buffer (&mctx->input);
2744 if (err == REG_NOMATCH)
2745 continue;
2746 if (__glibc_unlikely (err != REG_NOERROR))
2747 return err;
2748 }
2749 }
2750 return REG_NOERROR;
2751 }
2752
2753
2754
2755
2756
2757
2758
2759 static reg_errcode_t
2760 get_subexp_sub (re_match_context_t *mctx, const re_sub_match_top_t *sub_top,
2761 re_sub_match_last_t *sub_last, Idx bkref_node, Idx bkref_str)
2762 {
2763 reg_errcode_t err;
2764 Idx to_idx;
2765
2766 err = check_arrival (mctx, &sub_last->path, sub_last->node,
2767 sub_last->str_idx, bkref_node, bkref_str,
2768 OP_OPEN_SUBEXP);
2769 if (err != REG_NOERROR)
2770 return err;
2771 err = match_ctx_add_entry (mctx, bkref_node, bkref_str, sub_top->str_idx,
2772 sub_last->str_idx);
2773 if (__glibc_unlikely (err != REG_NOERROR))
2774 return err;
2775 to_idx = bkref_str + sub_last->str_idx - sub_top->str_idx;
2776 return clean_state_log_if_needed (mctx, to_idx);
2777 }
2778
2779
2780
2781
2782
2783
2784
2785
2786
2787 static Idx
2788 find_subexp_node (const re_dfa_t *dfa, const re_node_set *nodes,
2789 Idx subexp_idx, int type)
2790 {
2791 Idx cls_idx;
2792 for (cls_idx = 0; cls_idx < nodes->nelem; ++cls_idx)
2793 {
2794 Idx cls_node = nodes->elems[cls_idx];
2795 const re_token_t *node = dfa->nodes + cls_node;
2796 if (node->type == type
2797 && node->opr.idx == subexp_idx)
2798 return cls_node;
2799 }
2800 return -1;
2801 }
2802
2803
2804
2805
2806
2807
2808
2809 static reg_errcode_t
2810 __attribute_warn_unused_result__
2811 check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node,
2812 Idx top_str, Idx last_node, Idx last_str, int type)
2813 {
2814 const re_dfa_t *const dfa = mctx->dfa;
2815 reg_errcode_t err = REG_NOERROR;
2816 Idx subexp_num, backup_cur_idx, str_idx, null_cnt;
2817 re_dfastate_t *cur_state = NULL;
2818 re_node_set *cur_nodes, next_nodes;
2819 re_dfastate_t **backup_state_log;
2820 unsigned int context;
2821
2822 subexp_num = dfa->nodes[top_node].opr.idx;
2823
2824 if (__glibc_unlikely (path->alloc < last_str + mctx->max_mb_elem_len + 1))
2825 {
2826 re_dfastate_t **new_array;
2827 Idx old_alloc = path->alloc;
2828 Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1;
2829 Idx new_alloc;
2830 if (__glibc_unlikely (IDX_MAX - old_alloc < incr_alloc))
2831 return REG_ESPACE;
2832 new_alloc = old_alloc + incr_alloc;
2833 if (__glibc_unlikely (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc))
2834 return REG_ESPACE;
2835 new_array = re_realloc (path->array, re_dfastate_t *, new_alloc);
2836 if (__glibc_unlikely (new_array == NULL))
2837 return REG_ESPACE;
2838 path->array = new_array;
2839 path->alloc = new_alloc;
2840 memset (new_array + old_alloc, '\0',
2841 sizeof (re_dfastate_t *) * (path->alloc - old_alloc));
2842 }
2843
2844 str_idx = path->next_idx ? path->next_idx : top_str;
2845
2846
2847 backup_state_log = mctx->state_log;
2848 backup_cur_idx = mctx->input.cur_idx;
2849 mctx->state_log = path->array;
2850 mctx->input.cur_idx = str_idx;
2851
2852
2853 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2854 if (str_idx == top_str)
2855 {
2856 err = re_node_set_init_1 (&next_nodes, top_node);
2857 if (__glibc_unlikely (err != REG_NOERROR))
2858 return err;
2859 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2860 if (__glibc_unlikely (err != REG_NOERROR))
2861 {
2862 re_node_set_free (&next_nodes);
2863 return err;
2864 }
2865 }
2866 else
2867 {
2868 cur_state = mctx->state_log[str_idx];
2869 if (cur_state && cur_state->has_backref)
2870 {
2871 err = re_node_set_init_copy (&next_nodes, &cur_state->nodes);
2872 if (__glibc_unlikely (err != REG_NOERROR))
2873 return err;
2874 }
2875 else
2876 re_node_set_init_empty (&next_nodes);
2877 }
2878 if (str_idx == top_str || (cur_state && cur_state->has_backref))
2879 {
2880 if (next_nodes.nelem)
2881 {
2882 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2883 subexp_num, type);
2884 if (__glibc_unlikely (err != REG_NOERROR))
2885 {
2886 re_node_set_free (&next_nodes);
2887 return err;
2888 }
2889 }
2890 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2891 if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2892 {
2893 re_node_set_free (&next_nodes);
2894 return err;
2895 }
2896 mctx->state_log[str_idx] = cur_state;
2897 }
2898
2899 for (null_cnt = 0; str_idx < last_str && null_cnt <= mctx->max_mb_elem_len;)
2900 {
2901 re_node_set_empty (&next_nodes);
2902 if (mctx->state_log[str_idx + 1])
2903 {
2904 err = re_node_set_merge (&next_nodes,
2905 &mctx->state_log[str_idx + 1]->nodes);
2906 if (__glibc_unlikely (err != REG_NOERROR))
2907 {
2908 re_node_set_free (&next_nodes);
2909 return err;
2910 }
2911 }
2912 if (cur_state)
2913 {
2914 err = check_arrival_add_next_nodes (mctx, str_idx,
2915 &cur_state->non_eps_nodes,
2916 &next_nodes);
2917 if (__glibc_unlikely (err != REG_NOERROR))
2918 {
2919 re_node_set_free (&next_nodes);
2920 return err;
2921 }
2922 }
2923 ++str_idx;
2924 if (next_nodes.nelem)
2925 {
2926 err = check_arrival_expand_ecl (dfa, &next_nodes, subexp_num, type);
2927 if (__glibc_unlikely (err != REG_NOERROR))
2928 {
2929 re_node_set_free (&next_nodes);
2930 return err;
2931 }
2932 err = expand_bkref_cache (mctx, &next_nodes, str_idx,
2933 subexp_num, type);
2934 if (__glibc_unlikely (err != REG_NOERROR))
2935 {
2936 re_node_set_free (&next_nodes);
2937 return err;
2938 }
2939 }
2940 context = re_string_context_at (&mctx->input, str_idx - 1, mctx->eflags);
2941 cur_state = re_acquire_state_context (&err, dfa, &next_nodes, context);
2942 if (__glibc_unlikely (cur_state == NULL && err != REG_NOERROR))
2943 {
2944 re_node_set_free (&next_nodes);
2945 return err;
2946 }
2947 mctx->state_log[str_idx] = cur_state;
2948 null_cnt = cur_state == NULL ? null_cnt + 1 : 0;
2949 }
2950 re_node_set_free (&next_nodes);
2951 cur_nodes = (mctx->state_log[last_str] == NULL ? NULL
2952 : &mctx->state_log[last_str]->nodes);
2953 path->next_idx = str_idx;
2954
2955
2956 mctx->state_log = backup_state_log;
2957 mctx->input.cur_idx = backup_cur_idx;
2958
2959
2960 if (cur_nodes != NULL && re_node_set_contains (cur_nodes, last_node))
2961 return REG_NOERROR;
2962
2963 return REG_NOMATCH;
2964 }
2965
2966
2967
2968
2969
2970
2971
2972
2973
2974 static reg_errcode_t
2975 __attribute_warn_unused_result__
2976 check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx,
2977 re_node_set *cur_nodes, re_node_set *next_nodes)
2978 {
2979 const re_dfa_t *const dfa = mctx->dfa;
2980 bool ok;
2981 Idx cur_idx;
2982 reg_errcode_t err = REG_NOERROR;
2983 re_node_set union_set;
2984 re_node_set_init_empty (&union_set);
2985 for (cur_idx = 0; cur_idx < cur_nodes->nelem; ++cur_idx)
2986 {
2987 int naccepted = 0;
2988 Idx cur_node = cur_nodes->elems[cur_idx];
2989 DEBUG_ASSERT (!IS_EPSILON_NODE (dfa->nodes[cur_node].type));
2990
2991
2992 if (dfa->nodes[cur_node].accept_mb)
2993 {
2994 naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input,
2995 str_idx);
2996 if (naccepted > 1)
2997 {
2998 re_dfastate_t *dest_state;
2999 Idx next_node = dfa->nexts[cur_node];
3000 Idx next_idx = str_idx + naccepted;
3001 dest_state = mctx->state_log[next_idx];
3002 re_node_set_empty (&union_set);
3003 if (dest_state)
3004 {
3005 err = re_node_set_merge (&union_set, &dest_state->nodes);
3006 if (__glibc_unlikely (err != REG_NOERROR))
3007 {
3008 re_node_set_free (&union_set);
3009 return err;
3010 }
3011 }
3012 ok = re_node_set_insert (&union_set, next_node);
3013 if (__glibc_unlikely (! ok))
3014 {
3015 re_node_set_free (&union_set);
3016 return REG_ESPACE;
3017 }
3018 mctx->state_log[next_idx] = re_acquire_state (&err, dfa,
3019 &union_set);
3020 if (__glibc_unlikely (mctx->state_log[next_idx] == NULL
3021 && err != REG_NOERROR))
3022 {
3023 re_node_set_free (&union_set);
3024 return err;
3025 }
3026 }
3027 }
3028
3029 if (naccepted
3030 || check_node_accept (mctx, dfa->nodes + cur_node, str_idx))
3031 {
3032 ok = re_node_set_insert (next_nodes, dfa->nexts[cur_node]);
3033 if (__glibc_unlikely (! ok))
3034 {
3035 re_node_set_free (&union_set);
3036 return REG_ESPACE;
3037 }
3038 }
3039 }
3040 re_node_set_free (&union_set);
3041 return REG_NOERROR;
3042 }
3043
3044
3045
3046
3047
3048
3049
3050 static reg_errcode_t
3051 check_arrival_expand_ecl (const re_dfa_t *dfa, re_node_set *cur_nodes,
3052 Idx ex_subexp, int type)
3053 {
3054 reg_errcode_t err;
3055 Idx idx, outside_node;
3056 re_node_set new_nodes;
3057 DEBUG_ASSERT (cur_nodes->nelem);
3058 err = re_node_set_alloc (&new_nodes, cur_nodes->nelem);
3059 if (__glibc_unlikely (err != REG_NOERROR))
3060 return err;
3061
3062
3063
3064 for (idx = 0; idx < cur_nodes->nelem; ++idx)
3065 {
3066 Idx cur_node = cur_nodes->elems[idx];
3067 const re_node_set *eclosure = dfa->eclosures + cur_node;
3068 outside_node = find_subexp_node (dfa, eclosure, ex_subexp, type);
3069 if (outside_node == -1)
3070 {
3071
3072 err = re_node_set_merge (&new_nodes, eclosure);
3073 if (__glibc_unlikely (err != REG_NOERROR))
3074 {
3075 re_node_set_free (&new_nodes);
3076 return err;
3077 }
3078 }
3079 else
3080 {
3081
3082 err = check_arrival_expand_ecl_sub (dfa, &new_nodes, cur_node,
3083 ex_subexp, type);
3084 if (__glibc_unlikely (err != REG_NOERROR))
3085 {
3086 re_node_set_free (&new_nodes);
3087 return err;
3088 }
3089 }
3090 }
3091 re_node_set_free (cur_nodes);
3092 *cur_nodes = new_nodes;
3093 return REG_NOERROR;
3094 }
3095
3096
3097
3098
3099
3100 static reg_errcode_t
3101 __attribute_warn_unused_result__
3102 check_arrival_expand_ecl_sub (const re_dfa_t *dfa, re_node_set *dst_nodes,
3103 Idx target, Idx ex_subexp, int type)
3104 {
3105 Idx cur_node;
3106 for (cur_node = target; !re_node_set_contains (dst_nodes, cur_node);)
3107 {
3108 bool ok;
3109
3110 if (dfa->nodes[cur_node].type == type
3111 && dfa->nodes[cur_node].opr.idx == ex_subexp)
3112 {
3113 if (type == OP_CLOSE_SUBEXP)
3114 {
3115 ok = re_node_set_insert (dst_nodes, cur_node);
3116 if (__glibc_unlikely (! ok))
3117 return REG_ESPACE;
3118 }
3119 break;
3120 }
3121 ok = re_node_set_insert (dst_nodes, cur_node);
3122 if (__glibc_unlikely (! ok))
3123 return REG_ESPACE;
3124 if (dfa->edests[cur_node].nelem == 0)
3125 break;
3126 if (dfa->edests[cur_node].nelem == 2)
3127 {
3128 reg_errcode_t err;
3129 err = check_arrival_expand_ecl_sub (dfa, dst_nodes,
3130 dfa->edests[cur_node].elems[1],
3131 ex_subexp, type);
3132 if (__glibc_unlikely (err != REG_NOERROR))
3133 return err;
3134 }
3135 cur_node = dfa->edests[cur_node].elems[0];
3136 }
3137 return REG_NOERROR;
3138 }
3139
3140
3141
3142
3143
3144
3145 static reg_errcode_t
3146 __attribute_warn_unused_result__
3147 expand_bkref_cache (re_match_context_t *mctx, re_node_set *cur_nodes,
3148 Idx cur_str, Idx subexp_num, int type)
3149 {
3150 const re_dfa_t *const dfa = mctx->dfa;
3151 reg_errcode_t err;
3152 Idx cache_idx_start = search_cur_bkref_entry (mctx, cur_str);
3153 struct re_backref_cache_entry *ent;
3154
3155 if (cache_idx_start == -1)
3156 return REG_NOERROR;
3157
3158 restart:
3159 ent = mctx->bkref_ents + cache_idx_start;
3160 do
3161 {
3162 Idx to_idx, next_node;
3163
3164
3165 if (!re_node_set_contains (cur_nodes, ent->node))
3166 continue;
3167
3168 to_idx = cur_str + ent->subexp_to - ent->subexp_from;
3169
3170
3171 if (to_idx == cur_str)
3172 {
3173
3174
3175 re_node_set new_dests;
3176 reg_errcode_t err2, err3;
3177 next_node = dfa->edests[ent->node].elems[0];
3178 if (re_node_set_contains (cur_nodes, next_node))
3179 continue;
3180 err = re_node_set_init_1 (&new_dests, next_node);
3181 err2 = check_arrival_expand_ecl (dfa, &new_dests, subexp_num, type);
3182 err3 = re_node_set_merge (cur_nodes, &new_dests);
3183 re_node_set_free (&new_dests);
3184 if (__glibc_unlikely (err != REG_NOERROR || err2 != REG_NOERROR
3185 || err3 != REG_NOERROR))
3186 {
3187 err = (err != REG_NOERROR ? err
3188 : (err2 != REG_NOERROR ? err2 : err3));
3189 return err;
3190 }
3191
3192 goto restart;
3193 }
3194 else
3195 {
3196 re_node_set union_set;
3197 next_node = dfa->nexts[ent->node];
3198 if (mctx->state_log[to_idx])
3199 {
3200 bool ok;
3201 if (re_node_set_contains (&mctx->state_log[to_idx]->nodes,
3202 next_node))
3203 continue;
3204 err = re_node_set_init_copy (&union_set,
3205 &mctx->state_log[to_idx]->nodes);
3206 ok = re_node_set_insert (&union_set, next_node);
3207 if (__glibc_unlikely (err != REG_NOERROR || ! ok))
3208 {
3209 re_node_set_free (&union_set);
3210 err = err != REG_NOERROR ? err : REG_ESPACE;
3211 return err;
3212 }
3213 }
3214 else
3215 {
3216 err = re_node_set_init_1 (&union_set, next_node);
3217 if (__glibc_unlikely (err != REG_NOERROR))
3218 return err;
3219 }
3220 mctx->state_log[to_idx] = re_acquire_state (&err, dfa, &union_set);
3221 re_node_set_free (&union_set);
3222 if (__glibc_unlikely (mctx->state_log[to_idx] == NULL
3223 && err != REG_NOERROR))
3224 return err;
3225 }
3226 }
3227 while (ent++->more);
3228 return REG_NOERROR;
3229 }
3230
3231
3232
3233
3234 static bool __attribute_noinline__
3235 build_trtable (const re_dfa_t *dfa, re_dfastate_t *state)
3236 {
3237 reg_errcode_t err;
3238 Idx i, j;
3239 int ch;
3240 bool need_word_trtable = false;
3241 bitset_word_t elem, mask;
3242 Idx ndests;
3243 re_dfastate_t **trtable;
3244 re_dfastate_t *dest_states[SBC_MAX];
3245 re_dfastate_t *dest_states_word[SBC_MAX];
3246 re_dfastate_t *dest_states_nl[SBC_MAX];
3247 re_node_set follows;
3248 bitset_t acceptable;
3249
3250
3251
3252
3253
3254 re_node_set dests_node[SBC_MAX];
3255 bitset_t dests_ch[SBC_MAX];
3256
3257
3258 state->word_trtable = state->trtable = NULL;
3259
3260
3261
3262 ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch);
3263 if (__glibc_unlikely (ndests <= 0))
3264 {
3265
3266 if (ndests == 0)
3267 {
3268 state->trtable = (re_dfastate_t **)
3269 calloc (sizeof (re_dfastate_t *), SBC_MAX);
3270 if (__glibc_unlikely (state->trtable == NULL))
3271 return false;
3272 return true;
3273 }
3274 return false;
3275 }
3276
3277 err = re_node_set_alloc (&follows, ndests + 1);
3278 if (__glibc_unlikely (err != REG_NOERROR))
3279 {
3280 out_free:
3281 re_node_set_free (&follows);
3282 for (i = 0; i < ndests; ++i)
3283 re_node_set_free (dests_node + i);
3284 return false;
3285 }
3286
3287 bitset_empty (acceptable);
3288
3289
3290 for (i = 0; i < ndests; ++i)
3291 {
3292 Idx next_node;
3293 re_node_set_empty (&follows);
3294
3295 for (j = 0; j < dests_node[i].nelem; ++j)
3296 {
3297 next_node = dfa->nexts[dests_node[i].elems[j]];
3298 if (next_node != -1)
3299 {
3300 err = re_node_set_merge (&follows, dfa->eclosures + next_node);
3301 if (__glibc_unlikely (err != REG_NOERROR))
3302 goto out_free;
3303 }
3304 }
3305 dest_states[i] = re_acquire_state_context (&err, dfa, &follows, 0);
3306 if (__glibc_unlikely (dest_states[i] == NULL && err != REG_NOERROR))
3307 goto out_free;
3308
3309
3310 if (dest_states[i]->has_constraint)
3311 {
3312 dest_states_word[i] = re_acquire_state_context (&err, dfa, &follows,
3313 CONTEXT_WORD);
3314 if (__glibc_unlikely (dest_states_word[i] == NULL
3315 && err != REG_NOERROR))
3316 goto out_free;
3317
3318 if (dest_states[i] != dest_states_word[i] && dfa->mb_cur_max > 1)
3319 need_word_trtable = true;
3320
3321 dest_states_nl[i] = re_acquire_state_context (&err, dfa, &follows,
3322 CONTEXT_NEWLINE);
3323 if (__glibc_unlikely (dest_states_nl[i] == NULL && err != REG_NOERROR))
3324 goto out_free;
3325 }
3326 else
3327 {
3328 dest_states_word[i] = dest_states[i];
3329 dest_states_nl[i] = dest_states[i];
3330 }
3331 bitset_merge (acceptable, dests_ch[i]);
3332 }
3333
3334 if (!__glibc_unlikely (need_word_trtable))
3335 {
3336
3337
3338
3339
3340 trtable = state->trtable =
3341 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), SBC_MAX);
3342 if (__glibc_unlikely (trtable == NULL))
3343 goto out_free;
3344
3345
3346 for (i = 0; i < BITSET_WORDS; ++i)
3347 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3348 elem;
3349 mask <<= 1, elem >>= 1, ++ch)
3350 if (__glibc_unlikely (elem & 1))
3351 {
3352
3353
3354 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3355 ;
3356
3357
3358 if (dfa->word_char[i] & mask)
3359 trtable[ch] = dest_states_word[j];
3360 else
3361 trtable[ch] = dest_states[j];
3362 }
3363 }
3364 else
3365 {
3366
3367
3368
3369
3370
3371 trtable = state->word_trtable =
3372 (re_dfastate_t **) calloc (sizeof (re_dfastate_t *), 2 * SBC_MAX);
3373 if (__glibc_unlikely (trtable == NULL))
3374 goto out_free;
3375
3376
3377 for (i = 0; i < BITSET_WORDS; ++i)
3378 for (ch = i * BITSET_WORD_BITS, elem = acceptable[i], mask = 1;
3379 elem;
3380 mask <<= 1, elem >>= 1, ++ch)
3381 if (__glibc_unlikely (elem & 1))
3382 {
3383
3384
3385 for (j = 0; (dests_ch[j][i] & mask) == 0; ++j)
3386 ;
3387
3388
3389 trtable[ch] = dest_states[j];
3390 trtable[ch + SBC_MAX] = dest_states_word[j];
3391 }
3392 }
3393
3394
3395 if (bitset_contain (acceptable, NEWLINE_CHAR))
3396 {
3397
3398 for (j = 0; j < ndests; ++j)
3399 if (bitset_contain (dests_ch[j], NEWLINE_CHAR))
3400 {
3401
3402 trtable[NEWLINE_CHAR] = dest_states_nl[j];
3403 if (need_word_trtable)
3404 trtable[NEWLINE_CHAR + SBC_MAX] = dest_states_nl[j];
3405
3406
3407 break;
3408 }
3409 }
3410
3411 re_node_set_free (&follows);
3412 for (i = 0; i < ndests; ++i)
3413 re_node_set_free (dests_node + i);
3414 return true;
3415 }
3416
3417
3418
3419
3420
3421
3422
3423 static Idx
3424 group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state,
3425 re_node_set *dests_node, bitset_t *dests_ch)
3426 {
3427 reg_errcode_t err;
3428 bool ok;
3429 Idx i, j, k;
3430 Idx ndests;
3431 bitset_t accepts;
3432 const re_node_set *cur_nodes = &state->nodes;
3433 bitset_empty (accepts);
3434 ndests = 0;
3435
3436
3437 for (i = 0; i < cur_nodes->nelem; ++i)
3438 {
3439 re_token_t *node = &dfa->nodes[cur_nodes->elems[i]];
3440 re_token_type_t type = node->type;
3441 unsigned int constraint = node->constraint;
3442
3443
3444 if (type == CHARACTER)
3445 bitset_set (accepts, node->opr.c);
3446 else if (type == SIMPLE_BRACKET)
3447 {
3448 bitset_merge (accepts, node->opr.sbcset);
3449 }
3450 else if (type == OP_PERIOD)
3451 {
3452 if (dfa->mb_cur_max > 1)
3453 bitset_merge (accepts, dfa->sb_char);
3454 else
3455 bitset_set_all (accepts);
3456 if (!(dfa->syntax & RE_DOT_NEWLINE))
3457 bitset_clear (accepts, '\n');
3458 if (dfa->syntax & RE_DOT_NOT_NULL)
3459 bitset_clear (accepts, '\0');
3460 }
3461 else if (type == OP_UTF8_PERIOD)
3462 {
3463 if (ASCII_CHARS % BITSET_WORD_BITS == 0)
3464 memset (accepts, -1, ASCII_CHARS / CHAR_BIT);
3465 else
3466 bitset_merge (accepts, utf8_sb_map);
3467 if (!(dfa->syntax & RE_DOT_NEWLINE))
3468 bitset_clear (accepts, '\n');
3469 if (dfa->syntax & RE_DOT_NOT_NULL)
3470 bitset_clear (accepts, '\0');
3471 }
3472 else
3473 continue;
3474
3475
3476
3477 if (constraint)
3478 {
3479 if (constraint & NEXT_NEWLINE_CONSTRAINT)
3480 {
3481 bool accepts_newline = bitset_contain (accepts, NEWLINE_CHAR);
3482 bitset_empty (accepts);
3483 if (accepts_newline)
3484 bitset_set (accepts, NEWLINE_CHAR);
3485 else
3486 continue;
3487 }
3488 if (constraint & NEXT_ENDBUF_CONSTRAINT)
3489 {
3490 bitset_empty (accepts);
3491 continue;
3492 }
3493
3494 if (constraint & NEXT_WORD_CONSTRAINT)
3495 {
3496 bitset_word_t any_set = 0;
3497 if (type == CHARACTER && !node->word_char)
3498 {
3499 bitset_empty (accepts);
3500 continue;
3501 }
3502 if (dfa->mb_cur_max > 1)
3503 for (j = 0; j < BITSET_WORDS; ++j)
3504 any_set |= (accepts[j] &= (dfa->word_char[j] | ~dfa->sb_char[j]));
3505 else
3506 for (j = 0; j < BITSET_WORDS; ++j)
3507 any_set |= (accepts[j] &= dfa->word_char[j]);
3508 if (!any_set)
3509 continue;
3510 }
3511 if (constraint & NEXT_NOTWORD_CONSTRAINT)
3512 {
3513 bitset_word_t any_set = 0;
3514 if (type == CHARACTER && node->word_char)
3515 {
3516 bitset_empty (accepts);
3517 continue;
3518 }
3519 if (dfa->mb_cur_max > 1)
3520 for (j = 0; j < BITSET_WORDS; ++j)
3521 any_set |= (accepts[j] &= ~(dfa->word_char[j] & dfa->sb_char[j]));
3522 else
3523 for (j = 0; j < BITSET_WORDS; ++j)
3524 any_set |= (accepts[j] &= ~dfa->word_char[j]);
3525 if (!any_set)
3526 continue;
3527 }
3528 }
3529
3530
3531
3532 for (j = 0; j < ndests; ++j)
3533 {
3534 bitset_t intersec;
3535 bitset_t remains;
3536
3537 bitset_word_t has_intersec, not_subset, not_consumed;
3538
3539
3540 if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c))
3541 continue;
3542
3543
3544 has_intersec = 0;
3545 for (k = 0; k < BITSET_WORDS; ++k)
3546 has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k];
3547
3548 if (!has_intersec)
3549 continue;
3550
3551
3552 not_subset = not_consumed = 0;
3553 for (k = 0; k < BITSET_WORDS; ++k)
3554 {
3555 not_subset |= remains[k] = ~accepts[k] & dests_ch[j][k];
3556 not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k];
3557 }
3558
3559
3560
3561 if (not_subset)
3562 {
3563 bitset_copy (dests_ch[ndests], remains);
3564 bitset_copy (dests_ch[j], intersec);
3565 err = re_node_set_init_copy (dests_node + ndests, &dests_node[j]);
3566 if (__glibc_unlikely (err != REG_NOERROR))
3567 goto error_return;
3568 ++ndests;
3569 }
3570
3571
3572 ok = re_node_set_insert (&dests_node[j], cur_nodes->elems[i]);
3573 if (__glibc_unlikely (! ok))
3574 goto error_return;
3575
3576
3577 if (!not_consumed)
3578 break;
3579 }
3580
3581 if (j == ndests)
3582 {
3583 bitset_copy (dests_ch[ndests], accepts);
3584 err = re_node_set_init_1 (dests_node + ndests, cur_nodes->elems[i]);
3585 if (__glibc_unlikely (err != REG_NOERROR))
3586 goto error_return;
3587 ++ndests;
3588 bitset_empty (accepts);
3589 }
3590 }
3591 assume (ndests <= SBC_MAX);
3592 return ndests;
3593 error_return:
3594 for (j = 0; j < ndests; ++j)
3595 re_node_set_free (dests_node + j);
3596 return -1;
3597 }
3598
3599
3600
3601
3602
3603
3604
3605
3606
3607 #ifdef _LIBC
3608 # include <locale/weight.h>
3609 #endif
3610
3611 static int
3612 check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx,
3613 const re_string_t *input, Idx str_idx)
3614 {
3615 const re_token_t *node = dfa->nodes + node_idx;
3616 int char_len, elem_len;
3617 Idx i;
3618
3619 if (__glibc_unlikely (node->type == OP_UTF8_PERIOD))
3620 {
3621 unsigned char c = re_string_byte_at (input, str_idx), d;
3622 if (__glibc_likely (c < 0xc2))
3623 return 0;
3624
3625 if (str_idx + 2 > input->len)
3626 return 0;
3627
3628 d = re_string_byte_at (input, str_idx + 1);
3629 if (c < 0xe0)
3630 return (d < 0x80 || d > 0xbf) ? 0 : 2;
3631 else if (c < 0xf0)
3632 {
3633 char_len = 3;
3634 if (c == 0xe0 && d < 0xa0)
3635 return 0;
3636 }
3637 else if (c < 0xf8)
3638 {
3639 char_len = 4;
3640 if (c == 0xf0 && d < 0x90)
3641 return 0;
3642 }
3643 else if (c < 0xfc)
3644 {
3645 char_len = 5;
3646 if (c == 0xf8 && d < 0x88)
3647 return 0;
3648 }
3649 else if (c < 0xfe)
3650 {
3651 char_len = 6;
3652 if (c == 0xfc && d < 0x84)
3653 return 0;
3654 }
3655 else
3656 return 0;
3657
3658 if (str_idx + char_len > input->len)
3659 return 0;
3660
3661 for (i = 1; i < char_len; ++i)
3662 {
3663 d = re_string_byte_at (input, str_idx + i);
3664 if (d < 0x80 || d > 0xbf)
3665 return 0;
3666 }
3667 return char_len;
3668 }
3669
3670 char_len = re_string_char_size_at (input, str_idx);
3671 if (node->type == OP_PERIOD)
3672 {
3673 if (char_len <= 1)
3674 return 0;
3675
3676
3677
3678 if ((!(dfa->syntax & RE_DOT_NEWLINE)
3679 && re_string_byte_at (input, str_idx) == '\n')
3680 || ((dfa->syntax & RE_DOT_NOT_NULL)
3681 && re_string_byte_at (input, str_idx) == '\0'))
3682 return 0;
3683 return char_len;
3684 }
3685
3686 elem_len = re_string_elem_size_at (input, str_idx);
3687 if ((elem_len <= 1 && char_len <= 1) || char_len == 0)
3688 return 0;
3689
3690 if (node->type == COMPLEX_BRACKET)
3691 {
3692 const re_charset_t *cset = node->opr.mbcset;
3693 #ifdef _LIBC
3694 const unsigned char *pin
3695 = ((const unsigned char *) re_string_get_buffer (input) + str_idx);
3696 Idx j;
3697 uint32_t nrules;
3698 #endif
3699 int match_len = 0;
3700 wchar_t wc = ((cset->nranges || cset->nchar_classes || cset->nmbchars)
3701 ? re_string_wchar_at (input, str_idx) : 0);
3702
3703
3704 for (i = 0; i < cset->nmbchars; ++i)
3705 if (wc == cset->mbchars[i])
3706 {
3707 match_len = char_len;
3708 goto check_node_accept_bytes_match;
3709 }
3710
3711 for (i = 0; i < cset->nchar_classes; ++i)
3712 {
3713 wctype_t wt = cset->char_classes[i];
3714 if (__iswctype (wc, wt))
3715 {
3716 match_len = char_len;
3717 goto check_node_accept_bytes_match;
3718 }
3719 }
3720
3721 #ifdef _LIBC
3722 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3723 if (nrules != 0)
3724 {
3725 unsigned int in_collseq = 0;
3726 const int32_t *table, *indirect;
3727 const unsigned char *weights, *extra;
3728 const char *collseqwc;
3729
3730
3731 if (cset->ncoll_syms)
3732 extra = (const unsigned char *)
3733 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3734 for (i = 0; i < cset->ncoll_syms; ++i)
3735 {
3736 const unsigned char *coll_sym = extra + cset->coll_syms[i];
3737
3738
3739 if (*coll_sym != elem_len)
3740 continue;
3741
3742 for (j = 0; j < *coll_sym; j++)
3743 if (pin[j] != coll_sym[1 + j])
3744 break;
3745 if (j == *coll_sym)
3746 {
3747
3748 match_len = j;
3749 goto check_node_accept_bytes_match;
3750 }
3751 }
3752
3753 if (cset->nranges)
3754 {
3755 if (elem_len <= char_len)
3756 {
3757 collseqwc = _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQWC);
3758 in_collseq = __collseq_table_lookup (collseqwc, wc);
3759 }
3760 else
3761 in_collseq = find_collation_sequence_value (pin, elem_len);
3762 }
3763
3764
3765 for (i = 0; i < cset->nranges; ++i)
3766 if (cset->range_starts[i] <= in_collseq
3767 && in_collseq <= cset->range_ends[i])
3768 {
3769 match_len = elem_len;
3770 goto check_node_accept_bytes_match;
3771 }
3772
3773
3774 if (cset->nequiv_classes)
3775 {
3776 const unsigned char *cp = pin;
3777 table = (const int32_t *)
3778 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
3779 weights = (const unsigned char *)
3780 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
3781 extra = (const unsigned char *)
3782 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
3783 indirect = (const int32_t *)
3784 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
3785 int32_t idx = findidx (table, indirect, extra, &cp, elem_len);
3786 int32_t rule = idx >> 24;
3787 idx &= 0xffffff;
3788 if (idx > 0)
3789 {
3790 size_t weight_len = weights[idx];
3791 for (i = 0; i < cset->nequiv_classes; ++i)
3792 {
3793 int32_t equiv_class_idx = cset->equiv_classes[i];
3794 int32_t equiv_class_rule = equiv_class_idx >> 24;
3795 equiv_class_idx &= 0xffffff;
3796 if (weights[equiv_class_idx] == weight_len
3797 && equiv_class_rule == rule
3798 && memcmp (weights + idx + 1,
3799 weights + equiv_class_idx + 1,
3800 weight_len) == 0)
3801 {
3802 match_len = elem_len;
3803 goto check_node_accept_bytes_match;
3804 }
3805 }
3806 }
3807 }
3808 }
3809 else
3810 #endif
3811 {
3812
3813 for (i = 0; i < cset->nranges; ++i)
3814 {
3815 if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i])
3816 {
3817 match_len = char_len;
3818 goto check_node_accept_bytes_match;
3819 }
3820 }
3821 }
3822 check_node_accept_bytes_match:
3823 if (!cset->non_match)
3824 return match_len;
3825 else
3826 {
3827 if (match_len > 0)
3828 return 0;
3829 else
3830 return (elem_len > char_len) ? elem_len : char_len;
3831 }
3832 }
3833 return 0;
3834 }
3835
3836 #ifdef _LIBC
3837 static unsigned int
3838 find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len)
3839 {
3840 uint32_t nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
3841 if (nrules == 0)
3842 {
3843 if (mbs_len == 1)
3844 {
3845
3846 const unsigned char *collseq = (const unsigned char *)
3847 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_COLLSEQMB);
3848 return collseq[mbs[0]];
3849 }
3850 return UINT_MAX;
3851 }
3852 else
3853 {
3854 int32_t idx;
3855 const unsigned char *extra = (const unsigned char *)
3856 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
3857 int32_t extrasize = (const unsigned char *)
3858 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB + 1) - extra;
3859
3860 for (idx = 0; idx < extrasize;)
3861 {
3862 int mbs_cnt;
3863 bool found = false;
3864 int32_t elem_mbs_len;
3865
3866 idx = idx + extra[idx] + 1;
3867 elem_mbs_len = extra[idx++];
3868 if (mbs_len == elem_mbs_len)
3869 {
3870 for (mbs_cnt = 0; mbs_cnt < elem_mbs_len; ++mbs_cnt)
3871 if (extra[idx + mbs_cnt] != mbs[mbs_cnt])
3872 break;
3873 if (mbs_cnt == elem_mbs_len)
3874
3875 found = true;
3876 }
3877
3878 idx += elem_mbs_len;
3879
3880 idx = (idx + 3) & ~3;
3881
3882 idx += sizeof (uint32_t);
3883
3884 idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1);
3885
3886 if (found)
3887 return *(uint32_t *) (extra + idx);
3888
3889 idx += sizeof (uint32_t);
3890 }
3891 return UINT_MAX;
3892 }
3893 }
3894 #endif
3895
3896
3897
3898
3899 static bool
3900 check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
3901 Idx idx)
3902 {
3903 unsigned char ch;
3904 ch = re_string_byte_at (&mctx->input, idx);
3905 switch (node->type)
3906 {
3907 case CHARACTER:
3908 if (node->opr.c != ch)
3909 return false;
3910 break;
3911
3912 case SIMPLE_BRACKET:
3913 if (!bitset_contain (node->opr.sbcset, ch))
3914 return false;
3915 break;
3916
3917 case OP_UTF8_PERIOD:
3918 if (ch >= ASCII_CHARS)
3919 return false;
3920 FALLTHROUGH;
3921 case OP_PERIOD:
3922 if ((ch == '\n' && !(mctx->dfa->syntax & RE_DOT_NEWLINE))
3923 || (ch == '\0' && (mctx->dfa->syntax & RE_DOT_NOT_NULL)))
3924 return false;
3925 break;
3926
3927 default:
3928 return false;
3929 }
3930
3931 if (node->constraint)
3932 {
3933
3934
3935 unsigned int context = re_string_context_at (&mctx->input, idx,
3936 mctx->eflags);
3937 if (NOT_SATISFY_NEXT_CONSTRAINT (node->constraint, context))
3938 return false;
3939 }
3940
3941 return true;
3942 }
3943
3944
3945
3946 static reg_errcode_t
3947 __attribute_warn_unused_result__
3948 extend_buffers (re_match_context_t *mctx, int min_len)
3949 {
3950 reg_errcode_t ret;
3951 re_string_t *pstr = &mctx->input;
3952
3953
3954 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2
3955 <= pstr->bufs_len))
3956 return REG_ESPACE;
3957
3958
3959 ret = re_string_realloc_buffers (pstr,
3960 MAX (min_len,
3961 MIN (pstr->len, pstr->bufs_len * 2)));
3962 if (__glibc_unlikely (ret != REG_NOERROR))
3963 return ret;
3964
3965 if (mctx->state_log != NULL)
3966 {
3967
3968
3969
3970
3971 re_dfastate_t **new_array = re_realloc (mctx->state_log, re_dfastate_t *,
3972 pstr->bufs_len + 1);
3973 if (__glibc_unlikely (new_array == NULL))
3974 return REG_ESPACE;
3975 mctx->state_log = new_array;
3976 }
3977
3978
3979 if (pstr->icase)
3980 {
3981 if (pstr->mb_cur_max > 1)
3982 {
3983 ret = build_wcs_upper_buffer (pstr);
3984 if (__glibc_unlikely (ret != REG_NOERROR))
3985 return ret;
3986 }
3987 else
3988 build_upper_buffer (pstr);
3989 }
3990 else
3991 {
3992 if (pstr->mb_cur_max > 1)
3993 build_wcs_buffer (pstr);
3994 else
3995 {
3996 if (pstr->trans != NULL)
3997 re_string_translate_buffer (pstr);
3998 }
3999 }
4000 return REG_NOERROR;
4001 }
4002
4003
4004
4005
4006
4007
4008 static reg_errcode_t
4009 __attribute_warn_unused_result__
4010 match_ctx_init (re_match_context_t *mctx, int eflags, Idx n)
4011 {
4012 mctx->eflags = eflags;
4013 mctx->match_last = -1;
4014 if (n > 0)
4015 {
4016
4017 size_t max_object_size =
4018 MAX (sizeof (struct re_backref_cache_entry),
4019 sizeof (re_sub_match_top_t *));
4020 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n))
4021 return REG_ESPACE;
4022
4023 mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n);
4024 mctx->sub_tops = re_malloc (re_sub_match_top_t *, n);
4025 if (__glibc_unlikely (mctx->bkref_ents == NULL || mctx->sub_tops == NULL))
4026 return REG_ESPACE;
4027 }
4028
4029
4030
4031
4032
4033 mctx->abkref_ents = n;
4034 mctx->max_mb_elem_len = 1;
4035 mctx->asub_tops = n;
4036 return REG_NOERROR;
4037 }
4038
4039
4040
4041
4042
4043 static void
4044 match_ctx_clean (re_match_context_t *mctx)
4045 {
4046 Idx st_idx;
4047 for (st_idx = 0; st_idx < mctx->nsub_tops; ++st_idx)
4048 {
4049 Idx sl_idx;
4050 re_sub_match_top_t *top = mctx->sub_tops[st_idx];
4051 for (sl_idx = 0; sl_idx < top->nlasts; ++sl_idx)
4052 {
4053 re_sub_match_last_t *last = top->lasts[sl_idx];
4054 re_free (last->path.array);
4055 re_free (last);
4056 }
4057 re_free (top->lasts);
4058 if (top->path)
4059 {
4060 re_free (top->path->array);
4061 re_free (top->path);
4062 }
4063 re_free (top);
4064 }
4065
4066 mctx->nsub_tops = 0;
4067 mctx->nbkref_ents = 0;
4068 }
4069
4070
4071
4072 static void
4073 match_ctx_free (re_match_context_t *mctx)
4074 {
4075
4076 match_ctx_clean (mctx);
4077 re_free (mctx->sub_tops);
4078 re_free (mctx->bkref_ents);
4079 }
4080
4081
4082
4083
4084
4085
4086 static reg_errcode_t
4087 __attribute_warn_unused_result__
4088 match_ctx_add_entry (re_match_context_t *mctx, Idx node, Idx str_idx, Idx from,
4089 Idx to)
4090 {
4091 if (mctx->nbkref_ents >= mctx->abkref_ents)
4092 {
4093 struct re_backref_cache_entry* new_entry;
4094 new_entry = re_realloc (mctx->bkref_ents, struct re_backref_cache_entry,
4095 mctx->abkref_ents * 2);
4096 if (__glibc_unlikely (new_entry == NULL))
4097 {
4098 re_free (mctx->bkref_ents);
4099 return REG_ESPACE;
4100 }
4101 mctx->bkref_ents = new_entry;
4102 memset (mctx->bkref_ents + mctx->nbkref_ents, '\0',
4103 sizeof (struct re_backref_cache_entry) * mctx->abkref_ents);
4104 mctx->abkref_ents *= 2;
4105 }
4106 if (mctx->nbkref_ents > 0
4107 && mctx->bkref_ents[mctx->nbkref_ents - 1].str_idx == str_idx)
4108 mctx->bkref_ents[mctx->nbkref_ents - 1].more = 1;
4109
4110 mctx->bkref_ents[mctx->nbkref_ents].node = node;
4111 mctx->bkref_ents[mctx->nbkref_ents].str_idx = str_idx;
4112 mctx->bkref_ents[mctx->nbkref_ents].subexp_from = from;
4113 mctx->bkref_ents[mctx->nbkref_ents].subexp_to = to;
4114
4115
4116
4117
4118
4119
4120
4121
4122
4123 mctx->bkref_ents[mctx->nbkref_ents].eps_reachable_subexps_map
4124 = (from == to ? -1 : 0);
4125
4126 mctx->bkref_ents[mctx->nbkref_ents++].more = 0;
4127 if (mctx->max_mb_elem_len < to - from)
4128 mctx->max_mb_elem_len = to - from;
4129 return REG_NOERROR;
4130 }
4131
4132
4133
4134
4135 static Idx
4136 search_cur_bkref_entry (const re_match_context_t *mctx, Idx str_idx)
4137 {
4138 Idx left, right, mid, last;
4139 last = right = mctx->nbkref_ents;
4140 for (left = 0; left < right;)
4141 {
4142 mid = (left + right) / 2;
4143 if (mctx->bkref_ents[mid].str_idx < str_idx)
4144 left = mid + 1;
4145 else
4146 right = mid;
4147 }
4148 if (left < last && mctx->bkref_ents[left].str_idx == str_idx)
4149 return left;
4150 else
4151 return -1;
4152 }
4153
4154
4155
4156
4157 static reg_errcode_t
4158 __attribute_warn_unused_result__
4159 match_ctx_add_subtop (re_match_context_t *mctx, Idx node, Idx str_idx)
4160 {
4161 DEBUG_ASSERT (mctx->sub_tops != NULL);
4162 DEBUG_ASSERT (mctx->asub_tops > 0);
4163 if (__glibc_unlikely (mctx->nsub_tops == mctx->asub_tops))
4164 {
4165 Idx new_asub_tops = mctx->asub_tops * 2;
4166 re_sub_match_top_t **new_array = re_realloc (mctx->sub_tops,
4167 re_sub_match_top_t *,
4168 new_asub_tops);
4169 if (__glibc_unlikely (new_array == NULL))
4170 return REG_ESPACE;
4171 mctx->sub_tops = new_array;
4172 mctx->asub_tops = new_asub_tops;
4173 }
4174 mctx->sub_tops[mctx->nsub_tops] = calloc (1, sizeof (re_sub_match_top_t));
4175 if (__glibc_unlikely (mctx->sub_tops[mctx->nsub_tops] == NULL))
4176 return REG_ESPACE;
4177 mctx->sub_tops[mctx->nsub_tops]->node = node;
4178 mctx->sub_tops[mctx->nsub_tops++]->str_idx = str_idx;
4179 return REG_NOERROR;
4180 }
4181
4182
4183
4184
4185
4186 static re_sub_match_last_t *
4187 match_ctx_add_sublast (re_sub_match_top_t *subtop, Idx node, Idx str_idx)
4188 {
4189 re_sub_match_last_t *new_entry;
4190 if (__glibc_unlikely (subtop->nlasts == subtop->alasts))
4191 {
4192 Idx new_alasts = 2 * subtop->alasts + 1;
4193 re_sub_match_last_t **new_array = re_realloc (subtop->lasts,
4194 re_sub_match_last_t *,
4195 new_alasts);
4196 if (__glibc_unlikely (new_array == NULL))
4197 return NULL;
4198 subtop->lasts = new_array;
4199 subtop->alasts = new_alasts;
4200 }
4201 new_entry = calloc (1, sizeof (re_sub_match_last_t));
4202 if (__glibc_likely (new_entry != NULL))
4203 {
4204 subtop->lasts[subtop->nlasts] = new_entry;
4205 new_entry->node = node;
4206 new_entry->str_idx = str_idx;
4207 ++subtop->nlasts;
4208 }
4209 return new_entry;
4210 }
4211
4212 static void
4213 sift_ctx_init (re_sift_context_t *sctx, re_dfastate_t **sifted_sts,
4214 re_dfastate_t **limited_sts, Idx last_node, Idx last_str_idx)
4215 {
4216 sctx->sifted_states = sifted_sts;
4217 sctx->limited_states = limited_sts;
4218 sctx->last_node = last_node;
4219 sctx->last_str_idx = last_str_idx;
4220 re_node_set_init_empty (&sctx->limits);
4221 }