This source file includes following definitions.
- re_string_allocate
- re_string_construct
- re_string_realloc_buffers
- re_string_construct_common
- build_wcs_buffer
- build_wcs_upper_buffer
- re_string_skip_chars
- build_upper_buffer
- re_string_translate_buffer
- re_string_reconstruct
- re_string_peek_byte_case
- re_string_fetch_byte_case
- re_string_destruct
- re_string_context_at
- re_node_set_alloc
- re_node_set_init_1
- re_node_set_init_2
- re_node_set_init_copy
- re_node_set_add_intersect
- re_node_set_init_union
- re_node_set_merge
- re_node_set_insert
- re_node_set_insert_last
- re_node_set_compare
- re_node_set_contains
- re_node_set_remove_at
- re_dfa_add_node
- calc_state_hash
- re_acquire_state
- re_acquire_state_context
- register_state
- free_state
- create_ci_newstate
- create_cd_newstate
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 static void re_string_construct_common (const char *str, Idx len,
21 re_string_t *pstr,
22 RE_TRANSLATE_TYPE trans, bool icase,
23 const re_dfa_t *dfa);
24 static re_dfastate_t *create_ci_newstate (const re_dfa_t *dfa,
25 const re_node_set *nodes,
26 re_hashval_t hash);
27 static re_dfastate_t *create_cd_newstate (const re_dfa_t *dfa,
28 const re_node_set *nodes,
29 unsigned int context,
30 re_hashval_t hash);
31 static reg_errcode_t re_string_realloc_buffers (re_string_t *pstr,
32 Idx new_buf_len);
33 static void build_wcs_buffer (re_string_t *pstr);
34 static reg_errcode_t build_wcs_upper_buffer (re_string_t *pstr);
35 static void build_upper_buffer (re_string_t *pstr);
36 static void re_string_translate_buffer (re_string_t *pstr);
37 static unsigned int re_string_context_at (const re_string_t *input, Idx idx,
38 int eflags) __attribute__ ((pure));
39
40
41
42
43
44
45 static reg_errcode_t
46 __attribute_warn_unused_result__
47 re_string_allocate (re_string_t *pstr, const char *str, Idx len, Idx init_len,
48 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
49 {
50 reg_errcode_t ret;
51 Idx init_buf_len;
52
53
54 if (init_len < dfa->mb_cur_max)
55 init_len = dfa->mb_cur_max;
56 init_buf_len = (len + 1 < init_len) ? len + 1: init_len;
57 re_string_construct_common (str, len, pstr, trans, icase, dfa);
58
59 ret = re_string_realloc_buffers (pstr, init_buf_len);
60 if (__glibc_unlikely (ret != REG_NOERROR))
61 return ret;
62
63 pstr->word_char = dfa->word_char;
64 pstr->word_ops_used = dfa->word_ops_used;
65 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
66 pstr->valid_len = (pstr->mbs_allocated || dfa->mb_cur_max > 1) ? 0 : len;
67 pstr->valid_raw_len = pstr->valid_len;
68 return REG_NOERROR;
69 }
70
71
72
73 static reg_errcode_t
74 __attribute_warn_unused_result__
75 re_string_construct (re_string_t *pstr, const char *str, Idx len,
76 RE_TRANSLATE_TYPE trans, bool icase, const re_dfa_t *dfa)
77 {
78 reg_errcode_t ret;
79 memset (pstr, '\0', sizeof (re_string_t));
80 re_string_construct_common (str, len, pstr, trans, icase, dfa);
81
82 if (len > 0)
83 {
84 ret = re_string_realloc_buffers (pstr, len + 1);
85 if (__glibc_unlikely (ret != REG_NOERROR))
86 return ret;
87 }
88 pstr->mbs = pstr->mbs_allocated ? pstr->mbs : (unsigned char *) str;
89
90 if (icase)
91 {
92 if (dfa->mb_cur_max > 1)
93 {
94 while (1)
95 {
96 ret = build_wcs_upper_buffer (pstr);
97 if (__glibc_unlikely (ret != REG_NOERROR))
98 return ret;
99 if (pstr->valid_raw_len >= len)
100 break;
101 if (pstr->bufs_len > pstr->valid_len + dfa->mb_cur_max)
102 break;
103 ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2);
104 if (__glibc_unlikely (ret != REG_NOERROR))
105 return ret;
106 }
107 }
108 else
109 build_upper_buffer (pstr);
110 }
111 else
112 {
113 if (dfa->mb_cur_max > 1)
114 build_wcs_buffer (pstr);
115 else
116 {
117 if (trans != NULL)
118 re_string_translate_buffer (pstr);
119 else
120 {
121 pstr->valid_len = pstr->bufs_len;
122 pstr->valid_raw_len = pstr->bufs_len;
123 }
124 }
125 }
126
127 return REG_NOERROR;
128 }
129
130
131
132 static reg_errcode_t
133 __attribute_warn_unused_result__
134 re_string_realloc_buffers (re_string_t *pstr, Idx new_buf_len)
135 {
136 if (pstr->mb_cur_max > 1)
137 {
138 wint_t *new_wcs;
139
140
141 const size_t max_object_size = MAX (sizeof (wint_t), sizeof (Idx));
142 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size)
143 < new_buf_len))
144 return REG_ESPACE;
145
146 new_wcs = re_realloc (pstr->wcs, wint_t, new_buf_len);
147 if (__glibc_unlikely (new_wcs == NULL))
148 return REG_ESPACE;
149 pstr->wcs = new_wcs;
150 if (pstr->offsets != NULL)
151 {
152 Idx *new_offsets = re_realloc (pstr->offsets, Idx, new_buf_len);
153 if (__glibc_unlikely (new_offsets == NULL))
154 return REG_ESPACE;
155 pstr->offsets = new_offsets;
156 }
157 }
158 if (pstr->mbs_allocated)
159 {
160 unsigned char *new_mbs = re_realloc (pstr->mbs, unsigned char,
161 new_buf_len);
162 if (__glibc_unlikely (new_mbs == NULL))
163 return REG_ESPACE;
164 pstr->mbs = new_mbs;
165 }
166 pstr->bufs_len = new_buf_len;
167 return REG_NOERROR;
168 }
169
170
171 static void
172 re_string_construct_common (const char *str, Idx len, re_string_t *pstr,
173 RE_TRANSLATE_TYPE trans, bool icase,
174 const re_dfa_t *dfa)
175 {
176 pstr->raw_mbs = (const unsigned char *) str;
177 pstr->len = len;
178 pstr->raw_len = len;
179 pstr->trans = trans;
180 pstr->icase = icase;
181 pstr->mbs_allocated = (trans != NULL || icase);
182 pstr->mb_cur_max = dfa->mb_cur_max;
183 pstr->is_utf8 = dfa->is_utf8;
184 pstr->map_notascii = dfa->map_notascii;
185 pstr->stop = pstr->len;
186 pstr->raw_stop = pstr->stop;
187 }
188
189
190
191
192
193
194
195
196
197
198
199
200
201 static void
202 build_wcs_buffer (re_string_t *pstr)
203 {
204 #ifdef _LIBC
205 unsigned char buf[MB_LEN_MAX];
206 DEBUG_ASSERT (MB_LEN_MAX >= pstr->mb_cur_max);
207 #else
208 unsigned char buf[64];
209 #endif
210 mbstate_t prev_st;
211 Idx byte_idx, end_idx, remain_len;
212 size_t mbclen;
213
214
215
216 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
217 for (byte_idx = pstr->valid_len; byte_idx < end_idx;)
218 {
219 wchar_t wc;
220 const char *p;
221
222 remain_len = end_idx - byte_idx;
223 prev_st = pstr->cur_state;
224
225 if (__glibc_unlikely (pstr->trans != NULL))
226 {
227 int i, ch;
228
229 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
230 {
231 ch = pstr->raw_mbs [pstr->raw_mbs_idx + byte_idx + i];
232 buf[i] = pstr->mbs[byte_idx + i] = pstr->trans[ch];
233 }
234 p = (const char *) buf;
235 }
236 else
237 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
238 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
239 if (__glibc_unlikely (mbclen == (size_t) -1 || mbclen == 0
240 || (mbclen == (size_t) -2
241 && pstr->bufs_len >= pstr->len)))
242 {
243
244 mbclen = 1;
245 wc = (wchar_t) pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
246 if (__glibc_unlikely (pstr->trans != NULL))
247 wc = pstr->trans[wc];
248 pstr->cur_state = prev_st;
249 }
250 else if (__glibc_unlikely (mbclen == (size_t) -2))
251 {
252
253 pstr->cur_state = prev_st;
254 break;
255 }
256
257
258 pstr->wcs[byte_idx++] = wc;
259
260 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
261 pstr->wcs[byte_idx++] = WEOF;
262 }
263 pstr->valid_len = byte_idx;
264 pstr->valid_raw_len = byte_idx;
265 }
266
267
268
269
270 static reg_errcode_t
271 __attribute_warn_unused_result__
272 build_wcs_upper_buffer (re_string_t *pstr)
273 {
274 mbstate_t prev_st;
275 Idx src_idx, byte_idx, end_idx, remain_len;
276 size_t mbclen;
277 #ifdef _LIBC
278 char buf[MB_LEN_MAX];
279 DEBUG_ASSERT (pstr->mb_cur_max <= MB_LEN_MAX);
280 #else
281 char buf[64];
282 #endif
283
284 byte_idx = pstr->valid_len;
285 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
286
287
288
289 if (! pstr->map_notascii && pstr->trans == NULL && !pstr->offsets_needed)
290 {
291 while (byte_idx < end_idx)
292 {
293 wchar_t wc;
294 unsigned char ch = pstr->raw_mbs[pstr->raw_mbs_idx + byte_idx];
295
296 if (isascii (ch) && mbsinit (&pstr->cur_state))
297 {
298
299
300 wchar_t wcu = __towupper (ch);
301 if (isascii (wcu))
302 {
303 pstr->mbs[byte_idx] = wcu;
304 pstr->wcs[byte_idx] = wcu;
305 byte_idx++;
306 continue;
307 }
308 }
309
310 remain_len = end_idx - byte_idx;
311 prev_st = pstr->cur_state;
312 mbclen = __mbrtowc (&wc,
313 ((const char *) pstr->raw_mbs + pstr->raw_mbs_idx
314 + byte_idx), remain_len, &pstr->cur_state);
315 if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2))
316 {
317 wchar_t wcu = __towupper (wc);
318 if (wcu != wc)
319 {
320 size_t mbcdlen;
321
322 mbcdlen = __wcrtomb (buf, wcu, &prev_st);
323 if (__glibc_likely (mbclen == mbcdlen))
324 memcpy (pstr->mbs + byte_idx, buf, mbclen);
325 else
326 {
327 src_idx = byte_idx;
328 goto offsets_needed;
329 }
330 }
331 else
332 memcpy (pstr->mbs + byte_idx,
333 pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx, mbclen);
334 pstr->wcs[byte_idx++] = wcu;
335
336 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
337 pstr->wcs[byte_idx++] = WEOF;
338 }
339 else if (mbclen == (size_t) -1 || mbclen == 0
340 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
341 {
342
343
344 pstr->mbs[byte_idx] = ch;
345
346 pstr->wcs[byte_idx++] = (wchar_t) ch;
347 if (__glibc_unlikely (mbclen == (size_t) -1))
348 pstr->cur_state = prev_st;
349 }
350 else
351 {
352
353 pstr->cur_state = prev_st;
354 break;
355 }
356 }
357 pstr->valid_len = byte_idx;
358 pstr->valid_raw_len = byte_idx;
359 return REG_NOERROR;
360 }
361 else
362 for (src_idx = pstr->valid_raw_len; byte_idx < end_idx;)
363 {
364 wchar_t wc;
365 const char *p;
366 offsets_needed:
367 remain_len = end_idx - byte_idx;
368 prev_st = pstr->cur_state;
369 if (__glibc_unlikely (pstr->trans != NULL))
370 {
371 int i, ch;
372
373 for (i = 0; i < pstr->mb_cur_max && i < remain_len; ++i)
374 {
375 ch = pstr->raw_mbs [pstr->raw_mbs_idx + src_idx + i];
376 buf[i] = pstr->trans[ch];
377 }
378 p = (const char *) buf;
379 }
380 else
381 p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + src_idx;
382 mbclen = __mbrtowc (&wc, p, remain_len, &pstr->cur_state);
383 if (__glibc_likely (0 < mbclen && mbclen < (size_t) -2))
384 {
385 wchar_t wcu = __towupper (wc);
386 if (wcu != wc)
387 {
388 size_t mbcdlen;
389
390 mbcdlen = __wcrtomb ((char *) buf, wcu, &prev_st);
391 if (__glibc_likely (mbclen == mbcdlen))
392 memcpy (pstr->mbs + byte_idx, buf, mbclen);
393 else if (mbcdlen != (size_t) -1)
394 {
395 size_t i;
396
397 if (byte_idx + mbcdlen > pstr->bufs_len)
398 {
399 pstr->cur_state = prev_st;
400 break;
401 }
402
403 if (pstr->offsets == NULL)
404 {
405 pstr->offsets = re_malloc (Idx, pstr->bufs_len);
406
407 if (pstr->offsets == NULL)
408 return REG_ESPACE;
409 }
410 if (!pstr->offsets_needed)
411 {
412 for (i = 0; i < (size_t) byte_idx; ++i)
413 pstr->offsets[i] = i;
414 pstr->offsets_needed = 1;
415 }
416
417 memcpy (pstr->mbs + byte_idx, buf, mbcdlen);
418 pstr->wcs[byte_idx] = wcu;
419 pstr->offsets[byte_idx] = src_idx;
420 for (i = 1; i < mbcdlen; ++i)
421 {
422 pstr->offsets[byte_idx + i]
423 = src_idx + (i < mbclen ? i : mbclen - 1);
424 pstr->wcs[byte_idx + i] = WEOF;
425 }
426 pstr->len += mbcdlen - mbclen;
427 if (pstr->raw_stop > src_idx)
428 pstr->stop += mbcdlen - mbclen;
429 end_idx = (pstr->bufs_len > pstr->len)
430 ? pstr->len : pstr->bufs_len;
431 byte_idx += mbcdlen;
432 src_idx += mbclen;
433 continue;
434 }
435 else
436 memcpy (pstr->mbs + byte_idx, p, mbclen);
437 }
438 else
439 memcpy (pstr->mbs + byte_idx, p, mbclen);
440
441 if (__glibc_unlikely (pstr->offsets_needed != 0))
442 {
443 size_t i;
444 for (i = 0; i < mbclen; ++i)
445 pstr->offsets[byte_idx + i] = src_idx + i;
446 }
447 src_idx += mbclen;
448
449 pstr->wcs[byte_idx++] = wcu;
450
451 for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
452 pstr->wcs[byte_idx++] = WEOF;
453 }
454 else if (mbclen == (size_t) -1 || mbclen == 0
455 || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
456 {
457
458 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + src_idx];
459
460 if (__glibc_unlikely (pstr->trans != NULL))
461 ch = pstr->trans [ch];
462 pstr->mbs[byte_idx] = ch;
463
464 if (__glibc_unlikely (pstr->offsets_needed != 0))
465 pstr->offsets[byte_idx] = src_idx;
466 ++src_idx;
467
468
469 pstr->wcs[byte_idx++] = (wchar_t) ch;
470 if (__glibc_unlikely (mbclen == (size_t) -1))
471 pstr->cur_state = prev_st;
472 }
473 else
474 {
475
476 pstr->cur_state = prev_st;
477 break;
478 }
479 }
480 pstr->valid_len = byte_idx;
481 pstr->valid_raw_len = src_idx;
482 return REG_NOERROR;
483 }
484
485
486
487
488 static Idx
489 re_string_skip_chars (re_string_t *pstr, Idx new_raw_idx, wint_t *last_wc)
490 {
491 mbstate_t prev_st;
492 Idx rawbuf_idx;
493 size_t mbclen;
494 wint_t wc = WEOF;
495
496
497 for (rawbuf_idx = pstr->raw_mbs_idx + pstr->valid_raw_len;
498 rawbuf_idx < new_raw_idx;)
499 {
500 wchar_t wc2;
501 Idx remain_len = pstr->raw_len - rawbuf_idx;
502 prev_st = pstr->cur_state;
503 mbclen = __mbrtowc (&wc2, (const char *) pstr->raw_mbs + rawbuf_idx,
504 remain_len, &pstr->cur_state);
505 if (__glibc_unlikely (mbclen == (size_t) -2 || mbclen == (size_t) -1
506 || mbclen == 0))
507 {
508
509 if (mbclen == 0 || remain_len == 0)
510 wc = L'\0';
511 else
512 wc = *(unsigned char *) (pstr->raw_mbs + rawbuf_idx);
513 mbclen = 1;
514 pstr->cur_state = prev_st;
515 }
516 else
517 wc = wc2;
518
519 rawbuf_idx += mbclen;
520 }
521 *last_wc = wc;
522 return rawbuf_idx;
523 }
524
525
526
527
528 static void
529 build_upper_buffer (re_string_t *pstr)
530 {
531 Idx char_idx, end_idx;
532 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
533
534 for (char_idx = pstr->valid_len; char_idx < end_idx; ++char_idx)
535 {
536 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + char_idx];
537 if (__glibc_unlikely (pstr->trans != NULL))
538 ch = pstr->trans[ch];
539 pstr->mbs[char_idx] = toupper (ch);
540 }
541 pstr->valid_len = char_idx;
542 pstr->valid_raw_len = char_idx;
543 }
544
545
546
547 static void
548 re_string_translate_buffer (re_string_t *pstr)
549 {
550 Idx buf_idx, end_idx;
551 end_idx = (pstr->bufs_len > pstr->len) ? pstr->len : pstr->bufs_len;
552
553 for (buf_idx = pstr->valid_len; buf_idx < end_idx; ++buf_idx)
554 {
555 int ch = pstr->raw_mbs[pstr->raw_mbs_idx + buf_idx];
556 pstr->mbs[buf_idx] = pstr->trans[ch];
557 }
558
559 pstr->valid_len = buf_idx;
560 pstr->valid_raw_len = buf_idx;
561 }
562
563
564
565
566
567 static reg_errcode_t
568 __attribute_warn_unused_result__
569 re_string_reconstruct (re_string_t *pstr, Idx idx, int eflags)
570 {
571 Idx offset;
572
573 if (__glibc_unlikely (pstr->raw_mbs_idx <= idx))
574 offset = idx - pstr->raw_mbs_idx;
575 else
576 {
577
578 if (pstr->mb_cur_max > 1)
579 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
580 pstr->len = pstr->raw_len;
581 pstr->stop = pstr->raw_stop;
582 pstr->valid_len = 0;
583 pstr->raw_mbs_idx = 0;
584 pstr->valid_raw_len = 0;
585 pstr->offsets_needed = 0;
586 pstr->tip_context = ((eflags & REG_NOTBOL) ? CONTEXT_BEGBUF
587 : CONTEXT_NEWLINE | CONTEXT_BEGBUF);
588 if (!pstr->mbs_allocated)
589 pstr->mbs = (unsigned char *) pstr->raw_mbs;
590 offset = idx;
591 }
592
593 if (__glibc_likely (offset != 0))
594 {
595
596 if (__glibc_likely (offset < pstr->valid_raw_len))
597 {
598
599 if (__glibc_unlikely (pstr->offsets_needed))
600 {
601 Idx low = 0, high = pstr->valid_len, mid;
602 do
603 {
604 mid = (high + low) / 2;
605 if (pstr->offsets[mid] > offset)
606 high = mid;
607 else if (pstr->offsets[mid] < offset)
608 low = mid + 1;
609 else
610 break;
611 }
612 while (low < high);
613 if (pstr->offsets[mid] < offset)
614 ++mid;
615 pstr->tip_context = re_string_context_at (pstr, mid - 1,
616 eflags);
617
618
619
620
621 if (pstr->valid_len > offset
622 && mid == offset && pstr->offsets[mid] == offset)
623 {
624 memmove (pstr->wcs, pstr->wcs + offset,
625 (pstr->valid_len - offset) * sizeof (wint_t));
626 memmove (pstr->mbs, pstr->mbs + offset, pstr->valid_len - offset);
627 pstr->valid_len -= offset;
628 pstr->valid_raw_len -= offset;
629 for (low = 0; low < pstr->valid_len; low++)
630 pstr->offsets[low] = pstr->offsets[low + offset] - offset;
631 }
632 else
633 {
634
635
636 pstr->len = pstr->raw_len - idx + offset;
637 pstr->stop = pstr->raw_stop - idx + offset;
638 pstr->offsets_needed = 0;
639 while (mid > 0 && pstr->offsets[mid - 1] == offset)
640 --mid;
641 while (mid < pstr->valid_len)
642 if (pstr->wcs[mid] != WEOF)
643 break;
644 else
645 ++mid;
646 if (mid == pstr->valid_len)
647 pstr->valid_len = 0;
648 else
649 {
650 pstr->valid_len = pstr->offsets[mid] - offset;
651 if (pstr->valid_len)
652 {
653 for (low = 0; low < pstr->valid_len; ++low)
654 pstr->wcs[low] = WEOF;
655 memset (pstr->mbs, 255, pstr->valid_len);
656 }
657 }
658 pstr->valid_raw_len = pstr->valid_len;
659 }
660 }
661 else
662 {
663 pstr->tip_context = re_string_context_at (pstr, offset - 1,
664 eflags);
665 if (pstr->mb_cur_max > 1)
666 memmove (pstr->wcs, pstr->wcs + offset,
667 (pstr->valid_len - offset) * sizeof (wint_t));
668 if (__glibc_unlikely (pstr->mbs_allocated))
669 memmove (pstr->mbs, pstr->mbs + offset,
670 pstr->valid_len - offset);
671 pstr->valid_len -= offset;
672 pstr->valid_raw_len -= offset;
673 DEBUG_ASSERT (pstr->valid_len > 0);
674 }
675 }
676 else
677 {
678
679 Idx prev_valid_len = pstr->valid_len;
680
681 if (__glibc_unlikely (pstr->offsets_needed))
682 {
683 pstr->len = pstr->raw_len - idx + offset;
684 pstr->stop = pstr->raw_stop - idx + offset;
685 pstr->offsets_needed = 0;
686 }
687 pstr->valid_len = 0;
688 if (pstr->mb_cur_max > 1)
689 {
690 Idx wcs_idx;
691 wint_t wc = WEOF;
692
693 if (pstr->is_utf8)
694 {
695 const unsigned char *raw, *p, *end;
696
697
698
699 raw = pstr->raw_mbs + pstr->raw_mbs_idx;
700 end = raw + (offset - pstr->mb_cur_max);
701 if (end < pstr->raw_mbs)
702 end = pstr->raw_mbs;
703 p = raw + offset - 1;
704 #ifdef _LIBC
705
706
707 if (isascii (*p) && __glibc_likely (pstr->trans == NULL))
708 {
709 memset (&pstr->cur_state, '\0', sizeof (mbstate_t));
710
711 wc = (wchar_t) *p;
712 }
713 else
714 #endif
715 for (; p >= end; --p)
716 if ((*p & 0xc0) != 0x80)
717 {
718 mbstate_t cur_state;
719 wchar_t wc2;
720 Idx mlen = raw + pstr->len - p;
721 unsigned char buf[6];
722 size_t mbclen;
723
724 const unsigned char *pp = p;
725 if (__glibc_unlikely (pstr->trans != NULL))
726 {
727 int i = mlen < 6 ? mlen : 6;
728 while (--i >= 0)
729 buf[i] = pstr->trans[p[i]];
730 pp = buf;
731 }
732
733
734 memset (&cur_state, 0, sizeof (cur_state));
735 mbclen = __mbrtowc (&wc2, (const char *) pp, mlen,
736 &cur_state);
737 if (raw + offset - p <= mbclen
738 && mbclen < (size_t) -2)
739 {
740 memset (&pstr->cur_state, '\0',
741 sizeof (mbstate_t));
742 pstr->valid_len = mbclen - (raw + offset - p);
743 wc = wc2;
744 }
745 break;
746 }
747 }
748
749 if (wc == WEOF)
750 pstr->valid_len = re_string_skip_chars (pstr, idx, &wc) - idx;
751 if (wc == WEOF)
752 pstr->tip_context
753 = re_string_context_at (pstr, prev_valid_len - 1, eflags);
754 else
755 pstr->tip_context = ((__glibc_unlikely (pstr->word_ops_used != 0)
756 && IS_WIDE_WORD_CHAR (wc))
757 ? CONTEXT_WORD
758 : ((IS_WIDE_NEWLINE (wc)
759 && pstr->newline_anchor)
760 ? CONTEXT_NEWLINE : 0));
761 if (__glibc_unlikely (pstr->valid_len))
762 {
763 for (wcs_idx = 0; wcs_idx < pstr->valid_len; ++wcs_idx)
764 pstr->wcs[wcs_idx] = WEOF;
765 if (pstr->mbs_allocated)
766 memset (pstr->mbs, 255, pstr->valid_len);
767 }
768 pstr->valid_raw_len = pstr->valid_len;
769 }
770 else
771 {
772 int c = pstr->raw_mbs[pstr->raw_mbs_idx + offset - 1];
773 pstr->valid_raw_len = 0;
774 if (pstr->trans)
775 c = pstr->trans[c];
776 pstr->tip_context = (bitset_contain (pstr->word_char, c)
777 ? CONTEXT_WORD
778 : ((IS_NEWLINE (c) && pstr->newline_anchor)
779 ? CONTEXT_NEWLINE : 0));
780 }
781 }
782 if (!__glibc_unlikely (pstr->mbs_allocated))
783 pstr->mbs += offset;
784 }
785 pstr->raw_mbs_idx = idx;
786 pstr->len -= offset;
787 pstr->stop -= offset;
788
789
790 if (pstr->mb_cur_max > 1)
791 {
792 if (pstr->icase)
793 {
794 reg_errcode_t ret = build_wcs_upper_buffer (pstr);
795 if (__glibc_unlikely (ret != REG_NOERROR))
796 return ret;
797 }
798 else
799 build_wcs_buffer (pstr);
800 }
801 else
802 if (__glibc_unlikely (pstr->mbs_allocated))
803 {
804 if (pstr->icase)
805 build_upper_buffer (pstr);
806 else if (pstr->trans != NULL)
807 re_string_translate_buffer (pstr);
808 }
809 else
810 pstr->valid_len = pstr->len;
811
812 pstr->cur_idx = 0;
813 return REG_NOERROR;
814 }
815
816 static unsigned char
817 __attribute__ ((pure))
818 re_string_peek_byte_case (const re_string_t *pstr, Idx idx)
819 {
820 int ch;
821 Idx off;
822
823
824 if (__glibc_likely (!pstr->mbs_allocated))
825 return re_string_peek_byte (pstr, idx);
826
827 if (pstr->mb_cur_max > 1
828 && ! re_string_is_single_byte_char (pstr, pstr->cur_idx + idx))
829 return re_string_peek_byte (pstr, idx);
830
831 off = pstr->cur_idx + idx;
832 if (pstr->offsets_needed)
833 off = pstr->offsets[off];
834
835 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
836
837
838
839
840
841 if (pstr->offsets_needed && !isascii (ch))
842 return re_string_peek_byte (pstr, idx);
843
844 return ch;
845 }
846
847 static unsigned char
848 re_string_fetch_byte_case (re_string_t *pstr)
849 {
850 if (__glibc_likely (!pstr->mbs_allocated))
851 return re_string_fetch_byte (pstr);
852
853 if (pstr->offsets_needed)
854 {
855 Idx off;
856 int ch;
857
858
859
860
861
862
863
864
865 if (!re_string_first_byte (pstr, pstr->cur_idx))
866 return re_string_fetch_byte (pstr);
867
868 off = pstr->offsets[pstr->cur_idx];
869 ch = pstr->raw_mbs[pstr->raw_mbs_idx + off];
870
871 if (! isascii (ch))
872 return re_string_fetch_byte (pstr);
873
874 re_string_skip_bytes (pstr,
875 re_string_char_size_at (pstr, pstr->cur_idx));
876 return ch;
877 }
878
879 return pstr->raw_mbs[pstr->raw_mbs_idx + pstr->cur_idx++];
880 }
881
882 static void
883 re_string_destruct (re_string_t *pstr)
884 {
885 re_free (pstr->wcs);
886 re_free (pstr->offsets);
887 if (pstr->mbs_allocated)
888 re_free (pstr->mbs);
889 }
890
891
892
893 static unsigned int
894 re_string_context_at (const re_string_t *input, Idx idx, int eflags)
895 {
896 int c;
897 if (__glibc_unlikely (idx < 0))
898
899
900 return input->tip_context;
901 if (__glibc_unlikely (idx == input->len))
902 return ((eflags & REG_NOTEOL) ? CONTEXT_ENDBUF
903 : CONTEXT_NEWLINE | CONTEXT_ENDBUF);
904 if (input->mb_cur_max > 1)
905 {
906 wint_t wc;
907 Idx wc_idx = idx;
908 while(input->wcs[wc_idx] == WEOF)
909 {
910 DEBUG_ASSERT (wc_idx >= 0);
911 --wc_idx;
912 if (wc_idx < 0)
913 return input->tip_context;
914 }
915 wc = input->wcs[wc_idx];
916 if (__glibc_unlikely (input->word_ops_used != 0)
917 && IS_WIDE_WORD_CHAR (wc))
918 return CONTEXT_WORD;
919 return (IS_WIDE_NEWLINE (wc) && input->newline_anchor
920 ? CONTEXT_NEWLINE : 0);
921 }
922 else
923 {
924 c = re_string_byte_at (input, idx);
925 if (bitset_contain (input->word_char, c))
926 return CONTEXT_WORD;
927 return IS_NEWLINE (c) && input->newline_anchor ? CONTEXT_NEWLINE : 0;
928 }
929 }
930
931
932
933 static reg_errcode_t
934 __attribute_warn_unused_result__
935 re_node_set_alloc (re_node_set *set, Idx size)
936 {
937 set->alloc = size;
938 set->nelem = 0;
939 set->elems = re_malloc (Idx, size);
940 if (__glibc_unlikely (set->elems == NULL)
941 && (MALLOC_0_IS_NONNULL || size != 0))
942 return REG_ESPACE;
943 return REG_NOERROR;
944 }
945
946 static reg_errcode_t
947 __attribute_warn_unused_result__
948 re_node_set_init_1 (re_node_set *set, Idx elem)
949 {
950 set->alloc = 1;
951 set->nelem = 1;
952 set->elems = re_malloc (Idx, 1);
953 if (__glibc_unlikely (set->elems == NULL))
954 {
955 set->alloc = set->nelem = 0;
956 return REG_ESPACE;
957 }
958 set->elems[0] = elem;
959 return REG_NOERROR;
960 }
961
962 static reg_errcode_t
963 __attribute_warn_unused_result__
964 re_node_set_init_2 (re_node_set *set, Idx elem1, Idx elem2)
965 {
966 set->alloc = 2;
967 set->elems = re_malloc (Idx, 2);
968 if (__glibc_unlikely (set->elems == NULL))
969 return REG_ESPACE;
970 if (elem1 == elem2)
971 {
972 set->nelem = 1;
973 set->elems[0] = elem1;
974 }
975 else
976 {
977 set->nelem = 2;
978 if (elem1 < elem2)
979 {
980 set->elems[0] = elem1;
981 set->elems[1] = elem2;
982 }
983 else
984 {
985 set->elems[0] = elem2;
986 set->elems[1] = elem1;
987 }
988 }
989 return REG_NOERROR;
990 }
991
992 static reg_errcode_t
993 __attribute_warn_unused_result__
994 re_node_set_init_copy (re_node_set *dest, const re_node_set *src)
995 {
996 dest->nelem = src->nelem;
997 if (src->nelem > 0)
998 {
999 dest->alloc = dest->nelem;
1000 dest->elems = re_malloc (Idx, dest->alloc);
1001 if (__glibc_unlikely (dest->elems == NULL))
1002 {
1003 dest->alloc = dest->nelem = 0;
1004 return REG_ESPACE;
1005 }
1006 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1007 }
1008 else
1009 re_node_set_init_empty (dest);
1010 return REG_NOERROR;
1011 }
1012
1013
1014
1015
1016
1017 static reg_errcode_t
1018 __attribute_warn_unused_result__
1019 re_node_set_add_intersect (re_node_set *dest, const re_node_set *src1,
1020 const re_node_set *src2)
1021 {
1022 Idx i1, i2, is, id, delta, sbase;
1023 if (src1->nelem == 0 || src2->nelem == 0)
1024 return REG_NOERROR;
1025
1026
1027
1028 if (src1->nelem + src2->nelem + dest->nelem > dest->alloc)
1029 {
1030 Idx new_alloc = src1->nelem + src2->nelem + dest->alloc;
1031 Idx *new_elems = re_realloc (dest->elems, Idx, new_alloc);
1032 if (__glibc_unlikely (new_elems == NULL))
1033 return REG_ESPACE;
1034 dest->elems = new_elems;
1035 dest->alloc = new_alloc;
1036 }
1037
1038
1039
1040 sbase = dest->nelem + src1->nelem + src2->nelem;
1041 i1 = src1->nelem - 1;
1042 i2 = src2->nelem - 1;
1043 id = dest->nelem - 1;
1044 for (;;)
1045 {
1046 if (src1->elems[i1] == src2->elems[i2])
1047 {
1048
1049 while (id >= 0 && dest->elems[id] > src1->elems[i1])
1050 --id;
1051
1052 if (id < 0 || dest->elems[id] != src1->elems[i1])
1053 dest->elems[--sbase] = src1->elems[i1];
1054
1055 if (--i1 < 0 || --i2 < 0)
1056 break;
1057 }
1058
1059
1060 else if (src1->elems[i1] < src2->elems[i2])
1061 {
1062 if (--i2 < 0)
1063 break;
1064 }
1065 else
1066 {
1067 if (--i1 < 0)
1068 break;
1069 }
1070 }
1071
1072 id = dest->nelem - 1;
1073 is = dest->nelem + src1->nelem + src2->nelem - 1;
1074 delta = is - sbase + 1;
1075
1076
1077
1078
1079 dest->nelem += delta;
1080 if (delta > 0 && id >= 0)
1081 for (;;)
1082 {
1083 if (dest->elems[is] > dest->elems[id])
1084 {
1085
1086 dest->elems[id + delta--] = dest->elems[is--];
1087 if (delta == 0)
1088 break;
1089 }
1090 else
1091 {
1092
1093 dest->elems[id + delta] = dest->elems[id];
1094 if (--id < 0)
1095 break;
1096 }
1097 }
1098
1099
1100 memcpy (dest->elems, dest->elems + sbase, delta * sizeof (Idx));
1101
1102 return REG_NOERROR;
1103 }
1104
1105
1106
1107
1108 static reg_errcode_t
1109 __attribute_warn_unused_result__
1110 re_node_set_init_union (re_node_set *dest, const re_node_set *src1,
1111 const re_node_set *src2)
1112 {
1113 Idx i1, i2, id;
1114 if (src1 != NULL && src1->nelem > 0 && src2 != NULL && src2->nelem > 0)
1115 {
1116 dest->alloc = src1->nelem + src2->nelem;
1117 dest->elems = re_malloc (Idx, dest->alloc);
1118 if (__glibc_unlikely (dest->elems == NULL))
1119 return REG_ESPACE;
1120 }
1121 else
1122 {
1123 if (src1 != NULL && src1->nelem > 0)
1124 return re_node_set_init_copy (dest, src1);
1125 else if (src2 != NULL && src2->nelem > 0)
1126 return re_node_set_init_copy (dest, src2);
1127 else
1128 re_node_set_init_empty (dest);
1129 return REG_NOERROR;
1130 }
1131 for (i1 = i2 = id = 0 ; i1 < src1->nelem && i2 < src2->nelem ;)
1132 {
1133 if (src1->elems[i1] > src2->elems[i2])
1134 {
1135 dest->elems[id++] = src2->elems[i2++];
1136 continue;
1137 }
1138 if (src1->elems[i1] == src2->elems[i2])
1139 ++i2;
1140 dest->elems[id++] = src1->elems[i1++];
1141 }
1142 if (i1 < src1->nelem)
1143 {
1144 memcpy (dest->elems + id, src1->elems + i1,
1145 (src1->nelem - i1) * sizeof (Idx));
1146 id += src1->nelem - i1;
1147 }
1148 else if (i2 < src2->nelem)
1149 {
1150 memcpy (dest->elems + id, src2->elems + i2,
1151 (src2->nelem - i2) * sizeof (Idx));
1152 id += src2->nelem - i2;
1153 }
1154 dest->nelem = id;
1155 return REG_NOERROR;
1156 }
1157
1158
1159
1160
1161 static reg_errcode_t
1162 __attribute_warn_unused_result__
1163 re_node_set_merge (re_node_set *dest, const re_node_set *src)
1164 {
1165 Idx is, id, sbase, delta;
1166 if (src == NULL || src->nelem == 0)
1167 return REG_NOERROR;
1168 if (dest->alloc < 2 * src->nelem + dest->nelem)
1169 {
1170 Idx new_alloc = 2 * (src->nelem + dest->alloc);
1171 Idx *new_buffer = re_realloc (dest->elems, Idx, new_alloc);
1172 if (__glibc_unlikely (new_buffer == NULL))
1173 return REG_ESPACE;
1174 dest->elems = new_buffer;
1175 dest->alloc = new_alloc;
1176 }
1177
1178 if (__glibc_unlikely (dest->nelem == 0))
1179 {
1180
1181
1182
1183 DEBUG_ASSERT (dest->elems);
1184 dest->nelem = src->nelem;
1185 memcpy (dest->elems, src->elems, src->nelem * sizeof (Idx));
1186 return REG_NOERROR;
1187 }
1188
1189
1190
1191 for (sbase = dest->nelem + 2 * src->nelem,
1192 is = src->nelem - 1, id = dest->nelem - 1; is >= 0 && id >= 0; )
1193 {
1194 if (dest->elems[id] == src->elems[is])
1195 is--, id--;
1196 else if (dest->elems[id] < src->elems[is])
1197 dest->elems[--sbase] = src->elems[is--];
1198 else
1199 --id;
1200 }
1201
1202 if (is >= 0)
1203 {
1204
1205 sbase -= is + 1;
1206 memcpy (dest->elems + sbase, src->elems, (is + 1) * sizeof (Idx));
1207 }
1208
1209 id = dest->nelem - 1;
1210 is = dest->nelem + 2 * src->nelem - 1;
1211 delta = is - sbase + 1;
1212 if (delta == 0)
1213 return REG_NOERROR;
1214
1215
1216
1217 dest->nelem += delta;
1218 for (;;)
1219 {
1220 if (dest->elems[is] > dest->elems[id])
1221 {
1222
1223 dest->elems[id + delta--] = dest->elems[is--];
1224 if (delta == 0)
1225 break;
1226 }
1227 else
1228 {
1229
1230 dest->elems[id + delta] = dest->elems[id];
1231 if (--id < 0)
1232 {
1233
1234 memcpy (dest->elems, dest->elems + sbase,
1235 delta * sizeof (Idx));
1236 break;
1237 }
1238 }
1239 }
1240
1241 return REG_NOERROR;
1242 }
1243
1244
1245
1246
1247
1248 static bool
1249 __attribute_warn_unused_result__
1250 re_node_set_insert (re_node_set *set, Idx elem)
1251 {
1252 Idx idx;
1253
1254 if (set->alloc == 0)
1255 return __glibc_likely (re_node_set_init_1 (set, elem) == REG_NOERROR);
1256
1257 if (__glibc_unlikely (set->nelem) == 0)
1258 {
1259
1260
1261
1262 DEBUG_ASSERT (set->elems);
1263 set->elems[0] = elem;
1264 ++set->nelem;
1265 return true;
1266 }
1267
1268
1269 if (set->alloc == set->nelem)
1270 {
1271 Idx *new_elems;
1272 set->alloc = set->alloc * 2;
1273 new_elems = re_realloc (set->elems, Idx, set->alloc);
1274 if (__glibc_unlikely (new_elems == NULL))
1275 return false;
1276 set->elems = new_elems;
1277 }
1278
1279
1280
1281 if (elem < set->elems[0])
1282 {
1283 for (idx = set->nelem; idx > 0; idx--)
1284 set->elems[idx] = set->elems[idx - 1];
1285 }
1286 else
1287 {
1288 for (idx = set->nelem; set->elems[idx - 1] > elem; idx--)
1289 set->elems[idx] = set->elems[idx - 1];
1290 DEBUG_ASSERT (set->elems[idx - 1] < elem);
1291 }
1292
1293
1294 set->elems[idx] = elem;
1295 ++set->nelem;
1296 return true;
1297 }
1298
1299
1300
1301
1302
1303 static bool
1304 __attribute_warn_unused_result__
1305 re_node_set_insert_last (re_node_set *set, Idx elem)
1306 {
1307
1308 if (set->alloc == set->nelem)
1309 {
1310 Idx *new_elems;
1311 set->alloc = (set->alloc + 1) * 2;
1312 new_elems = re_realloc (set->elems, Idx, set->alloc);
1313 if (__glibc_unlikely (new_elems == NULL))
1314 return false;
1315 set->elems = new_elems;
1316 }
1317
1318
1319 set->elems[set->nelem++] = elem;
1320 return true;
1321 }
1322
1323
1324
1325
1326 static bool
1327 __attribute__ ((pure))
1328 re_node_set_compare (const re_node_set *set1, const re_node_set *set2)
1329 {
1330 Idx i;
1331 if (set1 == NULL || set2 == NULL || set1->nelem != set2->nelem)
1332 return false;
1333 for (i = set1->nelem ; --i >= 0 ; )
1334 if (set1->elems[i] != set2->elems[i])
1335 return false;
1336 return true;
1337 }
1338
1339
1340
1341 static Idx
1342 __attribute__ ((pure))
1343 re_node_set_contains (const re_node_set *set, Idx elem)
1344 {
1345 __re_size_t idx, right, mid;
1346 if (set->nelem <= 0)
1347 return 0;
1348
1349
1350 idx = 0;
1351 right = set->nelem - 1;
1352 while (idx < right)
1353 {
1354 mid = (idx + right) / 2;
1355 if (set->elems[mid] < elem)
1356 idx = mid + 1;
1357 else
1358 right = mid;
1359 }
1360 return set->elems[idx] == elem ? idx + 1 : 0;
1361 }
1362
1363 static void
1364 re_node_set_remove_at (re_node_set *set, Idx idx)
1365 {
1366 if (idx < 0 || idx >= set->nelem)
1367 return;
1368 --set->nelem;
1369 for (; idx < set->nelem; idx++)
1370 set->elems[idx] = set->elems[idx + 1];
1371 }
1372
1373
1374
1375
1376
1377 static Idx
1378 re_dfa_add_node (re_dfa_t *dfa, re_token_t token)
1379 {
1380 if (__glibc_unlikely (dfa->nodes_len >= dfa->nodes_alloc))
1381 {
1382 size_t new_nodes_alloc = dfa->nodes_alloc * 2;
1383 Idx *new_nexts, *new_indices;
1384 re_node_set *new_edests, *new_eclosures;
1385 re_token_t *new_nodes;
1386
1387
1388 const size_t max_object_size = MAX (sizeof (re_token_t),
1389 MAX (sizeof (re_node_set),
1390 sizeof (Idx)));
1391 if (__glibc_unlikely (MIN (IDX_MAX, SIZE_MAX / max_object_size)
1392 < new_nodes_alloc))
1393 return -1;
1394
1395 new_nodes = re_realloc (dfa->nodes, re_token_t, new_nodes_alloc);
1396 if (__glibc_unlikely (new_nodes == NULL))
1397 return -1;
1398 dfa->nodes = new_nodes;
1399 dfa->nodes_alloc = new_nodes_alloc;
1400 new_nexts = re_realloc (dfa->nexts, Idx, new_nodes_alloc);
1401 if (new_nexts != NULL)
1402 dfa->nexts = new_nexts;
1403 new_indices = re_realloc (dfa->org_indices, Idx, new_nodes_alloc);
1404 if (new_indices != NULL)
1405 dfa->org_indices = new_indices;
1406 new_edests = re_realloc (dfa->edests, re_node_set, new_nodes_alloc);
1407 if (new_edests != NULL)
1408 dfa->edests = new_edests;
1409 new_eclosures = re_realloc (dfa->eclosures, re_node_set, new_nodes_alloc);
1410 if (new_eclosures != NULL)
1411 dfa->eclosures = new_eclosures;
1412 if (__glibc_unlikely (new_nexts == NULL || new_indices == NULL
1413 || new_edests == NULL || new_eclosures == NULL))
1414 return -1;
1415 }
1416 dfa->nodes[dfa->nodes_len] = token;
1417 dfa->nodes[dfa->nodes_len].constraint = 0;
1418 dfa->nodes[dfa->nodes_len].accept_mb =
1419 ((token.type == OP_PERIOD && dfa->mb_cur_max > 1)
1420 || token.type == COMPLEX_BRACKET);
1421 dfa->nexts[dfa->nodes_len] = -1;
1422 re_node_set_init_empty (dfa->edests + dfa->nodes_len);
1423 re_node_set_init_empty (dfa->eclosures + dfa->nodes_len);
1424 return dfa->nodes_len++;
1425 }
1426
1427 static re_hashval_t
1428 calc_state_hash (const re_node_set *nodes, unsigned int context)
1429 {
1430 re_hashval_t hash = nodes->nelem + context;
1431 Idx i;
1432 for (i = 0 ; i < nodes->nelem ; i++)
1433 hash += nodes->elems[i];
1434 return hash;
1435 }
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446 static re_dfastate_t *
1447 __attribute_warn_unused_result__
1448 re_acquire_state (reg_errcode_t *err, const re_dfa_t *dfa,
1449 const re_node_set *nodes)
1450 {
1451 re_hashval_t hash;
1452 re_dfastate_t *new_state;
1453 struct re_state_table_entry *spot;
1454 Idx i;
1455 #if defined GCC_LINT || defined lint
1456
1457 *err = REG_NOERROR;
1458 #endif
1459 if (__glibc_unlikely (nodes->nelem == 0))
1460 {
1461 *err = REG_NOERROR;
1462 return NULL;
1463 }
1464 hash = calc_state_hash (nodes, 0);
1465 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1466
1467 for (i = 0 ; i < spot->num ; i++)
1468 {
1469 re_dfastate_t *state = spot->array[i];
1470 if (hash != state->hash)
1471 continue;
1472 if (re_node_set_compare (&state->nodes, nodes))
1473 return state;
1474 }
1475
1476
1477 new_state = create_ci_newstate (dfa, nodes, hash);
1478 if (__glibc_unlikely (new_state == NULL))
1479 *err = REG_ESPACE;
1480
1481 return new_state;
1482 }
1483
1484
1485
1486
1487
1488
1489
1490
1491
1492
1493
1494 static re_dfastate_t *
1495 __attribute_warn_unused_result__
1496 re_acquire_state_context (reg_errcode_t *err, const re_dfa_t *dfa,
1497 const re_node_set *nodes, unsigned int context)
1498 {
1499 re_hashval_t hash;
1500 re_dfastate_t *new_state;
1501 struct re_state_table_entry *spot;
1502 Idx i;
1503 #if defined GCC_LINT || defined lint
1504
1505 *err = REG_NOERROR;
1506 #endif
1507 if (nodes->nelem == 0)
1508 {
1509 *err = REG_NOERROR;
1510 return NULL;
1511 }
1512 hash = calc_state_hash (nodes, context);
1513 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1514
1515 for (i = 0 ; i < spot->num ; i++)
1516 {
1517 re_dfastate_t *state = spot->array[i];
1518 if (state->hash == hash
1519 && state->context == context
1520 && re_node_set_compare (state->entrance_nodes, nodes))
1521 return state;
1522 }
1523
1524 new_state = create_cd_newstate (dfa, nodes, context, hash);
1525 if (__glibc_unlikely (new_state == NULL))
1526 *err = REG_ESPACE;
1527
1528 return new_state;
1529 }
1530
1531
1532
1533
1534
1535 static reg_errcode_t
1536 __attribute_warn_unused_result__
1537 register_state (const re_dfa_t *dfa, re_dfastate_t *newstate,
1538 re_hashval_t hash)
1539 {
1540 struct re_state_table_entry *spot;
1541 reg_errcode_t err;
1542 Idx i;
1543
1544 newstate->hash = hash;
1545 err = re_node_set_alloc (&newstate->non_eps_nodes, newstate->nodes.nelem);
1546 if (__glibc_unlikely (err != REG_NOERROR))
1547 return REG_ESPACE;
1548 for (i = 0; i < newstate->nodes.nelem; i++)
1549 {
1550 Idx elem = newstate->nodes.elems[i];
1551 if (!IS_EPSILON_NODE (dfa->nodes[elem].type))
1552 if (! re_node_set_insert_last (&newstate->non_eps_nodes, elem))
1553 return REG_ESPACE;
1554 }
1555
1556 spot = dfa->state_table + (hash & dfa->state_hash_mask);
1557 if (__glibc_unlikely (spot->alloc <= spot->num))
1558 {
1559 Idx new_alloc = 2 * spot->num + 2;
1560 re_dfastate_t **new_array = re_realloc (spot->array, re_dfastate_t *,
1561 new_alloc);
1562 if (__glibc_unlikely (new_array == NULL))
1563 return REG_ESPACE;
1564 spot->array = new_array;
1565 spot->alloc = new_alloc;
1566 }
1567 spot->array[spot->num++] = newstate;
1568 return REG_NOERROR;
1569 }
1570
1571 static void
1572 free_state (re_dfastate_t *state)
1573 {
1574 re_node_set_free (&state->non_eps_nodes);
1575 re_node_set_free (&state->inveclosure);
1576 if (state->entrance_nodes != &state->nodes)
1577 {
1578 re_node_set_free (state->entrance_nodes);
1579 re_free (state->entrance_nodes);
1580 }
1581 re_node_set_free (&state->nodes);
1582 re_free (state->word_trtable);
1583 re_free (state->trtable);
1584 re_free (state);
1585 }
1586
1587
1588
1589
1590 static re_dfastate_t *
1591 __attribute_warn_unused_result__
1592 create_ci_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1593 re_hashval_t hash)
1594 {
1595 Idx i;
1596 reg_errcode_t err;
1597 re_dfastate_t *newstate;
1598
1599 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1600 if (__glibc_unlikely (newstate == NULL))
1601 return NULL;
1602 err = re_node_set_init_copy (&newstate->nodes, nodes);
1603 if (__glibc_unlikely (err != REG_NOERROR))
1604 {
1605 re_free (newstate);
1606 return NULL;
1607 }
1608
1609 newstate->entrance_nodes = &newstate->nodes;
1610 for (i = 0 ; i < nodes->nelem ; i++)
1611 {
1612 re_token_t *node = dfa->nodes + nodes->elems[i];
1613 re_token_type_t type = node->type;
1614 if (type == CHARACTER && !node->constraint)
1615 continue;
1616 newstate->accept_mb |= node->accept_mb;
1617
1618
1619 if (type == END_OF_RE)
1620 newstate->halt = 1;
1621 else if (type == OP_BACK_REF)
1622 newstate->has_backref = 1;
1623 else if (type == ANCHOR || node->constraint)
1624 newstate->has_constraint = 1;
1625 }
1626 err = register_state (dfa, newstate, hash);
1627 if (__glibc_unlikely (err != REG_NOERROR))
1628 {
1629 free_state (newstate);
1630 newstate = NULL;
1631 }
1632 return newstate;
1633 }
1634
1635
1636
1637
1638 static re_dfastate_t *
1639 __attribute_warn_unused_result__
1640 create_cd_newstate (const re_dfa_t *dfa, const re_node_set *nodes,
1641 unsigned int context, re_hashval_t hash)
1642 {
1643 Idx i, nctx_nodes = 0;
1644 reg_errcode_t err;
1645 re_dfastate_t *newstate;
1646
1647 newstate = (re_dfastate_t *) calloc (sizeof (re_dfastate_t), 1);
1648 if (__glibc_unlikely (newstate == NULL))
1649 return NULL;
1650 err = re_node_set_init_copy (&newstate->nodes, nodes);
1651 if (__glibc_unlikely (err != REG_NOERROR))
1652 {
1653 re_free (newstate);
1654 return NULL;
1655 }
1656
1657 newstate->context = context;
1658 newstate->entrance_nodes = &newstate->nodes;
1659
1660 for (i = 0 ; i < nodes->nelem ; i++)
1661 {
1662 re_token_t *node = dfa->nodes + nodes->elems[i];
1663 re_token_type_t type = node->type;
1664 unsigned int constraint = node->constraint;
1665
1666 if (type == CHARACTER && !constraint)
1667 continue;
1668 newstate->accept_mb |= node->accept_mb;
1669
1670
1671 if (type == END_OF_RE)
1672 newstate->halt = 1;
1673 else if (type == OP_BACK_REF)
1674 newstate->has_backref = 1;
1675
1676 if (constraint)
1677 {
1678 if (newstate->entrance_nodes == &newstate->nodes)
1679 {
1680 re_node_set *entrance_nodes = re_malloc (re_node_set, 1);
1681 if (__glibc_unlikely (entrance_nodes == NULL))
1682 {
1683 free_state (newstate);
1684 return NULL;
1685 }
1686 newstate->entrance_nodes = entrance_nodes;
1687 if (re_node_set_init_copy (newstate->entrance_nodes, nodes)
1688 != REG_NOERROR)
1689 {
1690 free_state (newstate);
1691 return NULL;
1692 }
1693 nctx_nodes = 0;
1694 newstate->has_constraint = 1;
1695 }
1696
1697 if (NOT_SATISFY_PREV_CONSTRAINT (constraint,context))
1698 {
1699 re_node_set_remove_at (&newstate->nodes, i - nctx_nodes);
1700 ++nctx_nodes;
1701 }
1702 }
1703 }
1704 err = register_state (dfa, newstate, hash);
1705 if (__glibc_unlikely (err != REG_NOERROR))
1706 {
1707 free_state (newstate);
1708 newstate = NULL;
1709 }
1710 return newstate;
1711 }