This source file includes following definitions.
- extract_number
- extract_number_and_incr
- debug_putchar
- print_fastmap
- print_partial_compiled_pattern
- print_compiled_pattern
- print_double_string
- re_wctype_parse
- re_iswctype
- re_wctype_to_bit
- extend_range_table_work_area
- regex_compile
- store_op1
- store_op2
- insert_op1
- insert_op2
- at_begline_loc_p
- at_endline_loc_p
- group_in_compile_stack
- analyze_first
- re_compile_fastmap
- re_set_registers
- re_search
- re_search_2
- skip_one_char
- skip_noops
- execute_charset
- mutually_exclusive_p
- re_match_2
- unwind_re_match
- re_match_2_internal
- bcmp_translate
- re_compile_pattern
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 #include <config.h>
27
28 #include "regex-emacs.h"
29
30 #include <stdlib.h>
31
32 #include "character.h"
33 #include "buffer.h"
34 #include "syntax.h"
35 #include "category.h"
36 #include "dispextern.h"
37
38
39
40
41
42 #ifdef RE_DUP_MAX
43 # undef RE_DUP_MAX
44 #endif
45 #define RE_DUP_MAX (0xffff)
46
47
48 #define SYNTAX(c) syntax_property (c, 1)
49
50
51 #define BUFFER_SYNTAX(c) syntax_property (c, 0)
52
53 #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
54 #define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
55 #define RE_STRING_CHAR(p, multibyte) \
56 (multibyte ? STRING_CHAR (p) : *(p))
57 #define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
58 (multibyte ? string_char_and_length (p, &(len)) : ((len) = 1, *(p)))
59
60 #define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
61
62 #define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
63
64
65
66
67 #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
68 do { \
69 if (target_multibyte) \
70 { \
71 re_char *dtemp = (p) == (str2) ? (end1) : (p); \
72 re_char *dlimit = (p) > (str2) && (p) <= (end2) ? (str2) : (str1); \
73 while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)) \
74 continue; \
75 c = STRING_CHAR (dtemp); \
76 } \
77 else \
78 { \
79 (c = ((p) == (str2) ? (end1) : (p))[-1]); \
80 (c) = RE_CHAR_TO_MULTIBYTE (c); \
81 } \
82 } while (false)
83
84
85
86 #define GET_CHAR_AFTER(c, p, len) \
87 do { \
88 if (target_multibyte) \
89 (c) = string_char_and_length (p, &(len)); \
90 else \
91 { \
92 (c) = *p; \
93 len = 1; \
94 (c) = RE_CHAR_TO_MULTIBYTE (c); \
95 } \
96 } while (false)
97
98
99 #define IS_REAL_ASCII(c) ((c) < 0200)
100
101
102 #define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
103
104
105
106
107 #define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
108 #define ISCNTRL(c) ((c) < ' ')
109 #define ISXDIGIT(c) (0 <= char_hexdigit (c))
110
111
112
113 #define ISBLANK(c) (IS_REAL_ASCII (c) \
114 ? ((c) == ' ' || (c) == '\t') \
115 : blankp (c))
116
117 #define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
118 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \
119 : graphicp (c))
120
121 #define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
122 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
123 : printablep (c))
124
125 #define ISALNUM(c) (IS_REAL_ASCII (c) \
126 ? (((c) >= 'a' && (c) <= 'z') \
127 || ((c) >= 'A' && (c) <= 'Z') \
128 || ((c) >= '0' && (c) <= '9')) \
129 : alphanumericp (c))
130
131 #define ISALPHA(c) (IS_REAL_ASCII (c) \
132 ? (((c) >= 'a' && (c) <= 'z') \
133 || ((c) >= 'A' && (c) <= 'Z')) \
134 : alphabeticp (c))
135
136 #define ISLOWER(c) lowercasep (c)
137
138 #define ISUPPER(c) uppercasep (c)
139
140
141
142
143
144 #define ISPUNCT(c) (IS_REAL_ASCII (c) \
145 ? ((c) > ' ' && (c) < 0177 \
146 && !(((c) >= 'a' && (c) <= 'z') \
147 || ((c) >= 'A' && (c) <= 'Z') \
148 || ((c) >= '0' && (c) <= '9'))) \
149 : BUFFER_SYNTAX (c) != Sword)
150
151 #define ISSPACE(c) (BUFFER_SYNTAX (c) == Swhitespace)
152
153 #define ISWORD(c) (BUFFER_SYNTAX (c) == Sword)
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168 ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
169
170 #define REGEX_USE_SAFE_ALLOCA \
171 USE_SAFE_ALLOCA; sa_avail = emacs_re_safe_alloca
172
173
174 #define REGEX_REALLOCATE(source, osize, nsize) \
175 (destination = SAFE_ALLOCA (nsize), \
176 memcpy (destination, source, osize))
177
178
179
180
181 #define FIRST_STRING_P(ptr) \
182 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
183
184 #define BYTEWIDTH 8
185
186
187 typedef const unsigned char re_char;
188
189 static void re_compile_fastmap (struct re_pattern_buffer *);
190 static ptrdiff_t re_match_2_internal (struct re_pattern_buffer *bufp,
191 re_char *string1, ptrdiff_t size1,
192 re_char *string2, ptrdiff_t size2,
193 ptrdiff_t pos,
194 struct re_registers *regs,
195 ptrdiff_t stop);
196
197
198
199
200
201
202 typedef enum
203 {
204 no_op = 0,
205
206
207 succeed,
208
209
210 exactn,
211
212
213 anychar,
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229 charset,
230
231
232
233 charset_not,
234
235
236
237
238
239 start_memory,
240
241
242
243
244
245 stop_memory,
246
247
248
249 duplicate,
250
251
252 begline,
253
254
255 endline,
256
257
258 begbuf,
259
260
261 endbuf,
262
263
264 jump,
265
266
267
268 on_failure_jump,
269
270
271
272 on_failure_keep_string_jump,
273
274
275
276
277 on_failure_jump_loop,
278
279
280
281
282
283 on_failure_jump_nastyloop,
284
285
286
287
288
289
290
291 on_failure_jump_smart,
292
293
294
295
296
297 succeed_n,
298
299
300
301 jump_n,
302
303
304
305
306 set_number_at,
307
308 wordbeg,
309 wordend,
310
311 wordbound,
312 notwordbound,
313
314 symbeg,
315 symend,
316
317
318
319 syntaxspec,
320
321
322 notsyntaxspec,
323
324 at_dot,
325
326
327
328
329 categoryspec,
330
331
332
333
334 notcategoryspec
335 } re_opcode_t;
336
337
338
339
340
341 #define STORE_NUMBER(destination, number) \
342 do { \
343 (destination)[0] = (number) & 0377; \
344 (destination)[1] = (number) >> 8; \
345 } while (false)
346
347
348
349
350
351 #define STORE_NUMBER_AND_INCR(destination, number) \
352 do { \
353 STORE_NUMBER (destination, number); \
354 (destination) += 2; \
355 } while (false)
356
357
358
359
360 #define EXTRACT_NUMBER(destination, source) \
361 ((destination) = extract_number (source))
362
363 static int
364 extract_number (re_char *source)
365 {
366 signed char leading_byte = source[1];
367 return leading_byte * 256 + source[0];
368 }
369
370
371
372
373 #define EXTRACT_NUMBER_AND_INCR(destination, source) \
374 ((destination) = extract_number_and_incr (&source))
375
376 static int
377 extract_number_and_incr (re_char **source)
378 {
379 int num = extract_number (*source);
380 *source += 2;
381 return num;
382 }
383
384
385
386
387
388 #define STORE_CHARACTER_AND_INCR(destination, character) \
389 do { \
390 (destination)[0] = (character) & 0377; \
391 (destination)[1] = ((character) >> 8) & 0377; \
392 (destination)[2] = (character) >> 16; \
393 (destination) += 3; \
394 } while (false)
395
396
397
398
399 #define EXTRACT_CHARACTER(destination, source) \
400 do { \
401 (destination) = ((source)[0] \
402 | ((source)[1] << 8) \
403 | ((source)[2] << 16)); \
404 } while (false)
405
406
407
408
409
410
411 #define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
412
413
414 #define CHARSET_RANGE_TABLE_EXISTS_P(p) (((p)[1] & 0x80) != 0)
415
416
417
418
419
420 #define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
421
422
423 #define CHARSET_RANGE_TABLE_BITS(p) \
424 ((p)[2 + CHARSET_BITMAP_SIZE (p)] \
425 + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
426
427
428
429
430
431 #define CHARSET_RANGE_TABLE_END(range_table, count) \
432 ((range_table) + (count) * 2 * 3)
433
434
435
436
437 #ifdef REGEX_EMACS_DEBUG
438
439
440 # include "sysstdio.h"
441
442 static int regex_emacs_debug = -100000;
443
444 # define DEBUG_STATEMENT(e) e
445 # define DEBUG_PRINT(...) \
446 if (regex_emacs_debug > 0) fprintf (stderr, __VA_ARGS__)
447 # define DEBUG_COMPILES_ARGUMENTS
448 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
449 if (regex_emacs_debug > 0) print_partial_compiled_pattern (s, e)
450 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
451 if (regex_emacs_debug > 0) print_double_string (w, s1, sz1, s2, sz2)
452
453 static void
454 debug_putchar (int c)
455 {
456 if (c >= 32 && c <= 126)
457 putc (c, stderr);
458 else
459 {
460 unsigned int uc = c;
461 fprintf (stderr, "{%02x}", uc);
462 }
463 }
464
465
466
467 static void
468 print_fastmap (char *fastmap)
469 {
470 bool was_a_range = false;
471 int i = 0;
472
473 while (i < (1 << BYTEWIDTH))
474 {
475 if (fastmap[i++])
476 {
477 was_a_range = false;
478 debug_putchar (i - 1);
479 while (i < (1 << BYTEWIDTH) && fastmap[i])
480 {
481 was_a_range = true;
482 i++;
483 }
484 if (was_a_range)
485 {
486 debug_putchar ('-');
487 debug_putchar (i - 1);
488 }
489 }
490 }
491 putc ('\n', stderr);
492 }
493
494
495
496
497
498 static void
499 print_partial_compiled_pattern (re_char *start, re_char *end)
500 {
501 int mcnt, mcnt2;
502 re_char *p = start;
503 re_char *pend = end;
504
505 if (start == NULL)
506 {
507 fputs ("(null)\n", stderr);
508 return;
509 }
510
511
512 while (p < pend)
513 {
514 fprintf (stderr, "%td:\t", p - start);
515
516 switch ((re_opcode_t) *p++)
517 {
518 case no_op:
519 fputs ("/no_op", stderr);
520 break;
521
522 case succeed:
523 fputs ("/succeed", stderr);
524 break;
525
526 case exactn:
527 mcnt = *p++;
528 fprintf (stderr, "/exactn/%d", mcnt);
529 do
530 {
531 debug_putchar ('/');
532 debug_putchar (*p++);
533 }
534 while (--mcnt);
535 break;
536
537 case start_memory:
538 fprintf (stderr, "/start_memory/%d", *p++);
539 break;
540
541 case stop_memory:
542 fprintf (stderr, "/stop_memory/%d", *p++);
543 break;
544
545 case duplicate:
546 fprintf (stderr, "/duplicate/%d", *p++);
547 break;
548
549 case anychar:
550 fputs ("/anychar", stderr);
551 break;
552
553 case charset:
554 case charset_not:
555 {
556 int c, last = -100;
557 bool in_range = false;
558 int length = CHARSET_BITMAP_SIZE (p - 1);
559 bool has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
560
561 fprintf (stderr, "/charset [%s",
562 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
563
564 if (p + (*p & 0x7f) >= pend)
565 fputs (" !extends past end of pattern! ", stderr);
566
567 for (c = 0; c < 256; c++)
568 if (c / 8 < length
569 && (p[1 + (c/8)] & (1 << (c % 8))))
570 {
571
572 if (last + 1 == c && ! in_range)
573 {
574 debug_putchar ('-');
575 in_range = true;
576 }
577
578 else if (last + 1 != c && in_range)
579 {
580 debug_putchar (last);
581 in_range = false;
582 }
583
584 if (! in_range)
585 debug_putchar (c);
586
587 last = c;
588 }
589
590 if (in_range)
591 debug_putchar (last);
592
593 debug_putchar (']');
594
595 p += 1 + length;
596
597 if (has_range_table)
598 {
599 int count;
600 fputs ("has-range-table", stderr);
601
602
603 p += 2;
604 EXTRACT_NUMBER_AND_INCR (count, p);
605 p = CHARSET_RANGE_TABLE_END (p, count);
606 }
607 }
608 break;
609
610 case begline:
611 fputs ("/begline", stderr);
612 break;
613
614 case endline:
615 fputs ("/endline", stderr);
616 break;
617
618 case on_failure_jump:
619 EXTRACT_NUMBER_AND_INCR (mcnt, p);
620 fprintf (stderr, "/on_failure_jump to %td", p + mcnt - start);
621 break;
622
623 case on_failure_keep_string_jump:
624 EXTRACT_NUMBER_AND_INCR (mcnt, p);
625 fprintf (stderr, "/on_failure_keep_string_jump to %td",
626 p + mcnt - start);
627 break;
628
629 case on_failure_jump_nastyloop:
630 EXTRACT_NUMBER_AND_INCR (mcnt, p);
631 fprintf (stderr, "/on_failure_jump_nastyloop to %td",
632 p + mcnt - start);
633 break;
634
635 case on_failure_jump_loop:
636 EXTRACT_NUMBER_AND_INCR (mcnt, p);
637 fprintf (stderr, "/on_failure_jump_loop to %td",
638 p + mcnt - start);
639 break;
640
641 case on_failure_jump_smart:
642 EXTRACT_NUMBER_AND_INCR (mcnt, p);
643 fprintf (stderr, "/on_failure_jump_smart to %td",
644 p + mcnt - start);
645 break;
646
647 case jump:
648 EXTRACT_NUMBER_AND_INCR (mcnt, p);
649 fprintf (stderr, "/jump to %td", p + mcnt - start);
650 break;
651
652 case succeed_n:
653 EXTRACT_NUMBER_AND_INCR (mcnt, p);
654 EXTRACT_NUMBER_AND_INCR (mcnt2, p);
655 fprintf (stderr, "/succeed_n to %td, %d times",
656 p - 2 + mcnt - start, mcnt2);
657 break;
658
659 case jump_n:
660 EXTRACT_NUMBER_AND_INCR (mcnt, p);
661 EXTRACT_NUMBER_AND_INCR (mcnt2, p);
662 fprintf (stderr, "/jump_n to %td, %d times",
663 p - 2 + mcnt - start, mcnt2);
664 break;
665
666 case set_number_at:
667 EXTRACT_NUMBER_AND_INCR (mcnt, p);
668 EXTRACT_NUMBER_AND_INCR (mcnt2, p);
669 fprintf (stderr, "/set_number_at location %td to %d",
670 p - 2 + mcnt - start, mcnt2);
671 break;
672
673 case wordbound:
674 fputs ("/wordbound", stderr);
675 break;
676
677 case notwordbound:
678 fputs ("/notwordbound", stderr);
679 break;
680
681 case wordbeg:
682 fputs ("/wordbeg", stderr);
683 break;
684
685 case wordend:
686 fputs ("/wordend", stderr);
687 break;
688
689 case symbeg:
690 fputs ("/symbeg", stderr);
691 break;
692
693 case symend:
694 fputs ("/symend", stderr);
695 break;
696
697 case syntaxspec:
698 fputs ("/syntaxspec", stderr);
699 mcnt = *p++;
700 fprintf (stderr, "/%d", mcnt);
701 break;
702
703 case notsyntaxspec:
704 fputs ("/notsyntaxspec", stderr);
705 mcnt = *p++;
706 fprintf (stderr, "/%d", mcnt);
707 break;
708
709 case at_dot:
710 fputs ("/at_dot", stderr);
711 break;
712
713 case categoryspec:
714 fputs ("/categoryspec", stderr);
715 mcnt = *p++;
716 fprintf (stderr, "/%d", mcnt);
717 break;
718
719 case notcategoryspec:
720 fputs ("/notcategoryspec", stderr);
721 mcnt = *p++;
722 fprintf (stderr, "/%d", mcnt);
723 break;
724
725 case begbuf:
726 fputs ("/begbuf", stderr);
727 break;
728
729 case endbuf:
730 fputs ("/endbuf", stderr);
731 break;
732
733 default:
734 fprintf (stderr, "?%d", *(p-1));
735 }
736
737 putc ('\n', stderr);
738 }
739
740 fprintf (stderr, "%td:\tend of pattern.\n", p - start);
741 }
742
743
744 static void
745 print_compiled_pattern (struct re_pattern_buffer *bufp)
746 {
747 re_char *buffer = bufp->buffer;
748
749 print_partial_compiled_pattern (buffer, buffer + bufp->used);
750 fprintf (stderr, "%td bytes used/%td bytes allocated.\n",
751 bufp->used, bufp->allocated);
752
753 if (bufp->fastmap_accurate && bufp->fastmap)
754 {
755 fputs ("fastmap: ", stderr);
756 print_fastmap (bufp->fastmap);
757 }
758
759 fprintf (stderr, "re_nsub: %td\t", bufp->re_nsub);
760 fprintf (stderr, "regs_alloc: %d\t", bufp->regs_allocated);
761 fprintf (stderr, "can_be_null: %d\n", bufp->can_be_null);
762
763 }
764
765
766 static void
767 print_double_string (re_char *where, re_char *string1, ptrdiff_t size1,
768 re_char *string2, ptrdiff_t size2)
769 {
770 if (where == NULL)
771 fputs ("(null)", stderr);
772 else
773 {
774 int i;
775 if (FIRST_STRING_P (where))
776 {
777 for (i = 0; i < string1 + size1 - where; i++)
778 debug_putchar (where[i]);
779 where = string2;
780 }
781
782 for (i = 0; i < string2 + size2 - where; i++)
783 debug_putchar (where[i]);
784 }
785 }
786
787 #else
788
789 # define DEBUG_STATEMENT(e)
790 # define DEBUG_PRINT(...)
791 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
792 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
793
794 #endif
795
796 typedef enum
797 {
798 REG_NOERROR = 0,
799 REG_NOMATCH,
800
801
802
803
804 REG_BADPAT,
805 REG_ECOLLATE,
806 REG_ECTYPE,
807 REG_EESCAPE,
808 REG_ESUBREG,
809 REG_EBRACK,
810 REG_EPAREN,
811 REG_EBRACE,
812 REG_BADBR,
813 REG_ERANGE,
814 REG_ESPACE,
815 REG_BADRPT,
816
817
818 REG_EEND,
819 REG_ESIZE,
820 REG_ERPAREN,
821 REG_ERANGEX,
822 REG_ESIZEBR
823 } reg_errcode_t;
824
825 static const char *re_error_msgid[] =
826 {
827 [REG_NOERROR] = "Success",
828 [REG_NOMATCH] = "No match",
829 [REG_BADPAT] = "Invalid regular expression",
830 [REG_ECOLLATE] = "Invalid collation character",
831 [REG_ECTYPE] = "Invalid character class name",
832 [REG_EESCAPE] = "Trailing backslash",
833 [REG_ESUBREG] = "Invalid back reference",
834 [REG_EBRACK] = "Unmatched [ or [^",
835 [REG_EPAREN] = "Unmatched ( or \\(",
836 [REG_EBRACE] = "Unmatched \\{",
837 [REG_BADBR] = "Invalid content of \\{\\}",
838 [REG_ERANGE] = "Invalid range end",
839 [REG_ESPACE] = "Memory exhausted",
840 [REG_BADRPT] = "Invalid preceding regular expression",
841 [REG_EEND] = "Premature end of regular expression",
842 [REG_ESIZE] = "Regular expression too big",
843 [REG_ERPAREN] = "Unmatched ) or \\)",
844 [REG_ERANGEX ] = "Range striding over charsets",
845 [REG_ESIZEBR ] = "Invalid content of \\{\\}",
846 };
847
848
849 enum { REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED };
850
851
852
853
854 enum { RE_NREGS = 30 };
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869 #define INIT_FAILURE_ALLOC 20
870
871
872
873
874
875
876
877
878
879
880 ptrdiff_t emacs_re_max_failures = 40000;
881
882 union fail_stack_elt
883 {
884 re_char *pointer;
885 intptr_t integer;
886 };
887
888 typedef union fail_stack_elt fail_stack_elt_t;
889
890 typedef struct
891 {
892 fail_stack_elt_t *stack;
893 ptrdiff_t size;
894 ptrdiff_t avail;
895 ptrdiff_t frame;
896 } fail_stack_type;
897
898 #define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
899
900
901
902
903 #define INIT_FAIL_STACK() \
904 do { \
905 fail_stack.stack = \
906 SAFE_ALLOCA (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \
907 * sizeof (fail_stack_elt_t)); \
908 fail_stack.size = INIT_FAILURE_ALLOC; \
909 fail_stack.avail = 0; \
910 fail_stack.frame = 0; \
911 } while (false)
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926 #define FAIL_STACK_GROWTH_FACTOR 4
927
928 #define GROW_FAIL_STACK(fail_stack) \
929 (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \
930 ? 0 \
931 : ((fail_stack).stack \
932 = REGEX_REALLOCATE ((fail_stack).stack, \
933 (fail_stack).avail * sizeof (fail_stack_elt_t), \
934 min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
935 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \
936 * sizeof (fail_stack_elt_t)), \
937 ((fail_stack).size \
938 = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
939 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)))), \
940 1))
941
942
943
944
945
946 #define PUSH_FAILURE_POINTER(item) \
947 fail_stack.stack[fail_stack.avail++].pointer = (item)
948
949
950
951
952 #define PUSH_FAILURE_INT(item) \
953 fail_stack.stack[fail_stack.avail++].integer = (item)
954
955
956
957 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
958 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
959
960
961 #define NUM_NONREG_ITEMS 3
962
963
964 #define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer
965 #define FAILURE_STR(h) (fail_stack.stack[(h) - 2].pointer)
966 #define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
967 #define TOP_FAILURE_HANDLE() fail_stack.frame
968
969
970 #define ENSURE_FAIL_STACK(space) \
971 while (REMAINING_AVAIL_SLOTS <= space) { \
972 if (!GROW_FAIL_STACK (fail_stack)) \
973 { \
974 unbind_to (count, Qnil); \
975 SAFE_FREE (); \
976 return -2; \
977 } \
978 DEBUG_PRINT ("\n Doubled stack; size now: %td\n", fail_stack.size); \
979 DEBUG_PRINT (" slots available: %td\n", REMAINING_AVAIL_SLOTS);\
980 }
981
982
983 #define PUSH_FAILURE_REG(num) \
984 do { \
985 char *destination; \
986 intptr_t n = num; \
987 eassert (0 < n && n < num_regs); \
988 eassert (REG_UNSET (regstart[n]) <= REG_UNSET (regend[n])); \
989 ENSURE_FAIL_STACK(3); \
990 DEBUG_PRINT (" Push reg %"PRIdPTR" (spanning %p -> %p)\n", \
991 n, regstart[n], regend[n]); \
992 PUSH_FAILURE_POINTER (regstart[n]); \
993 PUSH_FAILURE_POINTER (regend[n]); \
994 PUSH_FAILURE_INT (n); \
995 } while (false)
996
997
998
999 #define PUSH_NUMBER(ptr,val) \
1000 do { \
1001 char *destination; \
1002 int c; \
1003 ENSURE_FAIL_STACK(3); \
1004 EXTRACT_NUMBER (c, ptr); \
1005 DEBUG_PRINT (" Push number %p = %d -> %d\n", ptr, c, val); \
1006 PUSH_FAILURE_INT (c); \
1007 PUSH_FAILURE_POINTER (ptr); \
1008 PUSH_FAILURE_INT (-1); \
1009 STORE_NUMBER (ptr, val); \
1010 } while (false)
1011
1012
1013 #define POP_FAILURE_REG_OR_COUNT() \
1014 do { \
1015 intptr_t pfreg = POP_FAILURE_INT (); \
1016 if (pfreg == -1) \
1017 { \
1018 \
1019 \
1020 unsigned char *ptr = (unsigned char *) POP_FAILURE_POINTER (); \
1021 pfreg = POP_FAILURE_INT (); \
1022 STORE_NUMBER (ptr, pfreg); \
1023 DEBUG_PRINT (" Pop counter %p = %"PRIdPTR"\n", ptr, pfreg); \
1024 } \
1025 else \
1026 { \
1027 eassert (0 < pfreg && pfreg < num_regs); \
1028 regend[pfreg] = POP_FAILURE_POINTER (); \
1029 regstart[pfreg] = POP_FAILURE_POINTER (); \
1030 eassert (REG_UNSET (regstart[pfreg]) <= REG_UNSET (regend[pfreg])); \
1031 DEBUG_PRINT (" Pop reg %ld (spanning %p -> %p)\n", \
1032 pfreg, regstart[pfreg], regend[pfreg]); \
1033 } \
1034 } while (false)
1035
1036
1037 #define CHECK_INFINITE_LOOP(pat_cur, string_place) \
1038 do { \
1039 ptrdiff_t failure = TOP_FAILURE_HANDLE (); \
1040 \
1041 while (failure > 0 \
1042 && (FAILURE_STR (failure) == string_place \
1043 || FAILURE_STR (failure) == NULL)) \
1044 { \
1045 eassert (FAILURE_PAT (failure) >= bufp->buffer \
1046 && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \
1047 if (FAILURE_PAT (failure) == pat_cur) \
1048 { \
1049 cycle = true; \
1050 break; \
1051 } \
1052 DEBUG_PRINT (" Other pattern: %p\n", FAILURE_PAT (failure)); \
1053 failure = NEXT_FAILURE_HANDLE(failure); \
1054 } \
1055 DEBUG_PRINT (" Other string: %p\n", FAILURE_STR (failure)); \
1056 } while (false)
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067 #define PUSH_FAILURE_POINT(pattern, string_place) \
1068 do { \
1069 char *destination; \
1070 DEBUG_STATEMENT (nfailure_points_pushed++); \
1071 DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n"); \
1072 DEBUG_PRINT (" Before push, next avail: %td\n", fail_stack.avail); \
1073 DEBUG_PRINT (" size: %td\n", fail_stack.size); \
1074 \
1075 ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \
1076 \
1077 DEBUG_PRINT ("\n"); \
1078 \
1079 DEBUG_PRINT (" Push frame index: %td\n", fail_stack.frame); \
1080 PUSH_FAILURE_INT (fail_stack.frame); \
1081 \
1082 DEBUG_PRINT (" Push string %p: \"", string_place); \
1083 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\
1084 DEBUG_PRINT ("\"\n"); \
1085 PUSH_FAILURE_POINTER (string_place); \
1086 \
1087 DEBUG_PRINT (" Push pattern %p: ", pattern); \
1088 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern, pend); \
1089 PUSH_FAILURE_POINTER (pattern); \
1090 \
1091 \
1092 fail_stack.frame = fail_stack.avail; \
1093 } while (false)
1094
1095
1096
1097
1098
1099 #define TYPICAL_FAILURE_SIZE 20
1100
1101
1102 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115 #define POP_FAILURE_POINT(str, pat) \
1116 do { \
1117 eassert (!FAIL_STACK_EMPTY ()); \
1118 \
1119 \
1120 DEBUG_PRINT ("POP_FAILURE_POINT:\n"); \
1121 DEBUG_PRINT (" Before pop, next avail: %td\n", fail_stack.avail); \
1122 DEBUG_PRINT (" size: %td\n", fail_stack.size); \
1123 \
1124 \
1125 while (fail_stack.frame < fail_stack.avail) \
1126 POP_FAILURE_REG_OR_COUNT (); \
1127 \
1128 pat = POP_FAILURE_POINTER (); \
1129 DEBUG_PRINT (" Popping pattern %p: ", pat); \
1130 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
1131 \
1132
1133
1134 \
1135 str = POP_FAILURE_POINTER (); \
1136 DEBUG_PRINT (" Popping string %p: \"", str); \
1137 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1138 DEBUG_PRINT ("\"\n"); \
1139 \
1140 fail_stack.frame = POP_FAILURE_INT (); \
1141 DEBUG_PRINT (" Popping frame index: %td\n", fail_stack.frame); \
1142 \
1143 eassert (fail_stack.avail >= 0); \
1144 eassert (fail_stack.frame <= fail_stack.avail); \
1145 \
1146 DEBUG_STATEMENT (nfailure_points_popped++); \
1147 } while (false)
1148
1149
1150
1151
1152 #define REG_UNSET(e) ((e) == NULL)
1153
1154
1155
1156 static reg_errcode_t regex_compile (re_char *pattern, ptrdiff_t size,
1157 bool posix_backtracking,
1158 const char *whitespace_regexp,
1159 struct re_pattern_buffer *bufp);
1160 static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
1161 static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
1162 static void insert_op1 (re_opcode_t op, unsigned char *loc,
1163 int arg, unsigned char *end);
1164 static void insert_op2 (re_opcode_t op, unsigned char *loc,
1165 int arg1, int arg2, unsigned char *end);
1166 static bool at_begline_loc_p (re_char *pattern, re_char *p);
1167 static bool at_endline_loc_p (re_char *p, re_char *pend);
1168 static re_char *skip_one_char (re_char *p);
1169 static int analyze_first (re_char *p, re_char *pend,
1170 char *fastmap, bool multibyte);
1171
1172
1173
1174 #define PATFETCH(c) \
1175 do { \
1176 int len; \
1177 if (p == pend) return REG_EEND; \
1178 c = RE_STRING_CHAR_AND_LENGTH (p, len, multibyte); \
1179 p += len; \
1180 } while (false)
1181
1182
1183 #define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C)
1184 #define TRANSLATE(d) (!NILP (translate) ? RE_TRANSLATE (translate, d) : (d))
1185
1186
1187
1188
1189 #define INIT_BUF_SIZE 32
1190
1191
1192 #define GET_BUFFER_SPACE(n) \
1193 if (bufp->buffer + bufp->allocated - b < (n)) \
1194 EXTEND_BUFFER ((n) - (bufp->buffer + bufp->allocated - b))
1195
1196
1197 #define BUF_PUSH(c) \
1198 do { \
1199 GET_BUFFER_SPACE (1); \
1200 *b++ = (unsigned char) (c); \
1201 } while (false)
1202
1203
1204
1205 #define BUF_PUSH_2(c1, c2) \
1206 do { \
1207 GET_BUFFER_SPACE (2); \
1208 *b++ = (unsigned char) (c1); \
1209 *b++ = (unsigned char) (c2); \
1210 } while (false)
1211
1212
1213
1214
1215 #define STORE_JUMP(op, loc, to) \
1216 store_op1 (op, loc, (to) - (loc) - 3)
1217
1218
1219 #define STORE_JUMP2(op, loc, to, arg) \
1220 store_op2 (op, loc, (to) - (loc) - 3, arg)
1221
1222
1223 #define INSERT_JUMP(op, loc, to) \
1224 insert_op1 (op, loc, (to) - (loc) - 3, b)
1225
1226
1227 #define INSERT_JUMP2(op, loc, to, arg) \
1228 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1229
1230
1231
1232
1233
1234 # define MAX_BUF_SIZE (1 << 15)
1235
1236
1237
1238
1239
1240 #define EXTEND_BUFFER(n) \
1241 do { \
1242 ptrdiff_t requested_extension = n; \
1243 unsigned char *old_buffer = bufp->buffer; \
1244 if (MAX_BUF_SIZE - bufp->allocated < requested_extension) \
1245 return REG_ESIZE; \
1246 ptrdiff_t b_off = b - old_buffer; \
1247 ptrdiff_t begalt_off = begalt - old_buffer; \
1248 ptrdiff_t fixup_alt_jump_off = \
1249 fixup_alt_jump ? fixup_alt_jump - old_buffer : -1; \
1250 ptrdiff_t laststart_off = laststart ? laststart - old_buffer : -1; \
1251 ptrdiff_t pending_exact_off = \
1252 pending_exact ? pending_exact - old_buffer : -1; \
1253 bufp->buffer = xpalloc (bufp->buffer, &bufp->allocated, \
1254 requested_extension, MAX_BUF_SIZE, 1); \
1255 unsigned char *new_buffer = bufp->buffer; \
1256 b = new_buffer + b_off; \
1257 begalt = new_buffer + begalt_off; \
1258 if (0 <= fixup_alt_jump_off) \
1259 fixup_alt_jump = new_buffer + fixup_alt_jump_off; \
1260 if (0 <= laststart_off) \
1261 laststart = new_buffer + laststart_off; \
1262 if (0 <= pending_exact_off) \
1263 pending_exact = new_buffer + pending_exact_off; \
1264 } while (false)
1265
1266
1267
1268
1269
1270 #define MAX_REGNUM 255
1271
1272
1273
1274 typedef int regnum_t;
1275
1276
1277
1278
1279 typedef long pattern_offset_t;
1280 verify (LONG_MIN <= -(MAX_BUF_SIZE - 1) && MAX_BUF_SIZE - 1 <= LONG_MAX);
1281
1282 typedef struct
1283 {
1284 pattern_offset_t begalt_offset;
1285 pattern_offset_t fixup_alt_jump;
1286 pattern_offset_t laststart_offset;
1287 regnum_t regnum;
1288 } compile_stack_elt_t;
1289
1290
1291 typedef struct
1292 {
1293 compile_stack_elt_t *stack;
1294 ptrdiff_t size;
1295 ptrdiff_t avail;
1296 } compile_stack_type;
1297
1298
1299 #define INIT_COMPILE_STACK_SIZE 32
1300
1301 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1302 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1303
1304
1305 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1306
1307
1308 struct range_table_work_area
1309 {
1310 int *table;
1311 int allocated;
1312 int used;
1313 int bits;
1314 };
1315
1316
1317
1318
1319 #define EXTEND_RANGE_TABLE(work_area, n) \
1320 do { \
1321 if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \
1322 { \
1323 extend_range_table_work_area (&work_area); \
1324 if ((work_area).table == 0) \
1325 return (REG_ESPACE); \
1326 } \
1327 } while (false)
1328
1329 #define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \
1330 (work_area).bits |= (bit)
1331
1332
1333 #define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \
1334 do { \
1335 EXTEND_RANGE_TABLE ((work_area), 2); \
1336 (work_area).table[(work_area).used++] = (range_start); \
1337 (work_area).table[(work_area).used++] = (range_end); \
1338 } while (false)
1339
1340
1341 #define FREE_RANGE_TABLE_WORK_AREA(work_area) \
1342 do { \
1343 if ((work_area).table) \
1344 xfree ((work_area).table); \
1345 } while (false)
1346
1347 #define CLEAR_RANGE_TABLE_WORK_USED(work_area) \
1348 ((work_area).used = 0, (work_area).bits = 0)
1349 #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
1350 #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
1351 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
1352
1353
1354
1355
1356 #define BIT_WORD 0x1
1357 #define BIT_LOWER 0x2
1358 #define BIT_PUNCT 0x4
1359 #define BIT_SPACE 0x8
1360 #define BIT_UPPER 0x10
1361 #define BIT_MULTIBYTE 0x20
1362 #define BIT_ALPHA 0x40
1363 #define BIT_ALNUM 0x80
1364 #define BIT_GRAPH 0x100
1365 #define BIT_PRINT 0x200
1366 #define BIT_BLANK 0x400
1367
1368
1369
1370 #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384 #define SETUP_ASCII_RANGE(work_area, FROM, TO) \
1385 do { \
1386 int C0, C1; \
1387 \
1388 for (C0 = (FROM); C0 <= (TO); C0++) \
1389 { \
1390 C1 = TRANSLATE (C0); \
1391 if (! ASCII_CHAR_P (C1)) \
1392 { \
1393 SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1); \
1394 if ((C1 = RE_CHAR_TO_UNIBYTE (C1)) < 0) \
1395 C1 = C0; \
1396 } \
1397 SET_LIST_BIT (C1); \
1398 } \
1399 } while (false)
1400
1401
1402
1403
1404 #define SETUP_UNIBYTE_RANGE(work_area, FROM, TO) \
1405 do { \
1406 int C0, C1, C2, I; \
1407 int USED = RANGE_TABLE_WORK_USED (work_area); \
1408 \
1409 for (C0 = (FROM); C0 <= (TO); C0++) \
1410 { \
1411 C1 = RE_CHAR_TO_MULTIBYTE (C0); \
1412 if (CHAR_BYTE8_P (C1)) \
1413 SET_LIST_BIT (C0); \
1414 else \
1415 { \
1416 C2 = TRANSLATE (C1); \
1417 if (C2 == C1 \
1418 || (C1 = RE_CHAR_TO_UNIBYTE (C2)) < 0) \
1419 C1 = C0; \
1420 SET_LIST_BIT (C1); \
1421 for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
1422 { \
1423 int from = RANGE_TABLE_WORK_ELT (work_area, I); \
1424 int to = RANGE_TABLE_WORK_ELT (work_area, I + 1); \
1425 \
1426 if (C2 >= from - 1 && C2 <= to + 1) \
1427 { \
1428 if (C2 == from - 1) \
1429 RANGE_TABLE_WORK_ELT (work_area, I)--; \
1430 else if (C2 == to + 1) \
1431 RANGE_TABLE_WORK_ELT (work_area, I + 1)++; \
1432 break; \
1433 } \
1434 } \
1435 if (I < USED) \
1436 SET_RANGE_TABLE_WORK_AREA ((work_area), C2, C2); \
1437 } \
1438 } \
1439 } while (false)
1440
1441
1442
1443
1444 #define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO) \
1445 do { \
1446 int C0, C1, C2, I, USED = RANGE_TABLE_WORK_USED (work_area); \
1447 \
1448 SET_RANGE_TABLE_WORK_AREA ((work_area), (FROM), (TO)); \
1449 for (C0 = (FROM); C0 <= (TO); C0++) \
1450 { \
1451 C1 = TRANSLATE (C0); \
1452 if ((C2 = RE_CHAR_TO_UNIBYTE (C1)) >= 0 \
1453 || (C1 != C0 && (C2 = RE_CHAR_TO_UNIBYTE (C0)) >= 0)) \
1454 SET_LIST_BIT (C2); \
1455 if (C1 >= (FROM) && C1 <= (TO)) \
1456 continue; \
1457 for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
1458 { \
1459 int from = RANGE_TABLE_WORK_ELT (work_area, I); \
1460 int to = RANGE_TABLE_WORK_ELT (work_area, I + 1); \
1461 \
1462 if (C1 >= from - 1 && C1 <= to + 1) \
1463 { \
1464 if (C1 == from - 1) \
1465 RANGE_TABLE_WORK_ELT (work_area, I)--; \
1466 else if (C1 == to + 1) \
1467 RANGE_TABLE_WORK_ELT (work_area, I + 1)++; \
1468 break; \
1469 } \
1470 } \
1471 if (I < USED) \
1472 SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1); \
1473 } \
1474 } while (false)
1475
1476
1477 #define GET_INTERVAL_COUNT(num) \
1478 do { \
1479 if (p == pend) \
1480 FREE_STACK_RETURN (REG_EBRACE); \
1481 else \
1482 { \
1483 PATFETCH (c); \
1484 while ('0' <= c && c <= '9') \
1485 { \
1486 if (num < 0) \
1487 num = 0; \
1488 if (RE_DUP_MAX / 10 - (RE_DUP_MAX % 10 < c - '0') < num) \
1489 FREE_STACK_RETURN (REG_ESIZEBR); \
1490 num = num * 10 + c - '0'; \
1491 if (p == pend) \
1492 FREE_STACK_RETURN (REG_EBRACE); \
1493 PATFETCH (c); \
1494 } \
1495 } \
1496 } while (false)
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512 re_wctype_t
1513 re_wctype_parse (const unsigned char **strp, ptrdiff_t limit)
1514 {
1515 const char *beg = (const char *)*strp, *it;
1516
1517 if (limit < 4 || beg[0] != '[' || beg[1] != ':')
1518 return -1;
1519
1520 beg += 2;
1521 limit -= 3;
1522 for (it = beg; it[0] != ':' || it[1] != ']'; ++it)
1523 if (!--limit)
1524 return -1;
1525
1526 *strp = (const unsigned char *)(it + 2);
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555 switch (it - beg) {
1556 case 4:
1557 if (!memcmp (beg, "word", 4)) return RECC_WORD;
1558 break;
1559 case 5:
1560 if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM;
1561 if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA;
1562 if (!memcmp (beg, "space", 5)) return RECC_SPACE;
1563 if (!memcmp (beg, "digit", 5)) return RECC_DIGIT;
1564 if (!memcmp (beg, "blank", 5)) return RECC_BLANK;
1565 if (!memcmp (beg, "upper", 5)) return RECC_UPPER;
1566 if (!memcmp (beg, "lower", 5)) return RECC_LOWER;
1567 if (!memcmp (beg, "punct", 5)) return RECC_PUNCT;
1568 if (!memcmp (beg, "ascii", 5)) return RECC_ASCII;
1569 if (!memcmp (beg, "graph", 5)) return RECC_GRAPH;
1570 if (!memcmp (beg, "print", 5)) return RECC_PRINT;
1571 if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL;
1572 break;
1573 case 6:
1574 if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT;
1575 break;
1576 case 7:
1577 if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE;
1578 break;
1579 case 8:
1580 if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII;
1581 break;
1582 case 9:
1583 if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE;
1584 break;
1585 }
1586
1587 return RECC_ERROR;
1588 }
1589
1590
1591 bool
1592 re_iswctype (int ch, re_wctype_t cc)
1593 {
1594 switch (cc)
1595 {
1596 case RECC_ALNUM: return ISALNUM (ch) != 0;
1597 case RECC_ALPHA: return ISALPHA (ch) != 0;
1598 case RECC_BLANK: return ISBLANK (ch) != 0;
1599 case RECC_CNTRL: return ISCNTRL (ch) != 0;
1600 case RECC_DIGIT: return ISDIGIT (ch) != 0;
1601 case RECC_GRAPH: return ISGRAPH (ch) != 0;
1602 case RECC_LOWER: return ISLOWER (ch) != 0;
1603 case RECC_PRINT: return ISPRINT (ch) != 0;
1604 case RECC_PUNCT: return ISPUNCT (ch) != 0;
1605 case RECC_SPACE: return ISSPACE (ch) != 0;
1606 case RECC_UPPER: return ISUPPER (ch) != 0;
1607 case RECC_XDIGIT: return ISXDIGIT (ch) != 0;
1608 case RECC_ASCII: return IS_REAL_ASCII (ch) != 0;
1609 case RECC_NONASCII: return !IS_REAL_ASCII (ch);
1610 case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0;
1611 case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
1612 case RECC_WORD: return ISWORD (ch) != 0;
1613 case RECC_ERROR: return false;
1614 default:
1615 abort ();
1616 }
1617 }
1618
1619
1620
1621 static int
1622 re_wctype_to_bit (re_wctype_t cc)
1623 {
1624 switch (cc)
1625 {
1626 case RECC_NONASCII:
1627 case RECC_MULTIBYTE: return BIT_MULTIBYTE;
1628 case RECC_ALPHA: return BIT_ALPHA;
1629 case RECC_ALNUM: return BIT_ALNUM;
1630 case RECC_WORD: return BIT_WORD;
1631 case RECC_LOWER: return BIT_LOWER;
1632 case RECC_UPPER: return BIT_UPPER;
1633 case RECC_PUNCT: return BIT_PUNCT;
1634 case RECC_SPACE: return BIT_SPACE;
1635 case RECC_GRAPH: return BIT_GRAPH;
1636 case RECC_PRINT: return BIT_PRINT;
1637 case RECC_BLANK: return BIT_BLANK;
1638 case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
1639 case RECC_UNIBYTE: case RECC_ERROR: return 0;
1640 default:
1641 abort ();
1642 }
1643 }
1644
1645
1646
1647
1648
1649 static void
1650 extend_range_table_work_area (struct range_table_work_area *work_area)
1651 {
1652 work_area->allocated += 16 * sizeof (int);
1653 work_area->table = xrealloc (work_area->table, work_area->allocated);
1654 }
1655
1656
1657
1658 static bool group_in_compile_stack (compile_stack_type, regnum_t);
1659
1660
1661
1662 #define FIXUP_ALT_JUMP() \
1663 do { \
1664 if (fixup_alt_jump) \
1665 STORE_JUMP (jump, fixup_alt_jump, b); \
1666 } while (false)
1667
1668
1669
1670 #define FREE_STACK_RETURN(value) \
1671 do { \
1672 FREE_RANGE_TABLE_WORK_AREA (range_table_work); \
1673 xfree (compile_stack.stack); \
1674 return value; \
1675 } while (false)
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696 static reg_errcode_t
1697 regex_compile (re_char *pattern, ptrdiff_t size,
1698 bool posix_backtracking,
1699 const char *whitespace_regexp,
1700 struct re_pattern_buffer *bufp)
1701 {
1702
1703 int c, c1;
1704
1705
1706 unsigned char *b;
1707
1708
1709 compile_stack_type compile_stack;
1710
1711
1712 re_char *p = pattern;
1713 re_char *pend = pattern + size;
1714
1715
1716 Lisp_Object translate = bufp->translate;
1717
1718
1719
1720
1721
1722 unsigned char *pending_exact = 0;
1723
1724
1725
1726
1727
1728 unsigned char *laststart = 0;
1729
1730
1731 unsigned char *begalt;
1732
1733
1734
1735 re_char *beg_interval;
1736
1737
1738
1739
1740 unsigned char *fixup_alt_jump = 0;
1741
1742
1743 struct range_table_work_area range_table_work;
1744
1745
1746 bool multibyte = RE_MULTIBYTE_P (bufp);
1747
1748
1749 int in_subpattern = 0;
1750
1751
1752
1753 re_char *main_p;
1754 re_char *main_pattern;
1755 re_char *main_pend;
1756
1757 #ifdef REGEX_EMACS_DEBUG
1758 regex_emacs_debug++;
1759 DEBUG_PRINT ("\nCompiling pattern: ");
1760 if (regex_emacs_debug > 0)
1761 {
1762 for (ptrdiff_t debug_count = 0; debug_count < size; debug_count++)
1763 debug_putchar (pattern[debug_count]);
1764 putc ('\n', stderr);
1765 }
1766 #endif
1767
1768
1769 compile_stack.stack = xmalloc (INIT_COMPILE_STACK_SIZE
1770 * sizeof *compile_stack.stack);
1771 __lsan_ignore_object (compile_stack.stack);
1772 compile_stack.size = INIT_COMPILE_STACK_SIZE;
1773 compile_stack.avail = 0;
1774
1775 range_table_work.table = 0;
1776 range_table_work.allocated = 0;
1777
1778
1779 bufp->fastmap_accurate = false;
1780 bufp->used_syntax = false;
1781
1782
1783
1784
1785 bufp->used = 0;
1786
1787 bufp->re_nsub = 0;
1788
1789 if (bufp->allocated == 0)
1790 {
1791
1792
1793 bufp->buffer = xrealloc (bufp->buffer, INIT_BUF_SIZE);
1794 bufp->allocated = INIT_BUF_SIZE;
1795 }
1796
1797 begalt = b = bufp->buffer;
1798
1799
1800 while (1)
1801 {
1802 if (p == pend)
1803 {
1804
1805
1806 if (in_subpattern)
1807 {
1808 in_subpattern = 0;
1809 pattern = main_pattern;
1810 p = main_p;
1811 pend = main_pend;
1812 continue;
1813 }
1814
1815 break;
1816 }
1817
1818 PATFETCH (c);
1819
1820 switch (c)
1821 {
1822 case ' ':
1823 {
1824 re_char *p1 = p;
1825
1826
1827
1828 if (!whitespace_regexp || in_subpattern)
1829 goto normal_char;
1830
1831
1832 while (p1 != pend)
1833 {
1834 if (*p1 != ' ')
1835 break;
1836 p1++;
1837 }
1838
1839
1840 if (p1 != pend
1841 && (*p1 == '*' || *p1 == '+' || *p1 == '?'
1842 || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
1843 goto normal_char;
1844
1845
1846 in_subpattern = 1;
1847 main_p = p1;
1848 main_pend = pend;
1849 main_pattern = pattern;
1850 p = pattern = (re_char *) whitespace_regexp;
1851 pend = p + strlen (whitespace_regexp);
1852 break;
1853 }
1854
1855 case '^':
1856 if (! (p == pattern + 1 || at_begline_loc_p (pattern, p)))
1857 goto normal_char;
1858
1859
1860 laststart = 0;
1861 BUF_PUSH (begline);
1862 break;
1863
1864 case '$':
1865 if (! (p == pend || at_endline_loc_p (p, pend)))
1866 goto normal_char;
1867 laststart = b;
1868 BUF_PUSH (endline);
1869 break;
1870
1871
1872 case '+':
1873 case '?':
1874 case '*':
1875
1876 if (!laststart)
1877 goto normal_char;
1878
1879 {
1880
1881 bool zero_times_ok = false, many_times_ok = false;
1882 bool greedy = true;
1883
1884
1885
1886
1887
1888
1889 for (;;)
1890 {
1891 if (c == '?' && (zero_times_ok || many_times_ok))
1892 greedy = false;
1893 else
1894 {
1895 zero_times_ok |= c != '+';
1896 many_times_ok |= c != '?';
1897 }
1898
1899 if (! (p < pend && (*p == '*' || *p == '+' || *p == '?')))
1900 break;
1901
1902 c = *p++;
1903 }
1904
1905
1906
1907 if (laststart == b)
1908 break;
1909
1910
1911
1912 if (greedy)
1913 {
1914 if (many_times_ok)
1915 {
1916 bool simple = skip_one_char (laststart) == b;
1917 ptrdiff_t startoffset = 0;
1918 re_opcode_t ofj =
1919
1920 (simple || !analyze_first (laststart, b, NULL, false))
1921 ? on_failure_jump : on_failure_jump_loop;
1922 eassert (skip_one_char (laststart) <= b);
1923
1924 if (!zero_times_ok && simple)
1925 {
1926
1927
1928 unsigned char *p1, *p2;
1929 startoffset = b - laststart;
1930 GET_BUFFER_SPACE (startoffset);
1931 p1 = b; p2 = laststart;
1932 while (p2 < p1)
1933 *b++ = *p2++;
1934 zero_times_ok = 1;
1935 }
1936
1937 GET_BUFFER_SPACE (6);
1938 if (!zero_times_ok)
1939
1940 STORE_JUMP (ofj, b, b + 6);
1941 else
1942
1943
1944
1945
1946 INSERT_JUMP (simple ? on_failure_jump_smart : ofj,
1947 laststart + startoffset, b + 6);
1948 b += 3;
1949 STORE_JUMP (jump, b, laststart + startoffset);
1950 b += 3;
1951 }
1952 else
1953 {
1954
1955 eassert (zero_times_ok);
1956 GET_BUFFER_SPACE (3);
1957 INSERT_JUMP (on_failure_jump, laststart, b + 3);
1958 b += 3;
1959 }
1960 }
1961 else
1962 {
1963
1964 GET_BUFFER_SPACE (7);
1965 if (many_times_ok)
1966 {
1967 bool emptyp = !!analyze_first (laststart, b, NULL, false);
1968
1969
1970
1971
1972 if (emptyp) BUF_PUSH (no_op);
1973 STORE_JUMP (emptyp ? on_failure_jump_nastyloop
1974 : on_failure_jump, b, laststart);
1975 b += 3;
1976 if (zero_times_ok)
1977 {
1978
1979
1980
1981 INSERT_JUMP (jump, laststart, b);
1982 b += 3;
1983 }
1984 }
1985 else
1986 {
1987
1988 INSERT_JUMP (jump, laststart, b + 3);
1989 b += 3;
1990 INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
1991 b += 3;
1992 }
1993 }
1994 }
1995 pending_exact = 0;
1996 break;
1997
1998
1999 case '.':
2000 laststart = b;
2001 BUF_PUSH (anychar);
2002 break;
2003
2004
2005 case '[':
2006 {
2007 re_char *p1;
2008
2009 CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
2010
2011 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2012
2013
2014
2015 GET_BUFFER_SPACE (34);
2016
2017 laststart = b;
2018
2019
2020
2021 BUF_PUSH (*p == '^' ? charset_not : charset);
2022 if (*p == '^')
2023 p++;
2024
2025
2026 p1 = p;
2027
2028
2029 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2030
2031
2032 memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
2033
2034
2035 for (;;)
2036 {
2037 const unsigned char *p2 = p;
2038 int ch;
2039
2040 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2041
2042
2043
2044 re_wctype_t cc = re_wctype_parse (&p, pend - p);
2045 if (cc != -1)
2046 {
2047 if (cc == 0)
2048 FREE_STACK_RETURN (REG_ECTYPE);
2049
2050 if (p == pend)
2051 FREE_STACK_RETURN (REG_EBRACK);
2052
2053
2054
2055
2056
2057
2058 for (c = 0; c < 0x80; ++c)
2059 if (re_iswctype (c, cc))
2060 {
2061 SET_LIST_BIT (c);
2062 c1 = TRANSLATE (c);
2063 if (c1 == c)
2064 continue;
2065 if (ASCII_CHAR_P (c1))
2066 SET_LIST_BIT (c1);
2067 else if ((c1 = RE_CHAR_TO_UNIBYTE (c1)) >= 0)
2068 SET_LIST_BIT (c1);
2069 }
2070 SET_RANGE_TABLE_WORK_AREA_BIT
2071 (range_table_work, re_wctype_to_bit (cc));
2072
2073
2074
2075
2076
2077 if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
2078 bufp->used_syntax = true;
2079
2080
2081 continue;
2082 }
2083
2084
2085
2086
2087
2088 PATFETCH (c);
2089
2090
2091
2092
2093 if (c == ']' && p2 != p1)
2094 break;
2095
2096 if (p < pend && p[0] == '-' && p[1] != ']')
2097 {
2098
2099
2100 PATFETCH (c1);
2101
2102
2103 PATFETCH (c1);
2104
2105 if (CHAR_BYTE8_P (c1)
2106 && ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c))
2107
2108
2109 c = c1 + 1;
2110 }
2111 else
2112
2113 c1 = c;
2114
2115 if (c <= c1)
2116 {
2117 if (c < 128)
2118 {
2119 ch = min (127, c1);
2120 SETUP_ASCII_RANGE (range_table_work, c, ch);
2121 c = ch + 1;
2122 if (CHAR_BYTE8_P (c1))
2123 c = BYTE8_TO_CHAR (128);
2124 }
2125 if (c <= c1)
2126 {
2127 if (CHAR_BYTE8_P (c))
2128 {
2129 c = CHAR_TO_BYTE8 (c);
2130 c1 = CHAR_TO_BYTE8 (c1);
2131 for (; c <= c1; c++)
2132 SET_LIST_BIT (c);
2133 }
2134 else if (multibyte)
2135 SETUP_MULTIBYTE_RANGE (range_table_work, c, c1);
2136 else
2137 SETUP_UNIBYTE_RANGE (range_table_work, c, c1);
2138 }
2139 }
2140 }
2141
2142
2143
2144 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2145 b[-1]--;
2146 b += b[-1];
2147
2148
2149 if (RANGE_TABLE_WORK_USED (range_table_work)
2150 || RANGE_TABLE_WORK_BITS (range_table_work))
2151 {
2152 int i;
2153 int used = RANGE_TABLE_WORK_USED (range_table_work);
2154
2155
2156
2157
2158 GET_BUFFER_SPACE (4 + used * 3);
2159
2160
2161 laststart[1] |= 0x80;
2162
2163
2164 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
2165 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
2166
2167 STORE_NUMBER_AND_INCR (b, used / 2);
2168 for (i = 0; i < used; i++)
2169 STORE_CHARACTER_AND_INCR
2170 (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
2171 }
2172 }
2173 break;
2174
2175
2176 case '\\':
2177 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2178
2179
2180
2181
2182 PATFETCH (c);
2183
2184 switch (c)
2185 {
2186 case '(':
2187 {
2188 bool shy = false;
2189 regnum_t regnum = 0;
2190 if (p+1 < pend)
2191 {
2192
2193 if (*p == '?')
2194 {
2195 PATFETCH (c);
2196 while (!shy)
2197 {
2198 PATFETCH (c);
2199 switch (c)
2200 {
2201 case ':': shy = true; break;
2202 case '0':
2203
2204
2205 if (regnum == 0)
2206 FREE_STACK_RETURN (REG_BADPAT);
2207 FALLTHROUGH;
2208 case '1': case '2': case '3': case '4':
2209 case '5': case '6': case '7': case '8': case '9':
2210 if (ckd_mul (®num, regnum, 10)
2211 || ckd_add (®num, regnum, c - '0'))
2212 FREE_STACK_RETURN (REG_ESIZE);
2213 break;
2214 default:
2215
2216 FREE_STACK_RETURN (REG_BADPAT);
2217 }
2218 }
2219 }
2220 }
2221
2222 if (!shy)
2223 regnum = ++bufp->re_nsub;
2224 else if (regnum)
2225 {
2226 shy = false;
2227 if (regnum > bufp->re_nsub)
2228 bufp->re_nsub = regnum;
2229 else if (regnum > bufp->re_nsub
2230
2231
2232
2233
2234
2235 || group_in_compile_stack (compile_stack, regnum))
2236 FREE_STACK_RETURN (REG_BADPAT);
2237 }
2238 else
2239
2240 regnum = - bufp->re_nsub;
2241
2242 if (COMPILE_STACK_FULL)
2243 compile_stack.stack
2244 = xpalloc (compile_stack.stack, &compile_stack.size,
2245 1, -1, sizeof *compile_stack.stack);
2246
2247
2248
2249
2250
2251 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2252 COMPILE_STACK_TOP.fixup_alt_jump
2253 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2254 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2255 COMPILE_STACK_TOP.regnum = regnum;
2256
2257
2258
2259 if (regnum <= MAX_REGNUM && regnum > 0)
2260 BUF_PUSH_2 (start_memory, regnum);
2261
2262 compile_stack.avail++;
2263
2264 fixup_alt_jump = 0;
2265 laststart = 0;
2266 begalt = b;
2267
2268
2269
2270 pending_exact = 0;
2271 break;
2272 }
2273
2274 case ')':
2275 if (COMPILE_STACK_EMPTY)
2276 FREE_STACK_RETURN (REG_ERPAREN);
2277
2278 FIXUP_ALT_JUMP ();
2279
2280
2281 if (COMPILE_STACK_EMPTY)
2282 FREE_STACK_RETURN (REG_ERPAREN);
2283
2284
2285
2286 eassert (compile_stack.avail != 0);
2287 {
2288
2289
2290
2291 regnum_t regnum;
2292
2293 compile_stack.avail--;
2294 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2295 fixup_alt_jump
2296 = COMPILE_STACK_TOP.fixup_alt_jump
2297 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2298 : 0;
2299 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2300 regnum = COMPILE_STACK_TOP.regnum;
2301
2302
2303
2304 pending_exact = 0;
2305
2306
2307
2308 if (regnum <= MAX_REGNUM && regnum > 0)
2309 BUF_PUSH_2 (stop_memory, regnum);
2310 }
2311 break;
2312
2313
2314 case '|':
2315
2316
2317 GET_BUFFER_SPACE (3);
2318 INSERT_JUMP (on_failure_jump, begalt, b + 6);
2319 pending_exact = 0;
2320 b += 3;
2321
2322
2323
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338 FIXUP_ALT_JUMP ();
2339
2340
2341
2342
2343 fixup_alt_jump = b;
2344 GET_BUFFER_SPACE (3);
2345 b += 3;
2346
2347 laststart = 0;
2348 begalt = b;
2349 break;
2350
2351
2352 case '{':
2353 {
2354
2355 int lower_bound = 0, upper_bound = -1;
2356
2357 beg_interval = p;
2358
2359 GET_INTERVAL_COUNT (lower_bound);
2360
2361 if (c == ',')
2362 GET_INTERVAL_COUNT (upper_bound);
2363 else
2364
2365 upper_bound = lower_bound;
2366
2367 if (lower_bound < 0
2368 || (0 <= upper_bound && upper_bound < lower_bound)
2369 || c != '\\')
2370 FREE_STACK_RETURN (REG_BADBR);
2371 if (p == pend)
2372 FREE_STACK_RETURN (REG_EESCAPE);
2373 if (*p++ != '}')
2374 FREE_STACK_RETURN (REG_BADBR);
2375
2376
2377
2378
2379 if (!laststart)
2380 goto unfetch_interval;
2381
2382 if (upper_bound == 0)
2383
2384
2385 b = laststart;
2386 else if (lower_bound == 1 && upper_bound == 1)
2387
2388 ;
2389
2390
2391
2392
2393
2394
2395
2396
2397
2398
2399 else
2400 {
2401
2402 int nbytes = upper_bound < 0 ? 3 : upper_bound > 1 ? 5 : 0;
2403 int startoffset = 0;
2404
2405 GET_BUFFER_SPACE (20);
2406
2407 if (lower_bound == 0)
2408 {
2409
2410
2411 INSERT_JUMP (on_failure_jump_loop, laststart,
2412 b + 3 + nbytes);
2413 b += 3;
2414 }
2415 else
2416 {
2417
2418
2419
2420
2421
2422 INSERT_JUMP2 (succeed_n, laststart,
2423 b + 5 + nbytes,
2424 lower_bound);
2425 b += 5;
2426
2427
2428
2429
2430
2431 insert_op2 (set_number_at, laststart, 5,
2432 lower_bound, b);
2433 b += 5;
2434 startoffset += 5;
2435 }
2436
2437 if (upper_bound < 0)
2438 {
2439
2440
2441 STORE_JUMP (jump, b, laststart + startoffset);
2442 b += 3;
2443 }
2444 else if (upper_bound > 1)
2445 {
2446
2447
2448
2449
2450
2451
2452 STORE_JUMP2 (jump_n, b, laststart + startoffset,
2453 upper_bound - 1);
2454 b += 5;
2455
2456
2457
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470 insert_op2 (set_number_at, laststart, b - laststart,
2471 upper_bound - 1, b);
2472 b += 5;
2473 }
2474 }
2475 pending_exact = 0;
2476 beg_interval = NULL;
2477 }
2478 break;
2479
2480 unfetch_interval:
2481
2482 eassert (beg_interval);
2483 p = beg_interval;
2484 beg_interval = NULL;
2485 eassert (p > pattern && p[-1] == '\\');
2486 c = '{';
2487 goto normal_char;
2488
2489 case '=':
2490 laststart = b;
2491 BUF_PUSH (at_dot);
2492 break;
2493
2494 case 's':
2495 laststart = b;
2496 PATFETCH (c);
2497 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2498 break;
2499
2500 case 'S':
2501 laststart = b;
2502 PATFETCH (c);
2503 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2504 break;
2505
2506 case 'c':
2507 laststart = b;
2508 PATFETCH (c);
2509 BUF_PUSH_2 (categoryspec, c);
2510 break;
2511
2512 case 'C':
2513 laststart = b;
2514 PATFETCH (c);
2515 BUF_PUSH_2 (notcategoryspec, c);
2516 break;
2517
2518 case 'w':
2519 laststart = b;
2520 BUF_PUSH_2 (syntaxspec, Sword);
2521 break;
2522
2523
2524 case 'W':
2525 laststart = b;
2526 BUF_PUSH_2 (notsyntaxspec, Sword);
2527 break;
2528
2529
2530 case '<':
2531 laststart = b;
2532 BUF_PUSH (wordbeg);
2533 break;
2534
2535 case '>':
2536 laststart = b;
2537 BUF_PUSH (wordend);
2538 break;
2539
2540 case '_':
2541 laststart = b;
2542 PATFETCH (c);
2543 if (c == '<')
2544 BUF_PUSH (symbeg);
2545 else if (c == '>')
2546 BUF_PUSH (symend);
2547 else
2548 FREE_STACK_RETURN (REG_BADPAT);
2549 break;
2550
2551 case 'b':
2552 laststart = b;
2553 BUF_PUSH (wordbound);
2554 break;
2555
2556 case 'B':
2557 laststart = b;
2558 BUF_PUSH (notwordbound);
2559 break;
2560
2561 case '`':
2562
2563
2564 laststart = 0;
2565 BUF_PUSH (begbuf);
2566 break;
2567
2568 case '\'':
2569 laststart = b;
2570 BUF_PUSH (endbuf);
2571 break;
2572
2573 case '1': case '2': case '3': case '4': case '5':
2574 case '6': case '7': case '8': case '9':
2575 {
2576 regnum_t reg = c - '0';
2577
2578 if (reg > bufp->re_nsub || reg < 1
2579
2580 || group_in_compile_stack (compile_stack, reg))
2581 FREE_STACK_RETURN (REG_ESUBREG);
2582
2583 laststart = b;
2584 BUF_PUSH_2 (duplicate, reg);
2585 }
2586 break;
2587
2588 default:
2589
2590
2591
2592 goto normal_char;
2593 }
2594 break;
2595
2596
2597 default:
2598
2599 normal_char:
2600
2601 if (!pending_exact
2602
2603
2604 || pending_exact + *pending_exact + 1 != b
2605
2606
2607 || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
2608
2609
2610 || (p != pend
2611 && (*p == '*' || *p == '+' || *p == '?'))
2612 || (p + 1 < pend && p[0] == '\\' && p[1] == '{'))
2613 {
2614
2615
2616 laststart = b;
2617
2618 BUF_PUSH_2 (exactn, 0);
2619 pending_exact = b - 1;
2620 }
2621
2622 GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
2623 {
2624 int len;
2625
2626 if (multibyte)
2627 {
2628 c = TRANSLATE (c);
2629 len = CHAR_STRING (c, b);
2630 b += len;
2631 }
2632 else
2633 {
2634 c1 = RE_CHAR_TO_MULTIBYTE (c);
2635 if (! CHAR_BYTE8_P (c1))
2636 {
2637 int c2 = TRANSLATE (c1);
2638
2639 if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
2640 c = c1;
2641 }
2642 *b++ = c;
2643 len = 1;
2644 }
2645 (*pending_exact) += len;
2646 }
2647
2648 break;
2649 }
2650 }
2651
2652
2653
2654
2655 FIXUP_ALT_JUMP ();
2656
2657 if (!COMPILE_STACK_EMPTY)
2658 FREE_STACK_RETURN (REG_EPAREN);
2659
2660
2661
2662 if (!posix_backtracking)
2663 BUF_PUSH (succeed);
2664
2665
2666 bufp->used = b - bufp->buffer;
2667
2668 #ifdef REGEX_EMACS_DEBUG
2669 if (regex_emacs_debug > 0)
2670 {
2671 re_compile_fastmap (bufp);
2672 DEBUG_PRINT ("\nCompiled pattern:\n");
2673 print_compiled_pattern (bufp);
2674 }
2675 regex_emacs_debug--;
2676 #endif
2677
2678 FREE_STACK_RETURN (REG_NOERROR);
2679
2680 }
2681
2682
2683
2684
2685
2686 static void
2687 store_op1 (re_opcode_t op, unsigned char *loc, int arg)
2688 {
2689 *loc = (unsigned char) op;
2690 STORE_NUMBER (loc + 1, arg);
2691 }
2692
2693
2694
2695
2696 static void
2697 store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2)
2698 {
2699 *loc = (unsigned char) op;
2700 STORE_NUMBER (loc + 1, arg1);
2701 STORE_NUMBER (loc + 3, arg2);
2702 }
2703
2704
2705
2706
2707
2708 static void
2709 insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
2710 {
2711 register unsigned char *pfrom = end;
2712 register unsigned char *pto = end + 3;
2713
2714 while (pfrom != loc)
2715 *--pto = *--pfrom;
2716
2717 store_op1 (op, loc, arg);
2718 }
2719
2720
2721
2722
2723 static void
2724 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
2725 unsigned char *end)
2726 {
2727 register unsigned char *pfrom = end;
2728 register unsigned char *pto = end + 5;
2729
2730 while (pfrom != loc)
2731 *--pto = *--pfrom;
2732
2733 store_op2 (op, loc, arg1, arg2);
2734 }
2735
2736
2737
2738
2739
2740
2741 static bool
2742 at_begline_loc_p (re_char *pattern, re_char *p)
2743 {
2744 re_char *prev = p - 2;
2745
2746 switch (*prev)
2747 {
2748 case '(':
2749 case '|':
2750 break;
2751
2752 case ':':
2753
2754 while (prev > pattern && '0' <= prev[-1] && prev[-1] <= '9')
2755 --prev;
2756
2757 if (! (prev > pattern + 1 && prev[-1] == '?' && prev[-2] == '('))
2758 return false;
2759 prev -= 2;
2760 break;
2761
2762 default:
2763 return false;
2764 }
2765
2766
2767 p = prev;
2768 while (prev > pattern && prev[-1] == '\\')
2769 --prev;
2770 return (p - prev) & 1;
2771 }
2772
2773
2774
2775
2776
2777 static bool
2778 at_endline_loc_p (re_char *p, re_char *pend)
2779 {
2780
2781 return *p == '\\' && p + 1 < pend && (p[1] == ')' || p[1] == '|');
2782 }
2783
2784
2785
2786
2787
2788 static bool
2789 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
2790 {
2791 ptrdiff_t this_element;
2792
2793 for (this_element = compile_stack.avail - 1;
2794 this_element >= 0;
2795 this_element--)
2796 if (compile_stack.stack[this_element].regnum == regnum)
2797 return true;
2798
2799 return false;
2800 }
2801
2802
2803
2804
2805
2806
2807
2808
2809
2810
2811
2812 static int
2813 analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte)
2814 {
2815 int j, k;
2816 int nbits;
2817 bool not;
2818
2819
2820
2821 bool match_any_multibyte_characters = false;
2822
2823 eassert (p);
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837
2838
2839
2840 while (p < pend)
2841 {
2842
2843
2844
2845
2846
2847
2848
2849
2850 re_char *p1 = p;
2851
2852 switch (*p++)
2853 {
2854 case succeed:
2855 return 1;
2856
2857 case duplicate:
2858
2859
2860
2861
2862 p++;
2863 continue;
2864
2865
2866
2867
2868
2869 case exactn:
2870 if (fastmap)
2871 {
2872
2873
2874
2875
2876 fastmap[p[1]] = 1;
2877 if (multibyte)
2878 {
2879
2880
2881 if (CHAR_BYTE8_HEAD_P (p[1]))
2882 fastmap[CHAR_TO_BYTE8 (STRING_CHAR (p + 1))] = 1;
2883 }
2884 else
2885 {
2886
2887
2888
2889 int c = RE_CHAR_TO_MULTIBYTE (p[1]);
2890
2891 fastmap[CHAR_LEADING_CODE (c)] = 1;
2892 }
2893 }
2894 break;
2895
2896
2897 case anychar:
2898
2899
2900 if (!fastmap) break;
2901 return -1;
2902
2903
2904 case charset_not:
2905 if (!fastmap) break;
2906 {
2907
2908 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
2909 j < (1 << BYTEWIDTH); j++)
2910 fastmap[j] = 1;
2911 }
2912 FALLTHROUGH;
2913 case charset:
2914 if (!fastmap) break;
2915 not = (re_opcode_t) *(p - 1) == charset_not;
2916 nbits = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
2917 p++;
2918 for (j = 0; j < nbits; j++)
2919 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
2920 fastmap[j] = 1;
2921
2922
2923
2924 for (j = 0x80; j < nbits; j++)
2925 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
2926 fastmap[CHAR_LEADING_CODE (BYTE8_TO_CHAR (j))] = 1;
2927
2928 if (
2929
2930 not
2931 ||
2932
2933
2934 (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
2935 && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
2936
2937 {
2938 if (match_any_multibyte_characters == false)
2939 {
2940 for (j = MIN_MULTIBYTE_LEADING_CODE;
2941 j <= MAX_MULTIBYTE_LEADING_CODE; j++)
2942 fastmap[j] = 1;
2943 match_any_multibyte_characters = true;
2944 }
2945 }
2946
2947 else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
2948 && match_any_multibyte_characters == false)
2949 {
2950
2951
2952 int c, count;
2953 unsigned char lc1, lc2;
2954
2955
2956
2957 p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
2958
2959
2960 EXTRACT_NUMBER_AND_INCR (count, p);
2961 for (; count > 0; count--, p += 3)
2962 {
2963
2964 EXTRACT_CHARACTER (c, p);
2965 lc1 = CHAR_LEADING_CODE (c);
2966 p += 3;
2967 EXTRACT_CHARACTER (c, p);
2968 lc2 = CHAR_LEADING_CODE (c);
2969 for (j = lc1; j <= lc2; j++)
2970 fastmap[j] = 1;
2971 }
2972 }
2973 break;
2974
2975 case syntaxspec:
2976 case notsyntaxspec:
2977 if (!fastmap) break;
2978
2979
2980 return -1;
2981
2982 case categoryspec:
2983 case notcategoryspec:
2984 if (!fastmap) break;
2985 not = (re_opcode_t)p[-1] == notcategoryspec;
2986 k = *p++;
2987 for (j = (1 << BYTEWIDTH); j >= 0; j--)
2988 if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
2989 fastmap[j] = 1;
2990
2991
2992
2993 if (match_any_multibyte_characters == false)
2994 {
2995 for (j = MIN_MULTIBYTE_LEADING_CODE;
2996 j <= MAX_MULTIBYTE_LEADING_CODE; j++)
2997 fastmap[j] = 1;
2998 match_any_multibyte_characters = true;
2999 }
3000 break;
3001
3002
3003
3004
3005 case at_dot:
3006 case no_op:
3007 case begline:
3008 case endline:
3009 case begbuf:
3010 case endbuf:
3011 case wordbound:
3012 case notwordbound:
3013 case wordbeg:
3014 case wordend:
3015 case symbeg:
3016 case symend:
3017 continue;
3018
3019
3020 case jump:
3021 EXTRACT_NUMBER_AND_INCR (j, p);
3022 if (j < 0)
3023
3024
3025 break;
3026 p += j;
3027 switch (*p)
3028 {
3029 case on_failure_jump:
3030 case on_failure_keep_string_jump:
3031 case on_failure_jump_loop:
3032 case on_failure_jump_nastyloop:
3033 case on_failure_jump_smart:
3034 p++;
3035 break;
3036 default:
3037 continue;
3038 };
3039
3040
3041 FALLTHROUGH;
3042 case on_failure_jump:
3043 case on_failure_keep_string_jump:
3044 case on_failure_jump_nastyloop:
3045 case on_failure_jump_loop:
3046 case on_failure_jump_smart:
3047 EXTRACT_NUMBER_AND_INCR (j, p);
3048 if (p + j <= p1)
3049 ;
3050 else
3051 {
3052
3053
3054 int r = analyze_first (p, pend, fastmap, multibyte);
3055 if (r) return r;
3056 p += j;
3057 }
3058 continue;
3059
3060
3061 case jump_n:
3062
3063 DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); eassert (j < 0));
3064 p += 4;
3065
3066
3067
3068 continue;
3069
3070 case succeed_n:
3071
3072 DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); eassert (j > 0));
3073 p += 4;
3074
3075
3076
3077 continue;
3078
3079
3080 case set_number_at:
3081 p += 4;
3082 continue;
3083
3084
3085 case start_memory:
3086 case stop_memory:
3087 p += 1;
3088 continue;
3089
3090
3091 default:
3092 abort ();
3093 }
3094
3095
3096
3097
3098 return 0;
3099 }
3100
3101
3102 return 1;
3103
3104 }
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118
3119
3120
3121 static void
3122 re_compile_fastmap (struct re_pattern_buffer *bufp)
3123 {
3124 char *fastmap = bufp->fastmap;
3125 int analysis;
3126
3127 eassert (fastmap && bufp->buffer);
3128
3129 memset (fastmap, 0, 1 << BYTEWIDTH);
3130
3131
3132 bufp->fastmap_accurate = 1;
3133
3134 analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used,
3135 fastmap, RE_MULTIBYTE_P (bufp));
3136 bufp->can_be_null = (analysis != 0);
3137 }
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149
3150
3151
3152 void
3153 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
3154 ptrdiff_t num_regs, ptrdiff_t *starts, ptrdiff_t *ends)
3155 {
3156 if (num_regs)
3157 {
3158 bufp->regs_allocated = REGS_REALLOCATE;
3159 regs->num_regs = num_regs;
3160 regs->start = starts;
3161 regs->end = ends;
3162 }
3163 else
3164 {
3165 bufp->regs_allocated = REGS_UNALLOCATED;
3166 regs->num_regs = 0;
3167 regs->start = regs->end = 0;
3168 }
3169 }
3170
3171
3172
3173
3174
3175
3176 ptrdiff_t
3177 re_search (struct re_pattern_buffer *bufp, const char *string, ptrdiff_t size,
3178 ptrdiff_t startpos, ptrdiff_t range, struct re_registers *regs)
3179 {
3180 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3181 regs, size);
3182 }
3183
3184
3185 #define POS_ADDR_VSTRING(POS) \
3186 (((POS) >= size1 ? string2 - size1 : string1) + (POS))
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206
3207
3208
3209 ptrdiff_t
3210 re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ptrdiff_t size1,
3211 const char *str2, ptrdiff_t size2,
3212 ptrdiff_t startpos, ptrdiff_t range,
3213 struct re_registers *regs, ptrdiff_t stop)
3214 {
3215 ptrdiff_t val;
3216 re_char *string1 = (re_char *) str1;
3217 re_char *string2 = (re_char *) str2;
3218 char *fastmap = bufp->fastmap;
3219 Lisp_Object translate = bufp->translate;
3220 ptrdiff_t total_size = size1 + size2;
3221 ptrdiff_t endpos = startpos + range;
3222 bool anchored_start;
3223
3224 bool multibyte = RE_TARGET_MULTIBYTE_P (bufp);
3225
3226
3227 if (startpos < 0 || startpos > total_size)
3228 return -1;
3229
3230
3231
3232
3233 if (endpos < 0)
3234 range = 0 - startpos;
3235 else if (endpos > total_size)
3236 range = total_size - startpos;
3237
3238
3239
3240 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3241 {
3242 if (startpos > 0)
3243 return -1;
3244 else
3245 range = 0;
3246 }
3247
3248
3249
3250 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3251 {
3252 range = PT_BYTE - BEGV_BYTE - startpos;
3253 if (range < 0)
3254 return -1;
3255 }
3256
3257
3258 if (fastmap && !bufp->fastmap_accurate)
3259 re_compile_fastmap (bufp);
3260
3261
3262 anchored_start = (bufp->buffer[0] == begline);
3263
3264 RE_SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, startpos);
3265
3266
3267 for (;;)
3268 {
3269
3270
3271
3272
3273 if (anchored_start && startpos > 0)
3274 {
3275 if (! ((startpos <= size1 ? string1[startpos - 1]
3276 : string2[startpos - size1 - 1])
3277 == '\n'))
3278 goto advance;
3279 }
3280
3281
3282
3283
3284
3285 if (fastmap && startpos < total_size && !bufp->can_be_null)
3286 {
3287 re_char *d;
3288 int buf_ch;
3289
3290 d = POS_ADDR_VSTRING (startpos);
3291
3292 if (range > 0)
3293 {
3294 ptrdiff_t irange = range, lim = 0;
3295
3296 if (startpos < size1 && startpos + range >= size1)
3297 lim = range - (size1 - startpos);
3298
3299
3300
3301 if (!NILP (translate))
3302 {
3303 if (multibyte)
3304 while (range > lim)
3305 {
3306 int buf_charlen;
3307
3308 buf_ch = string_char_and_length (d, &buf_charlen);
3309 buf_ch = RE_TRANSLATE (translate, buf_ch);
3310 if (fastmap[CHAR_LEADING_CODE (buf_ch)])
3311 break;
3312
3313 range -= buf_charlen;
3314 d += buf_charlen;
3315 }
3316 else
3317 while (range > lim)
3318 {
3319 buf_ch = *d;
3320 int ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
3321 int translated = RE_TRANSLATE (translate, ch);
3322 if (translated != ch
3323 && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
3324 buf_ch = ch;
3325 if (fastmap[buf_ch])
3326 break;
3327 d++;
3328 range--;
3329 }
3330 }
3331 else
3332 {
3333 if (multibyte)
3334 while (range > lim)
3335 {
3336 int buf_charlen;
3337
3338 buf_ch = string_char_and_length (d, &buf_charlen);
3339 if (fastmap[CHAR_LEADING_CODE (buf_ch)])
3340 break;
3341 range -= buf_charlen;
3342 d += buf_charlen;
3343 }
3344 else
3345 while (range > lim && !fastmap[*d])
3346 {
3347 d++;
3348 range--;
3349 }
3350 }
3351 startpos += irange - range;
3352 }
3353 else
3354 {
3355 if (multibyte)
3356 {
3357 buf_ch = STRING_CHAR (d);
3358 buf_ch = TRANSLATE (buf_ch);
3359 if (! fastmap[CHAR_LEADING_CODE (buf_ch)])
3360 goto advance;
3361 }
3362 else
3363 {
3364 buf_ch = *d;
3365 int ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
3366 int translated = TRANSLATE (ch);
3367 if (translated != ch
3368 && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
3369 buf_ch = ch;
3370 if (! fastmap[TRANSLATE (buf_ch)])
3371 goto advance;
3372 }
3373 }
3374 }
3375
3376
3377 if (range >= 0 && startpos == total_size && fastmap
3378 && !bufp->can_be_null)
3379 return -1;
3380
3381 val = re_match_2_internal (bufp, string1, size1, string2, size2,
3382 startpos, regs, stop);
3383
3384 if (val >= 0)
3385 return startpos;
3386
3387 if (val == -2)
3388 return -2;
3389
3390 advance:
3391 if (!range)
3392 break;
3393 else if (range > 0)
3394 {
3395
3396 if (multibyte)
3397 {
3398 re_char *p = POS_ADDR_VSTRING (startpos);
3399 int len = BYTES_BY_CHAR_HEAD (*p);
3400
3401 range -= len;
3402 if (range < 0)
3403 break;
3404 startpos += len;
3405 }
3406 else
3407 {
3408 range--;
3409 startpos++;
3410 }
3411 }
3412 else
3413 {
3414 range++;
3415 startpos--;
3416
3417
3418 if (multibyte)
3419 {
3420 re_char *p = POS_ADDR_VSTRING (startpos) + 1;
3421 int len = raw_prev_char_len (p);
3422
3423 range += len - 1;
3424 if (range > 0)
3425 break;
3426 startpos -= len - 1;
3427 }
3428 }
3429 }
3430 return -1;
3431 }
3432
3433
3434
3435 static bool bcmp_translate (re_char *, re_char *, ptrdiff_t,
3436 Lisp_Object, bool);
3437
3438
3439
3440 #define POINTER_TO_OFFSET(ptr) \
3441 (FIRST_STRING_P (ptr) \
3442 ? (ptr) - string1 \
3443 : (ptr) - string2 + (ptrdiff_t) size1)
3444
3445
3446
3447
3448
3449
3450 #define PREFETCH(reset) \
3451 while (d == dend) \
3452 { \
3453 \
3454 if (dend == end_match_2) \
3455 { \
3456 reset; \
3457 goto fail; \
3458 } \
3459 \
3460 d = string2; \
3461 dend = end_match_2; \
3462 }
3463
3464
3465
3466
3467
3468 #define PREFETCH_NOLIMIT() \
3469 if (d == end1) \
3470 { \
3471 d = string2; \
3472 dend = end_match_2; \
3473 }
3474
3475
3476
3477 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3478 #define AT_STRINGS_END(d) ((d) == end2)
3479
3480
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490 #if 0
3491
3492
3493
3494
3495 #define WORDCHAR_P(d) \
3496 (SYNTAX ((d) == end1 ? *string2 \
3497 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
3498 == Sword)
3499
3500
3501
3502 #define AT_WORD_BOUNDARY(d) \
3503 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
3504 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3505 #endif
3506
3507
3508
3509
3510
3511
3512 static re_char *
3513 skip_one_char (re_char *p)
3514 {
3515 switch (*p++)
3516 {
3517 case anychar:
3518 break;
3519
3520 case exactn:
3521 p += *p + 1;
3522 break;
3523
3524 case charset_not:
3525 case charset:
3526 if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1))
3527 {
3528 int mcnt;
3529 p = CHARSET_RANGE_TABLE (p - 1);
3530 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3531 p = CHARSET_RANGE_TABLE_END (p, mcnt);
3532 }
3533 else
3534 p += 1 + CHARSET_BITMAP_SIZE (p - 1);
3535 break;
3536
3537 case syntaxspec:
3538 case notsyntaxspec:
3539 case categoryspec:
3540 case notcategoryspec:
3541 p++;
3542 break;
3543
3544 default:
3545 p = NULL;
3546 }
3547 return p;
3548 }
3549
3550
3551
3552 static re_char *
3553 skip_noops (re_char *p, re_char *pend)
3554 {
3555 int mcnt;
3556 while (p < pend)
3557 {
3558 switch (*p)
3559 {
3560 case start_memory:
3561 case stop_memory:
3562 p += 2; break;
3563 case no_op:
3564 p += 1; break;
3565 case jump:
3566 p += 1;
3567 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3568 p += mcnt;
3569 break;
3570 default:
3571 return p;
3572 }
3573 }
3574 eassert (p == pend);
3575 return p;
3576 }
3577
3578
3579
3580
3581
3582
3583
3584 static bool
3585 execute_charset (re_char **pp, int c, int corig, bool unibyte,
3586 Lisp_Object canon_table)
3587 {
3588 eassume (0 <= c && 0 <= corig);
3589 re_char *p = *pp, *rtp = NULL;
3590 bool not = (re_opcode_t) *p == charset_not;
3591
3592 if (CHARSET_RANGE_TABLE_EXISTS_P (p))
3593 {
3594 int count;
3595 rtp = CHARSET_RANGE_TABLE (p);
3596 EXTRACT_NUMBER_AND_INCR (count, rtp);
3597 *pp = CHARSET_RANGE_TABLE_END ((rtp), (count));
3598 }
3599 else
3600 *pp += 2 + CHARSET_BITMAP_SIZE (p);
3601
3602 if (unibyte && c < (1 << BYTEWIDTH))
3603 {
3604
3605
3606 if (c < (unsigned) (CHARSET_BITMAP_SIZE (p) * BYTEWIDTH)
3607 && p[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3608 return !not;
3609 }
3610 else if (rtp)
3611 {
3612 int class_bits = CHARSET_RANGE_TABLE_BITS (p);
3613 int range_start, range_end;
3614
3615
3616
3617
3618
3619 if ((class_bits & BIT_MULTIBYTE) ||
3620 (class_bits & BIT_ALNUM && ISALNUM (c)) ||
3621 (class_bits & BIT_ALPHA && ISALPHA (c)) ||
3622 (class_bits & BIT_SPACE && ISSPACE (c)) ||
3623 (class_bits & BIT_BLANK && ISBLANK (c)) ||
3624 (class_bits & BIT_WORD && ISWORD (c)) ||
3625 ((class_bits & BIT_UPPER) &&
3626 (ISUPPER (corig) || (!NILP (canon_table) && ISLOWER (corig)))) ||
3627 ((class_bits & BIT_LOWER) &&
3628 (ISLOWER (corig) || (!NILP (canon_table) && ISUPPER (corig)))) ||
3629 (class_bits & BIT_PUNCT && ISPUNCT (c)) ||
3630 (class_bits & BIT_GRAPH && ISGRAPH (c)) ||
3631 (class_bits & BIT_PRINT && ISPRINT (c)))
3632 return !not;
3633
3634 for (p = *pp; rtp < p; rtp += 2 * 3)
3635 {
3636 EXTRACT_CHARACTER (range_start, rtp);
3637 EXTRACT_CHARACTER (range_end, rtp + 3);
3638 if (range_start <= c && c <= range_end)
3639 return !not;
3640 }
3641 }
3642
3643 return not;
3644 }
3645
3646
3647 static bool
3648 mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
3649 re_char *p2)
3650 {
3651 re_opcode_t op2;
3652 bool multibyte = RE_MULTIBYTE_P (bufp);
3653 unsigned char *pend = bufp->buffer + bufp->used;
3654 re_char *p2_orig = p2;
3655
3656 eassert (p1 >= bufp->buffer && p1 < pend
3657 && p2 >= bufp->buffer && p2 <= pend);
3658
3659
3660
3661
3662
3663 p2 = skip_noops (p2, pend);
3664
3665
3666
3667
3668 eassert (p1 >= bufp->buffer && p1 < pend
3669 && p2 >= bufp->buffer && p2 <= pend);
3670
3671 op2 = p2 == pend ? succeed : *p2;
3672
3673 switch (op2)
3674 {
3675 case succeed:
3676 case endbuf:
3677
3678 if (skip_one_char (p1))
3679 {
3680 DEBUG_PRINT (" End of pattern: fast loop.\n");
3681 return true;
3682 }
3683 break;
3684
3685 case endline:
3686 case exactn:
3687 {
3688 int c
3689 = (re_opcode_t) *p2 == endline ? '\n'
3690 : RE_STRING_CHAR (p2 + 2, multibyte);
3691
3692 if ((re_opcode_t) *p1 == exactn)
3693 {
3694 if (c != RE_STRING_CHAR (p1 + 2, multibyte))
3695 {
3696 DEBUG_PRINT (" '%c' != '%c' => fast loop.\n", c, p1[2]);
3697 return true;
3698 }
3699 }
3700
3701 else if ((re_opcode_t) *p1 == charset
3702 || (re_opcode_t) *p1 == charset_not)
3703 {
3704 if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c),
3705 Qnil))
3706 {
3707 DEBUG_PRINT (" No match => fast loop.\n");
3708 return true;
3709 }
3710 }
3711 else if ((re_opcode_t) *p1 == anychar
3712 && c == '\n')
3713 {
3714 DEBUG_PRINT (" . != \\n => fast loop.\n");
3715 return true;
3716 }
3717 }
3718 break;
3719
3720 case charset:
3721 {
3722 if ((re_opcode_t) *p1 == exactn)
3723
3724 return mutually_exclusive_p (bufp, p2, p1);
3725
3726
3727
3728
3729 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
3730 {
3731
3732
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742 if ((re_opcode_t) *p1 == charset)
3743 {
3744 int idx;
3745
3746
3747 for (idx = 0;
3748 (idx < (int) p2[1]
3749 && idx < CHARSET_BITMAP_SIZE (p1));
3750 idx++)
3751 if ((p2[2 + idx] & p1[2 + idx]) != 0)
3752 break;
3753
3754 if (idx == p2[1]
3755 || idx == CHARSET_BITMAP_SIZE (p1))
3756 {
3757 DEBUG_PRINT (" No match => fast loop.\n");
3758 return true;
3759 }
3760 }
3761 else if ((re_opcode_t) *p1 == charset_not)
3762 {
3763 int idx;
3764
3765
3766 for (idx = 0; idx < (int) p2[1]; idx++)
3767 if (! (p2[2 + idx] == 0
3768 || (idx < CHARSET_BITMAP_SIZE (p1)
3769 && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
3770 break;
3771
3772 if (idx == p2[1])
3773 {
3774 DEBUG_PRINT (" No match => fast loop.\n");
3775 return true;
3776 }
3777 }
3778 }
3779 }
3780 break;
3781
3782 case charset_not:
3783 switch (*p1)
3784 {
3785 case exactn:
3786 case charset:
3787
3788 return mutually_exclusive_p (bufp, p2, p1);
3789 case charset_not:
3790
3791
3792
3793
3794 break;
3795 }
3796 break;
3797
3798 case wordend:
3799 return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
3800 case symend:
3801 return ((re_opcode_t) *p1 == syntaxspec
3802 && (p1[1] == Ssymbol || p1[1] == Sword));
3803 case notsyntaxspec:
3804 return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
3805
3806 case wordbeg:
3807 return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
3808 case symbeg:
3809 return ((re_opcode_t) *p1 == notsyntaxspec
3810 && (p1[1] == Ssymbol || p1[1] == Sword));
3811 case syntaxspec:
3812 return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
3813
3814 case wordbound:
3815 return (((re_opcode_t) *p1 == notsyntaxspec
3816 || (re_opcode_t) *p1 == syntaxspec)
3817 && p1[1] == Sword);
3818
3819 case categoryspec:
3820 return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
3821 case notcategoryspec:
3822 return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
3823
3824 case on_failure_jump_nastyloop:
3825 case on_failure_jump_smart:
3826 case on_failure_jump_loop:
3827 case on_failure_keep_string_jump:
3828 case on_failure_jump:
3829 {
3830 int mcnt;
3831 p2++;
3832 EXTRACT_NUMBER_AND_INCR (mcnt, p2);
3833
3834
3835 if (p2 + mcnt > p2_orig)
3836 return (mutually_exclusive_p (bufp, p1, p2)
3837 && mutually_exclusive_p (bufp, p1, p2 + mcnt));
3838 break;
3839 }
3840
3841 default:
3842 ;
3843 }
3844
3845
3846 return false;
3847 }
3848
3849
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864 ptrdiff_t
3865 re_match_2 (struct re_pattern_buffer *bufp,
3866 char const *string1, ptrdiff_t size1,
3867 char const *string2, ptrdiff_t size2,
3868 ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop)
3869 {
3870 ptrdiff_t result;
3871
3872 RE_SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, pos);
3873
3874 result = re_match_2_internal (bufp, (re_char *) string1, size1,
3875 (re_char *) string2, size2,
3876 pos, regs, stop);
3877 return result;
3878 }
3879
3880 static void
3881 unwind_re_match (void *ptr)
3882 {
3883 struct buffer *b = (struct buffer *) ptr;
3884 b->text->inhibit_shrinking = 0;
3885 }
3886
3887
3888
3889 static ptrdiff_t
3890 re_match_2_internal (struct re_pattern_buffer *bufp,
3891 re_char *string1, ptrdiff_t size1,
3892 re_char *string2, ptrdiff_t size2,
3893 ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop)
3894 {
3895 eassume (0 <= size1);
3896 eassume (0 <= size2);
3897 eassume (0 <= pos && pos <= stop && stop <= size1 + size2);
3898
3899
3900 int mcnt;
3901
3902
3903 re_char *end1, *end2;
3904
3905
3906
3907 re_char *end_match_1, *end_match_2;
3908
3909
3910 re_char *d, *dend;
3911
3912
3913
3914
3915
3916 re_char *dfail;
3917
3918
3919 re_char *p = bufp->buffer;
3920 re_char *pend = p + bufp->used;
3921
3922
3923 Lisp_Object translate = bufp->translate;
3924
3925
3926 bool multibyte = RE_MULTIBYTE_P (bufp);
3927
3928
3929 bool target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
3930
3931
3932
3933
3934
3935
3936
3937
3938 fail_stack_type fail_stack;
3939 #ifdef DEBUG_COMPILES_ARGUMENTS
3940 ptrdiff_t nfailure_points_pushed = 0, nfailure_points_popped = 0;
3941 #endif
3942
3943
3944
3945
3946 ptrdiff_t num_regs = bufp->re_nsub + 1;
3947 eassume (0 < num_regs);
3948
3949
3950
3951
3952
3953
3954
3955 re_char **regstart UNINIT, **regend UNINIT;
3956
3957
3958
3959
3960
3961 bool best_regs_set = false;
3962 re_char **best_regstart UNINIT, **best_regend UNINIT;
3963
3964
3965
3966
3967
3968
3969
3970
3971
3972 re_char *match_end = NULL;
3973
3974
3975 ptrdiff_t nchars = 0;
3976
3977 #ifdef DEBUG_COMPILES_ARGUMENTS
3978
3979 ptrdiff_t num_regs_pushed = 0;
3980 #endif
3981
3982 DEBUG_PRINT ("\nEntering re_match_2.\n");
3983
3984 REGEX_USE_SAFE_ALLOCA;
3985
3986 INIT_FAIL_STACK ();
3987
3988 specpdl_ref count = SPECPDL_INDEX ();
3989
3990
3991
3992
3993
3994
3995
3996
3997 if (!current_buffer->text->inhibit_shrinking)
3998 {
3999 record_unwind_protect_ptr (unwind_re_match, current_buffer);
4000 current_buffer->text->inhibit_shrinking = 1;
4001 }
4002
4003
4004
4005
4006
4007
4008 ptrdiff_t re_nsub = num_regs - 1;
4009 if (0 < re_nsub)
4010 {
4011 regstart = SAFE_ALLOCA ((re_nsub * 4 + 1) * sizeof *regstart);
4012 regend = regstart + num_regs;
4013 best_regstart = regend + re_nsub;
4014 best_regend = best_regstart + re_nsub;
4015
4016
4017
4018 for (re_char **apos = regstart + 1; apos < best_regstart + 1; apos++)
4019 *apos = NULL;
4020 }
4021
4022
4023
4024 if (size2 == 0 && string1 != NULL)
4025 {
4026 string2 = string1;
4027 size2 = size1;
4028 string1 = 0;
4029 size1 = 0;
4030 }
4031 end1 = string1 + size1;
4032 end2 = string2 + size2;
4033
4034
4035
4036
4037
4038
4039
4040 if (pos >= size1)
4041 {
4042
4043 d = string2 + pos - size1;
4044 dend = end_match_2 = string2 + stop - size1;
4045 end_match_1 = end1;
4046 }
4047 else
4048 {
4049 if (stop < size1)
4050 {
4051
4052 end_match_1 = string1 + stop;
4053
4054
4055
4056
4057
4058
4059
4060
4061 end_match_2 = end_match_1;
4062 }
4063 else
4064 {
4065
4066
4067 end_match_1 = end1;
4068 end_match_2 = string2 + stop - size1;
4069 }
4070 d = string1 + pos;
4071 dend = end_match_1;
4072 }
4073
4074 DEBUG_PRINT ("The compiled pattern is:\n");
4075 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4076 DEBUG_PRINT ("The string to match is: \"");
4077 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4078 DEBUG_PRINT ("\"\n");
4079
4080
4081
4082
4083 for (;;)
4084 {
4085 DEBUG_PRINT ("\n%p: ", p);
4086
4087 if (p == pend)
4088 {
4089
4090 DEBUG_PRINT ("end of pattern ... ");
4091
4092
4093
4094 if (d != end_match_2)
4095 {
4096
4097 bool best_match_p;
4098
4099 {
4100
4101
4102 bool same_str_p = (FIRST_STRING_P (match_end)
4103 == FIRST_STRING_P (d));
4104
4105
4106
4107 if (same_str_p)
4108 best_match_p = d > match_end;
4109 else
4110 best_match_p = !FIRST_STRING_P (d);
4111 }
4112
4113 DEBUG_PRINT ("backtracking.\n");
4114
4115 if (!FAIL_STACK_EMPTY ())
4116 {
4117
4118
4119 if (!best_regs_set || best_match_p)
4120 {
4121 best_regs_set = true;
4122 match_end = d;
4123
4124 DEBUG_PRINT ("\nSAVING match as best so far.\n");
4125
4126 for (ptrdiff_t reg = 1; reg < num_regs; reg++)
4127 {
4128 best_regstart[reg] = regstart[reg];
4129 best_regend[reg] = regend[reg];
4130 }
4131 }
4132 goto fail;
4133 }
4134
4135
4136
4137
4138 else if (best_regs_set && !best_match_p)
4139 {
4140 restore_best_regs:
4141
4142
4143
4144
4145
4146 DEBUG_PRINT ("Restoring best registers.\n");
4147
4148 d = match_end;
4149 dend = ((d >= string1 && d <= end1)
4150 ? end_match_1 : end_match_2);
4151
4152 for (ptrdiff_t reg = 1; reg < num_regs; reg++)
4153 {
4154 regstart[reg] = best_regstart[reg];
4155 regend[reg] = best_regend[reg];
4156 eassert (REG_UNSET (regstart[reg])
4157 <= REG_UNSET (regend[reg]));
4158 }
4159 }
4160 }
4161
4162 succeed_label:
4163 DEBUG_PRINT ("Accepting match.\n");
4164
4165
4166 if (regs)
4167 {
4168
4169 if (bufp->regs_allocated == REGS_UNALLOCATED)
4170 {
4171 ptrdiff_t n = max (RE_NREGS, num_regs);
4172 regs->start = xnmalloc (n, sizeof *regs->start);
4173 regs->end = xnmalloc (n, sizeof *regs->end);
4174 regs->num_regs = n;
4175 bufp->regs_allocated = REGS_REALLOCATE;
4176 }
4177 else if (bufp->regs_allocated == REGS_REALLOCATE)
4178 {
4179
4180
4181 ptrdiff_t n = regs->num_regs;
4182 if (n < num_regs)
4183 {
4184 n = max (n + (n >> 1), num_regs);
4185 regs->start
4186 = xnrealloc (regs->start, n, sizeof *regs->start);
4187 regs->end = xnrealloc (regs->end, n, sizeof *regs->end);
4188 regs->num_regs = n;
4189 }
4190 }
4191 else
4192 eassert (bufp->regs_allocated == REGS_FIXED);
4193
4194
4195
4196
4197 if (regs->num_regs > 0)
4198 {
4199 regs->start[0] = pos;
4200 regs->end[0] = POINTER_TO_OFFSET (d);
4201 }
4202
4203 for (ptrdiff_t reg = 1; reg < num_regs; reg++)
4204 {
4205 eassert (REG_UNSET (regstart[reg])
4206 <= REG_UNSET (regend[reg]));
4207 if (REG_UNSET (regend[reg]))
4208 regs->start[reg] = regs->end[reg] = -1;
4209 else
4210 {
4211 regs->start[reg] = POINTER_TO_OFFSET (regstart[reg]);
4212 regs->end[reg] = POINTER_TO_OFFSET (regend[reg]);
4213 }
4214 }
4215
4216
4217
4218 for (ptrdiff_t reg = num_regs; reg < regs->num_regs; reg++)
4219 regs->start[reg] = regs->end[reg] = -1;
4220 }
4221
4222 DEBUG_PRINT ("%td failure points pushed, %td popped (%td remain).\n",
4223 nfailure_points_pushed, nfailure_points_popped,
4224 nfailure_points_pushed - nfailure_points_popped);
4225 DEBUG_PRINT ("%td registers pushed.\n", num_regs_pushed);
4226
4227 ptrdiff_t dcnt = POINTER_TO_OFFSET (d) - pos;
4228
4229 DEBUG_PRINT ("Returning %td from re_match_2.\n", dcnt);
4230
4231 unbind_to (count, Qnil);
4232 SAFE_FREE ();
4233
4234
4235
4236
4237 if (max_redisplay_ticks > 0 && nchars > 0)
4238 update_redisplay_ticks (nchars / 50 + 1, NULL);
4239 return dcnt;
4240 }
4241
4242
4243 switch (*p++)
4244 {
4245
4246
4247 case no_op:
4248 DEBUG_PRINT ("EXECUTING no_op.\n");
4249 break;
4250
4251 case succeed:
4252 DEBUG_PRINT ("EXECUTING succeed.\n");
4253 goto succeed_label;
4254
4255
4256
4257
4258 case exactn:
4259 mcnt = *p++;
4260 DEBUG_PRINT ("EXECUTING exactn %d.\n", mcnt);
4261
4262
4263 dfail = d;
4264
4265
4266 if (target_multibyte)
4267 do
4268 {
4269 int pat_charlen, buf_charlen;
4270 int pat_ch, buf_ch;
4271
4272 PREFETCH (d = dfail);
4273 if (multibyte)
4274 pat_ch = string_char_and_length (p, &pat_charlen);
4275 else
4276 {
4277 pat_ch = RE_CHAR_TO_MULTIBYTE (*p);
4278 pat_charlen = 1;
4279 }
4280 buf_ch = string_char_and_length (d, &buf_charlen);
4281
4282 if (TRANSLATE (buf_ch) != pat_ch)
4283 {
4284 d = dfail;
4285 goto fail;
4286 }
4287
4288 p += pat_charlen;
4289 d += buf_charlen;
4290 mcnt -= pat_charlen;
4291 nchars++;
4292 }
4293 while (mcnt > 0);
4294 else
4295 do
4296 {
4297 int pat_charlen;
4298 int pat_ch, buf_ch;
4299
4300 PREFETCH (d = dfail);
4301 if (multibyte)
4302 {
4303 pat_ch = string_char_and_length (p, &pat_charlen);
4304 pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch);
4305 }
4306 else
4307 {
4308 pat_ch = *p;
4309 pat_charlen = 1;
4310 }
4311 buf_ch = RE_CHAR_TO_MULTIBYTE (*d);
4312 if (! CHAR_BYTE8_P (buf_ch))
4313 {
4314 buf_ch = TRANSLATE (buf_ch);
4315 buf_ch = RE_CHAR_TO_UNIBYTE (buf_ch);
4316 if (buf_ch < 0)
4317 buf_ch = *d;
4318 }
4319 else
4320 buf_ch = *d;
4321 if (buf_ch != pat_ch)
4322 {
4323 d = dfail;
4324 goto fail;
4325 }
4326 p += pat_charlen;
4327 d++;
4328 mcnt -= pat_charlen;
4329 nchars++;
4330 }
4331 while (mcnt > 0);
4332
4333 break;
4334
4335
4336
4337 case anychar:
4338 {
4339 int buf_charlen;
4340 int buf_ch;
4341
4342 DEBUG_PRINT ("EXECUTING anychar.\n");
4343
4344 PREFETCH ();
4345 buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen,
4346 target_multibyte);
4347 buf_ch = TRANSLATE (buf_ch);
4348 if (buf_ch == '\n')
4349 goto fail;
4350
4351 DEBUG_PRINT (" Matched \"%d\".\n", *d);
4352 d += buf_charlen;
4353 nchars++;
4354 }
4355 break;
4356
4357
4358 case charset:
4359 case charset_not:
4360 {
4361
4362 bool unibyte_char = false;
4363
4364 DEBUG_PRINT ("EXECUTING charset%s.\n",
4365 (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
4366
4367 PREFETCH ();
4368 int len;
4369 int corig = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
4370 int c = corig;
4371 if (target_multibyte)
4372 {
4373 int c1;
4374
4375 c = TRANSLATE (c);
4376 c1 = RE_CHAR_TO_UNIBYTE (c);
4377 if (c1 >= 0)
4378 {
4379 unibyte_char = true;
4380 c = c1;
4381 }
4382 }
4383 else
4384 {
4385 int c1 = RE_CHAR_TO_MULTIBYTE (c);
4386
4387 if (! CHAR_BYTE8_P (c1))
4388 {
4389 c1 = TRANSLATE (c1);
4390 c1 = RE_CHAR_TO_UNIBYTE (c1);
4391 if (c1 >= 0)
4392 {
4393 unibyte_char = true;
4394 c = c1;
4395 }
4396 }
4397 else
4398 unibyte_char = true;
4399 }
4400
4401 p -= 1;
4402 if (!execute_charset (&p, c, corig, unibyte_char, translate))
4403 goto fail;
4404
4405 d += len;
4406 nchars++;
4407 }
4408 break;
4409
4410
4411
4412
4413
4414
4415 case start_memory:
4416 DEBUG_PRINT ("EXECUTING start_memory %d:\n", *p);
4417 eassert (0 < *p && *p < num_regs);
4418
4419
4420 PUSH_FAILURE_REG (*p);
4421
4422 regstart[*p] = d;
4423 DEBUG_PRINT (" regstart: %td\n", POINTER_TO_OFFSET (regstart[*p]));
4424
4425
4426 p += 1;
4427 break;
4428
4429
4430
4431
4432 case stop_memory:
4433 DEBUG_PRINT ("EXECUTING stop_memory %d:\n", *p);
4434
4435 eassert (0 < *p && *p < num_regs);
4436 eassert (!REG_UNSET (regstart[*p]));
4437
4438
4439
4440
4441
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451 regend[*p] = d;
4452 DEBUG_PRINT (" regend: %td\n", POINTER_TO_OFFSET (regend[*p]));
4453
4454
4455 p += 1;
4456 break;
4457
4458
4459
4460
4461 case duplicate:
4462 {
4463 re_char *d2, *dend2;
4464 int regno = *p++;
4465 DEBUG_PRINT ("EXECUTING duplicate %d.\n", regno);
4466
4467
4468 eassert (0 < regno && regno < num_regs);
4469 eassert (REG_UNSET (regstart[regno]) <= REG_UNSET (regend[regno]));
4470 if (REG_UNSET (regend[regno]))
4471 goto fail;
4472
4473
4474 d2 = regstart[regno];
4475
4476
4477 dfail = d;
4478
4479
4480
4481
4482
4483
4484 dend2 = ((FIRST_STRING_P (regstart[regno])
4485 == FIRST_STRING_P (regend[regno]))
4486 ? regend[regno] : end_match_1);
4487 for (;;)
4488 {
4489 ptrdiff_t dcnt;
4490
4491
4492
4493 while (d2 == dend2)
4494 {
4495 if (dend2 == end_match_2) break;
4496 if (dend2 == regend[regno]) break;
4497
4498
4499 d2 = string2;
4500 dend2 = regend[regno];
4501 }
4502
4503 if (d2 == dend2) break;
4504
4505
4506 PREFETCH (d = dfail);
4507
4508
4509 dcnt = dend - d;
4510
4511
4512
4513 if (dcnt > dend2 - d2)
4514 dcnt = dend2 - d2;
4515
4516
4517
4518 if (!NILP (translate)
4519 ? bcmp_translate (d, d2, dcnt, translate, target_multibyte)
4520 : memcmp (d, d2, dcnt))
4521 {
4522 d = dfail;
4523 goto fail;
4524 }
4525 d += dcnt, d2 += dcnt;
4526 nchars++;
4527 }
4528 }
4529 break;
4530
4531
4532
4533
4534 case begline:
4535 DEBUG_PRINT ("EXECUTING begline.\n");
4536
4537 if (AT_STRINGS_BEG (d))
4538 break;
4539 else
4540 {
4541 unsigned c;
4542 GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
4543 if (c == '\n')
4544 break;
4545 }
4546 goto fail;
4547
4548
4549
4550 case endline:
4551 DEBUG_PRINT ("EXECUTING endline.\n");
4552
4553 if (AT_STRINGS_END (d))
4554 break;
4555 PREFETCH_NOLIMIT ();
4556 if (*d == '\n')
4557 break;
4558 goto fail;
4559
4560
4561
4562 case begbuf:
4563 DEBUG_PRINT ("EXECUTING begbuf.\n");
4564 if (AT_STRINGS_BEG (d))
4565 break;
4566 goto fail;
4567
4568
4569
4570 case endbuf:
4571 DEBUG_PRINT ("EXECUTING endbuf.\n");
4572 if (AT_STRINGS_END (d))
4573 break;
4574 goto fail;
4575
4576
4577
4578
4579
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593 case on_failure_keep_string_jump:
4594 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4595 DEBUG_PRINT ("EXECUTING on_failure_keep_string_jump %d (to %p):\n",
4596 mcnt, p + mcnt);
4597
4598 PUSH_FAILURE_POINT (p - 3, NULL);
4599 break;
4600
4601
4602
4603
4604
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615 case on_failure_jump_nastyloop:
4616 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4617 DEBUG_PRINT ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n",
4618 mcnt, p + mcnt);
4619
4620 eassert ((re_opcode_t)p[-4] == no_op);
4621 {
4622 bool cycle = false;
4623 CHECK_INFINITE_LOOP (p - 4, d);
4624 if (!cycle)
4625
4626
4627
4628
4629 PUSH_FAILURE_POINT (p - 3, d);
4630 }
4631 break;
4632
4633
4634
4635 case on_failure_jump_loop:
4636 on_failure:
4637 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4638 DEBUG_PRINT ("EXECUTING on_failure_jump_loop %d (to %p):\n",
4639 mcnt, p + mcnt);
4640 {
4641 bool cycle = false;
4642 CHECK_INFINITE_LOOP (p - 3, d);
4643 if (cycle)
4644
4645
4646
4647
4648
4649 p += mcnt;
4650 else
4651 PUSH_FAILURE_POINT (p - 3, d);
4652 }
4653 break;
4654
4655
4656
4657
4658
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668 case on_failure_jump:
4669 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4670 DEBUG_PRINT ("EXECUTING on_failure_jump %d (to %p):\n",
4671 mcnt, p + mcnt);
4672
4673 PUSH_FAILURE_POINT (p -3, d);
4674 break;
4675
4676
4677
4678
4679
4680
4681
4682
4683 case on_failure_jump_smart:
4684 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4685 DEBUG_PRINT ("EXECUTING on_failure_jump_smart %d (to %p).\n",
4686 mcnt, p + mcnt);
4687 {
4688 re_char *p1 = p;
4689
4690 unsigned char *p2 = (unsigned char *) p + mcnt;
4691 unsigned char *p3 = (unsigned char *) p - 3;
4692
4693 p -= 3;
4694
4695
4696 EXTRACT_NUMBER (mcnt, p2 - 2);
4697
4698
4699
4700 eassert (skip_one_char (p1) == p2 - 3);
4701 eassert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
4702 DEBUG_STATEMENT (regex_emacs_debug += 2);
4703 if (mutually_exclusive_p (bufp, p1, p2))
4704 {
4705
4706 DEBUG_PRINT (" smart exclusive => fast loop.\n");
4707 *p3 = (unsigned char) on_failure_keep_string_jump;
4708 STORE_NUMBER (p2 - 2, mcnt + 3);
4709 }
4710 else
4711 {
4712
4713 DEBUG_PRINT (" smart default => slow loop.\n");
4714 *p3 = (unsigned char) on_failure_jump;
4715 }
4716 DEBUG_STATEMENT (regex_emacs_debug -= 2);
4717 }
4718 break;
4719
4720
4721 case jump:
4722 unconditional_jump:
4723 maybe_quit ();
4724 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4725 DEBUG_PRINT ("EXECUTING jump %d ", mcnt);
4726 p += mcnt;
4727 DEBUG_PRINT ("(to %p).\n", p);
4728 break;
4729
4730
4731
4732
4733 case succeed_n:
4734
4735 EXTRACT_NUMBER (mcnt, p + 2);
4736 DEBUG_PRINT ("EXECUTING succeed_n %d.\n", mcnt);
4737
4738
4739 if (mcnt != 0)
4740 {
4741
4742 unsigned char *p2 = (unsigned char *) p + 2;
4743 mcnt--;
4744 p += 4;
4745 PUSH_NUMBER (p2, mcnt);
4746 }
4747 else
4748
4749 goto on_failure;
4750 break;
4751
4752 case jump_n:
4753
4754 EXTRACT_NUMBER (mcnt, p + 2);
4755 DEBUG_PRINT ("EXECUTING jump_n %d.\n", mcnt);
4756
4757
4758 if (mcnt != 0)
4759 {
4760
4761 unsigned char *p2 = (unsigned char *) p + 2;
4762 mcnt--;
4763 PUSH_NUMBER (p2, mcnt);
4764 goto unconditional_jump;
4765 }
4766
4767 else
4768 p += 4;
4769 break;
4770
4771 case set_number_at:
4772 {
4773 unsigned char *p2;
4774 DEBUG_PRINT ("EXECUTING set_number_at.\n");
4775
4776 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4777
4778 p2 = (unsigned char *) p + mcnt;
4779
4780 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4781 DEBUG_PRINT (" Setting %p to %d.\n", p2, mcnt);
4782 PUSH_NUMBER (p2, mcnt);
4783 break;
4784 }
4785
4786 case wordbound:
4787 case notwordbound:
4788 {
4789 bool not = (re_opcode_t) *(p - 1) == notwordbound;
4790 DEBUG_PRINT ("EXECUTING %swordbound.\n", not ? "not" : "");
4791
4792
4793
4794
4795 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4796 not = !not;
4797 else
4798 {
4799
4800
4801 int c1, c2;
4802 int s1, s2;
4803 int dummy;
4804 ptrdiff_t offset = POINTER_TO_OFFSET (d);
4805 ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
4806 UPDATE_SYNTAX_TABLE (charpos);
4807 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
4808 nchars++;
4809 s1 = SYNTAX (c1);
4810 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
4811 PREFETCH_NOLIMIT ();
4812 GET_CHAR_AFTER (c2, d, dummy);
4813 nchars++;
4814 s2 = SYNTAX (c2);
4815
4816 if (
4817 ((s1 == Sword) != (s2 == Sword))
4818
4819
4820 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
4821 not = !not;
4822 }
4823 if (not)
4824 break;
4825 else
4826 goto fail;
4827 }
4828
4829 case wordbeg:
4830 DEBUG_PRINT ("EXECUTING wordbeg.\n");
4831
4832
4833
4834
4835 if (AT_STRINGS_END (d))
4836 goto fail;
4837 else
4838 {
4839
4840
4841 int c1, c2;
4842 int s1, s2;
4843 int dummy;
4844 ptrdiff_t offset = POINTER_TO_OFFSET (d);
4845 ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset);
4846 UPDATE_SYNTAX_TABLE (charpos);
4847 PREFETCH ();
4848 GET_CHAR_AFTER (c2, d, dummy);
4849 nchars++;
4850 s2 = SYNTAX (c2);
4851
4852
4853 if (s2 != Sword)
4854 goto fail;
4855
4856
4857 if (!AT_STRINGS_BEG (d))
4858 {
4859 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
4860 nchars++;
4861 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
4862 s1 = SYNTAX (c1);
4863
4864
4865
4866 if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
4867 goto fail;
4868 }
4869 }
4870 break;
4871
4872 case wordend:
4873 DEBUG_PRINT ("EXECUTING wordend.\n");
4874
4875
4876
4877
4878 if (AT_STRINGS_BEG (d))
4879 goto fail;
4880 else
4881 {
4882
4883
4884 int c1, c2;
4885 int s1, s2;
4886 int dummy;
4887 ptrdiff_t offset = POINTER_TO_OFFSET (d);
4888 ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
4889 UPDATE_SYNTAX_TABLE (charpos);
4890 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
4891 nchars++;
4892 s1 = SYNTAX (c1);
4893
4894
4895 if (s1 != Sword)
4896 goto fail;
4897
4898
4899 if (!AT_STRINGS_END (d))
4900 {
4901 PREFETCH_NOLIMIT ();
4902 GET_CHAR_AFTER (c2, d, dummy);
4903 nchars++;
4904 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
4905 s2 = SYNTAX (c2);
4906
4907
4908
4909 if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
4910 goto fail;
4911 }
4912 }
4913 break;
4914
4915 case symbeg:
4916 DEBUG_PRINT ("EXECUTING symbeg.\n");
4917
4918
4919
4920
4921 if (AT_STRINGS_END (d))
4922 goto fail;
4923 else
4924 {
4925
4926
4927 int c1, c2;
4928 int s1, s2;
4929 ptrdiff_t offset = POINTER_TO_OFFSET (d);
4930 ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset);
4931 UPDATE_SYNTAX_TABLE (charpos);
4932 PREFETCH ();
4933 c2 = RE_STRING_CHAR (d, target_multibyte);
4934 nchars++;
4935 s2 = SYNTAX (c2);
4936
4937
4938 if (s2 != Sword && s2 != Ssymbol)
4939 goto fail;
4940
4941
4942 if (!AT_STRINGS_BEG (d))
4943 {
4944 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
4945 nchars++;
4946 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
4947 s1 = SYNTAX (c1);
4948
4949
4950 if (s1 == Sword || s1 == Ssymbol)
4951 goto fail;
4952 }
4953 }
4954 break;
4955
4956 case symend:
4957 DEBUG_PRINT ("EXECUTING symend.\n");
4958
4959
4960
4961
4962 if (AT_STRINGS_BEG (d))
4963 goto fail;
4964 else
4965 {
4966
4967
4968 int c1, c2;
4969 int s1, s2;
4970 ptrdiff_t offset = POINTER_TO_OFFSET (d);
4971 ptrdiff_t charpos = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
4972 UPDATE_SYNTAX_TABLE (charpos);
4973 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
4974 nchars++;
4975 s1 = SYNTAX (c1);
4976
4977
4978 if (s1 != Sword && s1 != Ssymbol)
4979 goto fail;
4980
4981
4982 if (!AT_STRINGS_END (d))
4983 {
4984 PREFETCH_NOLIMIT ();
4985 c2 = RE_STRING_CHAR (d, target_multibyte);
4986 nchars++;
4987 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
4988 s2 = SYNTAX (c2);
4989
4990
4991 if (s2 == Sword || s2 == Ssymbol)
4992 goto fail;
4993 }
4994 }
4995 break;
4996
4997 case syntaxspec:
4998 case notsyntaxspec:
4999 {
5000 bool not = (re_opcode_t) *(p - 1) == notsyntaxspec;
5001 mcnt = *p++;
5002 DEBUG_PRINT ("EXECUTING %ssyntaxspec %d.\n", not ? "not" : "",
5003 mcnt);
5004 PREFETCH ();
5005 {
5006 ptrdiff_t offset = POINTER_TO_OFFSET (d);
5007 ptrdiff_t pos1 = RE_SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5008 UPDATE_SYNTAX_TABLE (pos1);
5009 }
5010 {
5011 int len;
5012 int c;
5013
5014 GET_CHAR_AFTER (c, d, len);
5015 if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
5016 goto fail;
5017 d += len;
5018 nchars++;
5019 }
5020 }
5021 break;
5022
5023 case at_dot:
5024 DEBUG_PRINT ("EXECUTING at_dot.\n");
5025 if (PTR_BYTE_POS (d) != PT_BYTE)
5026 goto fail;
5027 break;
5028
5029 case categoryspec:
5030 case notcategoryspec:
5031 {
5032 bool not = (re_opcode_t) *(p - 1) == notcategoryspec;
5033 mcnt = *p++;
5034 DEBUG_PRINT ("EXECUTING %scategoryspec %d.\n",
5035 not ? "not" : "", mcnt);
5036 PREFETCH ();
5037
5038 {
5039 int len;
5040 int c;
5041 GET_CHAR_AFTER (c, d, len);
5042 if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
5043 goto fail;
5044 d += len;
5045 nchars++;
5046 }
5047 }
5048 break;
5049
5050 default:
5051 abort ();
5052 }
5053 continue;
5054
5055
5056
5057 fail:
5058 maybe_quit ();
5059 if (!FAIL_STACK_EMPTY ())
5060 {
5061 re_char *str, *pat;
5062
5063 DEBUG_PRINT ("\nFAIL:\n");
5064 POP_FAILURE_POINT (str, pat);
5065 switch (*pat++)
5066 {
5067 case on_failure_keep_string_jump:
5068 eassert (str == NULL);
5069 goto continue_failure_jump;
5070
5071 case on_failure_jump_nastyloop:
5072 eassert ((re_opcode_t)pat[-2] == no_op);
5073 PUSH_FAILURE_POINT (pat - 2, str);
5074 FALLTHROUGH;
5075 case on_failure_jump_loop:
5076 case on_failure_jump:
5077 case succeed_n:
5078 d = str;
5079 continue_failure_jump:
5080 EXTRACT_NUMBER_AND_INCR (mcnt, pat);
5081 p = pat + mcnt;
5082 break;
5083
5084 case no_op:
5085
5086 goto fail;
5087
5088 default:
5089 abort ();
5090 }
5091
5092 eassert (p >= bufp->buffer && p <= pend);
5093
5094 if (d >= string1 && d <= end1)
5095 dend = end_match_1;
5096 }
5097 else
5098 break;
5099 }
5100
5101 if (best_regs_set)
5102 goto restore_best_regs;
5103
5104 unbind_to (count, Qnil);
5105 SAFE_FREE ();
5106
5107 if (max_redisplay_ticks > 0 && nchars > 0)
5108 update_redisplay_ticks (nchars / 50 + 1, NULL);
5109
5110 return -1;
5111 }
5112
5113
5114
5115
5116
5117
5118 static bool
5119 bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len,
5120 Lisp_Object translate, bool target_multibyte)
5121 {
5122 re_char *p1 = s1, *p2 = s2;
5123 re_char *p1_end = s1 + len;
5124 re_char *p2_end = s2 + len;
5125
5126
5127
5128 while (p1 < p1_end && p2 < p2_end)
5129 {
5130 int p1_charlen, p2_charlen;
5131 int p1_ch, p2_ch;
5132
5133 GET_CHAR_AFTER (p1_ch, p1, p1_charlen);
5134 GET_CHAR_AFTER (p2_ch, p2, p2_charlen);
5135
5136 if (RE_TRANSLATE (translate, p1_ch)
5137 != RE_TRANSLATE (translate, p2_ch))
5138 return true;
5139
5140 p1 += p1_charlen, p2 += p2_charlen;
5141 }
5142
5143 return p1 != p1_end || p2 != p2_end;
5144 }
5145
5146
5147
5148
5149
5150
5151
5152
5153
5154
5155
5156
5157 const char *
5158 re_compile_pattern (const char *pattern, ptrdiff_t length,
5159 bool posix_backtracking, const char *whitespace_regexp,
5160 struct re_pattern_buffer *bufp)
5161 {
5162 bufp->regs_allocated = REGS_UNALLOCATED;
5163
5164 reg_errcode_t ret
5165 = regex_compile ((re_char *) pattern, length,
5166 posix_backtracking,
5167 whitespace_regexp,
5168 bufp);
5169
5170 if (!ret)
5171 return NULL;
5172 return re_error_msgid[ret];
5173 }