This source file includes following definitions.
- extract_number
- extract_number_and_incr
- debug_putchar
- print_fastmap
- print_partial_compiled_pattern
- print_compiled_pattern
- print_double_string
- re_wctype_parse
- re_iswctype
- re_wctype_to_bit
- extend_range_table_work_area
- regex_compile
- store_op1
- store_op2
- insert_op1
- insert_op2
- at_begline_loc_p
- at_endline_loc_p
- group_in_compile_stack
- analyze_first
- re_compile_fastmap
- re_set_registers
- re_search
- re_search_2
- skip_one_char
- skip_noops
- execute_charset
- mutually_exclusive_p
- re_match_2
- unwind_re_match
- re_match_2_internal
- bcmp_translate
- re_compile_pattern
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26 #include <config.h>
27
28 #include "regex-emacs.h"
29
30 #include <stdlib.h>
31
32 #include "character.h"
33 #include "buffer.h"
34 #include "syntax.h"
35 #include "category.h"
36 #include "dispextern.h"
37
38
39
40
41
42 #ifdef RE_DUP_MAX
43 # undef RE_DUP_MAX
44 #endif
45 #define RE_DUP_MAX (0xffff)
46
47
48 #define SYNTAX(c) syntax_property (c, 1)
49
50
51 #define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
52
53
54 #define POS_AS_IN_BUFFER(p) \
55 ((p) + (NILP (gl_state.object) || BUFFERP (gl_state.object)))
56
57 #define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
58 #define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
59 #define RE_STRING_CHAR(p, multibyte) \
60 (multibyte ? STRING_CHAR (p) : *(p))
61 #define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
62 (multibyte ? string_char_and_length (p, &(len)) : ((len) = 1, *(p)))
63
64 #define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
65
66 #define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
67
68
69
70
71 #define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
72 do { \
73 if (target_multibyte) \
74 { \
75 re_char *dtemp = (p) == (str2) ? (end1) : (p); \
76 re_char *dlimit = (p) > (str2) && (p) <= (end2) ? (str2) : (str1); \
77 while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)) \
78 continue; \
79 c = STRING_CHAR (dtemp); \
80 } \
81 else \
82 { \
83 (c = ((p) == (str2) ? (end1) : (p))[-1]); \
84 (c) = RE_CHAR_TO_MULTIBYTE (c); \
85 } \
86 } while (false)
87
88
89
90 #define GET_CHAR_AFTER(c, p, len) \
91 do { \
92 if (target_multibyte) \
93 (c) = string_char_and_length (p, &(len)); \
94 else \
95 { \
96 (c) = *p; \
97 len = 1; \
98 (c) = RE_CHAR_TO_MULTIBYTE (c); \
99 } \
100 } while (false)
101
102
103 #define IS_REAL_ASCII(c) ((c) < 0200)
104
105
106 #define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
107
108
109
110
111 #define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
112 #define ISCNTRL(c) ((c) < ' ')
113 #define ISXDIGIT(c) (0 <= char_hexdigit (c))
114
115
116
117 #define ISBLANK(c) (IS_REAL_ASCII (c) \
118 ? ((c) == ' ' || (c) == '\t') \
119 : blankp (c))
120
121 #define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
122 ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \
123 : graphicp (c))
124
125 #define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
126 ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
127 : printablep (c))
128
129 #define ISALNUM(c) (IS_REAL_ASCII (c) \
130 ? (((c) >= 'a' && (c) <= 'z') \
131 || ((c) >= 'A' && (c) <= 'Z') \
132 || ((c) >= '0' && (c) <= '9')) \
133 : alphanumericp (c))
134
135 #define ISALPHA(c) (IS_REAL_ASCII (c) \
136 ? (((c) >= 'a' && (c) <= 'z') \
137 || ((c) >= 'A' && (c) <= 'Z')) \
138 : alphabeticp (c))
139
140 #define ISLOWER(c) lowercasep (c)
141
142 #define ISPUNCT(c) (IS_REAL_ASCII (c) \
143 ? ((c) > ' ' && (c) < 0177 \
144 && !(((c) >= 'a' && (c) <= 'z') \
145 || ((c) >= 'A' && (c) <= 'Z') \
146 || ((c) >= '0' && (c) <= '9'))) \
147 : SYNTAX (c) != Sword)
148
149 #define ISSPACE(c) (SYNTAX (c) == Swhitespace)
150
151 #define ISUPPER(c) uppercasep (c)
152
153 #define ISWORD(c) (SYNTAX (c) == Sword)
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168 ptrdiff_t emacs_re_safe_alloca = MAX_ALLOCA;
169
170 #define REGEX_USE_SAFE_ALLOCA \
171 USE_SAFE_ALLOCA; sa_avail = emacs_re_safe_alloca
172
173
174 #define REGEX_REALLOCATE(source, osize, nsize) \
175 (destination = SAFE_ALLOCA (nsize), \
176 memcpy (destination, source, osize))
177
178
179
180
181 #define FIRST_STRING_P(ptr) \
182 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
183
184 #define BYTEWIDTH 8
185
186
187 typedef const unsigned char re_char;
188
189 static void re_compile_fastmap (struct re_pattern_buffer *);
190 static ptrdiff_t re_match_2_internal (struct re_pattern_buffer *bufp,
191 re_char *string1, ptrdiff_t size1,
192 re_char *string2, ptrdiff_t size2,
193 ptrdiff_t pos,
194 struct re_registers *regs,
195 ptrdiff_t stop);
196
197
198
199
200
201
202 typedef enum
203 {
204 no_op = 0,
205
206
207 succeed,
208
209
210 exactn,
211
212
213 anychar,
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229 charset,
230
231
232
233 charset_not,
234
235
236
237
238
239 start_memory,
240
241
242
243
244
245 stop_memory,
246
247
248
249 duplicate,
250
251
252 begline,
253
254
255 endline,
256
257
258 begbuf,
259
260
261 endbuf,
262
263
264 jump,
265
266
267
268 on_failure_jump,
269
270
271
272 on_failure_keep_string_jump,
273
274
275
276
277 on_failure_jump_loop,
278
279
280
281
282
283 on_failure_jump_nastyloop,
284
285
286
287
288
289
290
291 on_failure_jump_smart,
292
293
294
295
296
297 succeed_n,
298
299
300
301 jump_n,
302
303
304
305
306 set_number_at,
307
308 wordbeg,
309 wordend,
310
311 wordbound,
312 notwordbound,
313
314 symbeg,
315 symend,
316
317
318
319 syntaxspec,
320
321
322 notsyntaxspec,
323
324 at_dot,
325
326
327
328
329 categoryspec,
330
331
332
333
334 notcategoryspec
335 } re_opcode_t;
336
337
338
339
340
341 #define STORE_NUMBER(destination, number) \
342 do { \
343 (destination)[0] = (number) & 0377; \
344 (destination)[1] = (number) >> 8; \
345 } while (false)
346
347
348
349
350
351 #define STORE_NUMBER_AND_INCR(destination, number) \
352 do { \
353 STORE_NUMBER (destination, number); \
354 (destination) += 2; \
355 } while (false)
356
357
358
359
360 #define EXTRACT_NUMBER(destination, source) \
361 ((destination) = extract_number (source))
362
363 static int
364 extract_number (re_char *source)
365 {
366 signed char leading_byte = source[1];
367 return leading_byte * 256 + source[0];
368 }
369
370
371
372
373 #define EXTRACT_NUMBER_AND_INCR(destination, source) \
374 ((destination) = extract_number_and_incr (&source))
375
376 static int
377 extract_number_and_incr (re_char **source)
378 {
379 int num = extract_number (*source);
380 *source += 2;
381 return num;
382 }
383
384
385
386
387
388 #define STORE_CHARACTER_AND_INCR(destination, character) \
389 do { \
390 (destination)[0] = (character) & 0377; \
391 (destination)[1] = ((character) >> 8) & 0377; \
392 (destination)[2] = (character) >> 16; \
393 (destination) += 3; \
394 } while (false)
395
396
397
398
399 #define EXTRACT_CHARACTER(destination, source) \
400 do { \
401 (destination) = ((source)[0] \
402 | ((source)[1] << 8) \
403 | ((source)[2] << 16)); \
404 } while (false)
405
406
407
408
409
410
411 #define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
412
413
414 #define CHARSET_RANGE_TABLE_EXISTS_P(p) (((p)[1] & 0x80) != 0)
415
416
417
418
419
420 #define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
421
422
423 #define CHARSET_RANGE_TABLE_BITS(p) \
424 ((p)[2 + CHARSET_BITMAP_SIZE (p)] \
425 + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
426
427
428
429
430
431 #define CHARSET_RANGE_TABLE_END(range_table, count) \
432 ((range_table) + (count) * 2 * 3)
433
434
435
436
437 #ifdef REGEX_EMACS_DEBUG
438
439
440 # include "sysstdio.h"
441
442 static int regex_emacs_debug = -100000;
443
444 # define DEBUG_STATEMENT(e) e
445 # define DEBUG_PRINT(...) \
446 if (regex_emacs_debug > 0) fprintf (stderr, __VA_ARGS__)
447 # define DEBUG_COMPILES_ARGUMENTS
448 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
449 if (regex_emacs_debug > 0) print_partial_compiled_pattern (s, e)
450 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
451 if (regex_emacs_debug > 0) print_double_string (w, s1, sz1, s2, sz2)
452
453 static void
454 debug_putchar (int c)
455 {
456 if (c >= 32 && c <= 126)
457 putc (c, stderr);
458 else
459 {
460 unsigned int uc = c;
461 fprintf (stderr, "{%02x}", uc);
462 }
463 }
464
465
466
467 static void
468 print_fastmap (char *fastmap)
469 {
470 bool was_a_range = false;
471 int i = 0;
472
473 while (i < (1 << BYTEWIDTH))
474 {
475 if (fastmap[i++])
476 {
477 was_a_range = false;
478 debug_putchar (i - 1);
479 while (i < (1 << BYTEWIDTH) && fastmap[i])
480 {
481 was_a_range = true;
482 i++;
483 }
484 if (was_a_range)
485 {
486 debug_putchar ('-');
487 debug_putchar (i - 1);
488 }
489 }
490 }
491 putc ('\n', stderr);
492 }
493
494
495
496
497
498 static void
499 print_partial_compiled_pattern (re_char *start, re_char *end)
500 {
501 int mcnt, mcnt2;
502 re_char *p = start;
503 re_char *pend = end;
504
505 if (start == NULL)
506 {
507 fputs ("(null)\n", stderr);
508 return;
509 }
510
511
512 while (p < pend)
513 {
514 fprintf (stderr, "%td:\t", p - start);
515
516 switch ((re_opcode_t) *p++)
517 {
518 case no_op:
519 fputs ("/no_op", stderr);
520 break;
521
522 case succeed:
523 fputs ("/succeed", stderr);
524 break;
525
526 case exactn:
527 mcnt = *p++;
528 fprintf (stderr, "/exactn/%d", mcnt);
529 do
530 {
531 debug_putchar ('/');
532 debug_putchar (*p++);
533 }
534 while (--mcnt);
535 break;
536
537 case start_memory:
538 fprintf (stderr, "/start_memory/%d", *p++);
539 break;
540
541 case stop_memory:
542 fprintf (stderr, "/stop_memory/%d", *p++);
543 break;
544
545 case duplicate:
546 fprintf (stderr, "/duplicate/%d", *p++);
547 break;
548
549 case anychar:
550 fputs ("/anychar", stderr);
551 break;
552
553 case charset:
554 case charset_not:
555 {
556 int c, last = -100;
557 bool in_range = false;
558 int length = CHARSET_BITMAP_SIZE (p - 1);
559 bool has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
560
561 fprintf (stderr, "/charset [%s",
562 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
563
564 if (p + *p >= pend)
565 fputs (" !extends past end of pattern! ", stderr);
566
567 for (c = 0; c < 256; c++)
568 if (c / 8 < length
569 && (p[1 + (c/8)] & (1 << (c % 8))))
570 {
571
572 if (last + 1 == c && ! in_range)
573 {
574 debug_putchar ('-');
575 in_range = true;
576 }
577
578 else if (last + 1 != c && in_range)
579 {
580 debug_putchar (last);
581 in_range = false;
582 }
583
584 if (! in_range)
585 debug_putchar (c);
586
587 last = c;
588 }
589
590 if (in_range)
591 debug_putchar (last);
592
593 debug_putchar (']');
594
595 p += 1 + length;
596
597 if (has_range_table)
598 {
599 int count;
600 fputs ("has-range-table", stderr);
601
602
603 p += 2;
604 EXTRACT_NUMBER_AND_INCR (count, p);
605 p = CHARSET_RANGE_TABLE_END (p, count);
606 }
607 }
608 break;
609
610 case begline:
611 fputs ("/begline", stderr);
612 break;
613
614 case endline:
615 fputs ("/endline", stderr);
616 break;
617
618 case on_failure_jump:
619 EXTRACT_NUMBER_AND_INCR (mcnt, p);
620 fprintf (stderr, "/on_failure_jump to %td", p + mcnt - start);
621 break;
622
623 case on_failure_keep_string_jump:
624 EXTRACT_NUMBER_AND_INCR (mcnt, p);
625 fprintf (stderr, "/on_failure_keep_string_jump to %td",
626 p + mcnt - start);
627 break;
628
629 case on_failure_jump_nastyloop:
630 EXTRACT_NUMBER_AND_INCR (mcnt, p);
631 fprintf (stderr, "/on_failure_jump_nastyloop to %td",
632 p + mcnt - start);
633 break;
634
635 case on_failure_jump_loop:
636 EXTRACT_NUMBER_AND_INCR (mcnt, p);
637 fprintf (stderr, "/on_failure_jump_loop to %td",
638 p + mcnt - start);
639 break;
640
641 case on_failure_jump_smart:
642 EXTRACT_NUMBER_AND_INCR (mcnt, p);
643 fprintf (stderr, "/on_failure_jump_smart to %td",
644 p + mcnt - start);
645 break;
646
647 case jump:
648 EXTRACT_NUMBER_AND_INCR (mcnt, p);
649 fprintf (stderr, "/jump to %td", p + mcnt - start);
650 break;
651
652 case succeed_n:
653 EXTRACT_NUMBER_AND_INCR (mcnt, p);
654 EXTRACT_NUMBER_AND_INCR (mcnt2, p);
655 fprintf (stderr, "/succeed_n to %td, %d times",
656 p - 2 + mcnt - start, mcnt2);
657 break;
658
659 case jump_n:
660 EXTRACT_NUMBER_AND_INCR (mcnt, p);
661 EXTRACT_NUMBER_AND_INCR (mcnt2, p);
662 fprintf (stderr, "/jump_n to %td, %d times",
663 p - 2 + mcnt - start, mcnt2);
664 break;
665
666 case set_number_at:
667 EXTRACT_NUMBER_AND_INCR (mcnt, p);
668 EXTRACT_NUMBER_AND_INCR (mcnt2, p);
669 fprintf (stderr, "/set_number_at location %td to %d",
670 p - 2 + mcnt - start, mcnt2);
671 break;
672
673 case wordbound:
674 fputs ("/wordbound", stderr);
675 break;
676
677 case notwordbound:
678 fputs ("/notwordbound", stderr);
679 break;
680
681 case wordbeg:
682 fputs ("/wordbeg", stderr);
683 break;
684
685 case wordend:
686 fputs ("/wordend", stderr);
687 break;
688
689 case symbeg:
690 fputs ("/symbeg", stderr);
691 break;
692
693 case symend:
694 fputs ("/symend", stderr);
695 break;
696
697 case syntaxspec:
698 fputs ("/syntaxspec", stderr);
699 mcnt = *p++;
700 fprintf (stderr, "/%d", mcnt);
701 break;
702
703 case notsyntaxspec:
704 fputs ("/notsyntaxspec", stderr);
705 mcnt = *p++;
706 fprintf (stderr, "/%d", mcnt);
707 break;
708
709 case at_dot:
710 fputs ("/at_dot", stderr);
711 break;
712
713 case categoryspec:
714 fputs ("/categoryspec", stderr);
715 mcnt = *p++;
716 fprintf (stderr, "/%d", mcnt);
717 break;
718
719 case notcategoryspec:
720 fputs ("/notcategoryspec", stderr);
721 mcnt = *p++;
722 fprintf (stderr, "/%d", mcnt);
723 break;
724
725 case begbuf:
726 fputs ("/begbuf", stderr);
727 break;
728
729 case endbuf:
730 fputs ("/endbuf", stderr);
731 break;
732
733 default:
734 fprintf (stderr, "?%d", *(p-1));
735 }
736
737 putc ('\n', stderr);
738 }
739
740 fprintf (stderr, "%td:\tend of pattern.\n", p - start);
741 }
742
743
744 static void
745 print_compiled_pattern (struct re_pattern_buffer *bufp)
746 {
747 re_char *buffer = bufp->buffer;
748
749 print_partial_compiled_pattern (buffer, buffer + bufp->used);
750 fprintf (stderr, "%td bytes used/%td bytes allocated.\n",
751 bufp->used, bufp->allocated);
752
753 if (bufp->fastmap_accurate && bufp->fastmap)
754 {
755 fputs ("fastmap: ", stderr);
756 print_fastmap (bufp->fastmap);
757 }
758
759 fprintf (stderr, "re_nsub: %td\t", bufp->re_nsub);
760 fprintf (stderr, "regs_alloc: %d\t", bufp->regs_allocated);
761 fprintf (stderr, "can_be_null: %d\n", bufp->can_be_null);
762
763 }
764
765
766 static void
767 print_double_string (re_char *where, re_char *string1, ptrdiff_t size1,
768 re_char *string2, ptrdiff_t size2)
769 {
770 if (where == NULL)
771 fputs ("(null)", stderr);
772 else
773 {
774 int i;
775 if (FIRST_STRING_P (where))
776 {
777 for (i = 0; i < string1 + size1 - where; i++)
778 debug_putchar (where[i]);
779 where = string2;
780 }
781
782 for (i = 0; i < string2 + size2 - where; i++)
783 debug_putchar (where[i]);
784 }
785 }
786
787 #else
788
789 # define DEBUG_STATEMENT(e)
790 # define DEBUG_PRINT(...)
791 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
792 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
793
794 #endif
795
796 typedef enum
797 {
798 REG_NOERROR = 0,
799 REG_NOMATCH,
800
801
802
803
804 REG_BADPAT,
805 REG_ECOLLATE,
806 REG_ECTYPE,
807 REG_EESCAPE,
808 REG_ESUBREG,
809 REG_EBRACK,
810 REG_EPAREN,
811 REG_EBRACE,
812 REG_BADBR,
813 REG_ERANGE,
814 REG_ESPACE,
815 REG_BADRPT,
816
817
818 REG_EEND,
819 REG_ESIZE,
820 REG_ERPAREN,
821 REG_ERANGEX,
822 REG_ESIZEBR
823 } reg_errcode_t;
824
825 static const char *re_error_msgid[] =
826 {
827 [REG_NOERROR] = "Success",
828 [REG_NOMATCH] = "No match",
829 [REG_BADPAT] = "Invalid regular expression",
830 [REG_ECOLLATE] = "Invalid collation character",
831 [REG_ECTYPE] = "Invalid character class name",
832 [REG_EESCAPE] = "Trailing backslash",
833 [REG_ESUBREG] = "Invalid back reference",
834 [REG_EBRACK] = "Unmatched [ or [^",
835 [REG_EPAREN] = "Unmatched ( or \\(",
836 [REG_EBRACE] = "Unmatched \\{",
837 [REG_BADBR] = "Invalid content of \\{\\}",
838 [REG_ERANGE] = "Invalid range end",
839 [REG_ESPACE] = "Memory exhausted",
840 [REG_BADRPT] = "Invalid preceding regular expression",
841 [REG_EEND] = "Premature end of regular expression",
842 [REG_ESIZE] = "Regular expression too big",
843 [REG_ERPAREN] = "Unmatched ) or \\)",
844 [REG_ERANGEX ] = "Range striding over charsets",
845 [REG_ESIZEBR ] = "Invalid content of \\{\\}",
846 };
847
848
849 enum { REGS_UNALLOCATED, REGS_REALLOCATE, REGS_FIXED };
850
851
852
853
854 enum { RE_NREGS = 30 };
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869 #define INIT_FAILURE_ALLOC 20
870
871
872
873
874
875
876
877
878
879
880 ptrdiff_t emacs_re_max_failures = 40000;
881
882 union fail_stack_elt
883 {
884 re_char *pointer;
885 intptr_t integer;
886 };
887
888 typedef union fail_stack_elt fail_stack_elt_t;
889
890 typedef struct
891 {
892 fail_stack_elt_t *stack;
893 ptrdiff_t size;
894 ptrdiff_t avail;
895 ptrdiff_t frame;
896 } fail_stack_type;
897
898 #define FAIL_STACK_EMPTY() (fail_stack.frame == 0)
899
900
901
902
903 #define INIT_FAIL_STACK() \
904 do { \
905 fail_stack.stack = \
906 SAFE_ALLOCA (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE \
907 * sizeof (fail_stack_elt_t)); \
908 fail_stack.size = INIT_FAILURE_ALLOC; \
909 fail_stack.avail = 0; \
910 fail_stack.frame = 0; \
911 } while (false)
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926 #define FAIL_STACK_GROWTH_FACTOR 4
927
928 #define GROW_FAIL_STACK(fail_stack) \
929 (((fail_stack).size >= emacs_re_max_failures * TYPICAL_FAILURE_SIZE) \
930 ? 0 \
931 : ((fail_stack).stack \
932 = REGEX_REALLOCATE ((fail_stack).stack, \
933 (fail_stack).avail * sizeof (fail_stack_elt_t), \
934 min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
935 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)) \
936 * sizeof (fail_stack_elt_t)), \
937 ((fail_stack).size \
938 = (min (emacs_re_max_failures * TYPICAL_FAILURE_SIZE, \
939 ((fail_stack).size * FAIL_STACK_GROWTH_FACTOR)))), \
940 1))
941
942
943
944
945
946 #define PUSH_FAILURE_POINTER(item) \
947 fail_stack.stack[fail_stack.avail++].pointer = (item)
948
949
950
951
952 #define PUSH_FAILURE_INT(item) \
953 fail_stack.stack[fail_stack.avail++].integer = (item)
954
955
956
957 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
958 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
959
960
961 #define NUM_NONREG_ITEMS 3
962
963
964 #define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer
965 #define FAILURE_STR(h) (fail_stack.stack[(h) - 2].pointer)
966 #define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
967 #define TOP_FAILURE_HANDLE() fail_stack.frame
968
969
970 #define ENSURE_FAIL_STACK(space) \
971 while (REMAINING_AVAIL_SLOTS <= space) { \
972 if (!GROW_FAIL_STACK (fail_stack)) \
973 { \
974 unbind_to (count, Qnil); \
975 SAFE_FREE (); \
976 return -2; \
977 } \
978 DEBUG_PRINT ("\n Doubled stack; size now: %td\n", fail_stack.size); \
979 DEBUG_PRINT (" slots available: %td\n", REMAINING_AVAIL_SLOTS);\
980 }
981
982
983 #define PUSH_FAILURE_REG(num) \
984 do { \
985 char *destination; \
986 intptr_t n = num; \
987 eassert (0 < n && n < num_regs); \
988 eassert (REG_UNSET (regstart[n]) <= REG_UNSET (regend[n])); \
989 ENSURE_FAIL_STACK(3); \
990 DEBUG_PRINT (" Push reg %"PRIdPTR" (spanning %p -> %p)\n", \
991 n, regstart[n], regend[n]); \
992 PUSH_FAILURE_POINTER (regstart[n]); \
993 PUSH_FAILURE_POINTER (regend[n]); \
994 PUSH_FAILURE_INT (n); \
995 } while (false)
996
997
998
999 #define PUSH_NUMBER(ptr,val) \
1000 do { \
1001 char *destination; \
1002 int c; \
1003 ENSURE_FAIL_STACK(3); \
1004 EXTRACT_NUMBER (c, ptr); \
1005 DEBUG_PRINT (" Push number %p = %d -> %d\n", ptr, c, val); \
1006 PUSH_FAILURE_INT (c); \
1007 PUSH_FAILURE_POINTER (ptr); \
1008 PUSH_FAILURE_INT (-1); \
1009 STORE_NUMBER (ptr, val); \
1010 } while (false)
1011
1012
1013 #define POP_FAILURE_REG_OR_COUNT() \
1014 do { \
1015 intptr_t pfreg = POP_FAILURE_INT (); \
1016 if (pfreg == -1) \
1017 { \
1018 \
1019 \
1020 unsigned char *ptr = (unsigned char *) POP_FAILURE_POINTER (); \
1021 pfreg = POP_FAILURE_INT (); \
1022 STORE_NUMBER (ptr, pfreg); \
1023 DEBUG_PRINT (" Pop counter %p = %"PRIdPTR"\n", ptr, pfreg); \
1024 } \
1025 else \
1026 { \
1027 eassert (0 < pfreg && pfreg < num_regs); \
1028 regend[pfreg] = POP_FAILURE_POINTER (); \
1029 regstart[pfreg] = POP_FAILURE_POINTER (); \
1030 eassert (REG_UNSET (regstart[pfreg]) <= REG_UNSET (regend[pfreg])); \
1031 DEBUG_PRINT (" Pop reg %ld (spanning %p -> %p)\n", \
1032 pfreg, regstart[pfreg], regend[pfreg]); \
1033 } \
1034 } while (false)
1035
1036
1037 #define CHECK_INFINITE_LOOP(pat_cur, string_place) \
1038 do { \
1039 ptrdiff_t failure = TOP_FAILURE_HANDLE (); \
1040 \
1041 while (failure > 0 \
1042 && (FAILURE_STR (failure) == string_place \
1043 || FAILURE_STR (failure) == NULL)) \
1044 { \
1045 eassert (FAILURE_PAT (failure) >= bufp->buffer \
1046 && FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \
1047 if (FAILURE_PAT (failure) == pat_cur) \
1048 { \
1049 cycle = true; \
1050 break; \
1051 } \
1052 DEBUG_PRINT (" Other pattern: %p\n", FAILURE_PAT (failure)); \
1053 failure = NEXT_FAILURE_HANDLE(failure); \
1054 } \
1055 DEBUG_PRINT (" Other string: %p\n", FAILURE_STR (failure)); \
1056 } while (false)
1057
1058
1059
1060
1061
1062
1063
1064
1065
1066
1067 #define PUSH_FAILURE_POINT(pattern, string_place) \
1068 do { \
1069 char *destination; \
1070 DEBUG_STATEMENT (nfailure_points_pushed++); \
1071 DEBUG_PRINT ("\nPUSH_FAILURE_POINT:\n"); \
1072 DEBUG_PRINT (" Before push, next avail: %td\n", fail_stack.avail); \
1073 DEBUG_PRINT (" size: %td\n", fail_stack.size); \
1074 \
1075 ENSURE_FAIL_STACK (NUM_NONREG_ITEMS); \
1076 \
1077 DEBUG_PRINT ("\n"); \
1078 \
1079 DEBUG_PRINT (" Push frame index: %td\n", fail_stack.frame); \
1080 PUSH_FAILURE_INT (fail_stack.frame); \
1081 \
1082 DEBUG_PRINT (" Push string %p: \"", string_place); \
1083 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\
1084 DEBUG_PRINT ("\"\n"); \
1085 PUSH_FAILURE_POINTER (string_place); \
1086 \
1087 DEBUG_PRINT (" Push pattern %p: ", pattern); \
1088 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern, pend); \
1089 PUSH_FAILURE_POINTER (pattern); \
1090 \
1091 \
1092 fail_stack.frame = fail_stack.avail; \
1093 } while (false)
1094
1095
1096
1097
1098
1099 #define TYPICAL_FAILURE_SIZE 20
1100
1101
1102 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1103
1104
1105
1106
1107
1108
1109
1110
1111
1112
1113
1114
1115 #define POP_FAILURE_POINT(str, pat) \
1116 do { \
1117 eassert (!FAIL_STACK_EMPTY ()); \
1118 \
1119 \
1120 DEBUG_PRINT ("POP_FAILURE_POINT:\n"); \
1121 DEBUG_PRINT (" Before pop, next avail: %td\n", fail_stack.avail); \
1122 DEBUG_PRINT (" size: %td\n", fail_stack.size); \
1123 \
1124 \
1125 while (fail_stack.frame < fail_stack.avail) \
1126 POP_FAILURE_REG_OR_COUNT (); \
1127 \
1128 pat = POP_FAILURE_POINTER (); \
1129 DEBUG_PRINT (" Popping pattern %p: ", pat); \
1130 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
1131 \
1132
1133
1134 \
1135 str = POP_FAILURE_POINTER (); \
1136 DEBUG_PRINT (" Popping string %p: \"", str); \
1137 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
1138 DEBUG_PRINT ("\"\n"); \
1139 \
1140 fail_stack.frame = POP_FAILURE_INT (); \
1141 DEBUG_PRINT (" Popping frame index: %td\n", fail_stack.frame); \
1142 \
1143 eassert (fail_stack.avail >= 0); \
1144 eassert (fail_stack.frame <= fail_stack.avail); \
1145 \
1146 DEBUG_STATEMENT (nfailure_points_popped++); \
1147 } while (false)
1148
1149
1150
1151
1152 #define REG_UNSET(e) ((e) == NULL)
1153
1154
1155
1156 static reg_errcode_t regex_compile (re_char *pattern, ptrdiff_t size,
1157 bool posix_backtracking,
1158 const char *whitespace_regexp,
1159 struct re_pattern_buffer *bufp);
1160 static void store_op1 (re_opcode_t op, unsigned char *loc, int arg);
1161 static void store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
1162 static void insert_op1 (re_opcode_t op, unsigned char *loc,
1163 int arg, unsigned char *end);
1164 static void insert_op2 (re_opcode_t op, unsigned char *loc,
1165 int arg1, int arg2, unsigned char *end);
1166 static bool at_begline_loc_p (re_char *pattern, re_char *p);
1167 static bool at_endline_loc_p (re_char *p, re_char *pend);
1168 static re_char *skip_one_char (re_char *p);
1169 static int analyze_first (re_char *p, re_char *pend,
1170 char *fastmap, bool multibyte);
1171
1172
1173
1174 #define PATFETCH(c) \
1175 do { \
1176 int len; \
1177 if (p == pend) return REG_EEND; \
1178 c = RE_STRING_CHAR_AND_LENGTH (p, len, multibyte); \
1179 p += len; \
1180 } while (false)
1181
1182
1183 #define RE_TRANSLATE(TBL, C) char_table_translate (TBL, C)
1184 #define TRANSLATE(d) (!NILP (translate) ? RE_TRANSLATE (translate, d) : (d))
1185
1186
1187
1188
1189 #define INIT_BUF_SIZE 32
1190
1191
1192 #define GET_BUFFER_SPACE(n) \
1193 if (bufp->buffer + bufp->allocated - b < (n)) \
1194 EXTEND_BUFFER ((n) - (bufp->buffer + bufp->allocated - b))
1195
1196
1197 #define BUF_PUSH(c) \
1198 do { \
1199 GET_BUFFER_SPACE (1); \
1200 *b++ = (unsigned char) (c); \
1201 } while (false)
1202
1203
1204
1205 #define BUF_PUSH_2(c1, c2) \
1206 do { \
1207 GET_BUFFER_SPACE (2); \
1208 *b++ = (unsigned char) (c1); \
1209 *b++ = (unsigned char) (c2); \
1210 } while (false)
1211
1212
1213
1214
1215 #define STORE_JUMP(op, loc, to) \
1216 store_op1 (op, loc, (to) - (loc) - 3)
1217
1218
1219 #define STORE_JUMP2(op, loc, to, arg) \
1220 store_op2 (op, loc, (to) - (loc) - 3, arg)
1221
1222
1223 #define INSERT_JUMP(op, loc, to) \
1224 insert_op1 (op, loc, (to) - (loc) - 3, b)
1225
1226
1227 #define INSERT_JUMP2(op, loc, to, arg) \
1228 insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1229
1230
1231
1232
1233
1234 # define MAX_BUF_SIZE (1 << 15)
1235
1236
1237
1238
1239
1240 #define EXTEND_BUFFER(n) \
1241 do { \
1242 ptrdiff_t requested_extension = n; \
1243 unsigned char *old_buffer = bufp->buffer; \
1244 if (MAX_BUF_SIZE - bufp->allocated < requested_extension) \
1245 return REG_ESIZE; \
1246 ptrdiff_t b_off = b - old_buffer; \
1247 ptrdiff_t begalt_off = begalt - old_buffer; \
1248 ptrdiff_t fixup_alt_jump_off = \
1249 fixup_alt_jump ? fixup_alt_jump - old_buffer : -1; \
1250 ptrdiff_t laststart_off = laststart ? laststart - old_buffer : -1; \
1251 ptrdiff_t pending_exact_off = \
1252 pending_exact ? pending_exact - old_buffer : -1; \
1253 bufp->buffer = xpalloc (bufp->buffer, &bufp->allocated, \
1254 requested_extension, MAX_BUF_SIZE, 1); \
1255 unsigned char *new_buffer = bufp->buffer; \
1256 b = new_buffer + b_off; \
1257 begalt = new_buffer + begalt_off; \
1258 if (0 <= fixup_alt_jump_off) \
1259 fixup_alt_jump = new_buffer + fixup_alt_jump_off; \
1260 if (0 <= laststart_off) \
1261 laststart = new_buffer + laststart_off; \
1262 if (0 <= pending_exact_off) \
1263 pending_exact = new_buffer + pending_exact_off; \
1264 } while (false)
1265
1266
1267
1268
1269
1270 #define MAX_REGNUM 255
1271
1272
1273
1274 typedef int regnum_t;
1275
1276
1277
1278
1279 typedef long pattern_offset_t;
1280 verify (LONG_MIN <= -(MAX_BUF_SIZE - 1) && MAX_BUF_SIZE - 1 <= LONG_MAX);
1281
1282 typedef struct
1283 {
1284 pattern_offset_t begalt_offset;
1285 pattern_offset_t fixup_alt_jump;
1286 pattern_offset_t laststart_offset;
1287 regnum_t regnum;
1288 } compile_stack_elt_t;
1289
1290
1291 typedef struct
1292 {
1293 compile_stack_elt_t *stack;
1294 ptrdiff_t size;
1295 ptrdiff_t avail;
1296 } compile_stack_type;
1297
1298
1299 #define INIT_COMPILE_STACK_SIZE 32
1300
1301 #define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
1302 #define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
1303
1304
1305 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1306
1307
1308 struct range_table_work_area
1309 {
1310 int *table;
1311 int allocated;
1312 int used;
1313 int bits;
1314 };
1315
1316
1317
1318
1319 #define EXTEND_RANGE_TABLE(work_area, n) \
1320 do { \
1321 if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \
1322 { \
1323 extend_range_table_work_area (&work_area); \
1324 if ((work_area).table == 0) \
1325 return (REG_ESPACE); \
1326 } \
1327 } while (false)
1328
1329 #define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit) \
1330 (work_area).bits |= (bit)
1331
1332
1333 #define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end) \
1334 do { \
1335 EXTEND_RANGE_TABLE ((work_area), 2); \
1336 (work_area).table[(work_area).used++] = (range_start); \
1337 (work_area).table[(work_area).used++] = (range_end); \
1338 } while (false)
1339
1340
1341 #define FREE_RANGE_TABLE_WORK_AREA(work_area) \
1342 do { \
1343 if ((work_area).table) \
1344 xfree ((work_area).table); \
1345 } while (false)
1346
1347 #define CLEAR_RANGE_TABLE_WORK_USED(work_area) \
1348 ((work_area).used = 0, (work_area).bits = 0)
1349 #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
1350 #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
1351 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
1352
1353
1354
1355
1356 #define BIT_WORD 0x1
1357 #define BIT_LOWER 0x2
1358 #define BIT_PUNCT 0x4
1359 #define BIT_SPACE 0x8
1360 #define BIT_UPPER 0x10
1361 #define BIT_MULTIBYTE 0x20
1362 #define BIT_ALPHA 0x40
1363 #define BIT_ALNUM 0x80
1364 #define BIT_GRAPH 0x100
1365 #define BIT_PRINT 0x200
1366 #define BIT_BLANK 0x400
1367
1368
1369
1370 #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
1371
1372
1373
1374
1375
1376
1377
1378
1379
1380
1381
1382
1383
1384 #define SETUP_ASCII_RANGE(work_area, FROM, TO) \
1385 do { \
1386 int C0, C1; \
1387 \
1388 for (C0 = (FROM); C0 <= (TO); C0++) \
1389 { \
1390 C1 = TRANSLATE (C0); \
1391 if (! ASCII_CHAR_P (C1)) \
1392 { \
1393 SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1); \
1394 if ((C1 = RE_CHAR_TO_UNIBYTE (C1)) < 0) \
1395 C1 = C0; \
1396 } \
1397 SET_LIST_BIT (C1); \
1398 } \
1399 } while (false)
1400
1401
1402
1403
1404 #define SETUP_UNIBYTE_RANGE(work_area, FROM, TO) \
1405 do { \
1406 int C0, C1, C2, I; \
1407 int USED = RANGE_TABLE_WORK_USED (work_area); \
1408 \
1409 for (C0 = (FROM); C0 <= (TO); C0++) \
1410 { \
1411 C1 = RE_CHAR_TO_MULTIBYTE (C0); \
1412 if (CHAR_BYTE8_P (C1)) \
1413 SET_LIST_BIT (C0); \
1414 else \
1415 { \
1416 C2 = TRANSLATE (C1); \
1417 if (C2 == C1 \
1418 || (C1 = RE_CHAR_TO_UNIBYTE (C2)) < 0) \
1419 C1 = C0; \
1420 SET_LIST_BIT (C1); \
1421 for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
1422 { \
1423 int from = RANGE_TABLE_WORK_ELT (work_area, I); \
1424 int to = RANGE_TABLE_WORK_ELT (work_area, I + 1); \
1425 \
1426 if (C2 >= from - 1 && C2 <= to + 1) \
1427 { \
1428 if (C2 == from - 1) \
1429 RANGE_TABLE_WORK_ELT (work_area, I)--; \
1430 else if (C2 == to + 1) \
1431 RANGE_TABLE_WORK_ELT (work_area, I + 1)++; \
1432 break; \
1433 } \
1434 } \
1435 if (I < USED) \
1436 SET_RANGE_TABLE_WORK_AREA ((work_area), C2, C2); \
1437 } \
1438 } \
1439 } while (false)
1440
1441
1442
1443
1444 #define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO) \
1445 do { \
1446 int C0, C1, C2, I, USED = RANGE_TABLE_WORK_USED (work_area); \
1447 \
1448 SET_RANGE_TABLE_WORK_AREA ((work_area), (FROM), (TO)); \
1449 for (C0 = (FROM); C0 <= (TO); C0++) \
1450 { \
1451 C1 = TRANSLATE (C0); \
1452 if ((C2 = RE_CHAR_TO_UNIBYTE (C1)) >= 0 \
1453 || (C1 != C0 && (C2 = RE_CHAR_TO_UNIBYTE (C0)) >= 0)) \
1454 SET_LIST_BIT (C2); \
1455 if (C1 >= (FROM) && C1 <= (TO)) \
1456 continue; \
1457 for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
1458 { \
1459 int from = RANGE_TABLE_WORK_ELT (work_area, I); \
1460 int to = RANGE_TABLE_WORK_ELT (work_area, I + 1); \
1461 \
1462 if (C1 >= from - 1 && C1 <= to + 1) \
1463 { \
1464 if (C1 == from - 1) \
1465 RANGE_TABLE_WORK_ELT (work_area, I)--; \
1466 else if (C1 == to + 1) \
1467 RANGE_TABLE_WORK_ELT (work_area, I + 1)++; \
1468 break; \
1469 } \
1470 } \
1471 if (I < USED) \
1472 SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1); \
1473 } \
1474 } while (false)
1475
1476
1477 #define GET_INTERVAL_COUNT(num) \
1478 do { \
1479 if (p == pend) \
1480 FREE_STACK_RETURN (REG_EBRACE); \
1481 else \
1482 { \
1483 PATFETCH (c); \
1484 while ('0' <= c && c <= '9') \
1485 { \
1486 if (num < 0) \
1487 num = 0; \
1488 if (RE_DUP_MAX / 10 - (RE_DUP_MAX % 10 < c - '0') < num) \
1489 FREE_STACK_RETURN (REG_ESIZEBR); \
1490 num = num * 10 + c - '0'; \
1491 if (p == pend) \
1492 FREE_STACK_RETURN (REG_EBRACE); \
1493 PATFETCH (c); \
1494 } \
1495 } \
1496 } while (false)
1497
1498
1499
1500
1501
1502
1503
1504
1505
1506
1507
1508
1509
1510
1511
1512 re_wctype_t
1513 re_wctype_parse (const unsigned char **strp, ptrdiff_t limit)
1514 {
1515 const char *beg = (const char *)*strp, *it;
1516
1517 if (limit < 4 || beg[0] != '[' || beg[1] != ':')
1518 return -1;
1519
1520 beg += 2;
1521 limit -= 3;
1522 for (it = beg; it[0] != ':' || it[1] != ']'; ++it)
1523 if (!--limit)
1524 return -1;
1525
1526 *strp = (const unsigned char *)(it + 2);
1527
1528
1529
1530
1531
1532
1533
1534
1535
1536
1537
1538
1539
1540
1541
1542
1543
1544
1545
1546
1547
1548
1549
1550
1551
1552
1553
1554
1555 switch (it - beg) {
1556 case 4:
1557 if (!memcmp (beg, "word", 4)) return RECC_WORD;
1558 break;
1559 case 5:
1560 if (!memcmp (beg, "alnum", 5)) return RECC_ALNUM;
1561 if (!memcmp (beg, "alpha", 5)) return RECC_ALPHA;
1562 if (!memcmp (beg, "space", 5)) return RECC_SPACE;
1563 if (!memcmp (beg, "digit", 5)) return RECC_DIGIT;
1564 if (!memcmp (beg, "blank", 5)) return RECC_BLANK;
1565 if (!memcmp (beg, "upper", 5)) return RECC_UPPER;
1566 if (!memcmp (beg, "lower", 5)) return RECC_LOWER;
1567 if (!memcmp (beg, "punct", 5)) return RECC_PUNCT;
1568 if (!memcmp (beg, "ascii", 5)) return RECC_ASCII;
1569 if (!memcmp (beg, "graph", 5)) return RECC_GRAPH;
1570 if (!memcmp (beg, "print", 5)) return RECC_PRINT;
1571 if (!memcmp (beg, "cntrl", 5)) return RECC_CNTRL;
1572 break;
1573 case 6:
1574 if (!memcmp (beg, "xdigit", 6)) return RECC_XDIGIT;
1575 break;
1576 case 7:
1577 if (!memcmp (beg, "unibyte", 7)) return RECC_UNIBYTE;
1578 break;
1579 case 8:
1580 if (!memcmp (beg, "nonascii", 8)) return RECC_NONASCII;
1581 break;
1582 case 9:
1583 if (!memcmp (beg, "multibyte", 9)) return RECC_MULTIBYTE;
1584 break;
1585 }
1586
1587 return RECC_ERROR;
1588 }
1589
1590
1591 bool
1592 re_iswctype (int ch, re_wctype_t cc)
1593 {
1594 switch (cc)
1595 {
1596 case RECC_ALNUM: return ISALNUM (ch) != 0;
1597 case RECC_ALPHA: return ISALPHA (ch) != 0;
1598 case RECC_BLANK: return ISBLANK (ch) != 0;
1599 case RECC_CNTRL: return ISCNTRL (ch) != 0;
1600 case RECC_DIGIT: return ISDIGIT (ch) != 0;
1601 case RECC_GRAPH: return ISGRAPH (ch) != 0;
1602 case RECC_LOWER: return ISLOWER (ch) != 0;
1603 case RECC_PRINT: return ISPRINT (ch) != 0;
1604 case RECC_PUNCT: return ISPUNCT (ch) != 0;
1605 case RECC_SPACE: return ISSPACE (ch) != 0;
1606 case RECC_UPPER: return ISUPPER (ch) != 0;
1607 case RECC_XDIGIT: return ISXDIGIT (ch) != 0;
1608 case RECC_ASCII: return IS_REAL_ASCII (ch) != 0;
1609 case RECC_NONASCII: return !IS_REAL_ASCII (ch);
1610 case RECC_UNIBYTE: return ISUNIBYTE (ch) != 0;
1611 case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
1612 case RECC_WORD: return ISWORD (ch) != 0;
1613 case RECC_ERROR: return false;
1614 default:
1615 abort ();
1616 }
1617 }
1618
1619
1620
1621 static int
1622 re_wctype_to_bit (re_wctype_t cc)
1623 {
1624 switch (cc)
1625 {
1626 case RECC_NONASCII:
1627 case RECC_MULTIBYTE: return BIT_MULTIBYTE;
1628 case RECC_ALPHA: return BIT_ALPHA;
1629 case RECC_ALNUM: return BIT_ALNUM;
1630 case RECC_WORD: return BIT_WORD;
1631 case RECC_LOWER: return BIT_LOWER;
1632 case RECC_UPPER: return BIT_UPPER;
1633 case RECC_PUNCT: return BIT_PUNCT;
1634 case RECC_SPACE: return BIT_SPACE;
1635 case RECC_GRAPH: return BIT_GRAPH;
1636 case RECC_PRINT: return BIT_PRINT;
1637 case RECC_BLANK: return BIT_BLANK;
1638 case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
1639 case RECC_UNIBYTE: case RECC_ERROR: return 0;
1640 default:
1641 abort ();
1642 }
1643 }
1644
1645
1646
1647
1648
1649 static void
1650 extend_range_table_work_area (struct range_table_work_area *work_area)
1651 {
1652 work_area->allocated += 16 * sizeof (int);
1653 work_area->table = xrealloc (work_area->table, work_area->allocated);
1654 }
1655
1656
1657
1658 static bool group_in_compile_stack (compile_stack_type, regnum_t);
1659
1660
1661
1662 #define FIXUP_ALT_JUMP() \
1663 do { \
1664 if (fixup_alt_jump) \
1665 STORE_JUMP (jump, fixup_alt_jump, b); \
1666 } while (false)
1667
1668
1669
1670 #define FREE_STACK_RETURN(value) \
1671 do { \
1672 FREE_RANGE_TABLE_WORK_AREA (range_table_work); \
1673 xfree (compile_stack.stack); \
1674 return value; \
1675 } while (false)
1676
1677
1678
1679
1680
1681
1682
1683
1684
1685
1686
1687
1688
1689
1690
1691
1692
1693
1694
1695
1696 static reg_errcode_t
1697 regex_compile (re_char *pattern, ptrdiff_t size,
1698 bool posix_backtracking,
1699 const char *whitespace_regexp,
1700 struct re_pattern_buffer *bufp)
1701 {
1702
1703 int c, c1;
1704
1705
1706 unsigned char *b;
1707
1708
1709 compile_stack_type compile_stack;
1710
1711
1712 re_char *p = pattern;
1713 re_char *pend = pattern + size;
1714
1715
1716 Lisp_Object translate = bufp->translate;
1717
1718
1719
1720
1721
1722 unsigned char *pending_exact = 0;
1723
1724
1725
1726
1727 unsigned char *laststart = 0;
1728
1729
1730 unsigned char *begalt;
1731
1732
1733
1734 re_char *beg_interval;
1735
1736
1737
1738
1739 unsigned char *fixup_alt_jump = 0;
1740
1741
1742 struct range_table_work_area range_table_work;
1743
1744
1745 bool multibyte = RE_MULTIBYTE_P (bufp);
1746
1747
1748 int in_subpattern = 0;
1749
1750
1751
1752 re_char *main_p;
1753 re_char *main_pattern;
1754 re_char *main_pend;
1755
1756 #ifdef REGEX_EMACS_DEBUG
1757 regex_emacs_debug++;
1758 DEBUG_PRINT ("\nCompiling pattern: ");
1759 if (regex_emacs_debug > 0)
1760 {
1761 for (ptrdiff_t debug_count = 0; debug_count < size; debug_count++)
1762 debug_putchar (pattern[debug_count]);
1763 putc ('\n', stderr);
1764 }
1765 #endif
1766
1767
1768 compile_stack.stack = xmalloc (INIT_COMPILE_STACK_SIZE
1769 * sizeof *compile_stack.stack);
1770 __lsan_ignore_object (compile_stack.stack);
1771 compile_stack.size = INIT_COMPILE_STACK_SIZE;
1772 compile_stack.avail = 0;
1773
1774 range_table_work.table = 0;
1775 range_table_work.allocated = 0;
1776
1777
1778 bufp->fastmap_accurate = false;
1779 bufp->used_syntax = false;
1780
1781
1782
1783
1784 bufp->used = 0;
1785
1786 bufp->re_nsub = 0;
1787
1788 if (bufp->allocated == 0)
1789 {
1790
1791
1792 bufp->buffer = xrealloc (bufp->buffer, INIT_BUF_SIZE);
1793 bufp->allocated = INIT_BUF_SIZE;
1794 }
1795
1796 begalt = b = bufp->buffer;
1797
1798
1799 while (1)
1800 {
1801 if (p == pend)
1802 {
1803
1804
1805 if (in_subpattern)
1806 {
1807 in_subpattern = 0;
1808 pattern = main_pattern;
1809 p = main_p;
1810 pend = main_pend;
1811 continue;
1812 }
1813
1814 break;
1815 }
1816
1817 PATFETCH (c);
1818
1819 switch (c)
1820 {
1821 case ' ':
1822 {
1823 re_char *p1 = p;
1824
1825
1826
1827 if (!whitespace_regexp || in_subpattern)
1828 goto normal_char;
1829
1830
1831 while (p1 != pend)
1832 {
1833 if (*p1 != ' ')
1834 break;
1835 p1++;
1836 }
1837
1838
1839 if (p1 != pend
1840 && (*p1 == '*' || *p1 == '+' || *p1 == '?'
1841 || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
1842 goto normal_char;
1843
1844
1845 in_subpattern = 1;
1846 main_p = p1;
1847 main_pend = pend;
1848 main_pattern = pattern;
1849 p = pattern = (re_char *) whitespace_regexp;
1850 pend = p + strlen (whitespace_regexp);
1851 break;
1852 }
1853
1854 case '^':
1855 if (! (p == pattern + 1 || at_begline_loc_p (pattern, p)))
1856 goto normal_char;
1857 BUF_PUSH (begline);
1858 break;
1859
1860 case '$':
1861 if (! (p == pend || at_endline_loc_p (p, pend)))
1862 goto normal_char;
1863 BUF_PUSH (endline);
1864 break;
1865
1866
1867 case '+':
1868 case '?':
1869 case '*':
1870
1871 if (!laststart)
1872 goto normal_char;
1873
1874 {
1875
1876 bool zero_times_ok = false, many_times_ok = false;
1877 bool greedy = true;
1878
1879
1880
1881
1882
1883
1884 for (;;)
1885 {
1886 if (c == '?' && (zero_times_ok || many_times_ok))
1887 greedy = false;
1888 else
1889 {
1890 zero_times_ok |= c != '+';
1891 many_times_ok |= c != '?';
1892 }
1893
1894 if (! (p < pend && (*p == '*' || *p == '+' || *p == '?')))
1895 break;
1896
1897 c = *p++;
1898 }
1899
1900
1901
1902 if (!laststart || laststart == b)
1903 break;
1904
1905
1906
1907 if (greedy)
1908 {
1909 if (many_times_ok)
1910 {
1911 bool simple = skip_one_char (laststart) == b;
1912 ptrdiff_t startoffset = 0;
1913 re_opcode_t ofj =
1914
1915 (simple || !analyze_first (laststart, b, NULL, false))
1916 ? on_failure_jump : on_failure_jump_loop;
1917 eassert (skip_one_char (laststart) <= b);
1918
1919 if (!zero_times_ok && simple)
1920 {
1921
1922
1923 unsigned char *p1, *p2;
1924 startoffset = b - laststart;
1925 GET_BUFFER_SPACE (startoffset);
1926 p1 = b; p2 = laststart;
1927 while (p2 < p1)
1928 *b++ = *p2++;
1929 zero_times_ok = 1;
1930 }
1931
1932 GET_BUFFER_SPACE (6);
1933 if (!zero_times_ok)
1934
1935 STORE_JUMP (ofj, b, b + 6);
1936 else
1937
1938
1939
1940
1941 INSERT_JUMP (simple ? on_failure_jump_smart : ofj,
1942 laststart + startoffset, b + 6);
1943 b += 3;
1944 STORE_JUMP (jump, b, laststart + startoffset);
1945 b += 3;
1946 }
1947 else
1948 {
1949
1950 eassert (zero_times_ok);
1951 GET_BUFFER_SPACE (3);
1952 INSERT_JUMP (on_failure_jump, laststart, b + 3);
1953 b += 3;
1954 }
1955 }
1956 else
1957 {
1958
1959 GET_BUFFER_SPACE (7);
1960 if (many_times_ok)
1961 {
1962 bool emptyp = !!analyze_first (laststart, b, NULL, false);
1963
1964
1965
1966
1967 if (emptyp) BUF_PUSH (no_op);
1968 STORE_JUMP (emptyp ? on_failure_jump_nastyloop
1969 : on_failure_jump, b, laststart);
1970 b += 3;
1971 if (zero_times_ok)
1972 {
1973
1974
1975
1976 INSERT_JUMP (jump, laststart, b);
1977 b += 3;
1978 }
1979 }
1980 else
1981 {
1982
1983 INSERT_JUMP (jump, laststart, b + 3);
1984 b += 3;
1985 INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
1986 b += 3;
1987 }
1988 }
1989 }
1990 pending_exact = 0;
1991 break;
1992
1993
1994 case '.':
1995 laststart = b;
1996 BUF_PUSH (anychar);
1997 break;
1998
1999
2000 case '[':
2001 {
2002 re_char *p1;
2003
2004 CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
2005
2006 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2007
2008
2009
2010 GET_BUFFER_SPACE (34);
2011
2012 laststart = b;
2013
2014
2015
2016 BUF_PUSH (*p == '^' ? charset_not : charset);
2017 if (*p == '^')
2018 p++;
2019
2020
2021 p1 = p;
2022
2023
2024 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2025
2026
2027 memset (b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
2028
2029
2030 for (;;)
2031 {
2032 const unsigned char *p2 = p;
2033 int ch;
2034
2035 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2036
2037
2038
2039 re_wctype_t cc = re_wctype_parse (&p, pend - p);
2040 if (cc != -1)
2041 {
2042 if (cc == 0)
2043 FREE_STACK_RETURN (REG_ECTYPE);
2044
2045 if (p == pend)
2046 FREE_STACK_RETURN (REG_EBRACK);
2047
2048
2049
2050
2051
2052
2053
2054
2055
2056
2057
2058 SETUP_BUFFER_SYNTAX_TABLE ();
2059
2060 for (c = 0; c < 0x80; ++c)
2061 if (re_iswctype (c, cc))
2062 {
2063 SET_LIST_BIT (c);
2064 c1 = TRANSLATE (c);
2065 if (c1 == c)
2066 continue;
2067 if (ASCII_CHAR_P (c1))
2068 SET_LIST_BIT (c1);
2069 else if ((c1 = RE_CHAR_TO_UNIBYTE (c1)) >= 0)
2070 SET_LIST_BIT (c1);
2071 }
2072 SET_RANGE_TABLE_WORK_AREA_BIT
2073 (range_table_work, re_wctype_to_bit (cc));
2074
2075
2076
2077
2078
2079 if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
2080 bufp->used_syntax = true;
2081
2082
2083 continue;
2084 }
2085
2086
2087
2088
2089
2090 PATFETCH (c);
2091
2092
2093
2094
2095 if (c == ']' && p2 != p1)
2096 break;
2097
2098 if (p < pend && p[0] == '-' && p[1] != ']')
2099 {
2100
2101
2102 PATFETCH (c1);
2103
2104
2105 PATFETCH (c1);
2106
2107 if (CHAR_BYTE8_P (c1)
2108 && ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c))
2109
2110
2111 c = c1 + 1;
2112 }
2113 else
2114
2115 c1 = c;
2116
2117 if (c <= c1)
2118 {
2119 if (c < 128)
2120 {
2121 ch = min (127, c1);
2122 SETUP_ASCII_RANGE (range_table_work, c, ch);
2123 c = ch + 1;
2124 if (CHAR_BYTE8_P (c1))
2125 c = BYTE8_TO_CHAR (128);
2126 }
2127 if (c <= c1)
2128 {
2129 if (CHAR_BYTE8_P (c))
2130 {
2131 c = CHAR_TO_BYTE8 (c);
2132 c1 = CHAR_TO_BYTE8 (c1);
2133 for (; c <= c1; c++)
2134 SET_LIST_BIT (c);
2135 }
2136 else if (multibyte)
2137 SETUP_MULTIBYTE_RANGE (range_table_work, c, c1);
2138 else
2139 SETUP_UNIBYTE_RANGE (range_table_work, c, c1);
2140 }
2141 }
2142 }
2143
2144
2145
2146 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2147 b[-1]--;
2148 b += b[-1];
2149
2150
2151 if (RANGE_TABLE_WORK_USED (range_table_work)
2152 || RANGE_TABLE_WORK_BITS (range_table_work))
2153 {
2154 int i;
2155 int used = RANGE_TABLE_WORK_USED (range_table_work);
2156
2157
2158
2159
2160 GET_BUFFER_SPACE (4 + used * 3);
2161
2162
2163 laststart[1] |= 0x80;
2164
2165
2166 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
2167 *b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
2168
2169 STORE_NUMBER_AND_INCR (b, used / 2);
2170 for (i = 0; i < used; i++)
2171 STORE_CHARACTER_AND_INCR
2172 (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
2173 }
2174 }
2175 break;
2176
2177
2178 case '\\':
2179 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2180
2181
2182
2183
2184 PATFETCH (c);
2185
2186 switch (c)
2187 {
2188 case '(':
2189 {
2190 bool shy = false;
2191 regnum_t regnum = 0;
2192 if (p+1 < pend)
2193 {
2194
2195 if (*p == '?')
2196 {
2197 PATFETCH (c);
2198 while (!shy)
2199 {
2200 PATFETCH (c);
2201 switch (c)
2202 {
2203 case ':': shy = true; break;
2204 case '0':
2205
2206
2207 if (regnum == 0)
2208 FREE_STACK_RETURN (REG_BADPAT);
2209 FALLTHROUGH;
2210 case '1': case '2': case '3': case '4':
2211 case '5': case '6': case '7': case '8': case '9':
2212 if (INT_MULTIPLY_WRAPV (regnum, 10, ®num)
2213 || INT_ADD_WRAPV (regnum, c - '0',
2214 ®num))
2215 FREE_STACK_RETURN (REG_ESIZE);
2216 break;
2217 default:
2218
2219 FREE_STACK_RETURN (REG_BADPAT);
2220 }
2221 }
2222 }
2223 }
2224
2225 if (!shy)
2226 regnum = ++bufp->re_nsub;
2227 else if (regnum)
2228 {
2229 shy = false;
2230 if (regnum > bufp->re_nsub)
2231 bufp->re_nsub = regnum;
2232 else if (regnum > bufp->re_nsub
2233
2234
2235
2236
2237
2238 || group_in_compile_stack (compile_stack, regnum))
2239 FREE_STACK_RETURN (REG_BADPAT);
2240 }
2241 else
2242
2243 regnum = - bufp->re_nsub;
2244
2245 if (COMPILE_STACK_FULL)
2246 compile_stack.stack
2247 = xpalloc (compile_stack.stack, &compile_stack.size,
2248 1, -1, sizeof *compile_stack.stack);
2249
2250
2251
2252
2253
2254 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2255 COMPILE_STACK_TOP.fixup_alt_jump
2256 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2257 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2258 COMPILE_STACK_TOP.regnum = regnum;
2259
2260
2261
2262 if (regnum <= MAX_REGNUM && regnum > 0)
2263 BUF_PUSH_2 (start_memory, regnum);
2264
2265 compile_stack.avail++;
2266
2267 fixup_alt_jump = 0;
2268 laststart = 0;
2269 begalt = b;
2270
2271
2272
2273 pending_exact = 0;
2274 break;
2275 }
2276
2277 case ')':
2278 if (COMPILE_STACK_EMPTY)
2279 FREE_STACK_RETURN (REG_ERPAREN);
2280
2281 FIXUP_ALT_JUMP ();
2282
2283
2284 if (COMPILE_STACK_EMPTY)
2285 FREE_STACK_RETURN (REG_ERPAREN);
2286
2287
2288
2289 eassert (compile_stack.avail != 0);
2290 {
2291
2292
2293
2294 regnum_t regnum;
2295
2296 compile_stack.avail--;
2297 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2298 fixup_alt_jump
2299 = COMPILE_STACK_TOP.fixup_alt_jump
2300 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2301 : 0;
2302 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2303 regnum = COMPILE_STACK_TOP.regnum;
2304
2305
2306
2307 pending_exact = 0;
2308
2309
2310
2311 if (regnum <= MAX_REGNUM && regnum > 0)
2312 BUF_PUSH_2 (stop_memory, regnum);
2313 }
2314 break;
2315
2316
2317 case '|':
2318
2319
2320 GET_BUFFER_SPACE (3);
2321 INSERT_JUMP (on_failure_jump, begalt, b + 6);
2322 pending_exact = 0;
2323 b += 3;
2324
2325
2326
2327
2328
2329
2330
2331
2332
2333
2334
2335
2336
2337
2338
2339
2340
2341 FIXUP_ALT_JUMP ();
2342
2343
2344
2345
2346 fixup_alt_jump = b;
2347 GET_BUFFER_SPACE (3);
2348 b += 3;
2349
2350 laststart = 0;
2351 begalt = b;
2352 break;
2353
2354
2355 case '{':
2356 {
2357
2358 int lower_bound = 0, upper_bound = -1;
2359
2360 beg_interval = p;
2361
2362 GET_INTERVAL_COUNT (lower_bound);
2363
2364 if (c == ',')
2365 GET_INTERVAL_COUNT (upper_bound);
2366 else
2367
2368 upper_bound = lower_bound;
2369
2370 if (lower_bound < 0
2371 || (0 <= upper_bound && upper_bound < lower_bound)
2372 || c != '\\')
2373 FREE_STACK_RETURN (REG_BADBR);
2374 if (p == pend)
2375 FREE_STACK_RETURN (REG_EESCAPE);
2376 if (*p++ != '}')
2377 FREE_STACK_RETURN (REG_BADBR);
2378
2379
2380
2381
2382 if (!laststart)
2383 goto unfetch_interval;
2384
2385 if (upper_bound == 0)
2386
2387
2388 b = laststart;
2389 else if (lower_bound == 1 && upper_bound == 1)
2390
2391 ;
2392
2393
2394
2395
2396
2397
2398
2399
2400
2401
2402 else
2403 {
2404
2405 int nbytes = upper_bound < 0 ? 3 : upper_bound > 1 ? 5 : 0;
2406 int startoffset = 0;
2407
2408 GET_BUFFER_SPACE (20);
2409
2410 if (lower_bound == 0)
2411 {
2412
2413
2414 INSERT_JUMP (on_failure_jump_loop, laststart,
2415 b + 3 + nbytes);
2416 b += 3;
2417 }
2418 else
2419 {
2420
2421
2422
2423
2424
2425 INSERT_JUMP2 (succeed_n, laststart,
2426 b + 5 + nbytes,
2427 lower_bound);
2428 b += 5;
2429
2430
2431
2432
2433
2434 insert_op2 (set_number_at, laststart, 5,
2435 lower_bound, b);
2436 b += 5;
2437 startoffset += 5;
2438 }
2439
2440 if (upper_bound < 0)
2441 {
2442
2443
2444 STORE_JUMP (jump, b, laststart + startoffset);
2445 b += 3;
2446 }
2447 else if (upper_bound > 1)
2448 {
2449
2450
2451
2452
2453
2454
2455 STORE_JUMP2 (jump_n, b, laststart + startoffset,
2456 upper_bound - 1);
2457 b += 5;
2458
2459
2460
2461
2462
2463
2464
2465
2466
2467
2468
2469
2470
2471
2472
2473 insert_op2 (set_number_at, laststart, b - laststart,
2474 upper_bound - 1, b);
2475 b += 5;
2476 }
2477 }
2478 pending_exact = 0;
2479 beg_interval = NULL;
2480 }
2481 break;
2482
2483 unfetch_interval:
2484
2485 eassert (beg_interval);
2486 p = beg_interval;
2487 beg_interval = NULL;
2488 eassert (p > pattern && p[-1] == '\\');
2489 c = '{';
2490 goto normal_char;
2491
2492 case '=':
2493 laststart = b;
2494 BUF_PUSH (at_dot);
2495 break;
2496
2497 case 's':
2498 laststart = b;
2499 PATFETCH (c);
2500 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2501 break;
2502
2503 case 'S':
2504 laststart = b;
2505 PATFETCH (c);
2506 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2507 break;
2508
2509 case 'c':
2510 laststart = b;
2511 PATFETCH (c);
2512 BUF_PUSH_2 (categoryspec, c);
2513 break;
2514
2515 case 'C':
2516 laststart = b;
2517 PATFETCH (c);
2518 BUF_PUSH_2 (notcategoryspec, c);
2519 break;
2520
2521 case 'w':
2522 laststart = b;
2523 BUF_PUSH_2 (syntaxspec, Sword);
2524 break;
2525
2526
2527 case 'W':
2528 laststart = b;
2529 BUF_PUSH_2 (notsyntaxspec, Sword);
2530 break;
2531
2532
2533 case '<':
2534 laststart = b;
2535 BUF_PUSH (wordbeg);
2536 break;
2537
2538 case '>':
2539 laststart = b;
2540 BUF_PUSH (wordend);
2541 break;
2542
2543 case '_':
2544 laststart = b;
2545 PATFETCH (c);
2546 if (c == '<')
2547 BUF_PUSH (symbeg);
2548 else if (c == '>')
2549 BUF_PUSH (symend);
2550 else
2551 FREE_STACK_RETURN (REG_BADPAT);
2552 break;
2553
2554 case 'b':
2555 BUF_PUSH (wordbound);
2556 break;
2557
2558 case 'B':
2559 BUF_PUSH (notwordbound);
2560 break;
2561
2562 case '`':
2563 BUF_PUSH (begbuf);
2564 break;
2565
2566 case '\'':
2567 BUF_PUSH (endbuf);
2568 break;
2569
2570 case '1': case '2': case '3': case '4': case '5':
2571 case '6': case '7': case '8': case '9':
2572 {
2573 regnum_t reg = c - '0';
2574
2575 if (reg > bufp->re_nsub || reg < 1
2576
2577 || group_in_compile_stack (compile_stack, reg))
2578 FREE_STACK_RETURN (REG_ESUBREG);
2579
2580 laststart = b;
2581 BUF_PUSH_2 (duplicate, reg);
2582 }
2583 break;
2584
2585 default:
2586
2587
2588
2589 goto normal_char;
2590 }
2591 break;
2592
2593
2594 default:
2595
2596 normal_char:
2597
2598 if (!pending_exact
2599
2600
2601 || pending_exact + *pending_exact + 1 != b
2602
2603
2604 || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
2605
2606
2607 || (p != pend
2608 && (*p == '*' || *p == '+' || *p == '?' || *p == '^'))
2609 || (p + 1 < pend && p[0] == '\\' && p[1] == '{'))
2610 {
2611
2612
2613 laststart = b;
2614
2615 BUF_PUSH_2 (exactn, 0);
2616 pending_exact = b - 1;
2617 }
2618
2619 GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
2620 {
2621 int len;
2622
2623 if (multibyte)
2624 {
2625 c = TRANSLATE (c);
2626 len = CHAR_STRING (c, b);
2627 b += len;
2628 }
2629 else
2630 {
2631 c1 = RE_CHAR_TO_MULTIBYTE (c);
2632 if (! CHAR_BYTE8_P (c1))
2633 {
2634 int c2 = TRANSLATE (c1);
2635
2636 if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
2637 c = c1;
2638 }
2639 *b++ = c;
2640 len = 1;
2641 }
2642 (*pending_exact) += len;
2643 }
2644
2645 break;
2646 }
2647 }
2648
2649
2650
2651
2652 FIXUP_ALT_JUMP ();
2653
2654 if (!COMPILE_STACK_EMPTY)
2655 FREE_STACK_RETURN (REG_EPAREN);
2656
2657
2658
2659 if (!posix_backtracking)
2660 BUF_PUSH (succeed);
2661
2662
2663 bufp->used = b - bufp->buffer;
2664
2665 #ifdef REGEX_EMACS_DEBUG
2666 if (regex_emacs_debug > 0)
2667 {
2668 re_compile_fastmap (bufp);
2669 DEBUG_PRINT ("\nCompiled pattern:\n");
2670 print_compiled_pattern (bufp);
2671 }
2672 regex_emacs_debug--;
2673 #endif
2674
2675 FREE_STACK_RETURN (REG_NOERROR);
2676
2677 }
2678
2679
2680
2681
2682
2683 static void
2684 store_op1 (re_opcode_t op, unsigned char *loc, int arg)
2685 {
2686 *loc = (unsigned char) op;
2687 STORE_NUMBER (loc + 1, arg);
2688 }
2689
2690
2691
2692
2693 static void
2694 store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2)
2695 {
2696 *loc = (unsigned char) op;
2697 STORE_NUMBER (loc + 1, arg1);
2698 STORE_NUMBER (loc + 3, arg2);
2699 }
2700
2701
2702
2703
2704
2705 static void
2706 insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
2707 {
2708 register unsigned char *pfrom = end;
2709 register unsigned char *pto = end + 3;
2710
2711 while (pfrom != loc)
2712 *--pto = *--pfrom;
2713
2714 store_op1 (op, loc, arg);
2715 }
2716
2717
2718
2719
2720 static void
2721 insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2,
2722 unsigned char *end)
2723 {
2724 register unsigned char *pfrom = end;
2725 register unsigned char *pto = end + 5;
2726
2727 while (pfrom != loc)
2728 *--pto = *--pfrom;
2729
2730 store_op2 (op, loc, arg1, arg2);
2731 }
2732
2733
2734
2735
2736
2737
2738 static bool
2739 at_begline_loc_p (re_char *pattern, re_char *p)
2740 {
2741 re_char *prev = p - 2;
2742
2743 switch (*prev)
2744 {
2745 case '(':
2746 case '|':
2747 break;
2748
2749 case ':':
2750
2751 while (prev > pattern && '0' <= prev[-1] && prev[-1] <= '9')
2752 --prev;
2753
2754 if (! (prev > pattern + 1 && prev[-1] == '?' && prev[-2] == '('))
2755 return false;
2756 prev -= 2;
2757 break;
2758
2759 default:
2760 return false;
2761 }
2762
2763
2764 p = prev;
2765 while (prev > pattern && prev[-1] == '\\')
2766 --prev;
2767 return (p - prev) & 1;
2768 }
2769
2770
2771
2772
2773
2774 static bool
2775 at_endline_loc_p (re_char *p, re_char *pend)
2776 {
2777
2778 return *p == '\\' && p + 1 < pend && (p[1] == ')' || p[1] == '|');
2779 }
2780
2781
2782
2783
2784
2785 static bool
2786 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
2787 {
2788 ptrdiff_t this_element;
2789
2790 for (this_element = compile_stack.avail - 1;
2791 this_element >= 0;
2792 this_element--)
2793 if (compile_stack.stack[this_element].regnum == regnum)
2794 return true;
2795
2796 return false;
2797 }
2798
2799
2800
2801
2802
2803
2804
2805
2806
2807
2808
2809 static int
2810 analyze_first (re_char *p, re_char *pend, char *fastmap, bool multibyte)
2811 {
2812 int j, k;
2813 int nbits;
2814 bool not;
2815
2816
2817
2818 bool match_any_multibyte_characters = false;
2819
2820 eassert (p);
2821
2822
2823
2824
2825
2826
2827
2828
2829
2830
2831
2832
2833
2834
2835
2836
2837 while (p < pend)
2838 {
2839
2840
2841
2842
2843
2844
2845
2846
2847 re_char *p1 = p;
2848
2849 switch (*p++)
2850 {
2851 case succeed:
2852 return 1;
2853
2854 case duplicate:
2855
2856
2857
2858
2859 p++;
2860 continue;
2861
2862
2863
2864
2865
2866 case exactn:
2867 if (fastmap)
2868 {
2869
2870
2871
2872
2873 fastmap[p[1]] = 1;
2874 if (multibyte)
2875 {
2876
2877
2878 if (CHAR_BYTE8_HEAD_P (p[1]))
2879 fastmap[CHAR_TO_BYTE8 (STRING_CHAR (p + 1))] = 1;
2880 }
2881 else
2882 {
2883
2884
2885
2886 int c = RE_CHAR_TO_MULTIBYTE (p[1]);
2887
2888 fastmap[CHAR_LEADING_CODE (c)] = 1;
2889 }
2890 }
2891 break;
2892
2893
2894 case anychar:
2895
2896
2897 if (!fastmap) break;
2898 return -1;
2899
2900
2901 case charset_not:
2902 if (!fastmap) break;
2903 {
2904
2905 for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
2906 j < (1 << BYTEWIDTH); j++)
2907 fastmap[j] = 1;
2908 }
2909 FALLTHROUGH;
2910 case charset:
2911 if (!fastmap) break;
2912 not = (re_opcode_t) *(p - 1) == charset_not;
2913 nbits = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
2914 p++;
2915 for (j = 0; j < nbits; j++)
2916 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
2917 fastmap[j] = 1;
2918
2919
2920
2921 for (j = 0x80; j < nbits; j++)
2922 if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
2923 fastmap[CHAR_LEADING_CODE (BYTE8_TO_CHAR (j))] = 1;
2924
2925 if (
2926
2927 not
2928 ||
2929
2930
2931 (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
2932 && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
2933
2934 {
2935 if (match_any_multibyte_characters == false)
2936 {
2937 for (j = MIN_MULTIBYTE_LEADING_CODE;
2938 j <= MAX_MULTIBYTE_LEADING_CODE; j++)
2939 fastmap[j] = 1;
2940 match_any_multibyte_characters = true;
2941 }
2942 }
2943
2944 else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
2945 && match_any_multibyte_characters == false)
2946 {
2947
2948
2949 int c, count;
2950 unsigned char lc1, lc2;
2951
2952
2953
2954 p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
2955
2956
2957 EXTRACT_NUMBER_AND_INCR (count, p);
2958 for (; count > 0; count--, p += 3)
2959 {
2960
2961 EXTRACT_CHARACTER (c, p);
2962 lc1 = CHAR_LEADING_CODE (c);
2963 p += 3;
2964 EXTRACT_CHARACTER (c, p);
2965 lc2 = CHAR_LEADING_CODE (c);
2966 for (j = lc1; j <= lc2; j++)
2967 fastmap[j] = 1;
2968 }
2969 }
2970 break;
2971
2972 case syntaxspec:
2973 case notsyntaxspec:
2974 if (!fastmap) break;
2975
2976
2977 return -1;
2978
2979 case categoryspec:
2980 case notcategoryspec:
2981 if (!fastmap) break;
2982 not = (re_opcode_t)p[-1] == notcategoryspec;
2983 k = *p++;
2984 for (j = (1 << BYTEWIDTH); j >= 0; j--)
2985 if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
2986 fastmap[j] = 1;
2987
2988
2989
2990 if (match_any_multibyte_characters == false)
2991 {
2992 for (j = MIN_MULTIBYTE_LEADING_CODE;
2993 j <= MAX_MULTIBYTE_LEADING_CODE; j++)
2994 fastmap[j] = 1;
2995 match_any_multibyte_characters = true;
2996 }
2997 break;
2998
2999
3000
3001
3002 case at_dot:
3003 case no_op:
3004 case begline:
3005 case endline:
3006 case begbuf:
3007 case endbuf:
3008 case wordbound:
3009 case notwordbound:
3010 case wordbeg:
3011 case wordend:
3012 case symbeg:
3013 case symend:
3014 continue;
3015
3016
3017 case jump:
3018 EXTRACT_NUMBER_AND_INCR (j, p);
3019 if (j < 0)
3020
3021
3022 break;
3023 p += j;
3024 switch (*p)
3025 {
3026 case on_failure_jump:
3027 case on_failure_keep_string_jump:
3028 case on_failure_jump_loop:
3029 case on_failure_jump_nastyloop:
3030 case on_failure_jump_smart:
3031 p++;
3032 break;
3033 default:
3034 continue;
3035 };
3036
3037
3038 FALLTHROUGH;
3039 case on_failure_jump:
3040 case on_failure_keep_string_jump:
3041 case on_failure_jump_nastyloop:
3042 case on_failure_jump_loop:
3043 case on_failure_jump_smart:
3044 EXTRACT_NUMBER_AND_INCR (j, p);
3045 if (p + j <= p1)
3046 ;
3047 else
3048 {
3049
3050
3051 int r = analyze_first (p, pend, fastmap, multibyte);
3052 if (r) return r;
3053 p += j;
3054 }
3055 continue;
3056
3057
3058 case jump_n:
3059
3060 DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); eassert (j < 0));
3061 p += 4;
3062
3063
3064
3065 continue;
3066
3067 case succeed_n:
3068
3069 DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); eassert (j > 0));
3070 p += 4;
3071
3072
3073
3074 continue;
3075
3076
3077 case set_number_at:
3078 p += 4;
3079 continue;
3080
3081
3082 case start_memory:
3083 case stop_memory:
3084 p += 1;
3085 continue;
3086
3087
3088 default:
3089 abort ();
3090 }
3091
3092
3093
3094
3095 return 0;
3096 }
3097
3098
3099 return 1;
3100
3101 }
3102
3103
3104
3105
3106
3107
3108
3109
3110
3111
3112
3113
3114
3115
3116
3117
3118 static void
3119 re_compile_fastmap (struct re_pattern_buffer *bufp)
3120 {
3121 char *fastmap = bufp->fastmap;
3122 int analysis;
3123
3124 eassert (fastmap && bufp->buffer);
3125
3126 memset (fastmap, 0, 1 << BYTEWIDTH);
3127
3128
3129 bufp->fastmap_accurate = 1;
3130
3131 analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used,
3132 fastmap, RE_MULTIBYTE_P (bufp));
3133 bufp->can_be_null = (analysis != 0);
3134 }
3135
3136
3137
3138
3139
3140
3141
3142
3143
3144
3145
3146
3147
3148
3149 void
3150 re_set_registers (struct re_pattern_buffer *bufp, struct re_registers *regs,
3151 ptrdiff_t num_regs, ptrdiff_t *starts, ptrdiff_t *ends)
3152 {
3153 if (num_regs)
3154 {
3155 bufp->regs_allocated = REGS_REALLOCATE;
3156 regs->num_regs = num_regs;
3157 regs->start = starts;
3158 regs->end = ends;
3159 }
3160 else
3161 {
3162 bufp->regs_allocated = REGS_UNALLOCATED;
3163 regs->num_regs = 0;
3164 regs->start = regs->end = 0;
3165 }
3166 }
3167
3168
3169
3170
3171
3172
3173 ptrdiff_t
3174 re_search (struct re_pattern_buffer *bufp, const char *string, ptrdiff_t size,
3175 ptrdiff_t startpos, ptrdiff_t range, struct re_registers *regs)
3176 {
3177 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3178 regs, size);
3179 }
3180
3181
3182 #define POS_ADDR_VSTRING(POS) \
3183 (((POS) >= size1 ? string2 - size1 : string1) + (POS))
3184
3185
3186
3187
3188
3189
3190
3191
3192
3193
3194
3195
3196
3197
3198
3199
3200
3201
3202
3203
3204
3205
3206 ptrdiff_t
3207 re_search_2 (struct re_pattern_buffer *bufp, const char *str1, ptrdiff_t size1,
3208 const char *str2, ptrdiff_t size2,
3209 ptrdiff_t startpos, ptrdiff_t range,
3210 struct re_registers *regs, ptrdiff_t stop)
3211 {
3212 ptrdiff_t val;
3213 re_char *string1 = (re_char *) str1;
3214 re_char *string2 = (re_char *) str2;
3215 char *fastmap = bufp->fastmap;
3216 Lisp_Object translate = bufp->translate;
3217 ptrdiff_t total_size = size1 + size2;
3218 ptrdiff_t endpos = startpos + range;
3219 bool anchored_start;
3220
3221 bool multibyte = RE_TARGET_MULTIBYTE_P (bufp);
3222
3223
3224 if (startpos < 0 || startpos > total_size)
3225 return -1;
3226
3227
3228
3229
3230 if (endpos < 0)
3231 range = 0 - startpos;
3232 else if (endpos > total_size)
3233 range = total_size - startpos;
3234
3235
3236
3237 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3238 {
3239 if (startpos > 0)
3240 return -1;
3241 else
3242 range = 0;
3243 }
3244
3245
3246
3247 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3248 {
3249 range = PT_BYTE - BEGV_BYTE - startpos;
3250 if (range < 0)
3251 return -1;
3252 }
3253
3254
3255 if (fastmap && !bufp->fastmap_accurate)
3256 re_compile_fastmap (bufp);
3257
3258
3259 anchored_start = (bufp->buffer[0] == begline);
3260
3261 gl_state.object = re_match_object;
3262 {
3263 ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
3264
3265 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
3266 }
3267
3268
3269 for (;;)
3270 {
3271
3272
3273
3274
3275 if (anchored_start && startpos > 0)
3276 {
3277 if (! ((startpos <= size1 ? string1[startpos - 1]
3278 : string2[startpos - size1 - 1])
3279 == '\n'))
3280 goto advance;
3281 }
3282
3283
3284
3285
3286
3287 if (fastmap && startpos < total_size && !bufp->can_be_null)
3288 {
3289 re_char *d;
3290 int buf_ch;
3291
3292 d = POS_ADDR_VSTRING (startpos);
3293
3294 if (range > 0)
3295 {
3296 ptrdiff_t irange = range, lim = 0;
3297
3298 if (startpos < size1 && startpos + range >= size1)
3299 lim = range - (size1 - startpos);
3300
3301
3302
3303 if (!NILP (translate))
3304 {
3305 if (multibyte)
3306 while (range > lim)
3307 {
3308 int buf_charlen;
3309
3310 buf_ch = string_char_and_length (d, &buf_charlen);
3311 buf_ch = RE_TRANSLATE (translate, buf_ch);
3312 if (fastmap[CHAR_LEADING_CODE (buf_ch)])
3313 break;
3314
3315 range -= buf_charlen;
3316 d += buf_charlen;
3317 }
3318 else
3319 while (range > lim)
3320 {
3321 buf_ch = *d;
3322 int ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
3323 int translated = RE_TRANSLATE (translate, ch);
3324 if (translated != ch
3325 && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
3326 buf_ch = ch;
3327 if (fastmap[buf_ch])
3328 break;
3329 d++;
3330 range--;
3331 }
3332 }
3333 else
3334 {
3335 if (multibyte)
3336 while (range > lim)
3337 {
3338 int buf_charlen;
3339
3340 buf_ch = string_char_and_length (d, &buf_charlen);
3341 if (fastmap[CHAR_LEADING_CODE (buf_ch)])
3342 break;
3343 range -= buf_charlen;
3344 d += buf_charlen;
3345 }
3346 else
3347 while (range > lim && !fastmap[*d])
3348 {
3349 d++;
3350 range--;
3351 }
3352 }
3353 startpos += irange - range;
3354 }
3355 else
3356 {
3357 if (multibyte)
3358 {
3359 buf_ch = STRING_CHAR (d);
3360 buf_ch = TRANSLATE (buf_ch);
3361 if (! fastmap[CHAR_LEADING_CODE (buf_ch)])
3362 goto advance;
3363 }
3364 else
3365 {
3366 buf_ch = *d;
3367 int ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
3368 int translated = TRANSLATE (ch);
3369 if (translated != ch
3370 && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
3371 buf_ch = ch;
3372 if (! fastmap[TRANSLATE (buf_ch)])
3373 goto advance;
3374 }
3375 }
3376 }
3377
3378
3379 if (range >= 0 && startpos == total_size && fastmap
3380 && !bufp->can_be_null)
3381 return -1;
3382
3383 val = re_match_2_internal (bufp, string1, size1, string2, size2,
3384 startpos, regs, stop);
3385
3386 if (val >= 0)
3387 return startpos;
3388
3389 if (val == -2)
3390 return -2;
3391
3392 advance:
3393 if (!range)
3394 break;
3395 else if (range > 0)
3396 {
3397
3398 if (multibyte)
3399 {
3400 re_char *p = POS_ADDR_VSTRING (startpos);
3401 int len = BYTES_BY_CHAR_HEAD (*p);
3402
3403 range -= len;
3404 if (range < 0)
3405 break;
3406 startpos += len;
3407 }
3408 else
3409 {
3410 range--;
3411 startpos++;
3412 }
3413 }
3414 else
3415 {
3416 range++;
3417 startpos--;
3418
3419
3420 if (multibyte)
3421 {
3422 re_char *p = POS_ADDR_VSTRING (startpos) + 1;
3423 int len = raw_prev_char_len (p);
3424
3425 range += len - 1;
3426 if (range > 0)
3427 break;
3428 startpos -= len - 1;
3429 }
3430 }
3431 }
3432 return -1;
3433 }
3434
3435
3436
3437 static bool bcmp_translate (re_char *, re_char *, ptrdiff_t,
3438 Lisp_Object, bool);
3439
3440
3441
3442 #define POINTER_TO_OFFSET(ptr) \
3443 (FIRST_STRING_P (ptr) \
3444 ? (ptr) - string1 \
3445 : (ptr) - string2 + (ptrdiff_t) size1)
3446
3447
3448
3449
3450
3451
3452 #define PREFETCH(reset) \
3453 while (d == dend) \
3454 { \
3455 \
3456 if (dend == end_match_2) \
3457 { \
3458 reset; \
3459 goto fail; \
3460 } \
3461 \
3462 d = string2; \
3463 dend = end_match_2; \
3464 }
3465
3466
3467
3468
3469
3470 #define PREFETCH_NOLIMIT() \
3471 if (d == end1) \
3472 { \
3473 d = string2; \
3474 dend = end_match_2; \
3475 }
3476
3477
3478
3479 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3480 #define AT_STRINGS_END(d) ((d) == end2)
3481
3482
3483
3484
3485
3486
3487
3488
3489
3490
3491
3492 #if 0
3493
3494
3495
3496
3497 #define WORDCHAR_P(d) \
3498 (SYNTAX ((d) == end1 ? *string2 \
3499 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
3500 == Sword)
3501
3502
3503
3504 #define AT_WORD_BOUNDARY(d) \
3505 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
3506 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3507 #endif
3508
3509
3510
3511
3512
3513
3514 static re_char *
3515 skip_one_char (re_char *p)
3516 {
3517 switch (*p++)
3518 {
3519 case anychar:
3520 break;
3521
3522 case exactn:
3523 p += *p + 1;
3524 break;
3525
3526 case charset_not:
3527 case charset:
3528 if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1))
3529 {
3530 int mcnt;
3531 p = CHARSET_RANGE_TABLE (p - 1);
3532 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3533 p = CHARSET_RANGE_TABLE_END (p, mcnt);
3534 }
3535 else
3536 p += 1 + CHARSET_BITMAP_SIZE (p - 1);
3537 break;
3538
3539 case syntaxspec:
3540 case notsyntaxspec:
3541 case categoryspec:
3542 case notcategoryspec:
3543 p++;
3544 break;
3545
3546 default:
3547 p = NULL;
3548 }
3549 return p;
3550 }
3551
3552
3553
3554 static re_char *
3555 skip_noops (re_char *p, re_char *pend)
3556 {
3557 int mcnt;
3558 while (p < pend)
3559 {
3560 switch (*p)
3561 {
3562 case start_memory:
3563 case stop_memory:
3564 p += 2; break;
3565 case no_op:
3566 p += 1; break;
3567 case jump:
3568 p += 1;
3569 EXTRACT_NUMBER_AND_INCR (mcnt, p);
3570 p += mcnt;
3571 break;
3572 default:
3573 return p;
3574 }
3575 }
3576 eassert (p == pend);
3577 return p;
3578 }
3579
3580
3581
3582
3583
3584
3585
3586 static bool
3587 execute_charset (re_char **pp, int c, int corig, bool unibyte,
3588 Lisp_Object canon_table)
3589 {
3590 eassume (0 <= c && 0 <= corig);
3591 re_char *p = *pp, *rtp = NULL;
3592 bool not = (re_opcode_t) *p == charset_not;
3593
3594 if (CHARSET_RANGE_TABLE_EXISTS_P (p))
3595 {
3596 int count;
3597 rtp = CHARSET_RANGE_TABLE (p);
3598 EXTRACT_NUMBER_AND_INCR (count, rtp);
3599 *pp = CHARSET_RANGE_TABLE_END ((rtp), (count));
3600 }
3601 else
3602 *pp += 2 + CHARSET_BITMAP_SIZE (p);
3603
3604 if (unibyte && c < (1 << BYTEWIDTH))
3605 {
3606
3607
3608 if (c < (unsigned) (CHARSET_BITMAP_SIZE (p) * BYTEWIDTH)
3609 && p[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3610 return !not;
3611 }
3612 else if (rtp)
3613 {
3614 int class_bits = CHARSET_RANGE_TABLE_BITS (p);
3615 int range_start, range_end;
3616
3617
3618
3619
3620
3621 if ((class_bits & BIT_MULTIBYTE) ||
3622 (class_bits & BIT_ALNUM && ISALNUM (c)) ||
3623 (class_bits & BIT_ALPHA && ISALPHA (c)) ||
3624 (class_bits & BIT_SPACE && ISSPACE (c)) ||
3625 (class_bits & BIT_BLANK && ISBLANK (c)) ||
3626 (class_bits & BIT_WORD && ISWORD (c)) ||
3627 ((class_bits & BIT_UPPER) &&
3628 (ISUPPER (corig) || (!NILP (canon_table) && ISLOWER (corig)))) ||
3629 ((class_bits & BIT_LOWER) &&
3630 (ISLOWER (corig) || (!NILP (canon_table) && ISUPPER (corig)))) ||
3631 (class_bits & BIT_PUNCT && ISPUNCT (c)) ||
3632 (class_bits & BIT_GRAPH && ISGRAPH (c)) ||
3633 (class_bits & BIT_PRINT && ISPRINT (c)))
3634 return !not;
3635
3636 for (p = *pp; rtp < p; rtp += 2 * 3)
3637 {
3638 EXTRACT_CHARACTER (range_start, rtp);
3639 EXTRACT_CHARACTER (range_end, rtp + 3);
3640 if (range_start <= c && c <= range_end)
3641 return !not;
3642 }
3643 }
3644
3645 return not;
3646 }
3647
3648
3649 static bool
3650 mutually_exclusive_p (struct re_pattern_buffer *bufp, re_char *p1,
3651 re_char *p2)
3652 {
3653 re_opcode_t op2;
3654 bool multibyte = RE_MULTIBYTE_P (bufp);
3655 unsigned char *pend = bufp->buffer + bufp->used;
3656 re_char *p2_orig = p2;
3657
3658 eassert (p1 >= bufp->buffer && p1 < pend
3659 && p2 >= bufp->buffer && p2 <= pend);
3660
3661
3662
3663
3664
3665 p2 = skip_noops (p2, pend);
3666
3667
3668
3669
3670 eassert (p1 >= bufp->buffer && p1 < pend
3671 && p2 >= bufp->buffer && p2 <= pend);
3672
3673 op2 = p2 == pend ? succeed : *p2;
3674
3675 switch (op2)
3676 {
3677 case succeed:
3678 case endbuf:
3679
3680 if (skip_one_char (p1))
3681 {
3682 DEBUG_PRINT (" End of pattern: fast loop.\n");
3683 return true;
3684 }
3685 break;
3686
3687 case endline:
3688 case exactn:
3689 {
3690 int c
3691 = (re_opcode_t) *p2 == endline ? '\n'
3692 : RE_STRING_CHAR (p2 + 2, multibyte);
3693
3694 if ((re_opcode_t) *p1 == exactn)
3695 {
3696 if (c != RE_STRING_CHAR (p1 + 2, multibyte))
3697 {
3698 DEBUG_PRINT (" '%c' != '%c' => fast loop.\n", c, p1[2]);
3699 return true;
3700 }
3701 }
3702
3703 else if ((re_opcode_t) *p1 == charset
3704 || (re_opcode_t) *p1 == charset_not)
3705 {
3706 if (!execute_charset (&p1, c, c, !multibyte || ASCII_CHAR_P (c),
3707 Qnil))
3708 {
3709 DEBUG_PRINT (" No match => fast loop.\n");
3710 return true;
3711 }
3712 }
3713 else if ((re_opcode_t) *p1 == anychar
3714 && c == '\n')
3715 {
3716 DEBUG_PRINT (" . != \\n => fast loop.\n");
3717 return true;
3718 }
3719 }
3720 break;
3721
3722 case charset:
3723 {
3724 if ((re_opcode_t) *p1 == exactn)
3725
3726 return mutually_exclusive_p (bufp, p2, p1);
3727
3728
3729
3730
3731 else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
3732 {
3733
3734
3735
3736
3737
3738
3739
3740
3741
3742
3743
3744 if ((re_opcode_t) *p1 == charset)
3745 {
3746 int idx;
3747
3748
3749 for (idx = 0;
3750 (idx < (int) p2[1]
3751 && idx < CHARSET_BITMAP_SIZE (p1));
3752 idx++)
3753 if ((p2[2 + idx] & p1[2 + idx]) != 0)
3754 break;
3755
3756 if (idx == p2[1]
3757 || idx == CHARSET_BITMAP_SIZE (p1))
3758 {
3759 DEBUG_PRINT (" No match => fast loop.\n");
3760 return true;
3761 }
3762 }
3763 else if ((re_opcode_t) *p1 == charset_not)
3764 {
3765 int idx;
3766
3767
3768 for (idx = 0; idx < (int) p2[1]; idx++)
3769 if (! (p2[2 + idx] == 0
3770 || (idx < CHARSET_BITMAP_SIZE (p1)
3771 && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
3772 break;
3773
3774 if (idx == p2[1])
3775 {
3776 DEBUG_PRINT (" No match => fast loop.\n");
3777 return true;
3778 }
3779 }
3780 }
3781 }
3782 break;
3783
3784 case charset_not:
3785 switch (*p1)
3786 {
3787 case exactn:
3788 case charset:
3789
3790 return mutually_exclusive_p (bufp, p2, p1);
3791 case charset_not:
3792
3793
3794
3795
3796 break;
3797 }
3798 break;
3799
3800 case wordend:
3801 return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
3802 case symend:
3803 return ((re_opcode_t) *p1 == syntaxspec
3804 && (p1[1] == Ssymbol || p1[1] == Sword));
3805 case notsyntaxspec:
3806 return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
3807
3808 case wordbeg:
3809 return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
3810 case symbeg:
3811 return ((re_opcode_t) *p1 == notsyntaxspec
3812 && (p1[1] == Ssymbol || p1[1] == Sword));
3813 case syntaxspec:
3814 return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
3815
3816 case wordbound:
3817 return (((re_opcode_t) *p1 == notsyntaxspec
3818 || (re_opcode_t) *p1 == syntaxspec)
3819 && p1[1] == Sword);
3820
3821 case categoryspec:
3822 return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
3823 case notcategoryspec:
3824 return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
3825
3826 case on_failure_jump_nastyloop:
3827 case on_failure_jump_smart:
3828 case on_failure_jump_loop:
3829 case on_failure_keep_string_jump:
3830 case on_failure_jump:
3831 {
3832 int mcnt;
3833 p2++;
3834 EXTRACT_NUMBER_AND_INCR (mcnt, p2);
3835
3836
3837 if (p2 + mcnt > p2_orig)
3838 return (mutually_exclusive_p (bufp, p1, p2)
3839 && mutually_exclusive_p (bufp, p1, p2 + mcnt));
3840 break;
3841 }
3842
3843 default:
3844 ;
3845 }
3846
3847
3848 return false;
3849 }
3850
3851
3852
3853
3854
3855
3856
3857
3858
3859
3860
3861
3862
3863
3864
3865
3866 ptrdiff_t
3867 re_match_2 (struct re_pattern_buffer *bufp,
3868 char const *string1, ptrdiff_t size1,
3869 char const *string2, ptrdiff_t size2,
3870 ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop)
3871 {
3872 ptrdiff_t result;
3873
3874 ptrdiff_t charpos;
3875 gl_state.object = re_match_object;
3876 charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
3877 SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
3878
3879 result = re_match_2_internal (bufp, (re_char *) string1, size1,
3880 (re_char *) string2, size2,
3881 pos, regs, stop);
3882 return result;
3883 }
3884
3885 static void
3886 unwind_re_match (void *ptr)
3887 {
3888 struct buffer *b = (struct buffer *) ptr;
3889 b->text->inhibit_shrinking = 0;
3890 }
3891
3892
3893
3894 static ptrdiff_t
3895 re_match_2_internal (struct re_pattern_buffer *bufp,
3896 re_char *string1, ptrdiff_t size1,
3897 re_char *string2, ptrdiff_t size2,
3898 ptrdiff_t pos, struct re_registers *regs, ptrdiff_t stop)
3899 {
3900 eassume (0 <= size1);
3901 eassume (0 <= size2);
3902 eassume (0 <= pos && pos <= stop && stop <= size1 + size2);
3903
3904
3905 int mcnt;
3906
3907
3908 re_char *end1, *end2;
3909
3910
3911
3912 re_char *end_match_1, *end_match_2;
3913
3914
3915 re_char *d, *dend;
3916
3917
3918
3919
3920
3921 re_char *dfail;
3922
3923
3924 re_char *p = bufp->buffer;
3925 re_char *pend = p + bufp->used;
3926
3927
3928 Lisp_Object translate = bufp->translate;
3929
3930
3931 bool multibyte = RE_MULTIBYTE_P (bufp);
3932
3933
3934 bool target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
3935
3936
3937
3938
3939
3940
3941
3942
3943 fail_stack_type fail_stack;
3944 #ifdef DEBUG_COMPILES_ARGUMENTS
3945 ptrdiff_t nfailure_points_pushed = 0, nfailure_points_popped = 0;
3946 #endif
3947
3948
3949
3950
3951 ptrdiff_t num_regs = bufp->re_nsub + 1;
3952 eassume (0 < num_regs);
3953
3954
3955
3956
3957
3958
3959
3960 re_char **regstart UNINIT, **regend UNINIT;
3961
3962
3963
3964
3965
3966 bool best_regs_set = false;
3967 re_char **best_regstart UNINIT, **best_regend UNINIT;
3968
3969
3970
3971
3972
3973
3974
3975
3976
3977 re_char *match_end = NULL;
3978
3979
3980 ptrdiff_t nchars = 0;
3981
3982 #ifdef DEBUG_COMPILES_ARGUMENTS
3983
3984 ptrdiff_t num_regs_pushed = 0;
3985 #endif
3986
3987 DEBUG_PRINT ("\nEntering re_match_2.\n");
3988
3989 REGEX_USE_SAFE_ALLOCA;
3990
3991 INIT_FAIL_STACK ();
3992
3993 specpdl_ref count = SPECPDL_INDEX ();
3994
3995
3996
3997
3998
3999
4000
4001
4002 if (!current_buffer->text->inhibit_shrinking)
4003 {
4004 record_unwind_protect_ptr (unwind_re_match, current_buffer);
4005 current_buffer->text->inhibit_shrinking = 1;
4006 }
4007
4008
4009
4010
4011
4012
4013 ptrdiff_t re_nsub = num_regs - 1;
4014 if (0 < re_nsub)
4015 {
4016 regstart = SAFE_ALLOCA ((re_nsub * 4 + 1) * sizeof *regstart);
4017 regend = regstart + num_regs;
4018 best_regstart = regend + re_nsub;
4019 best_regend = best_regstart + re_nsub;
4020
4021
4022
4023 for (re_char **apos = regstart + 1; apos < best_regstart + 1; apos++)
4024 *apos = NULL;
4025 }
4026
4027
4028
4029 if (size2 == 0 && string1 != NULL)
4030 {
4031 string2 = string1;
4032 size2 = size1;
4033 string1 = 0;
4034 size1 = 0;
4035 }
4036 end1 = string1 + size1;
4037 end2 = string2 + size2;
4038
4039
4040
4041
4042
4043
4044
4045 if (pos >= size1)
4046 {
4047
4048 d = string2 + pos - size1;
4049 dend = end_match_2 = string2 + stop - size1;
4050 end_match_1 = end1;
4051 }
4052 else
4053 {
4054 if (stop < size1)
4055 {
4056
4057 end_match_1 = string1 + stop;
4058
4059
4060
4061
4062
4063
4064
4065
4066 end_match_2 = end_match_1;
4067 }
4068 else
4069 {
4070
4071
4072 end_match_1 = end1;
4073 end_match_2 = string2 + stop - size1;
4074 }
4075 d = string1 + pos;
4076 dend = end_match_1;
4077 }
4078
4079 DEBUG_PRINT ("The compiled pattern is:\n");
4080 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4081 DEBUG_PRINT ("The string to match is: \"");
4082 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4083 DEBUG_PRINT ("\"\n");
4084
4085
4086
4087
4088 for (;;)
4089 {
4090 DEBUG_PRINT ("\n%p: ", p);
4091
4092 if (p == pend)
4093 {
4094
4095 DEBUG_PRINT ("end of pattern ... ");
4096
4097
4098
4099 if (d != end_match_2)
4100 {
4101
4102 bool best_match_p;
4103
4104 {
4105
4106
4107 bool same_str_p = (FIRST_STRING_P (match_end)
4108 == FIRST_STRING_P (d));
4109
4110
4111
4112 if (same_str_p)
4113 best_match_p = d > match_end;
4114 else
4115 best_match_p = !FIRST_STRING_P (d);
4116 }
4117
4118 DEBUG_PRINT ("backtracking.\n");
4119
4120 if (!FAIL_STACK_EMPTY ())
4121 {
4122
4123
4124 if (!best_regs_set || best_match_p)
4125 {
4126 best_regs_set = true;
4127 match_end = d;
4128
4129 DEBUG_PRINT ("\nSAVING match as best so far.\n");
4130
4131 for (ptrdiff_t reg = 1; reg < num_regs; reg++)
4132 {
4133 best_regstart[reg] = regstart[reg];
4134 best_regend[reg] = regend[reg];
4135 }
4136 }
4137 goto fail;
4138 }
4139
4140
4141
4142
4143 else if (best_regs_set && !best_match_p)
4144 {
4145 restore_best_regs:
4146
4147
4148
4149
4150
4151 DEBUG_PRINT ("Restoring best registers.\n");
4152
4153 d = match_end;
4154 dend = ((d >= string1 && d <= end1)
4155 ? end_match_1 : end_match_2);
4156
4157 for (ptrdiff_t reg = 1; reg < num_regs; reg++)
4158 {
4159 regstart[reg] = best_regstart[reg];
4160 regend[reg] = best_regend[reg];
4161 eassert (REG_UNSET (regstart[reg])
4162 <= REG_UNSET (regend[reg]));
4163 }
4164 }
4165 }
4166
4167 succeed_label:
4168 DEBUG_PRINT ("Accepting match.\n");
4169
4170
4171 if (regs)
4172 {
4173
4174 if (bufp->regs_allocated == REGS_UNALLOCATED)
4175 {
4176 ptrdiff_t n = max (RE_NREGS, num_regs);
4177 regs->start = xnmalloc (n, sizeof *regs->start);
4178 regs->end = xnmalloc (n, sizeof *regs->end);
4179 regs->num_regs = n;
4180 bufp->regs_allocated = REGS_REALLOCATE;
4181 }
4182 else if (bufp->regs_allocated == REGS_REALLOCATE)
4183 {
4184
4185
4186 ptrdiff_t n = regs->num_regs;
4187 if (n < num_regs)
4188 {
4189 n = max (n + (n >> 1), num_regs);
4190 regs->start
4191 = xnrealloc (regs->start, n, sizeof *regs->start);
4192 regs->end = xnrealloc (regs->end, n, sizeof *regs->end);
4193 regs->num_regs = n;
4194 }
4195 }
4196 else
4197 eassert (bufp->regs_allocated == REGS_FIXED);
4198
4199
4200
4201
4202 if (regs->num_regs > 0)
4203 {
4204 regs->start[0] = pos;
4205 regs->end[0] = POINTER_TO_OFFSET (d);
4206 }
4207
4208 for (ptrdiff_t reg = 1; reg < num_regs; reg++)
4209 {
4210 eassert (REG_UNSET (regstart[reg])
4211 <= REG_UNSET (regend[reg]));
4212 if (REG_UNSET (regend[reg]))
4213 regs->start[reg] = regs->end[reg] = -1;
4214 else
4215 {
4216 regs->start[reg] = POINTER_TO_OFFSET (regstart[reg]);
4217 regs->end[reg] = POINTER_TO_OFFSET (regend[reg]);
4218 }
4219 }
4220
4221
4222
4223 for (ptrdiff_t reg = num_regs; reg < regs->num_regs; reg++)
4224 regs->start[reg] = regs->end[reg] = -1;
4225 }
4226
4227 DEBUG_PRINT ("%td failure points pushed, %td popped (%td remain).\n",
4228 nfailure_points_pushed, nfailure_points_popped,
4229 nfailure_points_pushed - nfailure_points_popped);
4230 DEBUG_PRINT ("%td registers pushed.\n", num_regs_pushed);
4231
4232 ptrdiff_t dcnt = POINTER_TO_OFFSET (d) - pos;
4233
4234 DEBUG_PRINT ("Returning %td from re_match_2.\n", dcnt);
4235
4236 unbind_to (count, Qnil);
4237 SAFE_FREE ();
4238
4239
4240
4241
4242 if (max_redisplay_ticks > 0 && nchars > 0)
4243 update_redisplay_ticks (nchars / 50 + 1, NULL);
4244 return dcnt;
4245 }
4246
4247
4248 switch (*p++)
4249 {
4250
4251
4252 case no_op:
4253 DEBUG_PRINT ("EXECUTING no_op.\n");
4254 break;
4255
4256 case succeed:
4257 DEBUG_PRINT ("EXECUTING succeed.\n");
4258 goto succeed_label;
4259
4260
4261
4262
4263 case exactn:
4264 mcnt = *p++;
4265 DEBUG_PRINT ("EXECUTING exactn %d.\n", mcnt);
4266
4267
4268 dfail = d;
4269
4270
4271 if (target_multibyte)
4272 do
4273 {
4274 int pat_charlen, buf_charlen;
4275 int pat_ch, buf_ch;
4276
4277 PREFETCH (d = dfail);
4278 if (multibyte)
4279 pat_ch = string_char_and_length (p, &pat_charlen);
4280 else
4281 {
4282 pat_ch = RE_CHAR_TO_MULTIBYTE (*p);
4283 pat_charlen = 1;
4284 }
4285 buf_ch = string_char_and_length (d, &buf_charlen);
4286
4287 if (TRANSLATE (buf_ch) != pat_ch)
4288 {
4289 d = dfail;
4290 goto fail;
4291 }
4292
4293 p += pat_charlen;
4294 d += buf_charlen;
4295 mcnt -= pat_charlen;
4296 nchars++;
4297 }
4298 while (mcnt > 0);
4299 else
4300 do
4301 {
4302 int pat_charlen;
4303 int pat_ch, buf_ch;
4304
4305 PREFETCH (d = dfail);
4306 if (multibyte)
4307 {
4308 pat_ch = string_char_and_length (p, &pat_charlen);
4309 pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch);
4310 }
4311 else
4312 {
4313 pat_ch = *p;
4314 pat_charlen = 1;
4315 }
4316 buf_ch = RE_CHAR_TO_MULTIBYTE (*d);
4317 if (! CHAR_BYTE8_P (buf_ch))
4318 {
4319 buf_ch = TRANSLATE (buf_ch);
4320 buf_ch = RE_CHAR_TO_UNIBYTE (buf_ch);
4321 if (buf_ch < 0)
4322 buf_ch = *d;
4323 }
4324 else
4325 buf_ch = *d;
4326 if (buf_ch != pat_ch)
4327 {
4328 d = dfail;
4329 goto fail;
4330 }
4331 p += pat_charlen;
4332 d++;
4333 mcnt -= pat_charlen;
4334 nchars++;
4335 }
4336 while (mcnt > 0);
4337
4338 break;
4339
4340
4341
4342 case anychar:
4343 {
4344 int buf_charlen;
4345 int buf_ch;
4346
4347 DEBUG_PRINT ("EXECUTING anychar.\n");
4348
4349 PREFETCH ();
4350 buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen,
4351 target_multibyte);
4352 buf_ch = TRANSLATE (buf_ch);
4353 if (buf_ch == '\n')
4354 goto fail;
4355
4356 DEBUG_PRINT (" Matched \"%d\".\n", *d);
4357 d += buf_charlen;
4358 nchars++;
4359 }
4360 break;
4361
4362
4363 case charset:
4364 case charset_not:
4365 {
4366
4367 bool unibyte_char = false;
4368
4369 DEBUG_PRINT ("EXECUTING charset%s.\n",
4370 (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
4371
4372 PREFETCH ();
4373 int len;
4374 int corig = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
4375 int c = corig;
4376 if (target_multibyte)
4377 {
4378 int c1;
4379
4380 c = TRANSLATE (c);
4381 c1 = RE_CHAR_TO_UNIBYTE (c);
4382 if (c1 >= 0)
4383 {
4384 unibyte_char = true;
4385 c = c1;
4386 }
4387 }
4388 else
4389 {
4390 int c1 = RE_CHAR_TO_MULTIBYTE (c);
4391
4392 if (! CHAR_BYTE8_P (c1))
4393 {
4394 c1 = TRANSLATE (c1);
4395 c1 = RE_CHAR_TO_UNIBYTE (c1);
4396 if (c1 >= 0)
4397 {
4398 unibyte_char = true;
4399 c = c1;
4400 }
4401 }
4402 else
4403 unibyte_char = true;
4404 }
4405
4406 p -= 1;
4407 if (!execute_charset (&p, c, corig, unibyte_char, translate))
4408 goto fail;
4409
4410 d += len;
4411 nchars++;
4412 }
4413 break;
4414
4415
4416
4417
4418
4419
4420 case start_memory:
4421 DEBUG_PRINT ("EXECUTING start_memory %d:\n", *p);
4422 eassert (0 < *p && *p < num_regs);
4423
4424
4425 PUSH_FAILURE_REG (*p);
4426
4427 regstart[*p] = d;
4428 DEBUG_PRINT (" regstart: %td\n", POINTER_TO_OFFSET (regstart[*p]));
4429
4430
4431 p += 1;
4432 break;
4433
4434
4435
4436
4437 case stop_memory:
4438 DEBUG_PRINT ("EXECUTING stop_memory %d:\n", *p);
4439
4440 eassert (0 < *p && *p < num_regs);
4441 eassert (!REG_UNSET (regstart[*p]));
4442
4443
4444
4445
4446
4447
4448
4449
4450
4451
4452
4453
4454
4455
4456 regend[*p] = d;
4457 DEBUG_PRINT (" regend: %td\n", POINTER_TO_OFFSET (regend[*p]));
4458
4459
4460 p += 1;
4461 break;
4462
4463
4464
4465
4466 case duplicate:
4467 {
4468 re_char *d2, *dend2;
4469 int regno = *p++;
4470 DEBUG_PRINT ("EXECUTING duplicate %d.\n", regno);
4471
4472
4473 eassert (0 < regno && regno < num_regs);
4474 eassert (REG_UNSET (regstart[regno]) <= REG_UNSET (regend[regno]));
4475 if (REG_UNSET (regend[regno]))
4476 goto fail;
4477
4478
4479 d2 = regstart[regno];
4480
4481
4482 dfail = d;
4483
4484
4485
4486
4487
4488
4489 dend2 = ((FIRST_STRING_P (regstart[regno])
4490 == FIRST_STRING_P (regend[regno]))
4491 ? regend[regno] : end_match_1);
4492 for (;;)
4493 {
4494 ptrdiff_t dcnt;
4495
4496
4497
4498 while (d2 == dend2)
4499 {
4500 if (dend2 == end_match_2) break;
4501 if (dend2 == regend[regno]) break;
4502
4503
4504 d2 = string2;
4505 dend2 = regend[regno];
4506 }
4507
4508 if (d2 == dend2) break;
4509
4510
4511 PREFETCH (d = dfail);
4512
4513
4514 dcnt = dend - d;
4515
4516
4517
4518 if (dcnt > dend2 - d2)
4519 dcnt = dend2 - d2;
4520
4521
4522
4523 if (!NILP (translate)
4524 ? bcmp_translate (d, d2, dcnt, translate, target_multibyte)
4525 : memcmp (d, d2, dcnt))
4526 {
4527 d = dfail;
4528 goto fail;
4529 }
4530 d += dcnt, d2 += dcnt;
4531 nchars++;
4532 }
4533 }
4534 break;
4535
4536
4537
4538
4539 case begline:
4540 DEBUG_PRINT ("EXECUTING begline.\n");
4541
4542 if (AT_STRINGS_BEG (d))
4543 break;
4544 else
4545 {
4546 unsigned c;
4547 GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
4548 if (c == '\n')
4549 break;
4550 }
4551 goto fail;
4552
4553
4554
4555 case endline:
4556 DEBUG_PRINT ("EXECUTING endline.\n");
4557
4558 if (AT_STRINGS_END (d))
4559 break;
4560 PREFETCH_NOLIMIT ();
4561 if (*d == '\n')
4562 break;
4563 goto fail;
4564
4565
4566
4567 case begbuf:
4568 DEBUG_PRINT ("EXECUTING begbuf.\n");
4569 if (AT_STRINGS_BEG (d))
4570 break;
4571 goto fail;
4572
4573
4574
4575 case endbuf:
4576 DEBUG_PRINT ("EXECUTING endbuf.\n");
4577 if (AT_STRINGS_END (d))
4578 break;
4579 goto fail;
4580
4581
4582
4583
4584
4585
4586
4587
4588
4589
4590
4591
4592
4593
4594
4595
4596
4597
4598 case on_failure_keep_string_jump:
4599 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4600 DEBUG_PRINT ("EXECUTING on_failure_keep_string_jump %d (to %p):\n",
4601 mcnt, p + mcnt);
4602
4603 PUSH_FAILURE_POINT (p - 3, NULL);
4604 break;
4605
4606
4607
4608
4609
4610
4611
4612
4613
4614
4615
4616
4617
4618
4619
4620 case on_failure_jump_nastyloop:
4621 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4622 DEBUG_PRINT ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n",
4623 mcnt, p + mcnt);
4624
4625 eassert ((re_opcode_t)p[-4] == no_op);
4626 {
4627 bool cycle = false;
4628 CHECK_INFINITE_LOOP (p - 4, d);
4629 if (!cycle)
4630
4631
4632
4633
4634 PUSH_FAILURE_POINT (p - 3, d);
4635 }
4636 break;
4637
4638
4639
4640 case on_failure_jump_loop:
4641 on_failure:
4642 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4643 DEBUG_PRINT ("EXECUTING on_failure_jump_loop %d (to %p):\n",
4644 mcnt, p + mcnt);
4645 {
4646 bool cycle = false;
4647 CHECK_INFINITE_LOOP (p - 3, d);
4648 if (cycle)
4649
4650
4651
4652
4653
4654 p += mcnt;
4655 else
4656 PUSH_FAILURE_POINT (p - 3, d);
4657 }
4658 break;
4659
4660
4661
4662
4663
4664
4665
4666
4667
4668
4669
4670
4671
4672
4673 case on_failure_jump:
4674 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4675 DEBUG_PRINT ("EXECUTING on_failure_jump %d (to %p):\n",
4676 mcnt, p + mcnt);
4677
4678 PUSH_FAILURE_POINT (p -3, d);
4679 break;
4680
4681
4682
4683
4684
4685
4686
4687
4688 case on_failure_jump_smart:
4689 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4690 DEBUG_PRINT ("EXECUTING on_failure_jump_smart %d (to %p).\n",
4691 mcnt, p + mcnt);
4692 {
4693 re_char *p1 = p;
4694
4695 unsigned char *p2 = (unsigned char *) p + mcnt;
4696 unsigned char *p3 = (unsigned char *) p - 3;
4697
4698 p -= 3;
4699
4700
4701 EXTRACT_NUMBER (mcnt, p2 - 2);
4702
4703
4704
4705 eassert (skip_one_char (p1) == p2 - 3);
4706 eassert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
4707 DEBUG_STATEMENT (regex_emacs_debug += 2);
4708 if (mutually_exclusive_p (bufp, p1, p2))
4709 {
4710
4711 DEBUG_PRINT (" smart exclusive => fast loop.\n");
4712 *p3 = (unsigned char) on_failure_keep_string_jump;
4713 STORE_NUMBER (p2 - 2, mcnt + 3);
4714 }
4715 else
4716 {
4717
4718 DEBUG_PRINT (" smart default => slow loop.\n");
4719 *p3 = (unsigned char) on_failure_jump;
4720 }
4721 DEBUG_STATEMENT (regex_emacs_debug -= 2);
4722 }
4723 break;
4724
4725
4726 case jump:
4727 unconditional_jump:
4728 maybe_quit ();
4729 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4730 DEBUG_PRINT ("EXECUTING jump %d ", mcnt);
4731 p += mcnt;
4732 DEBUG_PRINT ("(to %p).\n", p);
4733 break;
4734
4735
4736
4737
4738 case succeed_n:
4739
4740 EXTRACT_NUMBER (mcnt, p + 2);
4741 DEBUG_PRINT ("EXECUTING succeed_n %d.\n", mcnt);
4742
4743
4744 if (mcnt != 0)
4745 {
4746
4747 unsigned char *p2 = (unsigned char *) p + 2;
4748 mcnt--;
4749 p += 4;
4750 PUSH_NUMBER (p2, mcnt);
4751 }
4752 else
4753
4754 goto on_failure;
4755 break;
4756
4757 case jump_n:
4758
4759 EXTRACT_NUMBER (mcnt, p + 2);
4760 DEBUG_PRINT ("EXECUTING jump_n %d.\n", mcnt);
4761
4762
4763 if (mcnt != 0)
4764 {
4765
4766 unsigned char *p2 = (unsigned char *) p + 2;
4767 mcnt--;
4768 PUSH_NUMBER (p2, mcnt);
4769 goto unconditional_jump;
4770 }
4771
4772 else
4773 p += 4;
4774 break;
4775
4776 case set_number_at:
4777 {
4778 unsigned char *p2;
4779 DEBUG_PRINT ("EXECUTING set_number_at.\n");
4780
4781 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4782
4783 p2 = (unsigned char *) p + mcnt;
4784
4785 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4786 DEBUG_PRINT (" Setting %p to %d.\n", p2, mcnt);
4787 PUSH_NUMBER (p2, mcnt);
4788 break;
4789 }
4790
4791 case wordbound:
4792 case notwordbound:
4793 {
4794 bool not = (re_opcode_t) *(p - 1) == notwordbound;
4795 DEBUG_PRINT ("EXECUTING %swordbound.\n", not ? "not" : "");
4796
4797
4798
4799
4800 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4801 not = !not;
4802 else
4803 {
4804
4805
4806 int c1, c2;
4807 int s1, s2;
4808 int dummy;
4809 ptrdiff_t offset = PTR_TO_OFFSET (d);
4810 ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
4811 UPDATE_SYNTAX_TABLE (charpos);
4812 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
4813 nchars++;
4814 s1 = SYNTAX (c1);
4815 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
4816 PREFETCH_NOLIMIT ();
4817 GET_CHAR_AFTER (c2, d, dummy);
4818 nchars++;
4819 s2 = SYNTAX (c2);
4820
4821 if (
4822 ((s1 == Sword) != (s2 == Sword))
4823
4824
4825 || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
4826 not = !not;
4827 }
4828 if (not)
4829 break;
4830 else
4831 goto fail;
4832 }
4833
4834 case wordbeg:
4835 DEBUG_PRINT ("EXECUTING wordbeg.\n");
4836
4837
4838
4839
4840 if (AT_STRINGS_END (d))
4841 goto fail;
4842 else
4843 {
4844
4845
4846 int c1, c2;
4847 int s1, s2;
4848 int dummy;
4849 ptrdiff_t offset = PTR_TO_OFFSET (d);
4850 ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
4851 UPDATE_SYNTAX_TABLE (charpos);
4852 PREFETCH ();
4853 GET_CHAR_AFTER (c2, d, dummy);
4854 nchars++;
4855 s2 = SYNTAX (c2);
4856
4857
4858 if (s2 != Sword)
4859 goto fail;
4860
4861
4862 if (!AT_STRINGS_BEG (d))
4863 {
4864 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
4865 nchars++;
4866 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
4867 s1 = SYNTAX (c1);
4868
4869
4870
4871 if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
4872 goto fail;
4873 }
4874 }
4875 break;
4876
4877 case wordend:
4878 DEBUG_PRINT ("EXECUTING wordend.\n");
4879
4880
4881
4882
4883 if (AT_STRINGS_BEG (d))
4884 goto fail;
4885 else
4886 {
4887
4888
4889 int c1, c2;
4890 int s1, s2;
4891 int dummy;
4892 ptrdiff_t offset = PTR_TO_OFFSET (d);
4893 ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
4894 UPDATE_SYNTAX_TABLE (charpos);
4895 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
4896 nchars++;
4897 s1 = SYNTAX (c1);
4898
4899
4900 if (s1 != Sword)
4901 goto fail;
4902
4903
4904 if (!AT_STRINGS_END (d))
4905 {
4906 PREFETCH_NOLIMIT ();
4907 GET_CHAR_AFTER (c2, d, dummy);
4908 nchars++;
4909 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
4910 s2 = SYNTAX (c2);
4911
4912
4913
4914 if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
4915 goto fail;
4916 }
4917 }
4918 break;
4919
4920 case symbeg:
4921 DEBUG_PRINT ("EXECUTING symbeg.\n");
4922
4923
4924
4925
4926 if (AT_STRINGS_END (d))
4927 goto fail;
4928 else
4929 {
4930
4931
4932 int c1, c2;
4933 int s1, s2;
4934 ptrdiff_t offset = PTR_TO_OFFSET (d);
4935 ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
4936 UPDATE_SYNTAX_TABLE (charpos);
4937 PREFETCH ();
4938 c2 = RE_STRING_CHAR (d, target_multibyte);
4939 nchars++;
4940 s2 = SYNTAX (c2);
4941
4942
4943 if (s2 != Sword && s2 != Ssymbol)
4944 goto fail;
4945
4946
4947 if (!AT_STRINGS_BEG (d))
4948 {
4949 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
4950 nchars++;
4951 UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
4952 s1 = SYNTAX (c1);
4953
4954
4955 if (s1 == Sword || s1 == Ssymbol)
4956 goto fail;
4957 }
4958 }
4959 break;
4960
4961 case symend:
4962 DEBUG_PRINT ("EXECUTING symend.\n");
4963
4964
4965
4966
4967 if (AT_STRINGS_BEG (d))
4968 goto fail;
4969 else
4970 {
4971
4972
4973 int c1, c2;
4974 int s1, s2;
4975 ptrdiff_t offset = PTR_TO_OFFSET (d);
4976 ptrdiff_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset) - 1;
4977 UPDATE_SYNTAX_TABLE (charpos);
4978 GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
4979 nchars++;
4980 s1 = SYNTAX (c1);
4981
4982
4983 if (s1 != Sword && s1 != Ssymbol)
4984 goto fail;
4985
4986
4987 if (!AT_STRINGS_END (d))
4988 {
4989 PREFETCH_NOLIMIT ();
4990 c2 = RE_STRING_CHAR (d, target_multibyte);
4991 nchars++;
4992 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
4993 s2 = SYNTAX (c2);
4994
4995
4996 if (s2 == Sword || s2 == Ssymbol)
4997 goto fail;
4998 }
4999 }
5000 break;
5001
5002 case syntaxspec:
5003 case notsyntaxspec:
5004 {
5005 bool not = (re_opcode_t) *(p - 1) == notsyntaxspec;
5006 mcnt = *p++;
5007 DEBUG_PRINT ("EXECUTING %ssyntaxspec %d.\n", not ? "not" : "",
5008 mcnt);
5009 PREFETCH ();
5010 {
5011 ptrdiff_t offset = PTR_TO_OFFSET (d);
5012 ptrdiff_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5013 UPDATE_SYNTAX_TABLE (pos1);
5014 }
5015 {
5016 int len;
5017 int c;
5018
5019 GET_CHAR_AFTER (c, d, len);
5020 if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
5021 goto fail;
5022 d += len;
5023 nchars++;
5024 }
5025 }
5026 break;
5027
5028 case at_dot:
5029 DEBUG_PRINT ("EXECUTING at_dot.\n");
5030 if (PTR_BYTE_POS (d) != PT_BYTE)
5031 goto fail;
5032 break;
5033
5034 case categoryspec:
5035 case notcategoryspec:
5036 {
5037 bool not = (re_opcode_t) *(p - 1) == notcategoryspec;
5038 mcnt = *p++;
5039 DEBUG_PRINT ("EXECUTING %scategoryspec %d.\n",
5040 not ? "not" : "", mcnt);
5041 PREFETCH ();
5042
5043 {
5044 int len;
5045 int c;
5046 GET_CHAR_AFTER (c, d, len);
5047 if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
5048 goto fail;
5049 d += len;
5050 nchars++;
5051 }
5052 }
5053 break;
5054
5055 default:
5056 abort ();
5057 }
5058 continue;
5059
5060
5061
5062 fail:
5063 maybe_quit ();
5064 if (!FAIL_STACK_EMPTY ())
5065 {
5066 re_char *str, *pat;
5067
5068 DEBUG_PRINT ("\nFAIL:\n");
5069 POP_FAILURE_POINT (str, pat);
5070 switch (*pat++)
5071 {
5072 case on_failure_keep_string_jump:
5073 eassert (str == NULL);
5074 goto continue_failure_jump;
5075
5076 case on_failure_jump_nastyloop:
5077 eassert ((re_opcode_t)pat[-2] == no_op);
5078 PUSH_FAILURE_POINT (pat - 2, str);
5079 FALLTHROUGH;
5080 case on_failure_jump_loop:
5081 case on_failure_jump:
5082 case succeed_n:
5083 d = str;
5084 continue_failure_jump:
5085 EXTRACT_NUMBER_AND_INCR (mcnt, pat);
5086 p = pat + mcnt;
5087 break;
5088
5089 case no_op:
5090
5091 goto fail;
5092
5093 default:
5094 abort ();
5095 }
5096
5097 eassert (p >= bufp->buffer && p <= pend);
5098
5099 if (d >= string1 && d <= end1)
5100 dend = end_match_1;
5101 }
5102 else
5103 break;
5104 }
5105
5106 if (best_regs_set)
5107 goto restore_best_regs;
5108
5109 unbind_to (count, Qnil);
5110 SAFE_FREE ();
5111
5112 if (max_redisplay_ticks > 0 && nchars > 0)
5113 update_redisplay_ticks (nchars / 50 + 1, NULL);
5114
5115 return -1;
5116 }
5117
5118
5119
5120
5121
5122
5123 static bool
5124 bcmp_translate (re_char *s1, re_char *s2, ptrdiff_t len,
5125 Lisp_Object translate, bool target_multibyte)
5126 {
5127 re_char *p1 = s1, *p2 = s2;
5128 re_char *p1_end = s1 + len;
5129 re_char *p2_end = s2 + len;
5130
5131
5132
5133 while (p1 < p1_end && p2 < p2_end)
5134 {
5135 int p1_charlen, p2_charlen;
5136 int p1_ch, p2_ch;
5137
5138 GET_CHAR_AFTER (p1_ch, p1, p1_charlen);
5139 GET_CHAR_AFTER (p2_ch, p2, p2_charlen);
5140
5141 if (RE_TRANSLATE (translate, p1_ch)
5142 != RE_TRANSLATE (translate, p2_ch))
5143 return true;
5144
5145 p1 += p1_charlen, p2 += p2_charlen;
5146 }
5147
5148 return p1 != p1_end || p2 != p2_end;
5149 }
5150
5151
5152
5153
5154
5155
5156
5157
5158
5159
5160
5161
5162 const char *
5163 re_compile_pattern (const char *pattern, ptrdiff_t length,
5164 bool posix_backtracking, const char *whitespace_regexp,
5165 struct re_pattern_buffer *bufp)
5166 {
5167 bufp->regs_allocated = REGS_UNALLOCATED;
5168
5169 reg_errcode_t ret
5170 = regex_compile ((re_char *) pattern, length,
5171 posix_backtracking,
5172 whitespace_regexp,
5173 bufp);
5174
5175 if (!ret)
5176 return NULL;
5177 return re_error_msgid[ret];
5178 }