1 /* String search routines for GNU Emacs.
2
3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2023 Free Software
4 Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or (at
11 your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
20
21
22 #include <config.h>
23
24 #include "lisp.h"
25 #include "character.h"
26 #include "buffer.h"
27 #include "syntax.h"
28 #include "charset.h"
29 #include "region-cache.h"
30 #include "blockinput.h"
31 #include "intervals.h"
32 #include "pdumper.h"
33 #include "composite.h"
34
35 #include "regex-emacs.h"
36
37 #define REGEXP_CACHE_SIZE 20
38
39 /* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
41 struct regexp_cache
42 {
43 struct regexp_cache *next;
44 Lisp_Object regexp, f_whitespace_regexp;
45 /* Syntax table for which the regexp applies. We need this because
46 of character classes. If this is t, then the compiled pattern is valid
47 for any syntax-table. */
48 Lisp_Object syntax_table;
49 struct re_pattern_buffer buf;
50 char fastmap[0400];
51 /* True means regexp was compiled to do full POSIX backtracking. */
52 bool posix;
53 /* True means we're inside a buffer match. */
54 bool busy;
55 };
56
57 /* The instances of that struct. */
58 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
59
60 /* The head of the linked list; points to the most recently used buffer. */
61 static struct regexp_cache *searchbuf_head;
62
63 static void set_search_regs (ptrdiff_t, ptrdiff_t);
64 static void save_search_regs (void);
65 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
66 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
67 ptrdiff_t, ptrdiff_t);
68 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
69 Lisp_Object, Lisp_Object, ptrdiff_t,
70 ptrdiff_t, int);
71
72 Lisp_Object re_match_object;
73
74 static AVOID
75 matcher_overflow (void)
76 {
77 error ("Stack overflow in regexp matcher");
78 }
79
80 static void
81 freeze_buffer_relocation (void)
82 {
83 #ifdef REL_ALLOC
84 /* Prevent ralloc.c from relocating the current buffer while
85 searching it. */
86 r_alloc_inhibit_buffer_relocation (1);
87 record_unwind_protect_int (r_alloc_inhibit_buffer_relocation, 0);
88 #endif
89 }
90
91 /* Compile a regexp and signal a Lisp error if anything goes wrong.
92 PATTERN is the pattern to compile.
93 CP is the place to put the result.
94 TRANSLATE is a translation table for ignoring case, or nil for none.
95 POSIX is true if we want full backtracking (POSIX style) for this pattern.
96 False means backtrack only enough to get a valid match.
97
98 The behavior also depends on Vsearch_spaces_regexp. */
99
100 static void
101 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
102 Lisp_Object translate, bool posix)
103 {
104 const char *whitespace_regexp;
105 char *val;
106
107 eassert (!cp->busy);
108 cp->regexp = Qnil;
109 cp->buf.translate = translate;
110 cp->posix = posix;
111 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
112 cp->buf.charset_unibyte = charset_unibyte;
113 if (STRINGP (Vsearch_spaces_regexp))
114 cp->f_whitespace_regexp = Vsearch_spaces_regexp;
115 else
116 cp->f_whitespace_regexp = Qnil;
117
118 whitespace_regexp = STRINGP (Vsearch_spaces_regexp) ?
119 SSDATA (Vsearch_spaces_regexp) : NULL;
120
121 val = (char *) re_compile_pattern (SSDATA (pattern), SBYTES (pattern),
122 posix, whitespace_regexp, &cp->buf);
123
124 /* If the compiled pattern hard codes some of the contents of the
125 syntax-table, it can only be reused with *this* syntax table. */
126 cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
127
128 if (val)
129 xsignal1 (Qinvalid_regexp, build_string (val));
130
131 cp->regexp = Fcopy_sequence (pattern);
132 }
133
134 /* Shrink each compiled regexp buffer in the cache
135 to the size actually used right now.
136 This is called from garbage collection. */
137
138 void
139 shrink_regexp_cache (void)
140 {
141 struct regexp_cache *cp;
142
143 for (cp = searchbuf_head; cp != 0; cp = cp->next)
144 if (!cp->busy)
145 {
146 cp->buf.allocated = cp->buf.used;
147 cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
148 }
149 }
150
151 /* Clear the regexp cache w.r.t. a particular syntax table,
152 because it was changed.
153 There is no danger of memory leak here because re_compile_pattern
154 automagically manages the memory in each re_pattern_buffer struct,
155 based on its `allocated' and `buffer' values. */
156 void
157 clear_regexp_cache (void)
158 {
159 int i;
160
161 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
162 /* It's tempting to compare with the syntax-table we've actually changed,
163 but it's not sufficient because char-table inheritance means that
164 modifying one syntax-table can change others at the same time. */
165 if (!searchbufs[i].busy && !EQ (searchbufs[i].syntax_table, Qt))
166 searchbufs[i].regexp = Qnil;
167 }
168
169 static void
170 unfreeze_pattern (void *arg)
171 {
172 struct regexp_cache *searchbuf = arg;
173 searchbuf->busy = false;
174 }
175
176 static void
177 freeze_pattern (struct regexp_cache *searchbuf)
178 {
179 eassert (!searchbuf->busy);
180 record_unwind_protect_ptr (unfreeze_pattern, searchbuf);
181 searchbuf->busy = true;
182 }
183
184 /* Compile a regexp if necessary, but first check to see if there's one in
185 the cache.
186 PATTERN is the pattern to compile.
187 TRANSLATE is a translation table for ignoring case, or nil for none.
188 REGP is the structure that says where to store the "register"
189 values that will result from matching this pattern.
190 If it is 0, we should compile the pattern not to record any
191 subexpression bounds.
192 POSIX is true if we want full backtracking (POSIX style) for this pattern.
193 False means backtrack only enough to get a valid match. */
194
195 static struct regexp_cache *
196 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
197 Lisp_Object translate, bool posix, bool multibyte)
198 {
199 struct regexp_cache *cp, **cpp, **lru_nonbusy;
200
201 for (cpp = &searchbuf_head, lru_nonbusy = NULL; ; cpp = &cp->next)
202 {
203 cp = *cpp;
204 if (!cp->busy)
205 lru_nonbusy = cpp;
206 /* Entries are initialized to nil, and may be set to nil by
207 compile_pattern_1 if the pattern isn't valid. Don't apply
208 string accessors in those cases. However, compile_pattern_1
209 is only applied to the cache entry we pick here to reuse. So
210 nil should never appear before a non-nil entry. */
211 if (NILP (cp->regexp))
212 goto compile_it;
213 if (SCHARS (cp->regexp) == SCHARS (pattern)
214 && !cp->busy
215 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
216 && !NILP (Fstring_equal (cp->regexp, pattern))
217 && EQ (cp->buf.translate, translate)
218 && cp->posix == posix
219 && (EQ (cp->syntax_table, Qt)
220 || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
221 && !NILP (Fequal (cp->f_whitespace_regexp, Vsearch_spaces_regexp))
222 && cp->buf.charset_unibyte == charset_unibyte)
223 break;
224
225 /* If we're at the end of the cache, compile into the last
226 (least recently used) non-busy cell in the cache. */
227 if (cp->next == 0)
228 {
229 if (!lru_nonbusy)
230 error ("Too much matching reentrancy");
231 cpp = lru_nonbusy;
232 cp = *cpp;
233 compile_it:
234 eassert (!cp->busy);
235 compile_pattern_1 (cp, pattern, translate, posix);
236 break;
237 }
238 }
239
240 /* When we get here, cp (aka *cpp) contains the compiled pattern,
241 either because we found it in the cache or because we just compiled it.
242 Move it to the front of the queue to mark it as most recently used. */
243 *cpp = cp->next;
244 cp->next = searchbuf_head;
245 searchbuf_head = cp;
246
247 /* Advise the searching functions about the space we have allocated
248 for register data. */
249 if (regp)
250 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
251
252 /* The compiled pattern can be used both for multibyte and unibyte
253 target. But, we have to tell which the pattern is used for. */
254 cp->buf.target_multibyte = multibyte;
255 return cp;
256 }
257
258
259 static Lisp_Object
260 looking_at_1 (Lisp_Object string, bool posix, bool modify_data)
261 {
262 Lisp_Object val;
263 unsigned char *p1, *p2;
264 ptrdiff_t s1, s2;
265 register ptrdiff_t i;
266
267 if (running_asynch_code)
268 save_search_regs ();
269
270 /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
271 table. */
272 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
273 BVAR (current_buffer, case_eqv_table));
274
275 CHECK_STRING (string);
276
277 /* Snapshot in case Lisp changes the value. */
278 bool modify_match_data = NILP (Vinhibit_changing_match_data) && modify_data;
279
280 struct regexp_cache *cache_entry = compile_pattern (
281 string,
282 modify_match_data ? &search_regs : NULL,
283 (!NILP (BVAR (current_buffer, case_fold_search))
284 ? BVAR (current_buffer, case_canon_table) : Qnil),
285 posix,
286 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
287
288 /* Do a pending quit right away, to avoid paradoxical behavior */
289 maybe_quit ();
290
291 /* Get pointers and sizes of the two strings
292 that make up the visible portion of the buffer. */
293
294 p1 = BEGV_ADDR;
295 s1 = GPT_BYTE - BEGV_BYTE;
296 p2 = GAP_END_ADDR;
297 s2 = ZV_BYTE - GPT_BYTE;
298 if (s1 < 0)
299 {
300 p2 = p1;
301 s2 = ZV_BYTE - BEGV_BYTE;
302 s1 = 0;
303 }
304 if (s2 < 0)
305 {
306 s1 = ZV_BYTE - BEGV_BYTE;
307 s2 = 0;
308 }
309
310 specpdl_ref count = SPECPDL_INDEX ();
311 freeze_buffer_relocation ();
312 freeze_pattern (cache_entry);
313 re_match_object = Qnil;
314 i = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
315 PT_BYTE - BEGV_BYTE,
316 modify_match_data ? &search_regs : NULL,
317 ZV_BYTE - BEGV_BYTE);
318
319 if (i == -2)
320 {
321 unbind_to (count, Qnil);
322 matcher_overflow ();
323 }
324
325 val = (i >= 0 ? Qt : Qnil);
326 if (modify_match_data && i >= 0)
327 {
328 for (i = 0; i < search_regs.num_regs; i++)
329 if (search_regs.start[i] >= 0)
330 {
331 search_regs.start[i]
332 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
333 search_regs.end[i]
334 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
335 }
336 /* Set last_thing_searched only when match data is changed. */
337 XSETBUFFER (last_thing_searched, current_buffer);
338 }
339
340 return unbind_to (count, val);
341 }
342
343 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 2, 0,
344 doc: /* Return t if text after point matches regular expression REGEXP.
345 By default, this function modifies the match data that
346 `match-beginning', `match-end' and `match-data' access. If
347 INHIBIT-MODIFY is non-nil, don't modify the match data. */)
348 (Lisp_Object regexp, Lisp_Object inhibit_modify)
349 {
350 return looking_at_1 (regexp, 0, NILP (inhibit_modify));
351 }
352
353 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 2, 0,
354 doc: /* Return t if text after point matches REGEXP according to Posix rules.
355 Find the longest match, in accordance with Posix regular expression rules.
356
357 By default, this function modifies the match data that
358 `match-beginning', `match-end' and `match-data' access. If
359 INHIBIT-MODIFY is non-nil, don't modify the match data. */)
360 (Lisp_Object regexp, Lisp_Object inhibit_modify)
361 {
362 return looking_at_1 (regexp, 1, NILP (inhibit_modify));
363 }
364
365 static Lisp_Object
366 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
367 bool posix, bool modify_data)
368 {
369 ptrdiff_t val;
370 EMACS_INT pos;
371 ptrdiff_t pos_byte, i;
372 bool modify_match_data = NILP (Vinhibit_changing_match_data) && modify_data;
373
374 if (running_asynch_code)
375 save_search_regs ();
376
377 CHECK_STRING (regexp);
378 CHECK_STRING (string);
379
380 if (NILP (start))
381 pos = 0, pos_byte = 0;
382 else
383 {
384 ptrdiff_t len = SCHARS (string);
385
386 CHECK_FIXNUM (start);
387 pos = XFIXNUM (start);
388 if (pos < 0 && -pos <= len)
389 pos = len + pos;
390 else if (0 > pos || pos > len)
391 args_out_of_range (string, start);
392 pos_byte = string_char_to_byte (string, pos);
393 }
394
395 /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
396 table. */
397 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
398 BVAR (current_buffer, case_eqv_table));
399
400 specpdl_ref count = SPECPDL_INDEX ();
401 struct regexp_cache *cache_entry
402 = compile_pattern (regexp,
403 modify_match_data ? &search_regs : NULL,
404 (!NILP (BVAR (current_buffer, case_fold_search))
405 ? BVAR (current_buffer, case_canon_table)
406 : Qnil),
407 posix,
408 STRING_MULTIBYTE (string));
409 freeze_pattern (cache_entry);
410 re_match_object = string;
411 val = re_search (&cache_entry->buf, SSDATA (string),
412 SBYTES (string), pos_byte,
413 SBYTES (string) - pos_byte,
414 (modify_match_data ? &search_regs : NULL));
415 unbind_to (count, Qnil);
416
417 /* Set last_thing_searched only when match data is changed. */
418 if (modify_match_data)
419 last_thing_searched = Qt;
420
421 if (val == -2)
422 matcher_overflow ();
423 if (val < 0) return Qnil;
424
425 if (modify_match_data)
426 for (i = 0; i < search_regs.num_regs; i++)
427 if (search_regs.start[i] >= 0)
428 {
429 search_regs.start[i]
430 = string_byte_to_char (string, search_regs.start[i]);
431 search_regs.end[i]
432 = string_byte_to_char (string, search_regs.end[i]);
433 }
434
435 return make_fixnum (string_byte_to_char (string, val));
436 }
437
438 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 4, 0,
439 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
440 Matching ignores case if `case-fold-search' is non-nil.
441 If third arg START is non-nil, start search at that index in STRING.
442
443 If INHIBIT-MODIFY is non-nil, match data is not changed.
444
445 If INHIBIT-MODIFY is nil or missing, match data is changed, and
446 `match-end' and `match-beginning' give indices of substrings matched
447 by parenthesis constructs in the pattern. You can use the function
448 `match-string' to extract the substrings matched by the parenthesis
449 constructions in REGEXP. For index of first char beyond the match, do
450 (match-end 0). */)
451 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
452 Lisp_Object inhibit_modify)
453 {
454 return string_match_1 (regexp, string, start, 0, NILP (inhibit_modify));
455 }
456
457 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 4, 0,
458 doc: /* Return index of start of first match for Posix REGEXP in STRING, or nil.
459 Find the longest match, in accord with Posix regular expression rules.
460 Case is ignored if `case-fold-search' is non-nil in the current buffer.
461
462 If INHIBIT-MODIFY is non-nil, match data is not changed.
463
464 If INHIBIT-MODIFY is nil or missing, match data is changed, and
465 `match-end' and `match-beginning' give indices of substrings matched
466 by parenthesis constructs in the pattern. You can use the function
467 `match-string' to extract the substrings matched by the parenthesis
468 constructions in REGEXP. For index of first char beyond the match, do
469 (match-end 0). */)
470 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
471 Lisp_Object inhibit_modify)
472 {
473 return string_match_1 (regexp, string, start, 1, NILP (inhibit_modify));
474 }
475
476 /* Match REGEXP against STRING using translation table TABLE,
477 searching all of STRING, and return the index of the match,
478 or negative on failure. This does not clobber the match data. */
479
480 ptrdiff_t
481 fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
482 Lisp_Object table)
483 {
484 re_match_object = string;
485 specpdl_ref count = SPECPDL_INDEX ();
486 struct regexp_cache *cache_entry
487 = compile_pattern (regexp, 0, table, 0, STRING_MULTIBYTE (string));
488 freeze_pattern (cache_entry);
489 ptrdiff_t val = re_search (&cache_entry->buf, SSDATA (string),
490 SBYTES (string), 0,
491 SBYTES (string), 0);
492 unbind_to (count, Qnil);
493 return val;
494 }
495
496 /* Match REGEXP against STRING, searching all of STRING and return the
497 index of the match, or negative on failure. This does not clobber
498 the match data. Table is a canonicalize table for ignoring case,
499 or nil for none.
500
501 We assume that STRING contains single-byte characters. */
502
503 ptrdiff_t
504 fast_c_string_match_internal (Lisp_Object regexp,
505 const char *string, ptrdiff_t len,
506 Lisp_Object table)
507 {
508 /* FIXME: This is expensive and not obviously correct when it makes
509 a difference. I.e., no longer "fast", and may hide bugs.
510 Something should be done about this. */
511 regexp = string_make_unibyte (regexp);
512 /* Record specpdl index because freeze_pattern pushes an
513 unwind-protect on the specpdl. */
514 specpdl_ref count = SPECPDL_INDEX ();
515 struct regexp_cache *cache_entry
516 = compile_pattern (regexp, 0, table, 0, 0);
517 freeze_pattern (cache_entry);
518 re_match_object = Qt;
519 ptrdiff_t val = re_search (&cache_entry->buf, string, len, 0, len, 0);
520 unbind_to (count, Qnil);
521 return val;
522 }
523
524 /* Match REGEXP against the characters after POS to LIMIT, and return
525 the number of matched characters. If STRING is non-nil, match
526 against the characters in it. In that case, POS and LIMIT are
527 indices into the string. This function doesn't modify the match
528 data. */
529
530 ptrdiff_t
531 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
532 ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
533 {
534 bool multibyte;
535 unsigned char *p1, *p2;
536 ptrdiff_t s1, s2;
537 ptrdiff_t len;
538
539 if (STRINGP (string))
540 {
541 if (pos_byte < 0)
542 pos_byte = string_char_to_byte (string, pos);
543 if (limit_byte < 0)
544 limit_byte = string_char_to_byte (string, limit);
545 p1 = NULL;
546 s1 = 0;
547 p2 = SDATA (string);
548 s2 = SBYTES (string);
549 multibyte = STRING_MULTIBYTE (string);
550 }
551 else
552 {
553 if (pos_byte < 0)
554 pos_byte = CHAR_TO_BYTE (pos);
555 if (limit_byte < 0)
556 limit_byte = CHAR_TO_BYTE (limit);
557 pos_byte -= BEGV_BYTE;
558 limit_byte -= BEGV_BYTE;
559 p1 = BEGV_ADDR;
560 s1 = GPT_BYTE - BEGV_BYTE;
561 p2 = GAP_END_ADDR;
562 s2 = ZV_BYTE - GPT_BYTE;
563 if (s1 < 0)
564 {
565 p2 = p1;
566 s2 = ZV_BYTE - BEGV_BYTE;
567 s1 = 0;
568 }
569 if (s2 < 0)
570 {
571 s1 = ZV_BYTE - BEGV_BYTE;
572 s2 = 0;
573 }
574 multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
575 }
576
577 struct regexp_cache *cache_entry =
578 compile_pattern (regexp, 0, Qnil, 0, multibyte);
579 specpdl_ref count = SPECPDL_INDEX ();
580 freeze_buffer_relocation ();
581 freeze_pattern (cache_entry);
582 re_match_object = STRINGP (string) ? string : Qnil;
583 len = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
584 pos_byte, NULL, limit_byte);
585
586 unbind_to (count, Qnil);
587 return len;
588 }
589
590
591 /* The newline cache: remembering which sections of text have no newlines. */
592
593 /* If the user has requested the long scans caching, make sure it's on.
594 Otherwise, make sure it's off.
595 This is our cheezy way of associating an action with the change of
596 state of a buffer-local variable. */
597 static struct region_cache *
598 newline_cache_on_off (struct buffer *buf)
599 {
600 struct buffer *base_buf = buf;
601 bool indirect_p = false;
602
603 if (buf->base_buffer)
604 {
605 base_buf = buf->base_buffer;
606 indirect_p = true;
607 }
608
609 /* Don't turn on or off the cache in the base buffer, if the value
610 of cache-long-scans of the base buffer is inconsistent with that.
611 This is because doing so will just make the cache pure overhead,
612 since if we turn it on via indirect buffer, it will be
613 immediately turned off by its base buffer. */
614 if (NILP (BVAR (buf, cache_long_scans)))
615 {
616 if (!indirect_p
617 || NILP (BVAR (base_buf, cache_long_scans)))
618 {
619 /* It should be off. */
620 if (base_buf->newline_cache)
621 {
622 free_region_cache (base_buf->newline_cache);
623 base_buf->newline_cache = 0;
624 }
625 }
626 return NULL;
627 }
628 else
629 {
630 if (!indirect_p
631 || !NILP (BVAR (base_buf, cache_long_scans)))
632 {
633 /* It should be on. */
634 if (base_buf->newline_cache == 0)
635 {
636 base_buf->newline_cache = new_region_cache ();
637 __lsan_ignore_object (base_buf->newline_cache);
638 }
639 }
640 return base_buf->newline_cache;
641 }
642 }
643
644
645 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
646
647 If COUNT is positive, search forwards; END must be >= START.
648 If COUNT is negative, search backwards for the -COUNTth instance;
649 END must be <= START.
650 If COUNT is zero, do anything you please; run rogue, for all I care.
651
652 If END is zero, use BEGV or ZV instead, as appropriate for the
653 direction indicated by COUNT. If START_BYTE is -1 it is unknown,
654 and similarly for END_BYTE.
655
656 If we find COUNT instances, set *COUNTED to COUNT, and return the
657 position past the COUNTth match. Note that for reverse motion
658 this is not the same as the usual convention for Emacs motion commands.
659
660 If we don't find COUNT instances before reaching END, set *COUNTED
661 to the number of newlines left found (negated if COUNT is negative),
662 and return END.
663
664 If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
665 to the returned character position.
666
667 If ALLOW_QUIT, check for quitting. That's good to do
668 except when inside redisplay. */
669
670 ptrdiff_t
671 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
672 ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
673 ptrdiff_t *bytepos, bool allow_quit)
674 {
675 struct region_cache *newline_cache;
676 struct buffer *cache_buffer;
677
678 if (!end)
679 {
680 if (count > 0)
681 end = ZV, end_byte = ZV_BYTE;
682 else
683 end = BEGV, end_byte = BEGV_BYTE;
684 }
685 if (end_byte == -1)
686 end_byte = CHAR_TO_BYTE (end);
687
688 newline_cache = newline_cache_on_off (current_buffer);
689 if (current_buffer->base_buffer)
690 cache_buffer = current_buffer->base_buffer;
691 else
692 cache_buffer = current_buffer;
693
694 if (counted)
695 *counted = count;
696
697 if (count > 0)
698 while (start != end)
699 {
700 /* Our innermost scanning loop is very simple; it doesn't know
701 about gaps, buffer ends, or the newline cache. ceiling is
702 the position of the last character before the next such
703 obstacle --- the last character the dumb search loop should
704 examine. */
705 ptrdiff_t tem, ceiling_byte = end_byte - 1;
706
707 /* If we're using the newline cache, consult it to see whether
708 we can avoid some scanning. */
709 if (newline_cache)
710 {
711 ptrdiff_t next_change;
712 int result = 1;
713
714 while (start < end && result)
715 {
716 ptrdiff_t lim1;
717
718 result = region_cache_forward (cache_buffer, newline_cache,
719 start, &next_change);
720 if (result)
721 {
722 /* When the cache revalidation is deferred,
723 next-change might point beyond ZV, which will
724 cause assertion violation in CHAR_TO_BYTE below.
725 Limit next_change to ZV to avoid that. */
726 if (next_change > ZV)
727 next_change = ZV;
728 start = next_change;
729 lim1 = next_change = end;
730 }
731 else
732 lim1 = min (next_change, end);
733
734 /* The cache returned zero for this region; see if
735 this is because the region is known and includes
736 only newlines. While at that, count any newlines
737 we bump into, and exit if we found enough off them. */
738 start_byte = CHAR_TO_BYTE (start);
739 while (start < lim1
740 && FETCH_BYTE (start_byte) == '\n')
741 {
742 start_byte++;
743 start++;
744 if (--count == 0)
745 {
746 if (bytepos)
747 *bytepos = start_byte;
748 return start;
749 }
750 }
751 /* If we found a non-newline character before hitting
752 position where the cache will again return non-zero
753 (i.e. no newlines beyond that position), it means
754 this region is not yet known to the cache, and we
755 must resort to the "dumb loop" method. */
756 if (start < next_change && !result)
757 break;
758 result = 1;
759 }
760 if (start >= end)
761 {
762 start = end;
763 start_byte = end_byte;
764 break;
765 }
766
767 /* START should never be after END. */
768 if (start_byte > ceiling_byte)
769 start_byte = ceiling_byte;
770
771 /* Now the text after start is an unknown region, and
772 next_change is the position of the next known region. */
773 ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
774 }
775 else if (start_byte == -1)
776 start_byte = CHAR_TO_BYTE (start);
777
778 /* The dumb loop can only scan text stored in contiguous
779 bytes. BUFFER_CEILING_OF returns the last character
780 position that is contiguous, so the ceiling is the
781 position after that. */
782 tem = BUFFER_CEILING_OF (start_byte);
783 ceiling_byte = min (tem, ceiling_byte);
784
785 {
786 /* The termination address of the dumb loop. */
787 unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
788 ptrdiff_t lim_byte = ceiling_byte + 1;
789
790 /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
791 of the base, the cursor, and the next line. */
792 ptrdiff_t base = start_byte - lim_byte;
793 ptrdiff_t cursor, next;
794
795 for (cursor = base; cursor < 0; cursor = next)
796 {
797 /* The dumb loop. */
798 unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
799 next = nl ? nl - lim_addr : 0;
800
801 /* If we're using the newline cache, cache the fact that
802 the region we just traversed is free of newlines. */
803 if (newline_cache && cursor != next)
804 {
805 know_region_cache (cache_buffer, newline_cache,
806 BYTE_TO_CHAR (lim_byte + cursor),
807 BYTE_TO_CHAR (lim_byte + next));
808 /* know_region_cache can relocate buffer text. */
809 lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
810 }
811
812 if (! nl)
813 break;
814 next++;
815
816 if (--count == 0)
817 {
818 if (bytepos)
819 *bytepos = lim_byte + next;
820 return BYTE_TO_CHAR (lim_byte + next);
821 }
822 if (allow_quit)
823 maybe_quit ();
824 }
825
826 start_byte = lim_byte;
827 start = BYTE_TO_CHAR (start_byte);
828 }
829 }
830 else
831 while (start > end)
832 {
833 /* The last character to check before the next obstacle. */
834 ptrdiff_t tem, ceiling_byte = end_byte;
835
836 /* Consult the newline cache, if appropriate. */
837 if (newline_cache)
838 {
839 ptrdiff_t next_change;
840 int result = 1;
841
842 while (start > end && result)
843 {
844 ptrdiff_t lim1;
845
846 result = region_cache_backward (cache_buffer, newline_cache,
847 start, &next_change);
848 if (result)
849 {
850 start = next_change;
851 lim1 = next_change = end;
852 }
853 else
854 lim1 = max (next_change, end);
855 start_byte = CHAR_TO_BYTE (start);
856 while (start > lim1
857 && FETCH_BYTE (start_byte - 1) == '\n')
858 {
859 if (++count == 0)
860 {
861 if (bytepos)
862 *bytepos = start_byte;
863 return start;
864 }
865 start_byte--;
866 start--;
867 }
868 if (start > next_change && !result)
869 break;
870 result = 1;
871 }
872 if (start <= end)
873 {
874 start = end;
875 start_byte = end_byte;
876 break;
877 }
878
879 /* Start should never be at or before end. */
880 if (start_byte <= ceiling_byte)
881 start_byte = ceiling_byte + 1;
882
883 /* Now the text before start is an unknown region, and
884 next_change is the position of the next known region. */
885 ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
886 }
887 else if (start_byte == -1)
888 start_byte = CHAR_TO_BYTE (start);
889
890 /* Stop scanning before the gap. */
891 tem = BUFFER_FLOOR_OF (start_byte - 1);
892 ceiling_byte = max (tem, ceiling_byte);
893
894 {
895 /* The termination address of the dumb loop. */
896 unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
897
898 /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
899 the base, the cursor, and the previous line. These
900 offsets are at least -1. */
901 ptrdiff_t base = start_byte - ceiling_byte;
902 ptrdiff_t cursor, prev;
903
904 for (cursor = base; 0 < cursor; cursor = prev)
905 {
906 unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
907 prev = nl ? nl - ceiling_addr : -1;
908
909 /* If we're looking for newlines, cache the fact that
910 this line's region is free of them. */
911 if (newline_cache && cursor != prev + 1)
912 {
913 know_region_cache (cache_buffer, newline_cache,
914 BYTE_TO_CHAR (ceiling_byte + prev + 1),
915 BYTE_TO_CHAR (ceiling_byte + cursor));
916 /* know_region_cache can relocate buffer text. */
917 ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
918 }
919
920 if (! nl)
921 break;
922
923 if (++count >= 0)
924 {
925 if (bytepos)
926 *bytepos = ceiling_byte + prev + 1;
927 return BYTE_TO_CHAR (ceiling_byte + prev + 1);
928 }
929 if (allow_quit)
930 maybe_quit ();
931 }
932
933 start_byte = ceiling_byte;
934 start = BYTE_TO_CHAR (start_byte);
935 }
936 }
937
938 if (counted)
939 *counted -= count;
940 if (bytepos)
941 {
942 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
943 eassert (*bytepos == CHAR_TO_BYTE (start));
944 }
945 return start;
946 }
947
948 /* Search for COUNT instances of a line boundary.
949 Start at START. If COUNT is negative, search backwards.
950
951 We report the resulting position by calling TEMP_SET_PT_BOTH.
952
953 If we find COUNT instances. we position after (always after,
954 even if scanning backwards) the COUNTth match.
955
956 If we don't find COUNT instances before reaching the end of the
957 buffer (or the beginning, if scanning backwards), we position at
958 the limit we bumped up against.
959
960 If ALLOW_QUIT, check for quitting. That's good to do
961 except in special cases. */
962
963 void
964 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
965 ptrdiff_t limit, ptrdiff_t limit_byte,
966 ptrdiff_t count, bool allow_quit)
967 {
968 ptrdiff_t charpos, bytepos, counted;
969
970 charpos = find_newline (start, start_byte, limit, limit_byte,
971 count, &counted, &bytepos, allow_quit);
972 if (counted != count)
973 TEMP_SET_PT_BOTH (limit, limit_byte);
974 else
975 TEMP_SET_PT_BOTH (charpos, bytepos);
976 }
977
978 /* Like above, but always scan from point and report the
979 resulting position in *CHARPOS and *BYTEPOS. */
980
981 ptrdiff_t
982 scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
983 ptrdiff_t *bytepos)
984 {
985 ptrdiff_t counted;
986
987 if (count <= 0)
988 *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
989 &counted, bytepos, 1);
990 else
991 *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
992 &counted, bytepos, 1);
993 return counted;
994 }
995
996 /* Like find_newline, but doesn't allow QUITting and doesn't return
997 COUNTED. */
998 ptrdiff_t
999 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
1000 ptrdiff_t cnt, ptrdiff_t *bytepos)
1001 {
1002 return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
1003 }
1004
1005 /* Like find_newline, but returns position before the newline, not
1006 after, and only search up to TO.
1007 This isn't just find_newline_no_quit (...)-1, because you might hit TO. */
1008
1009 ptrdiff_t
1010 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
1011 ptrdiff_t cnt, ptrdiff_t *bytepos)
1012 {
1013 ptrdiff_t counted;
1014 ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &counted, bytepos, 1);
1015
1016 if (counted == cnt)
1017 {
1018 if (bytepos)
1019 dec_both (&pos, &*bytepos);
1020 else
1021 pos--;
1022 }
1023 return pos;
1024 }
1025
1026 /* Subroutines of Lisp buffer search functions. */
1027
1028 static Lisp_Object
1029 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
1030 Lisp_Object count, int direction, int RE, bool posix)
1031 {
1032 EMACS_INT np;
1033 EMACS_INT lim;
1034 ptrdiff_t lim_byte;
1035 EMACS_INT n = direction;
1036
1037 if (!NILP (count))
1038 {
1039 CHECK_FIXNUM (count);
1040 n *= XFIXNUM (count);
1041 }
1042
1043 CHECK_STRING (string);
1044 if (NILP (bound))
1045 {
1046 if (n > 0)
1047 lim = ZV, lim_byte = ZV_BYTE;
1048 else
1049 lim = BEGV, lim_byte = BEGV_BYTE;
1050 }
1051 else
1052 {
1053 lim = fix_position (bound);
1054 if (n > 0 ? lim < PT : lim > PT)
1055 error ("Invalid search bound (wrong side of point)");
1056 if (lim > ZV)
1057 lim = ZV, lim_byte = ZV_BYTE;
1058 else if (lim < BEGV)
1059 lim = BEGV, lim_byte = BEGV_BYTE;
1060 else
1061 lim_byte = CHAR_TO_BYTE (lim);
1062 }
1063
1064 /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
1065 table. */
1066 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
1067 BVAR (current_buffer, case_eqv_table));
1068
1069 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1070 (!NILP (BVAR (current_buffer, case_fold_search))
1071 ? BVAR (current_buffer, case_canon_table)
1072 : Qnil),
1073 (!NILP (BVAR (current_buffer, case_fold_search))
1074 ? BVAR (current_buffer, case_eqv_table)
1075 : Qnil),
1076 posix);
1077 if (np <= 0)
1078 {
1079 if (NILP (noerror))
1080 xsignal1 (Qsearch_failed, string);
1081
1082 if (!EQ (noerror, Qt))
1083 {
1084 eassert (BEGV <= lim && lim <= ZV);
1085 SET_PT_BOTH (lim, lim_byte);
1086 return Qnil;
1087 #if 0 /* This would be clean, but maybe programs depend on
1088 a value of nil here. */
1089 np = lim;
1090 #endif
1091 }
1092 else
1093 return Qnil;
1094 }
1095
1096 eassert (BEGV <= np && np <= ZV);
1097 SET_PT (np);
1098
1099 return make_fixnum (np);
1100 }
1101
1102 /* Return true if REGEXP it matches just one constant string. */
1103
1104 static bool
1105 trivial_regexp_p (Lisp_Object regexp)
1106 {
1107 ptrdiff_t len = SBYTES (regexp);
1108 unsigned char *s = SDATA (regexp);
1109 while (--len >= 0)
1110 {
1111 switch (*s++)
1112 {
1113 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1114 return 0;
1115 case '\\':
1116 if (--len < 0)
1117 return 0;
1118 switch (*s++)
1119 {
1120 case '|': case '(': case ')': case '`': case '\'': case 'b':
1121 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1122 case 'S': case '=': case '{': case '}': case '_':
1123 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1124 case '1': case '2': case '3': case '4': case '5':
1125 case '6': case '7': case '8': case '9':
1126 return 0;
1127 }
1128 }
1129 }
1130 return 1;
1131 }
1132
1133 /* Search for the n'th occurrence of STRING in the current buffer,
1134 starting at position POS and stopping at position LIM,
1135 treating STRING as a literal string if RE is false or as
1136 a regular expression if RE is true.
1137
1138 If N is positive, searching is forward and LIM must be greater than POS.
1139 If N is negative, searching is backward and LIM must be less than POS.
1140
1141 Returns -x if x occurrences remain to be found (x > 0),
1142 or else the position at the beginning of the Nth occurrence
1143 (if searching backward) or the end (if searching forward).
1144
1145 POSIX is nonzero if we want full backtracking (POSIX style)
1146 for this pattern. 0 means backtrack only enough to get a valid match. */
1147
1148 #define TRANSLATE(out, trt, d) \
1149 do \
1150 { \
1151 if (! NILP (trt)) \
1152 { \
1153 Lisp_Object temp; \
1154 temp = Faref (trt, make_fixnum (d)); \
1155 if (FIXNUMP (temp)) \
1156 out = XFIXNUM (temp); \
1157 else \
1158 out = d; \
1159 } \
1160 else \
1161 out = d; \
1162 } \
1163 while (0)
1164
1165 /* Only used in search_buffer, to record the end position of the match
1166 when searching regexps and SEARCH_REGS should not be changed
1167 (i.e. Vinhibit_changing_match_data is non-nil). */
1168 static struct re_registers search_regs_1;
1169
1170 static EMACS_INT
1171 search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1172 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1173 Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1174 {
1175 unsigned char *p1, *p2;
1176 ptrdiff_t s1, s2;
1177
1178 /* Snapshot in case Lisp changes the value. */
1179 bool preserve_match_data = NILP (Vinhibit_changing_match_data);
1180
1181 struct regexp_cache *cache_entry =
1182 compile_pattern (string,
1183 preserve_match_data ? &search_regs : &search_regs_1,
1184 trt, posix,
1185 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1186 struct re_pattern_buffer *bufp = &cache_entry->buf;
1187
1188 maybe_quit (); /* Do a pending quit right away,
1189 to avoid paradoxical behavior */
1190 /* Get pointers and sizes of the two strings
1191 that make up the visible portion of the buffer. */
1192
1193 p1 = BEGV_ADDR;
1194 s1 = GPT_BYTE - BEGV_BYTE;
1195 p2 = GAP_END_ADDR;
1196 s2 = ZV_BYTE - GPT_BYTE;
1197 if (s1 < 0)
1198 {
1199 p2 = p1;
1200 s2 = ZV_BYTE - BEGV_BYTE;
1201 s1 = 0;
1202 }
1203 if (s2 < 0)
1204 {
1205 s1 = ZV_BYTE - BEGV_BYTE;
1206 s2 = 0;
1207 }
1208
1209 specpdl_ref count = SPECPDL_INDEX ();
1210 freeze_buffer_relocation ();
1211 freeze_pattern (cache_entry);
1212
1213 while (n < 0)
1214 {
1215 ptrdiff_t val;
1216
1217 re_match_object = Qnil;
1218 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1219 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1220 preserve_match_data ? &search_regs : &search_regs_1,
1221 /* Don't allow match past current point */
1222 pos_byte - BEGV_BYTE);
1223 if (val == -2)
1224 {
1225 unbind_to (count, Qnil);
1226 matcher_overflow ();
1227 }
1228 if (val >= 0)
1229 {
1230 if (preserve_match_data)
1231 {
1232 pos_byte = search_regs.start[0] + BEGV_BYTE;
1233 for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
1234 if (search_regs.start[i] >= 0)
1235 {
1236 search_regs.start[i]
1237 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1238 search_regs.end[i]
1239 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1240 }
1241 XSETBUFFER (last_thing_searched, current_buffer);
1242 /* Set pos to the new position. */
1243 pos = search_regs.start[0];
1244 }
1245 else
1246 {
1247 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1248 /* Set pos to the new position. */
1249 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1250 }
1251 }
1252 else
1253 {
1254 unbind_to (count, Qnil);
1255 return (n);
1256 }
1257 n++;
1258 maybe_quit ();
1259 }
1260 while (n > 0)
1261 {
1262 ptrdiff_t val;
1263
1264 re_match_object = Qnil;
1265 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1266 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1267 preserve_match_data ? &search_regs : &search_regs_1,
1268 lim_byte - BEGV_BYTE);
1269 if (val == -2)
1270 {
1271 unbind_to (count, Qnil);
1272 matcher_overflow ();
1273 }
1274 if (val >= 0)
1275 {
1276 if (preserve_match_data)
1277 {
1278 pos_byte = search_regs.end[0] + BEGV_BYTE;
1279 for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
1280 if (search_regs.start[i] >= 0)
1281 {
1282 search_regs.start[i]
1283 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1284 search_regs.end[i]
1285 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1286 }
1287 XSETBUFFER (last_thing_searched, current_buffer);
1288 pos = search_regs.end[0];
1289 }
1290 else
1291 {
1292 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1293 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1294 }
1295 }
1296 else
1297 {
1298 unbind_to (count, Qnil);
1299 return (0 - n);
1300 }
1301 n--;
1302 maybe_quit ();
1303 }
1304 unbind_to (count, Qnil);
1305 return (pos);
1306 }
1307
1308 static EMACS_INT
1309 search_buffer_non_re (Lisp_Object string, ptrdiff_t pos,
1310 ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte,
1311 EMACS_INT n, int RE, Lisp_Object trt, Lisp_Object inverse_trt,
1312 bool posix)
1313 {
1314 unsigned char *raw_pattern, *pat;
1315 ptrdiff_t raw_pattern_size;
1316 ptrdiff_t raw_pattern_size_byte;
1317 unsigned char *patbuf;
1318 bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1319 unsigned char *base_pat;
1320 /* Set to positive if we find a non-ASCII char that need
1321 translation. Otherwise set to zero later. */
1322 int char_base = -1;
1323 bool boyer_moore_ok = 1;
1324 USE_SAFE_ALLOCA;
1325
1326 /* MULTIBYTE says whether the text to be searched is multibyte.
1327 We must convert PATTERN to match that, or we will not really
1328 find things right. */
1329
1330 if (multibyte == STRING_MULTIBYTE (string))
1331 {
1332 raw_pattern = SDATA (string);
1333 raw_pattern_size = SCHARS (string);
1334 raw_pattern_size_byte = SBYTES (string);
1335 }
1336 else if (multibyte)
1337 {
1338 raw_pattern_size = SCHARS (string);
1339 raw_pattern_size_byte
1340 = count_size_as_multibyte (SDATA (string),
1341 raw_pattern_size);
1342 raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
1343 copy_text (SDATA (string), raw_pattern,
1344 SCHARS (string), 0, 1);
1345 }
1346 else
1347 {
1348 /* Converting multibyte to single-byte. */
1349 raw_pattern_size = SCHARS (string);
1350 raw_pattern_size_byte = SCHARS (string);
1351 raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
1352 copy_text (SDATA (string), raw_pattern,
1353 SBYTES (string), 1, 0);
1354 }
1355
1356 /* Copy and optionally translate the pattern. */
1357 ptrdiff_t len = raw_pattern_size;
1358 ptrdiff_t len_byte = raw_pattern_size_byte;
1359 SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
1360 pat = patbuf;
1361 base_pat = raw_pattern;
1362 if (multibyte)
1363 {
1364 /* Fill patbuf by translated characters in STRING while
1365 checking if we can use boyer-moore search. If TRT is
1366 non-nil, we can use boyer-moore search only if TRT can be
1367 represented by the byte array of 256 elements. For that,
1368 all non-ASCII case-equivalents of all case-sensitive
1369 characters in STRING must belong to the same character
1370 group (two characters belong to the same group iff their
1371 multibyte forms are the same except for the last byte;
1372 i.e. every 64 characters form a group; U+0000..U+003F,
1373 U+0040..U+007F, U+0080..U+00BF, ...). */
1374
1375 while (--len >= 0)
1376 {
1377 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1378 int translated, inverse;
1379 int charlen;
1380
1381 /* If we got here and the RE flag is set, it's because we're
1382 dealing with a regexp known to be trivial, so the backslash
1383 just quotes the next character. */
1384 if (RE && *base_pat == '\\')
1385 {
1386 len--;
1387 raw_pattern_size--;
1388 len_byte--;
1389 base_pat++;
1390 }
1391
1392 int in_charlen, c = string_char_and_length (base_pat, &in_charlen);
1393
1394 if (NILP (trt))
1395 {
1396 str = base_pat;
1397 charlen = in_charlen;
1398 }
1399 else
1400 {
1401 /* Translate the character. */
1402 TRANSLATE (translated, trt, c);
1403 charlen = CHAR_STRING (translated, str_base);
1404 str = str_base;
1405
1406 /* Check if C has any other case-equivalents. */
1407 TRANSLATE (inverse, inverse_trt, c);
1408 /* If so, check if we can use boyer-moore. */
1409 if (c != inverse && boyer_moore_ok)
1410 {
1411 /* Check if all equivalents belong to the same
1412 group of characters. Note that the check of C
1413 itself is done by the last iteration. */
1414 int this_char_base = -1;
1415
1416 while (boyer_moore_ok)
1417 {
1418 if (ASCII_CHAR_P (inverse))
1419 {
1420 if (this_char_base > 0)
1421 boyer_moore_ok = 0;
1422 else
1423 this_char_base = 0;
1424 }
1425 else if (CHAR_BYTE8_P (inverse))
1426 /* Boyer-moore search can't handle a
1427 translation of an eight-bit
1428 character. */
1429 boyer_moore_ok = 0;
1430 else if (this_char_base < 0)
1431 {
1432 this_char_base = inverse & ~0x3F;
1433 if (char_base < 0)
1434 char_base = this_char_base;
1435 else if (this_char_base != char_base)
1436 boyer_moore_ok = 0;
1437 }
1438 else if ((inverse & ~0x3F) != this_char_base)
1439 boyer_moore_ok = 0;
1440 if (c == inverse)
1441 break;
1442 TRANSLATE (inverse, inverse_trt, inverse);
1443 }
1444 }
1445 }
1446
1447 /* Store this character into the translated pattern. */
1448 memcpy (pat, str, charlen);
1449 pat += charlen;
1450 base_pat += in_charlen;
1451 len_byte -= in_charlen;
1452 }
1453
1454 /* If char_base is still negative we didn't find any translated
1455 non-ASCII characters. */
1456 if (char_base < 0)
1457 char_base = 0;
1458 }
1459 else
1460 {
1461 /* Unibyte buffer. */
1462 char_base = 0;
1463 while (--len >= 0)
1464 {
1465 int c, translated, inverse;
1466
1467 /* If we got here and the RE flag is set, it's because we're
1468 dealing with a regexp known to be trivial, so the backslash
1469 just quotes the next character. */
1470 if (RE && *base_pat == '\\')
1471 {
1472 len--;
1473 raw_pattern_size--;
1474 base_pat++;
1475 }
1476 c = *base_pat++;
1477 TRANSLATE (translated, trt, c);
1478 *pat++ = translated;
1479 /* Check that none of C's equivalents violates the
1480 assumptions of boyer_moore. */
1481 TRANSLATE (inverse, inverse_trt, c);
1482 while (1)
1483 {
1484 if (inverse >= 0200)
1485 {
1486 boyer_moore_ok = 0;
1487 break;
1488 }
1489 if (c == inverse)
1490 break;
1491 TRANSLATE (inverse, inverse_trt, inverse);
1492 }
1493 }
1494 }
1495
1496 len_byte = pat - patbuf;
1497 pat = base_pat = patbuf;
1498
1499 EMACS_INT result
1500 = (boyer_moore_ok
1501 ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
1502 pos_byte, lim_byte,
1503 char_base)
1504 : simple_search (n, pat, raw_pattern_size, len_byte, trt,
1505 pos, pos_byte, lim, lim_byte));
1506 SAFE_FREE ();
1507 return result;
1508 }
1509
1510 EMACS_INT
1511 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1512 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1513 int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1514 {
1515 if (running_asynch_code)
1516 save_search_regs ();
1517
1518 /* Searching 0 times means don't move. */
1519 /* Null string is found at starting position. */
1520 if (n == 0 || SCHARS (string) == 0)
1521 {
1522 set_search_regs (pos_byte, 0);
1523 return pos;
1524 }
1525
1526 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1527 pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte,
1528 n, trt, inverse_trt, posix);
1529 else
1530 pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte,
1531 n, RE, trt, inverse_trt, posix);
1532
1533 return pos;
1534 }
1535
1536 /* Do a simple string search N times for the string PAT,
1537 whose length is LEN/LEN_BYTE,
1538 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1539 TRT is the translation table.
1540
1541 Return the character position where the match is found.
1542 Otherwise, if M matches remained to be found, return -M.
1543
1544 This kind of search works regardless of what is in PAT and
1545 regardless of what is in TRT. It is used in cases where
1546 boyer_moore cannot work. */
1547
1548 static EMACS_INT
1549 simple_search (EMACS_INT n, unsigned char *pat,
1550 ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1551 ptrdiff_t pos, ptrdiff_t pos_byte,
1552 ptrdiff_t lim, ptrdiff_t lim_byte)
1553 {
1554 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1555 bool forward = n > 0;
1556 /* Number of buffer bytes matched. Note that this may be different
1557 from len_byte in a multibyte buffer. */
1558 ptrdiff_t match_byte = PTRDIFF_MIN;
1559
1560 if (lim > pos && multibyte)
1561 while (n > 0)
1562 {
1563 while (1)
1564 {
1565 /* Try matching at position POS. */
1566 ptrdiff_t this_pos_byte = pos_byte;
1567 ptrdiff_t this_len = len;
1568 unsigned char *p = pat;
1569 if (pos + len > lim || pos_byte + len_byte > lim_byte)
1570 goto stop;
1571
1572 while (this_len > 0)
1573 {
1574 int charlen, pat_ch = string_char_and_length (p, &charlen);
1575 int buf_charlen, buf_ch
1576 = string_char_and_length (BYTE_POS_ADDR (this_pos_byte),
1577 &buf_charlen);
1578 TRANSLATE (buf_ch, trt, buf_ch);
1579
1580 if (buf_ch != pat_ch)
1581 break;
1582
1583 this_len--;
1584 p += charlen;
1585
1586 this_pos_byte += buf_charlen;
1587 }
1588
1589 if (this_len == 0)
1590 {
1591 match_byte = this_pos_byte - pos_byte;
1592 pos += len;
1593 pos_byte += match_byte;
1594 break;
1595 }
1596
1597 inc_both (&pos, &pos_byte);
1598 }
1599
1600 n--;
1601 }
1602 else if (lim > pos)
1603 while (n > 0)
1604 {
1605 while (1)
1606 {
1607 /* Try matching at position POS. */
1608 ptrdiff_t this_pos = pos;
1609 ptrdiff_t this_len = len;
1610 unsigned char *p = pat;
1611
1612 if (pos + len > lim)
1613 goto stop;
1614
1615 while (this_len > 0)
1616 {
1617 int pat_ch = *p++;
1618 int buf_ch = FETCH_BYTE (this_pos);
1619 TRANSLATE (buf_ch, trt, buf_ch);
1620
1621 if (buf_ch != pat_ch)
1622 break;
1623
1624 this_len--;
1625 this_pos++;
1626 }
1627
1628 if (this_len == 0)
1629 {
1630 match_byte = len;
1631 pos += len;
1632 break;
1633 }
1634
1635 pos++;
1636 }
1637
1638 n--;
1639 }
1640 /* Backwards search. */
1641 else if (lim < pos && multibyte)
1642 while (n < 0)
1643 {
1644 while (1)
1645 {
1646 /* Try matching at position POS. */
1647 ptrdiff_t this_pos = pos;
1648 ptrdiff_t this_pos_byte = pos_byte;
1649 ptrdiff_t this_len = len;
1650 const unsigned char *p = pat + len_byte;
1651
1652 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1653 goto stop;
1654
1655 while (this_len > 0)
1656 {
1657 int pat_ch, buf_ch;
1658
1659 dec_both (&this_pos, &this_pos_byte);
1660 p -= raw_prev_char_len (p);
1661 pat_ch = STRING_CHAR (p);
1662 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1663 TRANSLATE (buf_ch, trt, buf_ch);
1664
1665 if (buf_ch != pat_ch)
1666 break;
1667
1668 this_len--;
1669 }
1670
1671 if (this_len == 0)
1672 {
1673 match_byte = pos_byte - this_pos_byte;
1674 pos = this_pos;
1675 pos_byte = this_pos_byte;
1676 break;
1677 }
1678
1679 dec_both (&pos, &pos_byte);
1680 }
1681
1682 n++;
1683 }
1684 else if (lim < pos)
1685 while (n < 0)
1686 {
1687 while (1)
1688 {
1689 /* Try matching at position POS. */
1690 ptrdiff_t this_pos = pos - len;
1691 ptrdiff_t this_len = len;
1692 unsigned char *p = pat;
1693
1694 if (this_pos < lim)
1695 goto stop;
1696
1697 while (this_len > 0)
1698 {
1699 int pat_ch = *p++;
1700 int buf_ch = FETCH_BYTE (this_pos);
1701 TRANSLATE (buf_ch, trt, buf_ch);
1702
1703 if (buf_ch != pat_ch)
1704 break;
1705 this_len--;
1706 this_pos++;
1707 }
1708
1709 if (this_len == 0)
1710 {
1711 match_byte = len;
1712 pos -= len;
1713 break;
1714 }
1715
1716 pos--;
1717 }
1718
1719 n++;
1720 }
1721
1722 stop:
1723 if (n == 0)
1724 {
1725 eassert (match_byte != PTRDIFF_MIN);
1726 if (forward)
1727 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1728 else
1729 set_search_regs (multibyte ? pos_byte : pos, match_byte);
1730
1731 return pos;
1732 }
1733 else if (n > 0)
1734 return -n;
1735 else
1736 return n;
1737 }
1738
1739 /* Do Boyer-Moore search N times for the string BASE_PAT,
1740 whose length is LEN_BYTE,
1741 from buffer position POS_BYTE until LIM_BYTE.
1742 DIRECTION says which direction we search in.
1743 TRT and INVERSE_TRT are translation tables.
1744 Characters in PAT are already translated by TRT.
1745
1746 This kind of search works if all the characters in BASE_PAT that
1747 have nontrivial translation are the same aside from the last byte.
1748 This makes it possible to translate just the last byte of a
1749 character, and do so after just a simple test of the context.
1750 CHAR_BASE is nonzero if there is such a non-ASCII character.
1751
1752 If that criterion is not satisfied, do not call this function. */
1753
1754 static EMACS_INT
1755 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1756 ptrdiff_t len_byte,
1757 Lisp_Object trt, Lisp_Object inverse_trt,
1758 ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1759 int char_base)
1760 {
1761 int direction = ((n > 0) ? 1 : -1);
1762 register ptrdiff_t dirlen;
1763 ptrdiff_t limit;
1764 int stride_for_teases = 0;
1765 int BM_tab[0400];
1766 register unsigned char *cursor, *p_limit;
1767 register ptrdiff_t i;
1768 register int j;
1769 unsigned char *pat, *pat_end;
1770 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1771
1772 unsigned char simple_translate[0400];
1773 /* These are set to the preceding bytes of a byte to be translated
1774 if char_base is nonzero. As the maximum byte length of a
1775 multibyte character is 5, we have to check at most four previous
1776 bytes. */
1777 int translate_prev_byte1 = 0;
1778 int translate_prev_byte2 = 0;
1779 int translate_prev_byte3 = 0;
1780
1781 /* The general approach is that we are going to maintain that we know
1782 the first (closest to the present position, in whatever direction
1783 we're searching) character that could possibly be the last
1784 (furthest from present position) character of a valid match. We
1785 advance the state of our knowledge by looking at that character
1786 and seeing whether it indeed matches the last character of the
1787 pattern. If it does, we take a closer look. If it does not, we
1788 move our pointer (to putative last characters) as far as is
1789 logically possible. This amount of movement, which I call a
1790 stride, will be the length of the pattern if the actual character
1791 appears nowhere in the pattern, otherwise it will be the distance
1792 from the last occurrence of that character to the end of the
1793 pattern. If the amount is zero we have a possible match. */
1794
1795 /* Here we make a "mickey mouse" BM table. The stride of the search
1796 is determined only by the last character of the putative match.
1797 If that character does not match, we will stride the proper
1798 distance to propose a match that superimposes it on the last
1799 instance of a character that matches it (per trt), or misses
1800 it entirely if there is none. */
1801
1802 dirlen = len_byte * direction;
1803
1804 /* Record position after the end of the pattern. */
1805 pat_end = base_pat + len_byte;
1806 /* BASE_PAT points to a character that we start scanning from.
1807 It is the first character in a forward search,
1808 the last character in a backward search. */
1809 if (direction < 0)
1810 base_pat = pat_end - 1;
1811
1812 /* A character that does not appear in the pattern induces a
1813 stride equal to the pattern length. */
1814 for (i = 0; i < 0400; i++)
1815 BM_tab[i] = dirlen;
1816
1817 /* We use this for translation, instead of TRT itself.
1818 We fill this in to handle the characters that actually
1819 occur in the pattern. Others don't matter anyway! */
1820 for (i = 0; i < 0400; i++)
1821 simple_translate[i] = i;
1822
1823 if (char_base)
1824 {
1825 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
1826 byte following them are the target of translation. */
1827 eassume (0x80 <= char_base && char_base <= MAX_CHAR);
1828 unsigned char str[MAX_MULTIBYTE_LENGTH];
1829 int cblen = CHAR_STRING (char_base, str);
1830
1831 translate_prev_byte1 = str[cblen - 2];
1832 if (cblen > 2)
1833 {
1834 translate_prev_byte2 = str[cblen - 3];
1835 if (cblen > 3)
1836 translate_prev_byte3 = str[cblen - 4];
1837 }
1838 }
1839
1840 i = 0;
1841 while (i != dirlen)
1842 {
1843 unsigned char *ptr = base_pat + i;
1844 i += direction;
1845 if (! NILP (trt))
1846 {
1847 /* If the byte currently looking at is the last of a
1848 character to check case-equivalents, set CH to that
1849 character. An ASCII character and a non-ASCII character
1850 matching with CHAR_BASE are to be checked. */
1851 int ch = -1;
1852
1853 if (ASCII_CHAR_P (*ptr) || ! multibyte)
1854 ch = *ptr;
1855 else if (char_base
1856 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1857 {
1858 unsigned char *charstart = ptr - 1;
1859
1860 while (! (CHAR_HEAD_P (*charstart)))
1861 charstart--;
1862 ch = STRING_CHAR (charstart);
1863 if (char_base != (ch & ~0x3F))
1864 ch = -1;
1865 }
1866
1867 if (ch >= 0200 && multibyte)
1868 j = (ch & 0x3F) | 0200;
1869 else
1870 j = *ptr;
1871
1872 if (i == dirlen)
1873 stride_for_teases = BM_tab[j];
1874
1875 BM_tab[j] = dirlen - i;
1876 /* A translation table is accompanied by its inverse -- see
1877 comment following downcase_table for details. */
1878 if (ch >= 0)
1879 {
1880 int starting_ch = ch;
1881 int starting_j = j;
1882
1883 while (1)
1884 {
1885 TRANSLATE (ch, inverse_trt, ch);
1886 if (ch >= 0200 && multibyte)
1887 j = (ch & 0x3F) | 0200;
1888 else
1889 j = ch;
1890
1891 /* For all the characters that map into CH,
1892 set up simple_translate to map the last byte
1893 into STARTING_J. */
1894 simple_translate[j] = starting_j;
1895 if (ch == starting_ch)
1896 break;
1897 BM_tab[j] = dirlen - i;
1898 }
1899 }
1900 }
1901 else
1902 {
1903 j = *ptr;
1904
1905 if (i == dirlen)
1906 stride_for_teases = BM_tab[j];
1907 BM_tab[j] = dirlen - i;
1908 }
1909 /* stride_for_teases tells how much to stride if we get a
1910 match on the far character but are subsequently
1911 disappointed, by recording what the stride would have been
1912 for that character if the last character had been
1913 different. */
1914 }
1915 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1916 /* loop invariant - POS_BYTE points at where last char (first
1917 char if reverse) of pattern would align in a possible match. */
1918 while (n != 0)
1919 {
1920 ptrdiff_t tail_end;
1921 unsigned char *tail_end_ptr;
1922
1923 /* It's been reported that some (broken) compiler thinks that
1924 Boolean expressions in an arithmetic context are unsigned.
1925 Using an explicit ?1:0 prevents this. */
1926 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1927 < 0)
1928 return (n * (0 - direction));
1929 /* First we do the part we can by pointers (maybe nothing) */
1930 maybe_quit ();
1931 pat = base_pat;
1932 limit = pos_byte - dirlen + direction;
1933 if (direction > 0)
1934 {
1935 limit = BUFFER_CEILING_OF (limit);
1936 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1937 can take on without hitting edge of buffer or the gap. */
1938 limit = min (limit, pos_byte + 20000);
1939 limit = min (limit, lim_byte - 1);
1940 }
1941 else
1942 {
1943 limit = BUFFER_FLOOR_OF (limit);
1944 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1945 can take on without hitting edge of buffer or the gap. */
1946 limit = max (limit, pos_byte - 20000);
1947 limit = max (limit, lim_byte);
1948 }
1949 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1950 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1951
1952 if ((limit - pos_byte) * direction > 20)
1953 {
1954 unsigned char *p2;
1955
1956 p_limit = BYTE_POS_ADDR (limit);
1957 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1958 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
1959 while (1) /* use one cursor setting as long as i can */
1960 {
1961 if (direction > 0) /* worth duplicating */
1962 {
1963 while (cursor <= p_limit)
1964 {
1965 if (BM_tab[*cursor] == 0)
1966 goto hit;
1967 cursor += BM_tab[*cursor];
1968 }
1969 }
1970 else
1971 {
1972 while (cursor >= p_limit)
1973 {
1974 if (BM_tab[*cursor] == 0)
1975 goto hit;
1976 cursor += BM_tab[*cursor];
1977 }
1978 }
1979 /* If you are here, cursor is beyond the end of the
1980 searched region. You fail to match within the
1981 permitted region and would otherwise try a character
1982 beyond that region. */
1983 break;
1984
1985 hit:
1986 i = dirlen - direction;
1987 if (! NILP (trt))
1988 {
1989 while ((i -= direction) + direction != 0)
1990 {
1991 int ch;
1992 cursor -= direction;
1993 /* Translate only the last byte of a character. */
1994 if (! multibyte
1995 || ((cursor == tail_end_ptr
1996 || CHAR_HEAD_P (cursor[1]))
1997 && (CHAR_HEAD_P (cursor[0])
1998 /* Check if this is the last byte of
1999 a translatable character. */
2000 || (translate_prev_byte1 == cursor[-1]
2001 && (CHAR_HEAD_P (translate_prev_byte1)
2002 || (translate_prev_byte2 == cursor[-2]
2003 && (CHAR_HEAD_P (translate_prev_byte2)
2004 || (translate_prev_byte3 == cursor[-3]))))))))
2005 ch = simple_translate[*cursor];
2006 else
2007 ch = *cursor;
2008 if (pat[i] != ch)
2009 break;
2010 }
2011 }
2012 else
2013 {
2014 while ((i -= direction) + direction != 0)
2015 {
2016 cursor -= direction;
2017 if (pat[i] != *cursor)
2018 break;
2019 }
2020 }
2021 cursor += dirlen - i - direction; /* fix cursor */
2022 if (i + direction == 0)
2023 {
2024 ptrdiff_t position, start, end;
2025 #ifdef REL_ALLOC
2026 ptrdiff_t cursor_off;
2027 #endif
2028
2029 cursor -= direction;
2030
2031 position = pos_byte + cursor - p2 + ((direction > 0)
2032 ? 1 - len_byte : 0);
2033 #ifdef REL_ALLOC
2034 /* set_search_regs might call malloc, which could
2035 cause ralloc.c relocate buffer text. We need to
2036 update pointers into buffer text due to that. */
2037 cursor_off = cursor - p2;
2038 #endif
2039 set_search_regs (position, len_byte);
2040 #ifdef REL_ALLOC
2041 p_limit = BYTE_POS_ADDR (limit);
2042 p2 = BYTE_POS_ADDR (pos_byte);
2043 cursor = p2 + cursor_off;
2044 #endif
2045
2046 if (NILP (Vinhibit_changing_match_data))
2047 {
2048 start = search_regs.start[0];
2049 end = search_regs.end[0];
2050 }
2051 else
2052 /* If Vinhibit_changing_match_data is non-nil,
2053 search_regs will not be changed. So let's
2054 compute start and end here. */
2055 {
2056 start = BYTE_TO_CHAR (position);
2057 end = BYTE_TO_CHAR (position + len_byte);
2058 }
2059
2060 if ((n -= direction) != 0)
2061 cursor += dirlen; /* to resume search */
2062 else
2063 return direction > 0 ? end : start;
2064 }
2065 else
2066 cursor += stride_for_teases; /* <sigh> we lose - */
2067 }
2068 pos_byte += cursor - p2;
2069 }
2070 else
2071 /* Now we'll pick up a clump that has to be done the hard
2072 way because it covers a discontinuity. */
2073 {
2074 limit = ((direction > 0)
2075 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2076 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2077 limit = ((direction > 0)
2078 ? min (limit + len_byte, lim_byte - 1)
2079 : max (limit - len_byte, lim_byte));
2080 /* LIMIT is now the last value POS_BYTE can have
2081 and still be valid for a possible match. */
2082 while (1)
2083 {
2084 /* This loop can be coded for space rather than
2085 speed because it will usually run only once.
2086 (the reach is at most len + 21, and typically
2087 does not exceed len). */
2088 while ((limit - pos_byte) * direction >= 0)
2089 {
2090 int ch = FETCH_BYTE (pos_byte);
2091 if (BM_tab[ch] == 0)
2092 goto hit2;
2093 pos_byte += BM_tab[ch];
2094 }
2095 break; /* ran off the end */
2096
2097 hit2:
2098 /* Found what might be a match. */
2099 i = dirlen - direction;
2100 while ((i -= direction) + direction != 0)
2101 {
2102 int ch;
2103 unsigned char *ptr;
2104 pos_byte -= direction;
2105 ptr = BYTE_POS_ADDR (pos_byte);
2106 /* Translate only the last byte of a character. */
2107 if (! multibyte
2108 || ((ptr == tail_end_ptr
2109 || CHAR_HEAD_P (ptr[1]))
2110 && (CHAR_HEAD_P (ptr[0])
2111 /* Check if this is the last byte of a
2112 translatable character. */
2113 || (translate_prev_byte1 == ptr[-1]
2114 && (CHAR_HEAD_P (translate_prev_byte1)
2115 || (translate_prev_byte2 == ptr[-2]
2116 && (CHAR_HEAD_P (translate_prev_byte2)
2117 || translate_prev_byte3 == ptr[-3])))))))
2118 ch = simple_translate[*ptr];
2119 else
2120 ch = *ptr;
2121 if (pat[i] != ch)
2122 break;
2123 }
2124 /* Above loop has moved POS_BYTE part or all the way
2125 back to the first pos (last pos if reverse).
2126 Set it once again at the last (first if reverse) char. */
2127 pos_byte += dirlen - i - direction;
2128 if (i + direction == 0)
2129 {
2130 ptrdiff_t position, start, end;
2131 pos_byte -= direction;
2132
2133 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2134 set_search_regs (position, len_byte);
2135
2136 if (NILP (Vinhibit_changing_match_data))
2137 {
2138 start = search_regs.start[0];
2139 end = search_regs.end[0];
2140 }
2141 else
2142 /* If Vinhibit_changing_match_data is non-nil,
2143 search_regs will not be changed. So let's
2144 compute start and end here. */
2145 {
2146 start = BYTE_TO_CHAR (position);
2147 end = BYTE_TO_CHAR (position + len_byte);
2148 }
2149
2150 if ((n -= direction) != 0)
2151 pos_byte += dirlen; /* to resume search */
2152 else
2153 return direction > 0 ? end : start;
2154 }
2155 else
2156 pos_byte += stride_for_teases;
2157 }
2158 }
2159 /* We have done one clump. Can we continue? */
2160 if ((lim_byte - pos_byte) * direction < 0)
2161 return ((0 - n) * direction);
2162 }
2163 return BYTE_TO_CHAR (pos_byte);
2164 }
2165
2166 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2167 for the overall match just found in the current buffer.
2168 Also clear out the match data for registers 1 and up. */
2169
2170 static void
2171 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2172 {
2173 ptrdiff_t i;
2174
2175 if (!NILP (Vinhibit_changing_match_data))
2176 return;
2177
2178 /* Make sure we have registers in which to store
2179 the match position. */
2180 if (search_regs.num_regs == 0)
2181 {
2182 search_regs.start = xmalloc (2 * sizeof *search_regs.start);
2183 search_regs.end = xmalloc (2 * sizeof *search_regs.end);
2184 search_regs.num_regs = 2;
2185 }
2186
2187 /* Clear out the other registers. */
2188 for (i = 1; i < search_regs.num_regs; i++)
2189 {
2190 search_regs.start[i] = -1;
2191 search_regs.end[i] = -1;
2192 }
2193
2194 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2195 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2196 XSETBUFFER (last_thing_searched, current_buffer);
2197 }
2198
2199 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2200 "MSearch backward: ",
2201 doc: /* Search backward from point for STRING.
2202 Set point to the beginning of the occurrence found, and return point.
2203 An optional second argument bounds the search; it is a buffer position.
2204 The match found must not begin before that position. A value of nil
2205 means search to the beginning of the accessible portion of the buffer.
2206 Optional third argument, if t, means if fail just return nil (no error).
2207 If not nil and not t, position at limit of search and return nil.
2208 Optional fourth argument COUNT, if a positive number, means to search
2209 for COUNT successive occurrences. If COUNT is negative, search
2210 forward, instead of backward, for -COUNT occurrences. A value of
2211 nil means the same as 1.
2212 With COUNT positive, the match found is the COUNTth to last one (or
2213 last, if COUNT is 1 or nil) in the buffer located entirely before
2214 the origin of the search; correspondingly with COUNT negative.
2215
2216 Search case-sensitivity is determined by the value of the variable
2217 `case-fold-search', which see.
2218
2219 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2220 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2221 {
2222 return search_command (string, bound, noerror, count, -1, 0, 0);
2223 }
2224
2225 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2226 doc: /* Search forward from point for STRING.
2227 Set point to the end of the occurrence found, and return point.
2228 An optional second argument bounds the search; it is a buffer position.
2229 The match found must not end after that position. A value of nil
2230 means search to the end of the accessible portion of the buffer.
2231 Optional third argument, if t, means if fail just return nil (no error).
2232 If not nil and not t, move to limit of search and return nil.
2233 Optional fourth argument COUNT, if a positive number, means to search
2234 for COUNT successive occurrences. If COUNT is negative, search
2235 backward, instead of forward, for -COUNT occurrences. A value of
2236 nil means the same as 1.
2237 With COUNT positive, the match found is the COUNTth one (or first,
2238 if COUNT is 1 or nil) in the buffer located entirely after the
2239 origin of the search; correspondingly with COUNT negative.
2240
2241 Search case-sensitivity is determined by the value of the variable
2242 `case-fold-search', which see.
2243
2244 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2245 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2246 {
2247 return search_command (string, bound, noerror, count, 1, 0, 0);
2248 }
2249
2250 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2251 "sRE search backward: ",
2252 doc: /* Search backward from point for regular expression REGEXP.
2253 This function is almost identical to `re-search-forward', except that
2254 by default it searches backward instead of forward, and the sign of
2255 COUNT also indicates exactly the opposite searching direction.
2256 See `re-search-forward' for details.
2257
2258 Note that searching backwards may give a shorter match than expected,
2259 because REGEXP is still matched in the forward direction. See Info
2260 anchor `(elisp) re-search-backward' for details. */)
2261 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2262 {
2263 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2264 }
2265
2266 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2267 "sRE search: ",
2268 doc: /* Search forward from point for regular expression REGEXP.
2269 Set point to the end of the occurrence found, and return point.
2270 The optional second argument BOUND is a buffer position that bounds
2271 the search. The match found must not end after that position. A
2272 value of nil means search to the end of the accessible portion of
2273 the buffer.
2274 The optional third argument NOERROR indicates how errors are handled
2275 when the search fails. If it is nil or omitted, emit an error; if
2276 it is t, simply return nil and do nothing; if it is neither nil nor
2277 t, move to the limit of search and return nil.
2278 The optional fourth argument COUNT is a number that indicates the
2279 search direction and the number of occurrences to search for. If it
2280 is positive, search forward for COUNT successive occurrences; if it
2281 is negative, search backward, instead of forward, for -COUNT
2282 occurrences. A value of nil means the same as 1.
2283 With COUNT positive/negative, the match found is the COUNTth/-COUNTth
2284 one in the buffer located entirely after/before the origin of the
2285 search.
2286
2287 Search case-sensitivity is determined by the value of the variable
2288 `case-fold-search', which see.
2289
2290 See also the functions `match-beginning', `match-end', `match-string',
2291 and `replace-match'. */)
2292 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2293 {
2294 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2295 }
2296
2297 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2298 "sPosix search backward: ",
2299 doc: /* Search backward from point for match for REGEXP according to Posix rules.
2300 Find the longest match in accord with Posix regular expression rules.
2301 Set point to the beginning of the occurrence found, and return point.
2302 An optional second argument bounds the search; it is a buffer position.
2303 The match found must not begin before that position. A value of nil
2304 means search to the beginning of the accessible portion of the buffer.
2305 Optional third argument, if t, means if fail just return nil (no error).
2306 If not nil and not t, position at limit of search and return nil.
2307 Optional fourth argument COUNT, if a positive number, means to search
2308 for COUNT successive occurrences. If COUNT is negative, search
2309 forward, instead of backward, for -COUNT occurrences. A value of
2310 nil means the same as 1.
2311 With COUNT positive, the match found is the COUNTth to last one (or
2312 last, if COUNT is 1 or nil) in the buffer located entirely before
2313 the origin of the search; correspondingly with COUNT negative.
2314
2315 Search case-sensitivity is determined by the value of the variable
2316 `case-fold-search', which see.
2317
2318 See also the functions `match-beginning', `match-end', `match-string',
2319 and `replace-match'. */)
2320 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2321 {
2322 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2323 }
2324
2325 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2326 "sPosix search: ",
2327 doc: /* Search forward from point for REGEXP according to Posix rules.
2328 Find the longest match in accord with Posix regular expression rules.
2329 Set point to the end of the occurrence found, and return point.
2330 An optional second argument bounds the search; it is a buffer position.
2331 The match found must not end after that position. A value of nil
2332 means search to the end of the accessible portion of the buffer.
2333 Optional third argument, if t, means if fail just return nil (no error).
2334 If not nil and not t, move to limit of search and return nil.
2335 Optional fourth argument COUNT, if a positive number, means to search
2336 for COUNT successive occurrences. If COUNT is negative, search
2337 backward, instead of forward, for -COUNT occurrences. A value of
2338 nil means the same as 1.
2339 With COUNT positive, the match found is the COUNTth one (or first,
2340 if COUNT is 1 or nil) in the buffer located entirely after the
2341 origin of the search; correspondingly with COUNT negative.
2342
2343 Search case-sensitivity is determined by the value of the variable
2344 `case-fold-search', which see.
2345
2346 See also the functions `match-beginning', `match-end', `match-string',
2347 and `replace-match'. */)
2348 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2349 {
2350 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2351 }
2352
2353 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2354 doc: /* Replace text matched by last search with NEWTEXT.
2355 Leave point at the end of the replacement text.
2356
2357 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2358 the replacement text. Otherwise, maybe capitalize the whole text, or
2359 maybe just word initials, based on the replaced text. If the replaced
2360 text has only capital letters and has at least one multiletter word,
2361 convert NEWTEXT to all caps. Otherwise if all words are capitalized
2362 in the replaced text, capitalize each word in NEWTEXT. Note that
2363 what exactly is a word is determined by the syntax tables in effect
2364 in the current buffer.
2365
2366 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2367 Otherwise treat `\\' as special:
2368 `\\&' in NEWTEXT means substitute original matched text.
2369 `\\N' means substitute what matched the Nth `\\(...\\)'.
2370 If Nth parens didn't match, substitute nothing.
2371 `\\\\' means insert one `\\'.
2372 `\\?' is treated literally
2373 (for compatibility with `query-replace-regexp').
2374 Any other character following `\\' signals an error.
2375 Case conversion does not apply to these substitutions.
2376
2377 If optional fourth argument STRING is non-nil, it should be a string
2378 to act on; this should be the string on which the previous match was
2379 done via `string-match'. In this case, `replace-match' creates and
2380 returns a new string, made by copying STRING and replacing the part of
2381 STRING that was matched (the original STRING itself is not altered).
2382
2383 The optional fifth argument SUBEXP specifies a subexpression;
2384 it says to replace just that subexpression with NEWTEXT,
2385 rather than replacing the entire matched text.
2386 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2387 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2388 NEWTEXT in place of subexp N.
2389 This is useful only after a regular expression search or match,
2390 since only regular expressions have distinguished subexpressions. */)
2391 (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2392 {
2393 enum { nochange, all_caps, cap_initial } case_action;
2394 ptrdiff_t pos, pos_byte;
2395 bool some_multiletter_word;
2396 bool some_lowercase;
2397 bool some_uppercase;
2398 bool some_nonuppercase_initial;
2399 int c, prevc;
2400 ptrdiff_t sub;
2401 ptrdiff_t opoint, newpoint;
2402
2403 CHECK_STRING (newtext);
2404
2405 if (! NILP (string))
2406 CHECK_STRING (string);
2407
2408 /* Most replacement texts don't contain any backslash directives in
2409 the replacements. Check whether that's the case, which will
2410 enable us to take the fast path later. */
2411 if (NILP (literal)
2412 && !memchr (SSDATA (newtext), '\\', SBYTES (newtext)))
2413 literal = Qt;
2414
2415 case_action = nochange; /* We tried an initialization */
2416 /* but some C compilers blew it */
2417
2418 ptrdiff_t num_regs = search_regs.num_regs;
2419 if (num_regs <= 0)
2420 error ("`replace-match' called before any match found");
2421
2422 sub = !NILP (subexp) ? check_integer_range (subexp, 0, num_regs - 1) : 0;
2423 ptrdiff_t sub_start = search_regs.start[sub];
2424 ptrdiff_t sub_end = search_regs.end[sub];
2425 eassert (sub_start <= sub_end);
2426
2427 /* Check whether the text to replace is present in the buffer/string. */
2428 if (! (NILP (string)
2429 ? BEGV <= sub_start && sub_end <= ZV
2430 : 0 <= sub_start && sub_end <= SCHARS (string)))
2431 {
2432 if (sub_start < 0)
2433 xsignal2 (Qerror,
2434 build_string ("replace-match subexpression does not exist"),
2435 subexp);
2436 args_out_of_range (make_fixnum (sub_start), make_fixnum (sub_end));
2437 }
2438
2439 if (NILP (fixedcase))
2440 {
2441 /* Decide how to casify by examining the matched text. */
2442 ptrdiff_t last;
2443
2444 pos = sub_start;
2445 last = sub_end;
2446
2447 if (NILP (string))
2448 pos_byte = CHAR_TO_BYTE (pos);
2449 else
2450 pos_byte = string_char_to_byte (string, pos);
2451
2452 prevc = '\n';
2453 case_action = all_caps;
2454
2455 /* some_multiletter_word is set nonzero if any original word
2456 is more than one letter long. */
2457 some_multiletter_word = 0;
2458 some_lowercase = 0;
2459 some_nonuppercase_initial = 0;
2460 some_uppercase = 0;
2461
2462 while (pos < last)
2463 {
2464 if (NILP (string))
2465 {
2466 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2467 inc_both (&pos, &pos_byte);
2468 }
2469 else
2470 c = fetch_string_char_as_multibyte_advance (string,
2471 &pos, &pos_byte);
2472
2473 if (lowercasep (c))
2474 {
2475 /* Cannot be all caps if any original char is lower case */
2476
2477 some_lowercase = 1;
2478 if (SYNTAX (prevc) != Sword)
2479 some_nonuppercase_initial = 1;
2480 else
2481 some_multiletter_word = 1;
2482 }
2483 else if (uppercasep (c))
2484 {
2485 some_uppercase = 1;
2486 if (SYNTAX (prevc) != Sword)
2487 ;
2488 else
2489 some_multiletter_word = 1;
2490 }
2491 else
2492 {
2493 /* If the initial is a caseless word constituent,
2494 treat that like a lowercase initial. */
2495 if (SYNTAX (prevc) != Sword)
2496 some_nonuppercase_initial = 1;
2497 }
2498
2499 prevc = c;
2500 }
2501
2502 /* Convert to all caps if the old text is all caps
2503 and has at least one multiletter word. */
2504 if (! some_lowercase && some_multiletter_word)
2505 case_action = all_caps;
2506 /* Capitalize each word, if the old text has all capitalized words. */
2507 else if (!some_nonuppercase_initial && some_multiletter_word)
2508 case_action = cap_initial;
2509 else if (!some_nonuppercase_initial && some_uppercase)
2510 /* Should x -> yz, operating on X, give Yz or YZ?
2511 We'll assume the latter. */
2512 case_action = all_caps;
2513 else
2514 case_action = nochange;
2515 }
2516
2517 /* Do replacement in a string. */
2518 if (!NILP (string))
2519 {
2520 Lisp_Object before, after;
2521
2522 before = Fsubstring (string, make_fixnum (0), make_fixnum (sub_start));
2523 after = Fsubstring (string, make_fixnum (sub_end), Qnil);
2524
2525 /* Substitute parts of the match into NEWTEXT
2526 if desired. */
2527 if (NILP (literal))
2528 {
2529 ptrdiff_t lastpos = 0;
2530 ptrdiff_t lastpos_byte = 0;
2531 /* We build up the substituted string in ACCUM. */
2532 Lisp_Object accum;
2533 Lisp_Object middle;
2534 ptrdiff_t length = SBYTES (newtext);
2535
2536 accum = Qnil;
2537
2538 for (pos_byte = 0, pos = 0; pos_byte < length;)
2539 {
2540 ptrdiff_t substart = -1;
2541 ptrdiff_t subend = 0;
2542 bool delbackslash = 0;
2543
2544 c = fetch_string_char_advance (newtext, &pos, &pos_byte);
2545
2546 if (c == '\\')
2547 {
2548 c = fetch_string_char_advance (newtext, &pos, &pos_byte);
2549
2550 if (c == '&')
2551 {
2552 substart = sub_start;
2553 subend = sub_end;
2554 }
2555 else if (c >= '1' && c <= '9')
2556 {
2557 if (c - '0' < num_regs
2558 && search_regs.start[c - '0'] >= 0)
2559 {
2560 substart = search_regs.start[c - '0'];
2561 subend = search_regs.end[c - '0'];
2562 }
2563 else
2564 {
2565 /* If that subexp did not match,
2566 replace \\N with nothing. */
2567 substart = 0;
2568 subend = 0;
2569 }
2570 }
2571 else if (c == '\\')
2572 delbackslash = 1;
2573 else if (c != '?')
2574 error ("Invalid use of `\\' in replacement text");
2575 }
2576 if (substart >= 0)
2577 {
2578 if (pos - 2 != lastpos)
2579 middle = substring_both (newtext, lastpos,
2580 lastpos_byte,
2581 pos - 2, pos_byte - 2);
2582 else
2583 middle = Qnil;
2584 accum = concat3 (accum, middle,
2585 Fsubstring (string,
2586 make_fixnum (substart),
2587 make_fixnum (subend)));
2588 lastpos = pos;
2589 lastpos_byte = pos_byte;
2590 }
2591 else if (delbackslash)
2592 {
2593 middle = substring_both (newtext, lastpos,
2594 lastpos_byte,
2595 pos - 1, pos_byte - 1);
2596
2597 accum = concat2 (accum, middle);
2598 lastpos = pos;
2599 lastpos_byte = pos_byte;
2600 }
2601 }
2602
2603 if (pos != lastpos)
2604 middle = substring_both (newtext, lastpos,
2605 lastpos_byte,
2606 pos, pos_byte);
2607 else
2608 middle = Qnil;
2609
2610 newtext = concat2 (accum, middle);
2611 }
2612
2613 /* Do case substitution in NEWTEXT if desired. */
2614 if (case_action == all_caps)
2615 newtext = Fupcase (newtext);
2616 else if (case_action == cap_initial)
2617 newtext = Fupcase_initials (newtext);
2618
2619 return concat3 (before, newtext, after);
2620 }
2621
2622 /* Record point. A nonpositive OPOINT is actually an offset from ZV. */
2623 opoint = PT <= sub_start ? PT : max (PT, sub_end) - ZV;
2624
2625 /* If we want non-literal replacement,
2626 perform substitution on the replacement string. */
2627 if (NILP (literal))
2628 {
2629 ptrdiff_t length = SBYTES (newtext);
2630 unsigned char *substed;
2631 ptrdiff_t substed_alloc_size, substed_len;
2632 bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2633 bool str_multibyte = STRING_MULTIBYTE (newtext);
2634 bool really_changed = 0;
2635
2636 substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
2637 ? length * 2 + 100
2638 : STRING_BYTES_BOUND);
2639 substed = xmalloc (substed_alloc_size);
2640 substed_len = 0;
2641
2642 /* Go thru NEWTEXT, producing the actual text to insert in
2643 SUBSTED while adjusting multibyteness to that of the current
2644 buffer. */
2645
2646 for (pos_byte = 0, pos = 0; pos_byte < length;)
2647 {
2648 unsigned char str[MAX_MULTIBYTE_LENGTH];
2649 const unsigned char *add_stuff = NULL;
2650 ptrdiff_t add_len = 0;
2651 ptrdiff_t idx = -1;
2652 ptrdiff_t begbyte UNINIT;
2653
2654 if (str_multibyte)
2655 {
2656 c = fetch_string_char_advance_no_check (newtext,
2657 &pos, &pos_byte);
2658 if (!buf_multibyte)
2659 c = CHAR_TO_BYTE8 (c);
2660 }
2661 else
2662 {
2663 /* Note that we don't have to increment POS. */
2664 c = SREF (newtext, pos_byte++);
2665 if (buf_multibyte)
2666 c = make_char_multibyte (c);
2667 }
2668
2669 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2670 or set IDX to a match index, which means put that part
2671 of the buffer text into SUBSTED. */
2672
2673 if (c == '\\')
2674 {
2675 really_changed = 1;
2676
2677 if (str_multibyte)
2678 {
2679 c = fetch_string_char_advance_no_check (newtext,
2680 &pos, &pos_byte);
2681 if (!buf_multibyte && !ASCII_CHAR_P (c))
2682 c = CHAR_TO_BYTE8 (c);
2683 }
2684 else
2685 {
2686 c = SREF (newtext, pos_byte++);
2687 if (buf_multibyte)
2688 c = make_char_multibyte (c);
2689 }
2690
2691 if (c == '&')
2692 idx = sub;
2693 else if ('1' <= c && c <= '9' && c - '0' < num_regs)
2694 {
2695 if (search_regs.start[c - '0'] >= 1)
2696 idx = c - '0';
2697 }
2698 else if (c == '\\')
2699 add_len = 1, add_stuff = (unsigned char *) "\\";
2700 else
2701 {
2702 xfree (substed);
2703 error ("Invalid use of `\\' in replacement text");
2704 }
2705 }
2706 else
2707 {
2708 add_len = CHAR_STRING (c, str);
2709 add_stuff = str;
2710 }
2711
2712 /* If we want to copy part of a previous match,
2713 set up ADD_STUFF and ADD_LEN to point to it. */
2714 if (idx >= 0)
2715 {
2716 begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2717 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2718 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2719 move_gap_both (search_regs.start[idx], begbyte);
2720 }
2721
2722 /* Now the stuff we want to add to SUBSTED
2723 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2724
2725 /* Make sure SUBSTED is big enough. */
2726 if (substed_alloc_size - substed_len < add_len)
2727 substed =
2728 xpalloc (substed, &substed_alloc_size,
2729 add_len - (substed_alloc_size - substed_len),
2730 STRING_BYTES_BOUND, 1);
2731
2732 /* We compute this after the call to xpalloc, because that
2733 could cause buffer text be relocated when ralloc.c is used. */
2734 if (idx >= 0)
2735 add_stuff = BYTE_POS_ADDR (begbyte);
2736
2737 /* Now add to the end of SUBSTED. */
2738 if (add_stuff)
2739 {
2740 memcpy (substed + substed_len, add_stuff, add_len);
2741 substed_len += add_len;
2742 }
2743 }
2744
2745 if (really_changed)
2746 newtext = make_specified_string ((const char *) substed, -1,
2747 substed_len, buf_multibyte);
2748 xfree (substed);
2749 }
2750
2751 newpoint = sub_start + SCHARS (newtext);
2752
2753 /* Replace the old text with the new in the cleanest possible way. */
2754 replace_range (sub_start, sub_end, newtext, 1, 0, 1, true, true);
2755
2756 if (case_action == all_caps)
2757 Fupcase_region (make_fixnum (search_regs.start[sub]),
2758 make_fixnum (newpoint),
2759 Qnil);
2760 else if (case_action == cap_initial)
2761 Fupcase_initials_region (make_fixnum (search_regs.start[sub]),
2762 make_fixnum (newpoint), Qnil);
2763
2764 /* The replace_range etc. functions can trigger modification hooks
2765 (see signal_before_change and signal_after_change). Try to error
2766 out if these hooks clobber the match data since clobbering can
2767 result in confusing bugs. We used to check for changes in
2768 search_regs start and end, but that fails if modification hooks
2769 remove or add text earlier in the buffer, so just check num_regs
2770 now. */
2771 if (search_regs.num_regs != num_regs)
2772 error ("Match data clobbered by buffer modification hooks");
2773
2774 /* Put point back where it was in the text, if possible. */
2775 TEMP_SET_PT (clip_to_bounds (BEGV, opoint + (opoint <= 0 ? ZV : 0), ZV));
2776 /* Now move point "officially" to the end of the inserted replacement. */
2777 move_if_not_intangible (newpoint);
2778
2779 signal_after_change (sub_start, sub_end - sub_start, SCHARS (newtext));
2780 update_compositions (sub_start, newpoint, CHECK_BORDER);
2781
2782 return Qnil;
2783 }
2784
2785 static Lisp_Object
2786 match_limit (Lisp_Object num, bool beginningp)
2787 {
2788 EMACS_INT n;
2789
2790 CHECK_FIXNUM (num);
2791 n = XFIXNUM (num);
2792 if (n < 0)
2793 args_out_of_range (num, make_fixnum (0));
2794 if (search_regs.num_regs <= 0)
2795 error ("No match data, because no search succeeded");
2796 if (n >= search_regs.num_regs
2797 || search_regs.start[n] < 0)
2798 return Qnil;
2799 return (make_fixnum ((beginningp) ? search_regs.start[n]
2800 : search_regs.end[n]));
2801 }
2802
2803 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2804 doc: /* Return position of start of text matched by last search.
2805 SUBEXP, a number, specifies which parenthesized expression in the last
2806 regexp.
2807 Value is nil if SUBEXPth pair didn't match, or there were less than
2808 SUBEXP pairs.
2809 Zero means the entire text matched by the whole regexp or whole string.
2810
2811 Return value is undefined if the last search failed. */)
2812 (Lisp_Object subexp)
2813 {
2814 return match_limit (subexp, 1);
2815 }
2816
2817 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2818 doc: /* Return position of end of text matched by last search.
2819 SUBEXP, a number, specifies which parenthesized expression in the last
2820 regexp.
2821 Value is nil if SUBEXPth pair didn't match, or there were less than
2822 SUBEXP pairs.
2823 Zero means the entire text matched by the whole regexp or whole string.
2824
2825 Return value is undefined if the last search failed. */)
2826 (Lisp_Object subexp)
2827 {
2828 return match_limit (subexp, 0);
2829 }
2830
2831 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2832 doc: /* Return a list of positions that record text matched by the last search.
2833 Element 2N of the returned list is the position of the beginning of the
2834 match of the Nth subexpression; it corresponds to `(match-beginning N)';
2835 element 2N + 1 is the position of the end of the match of the Nth
2836 subexpression; it corresponds to `(match-end N)'. See `match-beginning'
2837 and `match-end'.
2838 If the last search was on a buffer, all the elements are by default
2839 markers or nil (nil when the Nth pair didn't match); they are integers
2840 or nil if the search was on a string. But if the optional argument
2841 INTEGERS is non-nil, the elements that represent buffer positions are
2842 always integers, not markers, and (if the search was on a buffer) the
2843 buffer itself is appended to the list as one additional element.
2844
2845 Use `set-match-data' to reinstate the match data from the elements of
2846 this list.
2847
2848 Note that non-matching optional groups at the end of the regexp are
2849 elided instead of being represented with two `nil's each. For instance:
2850
2851 (progn
2852 (string-match "^\\(a\\)?\\(b\\)\\(c\\)?$" "b")
2853 (match-data))
2854 => (0 1 nil nil 0 1)
2855
2856 If REUSE is a list, store the value in REUSE by destructively modifying it.
2857 If REUSE is long enough to hold all the values, its length remains the
2858 same, and any unused elements are set to nil. If REUSE is not long
2859 enough, it is extended. Note that if REUSE is long enough and INTEGERS
2860 is non-nil, no consing is done to make the return value; this minimizes GC.
2861
2862 If optional third argument RESEAT is non-nil, any previous markers on the
2863 REUSE list will be modified to point to nowhere.
2864
2865 Return value is undefined if the last search failed. */)
2866 (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2867 {
2868 Lisp_Object tail, prev;
2869 Lisp_Object *data;
2870 ptrdiff_t i, len;
2871
2872 if (!NILP (reseat))
2873 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2874 if (MARKERP (XCAR (tail)))
2875 {
2876 unchain_marker (XMARKER (XCAR (tail)));
2877 XSETCAR (tail, Qnil);
2878 }
2879
2880 if (NILP (last_thing_searched))
2881 return Qnil;
2882
2883 prev = Qnil;
2884
2885 USE_SAFE_ALLOCA;
2886 SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
2887
2888 len = 0;
2889 for (i = 0; i < search_regs.num_regs; i++)
2890 {
2891 ptrdiff_t start = search_regs.start[i];
2892 if (start >= 0)
2893 {
2894 if (EQ (last_thing_searched, Qt)
2895 || ! NILP (integers))
2896 {
2897 XSETFASTINT (data[2 * i], start);
2898 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2899 }
2900 else if (BUFFERP (last_thing_searched))
2901 {
2902 data[2 * i] = Fmake_marker ();
2903 Fset_marker (data[2 * i],
2904 make_fixnum (start),
2905 last_thing_searched);
2906 data[2 * i + 1] = Fmake_marker ();
2907 Fset_marker (data[2 * i + 1],
2908 make_fixnum (search_regs.end[i]),
2909 last_thing_searched);
2910 }
2911 else
2912 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2913 emacs_abort ();
2914
2915 len = 2 * i + 2;
2916 }
2917 else
2918 data[2 * i] = data[2 * i + 1] = Qnil;
2919 }
2920
2921 if (BUFFERP (last_thing_searched) && !NILP (integers))
2922 {
2923 data[len] = last_thing_searched;
2924 len++;
2925 }
2926
2927 /* If REUSE is not usable, cons up the values and return them. */
2928 if (! CONSP (reuse))
2929 reuse = Flist (len, data);
2930 else
2931 {
2932 /* If REUSE is a list, store as many value elements as will fit
2933 into the elements of REUSE. */
2934 for (i = 0, tail = reuse; CONSP (tail);
2935 i++, tail = XCDR (tail))
2936 {
2937 if (i < len)
2938 XSETCAR (tail, data[i]);
2939 else
2940 XSETCAR (tail, Qnil);
2941 prev = tail;
2942 }
2943
2944 /* If we couldn't fit all value elements into REUSE,
2945 cons up the rest of them and add them to the end of REUSE. */
2946 if (i < len)
2947 XSETCDR (prev, Flist (len - i, data + i));
2948 }
2949
2950 SAFE_FREE ();
2951 return reuse;
2952 }
2953
2954 /* We used to have an internal use variant of `reseat' described as:
2955
2956 If RESEAT is `evaporate', put the markers back on the free list
2957 immediately. No other references to the markers must exist in this
2958 case, so it is used only internally on the unwind stack and
2959 save-match-data from Lisp.
2960
2961 But it was ill-conceived: those supposedly-internal markers get exposed via
2962 the undo-list, so freeing them here is unsafe. */
2963
2964 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2965 doc: /* Set internal data on last search match from elements of LIST.
2966 LIST should have been created by calling `match-data' previously.
2967
2968 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2969 (register Lisp_Object list, Lisp_Object reseat)
2970 {
2971 ptrdiff_t i;
2972 register Lisp_Object marker;
2973
2974 if (running_asynch_code)
2975 save_search_regs ();
2976
2977 CHECK_LIST (list);
2978
2979 /* Unless we find a marker with a buffer or an explicit buffer
2980 in LIST, assume that this match data came from a string. */
2981 last_thing_searched = Qt;
2982
2983 /* Allocate registers if they don't already exist. */
2984 {
2985 ptrdiff_t length = list_length (list) / 2;
2986
2987 if (length > search_regs.num_regs)
2988 {
2989 ptrdiff_t num_regs = search_regs.num_regs;
2990 search_regs.start =
2991 xpalloc (search_regs.start, &num_regs, length - num_regs,
2992 min (PTRDIFF_MAX, UINT_MAX), sizeof *search_regs.start);
2993 search_regs.end =
2994 xrealloc (search_regs.end, num_regs * sizeof *search_regs.end);
2995
2996 for (i = search_regs.num_regs; i < num_regs; i++)
2997 search_regs.start[i] = -1;
2998
2999 search_regs.num_regs = num_regs;
3000 }
3001
3002 for (i = 0; CONSP (list); i++)
3003 {
3004 marker = XCAR (list);
3005 if (BUFFERP (marker))
3006 {
3007 last_thing_searched = marker;
3008 break;
3009 }
3010 if (i >= length)
3011 break;
3012 if (NILP (marker))
3013 {
3014 search_regs.start[i] = -1;
3015 list = XCDR (list);
3016 }
3017 else
3018 {
3019 Lisp_Object from;
3020 Lisp_Object m;
3021
3022 m = marker;
3023 if (MARKERP (marker))
3024 {
3025 if (XMARKER (marker)->buffer == 0)
3026 XSETFASTINT (marker, 0);
3027 else
3028 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3029 }
3030
3031 CHECK_FIXNUM_COERCE_MARKER (marker);
3032 from = marker;
3033
3034 if (!NILP (reseat) && MARKERP (m))
3035 {
3036 unchain_marker (XMARKER (m));
3037 XSETCAR (list, Qnil);
3038 }
3039
3040 if ((list = XCDR (list), !CONSP (list)))
3041 break;
3042
3043 m = marker = XCAR (list);
3044
3045 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3046 XSETFASTINT (marker, 0);
3047
3048 CHECK_FIXNUM_COERCE_MARKER (marker);
3049 if (PTRDIFF_MIN <= XFIXNUM (from) && XFIXNUM (from) <= PTRDIFF_MAX
3050 && PTRDIFF_MIN <= XFIXNUM (marker)
3051 && XFIXNUM (marker) <= PTRDIFF_MAX)
3052 {
3053 search_regs.start[i] = XFIXNUM (from);
3054 search_regs.end[i] = XFIXNUM (marker);
3055 }
3056 else
3057 {
3058 search_regs.start[i] = -1;
3059 }
3060
3061 if (!NILP (reseat) && MARKERP (m))
3062 {
3063 unchain_marker (XMARKER (m));
3064 XSETCAR (list, Qnil);
3065 }
3066 }
3067 list = XCDR (list);
3068 }
3069
3070 for (; i < search_regs.num_regs; i++)
3071 search_regs.start[i] = -1;
3072 }
3073
3074 return Qnil;
3075 }
3076
3077 DEFUN ("match-data--translate", Fmatch_data__translate, Smatch_data__translate,
3078 1, 1, 0,
3079 doc: /* Add N to all positions in the match data. Internal. */)
3080 (Lisp_Object n)
3081 {
3082 CHECK_FIXNUM (n);
3083 EMACS_INT delta = XFIXNUM (n);
3084 if (!NILP (last_thing_searched))
3085 for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
3086 if (search_regs.start[i] >= 0)
3087 {
3088 search_regs.start[i] = max (0, search_regs.start[i] + delta);
3089 search_regs.end[i] = max (0, search_regs.end[i] + delta);
3090 }
3091 return Qnil;
3092 }
3093
3094 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3095 if asynchronous code (filter or sentinel) is running. */
3096 static void
3097 save_search_regs (void)
3098 {
3099 if (saved_search_regs.num_regs == 0)
3100 {
3101 saved_search_regs = search_regs;
3102 saved_last_thing_searched = last_thing_searched;
3103 last_thing_searched = Qnil;
3104 search_regs.num_regs = 0;
3105 search_regs.start = 0;
3106 search_regs.end = 0;
3107 }
3108 }
3109
3110 /* Called upon exit from filters and sentinels. */
3111 void
3112 restore_search_regs (void)
3113 {
3114 if (saved_search_regs.num_regs != 0)
3115 {
3116 if (search_regs.num_regs > 0)
3117 {
3118 xfree (search_regs.start);
3119 xfree (search_regs.end);
3120 }
3121 search_regs = saved_search_regs;
3122 last_thing_searched = saved_last_thing_searched;
3123 saved_last_thing_searched = Qnil;
3124 saved_search_regs.num_regs = 0;
3125 }
3126 }
3127
3128 /* Called from replace-match via replace_range. */
3129 void
3130 update_search_regs (ptrdiff_t oldstart, ptrdiff_t oldend, ptrdiff_t newend)
3131 {
3132 /* Adjust search data for this change. */
3133 ptrdiff_t change = newend - oldend;
3134 ptrdiff_t i;
3135
3136 for (i = 0; i < search_regs.num_regs; i++)
3137 {
3138 if (search_regs.start[i] >= oldend)
3139 search_regs.start[i] += change;
3140 else if (search_regs.start[i] > oldstart)
3141 search_regs.start[i] = oldstart;
3142 if (search_regs.end[i] >= oldend)
3143 search_regs.end[i] += change;
3144 else if (search_regs.end[i] > oldstart)
3145 search_regs.end[i] = oldstart;
3146 }
3147 }
3148
3149 static void
3150 unwind_set_match_data (Lisp_Object list)
3151 {
3152 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3153 Fset_match_data (list, Qt);
3154 }
3155
3156 /* Called to unwind protect the match data. */
3157 void
3158 record_unwind_save_match_data (void)
3159 {
3160 record_unwind_protect (unwind_set_match_data,
3161 Fmatch_data (Qnil, Qnil, Qnil));
3162 }
3163
3164 /* Quote a string to deactivate reg-expr chars */
3165
3166 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3167 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3168 (Lisp_Object string)
3169 {
3170 char *in, *out, *end;
3171 char *temp;
3172 ptrdiff_t backslashes_added = 0;
3173
3174 CHECK_STRING (string);
3175
3176 USE_SAFE_ALLOCA;
3177 SAFE_NALLOCA (temp, 2, SBYTES (string));
3178
3179 /* Now copy the data into the new string, inserting escapes. */
3180
3181 in = SSDATA (string);
3182 end = in + SBYTES (string);
3183 out = temp;
3184
3185 for (; in != end; in++)
3186 {
3187 if (*in == '['
3188 || *in == '*' || *in == '.' || *in == '\\'
3189 || *in == '?' || *in == '+'
3190 || *in == '^' || *in == '$')
3191 *out++ = '\\', backslashes_added++;
3192 *out++ = *in;
3193 }
3194
3195 Lisp_Object result
3196 = (backslashes_added > 0
3197 ? make_specified_string (temp,
3198 SCHARS (string) + backslashes_added,
3199 out - temp,
3200 STRING_MULTIBYTE (string))
3201 : string);
3202 SAFE_FREE ();
3203 return result;
3204 }
3205
3206 /* Like find_newline, but doesn't use the cache, and only searches forward. */
3207 ptrdiff_t
3208 find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
3209 ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
3210 ptrdiff_t *bytepos, bool allow_quit)
3211 {
3212 if (count > 0)
3213 {
3214 if (!end)
3215 end = ZV, end_byte = ZV_BYTE;
3216 }
3217 else
3218 {
3219 if (!end)
3220 end = BEGV, end_byte = BEGV_BYTE;
3221 }
3222 if (end_byte == -1)
3223 end_byte = CHAR_TO_BYTE (end);
3224
3225 if (counted)
3226 *counted = count;
3227
3228 if (count > 0)
3229 while (start != end)
3230 {
3231 /* Our innermost scanning loop is very simple; it doesn't know
3232 about gaps, buffer ends, or the newline cache. ceiling is
3233 the position of the last character before the next such
3234 obstacle --- the last character the dumb search loop should
3235 examine. */
3236 ptrdiff_t tem, ceiling_byte = end_byte - 1;
3237
3238 if (start_byte == -1)
3239 start_byte = CHAR_TO_BYTE (start);
3240
3241 /* The dumb loop can only scan text stored in contiguous
3242 bytes. BUFFER_CEILING_OF returns the last character
3243 position that is contiguous, so the ceiling is the
3244 position after that. */
3245 tem = BUFFER_CEILING_OF (start_byte);
3246 ceiling_byte = min (tem, ceiling_byte);
3247
3248 {
3249 /* The termination address of the dumb loop. */
3250 unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
3251 ptrdiff_t lim_byte = ceiling_byte + 1;
3252
3253 /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
3254 of the base, the cursor, and the next line. */
3255 ptrdiff_t base = start_byte - lim_byte;
3256 ptrdiff_t cursor, next;
3257
3258 for (cursor = base; cursor < 0; cursor = next)
3259 {
3260 /* The dumb loop. */
3261 unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
3262 next = nl ? nl - lim_addr : 0;
3263
3264 if (! nl)
3265 break;
3266 next++;
3267
3268 if (--count == 0)
3269 {
3270 if (bytepos)
3271 *bytepos = lim_byte + next;
3272 return BYTE_TO_CHAR (lim_byte + next);
3273 }
3274 if (allow_quit)
3275 maybe_quit ();
3276 }
3277
3278 start_byte = lim_byte;
3279 start = BYTE_TO_CHAR (start_byte);
3280 }
3281 }
3282
3283 if (counted)
3284 *counted -= count;
3285 if (bytepos)
3286 {
3287 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
3288 eassert (*bytepos == CHAR_TO_BYTE (start));
3289 }
3290 return start;
3291 }
3292
3293 DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
3294 0, 1, 0,
3295 doc: /* Check the newline cache of BUFFER against buffer contents.
3296
3297 BUFFER defaults to the current buffer.
3298
3299 Value is an array of 2 sub-arrays of buffer positions for newlines,
3300 the first based on the cache, the second based on actually scanning
3301 the buffer. If the buffer doesn't have a cache, the value is nil. */)
3302 (Lisp_Object buffer)
3303 {
3304 struct buffer *buf, *old = NULL;
3305 ptrdiff_t nl_count_cache, nl_count_buf;
3306 Lisp_Object cache_newlines, buf_newlines, val;
3307 ptrdiff_t from, found, i;
3308
3309 if (NILP (buffer))
3310 buf = current_buffer;
3311 else
3312 {
3313 CHECK_BUFFER (buffer);
3314 buf = XBUFFER (buffer);
3315 old = current_buffer;
3316 }
3317 if (buf->base_buffer)
3318 buf = buf->base_buffer;
3319
3320 /* If the buffer doesn't have a newline cache, return nil. */
3321 if (NILP (BVAR (buf, cache_long_scans))
3322 || buf->newline_cache == NULL)
3323 return Qnil;
3324
3325 /* find_newline can only work on the current buffer. */
3326 if (old != NULL)
3327 set_buffer_internal_1 (buf);
3328
3329 /* How many newlines are there according to the cache? */
3330 find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3331 TYPE_MAXIMUM (ptrdiff_t), &nl_count_cache, NULL, true);
3332
3333 /* Create vector and populate it. */
3334 cache_newlines = make_vector (nl_count_cache, make_fixnum (-1));
3335
3336 if (nl_count_cache)
3337 {
3338 for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3339 {
3340 ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
3341
3342 found = find_newline (from, from_byte, 0, -1, 1, &counted,
3343 NULL, true);
3344 if (counted == 0 || i >= nl_count_cache)
3345 break;
3346 ASET (cache_newlines, i, make_fixnum (found - 1));
3347 }
3348 }
3349
3350 /* Now do the same, but without using the cache. */
3351 find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3352 TYPE_MAXIMUM (ptrdiff_t), &nl_count_buf, NULL, true);
3353 buf_newlines = make_vector (nl_count_buf, make_fixnum (-1));
3354 if (nl_count_buf)
3355 {
3356 for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3357 {
3358 ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
3359
3360 found = find_newline1 (from, from_byte, 0, -1, 1, &counted,
3361 NULL, true);
3362 if (counted == 0 || i >= nl_count_buf)
3363 break;
3364 ASET (buf_newlines, i, make_fixnum (found - 1));
3365 }
3366 }
3367
3368 /* Construct the value and return it. */
3369 val = CALLN (Fvector, cache_newlines, buf_newlines);
3370
3371 if (old != NULL)
3372 set_buffer_internal_1 (old);
3373 return val;
3374 }
3375
3376
3377 static void syms_of_search_for_pdumper (void);
3378
3379 void
3380 syms_of_search (void)
3381 {
3382 for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
3383 {
3384 staticpro (&searchbufs[i].regexp);
3385 staticpro (&searchbufs[i].f_whitespace_regexp);
3386 staticpro (&searchbufs[i].syntax_table);
3387 }
3388
3389 /* Error condition used for failing searches. */
3390 DEFSYM (Qsearch_failed, "search-failed");
3391
3392 /* Error condition used for failing searches started by user, i.e.,
3393 where failure should not invoke the debugger. */
3394 DEFSYM (Quser_search_failed, "user-search-failed");
3395
3396 /* Error condition signaled when regexp compile_pattern fails. */
3397 DEFSYM (Qinvalid_regexp, "invalid-regexp");
3398
3399 Fput (Qsearch_failed, Qerror_conditions,
3400 pure_list (Qsearch_failed, Qerror));
3401 Fput (Qsearch_failed, Qerror_message,
3402 build_pure_c_string ("Search failed"));
3403
3404 Fput (Quser_search_failed, Qerror_conditions,
3405 pure_list (Quser_search_failed, Quser_error, Qsearch_failed, Qerror));
3406 Fput (Quser_search_failed, Qerror_message,
3407 build_pure_c_string ("Search failed"));
3408
3409 Fput (Qinvalid_regexp, Qerror_conditions,
3410 pure_list (Qinvalid_regexp, Qerror));
3411 Fput (Qinvalid_regexp, Qerror_message,
3412 build_pure_c_string ("Invalid regexp"));
3413
3414 re_match_object = Qnil;
3415 staticpro (&re_match_object);
3416
3417 DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3418 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3419 Some commands use this for user-specified regexps.
3420 Spaces that occur inside character classes or repetition operators
3421 or other such regexp constructs are not replaced with this.
3422 A value of nil (which is the normal value) means treat spaces
3423 literally. Note that a value with capturing groups can change the
3424 numbering of existing capture groups in unexpected ways. */);
3425 Vsearch_spaces_regexp = Qnil;
3426
3427 DEFSYM (Qinhibit_changing_match_data, "inhibit-changing-match-data");
3428 DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3429 doc: /* Internal use only.
3430 If non-nil, the primitive searching and matching functions
3431 such as `looking-at', `string-match', `re-search-forward', etc.,
3432 do not set the match data. The proper way to use this variable
3433 is to bind it with `let' around a small expression. */);
3434 Vinhibit_changing_match_data = Qnil;
3435
3436 defsubr (&Slooking_at);
3437 defsubr (&Sposix_looking_at);
3438 defsubr (&Sstring_match);
3439 defsubr (&Sposix_string_match);
3440 defsubr (&Ssearch_forward);
3441 defsubr (&Ssearch_backward);
3442 defsubr (&Sre_search_forward);
3443 defsubr (&Sre_search_backward);
3444 defsubr (&Sposix_search_forward);
3445 defsubr (&Sposix_search_backward);
3446 defsubr (&Sreplace_match);
3447 defsubr (&Smatch_beginning);
3448 defsubr (&Smatch_end);
3449 defsubr (&Smatch_data);
3450 defsubr (&Sset_match_data);
3451 defsubr (&Smatch_data__translate);
3452 defsubr (&Sregexp_quote);
3453 defsubr (&Snewline_cache_check);
3454
3455 pdumper_do_now_and_after_load (syms_of_search_for_pdumper);
3456 }
3457
3458 static void
3459 syms_of_search_for_pdumper (void)
3460 {
3461 for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
3462 {
3463 searchbufs[i].buf.allocated = 100;
3464 searchbufs[i].buf.buffer = xmalloc (100);
3465 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3466 searchbufs[i].regexp = Qnil;
3467 searchbufs[i].f_whitespace_regexp = Qnil;
3468 searchbufs[i].busy = false;
3469 searchbufs[i].syntax_table = Qnil;
3470 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3471 }
3472 searchbuf_head = &searchbufs[0];
3473 }