1 /* String search routines for GNU Emacs.
2
3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2023 Free Software
4 Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or (at
11 your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
20
21
22 #include <config.h>
23
24 #include "lisp.h"
25 #include "character.h"
26 #include "buffer.h"
27 #include "syntax.h"
28 #include "charset.h"
29 #include "region-cache.h"
30 #include "blockinput.h"
31 #include "intervals.h"
32 #include "pdumper.h"
33 #include "composite.h"
34
35 #include "regex-emacs.h"
36
37 #define REGEXP_CACHE_SIZE 20
38
39 /* If the regexp is non-nil, then the buffer contains the compiled form
40 of that regexp, suitable for searching. */
41 struct regexp_cache
42 {
43 struct regexp_cache *next;
44 Lisp_Object regexp, f_whitespace_regexp;
45 /* Syntax table for which the regexp applies. We need this because
46 of character classes. If this is t, then the compiled pattern is valid
47 for any syntax-table. */
48 Lisp_Object syntax_table;
49 struct re_pattern_buffer buf;
50 char fastmap[0400];
51 /* True means regexp was compiled to do full POSIX backtracking. */
52 bool posix;
53 /* True means we're inside a buffer match. */
54 bool busy;
55 };
56
57 /* The instances of that struct. */
58 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
59
60 /* The head of the linked list; points to the most recently used buffer. */
61 static struct regexp_cache *searchbuf_head;
62
63 static void set_search_regs (ptrdiff_t, ptrdiff_t);
64 static void save_search_regs (void);
65 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
66 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
67 ptrdiff_t, ptrdiff_t);
68 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
69 Lisp_Object, Lisp_Object, ptrdiff_t,
70 ptrdiff_t, int);
71
72 Lisp_Object re_match_object;
73
74 static AVOID
75 matcher_overflow (void)
76 {
77 error ("Stack overflow in regexp matcher");
78 }
79
80 static void
81 freeze_buffer_relocation (void)
82 {
83 #ifdef REL_ALLOC
84 /* Prevent ralloc.c from relocating the current buffer while
85 searching it. */
86 r_alloc_inhibit_buffer_relocation (1);
87 record_unwind_protect_int (r_alloc_inhibit_buffer_relocation, 0);
88 #endif
89 }
90
91 /* Compile a regexp and signal a Lisp error if anything goes wrong.
92 PATTERN is the pattern to compile.
93 CP is the place to put the result.
94 TRANSLATE is a translation table for ignoring case, or nil for none.
95 POSIX is true if we want full backtracking (POSIX style) for this pattern.
96 False means backtrack only enough to get a valid match.
97
98 The behavior also depends on Vsearch_spaces_regexp. */
99
100 static void
101 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
102 Lisp_Object translate, bool posix)
103 {
104 const char *whitespace_regexp;
105 char *val;
106
107 eassert (!cp->busy);
108 cp->regexp = Qnil;
109 cp->buf.translate = translate;
110 cp->posix = posix;
111 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
112 cp->buf.charset_unibyte = charset_unibyte;
113 if (STRINGP (Vsearch_spaces_regexp))
114 cp->f_whitespace_regexp = Vsearch_spaces_regexp;
115 else
116 cp->f_whitespace_regexp = Qnil;
117
118 whitespace_regexp = STRINGP (Vsearch_spaces_regexp) ?
119 SSDATA (Vsearch_spaces_regexp) : NULL;
120
121 val = (char *) re_compile_pattern (SSDATA (pattern), SBYTES (pattern),
122 posix, whitespace_regexp, &cp->buf);
123
124 /* If the compiled pattern hard codes some of the contents of the
125 syntax-table, it can only be reused with *this* syntax table. */
126 cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
127
128 if (val)
129 xsignal1 (Qinvalid_regexp, build_string (val));
130
131 cp->regexp = Fcopy_sequence (pattern);
132 }
133
134 /* Shrink each compiled regexp buffer in the cache
135 to the size actually used right now.
136 This is called from garbage collection. */
137
138 void
139 shrink_regexp_cache (void)
140 {
141 struct regexp_cache *cp;
142
143 for (cp = searchbuf_head; cp != 0; cp = cp->next)
144 if (!cp->busy)
145 {
146 cp->buf.allocated = cp->buf.used;
147 cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
148 }
149 }
150
151 /* Clear the regexp cache w.r.t. a particular syntax table,
152 because it was changed.
153 There is no danger of memory leak here because re_compile_pattern
154 automagically manages the memory in each re_pattern_buffer struct,
155 based on its `allocated' and `buffer' values. */
156 void
157 clear_regexp_cache (void)
158 {
159 int i;
160
161 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
162 /* It's tempting to compare with the syntax-table we've actually changed,
163 but it's not sufficient because char-table inheritance means that
164 modifying one syntax-table can change others at the same time. */
165 if (!searchbufs[i].busy && !BASE_EQ (searchbufs[i].syntax_table, Qt))
166 searchbufs[i].regexp = Qnil;
167 }
168
169 static void
170 unfreeze_pattern (void *arg)
171 {
172 struct regexp_cache *searchbuf = arg;
173 searchbuf->busy = false;
174 }
175
176 static void
177 freeze_pattern (struct regexp_cache *searchbuf)
178 {
179 eassert (!searchbuf->busy);
180 record_unwind_protect_ptr (unfreeze_pattern, searchbuf);
181 searchbuf->busy = true;
182 }
183
184 /* Compile a regexp if necessary, but first check to see if there's one in
185 the cache.
186 PATTERN is the pattern to compile.
187 TRANSLATE is a translation table for ignoring case, or nil for none.
188 REGP is the structure that says where to store the "register"
189 values that will result from matching this pattern.
190 If it is 0, we should compile the pattern not to record any
191 subexpression bounds.
192 POSIX is true if we want full backtracking (POSIX style) for this pattern.
193 False means backtrack only enough to get a valid match. */
194
195 static struct regexp_cache *
196 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
197 Lisp_Object translate, bool posix, bool multibyte)
198 {
199 struct regexp_cache *cp, **cpp, **lru_nonbusy;
200
201 for (cpp = &searchbuf_head, lru_nonbusy = NULL; ; cpp = &cp->next)
202 {
203 cp = *cpp;
204 if (!cp->busy)
205 lru_nonbusy = cpp;
206 /* Entries are initialized to nil, and may be set to nil by
207 compile_pattern_1 if the pattern isn't valid. Don't apply
208 string accessors in those cases. However, compile_pattern_1
209 is only applied to the cache entry we pick here to reuse. So
210 nil should never appear before a non-nil entry. */
211 if (NILP (cp->regexp))
212 goto compile_it;
213 if (SCHARS (cp->regexp) == SCHARS (pattern)
214 && !cp->busy
215 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
216 && !NILP (Fstring_equal (cp->regexp, pattern))
217 && BASE_EQ (cp->buf.translate, translate)
218 && cp->posix == posix
219 && (BASE_EQ (cp->syntax_table, Qt)
220 || BASE_EQ (cp->syntax_table,
221 BVAR (current_buffer, syntax_table)))
222 && !NILP (Fequal (cp->f_whitespace_regexp, Vsearch_spaces_regexp))
223 && cp->buf.charset_unibyte == charset_unibyte)
224 break;
225
226 /* If we're at the end of the cache, compile into the last
227 (least recently used) non-busy cell in the cache. */
228 if (cp->next == 0)
229 {
230 if (!lru_nonbusy)
231 error ("Too much matching reentrancy");
232 cpp = lru_nonbusy;
233 cp = *cpp;
234 compile_it:
235 eassert (!cp->busy);
236 compile_pattern_1 (cp, pattern, translate, posix);
237 break;
238 }
239 }
240
241 /* When we get here, cp (aka *cpp) contains the compiled pattern,
242 either because we found it in the cache or because we just compiled it.
243 Move it to the front of the queue to mark it as most recently used. */
244 *cpp = cp->next;
245 cp->next = searchbuf_head;
246 searchbuf_head = cp;
247
248 /* Advise the searching functions about the space we have allocated
249 for register data. */
250 if (regp)
251 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
252
253 /* The compiled pattern can be used both for multibyte and unibyte
254 target. But, we have to tell which the pattern is used for. */
255 cp->buf.target_multibyte = multibyte;
256 return cp;
257 }
258
259
260 static Lisp_Object
261 looking_at_1 (Lisp_Object string, bool posix, bool modify_data)
262 {
263 Lisp_Object val;
264 unsigned char *p1, *p2;
265 ptrdiff_t s1, s2;
266 register ptrdiff_t i;
267
268 if (running_asynch_code)
269 save_search_regs ();
270
271 /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
272 table. */
273 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
274 BVAR (current_buffer, case_eqv_table));
275
276 CHECK_STRING (string);
277
278 /* Snapshot in case Lisp changes the value. */
279 bool modify_match_data = NILP (Vinhibit_changing_match_data) && modify_data;
280
281 struct regexp_cache *cache_entry = compile_pattern (
282 string,
283 modify_match_data ? &search_regs : NULL,
284 (!NILP (BVAR (current_buffer, case_fold_search))
285 ? BVAR (current_buffer, case_canon_table) : Qnil),
286 posix,
287 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
288
289 /* Do a pending quit right away, to avoid paradoxical behavior */
290 maybe_quit ();
291
292 /* Get pointers and sizes of the two strings
293 that make up the visible portion of the buffer. */
294
295 p1 = BEGV_ADDR;
296 s1 = GPT_BYTE - BEGV_BYTE;
297 p2 = GAP_END_ADDR;
298 s2 = ZV_BYTE - GPT_BYTE;
299 if (s1 < 0)
300 {
301 p2 = p1;
302 s2 = ZV_BYTE - BEGV_BYTE;
303 s1 = 0;
304 }
305 if (s2 < 0)
306 {
307 s1 = ZV_BYTE - BEGV_BYTE;
308 s2 = 0;
309 }
310
311 specpdl_ref count = SPECPDL_INDEX ();
312 freeze_buffer_relocation ();
313 freeze_pattern (cache_entry);
314 re_match_object = Qnil;
315 i = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
316 PT_BYTE - BEGV_BYTE,
317 modify_match_data ? &search_regs : NULL,
318 ZV_BYTE - BEGV_BYTE);
319
320 if (i == -2)
321 {
322 unbind_to (count, Qnil);
323 matcher_overflow ();
324 }
325
326 val = (i >= 0 ? Qt : Qnil);
327 if (modify_match_data && i >= 0)
328 {
329 for (i = 0; i < search_regs.num_regs; i++)
330 if (search_regs.start[i] >= 0)
331 {
332 search_regs.start[i]
333 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
334 search_regs.end[i]
335 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
336 }
337 /* Set last_thing_searched only when match data is changed. */
338 XSETBUFFER (last_thing_searched, current_buffer);
339 }
340
341 return unbind_to (count, val);
342 }
343
344 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 2, 0,
345 doc: /* Return t if text after point matches regular expression REGEXP.
346 By default, this function modifies the match data that
347 `match-beginning', `match-end' and `match-data' access. If
348 INHIBIT-MODIFY is non-nil, don't modify the match data. */)
349 (Lisp_Object regexp, Lisp_Object inhibit_modify)
350 {
351 return looking_at_1 (regexp, 0, NILP (inhibit_modify));
352 }
353
354 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 2, 0,
355 doc: /* Return t if text after point matches REGEXP according to Posix rules.
356 Find the longest match, in accordance with Posix regular expression rules.
357
358 By default, this function modifies the match data that
359 `match-beginning', `match-end' and `match-data' access. If
360 INHIBIT-MODIFY is non-nil, don't modify the match data. */)
361 (Lisp_Object regexp, Lisp_Object inhibit_modify)
362 {
363 return looking_at_1 (regexp, 1, NILP (inhibit_modify));
364 }
365
366 static Lisp_Object
367 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
368 bool posix, bool modify_data)
369 {
370 ptrdiff_t val;
371 EMACS_INT pos;
372 ptrdiff_t pos_byte, i;
373 bool modify_match_data = NILP (Vinhibit_changing_match_data) && modify_data;
374
375 if (running_asynch_code)
376 save_search_regs ();
377
378 CHECK_STRING (regexp);
379 CHECK_STRING (string);
380
381 if (NILP (start))
382 pos = 0, pos_byte = 0;
383 else
384 {
385 ptrdiff_t len = SCHARS (string);
386
387 CHECK_FIXNUM (start);
388 pos = XFIXNUM (start);
389 if (pos < 0 && -pos <= len)
390 pos = len + pos;
391 else if (0 > pos || pos > len)
392 args_out_of_range (string, start);
393 pos_byte = string_char_to_byte (string, pos);
394 }
395
396 /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
397 table. */
398 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
399 BVAR (current_buffer, case_eqv_table));
400
401 specpdl_ref count = SPECPDL_INDEX ();
402 struct regexp_cache *cache_entry
403 = compile_pattern (regexp,
404 modify_match_data ? &search_regs : NULL,
405 (!NILP (BVAR (current_buffer, case_fold_search))
406 ? BVAR (current_buffer, case_canon_table)
407 : Qnil),
408 posix,
409 STRING_MULTIBYTE (string));
410 freeze_pattern (cache_entry);
411 re_match_object = string;
412 val = re_search (&cache_entry->buf, SSDATA (string),
413 SBYTES (string), pos_byte,
414 SBYTES (string) - pos_byte,
415 (modify_match_data ? &search_regs : NULL));
416 unbind_to (count, Qnil);
417
418 /* Set last_thing_searched only when match data is changed. */
419 if (modify_match_data)
420 last_thing_searched = Qt;
421
422 if (val == -2)
423 matcher_overflow ();
424 if (val < 0) return Qnil;
425
426 if (modify_match_data)
427 for (i = 0; i < search_regs.num_regs; i++)
428 if (search_regs.start[i] >= 0)
429 {
430 search_regs.start[i]
431 = string_byte_to_char (string, search_regs.start[i]);
432 search_regs.end[i]
433 = string_byte_to_char (string, search_regs.end[i]);
434 }
435
436 return make_fixnum (string_byte_to_char (string, val));
437 }
438
439 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 4, 0,
440 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
441 Matching ignores case if `case-fold-search' is non-nil.
442 If third arg START is non-nil, start search at that index in STRING.
443
444 If INHIBIT-MODIFY is non-nil, match data is not changed.
445
446 If INHIBIT-MODIFY is nil or missing, match data is changed, and
447 `match-end' and `match-beginning' give indices of substrings matched
448 by parenthesis constructs in the pattern. You can use the function
449 `match-string' to extract the substrings matched by the parenthesis
450 constructions in REGEXP. For index of first char beyond the match, do
451 (match-end 0). */)
452 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
453 Lisp_Object inhibit_modify)
454 {
455 return string_match_1 (regexp, string, start, 0, NILP (inhibit_modify));
456 }
457
458 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 4, 0,
459 doc: /* Return index of start of first match for Posix REGEXP in STRING, or nil.
460 Find the longest match, in accord with Posix regular expression rules.
461 Case is ignored if `case-fold-search' is non-nil in the current buffer.
462
463 If INHIBIT-MODIFY is non-nil, match data is not changed.
464
465 If INHIBIT-MODIFY is nil or missing, match data is changed, and
466 `match-end' and `match-beginning' give indices of substrings matched
467 by parenthesis constructs in the pattern. You can use the function
468 `match-string' to extract the substrings matched by the parenthesis
469 constructions in REGEXP. For index of first char beyond the match, do
470 (match-end 0). */)
471 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
472 Lisp_Object inhibit_modify)
473 {
474 return string_match_1 (regexp, string, start, 1, NILP (inhibit_modify));
475 }
476
477 /* Match REGEXP against STRING using translation table TABLE,
478 searching all of STRING, and return the index of the match,
479 or negative on failure. This does not clobber the match data. */
480
481 ptrdiff_t
482 fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
483 Lisp_Object table)
484 {
485 re_match_object = string;
486 specpdl_ref count = SPECPDL_INDEX ();
487 struct regexp_cache *cache_entry
488 = compile_pattern (regexp, 0, table, 0, STRING_MULTIBYTE (string));
489 freeze_pattern (cache_entry);
490 ptrdiff_t val = re_search (&cache_entry->buf, SSDATA (string),
491 SBYTES (string), 0,
492 SBYTES (string), 0);
493 unbind_to (count, Qnil);
494 return val;
495 }
496
497 /* Match REGEXP against STRING, searching all of STRING and return the
498 index of the match, or negative on failure. This does not clobber
499 the match data. Table is a canonicalize table for ignoring case,
500 or nil for none.
501
502 We assume that STRING contains single-byte characters. */
503
504 ptrdiff_t
505 fast_c_string_match_internal (Lisp_Object regexp,
506 const char *string, ptrdiff_t len,
507 Lisp_Object table)
508 {
509 /* FIXME: This is expensive and not obviously correct when it makes
510 a difference. I.e., no longer "fast", and may hide bugs.
511 Something should be done about this. */
512 regexp = string_make_unibyte (regexp);
513 /* Record specpdl index because freeze_pattern pushes an
514 unwind-protect on the specpdl. */
515 specpdl_ref count = SPECPDL_INDEX ();
516 struct regexp_cache *cache_entry
517 = compile_pattern (regexp, 0, table, 0, 0);
518 freeze_pattern (cache_entry);
519 re_match_object = Qt;
520 ptrdiff_t val = re_search (&cache_entry->buf, string, len, 0, len, 0);
521 unbind_to (count, Qnil);
522 return val;
523 }
524
525 /* Match REGEXP against the characters after POS to LIMIT, and return
526 the number of matched characters. If STRING is non-nil, match
527 against the characters in it. In that case, POS and LIMIT are
528 indices into the string. This function doesn't modify the match
529 data. */
530
531 ptrdiff_t
532 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
533 ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
534 {
535 bool multibyte;
536 unsigned char *p1, *p2;
537 ptrdiff_t s1, s2;
538 ptrdiff_t len;
539
540 if (STRINGP (string))
541 {
542 if (pos_byte < 0)
543 pos_byte = string_char_to_byte (string, pos);
544 if (limit_byte < 0)
545 limit_byte = string_char_to_byte (string, limit);
546 p1 = NULL;
547 s1 = 0;
548 p2 = SDATA (string);
549 s2 = SBYTES (string);
550 multibyte = STRING_MULTIBYTE (string);
551 }
552 else
553 {
554 if (pos_byte < 0)
555 pos_byte = CHAR_TO_BYTE (pos);
556 if (limit_byte < 0)
557 limit_byte = CHAR_TO_BYTE (limit);
558 pos_byte -= BEGV_BYTE;
559 limit_byte -= BEGV_BYTE;
560 p1 = BEGV_ADDR;
561 s1 = GPT_BYTE - BEGV_BYTE;
562 p2 = GAP_END_ADDR;
563 s2 = ZV_BYTE - GPT_BYTE;
564 if (s1 < 0)
565 {
566 p2 = p1;
567 s2 = ZV_BYTE - BEGV_BYTE;
568 s1 = 0;
569 }
570 if (s2 < 0)
571 {
572 s1 = ZV_BYTE - BEGV_BYTE;
573 s2 = 0;
574 }
575 multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
576 }
577
578 struct regexp_cache *cache_entry =
579 compile_pattern (regexp, 0, Qnil, 0, multibyte);
580 specpdl_ref count = SPECPDL_INDEX ();
581 freeze_buffer_relocation ();
582 freeze_pattern (cache_entry);
583 re_match_object = STRINGP (string) ? string : Qnil;
584 len = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
585 pos_byte, NULL, limit_byte);
586
587 unbind_to (count, Qnil);
588 return len;
589 }
590
591
592 /* The newline cache: remembering which sections of text have no newlines. */
593
594 /* If the user has requested the long scans caching, make sure it's on.
595 Otherwise, make sure it's off.
596 This is our cheezy way of associating an action with the change of
597 state of a buffer-local variable. */
598 static struct region_cache *
599 newline_cache_on_off (struct buffer *buf)
600 {
601 struct buffer *base_buf = buf;
602 bool indirect_p = false;
603
604 if (buf->base_buffer)
605 {
606 base_buf = buf->base_buffer;
607 indirect_p = true;
608 }
609
610 /* Don't turn on or off the cache in the base buffer, if the value
611 of cache-long-scans of the base buffer is inconsistent with that.
612 This is because doing so will just make the cache pure overhead,
613 since if we turn it on via indirect buffer, it will be
614 immediately turned off by its base buffer. */
615 if (NILP (BVAR (buf, cache_long_scans)))
616 {
617 if (!indirect_p
618 || NILP (BVAR (base_buf, cache_long_scans)))
619 {
620 /* It should be off. */
621 if (base_buf->newline_cache)
622 {
623 free_region_cache (base_buf->newline_cache);
624 base_buf->newline_cache = 0;
625 }
626 }
627 return NULL;
628 }
629 else
630 {
631 if (!indirect_p
632 || !NILP (BVAR (base_buf, cache_long_scans)))
633 {
634 /* It should be on. */
635 if (base_buf->newline_cache == 0)
636 {
637 base_buf->newline_cache = new_region_cache ();
638 __lsan_ignore_object (base_buf->newline_cache);
639 }
640 }
641 return base_buf->newline_cache;
642 }
643 }
644
645
646 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
647
648 If COUNT is positive, search forwards; END must be >= START.
649 If COUNT is negative, search backwards for the -COUNTth instance;
650 END must be <= START.
651 If COUNT is zero, do anything you please; run rogue, for all I care.
652
653 If END is zero, use BEGV or ZV instead, as appropriate for the
654 direction indicated by COUNT. If START_BYTE is -1 it is unknown,
655 and similarly for END_BYTE.
656
657 If we find COUNT instances, set *COUNTED to COUNT, and return the
658 position past the COUNTth match. Note that for reverse motion
659 this is not the same as the usual convention for Emacs motion commands.
660
661 If we don't find COUNT instances before reaching END, set *COUNTED
662 to the number of newlines left found (negated if COUNT is negative),
663 and return END.
664
665 If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
666 to the returned character position.
667
668 If ALLOW_QUIT, check for quitting. That's good to do
669 except when inside redisplay. */
670
671 ptrdiff_t
672 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
673 ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
674 ptrdiff_t *bytepos, bool allow_quit)
675 {
676 struct region_cache *newline_cache;
677 struct buffer *cache_buffer;
678
679 if (!end)
680 {
681 if (count > 0)
682 end = ZV, end_byte = ZV_BYTE;
683 else
684 end = BEGV, end_byte = BEGV_BYTE;
685 }
686 if (end_byte == -1)
687 end_byte = CHAR_TO_BYTE (end);
688
689 newline_cache = newline_cache_on_off (current_buffer);
690 if (current_buffer->base_buffer)
691 cache_buffer = current_buffer->base_buffer;
692 else
693 cache_buffer = current_buffer;
694
695 if (counted)
696 *counted = count;
697
698 if (count > 0)
699 while (start != end)
700 {
701 /* Our innermost scanning loop is very simple; it doesn't know
702 about gaps, buffer ends, or the newline cache. ceiling is
703 the position of the last character before the next such
704 obstacle --- the last character the dumb search loop should
705 examine. */
706 ptrdiff_t tem, ceiling_byte = end_byte - 1;
707
708 /* If we're using the newline cache, consult it to see whether
709 we can avoid some scanning. */
710 if (newline_cache)
711 {
712 ptrdiff_t next_change;
713 int result = 1;
714
715 while (start < end && result)
716 {
717 ptrdiff_t lim1;
718
719 result = region_cache_forward (cache_buffer, newline_cache,
720 start, &next_change);
721 if (result)
722 {
723 /* When the cache revalidation is deferred,
724 next-change might point beyond ZV, which will
725 cause assertion violation in CHAR_TO_BYTE below.
726 Limit next_change to ZV to avoid that. */
727 if (next_change > ZV)
728 next_change = ZV;
729 start = next_change;
730 lim1 = next_change = end;
731 }
732 else
733 lim1 = min (next_change, end);
734
735 /* The cache returned zero for this region; see if
736 this is because the region is known and includes
737 only newlines. While at that, count any newlines
738 we bump into, and exit if we found enough off them. */
739 start_byte = CHAR_TO_BYTE (start);
740 while (start < lim1
741 && FETCH_BYTE (start_byte) == '\n')
742 {
743 start_byte++;
744 start++;
745 if (--count == 0)
746 {
747 if (bytepos)
748 *bytepos = start_byte;
749 return start;
750 }
751 }
752 /* If we found a non-newline character before hitting
753 position where the cache will again return non-zero
754 (i.e. no newlines beyond that position), it means
755 this region is not yet known to the cache, and we
756 must resort to the "dumb loop" method. */
757 if (start < next_change && !result)
758 break;
759 result = 1;
760 }
761 if (start >= end)
762 {
763 start = end;
764 start_byte = end_byte;
765 break;
766 }
767
768 /* START should never be after END. */
769 if (start_byte > ceiling_byte)
770 start_byte = ceiling_byte;
771
772 /* Now the text after start is an unknown region, and
773 next_change is the position of the next known region. */
774 ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
775 }
776 else if (start_byte == -1)
777 start_byte = CHAR_TO_BYTE (start);
778
779 /* The dumb loop can only scan text stored in contiguous
780 bytes. BUFFER_CEILING_OF returns the last character
781 position that is contiguous, so the ceiling is the
782 position after that. */
783 tem = BUFFER_CEILING_OF (start_byte);
784 ceiling_byte = min (tem, ceiling_byte);
785
786 {
787 /* The termination address of the dumb loop. */
788 unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
789 ptrdiff_t lim_byte = ceiling_byte + 1;
790
791 /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
792 of the base, the cursor, and the next line. */
793 ptrdiff_t base = start_byte - lim_byte;
794 ptrdiff_t cursor, next;
795
796 for (cursor = base; cursor < 0; cursor = next)
797 {
798 /* The dumb loop. */
799 unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
800 next = nl ? nl - lim_addr : 0;
801
802 /* If we're using the newline cache, cache the fact that
803 the region we just traversed is free of newlines. */
804 if (newline_cache && cursor != next)
805 {
806 know_region_cache (cache_buffer, newline_cache,
807 BYTE_TO_CHAR (lim_byte + cursor),
808 BYTE_TO_CHAR (lim_byte + next));
809 /* know_region_cache can relocate buffer text. */
810 lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
811 }
812
813 if (! nl)
814 break;
815 next++;
816
817 if (--count == 0)
818 {
819 if (bytepos)
820 *bytepos = lim_byte + next;
821 return BYTE_TO_CHAR (lim_byte + next);
822 }
823 if (allow_quit)
824 maybe_quit ();
825 }
826
827 start_byte = lim_byte;
828 start = BYTE_TO_CHAR (start_byte);
829 }
830 }
831 else
832 while (start > end)
833 {
834 /* The last character to check before the next obstacle. */
835 ptrdiff_t tem, ceiling_byte = end_byte;
836
837 /* Consult the newline cache, if appropriate. */
838 if (newline_cache)
839 {
840 ptrdiff_t next_change;
841 int result = 1;
842
843 while (start > end && result)
844 {
845 ptrdiff_t lim1;
846
847 result = region_cache_backward (cache_buffer, newline_cache,
848 start, &next_change);
849 if (result)
850 {
851 start = next_change;
852 lim1 = next_change = end;
853 }
854 else
855 lim1 = max (next_change, end);
856 start_byte = CHAR_TO_BYTE (start);
857 while (start > lim1
858 && FETCH_BYTE (start_byte - 1) == '\n')
859 {
860 if (++count == 0)
861 {
862 if (bytepos)
863 *bytepos = start_byte;
864 return start;
865 }
866 start_byte--;
867 start--;
868 }
869 if (start > next_change && !result)
870 break;
871 result = 1;
872 }
873 if (start <= end)
874 {
875 start = end;
876 start_byte = end_byte;
877 break;
878 }
879
880 /* Start should never be at or before end. */
881 if (start_byte <= ceiling_byte)
882 start_byte = ceiling_byte + 1;
883
884 /* Now the text before start is an unknown region, and
885 next_change is the position of the next known region. */
886 ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
887 }
888 else if (start_byte == -1)
889 start_byte = CHAR_TO_BYTE (start);
890
891 /* Stop scanning before the gap. */
892 tem = BUFFER_FLOOR_OF (start_byte - 1);
893 ceiling_byte = max (tem, ceiling_byte);
894
895 {
896 /* The termination address of the dumb loop. */
897 unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
898
899 /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
900 the base, the cursor, and the previous line. These
901 offsets are at least -1. */
902 ptrdiff_t base = start_byte - ceiling_byte;
903 ptrdiff_t cursor, prev;
904
905 for (cursor = base; 0 < cursor; cursor = prev)
906 {
907 unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
908 prev = nl ? nl - ceiling_addr : -1;
909
910 /* If we're looking for newlines, cache the fact that
911 this line's region is free of them. */
912 if (newline_cache && cursor != prev + 1)
913 {
914 know_region_cache (cache_buffer, newline_cache,
915 BYTE_TO_CHAR (ceiling_byte + prev + 1),
916 BYTE_TO_CHAR (ceiling_byte + cursor));
917 /* know_region_cache can relocate buffer text. */
918 ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
919 }
920
921 if (! nl)
922 break;
923
924 if (++count >= 0)
925 {
926 if (bytepos)
927 *bytepos = ceiling_byte + prev + 1;
928 return BYTE_TO_CHAR (ceiling_byte + prev + 1);
929 }
930 if (allow_quit)
931 maybe_quit ();
932 }
933
934 start_byte = ceiling_byte;
935 start = BYTE_TO_CHAR (start_byte);
936 }
937 }
938
939 if (counted)
940 *counted -= count;
941 if (bytepos)
942 {
943 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
944 eassert (*bytepos == CHAR_TO_BYTE (start));
945 }
946 return start;
947 }
948
949 /* Search for COUNT instances of a line boundary.
950 Start at START. If COUNT is negative, search backwards.
951
952 We report the resulting position by calling TEMP_SET_PT_BOTH.
953
954 If we find COUNT instances. we position after (always after,
955 even if scanning backwards) the COUNTth match.
956
957 If we don't find COUNT instances before reaching the end of the
958 buffer (or the beginning, if scanning backwards), we position at
959 the limit we bumped up against.
960
961 If ALLOW_QUIT, check for quitting. That's good to do
962 except in special cases. */
963
964 void
965 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
966 ptrdiff_t limit, ptrdiff_t limit_byte,
967 ptrdiff_t count, bool allow_quit)
968 {
969 ptrdiff_t charpos, bytepos, counted;
970
971 charpos = find_newline (start, start_byte, limit, limit_byte,
972 count, &counted, &bytepos, allow_quit);
973 if (counted != count)
974 TEMP_SET_PT_BOTH (limit, limit_byte);
975 else
976 TEMP_SET_PT_BOTH (charpos, bytepos);
977 }
978
979 /* Like above, but always scan from point and report the
980 resulting position in *CHARPOS and *BYTEPOS. */
981
982 ptrdiff_t
983 scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
984 ptrdiff_t *bytepos)
985 {
986 ptrdiff_t counted;
987
988 if (count <= 0)
989 *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
990 &counted, bytepos, 1);
991 else
992 *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
993 &counted, bytepos, 1);
994 return counted;
995 }
996
997 /* Like find_newline, but doesn't allow QUITting and doesn't return
998 COUNTED. */
999 ptrdiff_t
1000 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
1001 ptrdiff_t cnt, ptrdiff_t *bytepos)
1002 {
1003 return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
1004 }
1005
1006 /* Like find_newline, but returns position before the newline, not
1007 after, and only search up to TO.
1008 This isn't just find_newline_no_quit (...)-1, because you might hit TO. */
1009
1010 ptrdiff_t
1011 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
1012 ptrdiff_t cnt, ptrdiff_t *bytepos)
1013 {
1014 ptrdiff_t counted;
1015 ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &counted, bytepos, 1);
1016
1017 if (counted == cnt)
1018 {
1019 if (bytepos)
1020 dec_both (&pos, &*bytepos);
1021 else
1022 pos--;
1023 }
1024 return pos;
1025 }
1026
1027 /* Subroutines of Lisp buffer search functions. */
1028
1029 static Lisp_Object
1030 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
1031 Lisp_Object count, int direction, bool RE, bool posix)
1032 {
1033 EMACS_INT np;
1034 EMACS_INT lim;
1035 ptrdiff_t lim_byte;
1036 EMACS_INT n = direction;
1037
1038 if (!NILP (count))
1039 {
1040 CHECK_FIXNUM (count);
1041 n *= XFIXNUM (count);
1042 }
1043
1044 CHECK_STRING (string);
1045 if (NILP (bound))
1046 {
1047 if (n > 0)
1048 lim = ZV, lim_byte = ZV_BYTE;
1049 else
1050 lim = BEGV, lim_byte = BEGV_BYTE;
1051 }
1052 else
1053 {
1054 lim = fix_position (bound);
1055 if (n > 0 ? lim < PT : lim > PT)
1056 error ("Invalid search bound (wrong side of point)");
1057 if (lim > ZV)
1058 lim = ZV, lim_byte = ZV_BYTE;
1059 else if (lim < BEGV)
1060 lim = BEGV, lim_byte = BEGV_BYTE;
1061 else
1062 lim_byte = CHAR_TO_BYTE (lim);
1063 }
1064
1065 /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
1066 table. */
1067 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
1068 BVAR (current_buffer, case_eqv_table));
1069
1070 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1071 (!NILP (BVAR (current_buffer, case_fold_search))
1072 ? BVAR (current_buffer, case_canon_table)
1073 : Qnil),
1074 (!NILP (BVAR (current_buffer, case_fold_search))
1075 ? BVAR (current_buffer, case_eqv_table)
1076 : Qnil),
1077 posix);
1078 if (np <= 0)
1079 {
1080 if (NILP (noerror))
1081 xsignal1 (Qsearch_failed, string);
1082
1083 if (!EQ (noerror, Qt))
1084 {
1085 eassert (BEGV <= lim && lim <= ZV);
1086 SET_PT_BOTH (lim, lim_byte);
1087 return Qnil;
1088 #if 0 /* This would be clean, but maybe programs depend on
1089 a value of nil here. */
1090 np = lim;
1091 #endif
1092 }
1093 else
1094 return Qnil;
1095 }
1096
1097 eassert (BEGV <= np && np <= ZV);
1098 SET_PT (np);
1099
1100 return make_fixnum (np);
1101 }
1102
1103 /* Return true if REGEXP it matches just one constant string. */
1104
1105 static bool
1106 trivial_regexp_p (Lisp_Object regexp)
1107 {
1108 ptrdiff_t len = SBYTES (regexp);
1109 unsigned char *s = SDATA (regexp);
1110 while (--len >= 0)
1111 {
1112 switch (*s++)
1113 {
1114 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1115 return 0;
1116 case '\\':
1117 if (--len < 0)
1118 return 0;
1119 switch (*s++)
1120 {
1121 case '|': case '(': case ')': case '`': case '\'': case 'b':
1122 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1123 case 'S': case '=': case '{': case '}': case '_':
1124 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1125 case '1': case '2': case '3': case '4': case '5':
1126 case '6': case '7': case '8': case '9':
1127 return 0;
1128 }
1129 }
1130 }
1131 return 1;
1132 }
1133
1134 #define TRANSLATE(out, trt, d) \
1135 do \
1136 { \
1137 if (! NILP (trt)) \
1138 { \
1139 Lisp_Object temp; \
1140 temp = Faref (trt, make_fixnum (d)); \
1141 if (FIXNUMP (temp)) \
1142 out = XFIXNUM (temp); \
1143 else \
1144 out = d; \
1145 } \
1146 else \
1147 out = d; \
1148 } \
1149 while (0)
1150
1151 /* Only used in search_buffer, to record the end position of the match
1152 when searching regexps and SEARCH_REGS should not be changed
1153 (i.e. Vinhibit_changing_match_data is non-nil). */
1154 static struct re_registers search_regs_1;
1155
1156 static EMACS_INT
1157 search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1158 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1159 Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1160 {
1161 unsigned char *p1, *p2;
1162 ptrdiff_t s1, s2;
1163
1164 /* Snapshot in case Lisp changes the value. */
1165 bool preserve_match_data = NILP (Vinhibit_changing_match_data);
1166
1167 struct regexp_cache *cache_entry =
1168 compile_pattern (string,
1169 preserve_match_data ? &search_regs : &search_regs_1,
1170 trt, posix,
1171 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1172 struct re_pattern_buffer *bufp = &cache_entry->buf;
1173
1174 maybe_quit (); /* Do a pending quit right away,
1175 to avoid paradoxical behavior */
1176 /* Get pointers and sizes of the two strings
1177 that make up the visible portion of the buffer. */
1178
1179 p1 = BEGV_ADDR;
1180 s1 = GPT_BYTE - BEGV_BYTE;
1181 p2 = GAP_END_ADDR;
1182 s2 = ZV_BYTE - GPT_BYTE;
1183 if (s1 < 0)
1184 {
1185 p2 = p1;
1186 s2 = ZV_BYTE - BEGV_BYTE;
1187 s1 = 0;
1188 }
1189 if (s2 < 0)
1190 {
1191 s1 = ZV_BYTE - BEGV_BYTE;
1192 s2 = 0;
1193 }
1194
1195 specpdl_ref count = SPECPDL_INDEX ();
1196 freeze_buffer_relocation ();
1197 freeze_pattern (cache_entry);
1198
1199 while (n < 0)
1200 {
1201 ptrdiff_t val;
1202
1203 re_match_object = Qnil;
1204 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1205 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1206 preserve_match_data ? &search_regs : &search_regs_1,
1207 /* Don't allow match past current point */
1208 pos_byte - BEGV_BYTE);
1209 if (val == -2)
1210 {
1211 unbind_to (count, Qnil);
1212 matcher_overflow ();
1213 }
1214 if (val >= 0)
1215 {
1216 if (preserve_match_data)
1217 {
1218 pos_byte = search_regs.start[0] + BEGV_BYTE;
1219 for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
1220 if (search_regs.start[i] >= 0)
1221 {
1222 search_regs.start[i]
1223 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1224 search_regs.end[i]
1225 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1226 }
1227 XSETBUFFER (last_thing_searched, current_buffer);
1228 /* Set pos to the new position. */
1229 pos = search_regs.start[0];
1230 }
1231 else
1232 {
1233 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1234 /* Set pos to the new position. */
1235 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1236 }
1237 }
1238 else
1239 {
1240 unbind_to (count, Qnil);
1241 return (n);
1242 }
1243 n++;
1244 maybe_quit ();
1245 }
1246 while (n > 0)
1247 {
1248 ptrdiff_t val;
1249
1250 re_match_object = Qnil;
1251 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1252 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1253 preserve_match_data ? &search_regs : &search_regs_1,
1254 lim_byte - BEGV_BYTE);
1255 if (val == -2)
1256 {
1257 unbind_to (count, Qnil);
1258 matcher_overflow ();
1259 }
1260 if (val >= 0)
1261 {
1262 if (preserve_match_data)
1263 {
1264 pos_byte = search_regs.end[0] + BEGV_BYTE;
1265 for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
1266 if (search_regs.start[i] >= 0)
1267 {
1268 search_regs.start[i]
1269 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1270 search_regs.end[i]
1271 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1272 }
1273 XSETBUFFER (last_thing_searched, current_buffer);
1274 pos = search_regs.end[0];
1275 }
1276 else
1277 {
1278 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1279 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1280 }
1281 }
1282 else
1283 {
1284 unbind_to (count, Qnil);
1285 return (0 - n);
1286 }
1287 n--;
1288 maybe_quit ();
1289 }
1290 unbind_to (count, Qnil);
1291 return (pos);
1292 }
1293
1294 static EMACS_INT
1295 search_buffer_non_re (Lisp_Object string, ptrdiff_t pos,
1296 ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte,
1297 EMACS_INT n, bool RE, Lisp_Object trt, Lisp_Object inverse_trt,
1298 bool posix)
1299 {
1300 unsigned char *raw_pattern, *pat;
1301 ptrdiff_t raw_pattern_size;
1302 ptrdiff_t raw_pattern_size_byte;
1303 unsigned char *patbuf;
1304 bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1305 unsigned char *base_pat;
1306 /* Set to positive if we find a non-ASCII char that need
1307 translation. Otherwise set to zero later. */
1308 int char_base = -1;
1309 bool boyer_moore_ok = 1;
1310 USE_SAFE_ALLOCA;
1311
1312 /* MULTIBYTE says whether the text to be searched is multibyte.
1313 We must convert PATTERN to match that, or we will not really
1314 find things right. */
1315
1316 if (multibyte == STRING_MULTIBYTE (string))
1317 {
1318 raw_pattern = SDATA (string);
1319 raw_pattern_size = SCHARS (string);
1320 raw_pattern_size_byte = SBYTES (string);
1321 }
1322 else if (multibyte)
1323 {
1324 raw_pattern_size = SCHARS (string);
1325 raw_pattern_size_byte
1326 = count_size_as_multibyte (SDATA (string),
1327 raw_pattern_size);
1328 raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
1329 copy_text (SDATA (string), raw_pattern,
1330 SCHARS (string), 0, 1);
1331 }
1332 else
1333 {
1334 /* Converting multibyte to single-byte. */
1335 raw_pattern_size = SCHARS (string);
1336 raw_pattern_size_byte = SCHARS (string);
1337 raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
1338 copy_text (SDATA (string), raw_pattern,
1339 SBYTES (string), 1, 0);
1340 }
1341
1342 /* Copy and optionally translate the pattern. */
1343 ptrdiff_t len = raw_pattern_size;
1344 ptrdiff_t len_byte = raw_pattern_size_byte;
1345 SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
1346 pat = patbuf;
1347 base_pat = raw_pattern;
1348 if (multibyte)
1349 {
1350 /* Fill patbuf by translated characters in STRING while
1351 checking if we can use boyer-moore search. If TRT is
1352 non-nil, we can use boyer-moore search only if TRT can be
1353 represented by the byte array of 256 elements. For that,
1354 all non-ASCII case-equivalents of all case-sensitive
1355 characters in STRING must belong to the same character
1356 group (two characters belong to the same group iff their
1357 multibyte forms are the same except for the last byte;
1358 i.e. every 64 characters form a group; U+0000..U+003F,
1359 U+0040..U+007F, U+0080..U+00BF, ...). */
1360
1361 while (--len >= 0)
1362 {
1363 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1364 int translated, inverse;
1365 int charlen;
1366
1367 /* If we got here and the RE flag is set, it's because we're
1368 dealing with a regexp known to be trivial, so the backslash
1369 just quotes the next character. */
1370 if (RE && *base_pat == '\\')
1371 {
1372 len--;
1373 raw_pattern_size--;
1374 len_byte--;
1375 base_pat++;
1376 }
1377
1378 int in_charlen, c = string_char_and_length (base_pat, &in_charlen);
1379
1380 if (NILP (trt))
1381 {
1382 str = base_pat;
1383 charlen = in_charlen;
1384 }
1385 else
1386 {
1387 /* Translate the character. */
1388 TRANSLATE (translated, trt, c);
1389 charlen = CHAR_STRING (translated, str_base);
1390 str = str_base;
1391
1392 /* Check if C has any other case-equivalents. */
1393 TRANSLATE (inverse, inverse_trt, c);
1394 /* If so, check if we can use boyer-moore. */
1395 if (c != inverse && boyer_moore_ok)
1396 {
1397 /* Check if all equivalents belong to the same
1398 group of characters. Note that the check of C
1399 itself is done by the last iteration. */
1400 int this_char_base = -1;
1401
1402 while (boyer_moore_ok)
1403 {
1404 if (ASCII_CHAR_P (inverse))
1405 {
1406 if (this_char_base > 0)
1407 boyer_moore_ok = 0;
1408 else
1409 this_char_base = 0;
1410 }
1411 else if (CHAR_BYTE8_P (inverse))
1412 /* Boyer-moore search can't handle a
1413 translation of an eight-bit
1414 character. */
1415 boyer_moore_ok = 0;
1416 else if (this_char_base < 0)
1417 {
1418 this_char_base = inverse & ~0x3F;
1419 if (char_base < 0)
1420 char_base = this_char_base;
1421 else if (this_char_base != char_base)
1422 boyer_moore_ok = 0;
1423 }
1424 else if ((inverse & ~0x3F) != this_char_base)
1425 boyer_moore_ok = 0;
1426 if (c == inverse)
1427 break;
1428 TRANSLATE (inverse, inverse_trt, inverse);
1429 }
1430 }
1431 }
1432
1433 /* Store this character into the translated pattern. */
1434 memcpy (pat, str, charlen);
1435 pat += charlen;
1436 base_pat += in_charlen;
1437 len_byte -= in_charlen;
1438 }
1439
1440 /* If char_base is still negative we didn't find any translated
1441 non-ASCII characters. */
1442 if (char_base < 0)
1443 char_base = 0;
1444 }
1445 else
1446 {
1447 /* Unibyte buffer. */
1448 char_base = 0;
1449 while (--len >= 0)
1450 {
1451 int c, translated, inverse;
1452
1453 /* If we got here and the RE flag is set, it's because we're
1454 dealing with a regexp known to be trivial, so the backslash
1455 just quotes the next character. */
1456 if (RE && *base_pat == '\\')
1457 {
1458 len--;
1459 raw_pattern_size--;
1460 base_pat++;
1461 }
1462 c = *base_pat++;
1463 TRANSLATE (translated, trt, c);
1464 *pat++ = translated;
1465 /* Check that none of C's equivalents violates the
1466 assumptions of boyer_moore. */
1467 TRANSLATE (inverse, inverse_trt, c);
1468 while (1)
1469 {
1470 if (inverse >= 0200)
1471 {
1472 boyer_moore_ok = 0;
1473 break;
1474 }
1475 if (c == inverse)
1476 break;
1477 TRANSLATE (inverse, inverse_trt, inverse);
1478 }
1479 }
1480 }
1481
1482 len_byte = pat - patbuf;
1483 pat = base_pat = patbuf;
1484
1485 EMACS_INT result
1486 = (boyer_moore_ok
1487 ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
1488 pos_byte, lim_byte,
1489 char_base)
1490 : simple_search (n, pat, raw_pattern_size, len_byte, trt,
1491 pos, pos_byte, lim, lim_byte));
1492 SAFE_FREE ();
1493 return result;
1494 }
1495
1496 /* Search for the Nth occurrence of STRING in the current buffer,
1497 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1498
1499 If RE, look for matches against the regular expression STRING instead;
1500 if POSIX, enable POSIX style backtracking within that regular
1501 expression.
1502
1503 If N is positive, search forward; in this case, LIM must be greater
1504 than POS.
1505
1506 If N is negative, search backward; LIM must be less than POS.
1507
1508 Return -X if there are X remaining occurrences or matches,
1509 or else the position at the beginning (if N is negative) or the end
1510 (if N is positive) of the Nth occurrence or match against STRING.
1511
1512 Use TRT and INVERSE_TRT as character translation tables. */
1513
1514 EMACS_INT
1515 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1516 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1517 bool RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1518 {
1519 if (running_asynch_code)
1520 save_search_regs ();
1521
1522 /* Searching 0 times means don't move. */
1523 /* Null string is found at starting position. */
1524 if (n == 0 || SCHARS (string) == 0)
1525 {
1526 set_search_regs (pos_byte, 0);
1527 return pos;
1528 }
1529
1530 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1531 pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte,
1532 n, trt, inverse_trt, posix);
1533 else
1534 pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte,
1535 n, RE, trt, inverse_trt, posix);
1536
1537 return pos;
1538 }
1539
1540 /* Do a simple string search N times for the string PAT,
1541 whose length is LEN/LEN_BYTE,
1542 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1543 TRT is the translation table.
1544
1545 Return the character position where the match is found.
1546 Otherwise, if M matches remained to be found, return -M.
1547
1548 This kind of search works regardless of what is in PAT and
1549 regardless of what is in TRT. It is used in cases where
1550 boyer_moore cannot work. */
1551
1552 static EMACS_INT
1553 simple_search (EMACS_INT n, unsigned char *pat,
1554 ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1555 ptrdiff_t pos, ptrdiff_t pos_byte,
1556 ptrdiff_t lim, ptrdiff_t lim_byte)
1557 {
1558 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1559 bool forward = n > 0;
1560 /* Number of buffer bytes matched. Note that this may be different
1561 from len_byte in a multibyte buffer. */
1562 ptrdiff_t match_byte = PTRDIFF_MIN;
1563
1564 if (lim > pos && multibyte)
1565 while (n > 0)
1566 {
1567 while (1)
1568 {
1569 /* Try matching at position POS. */
1570 ptrdiff_t this_pos_byte = pos_byte;
1571 ptrdiff_t this_len = len;
1572 unsigned char *p = pat;
1573 if (pos + len > lim || pos_byte + len_byte > lim_byte)
1574 goto stop;
1575
1576 while (this_len > 0)
1577 {
1578 int charlen, pat_ch = string_char_and_length (p, &charlen);
1579 int buf_charlen, buf_ch
1580 = string_char_and_length (BYTE_POS_ADDR (this_pos_byte),
1581 &buf_charlen);
1582 TRANSLATE (buf_ch, trt, buf_ch);
1583
1584 if (buf_ch != pat_ch)
1585 break;
1586
1587 this_len--;
1588 p += charlen;
1589
1590 this_pos_byte += buf_charlen;
1591 }
1592
1593 if (this_len == 0)
1594 {
1595 match_byte = this_pos_byte - pos_byte;
1596 pos += len;
1597 pos_byte += match_byte;
1598 break;
1599 }
1600
1601 inc_both (&pos, &pos_byte);
1602 }
1603
1604 n--;
1605 }
1606 else if (lim > pos)
1607 while (n > 0)
1608 {
1609 while (1)
1610 {
1611 /* Try matching at position POS. */
1612 ptrdiff_t this_pos = pos;
1613 ptrdiff_t this_len = len;
1614 unsigned char *p = pat;
1615
1616 if (pos + len > lim)
1617 goto stop;
1618
1619 while (this_len > 0)
1620 {
1621 int pat_ch = *p++;
1622 int buf_ch = FETCH_BYTE (this_pos);
1623 TRANSLATE (buf_ch, trt, buf_ch);
1624
1625 if (buf_ch != pat_ch)
1626 break;
1627
1628 this_len--;
1629 this_pos++;
1630 }
1631
1632 if (this_len == 0)
1633 {
1634 match_byte = len;
1635 pos += len;
1636 break;
1637 }
1638
1639 pos++;
1640 }
1641
1642 n--;
1643 }
1644 /* Backwards search. */
1645 else if (lim < pos && multibyte)
1646 while (n < 0)
1647 {
1648 while (1)
1649 {
1650 /* Try matching at position POS. */
1651 ptrdiff_t this_pos = pos;
1652 ptrdiff_t this_pos_byte = pos_byte;
1653 ptrdiff_t this_len = len;
1654 const unsigned char *p = pat + len_byte;
1655
1656 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1657 goto stop;
1658
1659 while (this_len > 0)
1660 {
1661 int pat_ch, buf_ch;
1662
1663 dec_both (&this_pos, &this_pos_byte);
1664 p -= raw_prev_char_len (p);
1665 pat_ch = STRING_CHAR (p);
1666 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1667 TRANSLATE (buf_ch, trt, buf_ch);
1668
1669 if (buf_ch != pat_ch)
1670 break;
1671
1672 this_len--;
1673 }
1674
1675 if (this_len == 0)
1676 {
1677 match_byte = pos_byte - this_pos_byte;
1678 pos = this_pos;
1679 pos_byte = this_pos_byte;
1680 break;
1681 }
1682
1683 dec_both (&pos, &pos_byte);
1684 }
1685
1686 n++;
1687 }
1688 else if (lim < pos)
1689 while (n < 0)
1690 {
1691 while (1)
1692 {
1693 /* Try matching at position POS. */
1694 ptrdiff_t this_pos = pos - len;
1695 ptrdiff_t this_len = len;
1696 unsigned char *p = pat;
1697
1698 if (this_pos < lim)
1699 goto stop;
1700
1701 while (this_len > 0)
1702 {
1703 int pat_ch = *p++;
1704 int buf_ch = FETCH_BYTE (this_pos);
1705 TRANSLATE (buf_ch, trt, buf_ch);
1706
1707 if (buf_ch != pat_ch)
1708 break;
1709 this_len--;
1710 this_pos++;
1711 }
1712
1713 if (this_len == 0)
1714 {
1715 match_byte = len;
1716 pos -= len;
1717 break;
1718 }
1719
1720 pos--;
1721 }
1722
1723 n++;
1724 }
1725
1726 stop:
1727 if (n == 0)
1728 {
1729 eassert (match_byte != PTRDIFF_MIN);
1730 if (forward)
1731 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1732 else
1733 set_search_regs (multibyte ? pos_byte : pos, match_byte);
1734
1735 return pos;
1736 }
1737 else if (n > 0)
1738 return -n;
1739 else
1740 return n;
1741 }
1742
1743 /* Do Boyer-Moore search N times for the string BASE_PAT,
1744 whose length is LEN_BYTE,
1745 from buffer position POS_BYTE until LIM_BYTE.
1746 DIRECTION says which direction we search in.
1747 TRT and INVERSE_TRT are translation tables.
1748 Characters in PAT are already translated by TRT.
1749
1750 This kind of search works if all the characters in BASE_PAT that
1751 have nontrivial translation are the same aside from the last byte.
1752 This makes it possible to translate just the last byte of a
1753 character, and do so after just a simple test of the context.
1754 CHAR_BASE is nonzero if there is such a non-ASCII character.
1755
1756 If that criterion is not satisfied, do not call this function. */
1757
1758 static EMACS_INT
1759 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1760 ptrdiff_t len_byte,
1761 Lisp_Object trt, Lisp_Object inverse_trt,
1762 ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1763 int char_base)
1764 {
1765 int direction = ((n > 0) ? 1 : -1);
1766 register ptrdiff_t dirlen;
1767 ptrdiff_t limit;
1768 int stride_for_teases = 0;
1769 int BM_tab[0400];
1770 register unsigned char *cursor, *p_limit;
1771 register ptrdiff_t i;
1772 register int j;
1773 unsigned char *pat, *pat_end;
1774 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1775
1776 unsigned char simple_translate[0400];
1777 /* These are set to the preceding bytes of a byte to be translated
1778 if char_base is nonzero. As the maximum byte length of a
1779 multibyte character is 5, we have to check at most four previous
1780 bytes. */
1781 int translate_prev_byte1 = 0;
1782 int translate_prev_byte2 = 0;
1783 int translate_prev_byte3 = 0;
1784
1785 /* The general approach is that we are going to maintain that we know
1786 the first (closest to the present position, in whatever direction
1787 we're searching) character that could possibly be the last
1788 (furthest from present position) character of a valid match. We
1789 advance the state of our knowledge by looking at that character
1790 and seeing whether it indeed matches the last character of the
1791 pattern. If it does, we take a closer look. If it does not, we
1792 move our pointer (to putative last characters) as far as is
1793 logically possible. This amount of movement, which I call a
1794 stride, will be the length of the pattern if the actual character
1795 appears nowhere in the pattern, otherwise it will be the distance
1796 from the last occurrence of that character to the end of the
1797 pattern. If the amount is zero we have a possible match. */
1798
1799 /* Here we make a "mickey mouse" BM table. The stride of the search
1800 is determined only by the last character of the putative match.
1801 If that character does not match, we will stride the proper
1802 distance to propose a match that superimposes it on the last
1803 instance of a character that matches it (per trt), or misses
1804 it entirely if there is none. */
1805
1806 dirlen = len_byte * direction;
1807
1808 /* Record position after the end of the pattern. */
1809 pat_end = base_pat + len_byte;
1810 /* BASE_PAT points to a character that we start scanning from.
1811 It is the first character in a forward search,
1812 the last character in a backward search. */
1813 if (direction < 0)
1814 base_pat = pat_end - 1;
1815
1816 /* A character that does not appear in the pattern induces a
1817 stride equal to the pattern length. */
1818 for (i = 0; i < 0400; i++)
1819 BM_tab[i] = dirlen;
1820
1821 /* We use this for translation, instead of TRT itself.
1822 We fill this in to handle the characters that actually
1823 occur in the pattern. Others don't matter anyway! */
1824 for (i = 0; i < 0400; i++)
1825 simple_translate[i] = i;
1826
1827 if (char_base)
1828 {
1829 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
1830 byte following them are the target of translation. */
1831 eassume (0x80 <= char_base && char_base <= MAX_CHAR);
1832 unsigned char str[MAX_MULTIBYTE_LENGTH];
1833 int cblen = CHAR_STRING (char_base, str);
1834
1835 translate_prev_byte1 = str[cblen - 2];
1836 if (cblen > 2)
1837 {
1838 translate_prev_byte2 = str[cblen - 3];
1839 if (cblen > 3)
1840 translate_prev_byte3 = str[cblen - 4];
1841 }
1842 }
1843
1844 i = 0;
1845 while (i != dirlen)
1846 {
1847 unsigned char *ptr = base_pat + i;
1848 i += direction;
1849 if (! NILP (trt))
1850 {
1851 /* If the byte currently looking at is the last of a
1852 character to check case-equivalents, set CH to that
1853 character. An ASCII character and a non-ASCII character
1854 matching with CHAR_BASE are to be checked. */
1855 int ch = -1;
1856
1857 if (ASCII_CHAR_P (*ptr) || ! multibyte)
1858 ch = *ptr;
1859 else if (char_base
1860 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1861 {
1862 unsigned char *charstart = ptr - 1;
1863
1864 while (! (CHAR_HEAD_P (*charstart)))
1865 charstart--;
1866 ch = STRING_CHAR (charstart);
1867 if (char_base != (ch & ~0x3F))
1868 ch = -1;
1869 }
1870
1871 if (ch >= 0200 && multibyte)
1872 j = (ch & 0x3F) | 0200;
1873 else
1874 j = *ptr;
1875
1876 if (i == dirlen)
1877 stride_for_teases = BM_tab[j];
1878
1879 BM_tab[j] = dirlen - i;
1880 /* A translation table is accompanied by its inverse -- see
1881 comment following downcase_table for details. */
1882 if (ch >= 0)
1883 {
1884 int starting_ch = ch;
1885 int starting_j = j;
1886
1887 while (1)
1888 {
1889 TRANSLATE (ch, inverse_trt, ch);
1890 if (ch >= 0200 && multibyte)
1891 j = (ch & 0x3F) | 0200;
1892 else
1893 j = ch;
1894
1895 /* For all the characters that map into CH,
1896 set up simple_translate to map the last byte
1897 into STARTING_J. */
1898 simple_translate[j] = starting_j;
1899 if (ch == starting_ch)
1900 break;
1901 BM_tab[j] = dirlen - i;
1902 }
1903 }
1904 }
1905 else
1906 {
1907 j = *ptr;
1908
1909 if (i == dirlen)
1910 stride_for_teases = BM_tab[j];
1911 BM_tab[j] = dirlen - i;
1912 }
1913 /* stride_for_teases tells how much to stride if we get a
1914 match on the far character but are subsequently
1915 disappointed, by recording what the stride would have been
1916 for that character if the last character had been
1917 different. */
1918 }
1919 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1920 /* loop invariant - POS_BYTE points at where last char (first
1921 char if reverse) of pattern would align in a possible match. */
1922 while (n != 0)
1923 {
1924 ptrdiff_t tail_end;
1925 unsigned char *tail_end_ptr;
1926
1927 /* It's been reported that some (broken) compiler thinks that
1928 Boolean expressions in an arithmetic context are unsigned.
1929 Using an explicit ?1:0 prevents this. */
1930 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1931 < 0)
1932 return (n * (0 - direction));
1933 /* First we do the part we can by pointers (maybe nothing) */
1934 maybe_quit ();
1935 pat = base_pat;
1936 limit = pos_byte - dirlen + direction;
1937 if (direction > 0)
1938 {
1939 limit = BUFFER_CEILING_OF (limit);
1940 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1941 can take on without hitting edge of buffer or the gap. */
1942 limit = min (limit, pos_byte + 20000);
1943 limit = min (limit, lim_byte - 1);
1944 }
1945 else
1946 {
1947 limit = BUFFER_FLOOR_OF (limit);
1948 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1949 can take on without hitting edge of buffer or the gap. */
1950 limit = max (limit, pos_byte - 20000);
1951 limit = max (limit, lim_byte);
1952 }
1953 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1954 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1955
1956 if ((limit - pos_byte) * direction > 20)
1957 {
1958 unsigned char *p2;
1959
1960 p_limit = BYTE_POS_ADDR (limit);
1961 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1962 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
1963 while (1) /* use one cursor setting as long as i can */
1964 {
1965 if (direction > 0) /* worth duplicating */
1966 {
1967 while (cursor <= p_limit)
1968 {
1969 if (BM_tab[*cursor] == 0)
1970 goto hit;
1971 cursor += BM_tab[*cursor];
1972 }
1973 }
1974 else
1975 {
1976 while (cursor >= p_limit)
1977 {
1978 if (BM_tab[*cursor] == 0)
1979 goto hit;
1980 cursor += BM_tab[*cursor];
1981 }
1982 }
1983 /* If you are here, cursor is beyond the end of the
1984 searched region. You fail to match within the
1985 permitted region and would otherwise try a character
1986 beyond that region. */
1987 break;
1988
1989 hit:
1990 i = dirlen - direction;
1991 if (! NILP (trt))
1992 {
1993 while ((i -= direction) + direction != 0)
1994 {
1995 int ch;
1996 cursor -= direction;
1997 /* Translate only the last byte of a character. */
1998 if (! multibyte
1999 || ((cursor == tail_end_ptr
2000 || CHAR_HEAD_P (cursor[1]))
2001 && (CHAR_HEAD_P (cursor[0])
2002 /* Check if this is the last byte of
2003 a translatable character. */
2004 || (translate_prev_byte1 == cursor[-1]
2005 && (CHAR_HEAD_P (translate_prev_byte1)
2006 || (translate_prev_byte2 == cursor[-2]
2007 && (CHAR_HEAD_P (translate_prev_byte2)
2008 || (translate_prev_byte3 == cursor[-3]))))))))
2009 ch = simple_translate[*cursor];
2010 else
2011 ch = *cursor;
2012 if (pat[i] != ch)
2013 break;
2014 }
2015 }
2016 else
2017 {
2018 while ((i -= direction) + direction != 0)
2019 {
2020 cursor -= direction;
2021 if (pat[i] != *cursor)
2022 break;
2023 }
2024 }
2025 cursor += dirlen - i - direction; /* fix cursor */
2026 if (i + direction == 0)
2027 {
2028 ptrdiff_t position, start, end;
2029 #ifdef REL_ALLOC
2030 ptrdiff_t cursor_off;
2031 #endif
2032
2033 cursor -= direction;
2034
2035 position = pos_byte + cursor - p2 + ((direction > 0)
2036 ? 1 - len_byte : 0);
2037 #ifdef REL_ALLOC
2038 /* set_search_regs might call malloc, which could
2039 cause ralloc.c relocate buffer text. We need to
2040 update pointers into buffer text due to that. */
2041 cursor_off = cursor - p2;
2042 #endif
2043 set_search_regs (position, len_byte);
2044 #ifdef REL_ALLOC
2045 p_limit = BYTE_POS_ADDR (limit);
2046 p2 = BYTE_POS_ADDR (pos_byte);
2047 cursor = p2 + cursor_off;
2048 #endif
2049
2050 if (NILP (Vinhibit_changing_match_data))
2051 {
2052 start = search_regs.start[0];
2053 end = search_regs.end[0];
2054 }
2055 else
2056 /* If Vinhibit_changing_match_data is non-nil,
2057 search_regs will not be changed. So let's
2058 compute start and end here. */
2059 {
2060 start = BYTE_TO_CHAR (position);
2061 end = BYTE_TO_CHAR (position + len_byte);
2062 }
2063
2064 if ((n -= direction) != 0)
2065 cursor += dirlen; /* to resume search */
2066 else
2067 return direction > 0 ? end : start;
2068 }
2069 else
2070 cursor += stride_for_teases; /* <sigh> we lose - */
2071 }
2072 pos_byte += cursor - p2;
2073 }
2074 else
2075 /* Now we'll pick up a clump that has to be done the hard
2076 way because it covers a discontinuity. */
2077 {
2078 limit = ((direction > 0)
2079 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2080 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2081 limit = ((direction > 0)
2082 ? min (limit + len_byte, lim_byte - 1)
2083 : max (limit - len_byte, lim_byte));
2084 /* LIMIT is now the last value POS_BYTE can have
2085 and still be valid for a possible match. */
2086 while (1)
2087 {
2088 /* This loop can be coded for space rather than
2089 speed because it will usually run only once.
2090 (the reach is at most len + 21, and typically
2091 does not exceed len). */
2092 while ((limit - pos_byte) * direction >= 0)
2093 {
2094 int ch = FETCH_BYTE (pos_byte);
2095 if (BM_tab[ch] == 0)
2096 goto hit2;
2097 pos_byte += BM_tab[ch];
2098 }
2099 break; /* ran off the end */
2100
2101 hit2:
2102 /* Found what might be a match. */
2103 i = dirlen - direction;
2104 while ((i -= direction) + direction != 0)
2105 {
2106 int ch;
2107 unsigned char *ptr;
2108 pos_byte -= direction;
2109 ptr = BYTE_POS_ADDR (pos_byte);
2110 /* Translate only the last byte of a character. */
2111 if (! multibyte
2112 || ((ptr == tail_end_ptr
2113 || CHAR_HEAD_P (ptr[1]))
2114 && (CHAR_HEAD_P (ptr[0])
2115 /* Check if this is the last byte of a
2116 translatable character. */
2117 || (translate_prev_byte1 == ptr[-1]
2118 && (CHAR_HEAD_P (translate_prev_byte1)
2119 || (translate_prev_byte2 == ptr[-2]
2120 && (CHAR_HEAD_P (translate_prev_byte2)
2121 || translate_prev_byte3 == ptr[-3])))))))
2122 ch = simple_translate[*ptr];
2123 else
2124 ch = *ptr;
2125 if (pat[i] != ch)
2126 break;
2127 }
2128 /* Above loop has moved POS_BYTE part or all the way
2129 back to the first pos (last pos if reverse).
2130 Set it once again at the last (first if reverse) char. */
2131 pos_byte += dirlen - i - direction;
2132 if (i + direction == 0)
2133 {
2134 ptrdiff_t position, start, end;
2135 pos_byte -= direction;
2136
2137 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2138 set_search_regs (position, len_byte);
2139
2140 if (NILP (Vinhibit_changing_match_data))
2141 {
2142 start = search_regs.start[0];
2143 end = search_regs.end[0];
2144 }
2145 else
2146 /* If Vinhibit_changing_match_data is non-nil,
2147 search_regs will not be changed. So let's
2148 compute start and end here. */
2149 {
2150 start = BYTE_TO_CHAR (position);
2151 end = BYTE_TO_CHAR (position + len_byte);
2152 }
2153
2154 if ((n -= direction) != 0)
2155 pos_byte += dirlen; /* to resume search */
2156 else
2157 return direction > 0 ? end : start;
2158 }
2159 else
2160 pos_byte += stride_for_teases;
2161 }
2162 }
2163 /* We have done one clump. Can we continue? */
2164 if ((lim_byte - pos_byte) * direction < 0)
2165 return ((0 - n) * direction);
2166 }
2167 return BYTE_TO_CHAR (pos_byte);
2168 }
2169
2170 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2171 for the overall match just found in the current buffer.
2172 Also clear out the match data for registers 1 and up. */
2173
2174 static void
2175 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2176 {
2177 ptrdiff_t i;
2178
2179 if (!NILP (Vinhibit_changing_match_data))
2180 return;
2181
2182 /* Make sure we have registers in which to store
2183 the match position. */
2184 if (search_regs.num_regs == 0)
2185 {
2186 search_regs.start = xmalloc (2 * sizeof *search_regs.start);
2187 search_regs.end = xmalloc (2 * sizeof *search_regs.end);
2188 search_regs.num_regs = 2;
2189 }
2190
2191 /* Clear out the other registers. */
2192 for (i = 1; i < search_regs.num_regs; i++)
2193 {
2194 search_regs.start[i] = -1;
2195 search_regs.end[i] = -1;
2196 }
2197
2198 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2199 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2200 XSETBUFFER (last_thing_searched, current_buffer);
2201 }
2202
2203 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2204 "MSearch backward: ",
2205 doc: /* Search backward from point for STRING.
2206 Set point to the beginning of the occurrence found, and return point.
2207 An optional second argument bounds the search; it is a buffer position.
2208 The match found must not begin before that position. A value of nil
2209 means search to the beginning of the accessible portion of the buffer.
2210 Optional third argument, if t, means if fail just return nil (no error).
2211 If not nil and not t, position at limit of search and return nil.
2212 Optional fourth argument COUNT, if a positive number, means to search
2213 for COUNT successive occurrences. If COUNT is negative, search
2214 forward, instead of backward, for -COUNT occurrences. A value of
2215 nil means the same as 1.
2216 With COUNT positive, the match found is the COUNTth to last one (or
2217 last, if COUNT is 1 or nil) in the buffer located entirely before
2218 the origin of the search; correspondingly with COUNT negative.
2219
2220 Search case-sensitivity is determined by the value of the variable
2221 `case-fold-search', which see.
2222
2223 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2224 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2225 {
2226 return search_command (string, bound, noerror, count, -1, false, false);
2227 }
2228
2229 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2230 doc: /* Search forward from point for STRING.
2231 Set point to the end of the occurrence found, and return point.
2232 An optional second argument bounds the search; it is a buffer position.
2233 The match found must not end after that position. A value of nil
2234 means search to the end of the accessible portion of the buffer.
2235 Optional third argument, if t, means if fail just return nil (no error).
2236 If not nil and not t, move to limit of search and return nil.
2237 Optional fourth argument COUNT, if a positive number, means to search
2238 for COUNT successive occurrences. If COUNT is negative, search
2239 backward, instead of forward, for -COUNT occurrences. A value of
2240 nil means the same as 1.
2241 With COUNT positive, the match found is the COUNTth one (or first,
2242 if COUNT is 1 or nil) in the buffer located entirely after the
2243 origin of the search; correspondingly with COUNT negative.
2244
2245 Search case-sensitivity is determined by the value of the variable
2246 `case-fold-search', which see.
2247
2248 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2249 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2250 {
2251 return search_command (string, bound, noerror, count, 1, false, false);
2252 }
2253
2254 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2255 "sRE search backward: ",
2256 doc: /* Search backward from point for regular expression REGEXP.
2257 This function is almost identical to `re-search-forward', except that
2258 by default it searches backward instead of forward, and the sign of
2259 COUNT also indicates exactly the opposite searching direction.
2260 See `re-search-forward' for details.
2261
2262 Note that searching backwards may give a shorter match than expected,
2263 because REGEXP is still matched in the forward direction. See Info
2264 anchor `(elisp) re-search-backward' for details. */)
2265 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2266 {
2267 return search_command (regexp, bound, noerror, count, -1, true, false);
2268 }
2269
2270 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2271 "sRE search: ",
2272 doc: /* Search forward from point for regular expression REGEXP.
2273 Set point to the end of the occurrence found, and return point.
2274 The optional second argument BOUND is a buffer position that bounds
2275 the search. The match found must not end after that position. A
2276 value of nil means search to the end of the accessible portion of
2277 the buffer.
2278 The optional third argument NOERROR indicates how errors are handled
2279 when the search fails. If it is nil or omitted, emit an error; if
2280 it is t, simply return nil and do nothing; if it is neither nil nor
2281 t, move to the limit of search and return nil.
2282 The optional fourth argument COUNT is a number that indicates the
2283 search direction and the number of occurrences to search for. If it
2284 is positive, search forward for COUNT successive occurrences; if it
2285 is negative, search backward, instead of forward, for -COUNT
2286 occurrences. A value of nil means the same as 1.
2287 With COUNT positive/negative, the match found is the COUNTth/-COUNTth
2288 one in the buffer located entirely after/before the origin of the
2289 search.
2290
2291 Search case-sensitivity is determined by the value of the variable
2292 `case-fold-search', which see.
2293
2294 See also the functions `match-beginning', `match-end', `match-string',
2295 and `replace-match'. */)
2296 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2297 {
2298 return search_command (regexp, bound, noerror, count, 1, true, false);
2299 }
2300
2301 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2302 "sPosix search backward: ",
2303 doc: /* Search backward from point for match for REGEXP according to Posix rules.
2304 Find the longest match in accord with Posix regular expression rules.
2305 Set point to the beginning of the occurrence found, and return point.
2306 An optional second argument bounds the search; it is a buffer position.
2307 The match found must not begin before that position. A value of nil
2308 means search to the beginning of the accessible portion of the buffer.
2309 Optional third argument, if t, means if fail just return nil (no error).
2310 If not nil and not t, position at limit of search and return nil.
2311 Optional fourth argument COUNT, if a positive number, means to search
2312 for COUNT successive occurrences. If COUNT is negative, search
2313 forward, instead of backward, for -COUNT occurrences. A value of
2314 nil means the same as 1.
2315 With COUNT positive, the match found is the COUNTth to last one (or
2316 last, if COUNT is 1 or nil) in the buffer located entirely before
2317 the origin of the search; correspondingly with COUNT negative.
2318
2319 Search case-sensitivity is determined by the value of the variable
2320 `case-fold-search', which see.
2321
2322 See also the functions `match-beginning', `match-end', `match-string',
2323 and `replace-match'. */)
2324 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2325 {
2326 return search_command (regexp, bound, noerror, count, -1, true, true);
2327 }
2328
2329 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2330 "sPosix search: ",
2331 doc: /* Search forward from point for REGEXP according to Posix rules.
2332 Find the longest match in accord with Posix regular expression rules.
2333 Set point to the end of the occurrence found, and return point.
2334 An optional second argument bounds the search; it is a buffer position.
2335 The match found must not end after that position. A value of nil
2336 means search to the end of the accessible portion of the buffer.
2337 Optional third argument, if t, means if fail just return nil (no error).
2338 If not nil and not t, move to limit of search and return nil.
2339 Optional fourth argument COUNT, if a positive number, means to search
2340 for COUNT successive occurrences. If COUNT is negative, search
2341 backward, instead of forward, for -COUNT occurrences. A value of
2342 nil means the same as 1.
2343 With COUNT positive, the match found is the COUNTth one (or first,
2344 if COUNT is 1 or nil) in the buffer located entirely after the
2345 origin of the search; correspondingly with COUNT negative.
2346
2347 Search case-sensitivity is determined by the value of the variable
2348 `case-fold-search', which see.
2349
2350 See also the functions `match-beginning', `match-end', `match-string',
2351 and `replace-match'. */)
2352 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2353 {
2354 return search_command (regexp, bound, noerror, count, 1, true, true);
2355 }
2356
2357 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2358 doc: /* Replace text matched by last search with NEWTEXT.
2359 Leave point at the end of the replacement text.
2360
2361 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2362 the replacement text. Otherwise, maybe capitalize the whole text, or
2363 maybe just word initials, based on the replaced text. If the replaced
2364 text has only capital letters and has at least one multiletter word,
2365 convert NEWTEXT to all caps. Otherwise if all words are capitalized
2366 in the replaced text, capitalize each word in NEWTEXT.
2367
2368 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2369 Otherwise treat `\\' as special:
2370 `\\&' in NEWTEXT means substitute original matched text.
2371 `\\N' means substitute what matched the Nth `\\(...\\)'.
2372 If Nth parens didn't match, substitute nothing.
2373 `\\\\' means insert one `\\'.
2374 `\\?' is treated literally
2375 (for compatibility with `query-replace-regexp').
2376 Any other character following `\\' signals an error.
2377 Case conversion does not apply to these substitutions.
2378
2379 If optional fourth argument STRING is non-nil, it should be a string
2380 to act on; this should be the string on which the previous match was
2381 done via `string-match'. In this case, `replace-match' creates and
2382 returns a new string, made by copying STRING and replacing the part of
2383 STRING that was matched (the original STRING itself is not altered).
2384
2385 The optional fifth argument SUBEXP specifies a subexpression;
2386 it says to replace just that subexpression with NEWTEXT,
2387 rather than replacing the entire matched text.
2388 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2389 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2390 NEWTEXT in place of subexp N.
2391 This is useful only after a regular expression search or match,
2392 since only regular expressions have distinguished subexpressions. */)
2393 (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2394 {
2395 enum { nochange, all_caps, cap_initial } case_action;
2396 ptrdiff_t pos, pos_byte;
2397 bool some_multiletter_word;
2398 bool some_lowercase;
2399 bool some_uppercase;
2400 bool some_nonuppercase_initial;
2401 int c, prevc;
2402 ptrdiff_t sub;
2403 ptrdiff_t opoint, newpoint;
2404
2405 CHECK_STRING (newtext);
2406
2407 if (! NILP (string))
2408 CHECK_STRING (string);
2409
2410 /* Most replacement texts don't contain any backslash directives in
2411 the replacements. Check whether that's the case, which will
2412 enable us to take the fast path later. */
2413 if (NILP (literal)
2414 && !memchr (SSDATA (newtext), '\\', SBYTES (newtext)))
2415 literal = Qt;
2416
2417 case_action = nochange; /* We tried an initialization */
2418 /* but some C compilers blew it */
2419
2420 ptrdiff_t num_regs = search_regs.num_regs;
2421 if (num_regs <= 0)
2422 error ("`replace-match' called before any match found");
2423
2424 sub = !NILP (subexp) ? check_integer_range (subexp, 0, num_regs - 1) : 0;
2425 ptrdiff_t sub_start = search_regs.start[sub];
2426 ptrdiff_t sub_end = search_regs.end[sub];
2427 eassert (sub_start <= sub_end);
2428
2429 /* Check whether the text to replace is present in the buffer/string. */
2430 if (! (NILP (string)
2431 ? BEGV <= sub_start && sub_end <= ZV
2432 : 0 <= sub_start && sub_end <= SCHARS (string)))
2433 {
2434 if (sub_start < 0)
2435 xsignal2 (Qerror,
2436 build_string ("replace-match subexpression does not exist"),
2437 subexp);
2438 args_out_of_range (make_fixnum (sub_start), make_fixnum (sub_end));
2439 }
2440
2441 if (NILP (fixedcase))
2442 {
2443 /* Decide how to casify by examining the matched text. */
2444 ptrdiff_t last;
2445
2446 pos = sub_start;
2447 last = sub_end;
2448
2449 if (NILP (string))
2450 pos_byte = CHAR_TO_BYTE (pos);
2451 else
2452 pos_byte = string_char_to_byte (string, pos);
2453
2454 prevc = '\n';
2455 case_action = all_caps;
2456
2457 /* some_multiletter_word is set nonzero if any original word
2458 is more than one letter long. */
2459 some_multiletter_word = 0;
2460 some_lowercase = 0;
2461 some_nonuppercase_initial = 0;
2462 some_uppercase = 0;
2463
2464 while (pos < last)
2465 {
2466 if (NILP (string))
2467 {
2468 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2469 inc_both (&pos, &pos_byte);
2470 }
2471 else
2472 c = fetch_string_char_as_multibyte_advance (string,
2473 &pos, &pos_byte);
2474
2475 if (lowercasep (c))
2476 {
2477 /* Cannot be all caps if any original char is lower case */
2478
2479 some_lowercase = 1;
2480 if (SYNTAX (prevc) != Sword)
2481 some_nonuppercase_initial = 1;
2482 else
2483 some_multiletter_word = 1;
2484 }
2485 else if (uppercasep (c))
2486 {
2487 some_uppercase = 1;
2488 if (SYNTAX (prevc) != Sword)
2489 ;
2490 else
2491 some_multiletter_word = 1;
2492 }
2493 else
2494 {
2495 /* If the initial is a caseless word constituent,
2496 treat that like a lowercase initial. */
2497 if (SYNTAX (prevc) != Sword)
2498 some_nonuppercase_initial = 1;
2499 }
2500
2501 prevc = c;
2502 }
2503
2504 /* Convert to all caps if the old text is all caps
2505 and has at least one multiletter word. */
2506 if (! some_lowercase && some_multiletter_word)
2507 case_action = all_caps;
2508 /* Capitalize each word, if the old text has all capitalized words. */
2509 else if (!some_nonuppercase_initial && some_multiletter_word)
2510 case_action = cap_initial;
2511 else if (!some_nonuppercase_initial && some_uppercase)
2512 /* Should x -> yz, operating on X, give Yz or YZ?
2513 We'll assume the latter. */
2514 case_action = all_caps;
2515 else
2516 case_action = nochange;
2517 }
2518
2519 /* Do replacement in a string. */
2520 if (!NILP (string))
2521 {
2522 Lisp_Object before, after;
2523
2524 before = Fsubstring (string, make_fixnum (0), make_fixnum (sub_start));
2525 after = Fsubstring (string, make_fixnum (sub_end), Qnil);
2526
2527 /* Substitute parts of the match into NEWTEXT
2528 if desired. */
2529 if (NILP (literal))
2530 {
2531 ptrdiff_t lastpos = 0;
2532 ptrdiff_t lastpos_byte = 0;
2533 /* We build up the substituted string in ACCUM. */
2534 Lisp_Object accum;
2535 Lisp_Object middle;
2536 ptrdiff_t length = SBYTES (newtext);
2537
2538 accum = Qnil;
2539
2540 for (pos_byte = 0, pos = 0; pos_byte < length;)
2541 {
2542 ptrdiff_t substart = -1;
2543 ptrdiff_t subend = 0;
2544 bool delbackslash = 0;
2545
2546 c = fetch_string_char_advance (newtext, &pos, &pos_byte);
2547
2548 if (c == '\\')
2549 {
2550 c = fetch_string_char_advance (newtext, &pos, &pos_byte);
2551
2552 if (c == '&')
2553 {
2554 substart = sub_start;
2555 subend = sub_end;
2556 }
2557 else if (c >= '1' && c <= '9')
2558 {
2559 if (c - '0' < num_regs
2560 && search_regs.start[c - '0'] >= 0)
2561 {
2562 substart = search_regs.start[c - '0'];
2563 subend = search_regs.end[c - '0'];
2564 }
2565 else
2566 {
2567 /* If that subexp did not match,
2568 replace \\N with nothing. */
2569 substart = 0;
2570 subend = 0;
2571 }
2572 }
2573 else if (c == '\\')
2574 delbackslash = 1;
2575 else if (c != '?')
2576 error ("Invalid use of `\\' in replacement text");
2577 }
2578 if (substart >= 0)
2579 {
2580 if (pos - 2 != lastpos)
2581 middle = substring_both (newtext, lastpos,
2582 lastpos_byte,
2583 pos - 2, pos_byte - 2);
2584 else
2585 middle = Qnil;
2586 accum = concat3 (accum, middle,
2587 Fsubstring (string,
2588 make_fixnum (substart),
2589 make_fixnum (subend)));
2590 lastpos = pos;
2591 lastpos_byte = pos_byte;
2592 }
2593 else if (delbackslash)
2594 {
2595 middle = substring_both (newtext, lastpos,
2596 lastpos_byte,
2597 pos - 1, pos_byte - 1);
2598
2599 accum = concat2 (accum, middle);
2600 lastpos = pos;
2601 lastpos_byte = pos_byte;
2602 }
2603 }
2604
2605 if (pos != lastpos)
2606 middle = substring_both (newtext, lastpos,
2607 lastpos_byte,
2608 pos, pos_byte);
2609 else
2610 middle = Qnil;
2611
2612 newtext = concat2 (accum, middle);
2613 }
2614
2615 /* Do case substitution in NEWTEXT if desired. */
2616 if (case_action == all_caps)
2617 newtext = Fupcase (newtext);
2618 else if (case_action == cap_initial)
2619 newtext = Fupcase_initials (newtext);
2620
2621 return concat3 (before, newtext, after);
2622 }
2623
2624 /* Record point. A nonpositive OPOINT is actually an offset from ZV. */
2625 opoint = PT <= sub_start ? PT : max (PT, sub_end) - ZV;
2626
2627 /* If we want non-literal replacement,
2628 perform substitution on the replacement string. */
2629 if (NILP (literal))
2630 {
2631 ptrdiff_t length = SBYTES (newtext);
2632 unsigned char *substed;
2633 ptrdiff_t substed_alloc_size, substed_len;
2634 bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2635 bool str_multibyte = STRING_MULTIBYTE (newtext);
2636 bool really_changed = 0;
2637
2638 substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
2639 ? length * 2 + 100
2640 : STRING_BYTES_BOUND);
2641 substed = xmalloc (substed_alloc_size);
2642 substed_len = 0;
2643
2644 /* Go thru NEWTEXT, producing the actual text to insert in
2645 SUBSTED while adjusting multibyteness to that of the current
2646 buffer. */
2647
2648 for (pos_byte = 0, pos = 0; pos_byte < length;)
2649 {
2650 unsigned char str[MAX_MULTIBYTE_LENGTH];
2651 const unsigned char *add_stuff = NULL;
2652 ptrdiff_t add_len = 0;
2653 ptrdiff_t idx = -1;
2654 ptrdiff_t begbyte UNINIT;
2655
2656 if (str_multibyte)
2657 {
2658 c = fetch_string_char_advance_no_check (newtext,
2659 &pos, &pos_byte);
2660 if (!buf_multibyte)
2661 c = CHAR_TO_BYTE8 (c);
2662 }
2663 else
2664 {
2665 /* Note that we don't have to increment POS. */
2666 c = SREF (newtext, pos_byte++);
2667 if (buf_multibyte)
2668 c = make_char_multibyte (c);
2669 }
2670
2671 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2672 or set IDX to a match index, which means put that part
2673 of the buffer text into SUBSTED. */
2674
2675 if (c == '\\')
2676 {
2677 really_changed = 1;
2678
2679 if (str_multibyte)
2680 {
2681 c = fetch_string_char_advance_no_check (newtext,
2682 &pos, &pos_byte);
2683 if (!buf_multibyte && !ASCII_CHAR_P (c))
2684 c = CHAR_TO_BYTE8 (c);
2685 }
2686 else
2687 {
2688 c = SREF (newtext, pos_byte++);
2689 if (buf_multibyte)
2690 c = make_char_multibyte (c);
2691 }
2692
2693 if (c == '&')
2694 idx = sub;
2695 else if ('1' <= c && c <= '9' && c - '0' < num_regs)
2696 {
2697 if (search_regs.start[c - '0'] >= 1)
2698 idx = c - '0';
2699 }
2700 else if (c == '\\')
2701 add_len = 1, add_stuff = (unsigned char *) "\\";
2702 else
2703 {
2704 xfree (substed);
2705 error ("Invalid use of `\\' in replacement text");
2706 }
2707 }
2708 else
2709 {
2710 add_len = CHAR_STRING (c, str);
2711 add_stuff = str;
2712 }
2713
2714 /* If we want to copy part of a previous match,
2715 set up ADD_STUFF and ADD_LEN to point to it. */
2716 if (idx >= 0)
2717 {
2718 begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2719 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2720 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2721 move_gap_both (search_regs.start[idx], begbyte);
2722 }
2723
2724 /* Now the stuff we want to add to SUBSTED
2725 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2726
2727 /* Make sure SUBSTED is big enough. */
2728 if (substed_alloc_size - substed_len < add_len)
2729 substed =
2730 xpalloc (substed, &substed_alloc_size,
2731 add_len - (substed_alloc_size - substed_len),
2732 STRING_BYTES_BOUND, 1);
2733
2734 /* We compute this after the call to xpalloc, because that
2735 could cause buffer text be relocated when ralloc.c is used. */
2736 if (idx >= 0)
2737 add_stuff = BYTE_POS_ADDR (begbyte);
2738
2739 /* Now add to the end of SUBSTED. */
2740 if (add_stuff)
2741 {
2742 memcpy (substed + substed_len, add_stuff, add_len);
2743 substed_len += add_len;
2744 }
2745 }
2746
2747 if (really_changed)
2748 newtext = make_specified_string ((const char *) substed, -1,
2749 substed_len, buf_multibyte);
2750 xfree (substed);
2751 }
2752
2753 newpoint = sub_start + SCHARS (newtext);
2754
2755 /* Replace the old text with the new in the cleanest possible way. */
2756 replace_range (sub_start, sub_end, newtext, 1, 0, 1, true, true);
2757
2758 if (case_action == all_caps)
2759 Fupcase_region (make_fixnum (search_regs.start[sub]),
2760 make_fixnum (newpoint),
2761 Qnil);
2762 else if (case_action == cap_initial)
2763 Fupcase_initials_region (make_fixnum (search_regs.start[sub]),
2764 make_fixnum (newpoint), Qnil);
2765
2766 /* The replace_range etc. functions can trigger modification hooks
2767 (see signal_before_change and signal_after_change). Try to error
2768 out if these hooks clobber the match data since clobbering can
2769 result in confusing bugs. We used to check for changes in
2770 search_regs start and end, but that fails if modification hooks
2771 remove or add text earlier in the buffer, so just check num_regs
2772 now. */
2773 if (search_regs.num_regs != num_regs)
2774 error ("Match data clobbered by buffer modification hooks");
2775
2776 /* Put point back where it was in the text, if possible. */
2777 TEMP_SET_PT (clip_to_bounds (BEGV, opoint + (opoint <= 0 ? ZV : 0), ZV));
2778 /* Now move point "officially" to the end of the inserted replacement. */
2779 move_if_not_intangible (newpoint);
2780
2781 signal_after_change (sub_start, sub_end - sub_start, SCHARS (newtext));
2782 update_compositions (sub_start, newpoint, CHECK_BORDER);
2783
2784 return Qnil;
2785 }
2786
2787 static Lisp_Object
2788 match_limit (Lisp_Object num, bool beginningp)
2789 {
2790 EMACS_INT n;
2791
2792 CHECK_FIXNUM (num);
2793 n = XFIXNUM (num);
2794 if (n < 0)
2795 args_out_of_range (num, make_fixnum (0));
2796 if (search_regs.num_regs <= 0)
2797 error ("No match data, because no search succeeded");
2798 if (n >= search_regs.num_regs
2799 || search_regs.start[n] < 0)
2800 return Qnil;
2801 return (make_fixnum ((beginningp) ? search_regs.start[n]
2802 : search_regs.end[n]));
2803 }
2804
2805 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2806 doc: /* Return position of start of text matched by last search.
2807 SUBEXP, a number, specifies which parenthesized expression in the last
2808 regexp.
2809 Value is nil if SUBEXPth pair didn't match, or there were less than
2810 SUBEXP pairs.
2811 Zero means the entire text matched by the whole regexp or whole string.
2812
2813 Return value is undefined if the last search failed. */)
2814 (Lisp_Object subexp)
2815 {
2816 return match_limit (subexp, 1);
2817 }
2818
2819 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2820 doc: /* Return position of end of text matched by last search.
2821 SUBEXP, a number, specifies which parenthesized expression in the last
2822 regexp.
2823 Value is nil if SUBEXPth pair didn't match, or there were less than
2824 SUBEXP pairs.
2825 Zero means the entire text matched by the whole regexp or whole string.
2826
2827 Return value is undefined if the last search failed. */)
2828 (Lisp_Object subexp)
2829 {
2830 return match_limit (subexp, 0);
2831 }
2832
2833 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2834 doc: /* Return a list of positions that record text matched by the last search.
2835 Element 2N of the returned list is the position of the beginning of the
2836 match of the Nth subexpression; it corresponds to `(match-beginning N)';
2837 element 2N + 1 is the position of the end of the match of the Nth
2838 subexpression; it corresponds to `(match-end N)'. See `match-beginning'
2839 and `match-end'.
2840 If the last search was on a buffer, all the elements are by default
2841 markers or nil (nil when the Nth pair didn't match); they are integers
2842 or nil if the search was on a string. But if the optional argument
2843 INTEGERS is non-nil, the elements that represent buffer positions are
2844 always integers, not markers, and (if the search was on a buffer) the
2845 buffer itself is appended to the list as one additional element.
2846
2847 Use `set-match-data' to reinstate the match data from the elements of
2848 this list.
2849
2850 Note that non-matching optional groups at the end of the regexp are
2851 elided instead of being represented with two `nil's each. For instance:
2852
2853 (progn
2854 (string-match "^\\(a\\)?\\(b\\)\\(c\\)?$" "b")
2855 (match-data))
2856 => (0 1 nil nil 0 1)
2857
2858 If REUSE is a list, store the value in REUSE by destructively modifying it.
2859 If REUSE is long enough to hold all the values, its length remains the
2860 same, and any unused elements are set to nil. If REUSE is not long
2861 enough, it is extended. Note that if REUSE is long enough and INTEGERS
2862 is non-nil, no consing is done to make the return value; this minimizes GC.
2863
2864 If optional third argument RESEAT is non-nil, any previous markers on the
2865 REUSE list will be modified to point to nowhere.
2866
2867 Return value is undefined if the last search failed. */)
2868 (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2869 {
2870 Lisp_Object tail, prev;
2871 Lisp_Object *data;
2872 ptrdiff_t i, len;
2873
2874 if (!NILP (reseat))
2875 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2876 if (MARKERP (XCAR (tail)))
2877 {
2878 unchain_marker (XMARKER (XCAR (tail)));
2879 XSETCAR (tail, Qnil);
2880 }
2881
2882 if (NILP (last_thing_searched))
2883 return Qnil;
2884
2885 prev = Qnil;
2886
2887 USE_SAFE_ALLOCA;
2888 SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
2889
2890 len = 0;
2891 for (i = 0; i < search_regs.num_regs; i++)
2892 {
2893 ptrdiff_t start = search_regs.start[i];
2894 if (start >= 0)
2895 {
2896 if (BASE_EQ (last_thing_searched, Qt)
2897 || ! NILP (integers))
2898 {
2899 XSETFASTINT (data[2 * i], start);
2900 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2901 }
2902 else if (BUFFERP (last_thing_searched))
2903 {
2904 data[2 * i] = Fmake_marker ();
2905 Fset_marker (data[2 * i],
2906 make_fixnum (start),
2907 last_thing_searched);
2908 data[2 * i + 1] = Fmake_marker ();
2909 Fset_marker (data[2 * i + 1],
2910 make_fixnum (search_regs.end[i]),
2911 last_thing_searched);
2912 }
2913 else
2914 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2915 emacs_abort ();
2916
2917 len = 2 * i + 2;
2918 }
2919 else
2920 data[2 * i] = data[2 * i + 1] = Qnil;
2921 }
2922
2923 if (BUFFERP (last_thing_searched) && !NILP (integers))
2924 {
2925 data[len] = last_thing_searched;
2926 len++;
2927 }
2928
2929 /* If REUSE is not usable, cons up the values and return them. */
2930 if (! CONSP (reuse))
2931 reuse = Flist (len, data);
2932 else
2933 {
2934 /* If REUSE is a list, store as many value elements as will fit
2935 into the elements of REUSE. */
2936 for (i = 0, tail = reuse; CONSP (tail);
2937 i++, tail = XCDR (tail))
2938 {
2939 if (i < len)
2940 XSETCAR (tail, data[i]);
2941 else
2942 XSETCAR (tail, Qnil);
2943 prev = tail;
2944 }
2945
2946 /* If we couldn't fit all value elements into REUSE,
2947 cons up the rest of them and add them to the end of REUSE. */
2948 if (i < len)
2949 XSETCDR (prev, Flist (len - i, data + i));
2950 }
2951
2952 SAFE_FREE ();
2953 return reuse;
2954 }
2955
2956 /* We used to have an internal use variant of `reseat' described as:
2957
2958 If RESEAT is `evaporate', put the markers back on the free list
2959 immediately. No other references to the markers must exist in this
2960 case, so it is used only internally on the unwind stack and
2961 save-match-data from Lisp.
2962
2963 But it was ill-conceived: those supposedly-internal markers get exposed via
2964 the undo-list, so freeing them here is unsafe. */
2965
2966 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2967 doc: /* Set internal data on last search match from elements of LIST.
2968 LIST should have been created by calling `match-data' previously.
2969
2970 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2971 (register Lisp_Object list, Lisp_Object reseat)
2972 {
2973 ptrdiff_t i;
2974 register Lisp_Object marker;
2975
2976 if (running_asynch_code)
2977 save_search_regs ();
2978
2979 CHECK_LIST (list);
2980
2981 /* Unless we find a marker with a buffer or an explicit buffer
2982 in LIST, assume that this match data came from a string. */
2983 last_thing_searched = Qt;
2984
2985 /* Allocate registers if they don't already exist. */
2986 {
2987 ptrdiff_t length = list_length (list) / 2;
2988
2989 if (length > search_regs.num_regs)
2990 {
2991 ptrdiff_t num_regs = search_regs.num_regs;
2992 search_regs.start =
2993 xpalloc (search_regs.start, &num_regs, length - num_regs,
2994 min (PTRDIFF_MAX, UINT_MAX), sizeof *search_regs.start);
2995 search_regs.end =
2996 xrealloc (search_regs.end, num_regs * sizeof *search_regs.end);
2997
2998 for (i = search_regs.num_regs; i < num_regs; i++)
2999 search_regs.start[i] = -1;
3000
3001 search_regs.num_regs = num_regs;
3002 }
3003
3004 for (i = 0; CONSP (list); i++)
3005 {
3006 marker = XCAR (list);
3007 if (BUFFERP (marker))
3008 {
3009 last_thing_searched = marker;
3010 break;
3011 }
3012 if (i >= length)
3013 break;
3014 if (NILP (marker))
3015 {
3016 search_regs.start[i] = -1;
3017 list = XCDR (list);
3018 }
3019 else
3020 {
3021 Lisp_Object from;
3022 Lisp_Object m;
3023
3024 m = marker;
3025 if (MARKERP (marker))
3026 {
3027 if (XMARKER (marker)->buffer == 0)
3028 XSETFASTINT (marker, 0);
3029 else
3030 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3031 }
3032
3033 CHECK_FIXNUM_COERCE_MARKER (marker);
3034 from = marker;
3035
3036 if (!NILP (reseat) && MARKERP (m))
3037 {
3038 unchain_marker (XMARKER (m));
3039 XSETCAR (list, Qnil);
3040 }
3041
3042 if ((list = XCDR (list), !CONSP (list)))
3043 break;
3044
3045 m = marker = XCAR (list);
3046
3047 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3048 XSETFASTINT (marker, 0);
3049
3050 CHECK_FIXNUM_COERCE_MARKER (marker);
3051 if (PTRDIFF_MIN <= XFIXNUM (from) && XFIXNUM (from) <= PTRDIFF_MAX
3052 && PTRDIFF_MIN <= XFIXNUM (marker)
3053 && XFIXNUM (marker) <= PTRDIFF_MAX)
3054 {
3055 search_regs.start[i] = XFIXNUM (from);
3056 search_regs.end[i] = XFIXNUM (marker);
3057 }
3058 else
3059 {
3060 search_regs.start[i] = -1;
3061 }
3062
3063 if (!NILP (reseat) && MARKERP (m))
3064 {
3065 unchain_marker (XMARKER (m));
3066 XSETCAR (list, Qnil);
3067 }
3068 }
3069 list = XCDR (list);
3070 }
3071
3072 for (; i < search_regs.num_regs; i++)
3073 search_regs.start[i] = -1;
3074 }
3075
3076 return Qnil;
3077 }
3078
3079 DEFUN ("match-data--translate", Fmatch_data__translate, Smatch_data__translate,
3080 1, 1, 0,
3081 doc: /* Add N to all positions in the match data. Internal. */)
3082 (Lisp_Object n)
3083 {
3084 CHECK_FIXNUM (n);
3085 EMACS_INT delta = XFIXNUM (n);
3086 if (!NILP (last_thing_searched))
3087 for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
3088 if (search_regs.start[i] >= 0)
3089 {
3090 search_regs.start[i] = max (0, search_regs.start[i] + delta);
3091 search_regs.end[i] = max (0, search_regs.end[i] + delta);
3092 }
3093 return Qnil;
3094 }
3095
3096 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3097 if asynchronous code (filter or sentinel) is running. */
3098 static void
3099 save_search_regs (void)
3100 {
3101 if (saved_search_regs.num_regs == 0)
3102 {
3103 saved_search_regs = search_regs;
3104 saved_last_thing_searched = last_thing_searched;
3105 last_thing_searched = Qnil;
3106 search_regs.num_regs = 0;
3107 search_regs.start = 0;
3108 search_regs.end = 0;
3109 }
3110 }
3111
3112 /* Called upon exit from filters and sentinels. */
3113 void
3114 restore_search_regs (void)
3115 {
3116 if (saved_search_regs.num_regs != 0)
3117 {
3118 if (search_regs.num_regs > 0)
3119 {
3120 xfree (search_regs.start);
3121 xfree (search_regs.end);
3122 }
3123 search_regs = saved_search_regs;
3124 last_thing_searched = saved_last_thing_searched;
3125 saved_last_thing_searched = Qnil;
3126 saved_search_regs.num_regs = 0;
3127 }
3128 }
3129
3130 /* Called from replace-match via replace_range. */
3131 void
3132 update_search_regs (ptrdiff_t oldstart, ptrdiff_t oldend, ptrdiff_t newend)
3133 {
3134 /* Adjust search data for this change. */
3135 ptrdiff_t change = newend - oldend;
3136 ptrdiff_t i;
3137
3138 for (i = 0; i < search_regs.num_regs; i++)
3139 {
3140 if (search_regs.start[i] >= oldend)
3141 search_regs.start[i] += change;
3142 else if (search_regs.start[i] > oldstart)
3143 search_regs.start[i] = oldstart;
3144 if (search_regs.end[i] >= oldend)
3145 search_regs.end[i] += change;
3146 else if (search_regs.end[i] > oldstart)
3147 search_regs.end[i] = oldstart;
3148 }
3149 }
3150
3151 static void
3152 unwind_set_match_data (Lisp_Object list)
3153 {
3154 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3155 Fset_match_data (list, Qt);
3156 }
3157
3158 /* Called to unwind protect the match data. */
3159 void
3160 record_unwind_save_match_data (void)
3161 {
3162 record_unwind_protect (unwind_set_match_data,
3163 Fmatch_data (Qnil, Qnil, Qnil));
3164 }
3165
3166 /* Quote a string to deactivate reg-expr chars */
3167
3168 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3169 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3170 (Lisp_Object string)
3171 {
3172 char *in, *out, *end;
3173 char *temp;
3174 ptrdiff_t backslashes_added = 0;
3175
3176 CHECK_STRING (string);
3177
3178 USE_SAFE_ALLOCA;
3179 SAFE_NALLOCA (temp, 2, SBYTES (string));
3180
3181 /* Now copy the data into the new string, inserting escapes. */
3182
3183 in = SSDATA (string);
3184 end = in + SBYTES (string);
3185 out = temp;
3186
3187 for (; in != end; in++)
3188 {
3189 if (*in == '['
3190 || *in == '*' || *in == '.' || *in == '\\'
3191 || *in == '?' || *in == '+'
3192 || *in == '^' || *in == '$')
3193 *out++ = '\\', backslashes_added++;
3194 *out++ = *in;
3195 }
3196
3197 Lisp_Object result
3198 = (backslashes_added > 0
3199 ? make_specified_string (temp,
3200 SCHARS (string) + backslashes_added,
3201 out - temp,
3202 STRING_MULTIBYTE (string))
3203 : string);
3204 SAFE_FREE ();
3205 return result;
3206 }
3207
3208 /* Like find_newline, but doesn't use the cache, and only searches forward. */
3209 ptrdiff_t
3210 find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
3211 ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
3212 ptrdiff_t *bytepos, bool allow_quit)
3213 {
3214 if (count > 0)
3215 {
3216 if (!end)
3217 end = ZV, end_byte = ZV_BYTE;
3218 }
3219 else
3220 {
3221 if (!end)
3222 end = BEGV, end_byte = BEGV_BYTE;
3223 }
3224 if (end_byte == -1)
3225 end_byte = CHAR_TO_BYTE (end);
3226
3227 if (counted)
3228 *counted = count;
3229
3230 if (count > 0)
3231 while (start != end)
3232 {
3233 /* Our innermost scanning loop is very simple; it doesn't know
3234 about gaps, buffer ends, or the newline cache. ceiling is
3235 the position of the last character before the next such
3236 obstacle --- the last character the dumb search loop should
3237 examine. */
3238 ptrdiff_t tem, ceiling_byte = end_byte - 1;
3239
3240 if (start_byte == -1)
3241 start_byte = CHAR_TO_BYTE (start);
3242
3243 /* The dumb loop can only scan text stored in contiguous
3244 bytes. BUFFER_CEILING_OF returns the last character
3245 position that is contiguous, so the ceiling is the
3246 position after that. */
3247 tem = BUFFER_CEILING_OF (start_byte);
3248 ceiling_byte = min (tem, ceiling_byte);
3249
3250 {
3251 /* The termination address of the dumb loop. */
3252 unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
3253 ptrdiff_t lim_byte = ceiling_byte + 1;
3254
3255 /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
3256 of the base, the cursor, and the next line. */
3257 ptrdiff_t base = start_byte - lim_byte;
3258 ptrdiff_t cursor, next;
3259
3260 for (cursor = base; cursor < 0; cursor = next)
3261 {
3262 /* The dumb loop. */
3263 unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
3264 next = nl ? nl - lim_addr : 0;
3265
3266 if (! nl)
3267 break;
3268 next++;
3269
3270 if (--count == 0)
3271 {
3272 if (bytepos)
3273 *bytepos = lim_byte + next;
3274 return BYTE_TO_CHAR (lim_byte + next);
3275 }
3276 if (allow_quit)
3277 maybe_quit ();
3278 }
3279
3280 start_byte = lim_byte;
3281 start = BYTE_TO_CHAR (start_byte);
3282 }
3283 }
3284
3285 if (counted)
3286 *counted -= count;
3287 if (bytepos)
3288 {
3289 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
3290 eassert (*bytepos == CHAR_TO_BYTE (start));
3291 }
3292 return start;
3293 }
3294
3295 DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
3296 0, 1, 0,
3297 doc: /* Check the newline cache of BUFFER against buffer contents.
3298
3299 BUFFER defaults to the current buffer.
3300
3301 Value is an array of 2 sub-arrays of buffer positions for newlines,
3302 the first based on the cache, the second based on actually scanning
3303 the buffer. If the buffer doesn't have a cache, the value is nil. */)
3304 (Lisp_Object buffer)
3305 {
3306 struct buffer *buf, *old = NULL;
3307 ptrdiff_t nl_count_cache, nl_count_buf;
3308 Lisp_Object cache_newlines, buf_newlines, val;
3309 ptrdiff_t from, found, i;
3310
3311 if (NILP (buffer))
3312 buf = current_buffer;
3313 else
3314 {
3315 CHECK_BUFFER (buffer);
3316 buf = XBUFFER (buffer);
3317 old = current_buffer;
3318 }
3319 if (buf->base_buffer)
3320 buf = buf->base_buffer;
3321
3322 /* If the buffer doesn't have a newline cache, return nil. */
3323 if (NILP (BVAR (buf, cache_long_scans))
3324 || buf->newline_cache == NULL)
3325 return Qnil;
3326
3327 /* find_newline can only work on the current buffer. */
3328 if (old != NULL)
3329 set_buffer_internal_1 (buf);
3330
3331 /* How many newlines are there according to the cache? */
3332 find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3333 TYPE_MAXIMUM (ptrdiff_t), &nl_count_cache, NULL, true);
3334
3335 /* Create vector and populate it. */
3336 cache_newlines = make_vector (nl_count_cache, make_fixnum (-1));
3337
3338 if (nl_count_cache)
3339 {
3340 for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3341 {
3342 ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
3343
3344 found = find_newline (from, from_byte, 0, -1, 1, &counted,
3345 NULL, true);
3346 if (counted == 0 || i >= nl_count_cache)
3347 break;
3348 ASET (cache_newlines, i, make_fixnum (found - 1));
3349 }
3350 }
3351
3352 /* Now do the same, but without using the cache. */
3353 find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3354 TYPE_MAXIMUM (ptrdiff_t), &nl_count_buf, NULL, true);
3355 buf_newlines = make_vector (nl_count_buf, make_fixnum (-1));
3356 if (nl_count_buf)
3357 {
3358 for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3359 {
3360 ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
3361
3362 found = find_newline1 (from, from_byte, 0, -1, 1, &counted,
3363 NULL, true);
3364 if (counted == 0 || i >= nl_count_buf)
3365 break;
3366 ASET (buf_newlines, i, make_fixnum (found - 1));
3367 }
3368 }
3369
3370 /* Construct the value and return it. */
3371 val = CALLN (Fvector, cache_newlines, buf_newlines);
3372
3373 if (old != NULL)
3374 set_buffer_internal_1 (old);
3375 return val;
3376 }
3377
3378
3379 static void syms_of_search_for_pdumper (void);
3380
3381 void
3382 syms_of_search (void)
3383 {
3384 for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
3385 {
3386 staticpro (&searchbufs[i].regexp);
3387 staticpro (&searchbufs[i].f_whitespace_regexp);
3388 staticpro (&searchbufs[i].syntax_table);
3389 }
3390
3391 /* Error condition used for failing searches. */
3392 DEFSYM (Qsearch_failed, "search-failed");
3393
3394 /* Error condition used for failing searches started by user, i.e.,
3395 where failure should not invoke the debugger. */
3396 DEFSYM (Quser_search_failed, "user-search-failed");
3397
3398 /* Error condition signaled when regexp compile_pattern fails. */
3399 DEFSYM (Qinvalid_regexp, "invalid-regexp");
3400
3401 Fput (Qsearch_failed, Qerror_conditions,
3402 pure_list (Qsearch_failed, Qerror));
3403 Fput (Qsearch_failed, Qerror_message,
3404 build_pure_c_string ("Search failed"));
3405
3406 Fput (Quser_search_failed, Qerror_conditions,
3407 pure_list (Quser_search_failed, Quser_error, Qsearch_failed, Qerror));
3408 Fput (Quser_search_failed, Qerror_message,
3409 build_pure_c_string ("Search failed"));
3410
3411 Fput (Qinvalid_regexp, Qerror_conditions,
3412 pure_list (Qinvalid_regexp, Qerror));
3413 Fput (Qinvalid_regexp, Qerror_message,
3414 build_pure_c_string ("Invalid regexp"));
3415
3416 re_match_object = Qnil;
3417 staticpro (&re_match_object);
3418
3419 DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3420 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3421 Some commands use this for user-specified regexps.
3422 Spaces that occur inside character classes or repetition operators
3423 or other such regexp constructs are not replaced with this.
3424 A value of nil (which is the normal value) means treat spaces
3425 literally. Note that a value with capturing groups can change the
3426 numbering of existing capture groups in unexpected ways. */);
3427 Vsearch_spaces_regexp = Qnil;
3428
3429 DEFSYM (Qinhibit_changing_match_data, "inhibit-changing-match-data");
3430 DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3431 doc: /* Internal use only.
3432 If non-nil, the primitive searching and matching functions
3433 such as `looking-at', `string-match', `re-search-forward', etc.,
3434 do not set the match data. The proper way to use this variable
3435 is to bind it with `let' around a small expression. */);
3436 Vinhibit_changing_match_data = Qnil;
3437
3438 defsubr (&Slooking_at);
3439 defsubr (&Sposix_looking_at);
3440 defsubr (&Sstring_match);
3441 defsubr (&Sposix_string_match);
3442 defsubr (&Ssearch_forward);
3443 defsubr (&Ssearch_backward);
3444 defsubr (&Sre_search_forward);
3445 defsubr (&Sre_search_backward);
3446 defsubr (&Sposix_search_forward);
3447 defsubr (&Sposix_search_backward);
3448 defsubr (&Sreplace_match);
3449 defsubr (&Smatch_beginning);
3450 defsubr (&Smatch_end);
3451 defsubr (&Smatch_data);
3452 defsubr (&Sset_match_data);
3453 defsubr (&Smatch_data__translate);
3454 defsubr (&Sregexp_quote);
3455 defsubr (&Snewline_cache_check);
3456
3457 pdumper_do_now_and_after_load (syms_of_search_for_pdumper);
3458 }
3459
3460 static void
3461 syms_of_search_for_pdumper (void)
3462 {
3463 for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
3464 {
3465 searchbufs[i].buf.allocated = 100;
3466 searchbufs[i].buf.buffer = xmalloc (100);
3467 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3468 searchbufs[i].regexp = Qnil;
3469 searchbufs[i].f_whitespace_regexp = Qnil;
3470 searchbufs[i].busy = false;
3471 searchbufs[i].syntax_table = Qnil;
3472 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3473 }
3474 searchbuf_head = &searchbufs[0];
3475 }