1 /* String search routines for GNU Emacs.
2
3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2021 Free Software
4 Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or (at
11 your option) any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
20
21
22 #include <config.h>
23
24 #include "lisp.h"
25 #include "character.h"
26 #include "buffer.h"
27 #include "syntax.h"
28 #include "charset.h"
29 #include "region-cache.h"
30 #include "blockinput.h"
31 #include "intervals.h"
32 #include "pdumper.h"
33
34 #include "regex-emacs.h"
35
36 #define REGEXP_CACHE_SIZE 20
37
38 /* If the regexp is non-nil, then the buffer contains the compiled form
39 of that regexp, suitable for searching. */
40 struct regexp_cache
41 {
42 struct regexp_cache *next;
43 Lisp_Object regexp, f_whitespace_regexp;
44 /* Syntax table for which the regexp applies. We need this because
45 of character classes. If this is t, then the compiled pattern is valid
46 for any syntax-table. */
47 Lisp_Object syntax_table;
48 struct re_pattern_buffer buf;
49 char fastmap[0400];
50 /* True means regexp was compiled to do full POSIX backtracking. */
51 bool posix;
52 /* True means we're inside a buffer match. */
53 bool busy;
54 };
55
56 /* The instances of that struct. */
57 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
58
59 /* The head of the linked list; points to the most recently used buffer. */
60 static struct regexp_cache *searchbuf_head;
61
62 static void set_search_regs (ptrdiff_t, ptrdiff_t);
63 static void save_search_regs (void);
64 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
65 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
66 ptrdiff_t, ptrdiff_t);
67 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
68 Lisp_Object, Lisp_Object, ptrdiff_t,
69 ptrdiff_t, int);
70 static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
71 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
72 Lisp_Object, Lisp_Object, bool);
73
74 Lisp_Object re_match_object;
75
76 static AVOID
matcher_overflow(void)77 matcher_overflow (void)
78 {
79 error ("Stack overflow in regexp matcher");
80 }
81
82 static void
freeze_buffer_relocation(void)83 freeze_buffer_relocation (void)
84 {
85 #ifdef REL_ALLOC
86 /* Prevent ralloc.c from relocating the current buffer while
87 searching it. */
88 r_alloc_inhibit_buffer_relocation (1);
89 record_unwind_protect_int (r_alloc_inhibit_buffer_relocation, 0);
90 #endif
91 }
92
93 /* Compile a regexp and signal a Lisp error if anything goes wrong.
94 PATTERN is the pattern to compile.
95 CP is the place to put the result.
96 TRANSLATE is a translation table for ignoring case, or nil for none.
97 POSIX is true if we want full backtracking (POSIX style) for this pattern.
98 False means backtrack only enough to get a valid match.
99
100 The behavior also depends on Vsearch_spaces_regexp. */
101
102 static void
compile_pattern_1(struct regexp_cache * cp,Lisp_Object pattern,Lisp_Object translate,bool posix)103 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
104 Lisp_Object translate, bool posix)
105 {
106 const char *whitespace_regexp;
107 char *val;
108
109 eassert (!cp->busy);
110 cp->regexp = Qnil;
111 cp->buf.translate = translate;
112 cp->posix = posix;
113 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
114 cp->buf.charset_unibyte = charset_unibyte;
115 if (STRINGP (Vsearch_spaces_regexp))
116 cp->f_whitespace_regexp = Vsearch_spaces_regexp;
117 else
118 cp->f_whitespace_regexp = Qnil;
119
120 whitespace_regexp = STRINGP (Vsearch_spaces_regexp) ?
121 SSDATA (Vsearch_spaces_regexp) : NULL;
122
123 val = (char *) re_compile_pattern (SSDATA (pattern), SBYTES (pattern),
124 posix, whitespace_regexp, &cp->buf);
125
126 /* If the compiled pattern hard codes some of the contents of the
127 syntax-table, it can only be reused with *this* syntax table. */
128 cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
129
130 if (val)
131 xsignal1 (Qinvalid_regexp, build_string (val));
132
133 cp->regexp = Fcopy_sequence (pattern);
134 }
135
136 /* Shrink each compiled regexp buffer in the cache
137 to the size actually used right now.
138 This is called from garbage collection. */
139
140 void
shrink_regexp_cache(void)141 shrink_regexp_cache (void)
142 {
143 struct regexp_cache *cp;
144
145 for (cp = searchbuf_head; cp != 0; cp = cp->next)
146 if (!cp->busy)
147 {
148 cp->buf.allocated = cp->buf.used;
149 cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
150 }
151 }
152
153 /* Clear the regexp cache w.r.t. a particular syntax table,
154 because it was changed.
155 There is no danger of memory leak here because re_compile_pattern
156 automagically manages the memory in each re_pattern_buffer struct,
157 based on its `allocated' and `buffer' values. */
158 void
clear_regexp_cache(void)159 clear_regexp_cache (void)
160 {
161 int i;
162
163 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
164 /* It's tempting to compare with the syntax-table we've actually changed,
165 but it's not sufficient because char-table inheritance means that
166 modifying one syntax-table can change others at the same time. */
167 if (!searchbufs[i].busy && !EQ (searchbufs[i].syntax_table, Qt))
168 searchbufs[i].regexp = Qnil;
169 }
170
171 static void
unfreeze_pattern(void * arg)172 unfreeze_pattern (void *arg)
173 {
174 struct regexp_cache *searchbuf = arg;
175 searchbuf->busy = false;
176 }
177
178 static void
freeze_pattern(struct regexp_cache * searchbuf)179 freeze_pattern (struct regexp_cache *searchbuf)
180 {
181 eassert (!searchbuf->busy);
182 record_unwind_protect_ptr (unfreeze_pattern, searchbuf);
183 searchbuf->busy = true;
184 }
185
186 /* Compile a regexp if necessary, but first check to see if there's one in
187 the cache.
188 PATTERN is the pattern to compile.
189 TRANSLATE is a translation table for ignoring case, or nil for none.
190 REGP is the structure that says where to store the "register"
191 values that will result from matching this pattern.
192 If it is 0, we should compile the pattern not to record any
193 subexpression bounds.
194 POSIX is true if we want full backtracking (POSIX style) for this pattern.
195 False means backtrack only enough to get a valid match. */
196
197 static struct regexp_cache *
compile_pattern(Lisp_Object pattern,struct re_registers * regp,Lisp_Object translate,bool posix,bool multibyte)198 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
199 Lisp_Object translate, bool posix, bool multibyte)
200 {
201 struct regexp_cache *cp, **cpp, **lru_nonbusy;
202
203 for (cpp = &searchbuf_head, lru_nonbusy = NULL; ; cpp = &cp->next)
204 {
205 cp = *cpp;
206 if (!cp->busy)
207 lru_nonbusy = cpp;
208 /* Entries are initialized to nil, and may be set to nil by
209 compile_pattern_1 if the pattern isn't valid. Don't apply
210 string accessors in those cases. However, compile_pattern_1
211 is only applied to the cache entry we pick here to reuse. So
212 nil should never appear before a non-nil entry. */
213 if (NILP (cp->regexp))
214 goto compile_it;
215 if (SCHARS (cp->regexp) == SCHARS (pattern)
216 && !cp->busy
217 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
218 && !NILP (Fstring_equal (cp->regexp, pattern))
219 && EQ (cp->buf.translate, translate)
220 && cp->posix == posix
221 && (EQ (cp->syntax_table, Qt)
222 || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
223 && !NILP (Fequal (cp->f_whitespace_regexp, Vsearch_spaces_regexp))
224 && cp->buf.charset_unibyte == charset_unibyte)
225 break;
226
227 /* If we're at the end of the cache, compile into the last
228 (least recently used) non-busy cell in the cache. */
229 if (cp->next == 0)
230 {
231 if (!lru_nonbusy)
232 error ("Too much matching reentrancy");
233 cpp = lru_nonbusy;
234 cp = *cpp;
235 compile_it:
236 eassert (!cp->busy);
237 compile_pattern_1 (cp, pattern, translate, posix);
238 break;
239 }
240 }
241
242 /* When we get here, cp (aka *cpp) contains the compiled pattern,
243 either because we found it in the cache or because we just compiled it.
244 Move it to the front of the queue to mark it as most recently used. */
245 *cpp = cp->next;
246 cp->next = searchbuf_head;
247 searchbuf_head = cp;
248
249 /* Advise the searching functions about the space we have allocated
250 for register data. */
251 if (regp)
252 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
253
254 /* The compiled pattern can be used both for multibyte and unibyte
255 target. But, we have to tell which the pattern is used for. */
256 cp->buf.target_multibyte = multibyte;
257 return cp;
258 }
259
260
261 static Lisp_Object
looking_at_1(Lisp_Object string,bool posix)262 looking_at_1 (Lisp_Object string, bool posix)
263 {
264 Lisp_Object val;
265 unsigned char *p1, *p2;
266 ptrdiff_t s1, s2;
267 register ptrdiff_t i;
268
269 if (running_asynch_code)
270 save_search_regs ();
271
272 /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
273 table. */
274 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
275 BVAR (current_buffer, case_eqv_table));
276
277 CHECK_STRING (string);
278
279 /* Snapshot in case Lisp changes the value. */
280 bool preserve_match_data = NILP (Vinhibit_changing_match_data);
281
282 struct regexp_cache *cache_entry = compile_pattern (
283 string,
284 preserve_match_data ? &search_regs : NULL,
285 (!NILP (BVAR (current_buffer, case_fold_search))
286 ? BVAR (current_buffer, case_canon_table) : Qnil),
287 posix,
288 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
289
290 /* Do a pending quit right away, to avoid paradoxical behavior */
291 maybe_quit ();
292
293 /* Get pointers and sizes of the two strings
294 that make up the visible portion of the buffer. */
295
296 p1 = BEGV_ADDR;
297 s1 = GPT_BYTE - BEGV_BYTE;
298 p2 = GAP_END_ADDR;
299 s2 = ZV_BYTE - GPT_BYTE;
300 if (s1 < 0)
301 {
302 p2 = p1;
303 s2 = ZV_BYTE - BEGV_BYTE;
304 s1 = 0;
305 }
306 if (s2 < 0)
307 {
308 s1 = ZV_BYTE - BEGV_BYTE;
309 s2 = 0;
310 }
311
312 ptrdiff_t count = SPECPDL_INDEX ();
313 freeze_buffer_relocation ();
314 freeze_pattern (cache_entry);
315 re_match_object = Qnil;
316 i = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
317 PT_BYTE - BEGV_BYTE,
318 preserve_match_data ? &search_regs : NULL,
319 ZV_BYTE - BEGV_BYTE);
320
321 if (i == -2)
322 {
323 unbind_to (count, Qnil);
324 matcher_overflow ();
325 }
326
327 val = (i >= 0 ? Qt : Qnil);
328 if (preserve_match_data && i >= 0)
329 {
330 for (i = 0; i < search_regs.num_regs; i++)
331 if (search_regs.start[i] >= 0)
332 {
333 search_regs.start[i]
334 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
335 search_regs.end[i]
336 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
337 }
338 /* Set last_thing_searched only when match data is changed. */
339 XSETBUFFER (last_thing_searched, current_buffer);
340 }
341
342 return unbind_to (count, val);
343 }
344
345 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
346 doc: /* Return t if text after point matches regular expression REGEXP.
347 This function modifies the match data that `match-beginning',
348 `match-end' and `match-data' access; save and restore the match
349 data if you want to preserve them. */)
350 (Lisp_Object regexp)
351 {
352 return looking_at_1 (regexp, 0);
353 }
354
355 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
356 doc: /* Return t if text after point matches regular expression REGEXP.
357 Find the longest match, in accord with Posix regular expression rules.
358 This function modifies the match data that `match-beginning',
359 `match-end' and `match-data' access; save and restore the match
360 data if you want to preserve them. */)
361 (Lisp_Object regexp)
362 {
363 return looking_at_1 (regexp, 1);
364 }
365
366 static Lisp_Object
string_match_1(Lisp_Object regexp,Lisp_Object string,Lisp_Object start,bool posix)367 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
368 bool posix)
369 {
370 ptrdiff_t val;
371 struct re_pattern_buffer *bufp;
372 EMACS_INT pos;
373 ptrdiff_t pos_byte, i;
374
375 if (running_asynch_code)
376 save_search_regs ();
377
378 CHECK_STRING (regexp);
379 CHECK_STRING (string);
380
381 if (NILP (start))
382 pos = 0, pos_byte = 0;
383 else
384 {
385 ptrdiff_t len = SCHARS (string);
386
387 CHECK_FIXNUM (start);
388 pos = XFIXNUM (start);
389 if (pos < 0 && -pos <= len)
390 pos = len + pos;
391 else if (0 > pos || pos > len)
392 args_out_of_range (string, start);
393 pos_byte = string_char_to_byte (string, pos);
394 }
395
396 /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
397 table. */
398 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
399 BVAR (current_buffer, case_eqv_table));
400
401 bufp = &compile_pattern (regexp,
402 (NILP (Vinhibit_changing_match_data)
403 ? &search_regs : NULL),
404 (!NILP (BVAR (current_buffer, case_fold_search))
405 ? BVAR (current_buffer, case_canon_table) : Qnil),
406 posix,
407 STRING_MULTIBYTE (string))->buf;
408 re_match_object = string;
409 val = re_search (bufp, SSDATA (string),
410 SBYTES (string), pos_byte,
411 SBYTES (string) - pos_byte,
412 (NILP (Vinhibit_changing_match_data)
413 ? &search_regs : NULL));
414
415 /* Set last_thing_searched only when match data is changed. */
416 if (NILP (Vinhibit_changing_match_data))
417 last_thing_searched = Qt;
418
419 if (val == -2)
420 matcher_overflow ();
421 if (val < 0) return Qnil;
422
423 if (NILP (Vinhibit_changing_match_data))
424 for (i = 0; i < search_regs.num_regs; i++)
425 if (search_regs.start[i] >= 0)
426 {
427 search_regs.start[i]
428 = string_byte_to_char (string, search_regs.start[i]);
429 search_regs.end[i]
430 = string_byte_to_char (string, search_regs.end[i]);
431 }
432
433 return make_fixnum (string_byte_to_char (string, val));
434 }
435
436 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
437 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
438 Matching ignores case if `case-fold-search' is non-nil.
439 If third arg START is non-nil, start search at that index in STRING.
440 For index of first char beyond the match, do (match-end 0).
441 `match-end' and `match-beginning' also give indices of substrings
442 matched by parenthesis constructs in the pattern.
443
444 You can use the function `match-string' to extract the substrings
445 matched by the parenthesis constructions in REGEXP. */)
446 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
447 {
448 return string_match_1 (regexp, string, start, 0);
449 }
450
451 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
452 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
453 Find the longest match, in accord with Posix regular expression rules.
454 Case is ignored if `case-fold-search' is non-nil in the current buffer.
455 If third arg START is non-nil, start search at that index in STRING.
456 For index of first char beyond the match, do (match-end 0).
457 `match-end' and `match-beginning' also give indices of substrings
458 matched by parenthesis constructs in the pattern. */)
459 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
460 {
461 return string_match_1 (regexp, string, start, 1);
462 }
463
464 /* Match REGEXP against STRING using translation table TABLE,
465 searching all of STRING, and return the index of the match,
466 or negative on failure. This does not clobber the match data. */
467
468 ptrdiff_t
fast_string_match_internal(Lisp_Object regexp,Lisp_Object string,Lisp_Object table)469 fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
470 Lisp_Object table)
471 {
472 ptrdiff_t val;
473 struct re_pattern_buffer *bufp;
474
475 bufp = &compile_pattern (regexp, 0, table,
476 0, STRING_MULTIBYTE (string))->buf;
477 re_match_object = string;
478 val = re_search (bufp, SSDATA (string),
479 SBYTES (string), 0,
480 SBYTES (string), 0);
481 return val;
482 }
483
484 /* Match REGEXP against STRING, searching all of STRING ignoring case,
485 and return the index of the match, or negative on failure.
486 This does not clobber the match data.
487 We assume that STRING contains single-byte characters. */
488
489 ptrdiff_t
fast_c_string_match_ignore_case(Lisp_Object regexp,const char * string,ptrdiff_t len)490 fast_c_string_match_ignore_case (Lisp_Object regexp,
491 const char *string, ptrdiff_t len)
492 {
493 ptrdiff_t val;
494 struct re_pattern_buffer *bufp;
495
496 regexp = string_make_unibyte (regexp);
497 bufp = &compile_pattern (regexp, 0,
498 Vascii_canon_table, 0,
499 0)->buf;
500 re_match_object = Qt;
501 val = re_search (bufp, string, len, 0, len, 0);
502 return val;
503 }
504
505 /* Match REGEXP against the characters after POS to LIMIT, and return
506 the number of matched characters. If STRING is non-nil, match
507 against the characters in it. In that case, POS and LIMIT are
508 indices into the string. This function doesn't modify the match
509 data. */
510
511 ptrdiff_t
fast_looking_at(Lisp_Object regexp,ptrdiff_t pos,ptrdiff_t pos_byte,ptrdiff_t limit,ptrdiff_t limit_byte,Lisp_Object string)512 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
513 ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
514 {
515 bool multibyte;
516 unsigned char *p1, *p2;
517 ptrdiff_t s1, s2;
518 ptrdiff_t len;
519
520 if (STRINGP (string))
521 {
522 if (pos_byte < 0)
523 pos_byte = string_char_to_byte (string, pos);
524 if (limit_byte < 0)
525 limit_byte = string_char_to_byte (string, limit);
526 p1 = NULL;
527 s1 = 0;
528 p2 = SDATA (string);
529 s2 = SBYTES (string);
530 multibyte = STRING_MULTIBYTE (string);
531 }
532 else
533 {
534 if (pos_byte < 0)
535 pos_byte = CHAR_TO_BYTE (pos);
536 if (limit_byte < 0)
537 limit_byte = CHAR_TO_BYTE (limit);
538 pos_byte -= BEGV_BYTE;
539 limit_byte -= BEGV_BYTE;
540 p1 = BEGV_ADDR;
541 s1 = GPT_BYTE - BEGV_BYTE;
542 p2 = GAP_END_ADDR;
543 s2 = ZV_BYTE - GPT_BYTE;
544 if (s1 < 0)
545 {
546 p2 = p1;
547 s2 = ZV_BYTE - BEGV_BYTE;
548 s1 = 0;
549 }
550 if (s2 < 0)
551 {
552 s1 = ZV_BYTE - BEGV_BYTE;
553 s2 = 0;
554 }
555 multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
556 }
557
558 struct regexp_cache *cache_entry =
559 compile_pattern (regexp, 0, Qnil, 0, multibyte);
560 ptrdiff_t count = SPECPDL_INDEX ();
561 freeze_buffer_relocation ();
562 freeze_pattern (cache_entry);
563 re_match_object = STRINGP (string) ? string : Qnil;
564 len = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
565 pos_byte, NULL, limit_byte);
566
567 unbind_to (count, Qnil);
568 return len;
569 }
570
571
572 /* The newline cache: remembering which sections of text have no newlines. */
573
574 /* If the user has requested the long scans caching, make sure it's on.
575 Otherwise, make sure it's off.
576 This is our cheezy way of associating an action with the change of
577 state of a buffer-local variable. */
578 static struct region_cache *
newline_cache_on_off(struct buffer * buf)579 newline_cache_on_off (struct buffer *buf)
580 {
581 struct buffer *base_buf = buf;
582 bool indirect_p = false;
583
584 if (buf->base_buffer)
585 {
586 base_buf = buf->base_buffer;
587 indirect_p = true;
588 }
589
590 /* Don't turn on or off the cache in the base buffer, if the value
591 of cache-long-scans of the base buffer is inconsistent with that.
592 This is because doing so will just make the cache pure overhead,
593 since if we turn it on via indirect buffer, it will be
594 immediately turned off by its base buffer. */
595 if (NILP (BVAR (buf, cache_long_scans)))
596 {
597 if (!indirect_p
598 || NILP (BVAR (base_buf, cache_long_scans)))
599 {
600 /* It should be off. */
601 if (base_buf->newline_cache)
602 {
603 free_region_cache (base_buf->newline_cache);
604 base_buf->newline_cache = 0;
605 }
606 }
607 return NULL;
608 }
609 else
610 {
611 if (!indirect_p
612 || !NILP (BVAR (base_buf, cache_long_scans)))
613 {
614 /* It should be on. */
615 if (base_buf->newline_cache == 0)
616 base_buf->newline_cache = new_region_cache ();
617 }
618 return base_buf->newline_cache;
619 }
620 }
621
622
623 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
624
625 If COUNT is positive, search forwards; END must be >= START.
626 If COUNT is negative, search backwards for the -COUNTth instance;
627 END must be <= START.
628 If COUNT is zero, do anything you please; run rogue, for all I care.
629
630 If END is zero, use BEGV or ZV instead, as appropriate for the
631 direction indicated by COUNT. If START_BYTE is -1 it is unknown,
632 and similarly for END_BYTE.
633
634 If we find COUNT instances, set *COUNTED to COUNT, and return the
635 position past the COUNTth match. Note that for reverse motion
636 this is not the same as the usual convention for Emacs motion commands.
637
638 If we don't find COUNT instances before reaching END, set *COUNTED
639 to the number of newlines left found (negated if COUNT is negative),
640 and return END.
641
642 If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
643 to the returned character position.
644
645 If ALLOW_QUIT, check for quitting. That's good to do
646 except when inside redisplay. */
647
648 ptrdiff_t
find_newline(ptrdiff_t start,ptrdiff_t start_byte,ptrdiff_t end,ptrdiff_t end_byte,ptrdiff_t count,ptrdiff_t * counted,ptrdiff_t * bytepos,bool allow_quit)649 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
650 ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
651 ptrdiff_t *bytepos, bool allow_quit)
652 {
653 struct region_cache *newline_cache;
654 struct buffer *cache_buffer;
655
656 if (!end)
657 {
658 if (count > 0)
659 end = ZV, end_byte = ZV_BYTE;
660 else
661 end = BEGV, end_byte = BEGV_BYTE;
662 }
663 if (end_byte == -1)
664 end_byte = CHAR_TO_BYTE (end);
665
666 newline_cache = newline_cache_on_off (current_buffer);
667 if (current_buffer->base_buffer)
668 cache_buffer = current_buffer->base_buffer;
669 else
670 cache_buffer = current_buffer;
671
672 if (counted)
673 *counted = count;
674
675 if (count > 0)
676 while (start != end)
677 {
678 /* Our innermost scanning loop is very simple; it doesn't know
679 about gaps, buffer ends, or the newline cache. ceiling is
680 the position of the last character before the next such
681 obstacle --- the last character the dumb search loop should
682 examine. */
683 ptrdiff_t tem, ceiling_byte = end_byte - 1;
684
685 /* If we're using the newline cache, consult it to see whether
686 we can avoid some scanning. */
687 if (newline_cache)
688 {
689 ptrdiff_t next_change;
690 int result = 1;
691
692 while (start < end && result)
693 {
694 ptrdiff_t lim1;
695
696 result = region_cache_forward (cache_buffer, newline_cache,
697 start, &next_change);
698 if (result)
699 {
700 /* When the cache revalidation is deferred,
701 next-change might point beyond ZV, which will
702 cause assertion violation in CHAR_TO_BYTE below.
703 Limit next_change to ZV to avoid that. */
704 if (next_change > ZV)
705 next_change = ZV;
706 start = next_change;
707 lim1 = next_change = end;
708 }
709 else
710 lim1 = min (next_change, end);
711
712 /* The cache returned zero for this region; see if
713 this is because the region is known and includes
714 only newlines. While at that, count any newlines
715 we bump into, and exit if we found enough off them. */
716 start_byte = CHAR_TO_BYTE (start);
717 while (start < lim1
718 && FETCH_BYTE (start_byte) == '\n')
719 {
720 start_byte++;
721 start++;
722 if (--count == 0)
723 {
724 if (bytepos)
725 *bytepos = start_byte;
726 return start;
727 }
728 }
729 /* If we found a non-newline character before hitting
730 position where the cache will again return non-zero
731 (i.e. no newlines beyond that position), it means
732 this region is not yet known to the cache, and we
733 must resort to the "dumb loop" method. */
734 if (start < next_change && !result)
735 break;
736 result = 1;
737 }
738 if (start >= end)
739 {
740 start = end;
741 start_byte = end_byte;
742 break;
743 }
744
745 /* START should never be after END. */
746 if (start_byte > ceiling_byte)
747 start_byte = ceiling_byte;
748
749 /* Now the text after start is an unknown region, and
750 next_change is the position of the next known region. */
751 ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
752 }
753 else if (start_byte == -1)
754 start_byte = CHAR_TO_BYTE (start);
755
756 /* The dumb loop can only scan text stored in contiguous
757 bytes. BUFFER_CEILING_OF returns the last character
758 position that is contiguous, so the ceiling is the
759 position after that. */
760 tem = BUFFER_CEILING_OF (start_byte);
761 ceiling_byte = min (tem, ceiling_byte);
762
763 {
764 /* The termination address of the dumb loop. */
765 unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
766 ptrdiff_t lim_byte = ceiling_byte + 1;
767
768 /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
769 of the base, the cursor, and the next line. */
770 ptrdiff_t base = start_byte - lim_byte;
771 ptrdiff_t cursor, next;
772
773 for (cursor = base; cursor < 0; cursor = next)
774 {
775 /* The dumb loop. */
776 unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
777 next = nl ? nl - lim_addr : 0;
778
779 /* If we're using the newline cache, cache the fact that
780 the region we just traversed is free of newlines. */
781 if (newline_cache && cursor != next)
782 {
783 know_region_cache (cache_buffer, newline_cache,
784 BYTE_TO_CHAR (lim_byte + cursor),
785 BYTE_TO_CHAR (lim_byte + next));
786 /* know_region_cache can relocate buffer text. */
787 lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
788 }
789
790 if (! nl)
791 break;
792 next++;
793
794 if (--count == 0)
795 {
796 if (bytepos)
797 *bytepos = lim_byte + next;
798 return BYTE_TO_CHAR (lim_byte + next);
799 }
800 if (allow_quit)
801 maybe_quit ();
802 }
803
804 start_byte = lim_byte;
805 start = BYTE_TO_CHAR (start_byte);
806 }
807 }
808 else
809 while (start > end)
810 {
811 /* The last character to check before the next obstacle. */
812 ptrdiff_t tem, ceiling_byte = end_byte;
813
814 /* Consult the newline cache, if appropriate. */
815 if (newline_cache)
816 {
817 ptrdiff_t next_change;
818 int result = 1;
819
820 while (start > end && result)
821 {
822 ptrdiff_t lim1;
823
824 result = region_cache_backward (cache_buffer, newline_cache,
825 start, &next_change);
826 if (result)
827 {
828 start = next_change;
829 lim1 = next_change = end;
830 }
831 else
832 lim1 = max (next_change, end);
833 start_byte = CHAR_TO_BYTE (start);
834 while (start > lim1
835 && FETCH_BYTE (start_byte - 1) == '\n')
836 {
837 if (++count == 0)
838 {
839 if (bytepos)
840 *bytepos = start_byte;
841 return start;
842 }
843 start_byte--;
844 start--;
845 }
846 if (start > next_change && !result)
847 break;
848 result = 1;
849 }
850 if (start <= end)
851 {
852 start = end;
853 start_byte = end_byte;
854 break;
855 }
856
857 /* Start should never be at or before end. */
858 if (start_byte <= ceiling_byte)
859 start_byte = ceiling_byte + 1;
860
861 /* Now the text before start is an unknown region, and
862 next_change is the position of the next known region. */
863 ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
864 }
865 else if (start_byte == -1)
866 start_byte = CHAR_TO_BYTE (start);
867
868 /* Stop scanning before the gap. */
869 tem = BUFFER_FLOOR_OF (start_byte - 1);
870 ceiling_byte = max (tem, ceiling_byte);
871
872 {
873 /* The termination address of the dumb loop. */
874 unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
875
876 /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
877 the base, the cursor, and the previous line. These
878 offsets are at least -1. */
879 ptrdiff_t base = start_byte - ceiling_byte;
880 ptrdiff_t cursor, prev;
881
882 for (cursor = base; 0 < cursor; cursor = prev)
883 {
884 unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
885 prev = nl ? nl - ceiling_addr : -1;
886
887 /* If we're looking for newlines, cache the fact that
888 this line's region is free of them. */
889 if (newline_cache && cursor != prev + 1)
890 {
891 know_region_cache (cache_buffer, newline_cache,
892 BYTE_TO_CHAR (ceiling_byte + prev + 1),
893 BYTE_TO_CHAR (ceiling_byte + cursor));
894 /* know_region_cache can relocate buffer text. */
895 ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
896 }
897
898 if (! nl)
899 break;
900
901 if (++count >= 0)
902 {
903 if (bytepos)
904 *bytepos = ceiling_byte + prev + 1;
905 return BYTE_TO_CHAR (ceiling_byte + prev + 1);
906 }
907 if (allow_quit)
908 maybe_quit ();
909 }
910
911 start_byte = ceiling_byte;
912 start = BYTE_TO_CHAR (start_byte);
913 }
914 }
915
916 if (counted)
917 *counted -= count;
918 if (bytepos)
919 {
920 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
921 eassert (*bytepos == CHAR_TO_BYTE (start));
922 }
923 return start;
924 }
925
926 /* Search for COUNT instances of a line boundary.
927 Start at START. If COUNT is negative, search backwards.
928
929 We report the resulting position by calling TEMP_SET_PT_BOTH.
930
931 If we find COUNT instances. we position after (always after,
932 even if scanning backwards) the COUNTth match.
933
934 If we don't find COUNT instances before reaching the end of the
935 buffer (or the beginning, if scanning backwards), we position at
936 the limit we bumped up against.
937
938 If ALLOW_QUIT, check for quitting. That's good to do
939 except in special cases. */
940
941 void
scan_newline(ptrdiff_t start,ptrdiff_t start_byte,ptrdiff_t limit,ptrdiff_t limit_byte,ptrdiff_t count,bool allow_quit)942 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
943 ptrdiff_t limit, ptrdiff_t limit_byte,
944 ptrdiff_t count, bool allow_quit)
945 {
946 ptrdiff_t charpos, bytepos, counted;
947
948 charpos = find_newline (start, start_byte, limit, limit_byte,
949 count, &counted, &bytepos, allow_quit);
950 if (counted != count)
951 TEMP_SET_PT_BOTH (limit, limit_byte);
952 else
953 TEMP_SET_PT_BOTH (charpos, bytepos);
954 }
955
956 /* Like above, but always scan from point and report the
957 resulting position in *CHARPOS and *BYTEPOS. */
958
959 ptrdiff_t
scan_newline_from_point(ptrdiff_t count,ptrdiff_t * charpos,ptrdiff_t * bytepos)960 scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
961 ptrdiff_t *bytepos)
962 {
963 ptrdiff_t counted;
964
965 if (count <= 0)
966 *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
967 &counted, bytepos, 1);
968 else
969 *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
970 &counted, bytepos, 1);
971 return counted;
972 }
973
974 /* Like find_newline, but doesn't allow QUITting and doesn't return
975 COUNTED. */
976 ptrdiff_t
find_newline_no_quit(ptrdiff_t from,ptrdiff_t frombyte,ptrdiff_t cnt,ptrdiff_t * bytepos)977 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
978 ptrdiff_t cnt, ptrdiff_t *bytepos)
979 {
980 return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
981 }
982
983 /* Like find_newline, but returns position before the newline, not
984 after, and only search up to TO.
985 This isn't just find_newline_no_quit (...)-1, because you might hit TO. */
986
987 ptrdiff_t
find_before_next_newline(ptrdiff_t from,ptrdiff_t to,ptrdiff_t cnt,ptrdiff_t * bytepos)988 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
989 ptrdiff_t cnt, ptrdiff_t *bytepos)
990 {
991 ptrdiff_t counted;
992 ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &counted, bytepos, 1);
993
994 if (counted == cnt)
995 {
996 if (bytepos)
997 DEC_BOTH (pos, *bytepos);
998 else
999 pos--;
1000 }
1001 return pos;
1002 }
1003
1004 /* Subroutines of Lisp buffer search functions. */
1005
1006 static Lisp_Object
search_command(Lisp_Object string,Lisp_Object bound,Lisp_Object noerror,Lisp_Object count,int direction,int RE,bool posix)1007 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
1008 Lisp_Object count, int direction, int RE, bool posix)
1009 {
1010 EMACS_INT np;
1011 EMACS_INT lim;
1012 ptrdiff_t lim_byte;
1013 EMACS_INT n = direction;
1014
1015 if (!NILP (count))
1016 {
1017 CHECK_FIXNUM (count);
1018 n *= XFIXNUM (count);
1019 }
1020
1021 CHECK_STRING (string);
1022 if (NILP (bound))
1023 {
1024 if (n > 0)
1025 lim = ZV, lim_byte = ZV_BYTE;
1026 else
1027 lim = BEGV, lim_byte = BEGV_BYTE;
1028 }
1029 else
1030 {
1031 CHECK_FIXNUM_COERCE_MARKER (bound);
1032 lim = XFIXNUM (bound);
1033 if (n > 0 ? lim < PT : lim > PT)
1034 error ("Invalid search bound (wrong side of point)");
1035 if (lim > ZV)
1036 lim = ZV, lim_byte = ZV_BYTE;
1037 else if (lim < BEGV)
1038 lim = BEGV, lim_byte = BEGV_BYTE;
1039 else
1040 lim_byte = CHAR_TO_BYTE (lim);
1041 }
1042
1043 /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
1044 table. */
1045 set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
1046 BVAR (current_buffer, case_eqv_table));
1047
1048 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1049 (!NILP (BVAR (current_buffer, case_fold_search))
1050 ? BVAR (current_buffer, case_canon_table)
1051 : Qnil),
1052 (!NILP (BVAR (current_buffer, case_fold_search))
1053 ? BVAR (current_buffer, case_eqv_table)
1054 : Qnil),
1055 posix);
1056 if (np <= 0)
1057 {
1058 if (NILP (noerror))
1059 xsignal1 (Qsearch_failed, string);
1060
1061 if (!EQ (noerror, Qt))
1062 {
1063 eassert (BEGV <= lim && lim <= ZV);
1064 SET_PT_BOTH (lim, lim_byte);
1065 return Qnil;
1066 #if 0 /* This would be clean, but maybe programs depend on
1067 a value of nil here. */
1068 np = lim;
1069 #endif
1070 }
1071 else
1072 return Qnil;
1073 }
1074
1075 eassert (BEGV <= np && np <= ZV);
1076 SET_PT (np);
1077
1078 return make_fixnum (np);
1079 }
1080
1081 /* Return true if REGEXP it matches just one constant string. */
1082
1083 static bool
trivial_regexp_p(Lisp_Object regexp)1084 trivial_regexp_p (Lisp_Object regexp)
1085 {
1086 ptrdiff_t len = SBYTES (regexp);
1087 unsigned char *s = SDATA (regexp);
1088 while (--len >= 0)
1089 {
1090 switch (*s++)
1091 {
1092 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1093 return 0;
1094 case '\\':
1095 if (--len < 0)
1096 return 0;
1097 switch (*s++)
1098 {
1099 case '|': case '(': case ')': case '`': case '\'': case 'b':
1100 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1101 case 'S': case '=': case '{': case '}': case '_':
1102 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1103 case '1': case '2': case '3': case '4': case '5':
1104 case '6': case '7': case '8': case '9':
1105 return 0;
1106 }
1107 }
1108 }
1109 return 1;
1110 }
1111
1112 /* Search for the n'th occurrence of STRING in the current buffer,
1113 starting at position POS and stopping at position LIM,
1114 treating STRING as a literal string if RE is false or as
1115 a regular expression if RE is true.
1116
1117 If N is positive, searching is forward and LIM must be greater than POS.
1118 If N is negative, searching is backward and LIM must be less than POS.
1119
1120 Returns -x if x occurrences remain to be found (x > 0),
1121 or else the position at the beginning of the Nth occurrence
1122 (if searching backward) or the end (if searching forward).
1123
1124 POSIX is nonzero if we want full backtracking (POSIX style)
1125 for this pattern. 0 means backtrack only enough to get a valid match. */
1126
1127 #define TRANSLATE(out, trt, d) \
1128 do \
1129 { \
1130 if (! NILP (trt)) \
1131 { \
1132 Lisp_Object temp; \
1133 temp = Faref (trt, make_fixnum (d)); \
1134 if (FIXNUMP (temp)) \
1135 out = XFIXNUM (temp); \
1136 else \
1137 out = d; \
1138 } \
1139 else \
1140 out = d; \
1141 } \
1142 while (0)
1143
1144 /* Only used in search_buffer, to record the end position of the match
1145 when searching regexps and SEARCH_REGS should not be changed
1146 (i.e. Vinhibit_changing_match_data is non-nil). */
1147 static struct re_registers search_regs_1;
1148
1149 static EMACS_INT
search_buffer_re(Lisp_Object string,ptrdiff_t pos,ptrdiff_t pos_byte,ptrdiff_t lim,ptrdiff_t lim_byte,EMACS_INT n,Lisp_Object trt,Lisp_Object inverse_trt,bool posix)1150 search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1151 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1152 Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1153 {
1154 unsigned char *p1, *p2;
1155 ptrdiff_t s1, s2;
1156
1157 /* Snapshot in case Lisp changes the value. */
1158 bool preserve_match_data = NILP (Vinhibit_changing_match_data);
1159
1160 struct regexp_cache *cache_entry =
1161 compile_pattern (string,
1162 preserve_match_data ? &search_regs : &search_regs_1,
1163 trt, posix,
1164 !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1165 struct re_pattern_buffer *bufp = &cache_entry->buf;
1166
1167 maybe_quit (); /* Do a pending quit right away,
1168 to avoid paradoxical behavior */
1169 /* Get pointers and sizes of the two strings
1170 that make up the visible portion of the buffer. */
1171
1172 p1 = BEGV_ADDR;
1173 s1 = GPT_BYTE - BEGV_BYTE;
1174 p2 = GAP_END_ADDR;
1175 s2 = ZV_BYTE - GPT_BYTE;
1176 if (s1 < 0)
1177 {
1178 p2 = p1;
1179 s2 = ZV_BYTE - BEGV_BYTE;
1180 s1 = 0;
1181 }
1182 if (s2 < 0)
1183 {
1184 s1 = ZV_BYTE - BEGV_BYTE;
1185 s2 = 0;
1186 }
1187
1188 ptrdiff_t count = SPECPDL_INDEX ();
1189 freeze_buffer_relocation ();
1190 freeze_pattern (cache_entry);
1191
1192 while (n < 0)
1193 {
1194 ptrdiff_t val;
1195
1196 re_match_object = Qnil;
1197 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1198 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1199 preserve_match_data ? &search_regs : &search_regs_1,
1200 /* Don't allow match past current point */
1201 pos_byte - BEGV_BYTE);
1202 if (val == -2)
1203 {
1204 unbind_to (count, Qnil);
1205 matcher_overflow ();
1206 }
1207 if (val >= 0)
1208 {
1209 if (preserve_match_data)
1210 {
1211 pos_byte = search_regs.start[0] + BEGV_BYTE;
1212 for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
1213 if (search_regs.start[i] >= 0)
1214 {
1215 search_regs.start[i]
1216 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1217 search_regs.end[i]
1218 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1219 }
1220 XSETBUFFER (last_thing_searched, current_buffer);
1221 /* Set pos to the new position. */
1222 pos = search_regs.start[0];
1223 }
1224 else
1225 {
1226 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1227 /* Set pos to the new position. */
1228 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1229 }
1230 }
1231 else
1232 {
1233 unbind_to (count, Qnil);
1234 return (n);
1235 }
1236 n++;
1237 maybe_quit ();
1238 }
1239 while (n > 0)
1240 {
1241 ptrdiff_t val;
1242
1243 re_match_object = Qnil;
1244 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1245 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1246 preserve_match_data ? &search_regs : &search_regs_1,
1247 lim_byte - BEGV_BYTE);
1248 if (val == -2)
1249 {
1250 unbind_to (count, Qnil);
1251 matcher_overflow ();
1252 }
1253 if (val >= 0)
1254 {
1255 if (preserve_match_data)
1256 {
1257 pos_byte = search_regs.end[0] + BEGV_BYTE;
1258 for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
1259 if (search_regs.start[i] >= 0)
1260 {
1261 search_regs.start[i]
1262 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1263 search_regs.end[i]
1264 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1265 }
1266 XSETBUFFER (last_thing_searched, current_buffer);
1267 pos = search_regs.end[0];
1268 }
1269 else
1270 {
1271 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1272 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1273 }
1274 }
1275 else
1276 {
1277 unbind_to (count, Qnil);
1278 return (0 - n);
1279 }
1280 n--;
1281 maybe_quit ();
1282 }
1283 unbind_to (count, Qnil);
1284 return (pos);
1285 }
1286
1287 static EMACS_INT
search_buffer_non_re(Lisp_Object string,ptrdiff_t pos,ptrdiff_t pos_byte,ptrdiff_t lim,ptrdiff_t lim_byte,EMACS_INT n,int RE,Lisp_Object trt,Lisp_Object inverse_trt,bool posix)1288 search_buffer_non_re (Lisp_Object string, ptrdiff_t pos,
1289 ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte,
1290 EMACS_INT n, int RE, Lisp_Object trt, Lisp_Object inverse_trt,
1291 bool posix)
1292 {
1293 unsigned char *raw_pattern, *pat;
1294 ptrdiff_t raw_pattern_size;
1295 ptrdiff_t raw_pattern_size_byte;
1296 unsigned char *patbuf;
1297 bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1298 unsigned char *base_pat;
1299 /* Set to positive if we find a non-ASCII char that need
1300 translation. Otherwise set to zero later. */
1301 int char_base = -1;
1302 bool boyer_moore_ok = 1;
1303 USE_SAFE_ALLOCA;
1304
1305 /* MULTIBYTE says whether the text to be searched is multibyte.
1306 We must convert PATTERN to match that, or we will not really
1307 find things right. */
1308
1309 if (multibyte == STRING_MULTIBYTE (string))
1310 {
1311 raw_pattern = SDATA (string);
1312 raw_pattern_size = SCHARS (string);
1313 raw_pattern_size_byte = SBYTES (string);
1314 }
1315 else if (multibyte)
1316 {
1317 raw_pattern_size = SCHARS (string);
1318 raw_pattern_size_byte
1319 = count_size_as_multibyte (SDATA (string),
1320 raw_pattern_size);
1321 raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
1322 copy_text (SDATA (string), raw_pattern,
1323 SCHARS (string), 0, 1);
1324 }
1325 else
1326 {
1327 /* Converting multibyte to single-byte. */
1328 raw_pattern_size = SCHARS (string);
1329 raw_pattern_size_byte = SCHARS (string);
1330 raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
1331 copy_text (SDATA (string), raw_pattern,
1332 SBYTES (string), 1, 0);
1333 }
1334
1335 /* Copy and optionally translate the pattern. */
1336 ptrdiff_t len = raw_pattern_size;
1337 ptrdiff_t len_byte = raw_pattern_size_byte;
1338 SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
1339 pat = patbuf;
1340 base_pat = raw_pattern;
1341 if (multibyte)
1342 {
1343 /* Fill patbuf by translated characters in STRING while
1344 checking if we can use boyer-moore search. If TRT is
1345 non-nil, we can use boyer-moore search only if TRT can be
1346 represented by the byte array of 256 elements. For that,
1347 all non-ASCII case-equivalents of all case-sensitive
1348 characters in STRING must belong to the same character
1349 group (two characters belong to the same group iff their
1350 multibyte forms are the same except for the last byte;
1351 i.e. every 64 characters form a group; U+0000..U+003F,
1352 U+0040..U+007F, U+0080..U+00BF, ...). */
1353
1354 while (--len >= 0)
1355 {
1356 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1357 int c, translated, inverse;
1358 int in_charlen, charlen;
1359
1360 /* If we got here and the RE flag is set, it's because we're
1361 dealing with a regexp known to be trivial, so the backslash
1362 just quotes the next character. */
1363 if (RE && *base_pat == '\\')
1364 {
1365 len--;
1366 raw_pattern_size--;
1367 len_byte--;
1368 base_pat++;
1369 }
1370
1371 c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1372
1373 if (NILP (trt))
1374 {
1375 str = base_pat;
1376 charlen = in_charlen;
1377 }
1378 else
1379 {
1380 /* Translate the character. */
1381 TRANSLATE (translated, trt, c);
1382 charlen = CHAR_STRING (translated, str_base);
1383 str = str_base;
1384
1385 /* Check if C has any other case-equivalents. */
1386 TRANSLATE (inverse, inverse_trt, c);
1387 /* If so, check if we can use boyer-moore. */
1388 if (c != inverse && boyer_moore_ok)
1389 {
1390 /* Check if all equivalents belong to the same
1391 group of characters. Note that the check of C
1392 itself is done by the last iteration. */
1393 int this_char_base = -1;
1394
1395 while (boyer_moore_ok)
1396 {
1397 if (ASCII_CHAR_P (inverse))
1398 {
1399 if (this_char_base > 0)
1400 boyer_moore_ok = 0;
1401 else
1402 this_char_base = 0;
1403 }
1404 else if (CHAR_BYTE8_P (inverse))
1405 /* Boyer-moore search can't handle a
1406 translation of an eight-bit
1407 character. */
1408 boyer_moore_ok = 0;
1409 else if (this_char_base < 0)
1410 {
1411 this_char_base = inverse & ~0x3F;
1412 if (char_base < 0)
1413 char_base = this_char_base;
1414 else if (this_char_base != char_base)
1415 boyer_moore_ok = 0;
1416 }
1417 else if ((inverse & ~0x3F) != this_char_base)
1418 boyer_moore_ok = 0;
1419 if (c == inverse)
1420 break;
1421 TRANSLATE (inverse, inverse_trt, inverse);
1422 }
1423 }
1424 }
1425
1426 /* Store this character into the translated pattern. */
1427 memcpy (pat, str, charlen);
1428 pat += charlen;
1429 base_pat += in_charlen;
1430 len_byte -= in_charlen;
1431 }
1432
1433 /* If char_base is still negative we didn't find any translated
1434 non-ASCII characters. */
1435 if (char_base < 0)
1436 char_base = 0;
1437 }
1438 else
1439 {
1440 /* Unibyte buffer. */
1441 char_base = 0;
1442 while (--len >= 0)
1443 {
1444 int c, translated, inverse;
1445
1446 /* If we got here and the RE flag is set, it's because we're
1447 dealing with a regexp known to be trivial, so the backslash
1448 just quotes the next character. */
1449 if (RE && *base_pat == '\\')
1450 {
1451 len--;
1452 raw_pattern_size--;
1453 base_pat++;
1454 }
1455 c = *base_pat++;
1456 TRANSLATE (translated, trt, c);
1457 *pat++ = translated;
1458 /* Check that none of C's equivalents violates the
1459 assumptions of boyer_moore. */
1460 TRANSLATE (inverse, inverse_trt, c);
1461 while (1)
1462 {
1463 if (inverse >= 0200)
1464 {
1465 boyer_moore_ok = 0;
1466 break;
1467 }
1468 if (c == inverse)
1469 break;
1470 TRANSLATE (inverse, inverse_trt, inverse);
1471 }
1472 }
1473 }
1474
1475 len_byte = pat - patbuf;
1476 pat = base_pat = patbuf;
1477
1478 EMACS_INT result
1479 = (boyer_moore_ok
1480 ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
1481 pos_byte, lim_byte,
1482 char_base)
1483 : simple_search (n, pat, raw_pattern_size, len_byte, trt,
1484 pos, pos_byte, lim, lim_byte));
1485 SAFE_FREE ();
1486 return result;
1487 }
1488
1489 static EMACS_INT
search_buffer(Lisp_Object string,ptrdiff_t pos,ptrdiff_t pos_byte,ptrdiff_t lim,ptrdiff_t lim_byte,EMACS_INT n,int RE,Lisp_Object trt,Lisp_Object inverse_trt,bool posix)1490 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1491 ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1492 int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1493 {
1494 if (running_asynch_code)
1495 save_search_regs ();
1496
1497 /* Searching 0 times means don't move. */
1498 /* Null string is found at starting position. */
1499 if (n == 0 || SCHARS (string) == 0)
1500 {
1501 set_search_regs (pos_byte, 0);
1502 return pos;
1503 }
1504
1505 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1506 pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte,
1507 n, trt, inverse_trt, posix);
1508 else
1509 pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte,
1510 n, RE, trt, inverse_trt, posix);
1511
1512 return pos;
1513 }
1514
1515 /* Do a simple string search N times for the string PAT,
1516 whose length is LEN/LEN_BYTE,
1517 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1518 TRT is the translation table.
1519
1520 Return the character position where the match is found.
1521 Otherwise, if M matches remained to be found, return -M.
1522
1523 This kind of search works regardless of what is in PAT and
1524 regardless of what is in TRT. It is used in cases where
1525 boyer_moore cannot work. */
1526
1527 static EMACS_INT
simple_search(EMACS_INT n,unsigned char * pat,ptrdiff_t len,ptrdiff_t len_byte,Lisp_Object trt,ptrdiff_t pos,ptrdiff_t pos_byte,ptrdiff_t lim,ptrdiff_t lim_byte)1528 simple_search (EMACS_INT n, unsigned char *pat,
1529 ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1530 ptrdiff_t pos, ptrdiff_t pos_byte,
1531 ptrdiff_t lim, ptrdiff_t lim_byte)
1532 {
1533 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1534 bool forward = n > 0;
1535 /* Number of buffer bytes matched. Note that this may be different
1536 from len_byte in a multibyte buffer. */
1537 ptrdiff_t match_byte = PTRDIFF_MIN;
1538
1539 if (lim > pos && multibyte)
1540 while (n > 0)
1541 {
1542 while (1)
1543 {
1544 /* Try matching at position POS. */
1545 ptrdiff_t this_pos = pos;
1546 ptrdiff_t this_pos_byte = pos_byte;
1547 ptrdiff_t this_len = len;
1548 unsigned char *p = pat;
1549 if (pos + len > lim || pos_byte + len_byte > lim_byte)
1550 goto stop;
1551
1552 while (this_len > 0)
1553 {
1554 int charlen, buf_charlen;
1555 int pat_ch, buf_ch;
1556
1557 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1558 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1559 buf_charlen);
1560 TRANSLATE (buf_ch, trt, buf_ch);
1561
1562 if (buf_ch != pat_ch)
1563 break;
1564
1565 this_len--;
1566 p += charlen;
1567
1568 this_pos_byte += buf_charlen;
1569 this_pos++;
1570 }
1571
1572 if (this_len == 0)
1573 {
1574 match_byte = this_pos_byte - pos_byte;
1575 pos += len;
1576 pos_byte += match_byte;
1577 break;
1578 }
1579
1580 INC_BOTH (pos, pos_byte);
1581 }
1582
1583 n--;
1584 }
1585 else if (lim > pos)
1586 while (n > 0)
1587 {
1588 while (1)
1589 {
1590 /* Try matching at position POS. */
1591 ptrdiff_t this_pos = pos;
1592 ptrdiff_t this_len = len;
1593 unsigned char *p = pat;
1594
1595 if (pos + len > lim)
1596 goto stop;
1597
1598 while (this_len > 0)
1599 {
1600 int pat_ch = *p++;
1601 int buf_ch = FETCH_BYTE (this_pos);
1602 TRANSLATE (buf_ch, trt, buf_ch);
1603
1604 if (buf_ch != pat_ch)
1605 break;
1606
1607 this_len--;
1608 this_pos++;
1609 }
1610
1611 if (this_len == 0)
1612 {
1613 match_byte = len;
1614 pos += len;
1615 break;
1616 }
1617
1618 pos++;
1619 }
1620
1621 n--;
1622 }
1623 /* Backwards search. */
1624 else if (lim < pos && multibyte)
1625 while (n < 0)
1626 {
1627 while (1)
1628 {
1629 /* Try matching at position POS. */
1630 ptrdiff_t this_pos = pos;
1631 ptrdiff_t this_pos_byte = pos_byte;
1632 ptrdiff_t this_len = len;
1633 const unsigned char *p = pat + len_byte;
1634
1635 if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1636 goto stop;
1637
1638 while (this_len > 0)
1639 {
1640 int pat_ch, buf_ch;
1641
1642 DEC_BOTH (this_pos, this_pos_byte);
1643 PREV_CHAR_BOUNDARY (p, pat);
1644 pat_ch = STRING_CHAR (p);
1645 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1646 TRANSLATE (buf_ch, trt, buf_ch);
1647
1648 if (buf_ch != pat_ch)
1649 break;
1650
1651 this_len--;
1652 }
1653
1654 if (this_len == 0)
1655 {
1656 match_byte = pos_byte - this_pos_byte;
1657 pos = this_pos;
1658 pos_byte = this_pos_byte;
1659 break;
1660 }
1661
1662 DEC_BOTH (pos, pos_byte);
1663 }
1664
1665 n++;
1666 }
1667 else if (lim < pos)
1668 while (n < 0)
1669 {
1670 while (1)
1671 {
1672 /* Try matching at position POS. */
1673 ptrdiff_t this_pos = pos - len;
1674 ptrdiff_t this_len = len;
1675 unsigned char *p = pat;
1676
1677 if (this_pos < lim)
1678 goto stop;
1679
1680 while (this_len > 0)
1681 {
1682 int pat_ch = *p++;
1683 int buf_ch = FETCH_BYTE (this_pos);
1684 TRANSLATE (buf_ch, trt, buf_ch);
1685
1686 if (buf_ch != pat_ch)
1687 break;
1688 this_len--;
1689 this_pos++;
1690 }
1691
1692 if (this_len == 0)
1693 {
1694 match_byte = len;
1695 pos -= len;
1696 break;
1697 }
1698
1699 pos--;
1700 }
1701
1702 n++;
1703 }
1704
1705 stop:
1706 if (n == 0)
1707 {
1708 eassert (match_byte != PTRDIFF_MIN);
1709 if (forward)
1710 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1711 else
1712 set_search_regs (multibyte ? pos_byte : pos, match_byte);
1713
1714 return pos;
1715 }
1716 else if (n > 0)
1717 return -n;
1718 else
1719 return n;
1720 }
1721
1722 /* Do Boyer-Moore search N times for the string BASE_PAT,
1723 whose length is LEN_BYTE,
1724 from buffer position POS_BYTE until LIM_BYTE.
1725 DIRECTION says which direction we search in.
1726 TRT and INVERSE_TRT are translation tables.
1727 Characters in PAT are already translated by TRT.
1728
1729 This kind of search works if all the characters in BASE_PAT that
1730 have nontrivial translation are the same aside from the last byte.
1731 This makes it possible to translate just the last byte of a
1732 character, and do so after just a simple test of the context.
1733 CHAR_BASE is nonzero if there is such a non-ASCII character.
1734
1735 If that criterion is not satisfied, do not call this function. */
1736
1737 static EMACS_INT
boyer_moore(EMACS_INT n,unsigned char * base_pat,ptrdiff_t len_byte,Lisp_Object trt,Lisp_Object inverse_trt,ptrdiff_t pos_byte,ptrdiff_t lim_byte,int char_base)1738 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1739 ptrdiff_t len_byte,
1740 Lisp_Object trt, Lisp_Object inverse_trt,
1741 ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1742 int char_base)
1743 {
1744 int direction = ((n > 0) ? 1 : -1);
1745 register ptrdiff_t dirlen;
1746 ptrdiff_t limit;
1747 int stride_for_teases = 0;
1748 int BM_tab[0400];
1749 register unsigned char *cursor, *p_limit;
1750 register ptrdiff_t i;
1751 register int j;
1752 unsigned char *pat, *pat_end;
1753 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1754
1755 unsigned char simple_translate[0400];
1756 /* These are set to the preceding bytes of a byte to be translated
1757 if char_base is nonzero. As the maximum byte length of a
1758 multibyte character is 5, we have to check at most four previous
1759 bytes. */
1760 int translate_prev_byte1 = 0;
1761 int translate_prev_byte2 = 0;
1762 int translate_prev_byte3 = 0;
1763
1764 /* The general approach is that we are going to maintain that we know
1765 the first (closest to the present position, in whatever direction
1766 we're searching) character that could possibly be the last
1767 (furthest from present position) character of a valid match. We
1768 advance the state of our knowledge by looking at that character
1769 and seeing whether it indeed matches the last character of the
1770 pattern. If it does, we take a closer look. If it does not, we
1771 move our pointer (to putative last characters) as far as is
1772 logically possible. This amount of movement, which I call a
1773 stride, will be the length of the pattern if the actual character
1774 appears nowhere in the pattern, otherwise it will be the distance
1775 from the last occurrence of that character to the end of the
1776 pattern. If the amount is zero we have a possible match. */
1777
1778 /* Here we make a "mickey mouse" BM table. The stride of the search
1779 is determined only by the last character of the putative match.
1780 If that character does not match, we will stride the proper
1781 distance to propose a match that superimposes it on the last
1782 instance of a character that matches it (per trt), or misses
1783 it entirely if there is none. */
1784
1785 dirlen = len_byte * direction;
1786
1787 /* Record position after the end of the pattern. */
1788 pat_end = base_pat + len_byte;
1789 /* BASE_PAT points to a character that we start scanning from.
1790 It is the first character in a forward search,
1791 the last character in a backward search. */
1792 if (direction < 0)
1793 base_pat = pat_end - 1;
1794
1795 /* A character that does not appear in the pattern induces a
1796 stride equal to the pattern length. */
1797 for (i = 0; i < 0400; i++)
1798 BM_tab[i] = dirlen;
1799
1800 /* We use this for translation, instead of TRT itself.
1801 We fill this in to handle the characters that actually
1802 occur in the pattern. Others don't matter anyway! */
1803 for (i = 0; i < 0400; i++)
1804 simple_translate[i] = i;
1805
1806 if (char_base)
1807 {
1808 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
1809 byte following them are the target of translation. */
1810 eassume (0x80 <= char_base && char_base <= MAX_CHAR);
1811 unsigned char str[MAX_MULTIBYTE_LENGTH];
1812 int cblen = CHAR_STRING (char_base, str);
1813
1814 translate_prev_byte1 = str[cblen - 2];
1815 if (cblen > 2)
1816 {
1817 translate_prev_byte2 = str[cblen - 3];
1818 if (cblen > 3)
1819 translate_prev_byte3 = str[cblen - 4];
1820 }
1821 }
1822
1823 i = 0;
1824 while (i != dirlen)
1825 {
1826 unsigned char *ptr = base_pat + i;
1827 i += direction;
1828 if (! NILP (trt))
1829 {
1830 /* If the byte currently looking at is the last of a
1831 character to check case-equivalents, set CH to that
1832 character. An ASCII character and a non-ASCII character
1833 matching with CHAR_BASE are to be checked. */
1834 int ch = -1;
1835
1836 if (ASCII_CHAR_P (*ptr) || ! multibyte)
1837 ch = *ptr;
1838 else if (char_base
1839 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1840 {
1841 unsigned char *charstart = ptr - 1;
1842
1843 while (! (CHAR_HEAD_P (*charstart)))
1844 charstart--;
1845 ch = STRING_CHAR (charstart);
1846 if (char_base != (ch & ~0x3F))
1847 ch = -1;
1848 }
1849
1850 if (ch >= 0200 && multibyte)
1851 j = (ch & 0x3F) | 0200;
1852 else
1853 j = *ptr;
1854
1855 if (i == dirlen)
1856 stride_for_teases = BM_tab[j];
1857
1858 BM_tab[j] = dirlen - i;
1859 /* A translation table is accompanied by its inverse -- see
1860 comment following downcase_table for details. */
1861 if (ch >= 0)
1862 {
1863 int starting_ch = ch;
1864 int starting_j = j;
1865
1866 while (1)
1867 {
1868 TRANSLATE (ch, inverse_trt, ch);
1869 if (ch >= 0200 && multibyte)
1870 j = (ch & 0x3F) | 0200;
1871 else
1872 j = ch;
1873
1874 /* For all the characters that map into CH,
1875 set up simple_translate to map the last byte
1876 into STARTING_J. */
1877 simple_translate[j] = starting_j;
1878 if (ch == starting_ch)
1879 break;
1880 BM_tab[j] = dirlen - i;
1881 }
1882 }
1883 }
1884 else
1885 {
1886 j = *ptr;
1887
1888 if (i == dirlen)
1889 stride_for_teases = BM_tab[j];
1890 BM_tab[j] = dirlen - i;
1891 }
1892 /* stride_for_teases tells how much to stride if we get a
1893 match on the far character but are subsequently
1894 disappointed, by recording what the stride would have been
1895 for that character if the last character had been
1896 different. */
1897 }
1898 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1899 /* loop invariant - POS_BYTE points at where last char (first
1900 char if reverse) of pattern would align in a possible match. */
1901 while (n != 0)
1902 {
1903 ptrdiff_t tail_end;
1904 unsigned char *tail_end_ptr;
1905
1906 /* It's been reported that some (broken) compiler thinks that
1907 Boolean expressions in an arithmetic context are unsigned.
1908 Using an explicit ?1:0 prevents this. */
1909 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1910 < 0)
1911 return (n * (0 - direction));
1912 /* First we do the part we can by pointers (maybe nothing) */
1913 maybe_quit ();
1914 pat = base_pat;
1915 limit = pos_byte - dirlen + direction;
1916 if (direction > 0)
1917 {
1918 limit = BUFFER_CEILING_OF (limit);
1919 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1920 can take on without hitting edge of buffer or the gap. */
1921 limit = min (limit, pos_byte + 20000);
1922 limit = min (limit, lim_byte - 1);
1923 }
1924 else
1925 {
1926 limit = BUFFER_FLOOR_OF (limit);
1927 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1928 can take on without hitting edge of buffer or the gap. */
1929 limit = max (limit, pos_byte - 20000);
1930 limit = max (limit, lim_byte);
1931 }
1932 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1933 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1934
1935 if ((limit - pos_byte) * direction > 20)
1936 {
1937 unsigned char *p2;
1938
1939 p_limit = BYTE_POS_ADDR (limit);
1940 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1941 /* In this loop, pos + cursor - p2 is the surrogate for pos. */
1942 while (1) /* use one cursor setting as long as i can */
1943 {
1944 if (direction > 0) /* worth duplicating */
1945 {
1946 while (cursor <= p_limit)
1947 {
1948 if (BM_tab[*cursor] == 0)
1949 goto hit;
1950 cursor += BM_tab[*cursor];
1951 }
1952 }
1953 else
1954 {
1955 while (cursor >= p_limit)
1956 {
1957 if (BM_tab[*cursor] == 0)
1958 goto hit;
1959 cursor += BM_tab[*cursor];
1960 }
1961 }
1962 /* If you are here, cursor is beyond the end of the
1963 searched region. You fail to match within the
1964 permitted region and would otherwise try a character
1965 beyond that region. */
1966 break;
1967
1968 hit:
1969 i = dirlen - direction;
1970 if (! NILP (trt))
1971 {
1972 while ((i -= direction) + direction != 0)
1973 {
1974 int ch;
1975 cursor -= direction;
1976 /* Translate only the last byte of a character. */
1977 if (! multibyte
1978 || ((cursor == tail_end_ptr
1979 || CHAR_HEAD_P (cursor[1]))
1980 && (CHAR_HEAD_P (cursor[0])
1981 /* Check if this is the last byte of
1982 a translatable character. */
1983 || (translate_prev_byte1 == cursor[-1]
1984 && (CHAR_HEAD_P (translate_prev_byte1)
1985 || (translate_prev_byte2 == cursor[-2]
1986 && (CHAR_HEAD_P (translate_prev_byte2)
1987 || (translate_prev_byte3 == cursor[-3]))))))))
1988 ch = simple_translate[*cursor];
1989 else
1990 ch = *cursor;
1991 if (pat[i] != ch)
1992 break;
1993 }
1994 }
1995 else
1996 {
1997 while ((i -= direction) + direction != 0)
1998 {
1999 cursor -= direction;
2000 if (pat[i] != *cursor)
2001 break;
2002 }
2003 }
2004 cursor += dirlen - i - direction; /* fix cursor */
2005 if (i + direction == 0)
2006 {
2007 ptrdiff_t position, start, end;
2008 #ifdef REL_ALLOC
2009 ptrdiff_t cursor_off;
2010 #endif
2011
2012 cursor -= direction;
2013
2014 position = pos_byte + cursor - p2 + ((direction > 0)
2015 ? 1 - len_byte : 0);
2016 #ifdef REL_ALLOC
2017 /* set_search_regs might call malloc, which could
2018 cause ralloc.c relocate buffer text. We need to
2019 update pointers into buffer text due to that. */
2020 cursor_off = cursor - p2;
2021 #endif
2022 set_search_regs (position, len_byte);
2023 #ifdef REL_ALLOC
2024 p_limit = BYTE_POS_ADDR (limit);
2025 p2 = BYTE_POS_ADDR (pos_byte);
2026 cursor = p2 + cursor_off;
2027 #endif
2028
2029 if (NILP (Vinhibit_changing_match_data))
2030 {
2031 start = search_regs.start[0];
2032 end = search_regs.end[0];
2033 }
2034 else
2035 /* If Vinhibit_changing_match_data is non-nil,
2036 search_regs will not be changed. So let's
2037 compute start and end here. */
2038 {
2039 start = BYTE_TO_CHAR (position);
2040 end = BYTE_TO_CHAR (position + len_byte);
2041 }
2042
2043 if ((n -= direction) != 0)
2044 cursor += dirlen; /* to resume search */
2045 else
2046 return direction > 0 ? end : start;
2047 }
2048 else
2049 cursor += stride_for_teases; /* <sigh> we lose - */
2050 }
2051 pos_byte += cursor - p2;
2052 }
2053 else
2054 /* Now we'll pick up a clump that has to be done the hard
2055 way because it covers a discontinuity. */
2056 {
2057 limit = ((direction > 0)
2058 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2059 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2060 limit = ((direction > 0)
2061 ? min (limit + len_byte, lim_byte - 1)
2062 : max (limit - len_byte, lim_byte));
2063 /* LIMIT is now the last value POS_BYTE can have
2064 and still be valid for a possible match. */
2065 while (1)
2066 {
2067 /* This loop can be coded for space rather than
2068 speed because it will usually run only once.
2069 (the reach is at most len + 21, and typically
2070 does not exceed len). */
2071 while ((limit - pos_byte) * direction >= 0)
2072 {
2073 int ch = FETCH_BYTE (pos_byte);
2074 if (BM_tab[ch] == 0)
2075 goto hit2;
2076 pos_byte += BM_tab[ch];
2077 }
2078 break; /* ran off the end */
2079
2080 hit2:
2081 /* Found what might be a match. */
2082 i = dirlen - direction;
2083 while ((i -= direction) + direction != 0)
2084 {
2085 int ch;
2086 unsigned char *ptr;
2087 pos_byte -= direction;
2088 ptr = BYTE_POS_ADDR (pos_byte);
2089 /* Translate only the last byte of a character. */
2090 if (! multibyte
2091 || ((ptr == tail_end_ptr
2092 || CHAR_HEAD_P (ptr[1]))
2093 && (CHAR_HEAD_P (ptr[0])
2094 /* Check if this is the last byte of a
2095 translatable character. */
2096 || (translate_prev_byte1 == ptr[-1]
2097 && (CHAR_HEAD_P (translate_prev_byte1)
2098 || (translate_prev_byte2 == ptr[-2]
2099 && (CHAR_HEAD_P (translate_prev_byte2)
2100 || translate_prev_byte3 == ptr[-3])))))))
2101 ch = simple_translate[*ptr];
2102 else
2103 ch = *ptr;
2104 if (pat[i] != ch)
2105 break;
2106 }
2107 /* Above loop has moved POS_BYTE part or all the way
2108 back to the first pos (last pos if reverse).
2109 Set it once again at the last (first if reverse) char. */
2110 pos_byte += dirlen - i - direction;
2111 if (i + direction == 0)
2112 {
2113 ptrdiff_t position, start, end;
2114 pos_byte -= direction;
2115
2116 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2117 set_search_regs (position, len_byte);
2118
2119 if (NILP (Vinhibit_changing_match_data))
2120 {
2121 start = search_regs.start[0];
2122 end = search_regs.end[0];
2123 }
2124 else
2125 /* If Vinhibit_changing_match_data is non-nil,
2126 search_regs will not be changed. So let's
2127 compute start and end here. */
2128 {
2129 start = BYTE_TO_CHAR (position);
2130 end = BYTE_TO_CHAR (position + len_byte);
2131 }
2132
2133 if ((n -= direction) != 0)
2134 pos_byte += dirlen; /* to resume search */
2135 else
2136 return direction > 0 ? end : start;
2137 }
2138 else
2139 pos_byte += stride_for_teases;
2140 }
2141 }
2142 /* We have done one clump. Can we continue? */
2143 if ((lim_byte - pos_byte) * direction < 0)
2144 return ((0 - n) * direction);
2145 }
2146 return BYTE_TO_CHAR (pos_byte);
2147 }
2148
2149 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2150 for the overall match just found in the current buffer.
2151 Also clear out the match data for registers 1 and up. */
2152
2153 static void
set_search_regs(ptrdiff_t beg_byte,ptrdiff_t nbytes)2154 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2155 {
2156 ptrdiff_t i;
2157
2158 if (!NILP (Vinhibit_changing_match_data))
2159 return;
2160
2161 /* Make sure we have registers in which to store
2162 the match position. */
2163 if (search_regs.num_regs == 0)
2164 {
2165 search_regs.start = xmalloc (2 * sizeof *search_regs.start);
2166 search_regs.end = xmalloc (2 * sizeof *search_regs.end);
2167 search_regs.num_regs = 2;
2168 }
2169
2170 /* Clear out the other registers. */
2171 for (i = 1; i < search_regs.num_regs; i++)
2172 {
2173 search_regs.start[i] = -1;
2174 search_regs.end[i] = -1;
2175 }
2176
2177 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2178 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2179 XSETBUFFER (last_thing_searched, current_buffer);
2180 }
2181
2182 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2183 "MSearch backward: ",
2184 doc: /* Search backward from point for STRING.
2185 Set point to the beginning of the occurrence found, and return point.
2186 An optional second argument bounds the search; it is a buffer position.
2187 The match found must not begin before that position. A value of nil
2188 means search to the beginning of the accessible portion of the buffer.
2189 Optional third argument, if t, means if fail just return nil (no error).
2190 If not nil and not t, position at limit of search and return nil.
2191 Optional fourth argument COUNT, if a positive number, means to search
2192 for COUNT successive occurrences. If COUNT is negative, search
2193 forward, instead of backward, for -COUNT occurrences. A value of
2194 nil means the same as 1.
2195 With COUNT positive, the match found is the COUNTth to last one (or
2196 last, if COUNT is 1 or nil) in the buffer located entirely before
2197 the origin of the search; correspondingly with COUNT negative.
2198
2199 Search case-sensitivity is determined by the value of the variable
2200 `case-fold-search', which see.
2201
2202 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2203 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2204 {
2205 return search_command (string, bound, noerror, count, -1, 0, 0);
2206 }
2207
2208 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2209 doc: /* Search forward from point for STRING.
2210 Set point to the end of the occurrence found, and return point.
2211 An optional second argument bounds the search; it is a buffer position.
2212 The match found must not end after that position. A value of nil
2213 means search to the end of the accessible portion of the buffer.
2214 Optional third argument, if t, means if fail just return nil (no error).
2215 If not nil and not t, move to limit of search and return nil.
2216 Optional fourth argument COUNT, if a positive number, means to search
2217 for COUNT successive occurrences. If COUNT is negative, search
2218 backward, instead of forward, for -COUNT occurrences. A value of
2219 nil means the same as 1.
2220 With COUNT positive, the match found is the COUNTth one (or first,
2221 if COUNT is 1 or nil) in the buffer located entirely after the
2222 origin of the search; correspondingly with COUNT negative.
2223
2224 Search case-sensitivity is determined by the value of the variable
2225 `case-fold-search', which see.
2226
2227 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2228 (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2229 {
2230 return search_command (string, bound, noerror, count, 1, 0, 0);
2231 }
2232
2233 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2234 "sRE search backward: ",
2235 doc: /* Search backward from point for regular expression REGEXP.
2236 This function is almost identical to `re-search-forward', except that
2237 by default it searches backward instead of forward, and the sign of
2238 COUNT also indicates exactly the opposite searching direction.
2239 See `re-search-forward' for details.
2240
2241 Note that searching backwards may give a shorter match than expected,
2242 because REGEXP is still matched in the forward direction. See Info
2243 anchor `(elisp) re-search-backward' for details. */)
2244 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2245 {
2246 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2247 }
2248
2249 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2250 "sRE search: ",
2251 doc: /* Search forward from point for regular expression REGEXP.
2252 Set point to the end of the occurrence found, and return point.
2253 The optional second argument BOUND is a buffer position that bounds
2254 the search. The match found must not end after that position. A
2255 value of nil means search to the end of the accessible portion of
2256 the buffer.
2257 The optional third argument NOERROR indicates how errors are handled
2258 when the search fails. If it is nil or omitted, emit an error; if
2259 it is t, simply return nil and do nothing; if it is neither nil nor
2260 t, move to the limit of search and return nil.
2261 The optional fourth argument COUNT is a number that indicates the
2262 search direction and the number of occurrences to search for. If it
2263 is positive, search forward for COUNT successive occurrences; if it
2264 is negative, search backward, instead of forward, for -COUNT
2265 occurrences. A value of nil means the same as 1.
2266 With COUNT positive/negative, the match found is the COUNTth/-COUNTth
2267 one in the buffer located entirely after/before the origin of the
2268 search.
2269
2270 Search case-sensitivity is determined by the value of the variable
2271 `case-fold-search', which see.
2272
2273 See also the functions `match-beginning', `match-end', `match-string',
2274 and `replace-match'. */)
2275 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2276 {
2277 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2278 }
2279
2280 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2281 "sPosix search backward: ",
2282 doc: /* Search backward from point for match for regular expression REGEXP.
2283 Find the longest match in accord with Posix regular expression rules.
2284 Set point to the beginning of the occurrence found, and return point.
2285 An optional second argument bounds the search; it is a buffer position.
2286 The match found must not begin before that position. A value of nil
2287 means search to the beginning of the accessible portion of the buffer.
2288 Optional third argument, if t, means if fail just return nil (no error).
2289 If not nil and not t, position at limit of search and return nil.
2290 Optional fourth argument COUNT, if a positive number, means to search
2291 for COUNT successive occurrences. If COUNT is negative, search
2292 forward, instead of backward, for -COUNT occurrences. A value of
2293 nil means the same as 1.
2294 With COUNT positive, the match found is the COUNTth to last one (or
2295 last, if COUNT is 1 or nil) in the buffer located entirely before
2296 the origin of the search; correspondingly with COUNT negative.
2297
2298 Search case-sensitivity is determined by the value of the variable
2299 `case-fold-search', which see.
2300
2301 See also the functions `match-beginning', `match-end', `match-string',
2302 and `replace-match'. */)
2303 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2304 {
2305 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2306 }
2307
2308 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2309 "sPosix search: ",
2310 doc: /* Search forward from point for regular expression REGEXP.
2311 Find the longest match in accord with Posix regular expression rules.
2312 Set point to the end of the occurrence found, and return point.
2313 An optional second argument bounds the search; it is a buffer position.
2314 The match found must not end after that position. A value of nil
2315 means search to the end of the accessible portion of the buffer.
2316 Optional third argument, if t, means if fail just return nil (no error).
2317 If not nil and not t, move to limit of search and return nil.
2318 Optional fourth argument COUNT, if a positive number, means to search
2319 for COUNT successive occurrences. If COUNT is negative, search
2320 backward, instead of forward, for -COUNT occurrences. A value of
2321 nil means the same as 1.
2322 With COUNT positive, the match found is the COUNTth one (or first,
2323 if COUNT is 1 or nil) in the buffer located entirely after the
2324 origin of the search; correspondingly with COUNT negative.
2325
2326 Search case-sensitivity is determined by the value of the variable
2327 `case-fold-search', which see.
2328
2329 See also the functions `match-beginning', `match-end', `match-string',
2330 and `replace-match'. */)
2331 (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2332 {
2333 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2334 }
2335
2336 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2337 doc: /* Replace text matched by last search with NEWTEXT.
2338 Leave point at the end of the replacement text.
2339
2340 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2341 the replacement text. Otherwise, maybe capitalize the whole text, or
2342 maybe just word initials, based on the replaced text. If the replaced
2343 text has only capital letters and has at least one multiletter word,
2344 convert NEWTEXT to all caps. Otherwise if all words are capitalized
2345 in the replaced text, capitalize each word in NEWTEXT.
2346
2347 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2348 Otherwise treat `\\' as special:
2349 `\\&' in NEWTEXT means substitute original matched text.
2350 `\\N' means substitute what matched the Nth `\\(...\\)'.
2351 If Nth parens didn't match, substitute nothing.
2352 `\\\\' means insert one `\\'.
2353 `\\?' is treated literally
2354 (for compatibility with `query-replace-regexp').
2355 Any other character following `\\' signals an error.
2356 Case conversion does not apply to these substitutions.
2357
2358 If optional fourth argument STRING is non-nil, it should be a string
2359 to act on; this should be the string on which the previous match was
2360 done via `string-match'. In this case, `replace-match' creates and
2361 returns a new string, made by copying STRING and replacing the part of
2362 STRING that was matched (the original STRING itself is not altered).
2363
2364 The optional fifth argument SUBEXP specifies a subexpression;
2365 it says to replace just that subexpression with NEWTEXT,
2366 rather than replacing the entire matched text.
2367 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2368 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2369 NEWTEXT in place of subexp N.
2370 This is useful only after a regular expression search or match,
2371 since only regular expressions have distinguished subexpressions. */)
2372 (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2373 {
2374 enum { nochange, all_caps, cap_initial } case_action;
2375 ptrdiff_t pos, pos_byte;
2376 bool some_multiletter_word;
2377 bool some_lowercase;
2378 bool some_uppercase;
2379 bool some_nonuppercase_initial;
2380 int c, prevc;
2381 ptrdiff_t sub;
2382 ptrdiff_t opoint, newpoint;
2383
2384 CHECK_STRING (newtext);
2385
2386 if (! NILP (string))
2387 CHECK_STRING (string);
2388
2389 case_action = nochange; /* We tried an initialization */
2390 /* but some C compilers blew it */
2391
2392 ptrdiff_t num_regs = search_regs.num_regs;
2393 if (num_regs <= 0)
2394 error ("`replace-match' called before any match found");
2395
2396 if (NILP (subexp))
2397 sub = 0;
2398 else
2399 {
2400 CHECK_RANGED_INTEGER (subexp, 0, num_regs - 1);
2401 sub = XFIXNUM (subexp);
2402 }
2403
2404 ptrdiff_t sub_start = search_regs.start[sub];
2405 ptrdiff_t sub_end = search_regs.end[sub];
2406 eassert (sub_start <= sub_end);
2407
2408 /* Check whether the text to replace is present in the buffer/string. */
2409 if (! (NILP (string)
2410 ? BEGV <= sub_start && sub_end <= ZV
2411 : 0 <= sub_start && sub_end <= SCHARS (string)))
2412 {
2413 if (sub_start < 0)
2414 xsignal2 (Qerror,
2415 build_string ("replace-match subexpression does not exist"),
2416 subexp);
2417 args_out_of_range (make_fixnum (sub_start), make_fixnum (sub_end));
2418 }
2419
2420 if (NILP (fixedcase))
2421 {
2422 /* Decide how to casify by examining the matched text. */
2423 ptrdiff_t last;
2424
2425 pos = sub_start;
2426 last = sub_end;
2427
2428 if (NILP (string))
2429 pos_byte = CHAR_TO_BYTE (pos);
2430 else
2431 pos_byte = string_char_to_byte (string, pos);
2432
2433 prevc = '\n';
2434 case_action = all_caps;
2435
2436 /* some_multiletter_word is set nonzero if any original word
2437 is more than one letter long. */
2438 some_multiletter_word = 0;
2439 some_lowercase = 0;
2440 some_nonuppercase_initial = 0;
2441 some_uppercase = 0;
2442
2443 while (pos < last)
2444 {
2445 if (NILP (string))
2446 {
2447 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2448 INC_BOTH (pos, pos_byte);
2449 }
2450 else
2451 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2452
2453 if (lowercasep (c))
2454 {
2455 /* Cannot be all caps if any original char is lower case */
2456
2457 some_lowercase = 1;
2458 if (SYNTAX (prevc) != Sword)
2459 some_nonuppercase_initial = 1;
2460 else
2461 some_multiletter_word = 1;
2462 }
2463 else if (uppercasep (c))
2464 {
2465 some_uppercase = 1;
2466 if (SYNTAX (prevc) != Sword)
2467 ;
2468 else
2469 some_multiletter_word = 1;
2470 }
2471 else
2472 {
2473 /* If the initial is a caseless word constituent,
2474 treat that like a lowercase initial. */
2475 if (SYNTAX (prevc) != Sword)
2476 some_nonuppercase_initial = 1;
2477 }
2478
2479 prevc = c;
2480 }
2481
2482 /* Convert to all caps if the old text is all caps
2483 and has at least one multiletter word. */
2484 if (! some_lowercase && some_multiletter_word)
2485 case_action = all_caps;
2486 /* Capitalize each word, if the old text has all capitalized words. */
2487 else if (!some_nonuppercase_initial && some_multiletter_word)
2488 case_action = cap_initial;
2489 else if (!some_nonuppercase_initial && some_uppercase)
2490 /* Should x -> yz, operating on X, give Yz or YZ?
2491 We'll assume the latter. */
2492 case_action = all_caps;
2493 else
2494 case_action = nochange;
2495 }
2496
2497 /* Do replacement in a string. */
2498 if (!NILP (string))
2499 {
2500 Lisp_Object before, after;
2501
2502 before = Fsubstring (string, make_fixnum (0), make_fixnum (sub_start));
2503 after = Fsubstring (string, make_fixnum (sub_end), Qnil);
2504
2505 /* Substitute parts of the match into NEWTEXT
2506 if desired. */
2507 if (NILP (literal))
2508 {
2509 ptrdiff_t lastpos = 0;
2510 ptrdiff_t lastpos_byte = 0;
2511 /* We build up the substituted string in ACCUM. */
2512 Lisp_Object accum;
2513 Lisp_Object middle;
2514 ptrdiff_t length = SBYTES (newtext);
2515
2516 accum = Qnil;
2517
2518 for (pos_byte = 0, pos = 0; pos_byte < length;)
2519 {
2520 ptrdiff_t substart = -1;
2521 ptrdiff_t subend = 0;
2522 bool delbackslash = 0;
2523
2524 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2525
2526 if (c == '\\')
2527 {
2528 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2529
2530 if (c == '&')
2531 {
2532 substart = sub_start;
2533 subend = sub_end;
2534 }
2535 else if (c >= '1' && c <= '9')
2536 {
2537 if (c - '0' < num_regs
2538 && search_regs.start[c - '0'] >= 0)
2539 {
2540 substart = search_regs.start[c - '0'];
2541 subend = search_regs.end[c - '0'];
2542 }
2543 else
2544 {
2545 /* If that subexp did not match,
2546 replace \\N with nothing. */
2547 substart = 0;
2548 subend = 0;
2549 }
2550 }
2551 else if (c == '\\')
2552 delbackslash = 1;
2553 else if (c != '?')
2554 error ("Invalid use of `\\' in replacement text");
2555 }
2556 if (substart >= 0)
2557 {
2558 if (pos - 2 != lastpos)
2559 middle = substring_both (newtext, lastpos,
2560 lastpos_byte,
2561 pos - 2, pos_byte - 2);
2562 else
2563 middle = Qnil;
2564 accum = concat3 (accum, middle,
2565 Fsubstring (string,
2566 make_fixnum (substart),
2567 make_fixnum (subend)));
2568 lastpos = pos;
2569 lastpos_byte = pos_byte;
2570 }
2571 else if (delbackslash)
2572 {
2573 middle = substring_both (newtext, lastpos,
2574 lastpos_byte,
2575 pos - 1, pos_byte - 1);
2576
2577 accum = concat2 (accum, middle);
2578 lastpos = pos;
2579 lastpos_byte = pos_byte;
2580 }
2581 }
2582
2583 if (pos != lastpos)
2584 middle = substring_both (newtext, lastpos,
2585 lastpos_byte,
2586 pos, pos_byte);
2587 else
2588 middle = Qnil;
2589
2590 newtext = concat2 (accum, middle);
2591 }
2592
2593 /* Do case substitution in NEWTEXT if desired. */
2594 if (case_action == all_caps)
2595 newtext = Fupcase (newtext);
2596 else if (case_action == cap_initial)
2597 newtext = Fupcase_initials (newtext);
2598
2599 return concat3 (before, newtext, after);
2600 }
2601
2602 /* Record point. A nonpositive OPOINT is actually an offset from ZV. */
2603 opoint = PT <= sub_start ? PT : max (PT, sub_end) - ZV;
2604
2605 /* If we want non-literal replacement,
2606 perform substitution on the replacement string. */
2607 if (NILP (literal))
2608 {
2609 ptrdiff_t length = SBYTES (newtext);
2610 unsigned char *substed;
2611 ptrdiff_t substed_alloc_size, substed_len;
2612 bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2613 bool str_multibyte = STRING_MULTIBYTE (newtext);
2614 bool really_changed = 0;
2615
2616 substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
2617 ? length * 2 + 100
2618 : STRING_BYTES_BOUND);
2619 substed = xmalloc (substed_alloc_size);
2620 substed_len = 0;
2621
2622 /* Go thru NEWTEXT, producing the actual text to insert in
2623 SUBSTED while adjusting multibyteness to that of the current
2624 buffer. */
2625
2626 for (pos_byte = 0, pos = 0; pos_byte < length;)
2627 {
2628 unsigned char str[MAX_MULTIBYTE_LENGTH];
2629 const unsigned char *add_stuff = NULL;
2630 ptrdiff_t add_len = 0;
2631 ptrdiff_t idx = -1;
2632 ptrdiff_t begbyte UNINIT;
2633
2634 if (str_multibyte)
2635 {
2636 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2637 if (!buf_multibyte)
2638 c = CHAR_TO_BYTE8 (c);
2639 }
2640 else
2641 {
2642 /* Note that we don't have to increment POS. */
2643 c = SREF (newtext, pos_byte++);
2644 if (buf_multibyte)
2645 MAKE_CHAR_MULTIBYTE (c);
2646 }
2647
2648 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2649 or set IDX to a match index, which means put that part
2650 of the buffer text into SUBSTED. */
2651
2652 if (c == '\\')
2653 {
2654 really_changed = 1;
2655
2656 if (str_multibyte)
2657 {
2658 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2659 pos, pos_byte);
2660 if (!buf_multibyte && !ASCII_CHAR_P (c))
2661 c = CHAR_TO_BYTE8 (c);
2662 }
2663 else
2664 {
2665 c = SREF (newtext, pos_byte++);
2666 if (buf_multibyte)
2667 MAKE_CHAR_MULTIBYTE (c);
2668 }
2669
2670 if (c == '&')
2671 idx = sub;
2672 else if ('1' <= c && c <= '9' && c - '0' < num_regs)
2673 {
2674 if (search_regs.start[c - '0'] >= 1)
2675 idx = c - '0';
2676 }
2677 else if (c == '\\')
2678 add_len = 1, add_stuff = (unsigned char *) "\\";
2679 else
2680 {
2681 xfree (substed);
2682 error ("Invalid use of `\\' in replacement text");
2683 }
2684 }
2685 else
2686 {
2687 add_len = CHAR_STRING (c, str);
2688 add_stuff = str;
2689 }
2690
2691 /* If we want to copy part of a previous match,
2692 set up ADD_STUFF and ADD_LEN to point to it. */
2693 if (idx >= 0)
2694 {
2695 begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2696 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2697 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2698 move_gap_both (search_regs.start[idx], begbyte);
2699 }
2700
2701 /* Now the stuff we want to add to SUBSTED
2702 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2703
2704 /* Make sure SUBSTED is big enough. */
2705 if (substed_alloc_size - substed_len < add_len)
2706 substed =
2707 xpalloc (substed, &substed_alloc_size,
2708 add_len - (substed_alloc_size - substed_len),
2709 STRING_BYTES_BOUND, 1);
2710
2711 /* We compute this after the call to xpalloc, because that
2712 could cause buffer text be relocated when ralloc.c is used. */
2713 if (idx >= 0)
2714 add_stuff = BYTE_POS_ADDR (begbyte);
2715
2716 /* Now add to the end of SUBSTED. */
2717 if (add_stuff)
2718 {
2719 memcpy (substed + substed_len, add_stuff, add_len);
2720 substed_len += add_len;
2721 }
2722 }
2723
2724 if (really_changed)
2725 newtext = make_specified_string ((const char *) substed, -1,
2726 substed_len, buf_multibyte);
2727 xfree (substed);
2728 }
2729
2730 newpoint = sub_start + SCHARS (newtext);
2731 ptrdiff_t newstart = sub_start == sub_end ? newpoint : sub_start;
2732
2733 /* Replace the old text with the new in the cleanest possible way. */
2734 replace_range (sub_start, sub_end, newtext, 1, 0, 1, true);
2735
2736 if (case_action == all_caps)
2737 Fupcase_region (make_fixnum (search_regs.start[sub]),
2738 make_fixnum (newpoint),
2739 Qnil);
2740 else if (case_action == cap_initial)
2741 Fupcase_initials_region (make_fixnum (search_regs.start[sub]),
2742 make_fixnum (newpoint), Qnil);
2743
2744 /* The replace_range etc. functions can trigger modification hooks
2745 (see signal_before_change and signal_after_change). Try to error
2746 out if these hooks clobber the match data since clobbering can
2747 result in confusing bugs. Although this sanity check does not
2748 catch all possible clobberings, it should catch many of them. */
2749 if (! (search_regs.num_regs == num_regs
2750 && search_regs.start[sub] == newstart
2751 && search_regs.end[sub] == newpoint))
2752 error ("Match data clobbered by buffer modification hooks");
2753
2754 /* Put point back where it was in the text, if possible. */
2755 TEMP_SET_PT (clip_to_bounds (BEGV, opoint + (opoint <= 0 ? ZV : 0), ZV));
2756 /* Now move point "officially" to the start of the inserted replacement. */
2757 move_if_not_intangible (newpoint);
2758
2759 return Qnil;
2760 }
2761
2762 static Lisp_Object
match_limit(Lisp_Object num,bool beginningp)2763 match_limit (Lisp_Object num, bool beginningp)
2764 {
2765 EMACS_INT n;
2766
2767 CHECK_FIXNUM (num);
2768 n = XFIXNUM (num);
2769 if (n < 0)
2770 args_out_of_range (num, make_fixnum (0));
2771 if (search_regs.num_regs <= 0)
2772 error ("No match data, because no search succeeded");
2773 if (n >= search_regs.num_regs
2774 || search_regs.start[n] < 0)
2775 return Qnil;
2776 return (make_fixnum ((beginningp) ? search_regs.start[n]
2777 : search_regs.end[n]));
2778 }
2779
2780 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2781 doc: /* Return position of start of text matched by last search.
2782 SUBEXP, a number, specifies which parenthesized expression in the last
2783 regexp.
2784 Value is nil if SUBEXPth pair didn't match, or there were less than
2785 SUBEXP pairs.
2786 Zero means the entire text matched by the whole regexp or whole string.
2787
2788 Return value is undefined if the last search failed. */)
2789 (Lisp_Object subexp)
2790 {
2791 return match_limit (subexp, 1);
2792 }
2793
2794 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2795 doc: /* Return position of end of text matched by last search.
2796 SUBEXP, a number, specifies which parenthesized expression in the last
2797 regexp.
2798 Value is nil if SUBEXPth pair didn't match, or there were less than
2799 SUBEXP pairs.
2800 Zero means the entire text matched by the whole regexp or whole string.
2801
2802 Return value is undefined if the last search failed. */)
2803 (Lisp_Object subexp)
2804 {
2805 return match_limit (subexp, 0);
2806 }
2807
2808 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2809 doc: /* Return a list describing what the last search matched.
2810 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2811 All the elements are markers or nil (nil if the Nth pair didn't match)
2812 if the last match was on a buffer; integers or nil if a string was matched.
2813 Use `set-match-data' to reinstate the data in this list.
2814
2815 If INTEGERS (the optional first argument) is non-nil, always use
2816 integers (rather than markers) to represent buffer positions. In
2817 this case, and if the last match was in a buffer, the buffer will get
2818 stored as one additional element at the end of the list.
2819
2820 If REUSE is a list, reuse it as part of the value. If REUSE is long
2821 enough to hold all the values, and if INTEGERS is non-nil, no consing
2822 is done.
2823
2824 If optional third arg RESEAT is non-nil, any previous markers on the
2825 REUSE list will be modified to point to nowhere.
2826
2827 Return value is undefined if the last search failed. */)
2828 (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2829 {
2830 Lisp_Object tail, prev;
2831 Lisp_Object *data;
2832 ptrdiff_t i, len;
2833
2834 if (!NILP (reseat))
2835 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2836 if (MARKERP (XCAR (tail)))
2837 {
2838 unchain_marker (XMARKER (XCAR (tail)));
2839 XSETCAR (tail, Qnil);
2840 }
2841
2842 if (NILP (last_thing_searched))
2843 return Qnil;
2844
2845 prev = Qnil;
2846
2847 USE_SAFE_ALLOCA;
2848 SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
2849
2850 len = 0;
2851 for (i = 0; i < search_regs.num_regs; i++)
2852 {
2853 ptrdiff_t start = search_regs.start[i];
2854 if (start >= 0)
2855 {
2856 if (EQ (last_thing_searched, Qt)
2857 || ! NILP (integers))
2858 {
2859 XSETFASTINT (data[2 * i], start);
2860 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2861 }
2862 else if (BUFFERP (last_thing_searched))
2863 {
2864 data[2 * i] = Fmake_marker ();
2865 Fset_marker (data[2 * i],
2866 make_fixnum (start),
2867 last_thing_searched);
2868 data[2 * i + 1] = Fmake_marker ();
2869 Fset_marker (data[2 * i + 1],
2870 make_fixnum (search_regs.end[i]),
2871 last_thing_searched);
2872 }
2873 else
2874 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2875 emacs_abort ();
2876
2877 len = 2 * i + 2;
2878 }
2879 else
2880 data[2 * i] = data[2 * i + 1] = Qnil;
2881 }
2882
2883 if (BUFFERP (last_thing_searched) && !NILP (integers))
2884 {
2885 data[len] = last_thing_searched;
2886 len++;
2887 }
2888
2889 /* If REUSE is not usable, cons up the values and return them. */
2890 if (! CONSP (reuse))
2891 reuse = Flist (len, data);
2892 else
2893 {
2894 /* If REUSE is a list, store as many value elements as will fit
2895 into the elements of REUSE. */
2896 for (i = 0, tail = reuse; CONSP (tail);
2897 i++, tail = XCDR (tail))
2898 {
2899 if (i < len)
2900 XSETCAR (tail, data[i]);
2901 else
2902 XSETCAR (tail, Qnil);
2903 prev = tail;
2904 }
2905
2906 /* If we couldn't fit all value elements into REUSE,
2907 cons up the rest of them and add them to the end of REUSE. */
2908 if (i < len)
2909 XSETCDR (prev, Flist (len - i, data + i));
2910 }
2911
2912 SAFE_FREE ();
2913 return reuse;
2914 }
2915
2916 /* We used to have an internal use variant of `reseat' described as:
2917
2918 If RESEAT is `evaporate', put the markers back on the free list
2919 immediately. No other references to the markers must exist in this
2920 case, so it is used only internally on the unwind stack and
2921 save-match-data from Lisp.
2922
2923 But it was ill-conceived: those supposedly-internal markers get exposed via
2924 the undo-list, so freeing them here is unsafe. */
2925
2926 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2927 doc: /* Set internal data on last search match from elements of LIST.
2928 LIST should have been created by calling `match-data' previously.
2929
2930 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2931 (register Lisp_Object list, Lisp_Object reseat)
2932 {
2933 ptrdiff_t i;
2934 register Lisp_Object marker;
2935
2936 if (running_asynch_code)
2937 save_search_regs ();
2938
2939 CHECK_LIST (list);
2940
2941 /* Unless we find a marker with a buffer or an explicit buffer
2942 in LIST, assume that this match data came from a string. */
2943 last_thing_searched = Qt;
2944
2945 /* Allocate registers if they don't already exist. */
2946 {
2947 ptrdiff_t length = list_length (list) / 2;
2948
2949 if (length > search_regs.num_regs)
2950 {
2951 ptrdiff_t num_regs = search_regs.num_regs;
2952 search_regs.start =
2953 xpalloc (search_regs.start, &num_regs, length - num_regs,
2954 min (PTRDIFF_MAX, UINT_MAX), sizeof *search_regs.start);
2955 search_regs.end =
2956 xrealloc (search_regs.end, num_regs * sizeof *search_regs.end);
2957
2958 for (i = search_regs.num_regs; i < num_regs; i++)
2959 search_regs.start[i] = -1;
2960
2961 search_regs.num_regs = num_regs;
2962 }
2963
2964 for (i = 0; CONSP (list); i++)
2965 {
2966 marker = XCAR (list);
2967 if (BUFFERP (marker))
2968 {
2969 last_thing_searched = marker;
2970 break;
2971 }
2972 if (i >= length)
2973 break;
2974 if (NILP (marker))
2975 {
2976 search_regs.start[i] = -1;
2977 list = XCDR (list);
2978 }
2979 else
2980 {
2981 Lisp_Object from;
2982 Lisp_Object m;
2983
2984 m = marker;
2985 if (MARKERP (marker))
2986 {
2987 if (XMARKER (marker)->buffer == 0)
2988 XSETFASTINT (marker, 0);
2989 else
2990 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2991 }
2992
2993 CHECK_FIXNUM_COERCE_MARKER (marker);
2994 from = marker;
2995
2996 if (!NILP (reseat) && MARKERP (m))
2997 {
2998 unchain_marker (XMARKER (m));
2999 XSETCAR (list, Qnil);
3000 }
3001
3002 if ((list = XCDR (list), !CONSP (list)))
3003 break;
3004
3005 m = marker = XCAR (list);
3006
3007 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3008 XSETFASTINT (marker, 0);
3009
3010 CHECK_FIXNUM_COERCE_MARKER (marker);
3011 if (PTRDIFF_MIN <= XFIXNUM (from) && XFIXNUM (from) <= PTRDIFF_MAX
3012 && PTRDIFF_MIN <= XFIXNUM (marker)
3013 && XFIXNUM (marker) <= PTRDIFF_MAX)
3014 {
3015 search_regs.start[i] = XFIXNUM (from);
3016 search_regs.end[i] = XFIXNUM (marker);
3017 }
3018 else
3019 {
3020 search_regs.start[i] = -1;
3021 }
3022
3023 if (!NILP (reseat) && MARKERP (m))
3024 {
3025 unchain_marker (XMARKER (m));
3026 XSETCAR (list, Qnil);
3027 }
3028 }
3029 list = XCDR (list);
3030 }
3031
3032 for (; i < search_regs.num_regs; i++)
3033 search_regs.start[i] = -1;
3034 }
3035
3036 return Qnil;
3037 }
3038
3039 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3040 if asynchronous code (filter or sentinel) is running. */
3041 static void
save_search_regs(void)3042 save_search_regs (void)
3043 {
3044 if (saved_search_regs.num_regs == 0)
3045 {
3046 saved_search_regs = search_regs;
3047 saved_last_thing_searched = last_thing_searched;
3048 last_thing_searched = Qnil;
3049 search_regs.num_regs = 0;
3050 search_regs.start = 0;
3051 search_regs.end = 0;
3052 }
3053 }
3054
3055 /* Called upon exit from filters and sentinels. */
3056 void
restore_search_regs(void)3057 restore_search_regs (void)
3058 {
3059 if (saved_search_regs.num_regs != 0)
3060 {
3061 if (search_regs.num_regs > 0)
3062 {
3063 xfree (search_regs.start);
3064 xfree (search_regs.end);
3065 }
3066 search_regs = saved_search_regs;
3067 last_thing_searched = saved_last_thing_searched;
3068 saved_last_thing_searched = Qnil;
3069 saved_search_regs.num_regs = 0;
3070 }
3071 }
3072
3073 /* Called from replace-match via replace_range. */
3074 void
update_search_regs(ptrdiff_t oldstart,ptrdiff_t oldend,ptrdiff_t newend)3075 update_search_regs (ptrdiff_t oldstart, ptrdiff_t oldend, ptrdiff_t newend)
3076 {
3077 /* Adjust search data for this change. */
3078 ptrdiff_t change = newend - oldend;
3079 ptrdiff_t i;
3080
3081 for (i = 0; i < search_regs.num_regs; i++)
3082 {
3083 if (search_regs.start[i] >= oldend)
3084 search_regs.start[i] += change;
3085 else if (search_regs.start[i] > oldstart)
3086 search_regs.start[i] = oldstart;
3087 if (search_regs.end[i] >= oldend)
3088 search_regs.end[i] += change;
3089 else if (search_regs.end[i] > oldstart)
3090 search_regs.end[i] = oldstart;
3091 }
3092 }
3093
3094 static void
unwind_set_match_data(Lisp_Object list)3095 unwind_set_match_data (Lisp_Object list)
3096 {
3097 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3098 Fset_match_data (list, Qt);
3099 }
3100
3101 /* Called to unwind protect the match data. */
3102 void
record_unwind_save_match_data(void)3103 record_unwind_save_match_data (void)
3104 {
3105 record_unwind_protect (unwind_set_match_data,
3106 Fmatch_data (Qnil, Qnil, Qnil));
3107 }
3108
3109 /* Quote a string to deactivate reg-expr chars */
3110
3111 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3112 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3113 (Lisp_Object string)
3114 {
3115 char *in, *out, *end;
3116 char *temp;
3117 ptrdiff_t backslashes_added = 0;
3118
3119 CHECK_STRING (string);
3120
3121 USE_SAFE_ALLOCA;
3122 SAFE_NALLOCA (temp, 2, SBYTES (string));
3123
3124 /* Now copy the data into the new string, inserting escapes. */
3125
3126 in = SSDATA (string);
3127 end = in + SBYTES (string);
3128 out = temp;
3129
3130 for (; in != end; in++)
3131 {
3132 if (*in == '['
3133 || *in == '*' || *in == '.' || *in == '\\'
3134 || *in == '?' || *in == '+'
3135 || *in == '^' || *in == '$')
3136 *out++ = '\\', backslashes_added++;
3137 *out++ = *in;
3138 }
3139
3140 Lisp_Object result
3141 = (backslashes_added > 0
3142 ? make_specified_string (temp,
3143 SCHARS (string) + backslashes_added,
3144 out - temp,
3145 STRING_MULTIBYTE (string))
3146 : string);
3147 SAFE_FREE ();
3148 return result;
3149 }
3150
3151 /* Like find_newline, but doesn't use the cache, and only searches forward. */
3152 static ptrdiff_t
find_newline1(ptrdiff_t start,ptrdiff_t start_byte,ptrdiff_t end,ptrdiff_t end_byte,ptrdiff_t count,ptrdiff_t * counted,ptrdiff_t * bytepos,bool allow_quit)3153 find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
3154 ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
3155 ptrdiff_t *bytepos, bool allow_quit)
3156 {
3157 if (count > 0)
3158 {
3159 if (!end)
3160 end = ZV, end_byte = ZV_BYTE;
3161 }
3162 else
3163 {
3164 if (!end)
3165 end = BEGV, end_byte = BEGV_BYTE;
3166 }
3167 if (end_byte == -1)
3168 end_byte = CHAR_TO_BYTE (end);
3169
3170 if (counted)
3171 *counted = count;
3172
3173 if (count > 0)
3174 while (start != end)
3175 {
3176 /* Our innermost scanning loop is very simple; it doesn't know
3177 about gaps, buffer ends, or the newline cache. ceiling is
3178 the position of the last character before the next such
3179 obstacle --- the last character the dumb search loop should
3180 examine. */
3181 ptrdiff_t tem, ceiling_byte = end_byte - 1;
3182
3183 if (start_byte == -1)
3184 start_byte = CHAR_TO_BYTE (start);
3185
3186 /* The dumb loop can only scan text stored in contiguous
3187 bytes. BUFFER_CEILING_OF returns the last character
3188 position that is contiguous, so the ceiling is the
3189 position after that. */
3190 tem = BUFFER_CEILING_OF (start_byte);
3191 ceiling_byte = min (tem, ceiling_byte);
3192
3193 {
3194 /* The termination address of the dumb loop. */
3195 unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
3196 ptrdiff_t lim_byte = ceiling_byte + 1;
3197
3198 /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
3199 of the base, the cursor, and the next line. */
3200 ptrdiff_t base = start_byte - lim_byte;
3201 ptrdiff_t cursor, next;
3202
3203 for (cursor = base; cursor < 0; cursor = next)
3204 {
3205 /* The dumb loop. */
3206 unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
3207 next = nl ? nl - lim_addr : 0;
3208
3209 if (! nl)
3210 break;
3211 next++;
3212
3213 if (--count == 0)
3214 {
3215 if (bytepos)
3216 *bytepos = lim_byte + next;
3217 return BYTE_TO_CHAR (lim_byte + next);
3218 }
3219 if (allow_quit)
3220 maybe_quit ();
3221 }
3222
3223 start_byte = lim_byte;
3224 start = BYTE_TO_CHAR (start_byte);
3225 }
3226 }
3227
3228 if (counted)
3229 *counted -= count;
3230 if (bytepos)
3231 {
3232 *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
3233 eassert (*bytepos == CHAR_TO_BYTE (start));
3234 }
3235 return start;
3236 }
3237
3238 DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
3239 0, 1, 0,
3240 doc: /* Check the newline cache of BUFFER against buffer contents.
3241
3242 BUFFER defaults to the current buffer.
3243
3244 Value is an array of 2 sub-arrays of buffer positions for newlines,
3245 the first based on the cache, the second based on actually scanning
3246 the buffer. If the buffer doesn't have a cache, the value is nil. */)
3247 (Lisp_Object buffer)
3248 {
3249 struct buffer *buf, *old = NULL;
3250 ptrdiff_t nl_count_cache, nl_count_buf;
3251 Lisp_Object cache_newlines, buf_newlines, val;
3252 ptrdiff_t from, found, i;
3253
3254 if (NILP (buffer))
3255 buf = current_buffer;
3256 else
3257 {
3258 CHECK_BUFFER (buffer);
3259 buf = XBUFFER (buffer);
3260 old = current_buffer;
3261 }
3262 if (buf->base_buffer)
3263 buf = buf->base_buffer;
3264
3265 /* If the buffer doesn't have a newline cache, return nil. */
3266 if (NILP (BVAR (buf, cache_long_scans))
3267 || buf->newline_cache == NULL)
3268 return Qnil;
3269
3270 /* find_newline can only work on the current buffer. */
3271 if (old != NULL)
3272 set_buffer_internal_1 (buf);
3273
3274 /* How many newlines are there according to the cache? */
3275 find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3276 TYPE_MAXIMUM (ptrdiff_t), &nl_count_cache, NULL, true);
3277
3278 /* Create vector and populate it. */
3279 cache_newlines = make_uninit_vector (nl_count_cache);
3280
3281 if (nl_count_cache)
3282 {
3283 for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3284 {
3285 ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
3286
3287 found = find_newline (from, from_byte, 0, -1, 1, &counted,
3288 NULL, true);
3289 if (counted == 0 || i >= nl_count_cache)
3290 break;
3291 ASET (cache_newlines, i, make_fixnum (found - 1));
3292 }
3293 /* Fill the rest of slots with an invalid position. */
3294 for ( ; i < nl_count_cache; i++)
3295 ASET (cache_newlines, i, make_fixnum (-1));
3296 }
3297
3298 /* Now do the same, but without using the cache. */
3299 find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3300 TYPE_MAXIMUM (ptrdiff_t), &nl_count_buf, NULL, true);
3301 buf_newlines = make_uninit_vector (nl_count_buf);
3302 if (nl_count_buf)
3303 {
3304 for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3305 {
3306 ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
3307
3308 found = find_newline1 (from, from_byte, 0, -1, 1, &counted,
3309 NULL, true);
3310 if (counted == 0 || i >= nl_count_buf)
3311 break;
3312 ASET (buf_newlines, i, make_fixnum (found - 1));
3313 }
3314 for ( ; i < nl_count_buf; i++)
3315 ASET (buf_newlines, i, make_fixnum (-1));
3316 }
3317
3318 /* Construct the value and return it. */
3319 val = make_uninit_vector (2);
3320 ASET (val, 0, cache_newlines);
3321 ASET (val, 1, buf_newlines);
3322
3323 if (old != NULL)
3324 set_buffer_internal_1 (old);
3325 return val;
3326 }
3327
3328
3329 static void syms_of_search_for_pdumper (void);
3330
3331 void
syms_of_search(void)3332 syms_of_search (void)
3333 {
3334 for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
3335 {
3336 staticpro (&searchbufs[i].regexp);
3337 staticpro (&searchbufs[i].f_whitespace_regexp);
3338 staticpro (&searchbufs[i].syntax_table);
3339 }
3340
3341 /* Error condition used for failing searches. */
3342 DEFSYM (Qsearch_failed, "search-failed");
3343
3344 /* Error condition used for failing searches started by user, i.e.,
3345 where failure should not invoke the debugger. */
3346 DEFSYM (Quser_search_failed, "user-search-failed");
3347
3348 /* Error condition signaled when regexp compile_pattern fails. */
3349 DEFSYM (Qinvalid_regexp, "invalid-regexp");
3350
3351 Fput (Qsearch_failed, Qerror_conditions,
3352 pure_list (Qsearch_failed, Qerror));
3353 Fput (Qsearch_failed, Qerror_message,
3354 build_pure_c_string ("Search failed"));
3355
3356 Fput (Quser_search_failed, Qerror_conditions,
3357 pure_list (Quser_search_failed, Quser_error, Qsearch_failed, Qerror));
3358 Fput (Quser_search_failed, Qerror_message,
3359 build_pure_c_string ("Search failed"));
3360
3361 Fput (Qinvalid_regexp, Qerror_conditions,
3362 pure_list (Qinvalid_regexp, Qerror));
3363 Fput (Qinvalid_regexp, Qerror_message,
3364 build_pure_c_string ("Invalid regexp"));
3365
3366 re_match_object = Qnil;
3367 staticpro (&re_match_object);
3368
3369 DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3370 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3371 Some commands use this for user-specified regexps.
3372 Spaces that occur inside character classes or repetition operators
3373 or other such regexp constructs are not replaced with this.
3374 A value of nil (which is the normal value) means treat spaces
3375 literally. Note that a value with capturing groups can change the
3376 numbering of existing capture groups in unexpected ways. */);
3377 Vsearch_spaces_regexp = Qnil;
3378
3379 DEFSYM (Qinhibit_changing_match_data, "inhibit-changing-match-data");
3380 DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3381 doc: /* Internal use only.
3382 If non-nil, the primitive searching and matching functions
3383 such as `looking-at', `string-match', `re-search-forward', etc.,
3384 do not set the match data. The proper way to use this variable
3385 is to bind it with `let' around a small expression. */);
3386 Vinhibit_changing_match_data = Qnil;
3387
3388 defsubr (&Slooking_at);
3389 defsubr (&Sposix_looking_at);
3390 defsubr (&Sstring_match);
3391 defsubr (&Sposix_string_match);
3392 defsubr (&Ssearch_forward);
3393 defsubr (&Ssearch_backward);
3394 defsubr (&Sre_search_forward);
3395 defsubr (&Sre_search_backward);
3396 defsubr (&Sposix_search_forward);
3397 defsubr (&Sposix_search_backward);
3398 defsubr (&Sreplace_match);
3399 defsubr (&Smatch_beginning);
3400 defsubr (&Smatch_end);
3401 defsubr (&Smatch_data);
3402 defsubr (&Sset_match_data);
3403 defsubr (&Sregexp_quote);
3404 defsubr (&Snewline_cache_check);
3405
3406 pdumper_do_now_and_after_load (syms_of_search_for_pdumper);
3407 }
3408
3409 static void
syms_of_search_for_pdumper(void)3410 syms_of_search_for_pdumper (void)
3411 {
3412 for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
3413 {
3414 searchbufs[i].buf.allocated = 100;
3415 searchbufs[i].buf.buffer = xmalloc (100);
3416 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3417 searchbufs[i].regexp = Qnil;
3418 searchbufs[i].f_whitespace_regexp = Qnil;
3419 searchbufs[i].busy = false;
3420 searchbufs[i].syntax_table = Qnil;
3421 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3422 }
3423 searchbuf_head = &searchbufs[0];
3424 }
3425