1 /* String search routines for GNU Emacs.
2 
3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2021 Free Software
4 Foundation, Inc.
5 
6 This file is part of GNU Emacs.
7 
8 GNU Emacs is free software: you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation, either version 3 of the License, or (at
11 your option) any later version.
12 
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16 GNU General Public License for more details.
17 
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
20 
21 
22 #include <config.h>
23 
24 #include "lisp.h"
25 #include "character.h"
26 #include "buffer.h"
27 #include "syntax.h"
28 #include "charset.h"
29 #include "region-cache.h"
30 #include "blockinput.h"
31 #include "intervals.h"
32 #include "pdumper.h"
33 
34 #include "regex-emacs.h"
35 
36 #define REGEXP_CACHE_SIZE 20
37 
38 /* If the regexp is non-nil, then the buffer contains the compiled form
39    of that regexp, suitable for searching.  */
40 struct regexp_cache
41 {
42   struct regexp_cache *next;
43   Lisp_Object regexp, f_whitespace_regexp;
44   /* Syntax table for which the regexp applies.  We need this because
45      of character classes.  If this is t, then the compiled pattern is valid
46      for any syntax-table.  */
47   Lisp_Object syntax_table;
48   struct re_pattern_buffer buf;
49   char fastmap[0400];
50   /* True means regexp was compiled to do full POSIX backtracking.  */
51   bool posix;
52   /* True means we're inside a buffer match.  */
53   bool busy;
54 };
55 
56 /* The instances of that struct.  */
57 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
58 
59 /* The head of the linked list; points to the most recently used buffer.  */
60 static struct regexp_cache *searchbuf_head;
61 
62 static void set_search_regs (ptrdiff_t, ptrdiff_t);
63 static void save_search_regs (void);
64 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
65 				ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
66                                 ptrdiff_t, ptrdiff_t);
67 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
68                               Lisp_Object, Lisp_Object, ptrdiff_t,
69                               ptrdiff_t, int);
70 static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
71                                 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
72                                 Lisp_Object, Lisp_Object, bool);
73 
74 Lisp_Object re_match_object;
75 
76 static AVOID
matcher_overflow(void)77 matcher_overflow (void)
78 {
79   error ("Stack overflow in regexp matcher");
80 }
81 
82 static void
freeze_buffer_relocation(void)83 freeze_buffer_relocation (void)
84 {
85 #ifdef REL_ALLOC
86   /* Prevent ralloc.c from relocating the current buffer while
87      searching it.  */
88   r_alloc_inhibit_buffer_relocation (1);
89   record_unwind_protect_int (r_alloc_inhibit_buffer_relocation, 0);
90 #endif
91 }
92 
93 /* Compile a regexp and signal a Lisp error if anything goes wrong.
94    PATTERN is the pattern to compile.
95    CP is the place to put the result.
96    TRANSLATE is a translation table for ignoring case, or nil for none.
97    POSIX is true if we want full backtracking (POSIX style) for this pattern.
98    False means backtrack only enough to get a valid match.
99 
100    The behavior also depends on Vsearch_spaces_regexp.  */
101 
102 static void
compile_pattern_1(struct regexp_cache * cp,Lisp_Object pattern,Lisp_Object translate,bool posix)103 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
104 		   Lisp_Object translate, bool posix)
105 {
106   const char *whitespace_regexp;
107   char *val;
108 
109   eassert (!cp->busy);
110   cp->regexp = Qnil;
111   cp->buf.translate = translate;
112   cp->posix = posix;
113   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
114   cp->buf.charset_unibyte = charset_unibyte;
115   if (STRINGP (Vsearch_spaces_regexp))
116     cp->f_whitespace_regexp = Vsearch_spaces_regexp;
117   else
118     cp->f_whitespace_regexp = Qnil;
119 
120   whitespace_regexp = STRINGP (Vsearch_spaces_regexp) ?
121     SSDATA (Vsearch_spaces_regexp) : NULL;
122 
123   val = (char *) re_compile_pattern (SSDATA (pattern), SBYTES (pattern),
124 				     posix, whitespace_regexp, &cp->buf);
125 
126   /* If the compiled pattern hard codes some of the contents of the
127      syntax-table, it can only be reused with *this* syntax table.  */
128   cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
129 
130   if (val)
131     xsignal1 (Qinvalid_regexp, build_string (val));
132 
133   cp->regexp = Fcopy_sequence (pattern);
134 }
135 
136 /* Shrink each compiled regexp buffer in the cache
137    to the size actually used right now.
138    This is called from garbage collection.  */
139 
140 void
shrink_regexp_cache(void)141 shrink_regexp_cache (void)
142 {
143   struct regexp_cache *cp;
144 
145   for (cp = searchbuf_head; cp != 0; cp = cp->next)
146     if (!cp->busy)
147       {
148         cp->buf.allocated = cp->buf.used;
149         cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
150       }
151 }
152 
153 /* Clear the regexp cache w.r.t. a particular syntax table,
154    because it was changed.
155    There is no danger of memory leak here because re_compile_pattern
156    automagically manages the memory in each re_pattern_buffer struct,
157    based on its `allocated' and `buffer' values.  */
158 void
clear_regexp_cache(void)159 clear_regexp_cache (void)
160 {
161   int i;
162 
163   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
164     /* It's tempting to compare with the syntax-table we've actually changed,
165        but it's not sufficient because char-table inheritance means that
166        modifying one syntax-table can change others at the same time.  */
167     if (!searchbufs[i].busy && !EQ (searchbufs[i].syntax_table, Qt))
168       searchbufs[i].regexp = Qnil;
169 }
170 
171 static void
unfreeze_pattern(void * arg)172 unfreeze_pattern (void *arg)
173 {
174   struct regexp_cache *searchbuf = arg;
175   searchbuf->busy = false;
176 }
177 
178 static void
freeze_pattern(struct regexp_cache * searchbuf)179 freeze_pattern (struct regexp_cache *searchbuf)
180 {
181   eassert (!searchbuf->busy);
182   record_unwind_protect_ptr (unfreeze_pattern, searchbuf);
183   searchbuf->busy = true;
184 }
185 
186 /* Compile a regexp if necessary, but first check to see if there's one in
187    the cache.
188    PATTERN is the pattern to compile.
189    TRANSLATE is a translation table for ignoring case, or nil for none.
190    REGP is the structure that says where to store the "register"
191    values that will result from matching this pattern.
192    If it is 0, we should compile the pattern not to record any
193    subexpression bounds.
194    POSIX is true if we want full backtracking (POSIX style) for this pattern.
195    False means backtrack only enough to get a valid match.  */
196 
197 static struct regexp_cache *
compile_pattern(Lisp_Object pattern,struct re_registers * regp,Lisp_Object translate,bool posix,bool multibyte)198 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
199 		 Lisp_Object translate, bool posix, bool multibyte)
200 {
201   struct regexp_cache *cp, **cpp, **lru_nonbusy;
202 
203   for (cpp = &searchbuf_head, lru_nonbusy = NULL; ; cpp = &cp->next)
204     {
205       cp = *cpp;
206       if (!cp->busy)
207         lru_nonbusy = cpp;
208       /* Entries are initialized to nil, and may be set to nil by
209 	 compile_pattern_1 if the pattern isn't valid.  Don't apply
210 	 string accessors in those cases.  However, compile_pattern_1
211 	 is only applied to the cache entry we pick here to reuse.  So
212 	 nil should never appear before a non-nil entry.  */
213       if (NILP (cp->regexp))
214 	goto compile_it;
215       if (SCHARS (cp->regexp) == SCHARS (pattern)
216           && !cp->busy
217 	  && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
218 	  && !NILP (Fstring_equal (cp->regexp, pattern))
219 	  && EQ (cp->buf.translate, translate)
220 	  && cp->posix == posix
221 	  && (EQ (cp->syntax_table, Qt)
222 	      || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
223 	  && !NILP (Fequal (cp->f_whitespace_regexp, Vsearch_spaces_regexp))
224 	  && cp->buf.charset_unibyte == charset_unibyte)
225 	break;
226 
227       /* If we're at the end of the cache, compile into the last
228 	 (least recently used) non-busy cell in the cache.  */
229       if (cp->next == 0)
230 	{
231           if (!lru_nonbusy)
232             error ("Too much matching reentrancy");
233           cpp = lru_nonbusy;
234           cp = *cpp;
235 	compile_it:
236           eassert (!cp->busy);
237 	  compile_pattern_1 (cp, pattern, translate, posix);
238 	  break;
239 	}
240     }
241 
242   /* When we get here, cp (aka *cpp) contains the compiled pattern,
243      either because we found it in the cache or because we just compiled it.
244      Move it to the front of the queue to mark it as most recently used.  */
245   *cpp = cp->next;
246   cp->next = searchbuf_head;
247   searchbuf_head = cp;
248 
249   /* Advise the searching functions about the space we have allocated
250      for register data.  */
251   if (regp)
252     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
253 
254   /* The compiled pattern can be used both for multibyte and unibyte
255      target.  But, we have to tell which the pattern is used for. */
256   cp->buf.target_multibyte = multibyte;
257   return cp;
258 }
259 
260 
261 static Lisp_Object
looking_at_1(Lisp_Object string,bool posix)262 looking_at_1 (Lisp_Object string, bool posix)
263 {
264   Lisp_Object val;
265   unsigned char *p1, *p2;
266   ptrdiff_t s1, s2;
267   register ptrdiff_t i;
268 
269   if (running_asynch_code)
270     save_search_regs ();
271 
272   /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
273      table.  */
274   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
275 			 BVAR (current_buffer, case_eqv_table));
276 
277   CHECK_STRING (string);
278 
279   /* Snapshot in case Lisp changes the value.  */
280   bool preserve_match_data = NILP (Vinhibit_changing_match_data);
281 
282   struct regexp_cache *cache_entry = compile_pattern (
283     string,
284     preserve_match_data ? &search_regs : NULL,
285     (!NILP (BVAR (current_buffer, case_fold_search))
286      ? BVAR (current_buffer, case_canon_table) : Qnil),
287     posix,
288     !NILP (BVAR (current_buffer, enable_multibyte_characters)));
289 
290   /* Do a pending quit right away, to avoid paradoxical behavior */
291   maybe_quit ();
292 
293   /* Get pointers and sizes of the two strings
294      that make up the visible portion of the buffer. */
295 
296   p1 = BEGV_ADDR;
297   s1 = GPT_BYTE - BEGV_BYTE;
298   p2 = GAP_END_ADDR;
299   s2 = ZV_BYTE - GPT_BYTE;
300   if (s1 < 0)
301     {
302       p2 = p1;
303       s2 = ZV_BYTE - BEGV_BYTE;
304       s1 = 0;
305     }
306   if (s2 < 0)
307     {
308       s1 = ZV_BYTE - BEGV_BYTE;
309       s2 = 0;
310     }
311 
312   ptrdiff_t count = SPECPDL_INDEX ();
313   freeze_buffer_relocation ();
314   freeze_pattern (cache_entry);
315   re_match_object = Qnil;
316   i = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
317 		  PT_BYTE - BEGV_BYTE,
318 		  preserve_match_data ? &search_regs : NULL,
319 		  ZV_BYTE - BEGV_BYTE);
320 
321   if (i == -2)
322     {
323       unbind_to (count, Qnil);
324       matcher_overflow ();
325     }
326 
327   val = (i >= 0 ? Qt : Qnil);
328   if (preserve_match_data && i >= 0)
329   {
330     for (i = 0; i < search_regs.num_regs; i++)
331       if (search_regs.start[i] >= 0)
332 	{
333 	  search_regs.start[i]
334 	    = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
335          search_regs.end[i]
336            = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
337        }
338     /* Set last_thing_searched only when match data is changed.  */
339     XSETBUFFER (last_thing_searched, current_buffer);
340   }
341 
342   return unbind_to (count, val);
343 }
344 
345 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
346        doc: /* Return t if text after point matches regular expression REGEXP.
347 This function modifies the match data that `match-beginning',
348 `match-end' and `match-data' access; save and restore the match
349 data if you want to preserve them.  */)
350   (Lisp_Object regexp)
351 {
352   return looking_at_1 (regexp, 0);
353 }
354 
355 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
356        doc: /* Return t if text after point matches regular expression REGEXP.
357 Find the longest match, in accord with Posix regular expression rules.
358 This function modifies the match data that `match-beginning',
359 `match-end' and `match-data' access; save and restore the match
360 data if you want to preserve them.  */)
361   (Lisp_Object regexp)
362 {
363   return looking_at_1 (regexp, 1);
364 }
365 
366 static Lisp_Object
string_match_1(Lisp_Object regexp,Lisp_Object string,Lisp_Object start,bool posix)367 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
368 		bool posix)
369 {
370   ptrdiff_t val;
371   struct re_pattern_buffer *bufp;
372   EMACS_INT pos;
373   ptrdiff_t pos_byte, i;
374 
375   if (running_asynch_code)
376     save_search_regs ();
377 
378   CHECK_STRING (regexp);
379   CHECK_STRING (string);
380 
381   if (NILP (start))
382     pos = 0, pos_byte = 0;
383   else
384     {
385       ptrdiff_t len = SCHARS (string);
386 
387       CHECK_FIXNUM (start);
388       pos = XFIXNUM (start);
389       if (pos < 0 && -pos <= len)
390 	pos = len + pos;
391       else if (0 > pos || pos > len)
392 	args_out_of_range (string, start);
393       pos_byte = string_char_to_byte (string, pos);
394     }
395 
396   /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
397      table.  */
398   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
399 			 BVAR (current_buffer, case_eqv_table));
400 
401   bufp = &compile_pattern (regexp,
402                            (NILP (Vinhibit_changing_match_data)
403                             ? &search_regs : NULL),
404                            (!NILP (BVAR (current_buffer, case_fold_search))
405                             ? BVAR (current_buffer, case_canon_table) : Qnil),
406                            posix,
407                            STRING_MULTIBYTE (string))->buf;
408   re_match_object = string;
409   val = re_search (bufp, SSDATA (string),
410 		   SBYTES (string), pos_byte,
411 		   SBYTES (string) - pos_byte,
412 		   (NILP (Vinhibit_changing_match_data)
413 		    ? &search_regs : NULL));
414 
415   /* Set last_thing_searched only when match data is changed.  */
416   if (NILP (Vinhibit_changing_match_data))
417     last_thing_searched = Qt;
418 
419   if (val == -2)
420     matcher_overflow ();
421   if (val < 0) return Qnil;
422 
423   if (NILP (Vinhibit_changing_match_data))
424     for (i = 0; i < search_regs.num_regs; i++)
425       if (search_regs.start[i] >= 0)
426 	{
427 	  search_regs.start[i]
428 	    = string_byte_to_char (string, search_regs.start[i]);
429 	  search_regs.end[i]
430 	    = string_byte_to_char (string, search_regs.end[i]);
431 	}
432 
433   return make_fixnum (string_byte_to_char (string, val));
434 }
435 
436 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
437        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
438 Matching ignores case if `case-fold-search' is non-nil.
439 If third arg START is non-nil, start search at that index in STRING.
440 For index of first char beyond the match, do (match-end 0).
441 `match-end' and `match-beginning' also give indices of substrings
442 matched by parenthesis constructs in the pattern.
443 
444 You can use the function `match-string' to extract the substrings
445 matched by the parenthesis constructions in REGEXP. */)
446   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
447 {
448   return string_match_1 (regexp, string, start, 0);
449 }
450 
451 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
452        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
453 Find the longest match, in accord with Posix regular expression rules.
454 Case is ignored if `case-fold-search' is non-nil in the current buffer.
455 If third arg START is non-nil, start search at that index in STRING.
456 For index of first char beyond the match, do (match-end 0).
457 `match-end' and `match-beginning' also give indices of substrings
458 matched by parenthesis constructs in the pattern.  */)
459   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
460 {
461   return string_match_1 (regexp, string, start, 1);
462 }
463 
464 /* Match REGEXP against STRING using translation table TABLE,
465    searching all of STRING, and return the index of the match,
466    or negative on failure.  This does not clobber the match data.  */
467 
468 ptrdiff_t
fast_string_match_internal(Lisp_Object regexp,Lisp_Object string,Lisp_Object table)469 fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
470 			    Lisp_Object table)
471 {
472   ptrdiff_t val;
473   struct re_pattern_buffer *bufp;
474 
475   bufp = &compile_pattern (regexp, 0, table,
476                            0, STRING_MULTIBYTE (string))->buf;
477   re_match_object = string;
478   val = re_search (bufp, SSDATA (string),
479 		   SBYTES (string), 0,
480 		   SBYTES (string), 0);
481   return val;
482 }
483 
484 /* Match REGEXP against STRING, searching all of STRING ignoring case,
485    and return the index of the match, or negative on failure.
486    This does not clobber the match data.
487    We assume that STRING contains single-byte characters.  */
488 
489 ptrdiff_t
fast_c_string_match_ignore_case(Lisp_Object regexp,const char * string,ptrdiff_t len)490 fast_c_string_match_ignore_case (Lisp_Object regexp,
491 				 const char *string, ptrdiff_t len)
492 {
493   ptrdiff_t val;
494   struct re_pattern_buffer *bufp;
495 
496   regexp = string_make_unibyte (regexp);
497   bufp = &compile_pattern (regexp, 0,
498                            Vascii_canon_table, 0,
499                            0)->buf;
500   re_match_object = Qt;
501   val = re_search (bufp, string, len, 0, len, 0);
502   return val;
503 }
504 
505 /* Match REGEXP against the characters after POS to LIMIT, and return
506    the number of matched characters.  If STRING is non-nil, match
507    against the characters in it.  In that case, POS and LIMIT are
508    indices into the string.  This function doesn't modify the match
509    data.  */
510 
511 ptrdiff_t
fast_looking_at(Lisp_Object regexp,ptrdiff_t pos,ptrdiff_t pos_byte,ptrdiff_t limit,ptrdiff_t limit_byte,Lisp_Object string)512 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
513 		 ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
514 {
515   bool multibyte;
516   unsigned char *p1, *p2;
517   ptrdiff_t s1, s2;
518   ptrdiff_t len;
519 
520   if (STRINGP (string))
521     {
522       if (pos_byte < 0)
523 	pos_byte = string_char_to_byte (string, pos);
524       if (limit_byte < 0)
525 	limit_byte = string_char_to_byte (string, limit);
526       p1 = NULL;
527       s1 = 0;
528       p2 = SDATA (string);
529       s2 = SBYTES (string);
530       multibyte = STRING_MULTIBYTE (string);
531     }
532   else
533     {
534       if (pos_byte < 0)
535 	pos_byte = CHAR_TO_BYTE (pos);
536       if (limit_byte < 0)
537 	limit_byte = CHAR_TO_BYTE (limit);
538       pos_byte -= BEGV_BYTE;
539       limit_byte -= BEGV_BYTE;
540       p1 = BEGV_ADDR;
541       s1 = GPT_BYTE - BEGV_BYTE;
542       p2 = GAP_END_ADDR;
543       s2 = ZV_BYTE - GPT_BYTE;
544       if (s1 < 0)
545 	{
546 	  p2 = p1;
547 	  s2 = ZV_BYTE - BEGV_BYTE;
548 	  s1 = 0;
549 	}
550       if (s2 < 0)
551 	{
552 	  s1 = ZV_BYTE - BEGV_BYTE;
553 	  s2 = 0;
554 	}
555       multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
556     }
557 
558   struct regexp_cache *cache_entry =
559     compile_pattern (regexp, 0, Qnil, 0, multibyte);
560   ptrdiff_t count = SPECPDL_INDEX ();
561   freeze_buffer_relocation ();
562   freeze_pattern (cache_entry);
563   re_match_object = STRINGP (string) ? string : Qnil;
564   len = re_match_2 (&cache_entry->buf, (char *) p1, s1, (char *) p2, s2,
565 		    pos_byte, NULL, limit_byte);
566 
567   unbind_to (count, Qnil);
568   return len;
569 }
570 
571 
572 /* The newline cache: remembering which sections of text have no newlines.  */
573 
574 /* If the user has requested the long scans caching, make sure it's on.
575    Otherwise, make sure it's off.
576    This is our cheezy way of associating an action with the change of
577    state of a buffer-local variable.  */
578 static struct region_cache *
newline_cache_on_off(struct buffer * buf)579 newline_cache_on_off (struct buffer *buf)
580 {
581   struct buffer *base_buf = buf;
582   bool indirect_p = false;
583 
584   if (buf->base_buffer)
585     {
586       base_buf = buf->base_buffer;
587       indirect_p = true;
588     }
589 
590   /* Don't turn on or off the cache in the base buffer, if the value
591      of cache-long-scans of the base buffer is inconsistent with that.
592      This is because doing so will just make the cache pure overhead,
593      since if we turn it on via indirect buffer, it will be
594      immediately turned off by its base buffer.  */
595   if (NILP (BVAR (buf, cache_long_scans)))
596     {
597       if (!indirect_p
598 	  || NILP (BVAR (base_buf, cache_long_scans)))
599 	{
600 	  /* It should be off.  */
601 	  if (base_buf->newline_cache)
602 	    {
603 	      free_region_cache (base_buf->newline_cache);
604 	      base_buf->newline_cache = 0;
605 	    }
606 	}
607       return NULL;
608     }
609   else
610     {
611       if (!indirect_p
612 	  || !NILP (BVAR (base_buf, cache_long_scans)))
613 	{
614 	  /* It should be on.  */
615 	  if (base_buf->newline_cache == 0)
616 	    base_buf->newline_cache = new_region_cache ();
617 	}
618       return base_buf->newline_cache;
619     }
620 }
621 
622 
623 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
624 
625    If COUNT is positive, search forwards; END must be >= START.
626    If COUNT is negative, search backwards for the -COUNTth instance;
627       END must be <= START.
628    If COUNT is zero, do anything you please; run rogue, for all I care.
629 
630    If END is zero, use BEGV or ZV instead, as appropriate for the
631    direction indicated by COUNT.  If START_BYTE is -1 it is unknown,
632    and similarly for END_BYTE.
633 
634    If we find COUNT instances, set *COUNTED to COUNT, and return the
635    position past the COUNTth match.  Note that for reverse motion
636    this is not the same as the usual convention for Emacs motion commands.
637 
638    If we don't find COUNT instances before reaching END, set *COUNTED
639    to the number of newlines left found (negated if COUNT is negative),
640    and return END.
641 
642    If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
643    to the returned character position.
644 
645    If ALLOW_QUIT, check for quitting.  That's good to do
646    except when inside redisplay.  */
647 
648 ptrdiff_t
find_newline(ptrdiff_t start,ptrdiff_t start_byte,ptrdiff_t end,ptrdiff_t end_byte,ptrdiff_t count,ptrdiff_t * counted,ptrdiff_t * bytepos,bool allow_quit)649 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
650 	      ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
651 	      ptrdiff_t *bytepos, bool allow_quit)
652 {
653   struct region_cache *newline_cache;
654   struct buffer *cache_buffer;
655 
656   if (!end)
657     {
658       if (count > 0)
659 	end = ZV, end_byte = ZV_BYTE;
660       else
661 	end = BEGV, end_byte = BEGV_BYTE;
662     }
663   if (end_byte == -1)
664     end_byte = CHAR_TO_BYTE (end);
665 
666   newline_cache = newline_cache_on_off (current_buffer);
667   if (current_buffer->base_buffer)
668     cache_buffer = current_buffer->base_buffer;
669   else
670     cache_buffer = current_buffer;
671 
672   if (counted)
673     *counted = count;
674 
675   if (count > 0)
676     while (start != end)
677       {
678         /* Our innermost scanning loop is very simple; it doesn't know
679            about gaps, buffer ends, or the newline cache.  ceiling is
680            the position of the last character before the next such
681            obstacle --- the last character the dumb search loop should
682            examine.  */
683 	ptrdiff_t tem, ceiling_byte = end_byte - 1;
684 
685         /* If we're using the newline cache, consult it to see whether
686            we can avoid some scanning.  */
687         if (newline_cache)
688           {
689             ptrdiff_t next_change;
690 	    int result = 1;
691 
692             while (start < end && result)
693 	      {
694 		ptrdiff_t lim1;
695 
696 		result = region_cache_forward (cache_buffer, newline_cache,
697 					       start, &next_change);
698 		if (result)
699 		  {
700 		    /* When the cache revalidation is deferred,
701 		       next-change might point beyond ZV, which will
702 		       cause assertion violation in CHAR_TO_BYTE below.
703 		       Limit next_change to ZV to avoid that.  */
704 		    if (next_change > ZV)
705 		      next_change = ZV;
706 		    start = next_change;
707 		    lim1 = next_change = end;
708 		  }
709 		else
710 		  lim1 = min (next_change, end);
711 
712 		/* The cache returned zero for this region; see if
713 		   this is because the region is known and includes
714 		   only newlines.  While at that, count any newlines
715 		   we bump into, and exit if we found enough off them.  */
716 		start_byte = CHAR_TO_BYTE (start);
717 		while (start < lim1
718 		       && FETCH_BYTE (start_byte) == '\n')
719 		  {
720 		    start_byte++;
721 		    start++;
722 		    if (--count == 0)
723 		      {
724 			if (bytepos)
725 			  *bytepos = start_byte;
726 			return start;
727 		      }
728 		  }
729 		/* If we found a non-newline character before hitting
730 		   position where the cache will again return non-zero
731 		   (i.e. no newlines beyond that position), it means
732 		   this region is not yet known to the cache, and we
733 		   must resort to the "dumb loop" method.  */
734 		if (start < next_change && !result)
735 		  break;
736 		result = 1;
737 	      }
738 	    if (start >= end)
739 	      {
740 		start = end;
741 		start_byte = end_byte;
742 		break;
743 	      }
744 
745             /* START should never be after END.  */
746             if (start_byte > ceiling_byte)
747               start_byte = ceiling_byte;
748 
749             /* Now the text after start is an unknown region, and
750                next_change is the position of the next known region. */
751             ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
752           }
753 	else if (start_byte == -1)
754 	  start_byte = CHAR_TO_BYTE (start);
755 
756         /* The dumb loop can only scan text stored in contiguous
757            bytes. BUFFER_CEILING_OF returns the last character
758            position that is contiguous, so the ceiling is the
759            position after that.  */
760 	tem = BUFFER_CEILING_OF (start_byte);
761 	ceiling_byte = min (tem, ceiling_byte);
762 
763         {
764           /* The termination address of the dumb loop.  */
765 	  unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
766 	  ptrdiff_t lim_byte = ceiling_byte + 1;
767 
768 	  /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
769 	     of the base, the cursor, and the next line.  */
770 	  ptrdiff_t base = start_byte - lim_byte;
771 	  ptrdiff_t cursor, next;
772 
773 	  for (cursor = base; cursor < 0; cursor = next)
774 	    {
775               /* The dumb loop.  */
776 	      unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
777 	      next = nl ? nl - lim_addr : 0;
778 
779               /* If we're using the newline cache, cache the fact that
780                  the region we just traversed is free of newlines. */
781               if (newline_cache && cursor != next)
782 		{
783 		  know_region_cache (cache_buffer, newline_cache,
784 				     BYTE_TO_CHAR (lim_byte + cursor),
785 				     BYTE_TO_CHAR (lim_byte + next));
786 		  /* know_region_cache can relocate buffer text.  */
787 		  lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
788 		}
789 
790               if (! nl)
791 		break;
792 	      next++;
793 
794 	      if (--count == 0)
795 		{
796 		  if (bytepos)
797 		    *bytepos = lim_byte + next;
798 		  return BYTE_TO_CHAR (lim_byte + next);
799 		}
800 	      if (allow_quit)
801 		maybe_quit ();
802             }
803 
804 	  start_byte = lim_byte;
805 	  start = BYTE_TO_CHAR (start_byte);
806         }
807       }
808   else
809     while (start > end)
810       {
811         /* The last character to check before the next obstacle.  */
812 	ptrdiff_t tem, ceiling_byte = end_byte;
813 
814         /* Consult the newline cache, if appropriate.  */
815         if (newline_cache)
816           {
817             ptrdiff_t next_change;
818 	    int result = 1;
819 
820             while (start > end && result)
821 	      {
822 		ptrdiff_t lim1;
823 
824 		result = region_cache_backward (cache_buffer, newline_cache,
825 						start, &next_change);
826 		if (result)
827 		  {
828 		    start = next_change;
829 		    lim1 = next_change = end;
830 		  }
831 		else
832 		  lim1 = max (next_change, end);
833 		start_byte = CHAR_TO_BYTE (start);
834 		while (start > lim1
835 		       && FETCH_BYTE (start_byte - 1) == '\n')
836 		  {
837 		    if (++count == 0)
838 		      {
839 			if (bytepos)
840 			  *bytepos = start_byte;
841 			return start;
842 		      }
843 		    start_byte--;
844 		    start--;
845 		  }
846 		if (start > next_change && !result)
847 		  break;
848 		result = 1;
849 	      }
850 	    if (start <= end)
851 	      {
852 		start = end;
853 		start_byte = end_byte;
854 		break;
855 	      }
856 
857             /* Start should never be at or before end.  */
858             if (start_byte <= ceiling_byte)
859               start_byte = ceiling_byte + 1;
860 
861             /* Now the text before start is an unknown region, and
862                next_change is the position of the next known region. */
863             ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
864           }
865 	else if (start_byte == -1)
866 	  start_byte = CHAR_TO_BYTE (start);
867 
868         /* Stop scanning before the gap.  */
869 	tem = BUFFER_FLOOR_OF (start_byte - 1);
870 	ceiling_byte = max (tem, ceiling_byte);
871 
872         {
873           /* The termination address of the dumb loop.  */
874 	  unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
875 
876 	  /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
877 	     the base, the cursor, and the previous line.  These
878 	     offsets are at least -1.  */
879 	  ptrdiff_t base = start_byte - ceiling_byte;
880 	  ptrdiff_t cursor, prev;
881 
882 	  for (cursor = base; 0 < cursor; cursor = prev)
883             {
884 	      unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
885 	      prev = nl ? nl - ceiling_addr : -1;
886 
887               /* If we're looking for newlines, cache the fact that
888                  this line's region is free of them. */
889               if (newline_cache && cursor != prev + 1)
890 		{
891 		  know_region_cache (cache_buffer, newline_cache,
892 				     BYTE_TO_CHAR (ceiling_byte + prev + 1),
893 				     BYTE_TO_CHAR (ceiling_byte + cursor));
894 		  /* know_region_cache can relocate buffer text.  */
895 		  ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
896 		}
897 
898               if (! nl)
899 		break;
900 
901 	      if (++count >= 0)
902 		{
903 		  if (bytepos)
904 		    *bytepos = ceiling_byte + prev + 1;
905 		  return BYTE_TO_CHAR (ceiling_byte + prev + 1);
906 		}
907 	      if (allow_quit)
908 		maybe_quit ();
909             }
910 
911 	  start_byte = ceiling_byte;
912 	  start = BYTE_TO_CHAR (start_byte);
913         }
914       }
915 
916   if (counted)
917     *counted -= count;
918   if (bytepos)
919     {
920       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
921       eassert (*bytepos == CHAR_TO_BYTE (start));
922     }
923   return start;
924 }
925 
926 /* Search for COUNT instances of a line boundary.
927    Start at START.  If COUNT is negative, search backwards.
928 
929    We report the resulting position by calling TEMP_SET_PT_BOTH.
930 
931    If we find COUNT instances. we position after (always after,
932    even if scanning backwards) the COUNTth match.
933 
934    If we don't find COUNT instances before reaching the end of the
935    buffer (or the beginning, if scanning backwards), we position at
936    the limit we bumped up against.
937 
938    If ALLOW_QUIT, check for quitting.  That's good to do
939    except in special cases.  */
940 
941 void
scan_newline(ptrdiff_t start,ptrdiff_t start_byte,ptrdiff_t limit,ptrdiff_t limit_byte,ptrdiff_t count,bool allow_quit)942 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
943 	      ptrdiff_t limit, ptrdiff_t limit_byte,
944 	      ptrdiff_t count, bool allow_quit)
945 {
946   ptrdiff_t charpos, bytepos, counted;
947 
948   charpos = find_newline (start, start_byte, limit, limit_byte,
949 			  count, &counted, &bytepos, allow_quit);
950   if (counted != count)
951     TEMP_SET_PT_BOTH (limit, limit_byte);
952   else
953     TEMP_SET_PT_BOTH (charpos, bytepos);
954 }
955 
956 /* Like above, but always scan from point and report the
957    resulting position in *CHARPOS and *BYTEPOS.  */
958 
959 ptrdiff_t
scan_newline_from_point(ptrdiff_t count,ptrdiff_t * charpos,ptrdiff_t * bytepos)960 scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
961 			 ptrdiff_t *bytepos)
962 {
963   ptrdiff_t counted;
964 
965   if (count <= 0)
966     *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
967 			     &counted, bytepos, 1);
968   else
969     *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
970 			     &counted, bytepos, 1);
971   return counted;
972 }
973 
974 /* Like find_newline, but doesn't allow QUITting and doesn't return
975    COUNTED.  */
976 ptrdiff_t
find_newline_no_quit(ptrdiff_t from,ptrdiff_t frombyte,ptrdiff_t cnt,ptrdiff_t * bytepos)977 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
978 		      ptrdiff_t cnt, ptrdiff_t *bytepos)
979 {
980   return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
981 }
982 
983 /* Like find_newline, but returns position before the newline, not
984    after, and only search up to TO.
985    This isn't just find_newline_no_quit (...)-1, because you might hit TO.  */
986 
987 ptrdiff_t
find_before_next_newline(ptrdiff_t from,ptrdiff_t to,ptrdiff_t cnt,ptrdiff_t * bytepos)988 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
989 			  ptrdiff_t cnt, ptrdiff_t *bytepos)
990 {
991   ptrdiff_t counted;
992   ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &counted, bytepos, 1);
993 
994   if (counted == cnt)
995     {
996       if (bytepos)
997 	DEC_BOTH (pos, *bytepos);
998       else
999 	pos--;
1000     }
1001   return pos;
1002 }
1003 
1004 /* Subroutines of Lisp buffer search functions. */
1005 
1006 static Lisp_Object
search_command(Lisp_Object string,Lisp_Object bound,Lisp_Object noerror,Lisp_Object count,int direction,int RE,bool posix)1007 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
1008 		Lisp_Object count, int direction, int RE, bool posix)
1009 {
1010   EMACS_INT np;
1011   EMACS_INT lim;
1012   ptrdiff_t lim_byte;
1013   EMACS_INT n = direction;
1014 
1015   if (!NILP (count))
1016     {
1017       CHECK_FIXNUM (count);
1018       n *= XFIXNUM (count);
1019     }
1020 
1021   CHECK_STRING (string);
1022   if (NILP (bound))
1023     {
1024       if (n > 0)
1025 	lim = ZV, lim_byte = ZV_BYTE;
1026       else
1027 	lim = BEGV, lim_byte = BEGV_BYTE;
1028     }
1029   else
1030     {
1031       CHECK_FIXNUM_COERCE_MARKER (bound);
1032       lim = XFIXNUM (bound);
1033       if (n > 0 ? lim < PT : lim > PT)
1034 	error ("Invalid search bound (wrong side of point)");
1035       if (lim > ZV)
1036 	lim = ZV, lim_byte = ZV_BYTE;
1037       else if (lim < BEGV)
1038 	lim = BEGV, lim_byte = BEGV_BYTE;
1039       else
1040 	lim_byte = CHAR_TO_BYTE (lim);
1041     }
1042 
1043   /* This is so set_image_of_range_1 in regex-emacs.c can find the EQV
1044      table.  */
1045   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
1046 			 BVAR (current_buffer, case_eqv_table));
1047 
1048   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1049 		      (!NILP (BVAR (current_buffer, case_fold_search))
1050 		       ? BVAR (current_buffer, case_canon_table)
1051 		       : Qnil),
1052 		      (!NILP (BVAR (current_buffer, case_fold_search))
1053 		       ? BVAR (current_buffer, case_eqv_table)
1054 		       : Qnil),
1055 		      posix);
1056   if (np <= 0)
1057     {
1058       if (NILP (noerror))
1059 	xsignal1 (Qsearch_failed, string);
1060 
1061       if (!EQ (noerror, Qt))
1062 	{
1063 	  eassert (BEGV <= lim && lim <= ZV);
1064 	  SET_PT_BOTH (lim, lim_byte);
1065 	  return Qnil;
1066 #if 0 /* This would be clean, but maybe programs depend on
1067 	 a value of nil here.  */
1068 	  np = lim;
1069 #endif
1070 	}
1071       else
1072 	return Qnil;
1073     }
1074 
1075   eassert (BEGV <= np && np <= ZV);
1076   SET_PT (np);
1077 
1078   return make_fixnum (np);
1079 }
1080 
1081 /* Return true if REGEXP it matches just one constant string.  */
1082 
1083 static bool
trivial_regexp_p(Lisp_Object regexp)1084 trivial_regexp_p (Lisp_Object regexp)
1085 {
1086   ptrdiff_t len = SBYTES (regexp);
1087   unsigned char *s = SDATA (regexp);
1088   while (--len >= 0)
1089     {
1090       switch (*s++)
1091 	{
1092 	case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1093 	  return 0;
1094 	case '\\':
1095 	  if (--len < 0)
1096 	    return 0;
1097 	  switch (*s++)
1098 	    {
1099 	    case '|': case '(': case ')': case '`': case '\'': case 'b':
1100 	    case 'B': case '<': case '>': case 'w': case 'W': case 's':
1101 	    case 'S': case '=': case '{': case '}': case '_':
1102 	    case 'c': case 'C':	/* for categoryspec and notcategoryspec */
1103 	    case '1': case '2': case '3': case '4': case '5':
1104 	    case '6': case '7': case '8': case '9':
1105 	      return 0;
1106 	    }
1107 	}
1108     }
1109   return 1;
1110 }
1111 
1112 /* Search for the n'th occurrence of STRING in the current buffer,
1113    starting at position POS and stopping at position LIM,
1114    treating STRING as a literal string if RE is false or as
1115    a regular expression if RE is true.
1116 
1117    If N is positive, searching is forward and LIM must be greater than POS.
1118    If N is negative, searching is backward and LIM must be less than POS.
1119 
1120    Returns -x if x occurrences remain to be found (x > 0),
1121    or else the position at the beginning of the Nth occurrence
1122    (if searching backward) or the end (if searching forward).
1123 
1124    POSIX is nonzero if we want full backtracking (POSIX style)
1125    for this pattern.  0 means backtrack only enough to get a valid match.  */
1126 
1127 #define TRANSLATE(out, trt, d)			\
1128 do						\
1129   {						\
1130     if (! NILP (trt))				\
1131       {						\
1132 	Lisp_Object temp;			\
1133 	temp = Faref (trt, make_fixnum (d));	\
1134 	if (FIXNUMP (temp))			\
1135 	  out = XFIXNUM (temp);			\
1136 	else					\
1137 	  out = d;				\
1138       }						\
1139     else					\
1140       out = d;					\
1141   }						\
1142 while (0)
1143 
1144 /* Only used in search_buffer, to record the end position of the match
1145    when searching regexps and SEARCH_REGS should not be changed
1146    (i.e. Vinhibit_changing_match_data is non-nil).  */
1147 static struct re_registers search_regs_1;
1148 
1149 static EMACS_INT
search_buffer_re(Lisp_Object string,ptrdiff_t pos,ptrdiff_t pos_byte,ptrdiff_t lim,ptrdiff_t lim_byte,EMACS_INT n,Lisp_Object trt,Lisp_Object inverse_trt,bool posix)1150 search_buffer_re (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1151                   ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1152                   Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1153 {
1154   unsigned char *p1, *p2;
1155   ptrdiff_t s1, s2;
1156 
1157   /* Snapshot in case Lisp changes the value.  */
1158   bool preserve_match_data = NILP (Vinhibit_changing_match_data);
1159 
1160   struct regexp_cache *cache_entry =
1161     compile_pattern (string,
1162                      preserve_match_data ? &search_regs : &search_regs_1,
1163                      trt, posix,
1164                      !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1165   struct re_pattern_buffer *bufp = &cache_entry->buf;
1166 
1167   maybe_quit ();		/* Do a pending quit right away,
1168 				   to avoid paradoxical behavior */
1169   /* Get pointers and sizes of the two strings
1170      that make up the visible portion of the buffer. */
1171 
1172   p1 = BEGV_ADDR;
1173   s1 = GPT_BYTE - BEGV_BYTE;
1174   p2 = GAP_END_ADDR;
1175   s2 = ZV_BYTE - GPT_BYTE;
1176   if (s1 < 0)
1177     {
1178       p2 = p1;
1179       s2 = ZV_BYTE - BEGV_BYTE;
1180       s1 = 0;
1181     }
1182   if (s2 < 0)
1183     {
1184       s1 = ZV_BYTE - BEGV_BYTE;
1185       s2 = 0;
1186     }
1187 
1188   ptrdiff_t count = SPECPDL_INDEX ();
1189   freeze_buffer_relocation ();
1190   freeze_pattern (cache_entry);
1191 
1192   while (n < 0)
1193     {
1194       ptrdiff_t val;
1195 
1196       re_match_object = Qnil;
1197       val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1198                          pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1199                          preserve_match_data ? &search_regs : &search_regs_1,
1200                          /* Don't allow match past current point */
1201                          pos_byte - BEGV_BYTE);
1202       if (val == -2)
1203         {
1204           unbind_to (count, Qnil);
1205           matcher_overflow ();
1206         }
1207       if (val >= 0)
1208         {
1209           if (preserve_match_data)
1210             {
1211               pos_byte = search_regs.start[0] + BEGV_BYTE;
1212               for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
1213                 if (search_regs.start[i] >= 0)
1214                   {
1215                     search_regs.start[i]
1216                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1217                     search_regs.end[i]
1218                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1219                   }
1220               XSETBUFFER (last_thing_searched, current_buffer);
1221               /* Set pos to the new position. */
1222               pos = search_regs.start[0];
1223             }
1224           else
1225             {
1226               pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1227               /* Set pos to the new position.  */
1228               pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1229             }
1230         }
1231       else
1232         {
1233           unbind_to (count, Qnil);
1234           return (n);
1235         }
1236       n++;
1237       maybe_quit ();
1238     }
1239   while (n > 0)
1240     {
1241       ptrdiff_t val;
1242 
1243       re_match_object = Qnil;
1244       val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1245                          pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1246                          preserve_match_data ? &search_regs : &search_regs_1,
1247                          lim_byte - BEGV_BYTE);
1248       if (val == -2)
1249         {
1250           unbind_to (count, Qnil);
1251           matcher_overflow ();
1252         }
1253       if (val >= 0)
1254         {
1255           if (preserve_match_data)
1256             {
1257               pos_byte = search_regs.end[0] + BEGV_BYTE;
1258               for (ptrdiff_t i = 0; i < search_regs.num_regs; i++)
1259                 if (search_regs.start[i] >= 0)
1260                   {
1261                     search_regs.start[i]
1262                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1263                     search_regs.end[i]
1264                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1265                   }
1266               XSETBUFFER (last_thing_searched, current_buffer);
1267               pos = search_regs.end[0];
1268             }
1269           else
1270             {
1271               pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1272               pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1273             }
1274         }
1275       else
1276         {
1277           unbind_to (count, Qnil);
1278           return (0 - n);
1279         }
1280       n--;
1281       maybe_quit ();
1282     }
1283   unbind_to (count, Qnil);
1284   return (pos);
1285 }
1286 
1287 static EMACS_INT
search_buffer_non_re(Lisp_Object string,ptrdiff_t pos,ptrdiff_t pos_byte,ptrdiff_t lim,ptrdiff_t lim_byte,EMACS_INT n,int RE,Lisp_Object trt,Lisp_Object inverse_trt,bool posix)1288 search_buffer_non_re (Lisp_Object string, ptrdiff_t pos,
1289                       ptrdiff_t pos_byte, ptrdiff_t lim, ptrdiff_t lim_byte,
1290                       EMACS_INT n, int RE, Lisp_Object trt, Lisp_Object inverse_trt,
1291                       bool posix)
1292 {
1293   unsigned char *raw_pattern, *pat;
1294   ptrdiff_t raw_pattern_size;
1295   ptrdiff_t raw_pattern_size_byte;
1296   unsigned char *patbuf;
1297   bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1298   unsigned char *base_pat;
1299   /* Set to positive if we find a non-ASCII char that need
1300      translation.  Otherwise set to zero later.  */
1301   int char_base = -1;
1302   bool boyer_moore_ok = 1;
1303   USE_SAFE_ALLOCA;
1304 
1305   /* MULTIBYTE says whether the text to be searched is multibyte.
1306      We must convert PATTERN to match that, or we will not really
1307      find things right.  */
1308 
1309   if (multibyte == STRING_MULTIBYTE (string))
1310     {
1311       raw_pattern = SDATA (string);
1312       raw_pattern_size = SCHARS (string);
1313       raw_pattern_size_byte = SBYTES (string);
1314     }
1315   else if (multibyte)
1316     {
1317       raw_pattern_size = SCHARS (string);
1318       raw_pattern_size_byte
1319         = count_size_as_multibyte (SDATA (string),
1320                                    raw_pattern_size);
1321       raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
1322       copy_text (SDATA (string), raw_pattern,
1323                  SCHARS (string), 0, 1);
1324     }
1325   else
1326     {
1327       /* Converting multibyte to single-byte.  */
1328       raw_pattern_size = SCHARS (string);
1329       raw_pattern_size_byte = SCHARS (string);
1330       raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
1331       copy_text (SDATA (string), raw_pattern,
1332                  SBYTES (string), 1, 0);
1333     }
1334 
1335   /* Copy and optionally translate the pattern.  */
1336   ptrdiff_t len = raw_pattern_size;
1337   ptrdiff_t len_byte = raw_pattern_size_byte;
1338   SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
1339   pat = patbuf;
1340   base_pat = raw_pattern;
1341   if (multibyte)
1342     {
1343       /* Fill patbuf by translated characters in STRING while
1344          checking if we can use boyer-moore search.  If TRT is
1345          non-nil, we can use boyer-moore search only if TRT can be
1346          represented by the byte array of 256 elements.  For that,
1347          all non-ASCII case-equivalents of all case-sensitive
1348          characters in STRING must belong to the same character
1349          group (two characters belong to the same group iff their
1350          multibyte forms are the same except for the last byte;
1351          i.e. every 64 characters form a group; U+0000..U+003F,
1352          U+0040..U+007F, U+0080..U+00BF, ...).  */
1353 
1354       while (--len >= 0)
1355         {
1356           unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1357           int c, translated, inverse;
1358           int in_charlen, charlen;
1359 
1360           /* If we got here and the RE flag is set, it's because we're
1361              dealing with a regexp known to be trivial, so the backslash
1362              just quotes the next character.  */
1363           if (RE && *base_pat == '\\')
1364             {
1365               len--;
1366               raw_pattern_size--;
1367               len_byte--;
1368               base_pat++;
1369             }
1370 
1371           c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1372 
1373           if (NILP (trt))
1374             {
1375               str = base_pat;
1376               charlen = in_charlen;
1377             }
1378           else
1379             {
1380               /* Translate the character.  */
1381               TRANSLATE (translated, trt, c);
1382               charlen = CHAR_STRING (translated, str_base);
1383               str = str_base;
1384 
1385               /* Check if C has any other case-equivalents.  */
1386               TRANSLATE (inverse, inverse_trt, c);
1387               /* If so, check if we can use boyer-moore.  */
1388               if (c != inverse && boyer_moore_ok)
1389                 {
1390                   /* Check if all equivalents belong to the same
1391                      group of characters.  Note that the check of C
1392                      itself is done by the last iteration.  */
1393                   int this_char_base = -1;
1394 
1395                   while (boyer_moore_ok)
1396                     {
1397                       if (ASCII_CHAR_P (inverse))
1398                         {
1399                           if (this_char_base > 0)
1400                             boyer_moore_ok = 0;
1401                           else
1402                             this_char_base = 0;
1403                         }
1404                       else if (CHAR_BYTE8_P (inverse))
1405                         /* Boyer-moore search can't handle a
1406                            translation of an eight-bit
1407                            character.  */
1408                         boyer_moore_ok = 0;
1409                       else if (this_char_base < 0)
1410                         {
1411                           this_char_base = inverse & ~0x3F;
1412                           if (char_base < 0)
1413                             char_base = this_char_base;
1414                           else if (this_char_base != char_base)
1415                             boyer_moore_ok = 0;
1416                         }
1417                       else if ((inverse & ~0x3F) != this_char_base)
1418                         boyer_moore_ok = 0;
1419                       if (c == inverse)
1420                         break;
1421                       TRANSLATE (inverse, inverse_trt, inverse);
1422                     }
1423                 }
1424             }
1425 
1426           /* Store this character into the translated pattern.  */
1427           memcpy (pat, str, charlen);
1428           pat += charlen;
1429           base_pat += in_charlen;
1430           len_byte -= in_charlen;
1431         }
1432 
1433       /* If char_base is still negative we didn't find any translated
1434          non-ASCII characters.  */
1435       if (char_base < 0)
1436         char_base = 0;
1437     }
1438   else
1439     {
1440       /* Unibyte buffer.  */
1441       char_base = 0;
1442       while (--len >= 0)
1443         {
1444           int c, translated, inverse;
1445 
1446           /* If we got here and the RE flag is set, it's because we're
1447              dealing with a regexp known to be trivial, so the backslash
1448              just quotes the next character.  */
1449           if (RE && *base_pat == '\\')
1450             {
1451               len--;
1452               raw_pattern_size--;
1453               base_pat++;
1454             }
1455           c = *base_pat++;
1456           TRANSLATE (translated, trt, c);
1457           *pat++ = translated;
1458           /* Check that none of C's equivalents violates the
1459              assumptions of boyer_moore.  */
1460           TRANSLATE (inverse, inverse_trt, c);
1461           while (1)
1462             {
1463               if (inverse >= 0200)
1464                 {
1465                   boyer_moore_ok = 0;
1466                   break;
1467                 }
1468               if (c == inverse)
1469                 break;
1470               TRANSLATE (inverse, inverse_trt, inverse);
1471             }
1472         }
1473     }
1474 
1475   len_byte = pat - patbuf;
1476   pat = base_pat = patbuf;
1477 
1478   EMACS_INT result
1479     = (boyer_moore_ok
1480        ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
1481                       pos_byte, lim_byte,
1482                       char_base)
1483        : simple_search (n, pat, raw_pattern_size, len_byte, trt,
1484                         pos, pos_byte, lim, lim_byte));
1485   SAFE_FREE ();
1486   return result;
1487 }
1488 
1489 static EMACS_INT
search_buffer(Lisp_Object string,ptrdiff_t pos,ptrdiff_t pos_byte,ptrdiff_t lim,ptrdiff_t lim_byte,EMACS_INT n,int RE,Lisp_Object trt,Lisp_Object inverse_trt,bool posix)1490 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1491 	       ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1492 	       int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1493 {
1494   if (running_asynch_code)
1495     save_search_regs ();
1496 
1497   /* Searching 0 times means don't move.  */
1498   /* Null string is found at starting position.  */
1499   if (n == 0 || SCHARS (string) == 0)
1500     {
1501       set_search_regs (pos_byte, 0);
1502       return pos;
1503     }
1504 
1505   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1506     pos = search_buffer_re (string, pos, pos_byte, lim, lim_byte,
1507                             n, trt, inverse_trt, posix);
1508   else
1509     pos = search_buffer_non_re (string, pos, pos_byte, lim, lim_byte,
1510                                 n, RE, trt, inverse_trt, posix);
1511 
1512   return pos;
1513 }
1514 
1515 /* Do a simple string search N times for the string PAT,
1516    whose length is LEN/LEN_BYTE,
1517    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1518    TRT is the translation table.
1519 
1520    Return the character position where the match is found.
1521    Otherwise, if M matches remained to be found, return -M.
1522 
1523    This kind of search works regardless of what is in PAT and
1524    regardless of what is in TRT.  It is used in cases where
1525    boyer_moore cannot work.  */
1526 
1527 static EMACS_INT
simple_search(EMACS_INT n,unsigned char * pat,ptrdiff_t len,ptrdiff_t len_byte,Lisp_Object trt,ptrdiff_t pos,ptrdiff_t pos_byte,ptrdiff_t lim,ptrdiff_t lim_byte)1528 simple_search (EMACS_INT n, unsigned char *pat,
1529 	       ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1530 	       ptrdiff_t pos, ptrdiff_t pos_byte,
1531 	       ptrdiff_t lim, ptrdiff_t lim_byte)
1532 {
1533   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1534   bool forward = n > 0;
1535   /* Number of buffer bytes matched.  Note that this may be different
1536      from len_byte in a multibyte buffer.  */
1537   ptrdiff_t match_byte = PTRDIFF_MIN;
1538 
1539   if (lim > pos && multibyte)
1540     while (n > 0)
1541       {
1542 	while (1)
1543 	  {
1544 	    /* Try matching at position POS.  */
1545 	    ptrdiff_t this_pos = pos;
1546 	    ptrdiff_t this_pos_byte = pos_byte;
1547 	    ptrdiff_t this_len = len;
1548 	    unsigned char *p = pat;
1549 	    if (pos + len > lim || pos_byte + len_byte > lim_byte)
1550 	      goto stop;
1551 
1552 	    while (this_len > 0)
1553 	      {
1554 		int charlen, buf_charlen;
1555 		int pat_ch, buf_ch;
1556 
1557 		pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1558 		buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1559 						 buf_charlen);
1560 		TRANSLATE (buf_ch, trt, buf_ch);
1561 
1562 		if (buf_ch != pat_ch)
1563 		  break;
1564 
1565 		this_len--;
1566 		p += charlen;
1567 
1568 		this_pos_byte += buf_charlen;
1569 		this_pos++;
1570 	      }
1571 
1572 	    if (this_len == 0)
1573 	      {
1574 		match_byte = this_pos_byte - pos_byte;
1575 		pos += len;
1576 		pos_byte += match_byte;
1577 		break;
1578 	      }
1579 
1580 	    INC_BOTH (pos, pos_byte);
1581 	  }
1582 
1583 	n--;
1584       }
1585   else if (lim > pos)
1586     while (n > 0)
1587       {
1588 	while (1)
1589 	  {
1590 	    /* Try matching at position POS.  */
1591 	    ptrdiff_t this_pos = pos;
1592 	    ptrdiff_t this_len = len;
1593 	    unsigned char *p = pat;
1594 
1595 	    if (pos + len > lim)
1596 	      goto stop;
1597 
1598 	    while (this_len > 0)
1599 	      {
1600 		int pat_ch = *p++;
1601 		int buf_ch = FETCH_BYTE (this_pos);
1602 		TRANSLATE (buf_ch, trt, buf_ch);
1603 
1604 		if (buf_ch != pat_ch)
1605 		  break;
1606 
1607 		this_len--;
1608 		this_pos++;
1609 	      }
1610 
1611 	    if (this_len == 0)
1612 	      {
1613 		match_byte = len;
1614 		pos += len;
1615 		break;
1616 	      }
1617 
1618 	    pos++;
1619 	  }
1620 
1621 	n--;
1622       }
1623   /* Backwards search.  */
1624   else if (lim < pos && multibyte)
1625     while (n < 0)
1626       {
1627 	while (1)
1628 	  {
1629 	    /* Try matching at position POS.  */
1630 	    ptrdiff_t this_pos = pos;
1631 	    ptrdiff_t this_pos_byte = pos_byte;
1632 	    ptrdiff_t this_len = len;
1633 	    const unsigned char *p = pat + len_byte;
1634 
1635 	    if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1636 	      goto stop;
1637 
1638 	    while (this_len > 0)
1639 	      {
1640 		int pat_ch, buf_ch;
1641 
1642 		DEC_BOTH (this_pos, this_pos_byte);
1643 		PREV_CHAR_BOUNDARY (p, pat);
1644 		pat_ch = STRING_CHAR (p);
1645 		buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1646 		TRANSLATE (buf_ch, trt, buf_ch);
1647 
1648 		if (buf_ch != pat_ch)
1649 		  break;
1650 
1651 		this_len--;
1652 	      }
1653 
1654 	    if (this_len == 0)
1655 	      {
1656 		match_byte = pos_byte - this_pos_byte;
1657 		pos = this_pos;
1658 		pos_byte = this_pos_byte;
1659 		break;
1660 	      }
1661 
1662 	    DEC_BOTH (pos, pos_byte);
1663 	  }
1664 
1665 	n++;
1666       }
1667   else if (lim < pos)
1668     while (n < 0)
1669       {
1670 	while (1)
1671 	  {
1672 	    /* Try matching at position POS.  */
1673 	    ptrdiff_t this_pos = pos - len;
1674 	    ptrdiff_t this_len = len;
1675 	    unsigned char *p = pat;
1676 
1677 	    if (this_pos < lim)
1678 	      goto stop;
1679 
1680 	    while (this_len > 0)
1681 	      {
1682 		int pat_ch = *p++;
1683 		int buf_ch = FETCH_BYTE (this_pos);
1684 		TRANSLATE (buf_ch, trt, buf_ch);
1685 
1686 		if (buf_ch != pat_ch)
1687 		  break;
1688 		this_len--;
1689 		this_pos++;
1690 	      }
1691 
1692 	    if (this_len == 0)
1693 	      {
1694 		match_byte = len;
1695 		pos -= len;
1696 		break;
1697 	      }
1698 
1699 	    pos--;
1700 	  }
1701 
1702 	n++;
1703       }
1704 
1705  stop:
1706   if (n == 0)
1707     {
1708       eassert (match_byte != PTRDIFF_MIN);
1709       if (forward)
1710 	set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1711       else
1712 	set_search_regs (multibyte ? pos_byte : pos, match_byte);
1713 
1714       return pos;
1715     }
1716   else if (n > 0)
1717     return -n;
1718   else
1719     return n;
1720 }
1721 
1722 /* Do Boyer-Moore search N times for the string BASE_PAT,
1723    whose length is LEN_BYTE,
1724    from buffer position POS_BYTE until LIM_BYTE.
1725    DIRECTION says which direction we search in.
1726    TRT and INVERSE_TRT are translation tables.
1727    Characters in PAT are already translated by TRT.
1728 
1729    This kind of search works if all the characters in BASE_PAT that
1730    have nontrivial translation are the same aside from the last byte.
1731    This makes it possible to translate just the last byte of a
1732    character, and do so after just a simple test of the context.
1733    CHAR_BASE is nonzero if there is such a non-ASCII character.
1734 
1735    If that criterion is not satisfied, do not call this function.  */
1736 
1737 static EMACS_INT
boyer_moore(EMACS_INT n,unsigned char * base_pat,ptrdiff_t len_byte,Lisp_Object trt,Lisp_Object inverse_trt,ptrdiff_t pos_byte,ptrdiff_t lim_byte,int char_base)1738 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1739 	     ptrdiff_t len_byte,
1740 	     Lisp_Object trt, Lisp_Object inverse_trt,
1741 	     ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1742              int char_base)
1743 {
1744   int direction = ((n > 0) ? 1 : -1);
1745   register ptrdiff_t dirlen;
1746   ptrdiff_t limit;
1747   int stride_for_teases = 0;
1748   int BM_tab[0400];
1749   register unsigned char *cursor, *p_limit;
1750   register ptrdiff_t i;
1751   register int j;
1752   unsigned char *pat, *pat_end;
1753   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1754 
1755   unsigned char simple_translate[0400];
1756   /* These are set to the preceding bytes of a byte to be translated
1757      if char_base is nonzero.  As the maximum byte length of a
1758      multibyte character is 5, we have to check at most four previous
1759      bytes.  */
1760   int translate_prev_byte1 = 0;
1761   int translate_prev_byte2 = 0;
1762   int translate_prev_byte3 = 0;
1763 
1764   /* The general approach is that we are going to maintain that we know
1765      the first (closest to the present position, in whatever direction
1766      we're searching) character that could possibly be the last
1767      (furthest from present position) character of a valid match.  We
1768      advance the state of our knowledge by looking at that character
1769      and seeing whether it indeed matches the last character of the
1770      pattern.  If it does, we take a closer look.  If it does not, we
1771      move our pointer (to putative last characters) as far as is
1772      logically possible.  This amount of movement, which I call a
1773      stride, will be the length of the pattern if the actual character
1774      appears nowhere in the pattern, otherwise it will be the distance
1775      from the last occurrence of that character to the end of the
1776      pattern.  If the amount is zero we have a possible match.  */
1777 
1778   /* Here we make a "mickey mouse" BM table.  The stride of the search
1779      is determined only by the last character of the putative match.
1780      If that character does not match, we will stride the proper
1781      distance to propose a match that superimposes it on the last
1782      instance of a character that matches it (per trt), or misses
1783      it entirely if there is none. */
1784 
1785   dirlen = len_byte * direction;
1786 
1787   /* Record position after the end of the pattern.  */
1788   pat_end = base_pat + len_byte;
1789   /* BASE_PAT points to a character that we start scanning from.
1790      It is the first character in a forward search,
1791      the last character in a backward search.  */
1792   if (direction < 0)
1793     base_pat = pat_end - 1;
1794 
1795   /* A character that does not appear in the pattern induces a
1796      stride equal to the pattern length.  */
1797   for (i = 0; i < 0400; i++)
1798     BM_tab[i] = dirlen;
1799 
1800   /* We use this for translation, instead of TRT itself.
1801      We fill this in to handle the characters that actually
1802      occur in the pattern.  Others don't matter anyway!  */
1803   for (i = 0; i < 0400; i++)
1804     simple_translate[i] = i;
1805 
1806   if (char_base)
1807     {
1808       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1809 	 byte following them are the target of translation.  */
1810       eassume (0x80 <= char_base && char_base <= MAX_CHAR);
1811       unsigned char str[MAX_MULTIBYTE_LENGTH];
1812       int cblen = CHAR_STRING (char_base, str);
1813 
1814       translate_prev_byte1 = str[cblen - 2];
1815       if (cblen > 2)
1816 	{
1817 	  translate_prev_byte2 = str[cblen - 3];
1818 	  if (cblen > 3)
1819 	    translate_prev_byte3 = str[cblen - 4];
1820 	}
1821     }
1822 
1823   i = 0;
1824   while (i != dirlen)
1825     {
1826       unsigned char *ptr = base_pat + i;
1827       i += direction;
1828       if (! NILP (trt))
1829 	{
1830 	  /* If the byte currently looking at is the last of a
1831 	     character to check case-equivalents, set CH to that
1832 	     character.  An ASCII character and a non-ASCII character
1833 	     matching with CHAR_BASE are to be checked.  */
1834 	  int ch = -1;
1835 
1836 	  if (ASCII_CHAR_P (*ptr) || ! multibyte)
1837 	    ch = *ptr;
1838 	  else if (char_base
1839 		   && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1840 	    {
1841 	      unsigned char *charstart = ptr - 1;
1842 
1843 	      while (! (CHAR_HEAD_P (*charstart)))
1844 		charstart--;
1845 	      ch = STRING_CHAR (charstart);
1846 	      if (char_base != (ch & ~0x3F))
1847 		ch = -1;
1848 	    }
1849 
1850 	  if (ch >= 0200 && multibyte)
1851 	    j = (ch & 0x3F) | 0200;
1852 	  else
1853 	    j = *ptr;
1854 
1855 	  if (i == dirlen)
1856 	    stride_for_teases = BM_tab[j];
1857 
1858 	  BM_tab[j] = dirlen - i;
1859 	  /* A translation table is accompanied by its inverse -- see
1860 	     comment following downcase_table for details.  */
1861 	  if (ch >= 0)
1862 	    {
1863 	      int starting_ch = ch;
1864 	      int starting_j = j;
1865 
1866 	      while (1)
1867 		{
1868 		  TRANSLATE (ch, inverse_trt, ch);
1869 		  if (ch >= 0200 && multibyte)
1870 		    j = (ch & 0x3F) | 0200;
1871 		  else
1872 		    j = ch;
1873 
1874 		  /* For all the characters that map into CH,
1875 		     set up simple_translate to map the last byte
1876 		     into STARTING_J.  */
1877 		  simple_translate[j] = starting_j;
1878 		  if (ch == starting_ch)
1879 		    break;
1880 		  BM_tab[j] = dirlen - i;
1881 		}
1882 	    }
1883 	}
1884       else
1885 	{
1886 	  j = *ptr;
1887 
1888 	  if (i == dirlen)
1889 	    stride_for_teases = BM_tab[j];
1890 	  BM_tab[j] = dirlen - i;
1891 	}
1892       /* stride_for_teases tells how much to stride if we get a
1893 	 match on the far character but are subsequently
1894 	 disappointed, by recording what the stride would have been
1895 	 for that character if the last character had been
1896 	 different.  */
1897     }
1898   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1899   /* loop invariant - POS_BYTE points at where last char (first
1900      char if reverse) of pattern would align in a possible match.  */
1901   while (n != 0)
1902     {
1903       ptrdiff_t tail_end;
1904       unsigned char *tail_end_ptr;
1905 
1906       /* It's been reported that some (broken) compiler thinks that
1907 	 Boolean expressions in an arithmetic context are unsigned.
1908 	 Using an explicit ?1:0 prevents this.  */
1909       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1910 	  < 0)
1911 	return (n * (0 - direction));
1912       /* First we do the part we can by pointers (maybe nothing) */
1913       maybe_quit ();
1914       pat = base_pat;
1915       limit = pos_byte - dirlen + direction;
1916       if (direction > 0)
1917 	{
1918 	  limit = BUFFER_CEILING_OF (limit);
1919 	  /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1920 	     can take on without hitting edge of buffer or the gap.  */
1921 	  limit = min (limit, pos_byte + 20000);
1922 	  limit = min (limit, lim_byte - 1);
1923 	}
1924       else
1925 	{
1926 	  limit = BUFFER_FLOOR_OF (limit);
1927 	  /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1928 	     can take on without hitting edge of buffer or the gap.  */
1929 	  limit = max (limit, pos_byte - 20000);
1930 	  limit = max (limit, lim_byte);
1931 	}
1932       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1933       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1934 
1935       if ((limit - pos_byte) * direction > 20)
1936 	{
1937 	  unsigned char *p2;
1938 
1939 	  p_limit = BYTE_POS_ADDR (limit);
1940 	  p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1941 	  /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1942 	  while (1)		/* use one cursor setting as long as i can */
1943 	    {
1944 	      if (direction > 0) /* worth duplicating */
1945 		{
1946 		  while (cursor <= p_limit)
1947 		    {
1948 		      if (BM_tab[*cursor] == 0)
1949 			goto hit;
1950 		      cursor += BM_tab[*cursor];
1951 		    }
1952 		}
1953 	      else
1954 		{
1955 		  while (cursor >= p_limit)
1956 		    {
1957 		      if (BM_tab[*cursor] == 0)
1958 			goto hit;
1959 		      cursor += BM_tab[*cursor];
1960 		    }
1961 		}
1962 	      /* If you are here, cursor is beyond the end of the
1963 		 searched region.  You fail to match within the
1964 		 permitted region and would otherwise try a character
1965 		 beyond that region.  */
1966 	      break;
1967 
1968 	    hit:
1969 	      i = dirlen - direction;
1970 	      if (! NILP (trt))
1971 		{
1972 		  while ((i -= direction) + direction != 0)
1973 		    {
1974 		      int ch;
1975 		      cursor -= direction;
1976 		      /* Translate only the last byte of a character.  */
1977 		      if (! multibyte
1978 			  || ((cursor == tail_end_ptr
1979 			       || CHAR_HEAD_P (cursor[1]))
1980 			      && (CHAR_HEAD_P (cursor[0])
1981 				  /* Check if this is the last byte of
1982 				     a translatable character.  */
1983 				  || (translate_prev_byte1 == cursor[-1]
1984 				      && (CHAR_HEAD_P (translate_prev_byte1)
1985 					  || (translate_prev_byte2 == cursor[-2]
1986 					      && (CHAR_HEAD_P (translate_prev_byte2)
1987 						  || (translate_prev_byte3 == cursor[-3]))))))))
1988 			ch = simple_translate[*cursor];
1989 		      else
1990 			ch = *cursor;
1991 		      if (pat[i] != ch)
1992 			break;
1993 		    }
1994 		}
1995 	      else
1996 		{
1997 		  while ((i -= direction) + direction != 0)
1998 		    {
1999 		      cursor -= direction;
2000 		      if (pat[i] != *cursor)
2001 			break;
2002 		    }
2003 		}
2004 	      cursor += dirlen - i - direction;	/* fix cursor */
2005 	      if (i + direction == 0)
2006 		{
2007 		  ptrdiff_t position, start, end;
2008 #ifdef REL_ALLOC
2009 		  ptrdiff_t cursor_off;
2010 #endif
2011 
2012 		  cursor -= direction;
2013 
2014 		  position = pos_byte + cursor - p2 + ((direction > 0)
2015 						       ? 1 - len_byte : 0);
2016 #ifdef REL_ALLOC
2017 		  /* set_search_regs might call malloc, which could
2018 		     cause ralloc.c relocate buffer text.  We need to
2019 		     update pointers into buffer text due to that.  */
2020 		  cursor_off = cursor - p2;
2021 #endif
2022 		  set_search_regs (position, len_byte);
2023 #ifdef REL_ALLOC
2024 		  p_limit = BYTE_POS_ADDR (limit);
2025 		  p2 = BYTE_POS_ADDR (pos_byte);
2026 		  cursor = p2 + cursor_off;
2027 #endif
2028 
2029 		  if (NILP (Vinhibit_changing_match_data))
2030 		    {
2031 		      start = search_regs.start[0];
2032 		      end = search_regs.end[0];
2033 		    }
2034 		  else
2035 		    /* If Vinhibit_changing_match_data is non-nil,
2036 		       search_regs will not be changed.  So let's
2037 		       compute start and end here.  */
2038 		    {
2039 		      start = BYTE_TO_CHAR (position);
2040 		      end = BYTE_TO_CHAR (position + len_byte);
2041 		    }
2042 
2043 		  if ((n -= direction) != 0)
2044 		    cursor += dirlen; /* to resume search */
2045 		  else
2046 		    return direction > 0 ? end : start;
2047 		}
2048 	      else
2049 		cursor += stride_for_teases; /* <sigh> we lose -  */
2050 	    }
2051 	  pos_byte += cursor - p2;
2052 	}
2053       else
2054 	/* Now we'll pick up a clump that has to be done the hard
2055 	   way because it covers a discontinuity.  */
2056 	{
2057 	  limit = ((direction > 0)
2058 		   ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2059 		   : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2060 	  limit = ((direction > 0)
2061 		   ? min (limit + len_byte, lim_byte - 1)
2062 		   : max (limit - len_byte, lim_byte));
2063 	  /* LIMIT is now the last value POS_BYTE can have
2064 	     and still be valid for a possible match.  */
2065 	  while (1)
2066 	    {
2067 	      /* This loop can be coded for space rather than
2068 		 speed because it will usually run only once.
2069 		 (the reach is at most len + 21, and typically
2070 		 does not exceed len).  */
2071 	      while ((limit - pos_byte) * direction >= 0)
2072 		{
2073 		  int ch = FETCH_BYTE (pos_byte);
2074 		  if (BM_tab[ch] == 0)
2075 		    goto hit2;
2076 		  pos_byte += BM_tab[ch];
2077 		}
2078 	      break;	/* ran off the end */
2079 
2080 	    hit2:
2081 	      /* Found what might be a match.  */
2082 	      i = dirlen - direction;
2083 	      while ((i -= direction) + direction != 0)
2084 		{
2085 		  int ch;
2086 		  unsigned char *ptr;
2087 		  pos_byte -= direction;
2088 		  ptr = BYTE_POS_ADDR (pos_byte);
2089 		  /* Translate only the last byte of a character.  */
2090 		  if (! multibyte
2091 		      || ((ptr == tail_end_ptr
2092 			   || CHAR_HEAD_P (ptr[1]))
2093 			  && (CHAR_HEAD_P (ptr[0])
2094 			      /* Check if this is the last byte of a
2095 				 translatable character.  */
2096 			      || (translate_prev_byte1 == ptr[-1]
2097 				  && (CHAR_HEAD_P (translate_prev_byte1)
2098 				      || (translate_prev_byte2 == ptr[-2]
2099 					  && (CHAR_HEAD_P (translate_prev_byte2)
2100 					      || translate_prev_byte3 == ptr[-3])))))))
2101 		    ch = simple_translate[*ptr];
2102 		  else
2103 		    ch = *ptr;
2104 		  if (pat[i] != ch)
2105 		    break;
2106 		}
2107 	      /* Above loop has moved POS_BYTE part or all the way
2108 		 back to the first pos (last pos if reverse).
2109 		 Set it once again at the last (first if reverse) char.  */
2110 	      pos_byte += dirlen - i - direction;
2111 	      if (i + direction == 0)
2112 		{
2113 		  ptrdiff_t position, start, end;
2114 		  pos_byte -= direction;
2115 
2116 		  position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2117 		  set_search_regs (position, len_byte);
2118 
2119 		  if (NILP (Vinhibit_changing_match_data))
2120 		    {
2121 		      start = search_regs.start[0];
2122 		      end = search_regs.end[0];
2123 		    }
2124 		  else
2125 		    /* If Vinhibit_changing_match_data is non-nil,
2126 		       search_regs will not be changed.  So let's
2127 		       compute start and end here.  */
2128 		    {
2129 		      start = BYTE_TO_CHAR (position);
2130 		      end = BYTE_TO_CHAR (position + len_byte);
2131 		    }
2132 
2133 		  if ((n -= direction) != 0)
2134 		    pos_byte += dirlen; /* to resume search */
2135 		  else
2136 		    return direction > 0 ? end : start;
2137 		}
2138 	      else
2139 		pos_byte += stride_for_teases;
2140 	    }
2141 	  }
2142       /* We have done one clump.  Can we continue? */
2143       if ((lim_byte - pos_byte) * direction < 0)
2144 	return ((0 - n) * direction);
2145     }
2146   return BYTE_TO_CHAR (pos_byte);
2147 }
2148 
2149 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2150    for the overall match just found in the current buffer.
2151    Also clear out the match data for registers 1 and up.  */
2152 
2153 static void
set_search_regs(ptrdiff_t beg_byte,ptrdiff_t nbytes)2154 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2155 {
2156   ptrdiff_t i;
2157 
2158   if (!NILP (Vinhibit_changing_match_data))
2159     return;
2160 
2161   /* Make sure we have registers in which to store
2162      the match position.  */
2163   if (search_regs.num_regs == 0)
2164     {
2165       search_regs.start = xmalloc (2 * sizeof *search_regs.start);
2166       search_regs.end = xmalloc (2 * sizeof *search_regs.end);
2167       search_regs.num_regs = 2;
2168     }
2169 
2170   /* Clear out the other registers.  */
2171   for (i = 1; i < search_regs.num_regs; i++)
2172     {
2173       search_regs.start[i] = -1;
2174       search_regs.end[i] = -1;
2175     }
2176 
2177   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2178   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2179   XSETBUFFER (last_thing_searched, current_buffer);
2180 }
2181 
2182 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2183        "MSearch backward: ",
2184        doc: /* Search backward from point for STRING.
2185 Set point to the beginning of the occurrence found, and return point.
2186 An optional second argument bounds the search; it is a buffer position.
2187   The match found must not begin before that position.  A value of nil
2188   means search to the beginning of the accessible portion of the buffer.
2189 Optional third argument, if t, means if fail just return nil (no error).
2190   If not nil and not t, position at limit of search and return nil.
2191 Optional fourth argument COUNT, if a positive number, means to search
2192   for COUNT successive occurrences.  If COUNT is negative, search
2193   forward, instead of backward, for -COUNT occurrences.  A value of
2194   nil means the same as 1.
2195 With COUNT positive, the match found is the COUNTth to last one (or
2196   last, if COUNT is 1 or nil) in the buffer located entirely before
2197   the origin of the search; correspondingly with COUNT negative.
2198 
2199 Search case-sensitivity is determined by the value of the variable
2200 `case-fold-search', which see.
2201 
2202 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2203   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2204 {
2205   return search_command (string, bound, noerror, count, -1, 0, 0);
2206 }
2207 
2208 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2209        doc: /* Search forward from point for STRING.
2210 Set point to the end of the occurrence found, and return point.
2211 An optional second argument bounds the search; it is a buffer position.
2212   The match found must not end after that position.  A value of nil
2213   means search to the end of the accessible portion of the buffer.
2214 Optional third argument, if t, means if fail just return nil (no error).
2215   If not nil and not t, move to limit of search and return nil.
2216 Optional fourth argument COUNT, if a positive number, means to search
2217   for COUNT successive occurrences.  If COUNT is negative, search
2218   backward, instead of forward, for -COUNT occurrences.  A value of
2219   nil means the same as 1.
2220 With COUNT positive, the match found is the COUNTth one (or first,
2221   if COUNT is 1 or nil) in the buffer located entirely after the
2222   origin of the search; correspondingly with COUNT negative.
2223 
2224 Search case-sensitivity is determined by the value of the variable
2225 `case-fold-search', which see.
2226 
2227 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2228   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2229 {
2230   return search_command (string, bound, noerror, count, 1, 0, 0);
2231 }
2232 
2233 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2234        "sRE search backward: ",
2235        doc: /* Search backward from point for regular expression REGEXP.
2236 This function is almost identical to `re-search-forward', except that
2237 by default it searches backward instead of forward, and the sign of
2238 COUNT also indicates exactly the opposite searching direction.
2239 See `re-search-forward' for details.
2240 
2241 Note that searching backwards may give a shorter match than expected,
2242 because REGEXP is still matched in the forward direction.  See Info
2243 anchor `(elisp) re-search-backward' for details.  */)
2244   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2245 {
2246   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2247 }
2248 
2249 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2250        "sRE search: ",
2251        doc: /* Search forward from point for regular expression REGEXP.
2252 Set point to the end of the occurrence found, and return point.
2253 The optional second argument BOUND is a buffer position that bounds
2254   the search.  The match found must not end after that position.  A
2255   value of nil means search to the end of the accessible portion of
2256   the buffer.
2257 The optional third argument NOERROR indicates how errors are handled
2258   when the search fails.  If it is nil or omitted, emit an error; if
2259   it is t, simply return nil and do nothing; if it is neither nil nor
2260   t, move to the limit of search and return nil.
2261 The optional fourth argument COUNT is a number that indicates the
2262   search direction and the number of occurrences to search for.  If it
2263   is positive, search forward for COUNT successive occurrences; if it
2264   is negative, search backward, instead of forward, for -COUNT
2265   occurrences.  A value of nil means the same as 1.
2266 With COUNT positive/negative, the match found is the COUNTth/-COUNTth
2267   one in the buffer located entirely after/before the origin of the
2268   search.
2269 
2270 Search case-sensitivity is determined by the value of the variable
2271 `case-fold-search', which see.
2272 
2273 See also the functions `match-beginning', `match-end', `match-string',
2274 and `replace-match'.  */)
2275   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2276 {
2277   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2278 }
2279 
2280 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2281        "sPosix search backward: ",
2282        doc: /* Search backward from point for match for regular expression REGEXP.
2283 Find the longest match in accord with Posix regular expression rules.
2284 Set point to the beginning of the occurrence found, and return point.
2285 An optional second argument bounds the search; it is a buffer position.
2286   The match found must not begin before that position.  A value of nil
2287   means search to the beginning of the accessible portion of the buffer.
2288 Optional third argument, if t, means if fail just return nil (no error).
2289   If not nil and not t, position at limit of search and return nil.
2290 Optional fourth argument COUNT, if a positive number, means to search
2291   for COUNT successive occurrences.  If COUNT is negative, search
2292   forward, instead of backward, for -COUNT occurrences.  A value of
2293   nil means the same as 1.
2294 With COUNT positive, the match found is the COUNTth to last one (or
2295   last, if COUNT is 1 or nil) in the buffer located entirely before
2296   the origin of the search; correspondingly with COUNT negative.
2297 
2298 Search case-sensitivity is determined by the value of the variable
2299 `case-fold-search', which see.
2300 
2301 See also the functions `match-beginning', `match-end', `match-string',
2302 and `replace-match'.  */)
2303   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2304 {
2305   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2306 }
2307 
2308 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2309        "sPosix search: ",
2310        doc: /* Search forward from point for regular expression REGEXP.
2311 Find the longest match in accord with Posix regular expression rules.
2312 Set point to the end of the occurrence found, and return point.
2313 An optional second argument bounds the search; it is a buffer position.
2314   The match found must not end after that position.  A value of nil
2315   means search to the end of the accessible portion of the buffer.
2316 Optional third argument, if t, means if fail just return nil (no error).
2317   If not nil and not t, move to limit of search and return nil.
2318 Optional fourth argument COUNT, if a positive number, means to search
2319   for COUNT successive occurrences.  If COUNT is negative, search
2320   backward, instead of forward, for -COUNT occurrences.  A value of
2321   nil means the same as 1.
2322 With COUNT positive, the match found is the COUNTth one (or first,
2323   if COUNT is 1 or nil) in the buffer located entirely after the
2324   origin of the search; correspondingly with COUNT negative.
2325 
2326 Search case-sensitivity is determined by the value of the variable
2327 `case-fold-search', which see.
2328 
2329 See also the functions `match-beginning', `match-end', `match-string',
2330 and `replace-match'.  */)
2331   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2332 {
2333   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2334 }
2335 
2336 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2337        doc: /* Replace text matched by last search with NEWTEXT.
2338 Leave point at the end of the replacement text.
2339 
2340 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2341 the replacement text.  Otherwise, maybe capitalize the whole text, or
2342 maybe just word initials, based on the replaced text.  If the replaced
2343 text has only capital letters and has at least one multiletter word,
2344 convert NEWTEXT to all caps.  Otherwise if all words are capitalized
2345 in the replaced text, capitalize each word in NEWTEXT.
2346 
2347 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2348 Otherwise treat `\\' as special:
2349   `\\&' in NEWTEXT means substitute original matched text.
2350   `\\N' means substitute what matched the Nth `\\(...\\)'.
2351        If Nth parens didn't match, substitute nothing.
2352   `\\\\' means insert one `\\'.
2353   `\\?' is treated literally
2354        (for compatibility with `query-replace-regexp').
2355   Any other character following `\\' signals an error.
2356 Case conversion does not apply to these substitutions.
2357 
2358 If optional fourth argument STRING is non-nil, it should be a string
2359 to act on; this should be the string on which the previous match was
2360 done via `string-match'.  In this case, `replace-match' creates and
2361 returns a new string, made by copying STRING and replacing the part of
2362 STRING that was matched (the original STRING itself is not altered).
2363 
2364 The optional fifth argument SUBEXP specifies a subexpression;
2365 it says to replace just that subexpression with NEWTEXT,
2366 rather than replacing the entire matched text.
2367 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2368 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2369 NEWTEXT in place of subexp N.
2370 This is useful only after a regular expression search or match,
2371 since only regular expressions have distinguished subexpressions.  */)
2372   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2373 {
2374   enum { nochange, all_caps, cap_initial } case_action;
2375   ptrdiff_t pos, pos_byte;
2376   bool some_multiletter_word;
2377   bool some_lowercase;
2378   bool some_uppercase;
2379   bool some_nonuppercase_initial;
2380   int c, prevc;
2381   ptrdiff_t sub;
2382   ptrdiff_t opoint, newpoint;
2383 
2384   CHECK_STRING (newtext);
2385 
2386   if (! NILP (string))
2387     CHECK_STRING (string);
2388 
2389   case_action = nochange;	/* We tried an initialization */
2390 				/* but some C compilers blew it */
2391 
2392   ptrdiff_t num_regs = search_regs.num_regs;
2393   if (num_regs <= 0)
2394     error ("`replace-match' called before any match found");
2395 
2396   if (NILP (subexp))
2397     sub = 0;
2398   else
2399     {
2400       CHECK_RANGED_INTEGER (subexp, 0, num_regs - 1);
2401       sub = XFIXNUM (subexp);
2402     }
2403 
2404   ptrdiff_t sub_start = search_regs.start[sub];
2405   ptrdiff_t sub_end = search_regs.end[sub];
2406   eassert (sub_start <= sub_end);
2407 
2408   /* Check whether the text to replace is present in the buffer/string.  */
2409   if (! (NILP (string)
2410 	 ? BEGV <= sub_start && sub_end <= ZV
2411 	 : 0 <= sub_start && sub_end <= SCHARS (string)))
2412     {
2413       if (sub_start < 0)
2414 	xsignal2 (Qerror,
2415 		  build_string ("replace-match subexpression does not exist"),
2416 		  subexp);
2417       args_out_of_range (make_fixnum (sub_start), make_fixnum (sub_end));
2418     }
2419 
2420   if (NILP (fixedcase))
2421     {
2422       /* Decide how to casify by examining the matched text. */
2423       ptrdiff_t last;
2424 
2425       pos = sub_start;
2426       last = sub_end;
2427 
2428       if (NILP (string))
2429 	pos_byte = CHAR_TO_BYTE (pos);
2430       else
2431 	pos_byte = string_char_to_byte (string, pos);
2432 
2433       prevc = '\n';
2434       case_action = all_caps;
2435 
2436       /* some_multiletter_word is set nonzero if any original word
2437 	 is more than one letter long. */
2438       some_multiletter_word = 0;
2439       some_lowercase = 0;
2440       some_nonuppercase_initial = 0;
2441       some_uppercase = 0;
2442 
2443       while (pos < last)
2444 	{
2445 	  if (NILP (string))
2446 	    {
2447 	      c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2448 	      INC_BOTH (pos, pos_byte);
2449 	    }
2450 	  else
2451 	    FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2452 
2453 	  if (lowercasep (c))
2454 	    {
2455 	      /* Cannot be all caps if any original char is lower case */
2456 
2457 	      some_lowercase = 1;
2458 	      if (SYNTAX (prevc) != Sword)
2459 		some_nonuppercase_initial = 1;
2460 	      else
2461 		some_multiletter_word = 1;
2462 	    }
2463 	  else if (uppercasep (c))
2464 	    {
2465 	      some_uppercase = 1;
2466 	      if (SYNTAX (prevc) != Sword)
2467 		;
2468 	      else
2469 		some_multiletter_word = 1;
2470 	    }
2471 	  else
2472 	    {
2473 	      /* If the initial is a caseless word constituent,
2474 		 treat that like a lowercase initial.  */
2475 	      if (SYNTAX (prevc) != Sword)
2476 		some_nonuppercase_initial = 1;
2477 	    }
2478 
2479 	  prevc = c;
2480 	}
2481 
2482       /* Convert to all caps if the old text is all caps
2483 	 and has at least one multiletter word.  */
2484       if (! some_lowercase && some_multiletter_word)
2485 	case_action = all_caps;
2486       /* Capitalize each word, if the old text has all capitalized words.  */
2487       else if (!some_nonuppercase_initial && some_multiletter_word)
2488 	case_action = cap_initial;
2489       else if (!some_nonuppercase_initial && some_uppercase)
2490 	/* Should x -> yz, operating on X, give Yz or YZ?
2491 	   We'll assume the latter.  */
2492 	case_action = all_caps;
2493       else
2494 	case_action = nochange;
2495     }
2496 
2497   /* Do replacement in a string.  */
2498   if (!NILP (string))
2499     {
2500       Lisp_Object before, after;
2501 
2502       before = Fsubstring (string, make_fixnum (0), make_fixnum (sub_start));
2503       after = Fsubstring (string, make_fixnum (sub_end), Qnil);
2504 
2505       /* Substitute parts of the match into NEWTEXT
2506 	 if desired.  */
2507       if (NILP (literal))
2508 	{
2509 	  ptrdiff_t lastpos = 0;
2510 	  ptrdiff_t lastpos_byte = 0;
2511 	  /* We build up the substituted string in ACCUM.  */
2512 	  Lisp_Object accum;
2513 	  Lisp_Object middle;
2514 	  ptrdiff_t length = SBYTES (newtext);
2515 
2516 	  accum = Qnil;
2517 
2518 	  for (pos_byte = 0, pos = 0; pos_byte < length;)
2519 	    {
2520 	      ptrdiff_t substart = -1;
2521 	      ptrdiff_t subend = 0;
2522 	      bool delbackslash = 0;
2523 
2524 	      FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2525 
2526 	      if (c == '\\')
2527 		{
2528 		  FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2529 
2530 		  if (c == '&')
2531 		    {
2532 		      substart = sub_start;
2533 		      subend = sub_end;
2534 		    }
2535 		  else if (c >= '1' && c <= '9')
2536 		    {
2537 		      if (c - '0' < num_regs
2538 			  && search_regs.start[c - '0'] >= 0)
2539 			{
2540 			  substart = search_regs.start[c - '0'];
2541 			  subend = search_regs.end[c - '0'];
2542 			}
2543 		      else
2544 			{
2545 			  /* If that subexp did not match,
2546 			     replace \\N with nothing.  */
2547 			  substart = 0;
2548 			  subend = 0;
2549 			}
2550 		    }
2551 		  else if (c == '\\')
2552 		    delbackslash = 1;
2553 		  else if (c != '?')
2554 		    error ("Invalid use of `\\' in replacement text");
2555 		}
2556 	      if (substart >= 0)
2557 		{
2558 		  if (pos - 2 != lastpos)
2559 		    middle = substring_both (newtext, lastpos,
2560 					     lastpos_byte,
2561 					     pos - 2, pos_byte - 2);
2562 		  else
2563 		    middle = Qnil;
2564 		  accum = concat3 (accum, middle,
2565 				   Fsubstring (string,
2566 					       make_fixnum (substart),
2567 					       make_fixnum (subend)));
2568 		  lastpos = pos;
2569 		  lastpos_byte = pos_byte;
2570 		}
2571 	      else if (delbackslash)
2572 		{
2573 		  middle = substring_both (newtext, lastpos,
2574 					   lastpos_byte,
2575 					   pos - 1, pos_byte - 1);
2576 
2577 		  accum = concat2 (accum, middle);
2578 		  lastpos = pos;
2579 		  lastpos_byte = pos_byte;
2580 		}
2581 	    }
2582 
2583 	  if (pos != lastpos)
2584 	    middle = substring_both (newtext, lastpos,
2585 				     lastpos_byte,
2586 				     pos, pos_byte);
2587 	  else
2588 	    middle = Qnil;
2589 
2590 	  newtext = concat2 (accum, middle);
2591 	}
2592 
2593       /* Do case substitution in NEWTEXT if desired.  */
2594       if (case_action == all_caps)
2595 	newtext = Fupcase (newtext);
2596       else if (case_action == cap_initial)
2597 	newtext = Fupcase_initials (newtext);
2598 
2599       return concat3 (before, newtext, after);
2600     }
2601 
2602   /* Record point.  A nonpositive OPOINT is actually an offset from ZV.  */
2603   opoint = PT <= sub_start ? PT : max (PT, sub_end) - ZV;
2604 
2605   /* If we want non-literal replacement,
2606      perform substitution on the replacement string.  */
2607   if (NILP (literal))
2608     {
2609       ptrdiff_t length = SBYTES (newtext);
2610       unsigned char *substed;
2611       ptrdiff_t substed_alloc_size, substed_len;
2612       bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2613       bool str_multibyte = STRING_MULTIBYTE (newtext);
2614       bool really_changed = 0;
2615 
2616       substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
2617 			    ? length * 2 + 100
2618 			    : STRING_BYTES_BOUND);
2619       substed = xmalloc (substed_alloc_size);
2620       substed_len = 0;
2621 
2622       /* Go thru NEWTEXT, producing the actual text to insert in
2623 	 SUBSTED while adjusting multibyteness to that of the current
2624 	 buffer.  */
2625 
2626       for (pos_byte = 0, pos = 0; pos_byte < length;)
2627 	{
2628 	  unsigned char str[MAX_MULTIBYTE_LENGTH];
2629 	  const unsigned char *add_stuff = NULL;
2630 	  ptrdiff_t add_len = 0;
2631 	  ptrdiff_t idx = -1;
2632 	  ptrdiff_t begbyte UNINIT;
2633 
2634 	  if (str_multibyte)
2635 	    {
2636 	      FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2637 	      if (!buf_multibyte)
2638 		c = CHAR_TO_BYTE8 (c);
2639 	    }
2640 	  else
2641 	    {
2642 	      /* Note that we don't have to increment POS.  */
2643 	      c = SREF (newtext, pos_byte++);
2644 	      if (buf_multibyte)
2645 		MAKE_CHAR_MULTIBYTE (c);
2646 	    }
2647 
2648 	  /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2649 	     or set IDX to a match index, which means put that part
2650 	     of the buffer text into SUBSTED.  */
2651 
2652 	  if (c == '\\')
2653 	    {
2654 	      really_changed = 1;
2655 
2656 	      if (str_multibyte)
2657 		{
2658 		  FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2659 						      pos, pos_byte);
2660 		  if (!buf_multibyte && !ASCII_CHAR_P (c))
2661 		    c = CHAR_TO_BYTE8 (c);
2662 		}
2663 	      else
2664 		{
2665 		  c = SREF (newtext, pos_byte++);
2666 		  if (buf_multibyte)
2667 		    MAKE_CHAR_MULTIBYTE (c);
2668 		}
2669 
2670 	      if (c == '&')
2671 		idx = sub;
2672 	      else if ('1' <= c && c <= '9' && c - '0' < num_regs)
2673 		{
2674 		  if (search_regs.start[c - '0'] >= 1)
2675 		    idx = c - '0';
2676 		}
2677 	      else if (c == '\\')
2678 		add_len = 1, add_stuff = (unsigned char *) "\\";
2679 	      else
2680 		{
2681 		  xfree (substed);
2682 		  error ("Invalid use of `\\' in replacement text");
2683 		}
2684 	    }
2685 	  else
2686 	    {
2687 	      add_len = CHAR_STRING (c, str);
2688 	      add_stuff = str;
2689 	    }
2690 
2691 	  /* If we want to copy part of a previous match,
2692 	     set up ADD_STUFF and ADD_LEN to point to it.  */
2693 	  if (idx >= 0)
2694 	    {
2695 	      begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2696 	      add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2697 	      if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2698 		move_gap_both (search_regs.start[idx], begbyte);
2699 	    }
2700 
2701 	  /* Now the stuff we want to add to SUBSTED
2702 	     is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2703 
2704 	  /* Make sure SUBSTED is big enough.  */
2705 	  if (substed_alloc_size - substed_len < add_len)
2706 	    substed =
2707 	      xpalloc (substed, &substed_alloc_size,
2708 		       add_len - (substed_alloc_size - substed_len),
2709 		       STRING_BYTES_BOUND, 1);
2710 
2711 	  /* We compute this after the call to xpalloc, because that
2712 	     could cause buffer text be relocated when ralloc.c is used.  */
2713 	  if (idx >= 0)
2714 	    add_stuff = BYTE_POS_ADDR (begbyte);
2715 
2716 	  /* Now add to the end of SUBSTED.  */
2717 	  if (add_stuff)
2718 	    {
2719 	      memcpy (substed + substed_len, add_stuff, add_len);
2720 	      substed_len += add_len;
2721 	    }
2722 	}
2723 
2724       if (really_changed)
2725 	newtext = make_specified_string ((const char *) substed, -1,
2726 					 substed_len, buf_multibyte);
2727       xfree (substed);
2728     }
2729 
2730   newpoint = sub_start + SCHARS (newtext);
2731   ptrdiff_t newstart = sub_start == sub_end ? newpoint : sub_start;
2732 
2733   /* Replace the old text with the new in the cleanest possible way.  */
2734   replace_range (sub_start, sub_end, newtext, 1, 0, 1, true);
2735 
2736   if (case_action == all_caps)
2737     Fupcase_region (make_fixnum (search_regs.start[sub]),
2738 		    make_fixnum (newpoint),
2739 		    Qnil);
2740   else if (case_action == cap_initial)
2741     Fupcase_initials_region (make_fixnum (search_regs.start[sub]),
2742 			     make_fixnum (newpoint), Qnil);
2743 
2744   /* The replace_range etc. functions can trigger modification hooks
2745      (see signal_before_change and signal_after_change).  Try to error
2746      out if these hooks clobber the match data since clobbering can
2747      result in confusing bugs.  Although this sanity check does not
2748      catch all possible clobberings, it should catch many of them.  */
2749   if (! (search_regs.num_regs == num_regs
2750 	 && search_regs.start[sub] == newstart
2751 	 && search_regs.end[sub] == newpoint))
2752     error ("Match data clobbered by buffer modification hooks");
2753 
2754   /* Put point back where it was in the text, if possible.  */
2755   TEMP_SET_PT (clip_to_bounds (BEGV, opoint + (opoint <= 0 ? ZV : 0), ZV));
2756   /* Now move point "officially" to the start of the inserted replacement.  */
2757   move_if_not_intangible (newpoint);
2758 
2759   return Qnil;
2760 }
2761 
2762 static Lisp_Object
match_limit(Lisp_Object num,bool beginningp)2763 match_limit (Lisp_Object num, bool beginningp)
2764 {
2765   EMACS_INT n;
2766 
2767   CHECK_FIXNUM (num);
2768   n = XFIXNUM (num);
2769   if (n < 0)
2770     args_out_of_range (num, make_fixnum (0));
2771   if (search_regs.num_regs <= 0)
2772     error ("No match data, because no search succeeded");
2773   if (n >= search_regs.num_regs
2774       || search_regs.start[n] < 0)
2775     return Qnil;
2776   return (make_fixnum ((beginningp) ? search_regs.start[n]
2777 		                    : search_regs.end[n]));
2778 }
2779 
2780 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2781        doc: /* Return position of start of text matched by last search.
2782 SUBEXP, a number, specifies which parenthesized expression in the last
2783   regexp.
2784 Value is nil if SUBEXPth pair didn't match, or there were less than
2785   SUBEXP pairs.
2786 Zero means the entire text matched by the whole regexp or whole string.
2787 
2788 Return value is undefined if the last search failed.  */)
2789   (Lisp_Object subexp)
2790 {
2791   return match_limit (subexp, 1);
2792 }
2793 
2794 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2795        doc: /* Return position of end of text matched by last search.
2796 SUBEXP, a number, specifies which parenthesized expression in the last
2797   regexp.
2798 Value is nil if SUBEXPth pair didn't match, or there were less than
2799   SUBEXP pairs.
2800 Zero means the entire text matched by the whole regexp or whole string.
2801 
2802 Return value is undefined if the last search failed.  */)
2803   (Lisp_Object subexp)
2804 {
2805   return match_limit (subexp, 0);
2806 }
2807 
2808 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2809        doc: /* Return a list describing what the last search matched.
2810 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2811 All the elements are markers or nil (nil if the Nth pair didn't match)
2812 if the last match was on a buffer; integers or nil if a string was matched.
2813 Use `set-match-data' to reinstate the data in this list.
2814 
2815 If INTEGERS (the optional first argument) is non-nil, always use
2816 integers (rather than markers) to represent buffer positions.  In
2817 this case, and if the last match was in a buffer, the buffer will get
2818 stored as one additional element at the end of the list.
2819 
2820 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2821 enough to hold all the values, and if INTEGERS is non-nil, no consing
2822 is done.
2823 
2824 If optional third arg RESEAT is non-nil, any previous markers on the
2825 REUSE list will be modified to point to nowhere.
2826 
2827 Return value is undefined if the last search failed.  */)
2828   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2829 {
2830   Lisp_Object tail, prev;
2831   Lisp_Object *data;
2832   ptrdiff_t i, len;
2833 
2834   if (!NILP (reseat))
2835     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2836       if (MARKERP (XCAR (tail)))
2837 	{
2838 	  unchain_marker (XMARKER (XCAR (tail)));
2839 	  XSETCAR (tail, Qnil);
2840 	}
2841 
2842   if (NILP (last_thing_searched))
2843     return Qnil;
2844 
2845   prev = Qnil;
2846 
2847   USE_SAFE_ALLOCA;
2848   SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
2849 
2850   len = 0;
2851   for (i = 0; i < search_regs.num_regs; i++)
2852     {
2853       ptrdiff_t start = search_regs.start[i];
2854       if (start >= 0)
2855 	{
2856 	  if (EQ (last_thing_searched, Qt)
2857 	      || ! NILP (integers))
2858 	    {
2859 	      XSETFASTINT (data[2 * i], start);
2860 	      XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2861 	    }
2862 	  else if (BUFFERP (last_thing_searched))
2863 	    {
2864 	      data[2 * i] = Fmake_marker ();
2865 	      Fset_marker (data[2 * i],
2866 			   make_fixnum (start),
2867 			   last_thing_searched);
2868 	      data[2 * i + 1] = Fmake_marker ();
2869 	      Fset_marker (data[2 * i + 1],
2870 			   make_fixnum (search_regs.end[i]),
2871 			   last_thing_searched);
2872 	    }
2873 	  else
2874 	    /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2875 	    emacs_abort ();
2876 
2877 	  len = 2 * i + 2;
2878 	}
2879       else
2880 	data[2 * i] = data[2 * i + 1] = Qnil;
2881     }
2882 
2883   if (BUFFERP (last_thing_searched) && !NILP (integers))
2884     {
2885       data[len] = last_thing_searched;
2886       len++;
2887     }
2888 
2889   /* If REUSE is not usable, cons up the values and return them.  */
2890   if (! CONSP (reuse))
2891     reuse = Flist (len, data);
2892   else
2893     {
2894       /* If REUSE is a list, store as many value elements as will fit
2895 	 into the elements of REUSE.  */
2896       for (i = 0, tail = reuse; CONSP (tail);
2897 	   i++, tail = XCDR (tail))
2898 	{
2899 	  if (i < len)
2900 	    XSETCAR (tail, data[i]);
2901 	  else
2902 	    XSETCAR (tail, Qnil);
2903 	  prev = tail;
2904 	}
2905 
2906       /* If we couldn't fit all value elements into REUSE,
2907 	 cons up the rest of them and add them to the end of REUSE.  */
2908       if (i < len)
2909 	XSETCDR (prev, Flist (len - i, data + i));
2910     }
2911 
2912   SAFE_FREE ();
2913   return reuse;
2914 }
2915 
2916 /* We used to have an internal use variant of `reseat' described as:
2917 
2918       If RESEAT is `evaporate', put the markers back on the free list
2919       immediately.  No other references to the markers must exist in this
2920       case, so it is used only internally on the unwind stack and
2921       save-match-data from Lisp.
2922 
2923    But it was ill-conceived: those supposedly-internal markers get exposed via
2924    the undo-list, so freeing them here is unsafe.  */
2925 
2926 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2927        doc: /* Set internal data on last search match from elements of LIST.
2928 LIST should have been created by calling `match-data' previously.
2929 
2930 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2931   (register Lisp_Object list, Lisp_Object reseat)
2932 {
2933   ptrdiff_t i;
2934   register Lisp_Object marker;
2935 
2936   if (running_asynch_code)
2937     save_search_regs ();
2938 
2939   CHECK_LIST (list);
2940 
2941   /* Unless we find a marker with a buffer or an explicit buffer
2942      in LIST, assume that this match data came from a string.  */
2943   last_thing_searched = Qt;
2944 
2945   /* Allocate registers if they don't already exist.  */
2946   {
2947     ptrdiff_t length = list_length (list) / 2;
2948 
2949     if (length > search_regs.num_regs)
2950       {
2951 	ptrdiff_t num_regs = search_regs.num_regs;
2952 	search_regs.start =
2953 	  xpalloc (search_regs.start, &num_regs, length - num_regs,
2954 		   min (PTRDIFF_MAX, UINT_MAX), sizeof *search_regs.start);
2955 	search_regs.end =
2956 	  xrealloc (search_regs.end, num_regs * sizeof *search_regs.end);
2957 
2958 	for (i = search_regs.num_regs; i < num_regs; i++)
2959 	  search_regs.start[i] = -1;
2960 
2961 	search_regs.num_regs = num_regs;
2962       }
2963 
2964     for (i = 0; CONSP (list); i++)
2965       {
2966 	marker = XCAR (list);
2967 	if (BUFFERP (marker))
2968 	  {
2969 	    last_thing_searched = marker;
2970 	    break;
2971 	  }
2972 	if (i >= length)
2973 	  break;
2974 	if (NILP (marker))
2975 	  {
2976 	    search_regs.start[i] = -1;
2977 	    list = XCDR (list);
2978 	  }
2979 	else
2980 	  {
2981 	    Lisp_Object from;
2982 	    Lisp_Object m;
2983 
2984 	    m = marker;
2985 	    if (MARKERP (marker))
2986 	      {
2987 		if (XMARKER (marker)->buffer == 0)
2988 		  XSETFASTINT (marker, 0);
2989 		else
2990 		  XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2991 	      }
2992 
2993 	    CHECK_FIXNUM_COERCE_MARKER (marker);
2994 	    from = marker;
2995 
2996 	    if (!NILP (reseat) && MARKERP (m))
2997 	      {
2998 		unchain_marker (XMARKER (m));
2999 		XSETCAR (list, Qnil);
3000 	      }
3001 
3002 	    if ((list = XCDR (list), !CONSP (list)))
3003 	      break;
3004 
3005 	    m = marker = XCAR (list);
3006 
3007 	    if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3008 	      XSETFASTINT (marker, 0);
3009 
3010 	    CHECK_FIXNUM_COERCE_MARKER (marker);
3011 	    if (PTRDIFF_MIN <= XFIXNUM (from) && XFIXNUM (from) <= PTRDIFF_MAX
3012 		&& PTRDIFF_MIN <= XFIXNUM (marker)
3013 		&& XFIXNUM (marker) <= PTRDIFF_MAX)
3014 	      {
3015 		search_regs.start[i] = XFIXNUM (from);
3016 		search_regs.end[i] = XFIXNUM (marker);
3017 	      }
3018 	    else
3019 	      {
3020 		search_regs.start[i] = -1;
3021 	      }
3022 
3023 	    if (!NILP (reseat) && MARKERP (m))
3024 	      {
3025 		unchain_marker (XMARKER (m));
3026 		XSETCAR (list, Qnil);
3027 	      }
3028 	  }
3029 	list = XCDR (list);
3030       }
3031 
3032     for (; i < search_regs.num_regs; i++)
3033       search_regs.start[i] = -1;
3034   }
3035 
3036   return Qnil;
3037 }
3038 
3039 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3040    if asynchronous code (filter or sentinel) is running. */
3041 static void
save_search_regs(void)3042 save_search_regs (void)
3043 {
3044   if (saved_search_regs.num_regs == 0)
3045     {
3046       saved_search_regs = search_regs;
3047       saved_last_thing_searched = last_thing_searched;
3048       last_thing_searched = Qnil;
3049       search_regs.num_regs = 0;
3050       search_regs.start = 0;
3051       search_regs.end = 0;
3052     }
3053 }
3054 
3055 /* Called upon exit from filters and sentinels. */
3056 void
restore_search_regs(void)3057 restore_search_regs (void)
3058 {
3059   if (saved_search_regs.num_regs != 0)
3060     {
3061       if (search_regs.num_regs > 0)
3062 	{
3063 	  xfree (search_regs.start);
3064 	  xfree (search_regs.end);
3065 	}
3066       search_regs = saved_search_regs;
3067       last_thing_searched = saved_last_thing_searched;
3068       saved_last_thing_searched = Qnil;
3069       saved_search_regs.num_regs = 0;
3070     }
3071 }
3072 
3073 /* Called from replace-match via replace_range.  */
3074 void
update_search_regs(ptrdiff_t oldstart,ptrdiff_t oldend,ptrdiff_t newend)3075 update_search_regs (ptrdiff_t oldstart, ptrdiff_t oldend, ptrdiff_t newend)
3076 {
3077   /* Adjust search data for this change.  */
3078   ptrdiff_t change = newend - oldend;
3079   ptrdiff_t i;
3080 
3081   for (i = 0; i < search_regs.num_regs; i++)
3082     {
3083       if (search_regs.start[i] >= oldend)
3084         search_regs.start[i] += change;
3085       else if (search_regs.start[i] > oldstart)
3086         search_regs.start[i] = oldstart;
3087       if (search_regs.end[i] >= oldend)
3088         search_regs.end[i] += change;
3089       else if (search_regs.end[i] > oldstart)
3090         search_regs.end[i] = oldstart;
3091     }
3092 }
3093 
3094 static void
unwind_set_match_data(Lisp_Object list)3095 unwind_set_match_data (Lisp_Object list)
3096 {
3097   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3098   Fset_match_data (list, Qt);
3099 }
3100 
3101 /* Called to unwind protect the match data.  */
3102 void
record_unwind_save_match_data(void)3103 record_unwind_save_match_data (void)
3104 {
3105   record_unwind_protect (unwind_set_match_data,
3106 			 Fmatch_data (Qnil, Qnil, Qnil));
3107 }
3108 
3109 /* Quote a string to deactivate reg-expr chars */
3110 
3111 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3112        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3113   (Lisp_Object string)
3114 {
3115   char *in, *out, *end;
3116   char *temp;
3117   ptrdiff_t backslashes_added = 0;
3118 
3119   CHECK_STRING (string);
3120 
3121   USE_SAFE_ALLOCA;
3122   SAFE_NALLOCA (temp, 2, SBYTES (string));
3123 
3124   /* Now copy the data into the new string, inserting escapes. */
3125 
3126   in = SSDATA (string);
3127   end = in + SBYTES (string);
3128   out = temp;
3129 
3130   for (; in != end; in++)
3131     {
3132       if (*in == '['
3133 	  || *in == '*' || *in == '.' || *in == '\\'
3134 	  || *in == '?' || *in == '+'
3135 	  || *in == '^' || *in == '$')
3136 	*out++ = '\\', backslashes_added++;
3137       *out++ = *in;
3138     }
3139 
3140   Lisp_Object result
3141     = (backslashes_added > 0
3142        ? make_specified_string (temp,
3143                                 SCHARS (string) + backslashes_added,
3144                                 out - temp,
3145                                 STRING_MULTIBYTE (string))
3146        : string);
3147   SAFE_FREE ();
3148   return result;
3149 }
3150 
3151 /* Like find_newline, but doesn't use the cache, and only searches forward.  */
3152 static ptrdiff_t
find_newline1(ptrdiff_t start,ptrdiff_t start_byte,ptrdiff_t end,ptrdiff_t end_byte,ptrdiff_t count,ptrdiff_t * counted,ptrdiff_t * bytepos,bool allow_quit)3153 find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
3154 	       ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *counted,
3155 	       ptrdiff_t *bytepos, bool allow_quit)
3156 {
3157   if (count > 0)
3158     {
3159       if (!end)
3160 	end = ZV, end_byte = ZV_BYTE;
3161     }
3162   else
3163     {
3164       if (!end)
3165 	end = BEGV, end_byte = BEGV_BYTE;
3166     }
3167   if (end_byte == -1)
3168     end_byte = CHAR_TO_BYTE (end);
3169 
3170   if (counted)
3171     *counted = count;
3172 
3173   if (count > 0)
3174     while (start != end)
3175       {
3176         /* Our innermost scanning loop is very simple; it doesn't know
3177            about gaps, buffer ends, or the newline cache.  ceiling is
3178            the position of the last character before the next such
3179            obstacle --- the last character the dumb search loop should
3180            examine.  */
3181 	ptrdiff_t tem, ceiling_byte = end_byte - 1;
3182 
3183 	if (start_byte == -1)
3184 	  start_byte = CHAR_TO_BYTE (start);
3185 
3186         /* The dumb loop can only scan text stored in contiguous
3187            bytes. BUFFER_CEILING_OF returns the last character
3188            position that is contiguous, so the ceiling is the
3189            position after that.  */
3190 	tem = BUFFER_CEILING_OF (start_byte);
3191 	ceiling_byte = min (tem, ceiling_byte);
3192 
3193         {
3194           /* The termination address of the dumb loop.  */
3195 	  unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
3196 	  ptrdiff_t lim_byte = ceiling_byte + 1;
3197 
3198 	  /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
3199 	     of the base, the cursor, and the next line.  */
3200 	  ptrdiff_t base = start_byte - lim_byte;
3201 	  ptrdiff_t cursor, next;
3202 
3203 	  for (cursor = base; cursor < 0; cursor = next)
3204 	    {
3205               /* The dumb loop.  */
3206 	      unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
3207 	      next = nl ? nl - lim_addr : 0;
3208 
3209               if (! nl)
3210 		break;
3211 	      next++;
3212 
3213 	      if (--count == 0)
3214 		{
3215 		  if (bytepos)
3216 		    *bytepos = lim_byte + next;
3217 		  return BYTE_TO_CHAR (lim_byte + next);
3218 		}
3219 	      if (allow_quit)
3220 		maybe_quit ();
3221             }
3222 
3223 	  start_byte = lim_byte;
3224 	  start = BYTE_TO_CHAR (start_byte);
3225         }
3226       }
3227 
3228   if (counted)
3229     *counted -= count;
3230   if (bytepos)
3231     {
3232       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
3233       eassert (*bytepos == CHAR_TO_BYTE (start));
3234     }
3235   return start;
3236 }
3237 
3238 DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
3239        0, 1, 0,
3240        doc: /* Check the newline cache of BUFFER against buffer contents.
3241 
3242 BUFFER defaults to the current buffer.
3243 
3244 Value is an array of 2 sub-arrays of buffer positions for newlines,
3245 the first based on the cache, the second based on actually scanning
3246 the buffer.  If the buffer doesn't have a cache, the value is nil.  */)
3247   (Lisp_Object buffer)
3248 {
3249   struct buffer *buf, *old = NULL;
3250   ptrdiff_t nl_count_cache, nl_count_buf;
3251   Lisp_Object cache_newlines, buf_newlines, val;
3252   ptrdiff_t from, found, i;
3253 
3254   if (NILP (buffer))
3255     buf = current_buffer;
3256   else
3257     {
3258       CHECK_BUFFER (buffer);
3259       buf = XBUFFER (buffer);
3260       old = current_buffer;
3261     }
3262   if (buf->base_buffer)
3263     buf = buf->base_buffer;
3264 
3265   /* If the buffer doesn't have a newline cache, return nil.  */
3266   if (NILP (BVAR (buf, cache_long_scans))
3267       || buf->newline_cache == NULL)
3268     return Qnil;
3269 
3270   /* find_newline can only work on the current buffer.  */
3271   if (old != NULL)
3272     set_buffer_internal_1 (buf);
3273 
3274   /* How many newlines are there according to the cache?  */
3275   find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3276 		TYPE_MAXIMUM (ptrdiff_t), &nl_count_cache, NULL, true);
3277 
3278   /* Create vector and populate it.  */
3279   cache_newlines = make_uninit_vector (nl_count_cache);
3280 
3281   if (nl_count_cache)
3282     {
3283       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3284 	{
3285 	  ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
3286 
3287 	  found = find_newline (from, from_byte, 0, -1, 1, &counted,
3288 				NULL, true);
3289 	  if (counted == 0 || i >= nl_count_cache)
3290 	    break;
3291 	  ASET (cache_newlines, i, make_fixnum (found - 1));
3292 	}
3293       /* Fill the rest of slots with an invalid position.  */
3294       for ( ; i < nl_count_cache; i++)
3295 	ASET (cache_newlines, i, make_fixnum (-1));
3296     }
3297 
3298   /* Now do the same, but without using the cache.  */
3299   find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3300 		 TYPE_MAXIMUM (ptrdiff_t), &nl_count_buf, NULL, true);
3301   buf_newlines = make_uninit_vector (nl_count_buf);
3302   if (nl_count_buf)
3303     {
3304       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3305 	{
3306 	  ptrdiff_t from_byte = CHAR_TO_BYTE (from), counted;
3307 
3308 	  found = find_newline1 (from, from_byte, 0, -1, 1, &counted,
3309 				 NULL, true);
3310 	  if (counted == 0 || i >= nl_count_buf)
3311 	    break;
3312 	  ASET (buf_newlines, i, make_fixnum (found - 1));
3313 	}
3314       for ( ; i < nl_count_buf; i++)
3315 	ASET (buf_newlines, i, make_fixnum (-1));
3316     }
3317 
3318   /* Construct the value and return it.  */
3319   val = make_uninit_vector (2);
3320   ASET (val, 0, cache_newlines);
3321   ASET (val, 1, buf_newlines);
3322 
3323   if (old != NULL)
3324     set_buffer_internal_1 (old);
3325   return val;
3326 }
3327 
3328 
3329 static void syms_of_search_for_pdumper (void);
3330 
3331 void
syms_of_search(void)3332 syms_of_search (void)
3333 {
3334   for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
3335     {
3336       staticpro (&searchbufs[i].regexp);
3337       staticpro (&searchbufs[i].f_whitespace_regexp);
3338       staticpro (&searchbufs[i].syntax_table);
3339     }
3340 
3341   /* Error condition used for failing searches.  */
3342   DEFSYM (Qsearch_failed, "search-failed");
3343 
3344   /* Error condition used for failing searches started by user, i.e.,
3345      where failure should not invoke the debugger.  */
3346   DEFSYM (Quser_search_failed, "user-search-failed");
3347 
3348   /* Error condition signaled when regexp compile_pattern fails.  */
3349   DEFSYM (Qinvalid_regexp, "invalid-regexp");
3350 
3351   Fput (Qsearch_failed, Qerror_conditions,
3352 	pure_list (Qsearch_failed, Qerror));
3353   Fput (Qsearch_failed, Qerror_message,
3354 	build_pure_c_string ("Search failed"));
3355 
3356   Fput (Quser_search_failed, Qerror_conditions,
3357 	pure_list (Quser_search_failed, Quser_error, Qsearch_failed, Qerror));
3358   Fput (Quser_search_failed, Qerror_message,
3359         build_pure_c_string ("Search failed"));
3360 
3361   Fput (Qinvalid_regexp, Qerror_conditions,
3362 	pure_list (Qinvalid_regexp, Qerror));
3363   Fput (Qinvalid_regexp, Qerror_message,
3364 	build_pure_c_string ("Invalid regexp"));
3365 
3366   re_match_object = Qnil;
3367   staticpro (&re_match_object);
3368 
3369   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3370       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3371 Some commands use this for user-specified regexps.
3372 Spaces that occur inside character classes or repetition operators
3373 or other such regexp constructs are not replaced with this.
3374 A value of nil (which is the normal value) means treat spaces
3375 literally.  Note that a value with capturing groups can change the
3376 numbering of existing capture groups in unexpected ways.  */);
3377   Vsearch_spaces_regexp = Qnil;
3378 
3379   DEFSYM (Qinhibit_changing_match_data, "inhibit-changing-match-data");
3380   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3381       doc: /* Internal use only.
3382 If non-nil, the primitive searching and matching functions
3383 such as `looking-at', `string-match', `re-search-forward', etc.,
3384 do not set the match data.  The proper way to use this variable
3385 is to bind it with `let' around a small expression.  */);
3386   Vinhibit_changing_match_data = Qnil;
3387 
3388   defsubr (&Slooking_at);
3389   defsubr (&Sposix_looking_at);
3390   defsubr (&Sstring_match);
3391   defsubr (&Sposix_string_match);
3392   defsubr (&Ssearch_forward);
3393   defsubr (&Ssearch_backward);
3394   defsubr (&Sre_search_forward);
3395   defsubr (&Sre_search_backward);
3396   defsubr (&Sposix_search_forward);
3397   defsubr (&Sposix_search_backward);
3398   defsubr (&Sreplace_match);
3399   defsubr (&Smatch_beginning);
3400   defsubr (&Smatch_end);
3401   defsubr (&Smatch_data);
3402   defsubr (&Sset_match_data);
3403   defsubr (&Sregexp_quote);
3404   defsubr (&Snewline_cache_check);
3405 
3406   pdumper_do_now_and_after_load (syms_of_search_for_pdumper);
3407 }
3408 
3409 static void
syms_of_search_for_pdumper(void)3410 syms_of_search_for_pdumper (void)
3411 {
3412   for (int i = 0; i < REGEXP_CACHE_SIZE; ++i)
3413     {
3414       searchbufs[i].buf.allocated = 100;
3415       searchbufs[i].buf.buffer = xmalloc (100);
3416       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3417       searchbufs[i].regexp = Qnil;
3418       searchbufs[i].f_whitespace_regexp = Qnil;
3419       searchbufs[i].busy = false;
3420       searchbufs[i].syntax_table = Qnil;
3421       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3422     }
3423   searchbuf_head = &searchbufs[0];
3424 }
3425