1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2    Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2021 Free
3    Software Foundation, Inc.
4 
5 This file is part of GNU Emacs.
6 
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or (at
10 your option) any later version.
11 
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15 GNU General Public License for more details.
16 
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs.  If not, see <https://www.gnu.org/licenses/>.  */
19 
20 #include <config.h>
21 
22 #include "lisp.h"
23 #include "character.h"
24 #include "buffer.h"
25 #include "regex-emacs.h"
26 #include "syntax.h"
27 #include "intervals.h"
28 #include "category.h"
29 
30 /* Make syntax table lookup grant data in gl_state.  */
31 #define SYNTAX(c) syntax_property (c, 1)
32 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
33 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
34 
35 /* Eight single-bit flags have the following meanings:
36   1. This character is the first of a two-character comment-start sequence.
37   2. This character is the second of a two-character comment-start sequence.
38   3. This character is the first of a two-character comment-end sequence.
39   4. This character is the second of a two-character comment-end sequence.
40   5. This character is a prefix, for backward-prefix-chars.
41   6. The char is part of a delimiter for comments of style "b".
42   7. This character is part of a nestable comment sequence.
43   8. The char is part of a delimiter for comments of style "c".
44   Note that any two-character sequence whose first character has flag 1
45   and whose second character has flag 2 will be interpreted as a comment start.
46 
47   Bits 6 and 8 discriminate among different comment styles.
48   Languages such as C++ allow two orthogonal syntax start/end pairs
49   and bit 6 determines whether a comment-end or Scommentend
50   ends style a or b.  Comment markers can start style a, b, c, or bc.
51   Style a is always the default.
52   For 2-char comment markers, the style b flag is looked up only on the second
53   char of the comment marker and on the first char of the comment ender.
54   For style c (like the nested flag), the flag can be placed on any of
55   the chars.  */
56 
57 /* These functions extract specific flags from an integer
58    that holds the syntax code and the flags.  */
59 
60 static bool
SYNTAX_FLAGS_COMSTART_FIRST(int flags)61 SYNTAX_FLAGS_COMSTART_FIRST (int flags)
62 {
63   return (flags >> 16) & 1;
64 }
65 static bool
SYNTAX_FLAGS_COMSTART_SECOND(int flags)66 SYNTAX_FLAGS_COMSTART_SECOND (int flags)
67 {
68   return (flags >> 17) & 1;
69 }
70 static bool
SYNTAX_FLAGS_COMEND_FIRST(int flags)71 SYNTAX_FLAGS_COMEND_FIRST (int flags)
72 {
73   return (flags >> 18) & 1;
74 }
75 static bool
SYNTAX_FLAGS_COMEND_SECOND(int flags)76 SYNTAX_FLAGS_COMEND_SECOND (int flags)
77 {
78   return (flags >> 19) & 1;
79 }
80 static bool
SYNTAX_FLAGS_COMSTARTEND_FIRST(int flags)81 SYNTAX_FLAGS_COMSTARTEND_FIRST (int flags)
82 {
83   return (flags & 0x50000) != 0;
84 }
85 static bool
SYNTAX_FLAGS_PREFIX(int flags)86 SYNTAX_FLAGS_PREFIX (int flags)
87 {
88   return (flags >> 20) & 1;
89 }
90 static bool
SYNTAX_FLAGS_COMMENT_STYLEB(int flags)91 SYNTAX_FLAGS_COMMENT_STYLEB (int flags)
92 {
93   return (flags >> 21) & 1;
94 }
95 static bool
SYNTAX_FLAGS_COMMENT_STYLEC(int flags)96 SYNTAX_FLAGS_COMMENT_STYLEC (int flags)
97 {
98   return (flags >> 23) & 1;
99 }
100 static int
SYNTAX_FLAGS_COMMENT_STYLEC2(int flags)101 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags)
102 {
103   return (flags >> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
104 }
105 static bool
SYNTAX_FLAGS_COMMENT_NESTED(int flags)106 SYNTAX_FLAGS_COMMENT_NESTED (int flags)
107 {
108   return (flags >> 22) & 1;
109 }
110 
111 /* FLAGS should be the flags of the main char of the comment marker, e.g.
112    the second for comstart and the first for comend.  */
113 static int
SYNTAX_FLAGS_COMMENT_STYLE(int flags,int other_flags)114 SYNTAX_FLAGS_COMMENT_STYLE (int flags, int other_flags)
115 {
116   return (SYNTAX_FLAGS_COMMENT_STYLEB (flags)
117 	  | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags)
118 	  | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags));
119 }
120 
121 /* Extract a particular flag for a given character.  */
122 
123 static bool
SYNTAX_COMEND_FIRST(int c)124 SYNTAX_COMEND_FIRST (int c)
125 {
126   return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c));
127 }
128 
129 /* We use these constants in place for comment-style and
130    string-ender-char to distinguish comments/strings started by
131    comment_fence and string_fence codes.  */
132 
133 enum
134   {
135     ST_COMMENT_STYLE = 256 + 1,
136     ST_STRING_STYLE = 256 + 2
137   };
138 
139 /* This is the internal form of the parse state used in parse-partial-sexp.  */
140 
141 struct lisp_parse_state
142   {
143     EMACS_INT depth;	/* Depth at end of parsing.  */
144     int instring;  /* -1 if not within string, else desired terminator.  */
145     EMACS_INT incomment; /* -1 if in unnestable comment else comment nesting */
146     int comstyle;  /* comment style a=0, or b=1, or ST_COMMENT_STYLE.  */
147     bool quoted;   /* True if just after an escape char at end of parsing.  */
148     EMACS_INT mindepth;	/* Minimum depth seen while scanning.  */
149     /* Char number of most recent start-of-expression at current level */
150     ptrdiff_t thislevelstart;
151     /* Char number of start of containing expression */
152     ptrdiff_t prevlevelstart;
153     ptrdiff_t location;	     /* Char number at which parsing stopped.  */
154     ptrdiff_t location_byte; /* Corresponding byte position.  */
155     ptrdiff_t comstr_start;  /* Position of last comment/string starter.  */
156     Lisp_Object levelstarts; /* Char numbers of starts-of-expression
157 				of levels (starting from outermost).  */
158     int prev_syntax; /* Syntax of previous position scanned, when
159                         that position (potentially) holds the first char
160                         of a 2-char construct, i.e. comment delimiter
161                         or Sescape, etc.  Smax otherwise. */
162   };
163 
164 /* These variables are a cache for finding the start of a defun.
165    find_start_pos is the place for which the defun start was found.
166    find_start_value is the defun start position found for it.
167    find_start_value_byte is the corresponding byte position.
168    find_start_buffer is the buffer it was found in.
169    find_start_begv is the BEGV value when it was found.
170    find_start_modiff is the value of MODIFF when it was found.  */
171 
172 static ptrdiff_t find_start_pos;
173 static ptrdiff_t find_start_value;
174 static ptrdiff_t find_start_value_byte;
175 static struct buffer *find_start_buffer;
176 static ptrdiff_t find_start_begv;
177 static modiff_count find_start_modiff;
178 
179 
180 static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
181 static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
182 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
183 static void scan_sexps_forward (struct lisp_parse_state *,
184                                 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
185                                 bool, int);
186 static void internalize_parse_state (Lisp_Object, struct lisp_parse_state *);
187 static bool in_classes (int, Lisp_Object);
188 static void parse_sexp_propertize (ptrdiff_t charpos);
189 
190 /* This setter is used only in this file, so it can be private.  */
191 static void
bset_syntax_table(struct buffer * b,Lisp_Object val)192 bset_syntax_table (struct buffer *b, Lisp_Object val)
193 {
194   b->syntax_table_ = val;
195 }
196 
197 /* Whether the syntax of the character C has the prefix flag set.  */
198 bool
syntax_prefix_flag_p(int c)199 syntax_prefix_flag_p (int c)
200 {
201   return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
202 }
203 
204 struct gl_state_s gl_state;		/* Global state of syntax parser.  */
205 
206 enum { INTERVALS_AT_ONCE = 10 };	/* 1 + max-number of intervals
207 					   to scan to property-change.  */
208 
209 /* Set the syntax entry VAL for char C in table TABLE.  */
210 
211 static void
SET_RAW_SYNTAX_ENTRY(Lisp_Object table,int c,Lisp_Object val)212 SET_RAW_SYNTAX_ENTRY (Lisp_Object table, int c, Lisp_Object val)
213 {
214   CHAR_TABLE_SET (table, c, val);
215 }
216 
217 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
218    RANGE is a cons (FROM . TO) specifying the range of characters.  */
219 
220 static void
SET_RAW_SYNTAX_ENTRY_RANGE(Lisp_Object table,Lisp_Object range,Lisp_Object val)221 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table, Lisp_Object range,
222 			    Lisp_Object val)
223 {
224   Fset_char_table_range (table, range, val);
225 }
226 
227 /* Extract the information from the entry for character C
228    in the current syntax table.  */
229 
230 static Lisp_Object
SYNTAX_MATCH(int c)231 SYNTAX_MATCH (int c)
232 {
233   Lisp_Object ent = SYNTAX_ENTRY (c);
234   return CONSP (ent) ? XCDR (ent) : Qnil;
235 }
236 
237 /* This should be called with FROM at the start of forward
238    search, or after the last position of the backward search.  It
239    makes sure that the first char is picked up with correct table, so
240    one does not need to call UPDATE_SYNTAX_TABLE immediately after the
241    call.
242    Sign of COUNT gives the direction of the search.
243  */
244 
245 static void
SETUP_SYNTAX_TABLE(ptrdiff_t from,ptrdiff_t count)246 SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
247 {
248   SETUP_BUFFER_SYNTAX_TABLE ();
249   gl_state.b_property = BEGV;
250   gl_state.e_property = ZV + 1;
251   gl_state.object = Qnil;
252   gl_state.offset = 0;
253   if (parse_sexp_lookup_properties)
254     {
255       if (count > 0)
256 	update_syntax_table_forward (from, true, Qnil);
257       else if (from > BEGV)
258 	{
259 	  update_syntax_table (from - 1, count, true, Qnil);
260 	  parse_sexp_propertize (from - 1);
261 	}
262     }
263 }
264 
265 /* Same as above, but in OBJECT.  If OBJECT is nil, use current buffer.
266    If it is t (which is only used in fast_c_string_match_ignore_case),
267    ignore properties altogether.
268 
269    This is meant for regex-emacs.c to use.  For buffers, regex-emacs.c
270    passes arguments to the UPDATE_SYNTAX_TABLE functions which are
271    relative to BEGV.  So if it is a buffer, we set the offset field to
272    BEGV.  */
273 
274 void
SETUP_SYNTAX_TABLE_FOR_OBJECT(Lisp_Object object,ptrdiff_t from,ptrdiff_t count)275 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
276 			       ptrdiff_t from, ptrdiff_t count)
277 {
278   SETUP_BUFFER_SYNTAX_TABLE ();
279   gl_state.object = object;
280   if (BUFFERP (gl_state.object))
281     {
282       struct buffer *buf = XBUFFER (gl_state.object);
283       gl_state.b_property = 1;
284       gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;
285       gl_state.offset = BUF_BEGV (buf) - 1;
286     }
287   else if (NILP (gl_state.object))
288     {
289       gl_state.b_property = 1;
290       gl_state.e_property = ZV - BEGV + 1;
291       gl_state.offset = BEGV - 1;
292     }
293   else if (EQ (gl_state.object, Qt))
294     {
295       gl_state.b_property = 0;
296       gl_state.e_property = PTRDIFF_MAX;
297       gl_state.offset = 0;
298     }
299   else
300     {
301       gl_state.b_property = 0;
302       gl_state.e_property = 1 + SCHARS (gl_state.object);
303       gl_state.offset = 0;
304     }
305   if (parse_sexp_lookup_properties)
306     update_syntax_table (from + gl_state.offset - (count <= 0),
307 			 count, 1, gl_state.object);
308 }
309 
310 /* Update gl_state to an appropriate interval which contains CHARPOS.  The
311    sign of COUNT gives the relative position of CHARPOS wrt the previously
312    valid interval.  If INIT, only [be]_property fields of gl_state are
313    valid at start, the rest is filled basing on OBJECT.
314 
315    `gl_state.*_i' are the intervals, and CHARPOS is further in the search
316    direction than the intervals - or in an interval.  We update the
317    current syntax-table basing on the property of this interval, and
318    update the interval to start further than CHARPOS - or be
319    NULL.  We also update lim_property to be the next value of
320    charpos to call this subroutine again - or be before/after the
321    start/end of OBJECT.  */
322 
323 void
update_syntax_table(ptrdiff_t charpos,EMACS_INT count,bool init,Lisp_Object object)324 update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
325 		     Lisp_Object object)
326 {
327   Lisp_Object tmp_table;
328   int cnt = 0;
329   bool invalidate = true;
330   INTERVAL i;
331 
332   if (init)
333     {
334       gl_state.old_prop = Qnil;
335       gl_state.start = gl_state.b_property;
336       gl_state.stop = gl_state.e_property;
337       i = interval_of (charpos, object);
338       gl_state.backward_i = gl_state.forward_i = i;
339       invalidate = false;
340       if (!i)
341 	return;
342       i = gl_state.forward_i;
343       gl_state.b_property = i->position - gl_state.offset;
344       gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
345     }
346   else
347     {
348       i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
349 
350       /* We are guaranteed to be called with CHARPOS either in i,
351          or further off.  */
352       if (!i)
353         error ("Error in syntax_table logic for to-the-end intervals");
354       else if (charpos < i->position)		/* Move left.  */
355         {
356           if (count > 0)
357 	    error ("Error in syntax_table logic for intervals <-");
358           /* Update the interval.  */
359           i = update_interval (i, charpos);
360           if (INTERVAL_LAST_POS (i) != gl_state.b_property)
361 	    {
362 	      invalidate = false;
363 	      gl_state.forward_i = i;
364 	      gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
365 	    }
366         }
367       else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right.  */
368         {
369           if (count < 0)
370 	    error ("Error in syntax_table logic for intervals ->");
371           /* Update the interval.  */
372           i = update_interval (i, charpos);
373           if (i->position != gl_state.e_property)
374 	    {
375 	      invalidate = false;
376 	      gl_state.backward_i = i;
377 	      gl_state.b_property = i->position - gl_state.offset;
378 	    }
379         }
380     }
381 
382   tmp_table = textget (i->plist, Qsyntax_table);
383 
384   if (invalidate)
385     invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
386 
387   if (invalidate)		/* Did not get to adjacent interval.  */
388     {				/* with the same table => */
389 				/* invalidate the old range.  */
390       if (count > 0)
391 	{
392 	  gl_state.backward_i = i;
393 	  gl_state.b_property = i->position - gl_state.offset;
394 	}
395       else
396 	{
397 	  gl_state.forward_i = i;
398 	  gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
399 	}
400     }
401 
402   if (!EQ (tmp_table, gl_state.old_prop))
403     {
404       gl_state.current_syntax_table = tmp_table;
405       gl_state.old_prop = tmp_table;
406       if (EQ (Fsyntax_table_p (tmp_table), Qt))
407 	{
408 	  gl_state.use_global = 0;
409 	}
410       else if (CONSP (tmp_table))
411 	{
412 	  gl_state.use_global = 1;
413 	  gl_state.global_code = tmp_table;
414 	}
415       else
416 	{
417 	  gl_state.use_global = 0;
418 	  gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
419 	}
420     }
421 
422   while (i)
423     {
424       if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
425 	{
426 	  if (count > 0)
427 	    {
428 	      gl_state.e_property = i->position - gl_state.offset;
429 	      gl_state.forward_i = i;
430 	    }
431 	  else
432 	    {
433 	      gl_state.b_property
434 		= i->position + LENGTH (i) - gl_state.offset;
435 	      gl_state.backward_i = i;
436 	    }
437 	  return;
438 	}
439       else if (cnt == INTERVALS_AT_ONCE)
440 	{
441 	  if (count > 0)
442 	    {
443 	      gl_state.e_property
444 		= i->position + LENGTH (i) - gl_state.offset
445 		/* e_property at EOB is not set to ZV but to ZV+1, so that
446 		   we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
447 		   having to check eob between the two.  */
448 		+ (next_interval (i) ? 0 : 1);
449 	      gl_state.forward_i = i;
450 	    }
451 	  else
452 	    {
453 	      gl_state.b_property = i->position - gl_state.offset;
454 	      gl_state.backward_i = i;
455 	    }
456 	  return;
457 	}
458       cnt++;
459       i = count > 0 ? next_interval (i) : previous_interval (i);
460     }
461   eassert (i == NULL); /* This property goes to the end.  */
462   if (count > 0)
463     {
464       gl_state.e_property = gl_state.stop;
465       gl_state.forward_i = i;
466     }
467   else
468     gl_state.b_property = gl_state.start;
469 }
470 
471 static void
parse_sexp_propertize(ptrdiff_t charpos)472 parse_sexp_propertize (ptrdiff_t charpos)
473 {
474   EMACS_INT zv = ZV;
475   if (syntax_propertize__done <= charpos
476       && syntax_propertize__done < zv)
477     {
478       modiff_count modiffs = CHARS_MODIFF;
479       safe_call1 (Qinternal__syntax_propertize,
480 		  make_fixnum (min (zv, 1 + charpos)));
481       if (modiffs != CHARS_MODIFF)
482 	error ("internal--syntax-propertize modified the buffer!");
483       if (syntax_propertize__done <= charpos
484 	  && syntax_propertize__done < zv)
485 	error ("internal--syntax-propertize did not move"
486 	       " syntax-propertize--done");
487       SETUP_SYNTAX_TABLE (charpos, 1);
488     }
489   else if (gl_state.e_property > syntax_propertize__done)
490     {
491       gl_state.e_property = syntax_propertize__done;
492       gl_state.e_property_truncated = true;
493     }
494   else if (gl_state.e_property_truncated
495 	   && gl_state.e_property < syntax_propertize__done)
496     { /* When moving backward, e_property might be set without resetting
497 	 e_property_truncated, so the e_property_truncated flag may
498 	 occasionally be left raised spuriously.  This should be rare.  */
499       gl_state.e_property_truncated = false;
500       update_syntax_table_forward (charpos, false, Qnil);
501     }
502 }
503 
504 void
update_syntax_table_forward(ptrdiff_t charpos,bool init,Lisp_Object object)505 update_syntax_table_forward (ptrdiff_t charpos, bool init,
506 			     Lisp_Object object)
507 {
508   if (gl_state.e_property_truncated)
509     {
510       eassert (NILP (object));
511       eassert (charpos >= gl_state.e_property);
512       parse_sexp_propertize (charpos);
513     }
514   else
515     {
516       update_syntax_table (charpos, 1, init, object);
517       if (NILP (object) && gl_state.e_property > syntax_propertize__done)
518 	parse_sexp_propertize (charpos);
519     }
520 }
521 
522 /* Returns true if char at CHARPOS is quoted.
523    Global syntax-table data should be set up already to be good at CHARPOS
524    or after.  On return global syntax data is good for lookup at CHARPOS.  */
525 
526 static bool
char_quoted(ptrdiff_t charpos,ptrdiff_t bytepos)527 char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
528 {
529   enum syntaxcode code;
530   ptrdiff_t beg = BEGV;
531   bool quoted = 0;
532   ptrdiff_t orig = charpos;
533 
534   while (charpos > beg)
535     {
536       int c;
537       dec_both (&charpos, &bytepos);
538 
539       UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
540       c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
541       code = SYNTAX (c);
542       if (! (code == Scharquote || code == Sescape))
543 	break;
544 
545       quoted = !quoted;
546     }
547 
548   UPDATE_SYNTAX_TABLE (orig);
549   return quoted;
550 }
551 
552 /* Return the bytepos one character before BYTEPOS.
553    We assume that BYTEPOS is not at the start of the buffer.  */
554 
555 static ptrdiff_t
dec_bytepos(ptrdiff_t bytepos)556 dec_bytepos (ptrdiff_t bytepos)
557 {
558   return (bytepos
559 	  - (!NILP (BVAR (current_buffer, enable_multibyte_characters))
560 	     ? prev_char_len (bytepos) : 1));
561 }
562 
563 /* Return a defun-start position before POS and not too far before.
564    It should be the last one before POS, or nearly the last.
565 
566    When open_paren_in_column_0_is_defun_start is nonzero,
567    only the beginning of the buffer is treated as a defun-start.
568 
569    We record the information about where the scan started
570    and what its result was, so that another call in the same area
571    can return the same value very quickly.
572 
573    There is no promise at which position the global syntax data is
574    valid on return from the subroutine, so the caller should explicitly
575    update the global data.  */
576 
577 static ptrdiff_t
find_defun_start(ptrdiff_t pos,ptrdiff_t pos_byte)578 find_defun_start (ptrdiff_t pos, ptrdiff_t pos_byte)
579 {
580   ptrdiff_t opoint = PT, opoint_byte = PT_BYTE;
581 
582   /* Use previous finding, if it's valid and applies to this inquiry.  */
583   if (current_buffer == find_start_buffer
584       /* Reuse the defun-start even if POS is a little farther on.
585 	 POS might be in the next defun, but that's ok.
586 	 Our value may not be the best possible, but will still be usable.  */
587       && pos <= find_start_pos + 1000
588       && pos >= find_start_value
589       && BEGV == find_start_begv
590       && MODIFF == find_start_modiff)
591     return find_start_value;
592 
593   if (!NILP (Vcomment_use_syntax_ppss))
594     {
595       modiff_count modiffs = CHARS_MODIFF;
596       Lisp_Object ppss = call1 (Qsyntax_ppss, make_fixnum (pos));
597       if (modiffs != CHARS_MODIFF)
598 	error ("syntax-ppss modified the buffer!");
599       TEMP_SET_PT_BOTH (opoint, opoint_byte);
600       Lisp_Object boc = Fnth (make_fixnum (8), ppss);
601       if (FIXNUMP (boc))
602         {
603           find_start_value = XFIXNUM (boc);
604           find_start_value_byte = CHAR_TO_BYTE (find_start_value);
605         }
606       else
607         {
608           find_start_value = pos;
609           find_start_value_byte = pos_byte;
610         }
611       goto found;
612     }
613   if (!open_paren_in_column_0_is_defun_start)
614     {
615       find_start_value = BEGV;
616       find_start_value_byte = BEGV_BYTE;
617       goto found;
618     }
619 
620   /* Back up to start of line.  */
621   scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
622 
623   /* We optimize syntax-table lookup for rare updates.  Thus we accept
624      only those `^\s(' which are good in global _and_ text-property
625      syntax-tables.  */
626   SETUP_BUFFER_SYNTAX_TABLE ();
627   while (PT > BEGV)
628     {
629       /* Open-paren at start of line means we may have found our
630 	 defun-start.  */
631       int c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
632       if (SYNTAX (c) == Sopen)
633 	{
634 	  SETUP_SYNTAX_TABLE (PT + 1, -1);	/* Try again... */
635 	  c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
636 	  if (SYNTAX (c) == Sopen)
637 	    break;
638 	  /* Now fallback to the default value.  */
639 	  SETUP_BUFFER_SYNTAX_TABLE ();
640 	}
641       /* Move to beg of previous line.  */
642       scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
643     }
644 
645   /* Record what we found, for the next try.  */
646   find_start_value = PT;
647   find_start_value_byte = PT_BYTE;
648   TEMP_SET_PT_BOTH (opoint, opoint_byte);
649 
650  found:
651   find_start_buffer = current_buffer;
652   find_start_modiff = MODIFF;
653   find_start_begv = BEGV;
654   find_start_pos = pos;
655 
656   return find_start_value;
657 }
658 
659 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE.  */
660 
661 static bool
prev_char_comend_first(ptrdiff_t pos,ptrdiff_t pos_byte)662 prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
663 {
664   int c;
665   bool val;
666 
667   dec_both (&pos, &pos_byte);
668   UPDATE_SYNTAX_TABLE_BACKWARD (pos);
669   c = FETCH_CHAR (pos_byte);
670   val = SYNTAX_COMEND_FIRST (c);
671   UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
672   return val;
673 }
674 
675 /* Check whether charpos FROM is at the end of a comment.
676    FROM_BYTE is the bytepos corresponding to FROM.
677    Do not move back before STOP.
678 
679    Return true if we find a comment ending at FROM/FROM_BYTE.
680 
681    If successful, store the charpos of the comment's beginning
682    into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
683 
684    Global syntax data remains valid for backward search starting at
685    the returned value (or at FROM, if the search was not successful).  */
686 
687 static bool
back_comment(ptrdiff_t from,ptrdiff_t from_byte,ptrdiff_t stop,bool comnested,int comstyle,ptrdiff_t * charpos_ptr,ptrdiff_t * bytepos_ptr)688 back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
689 	      bool comnested, int comstyle, ptrdiff_t *charpos_ptr,
690 	      ptrdiff_t *bytepos_ptr)
691 {
692   /* Look back, counting the parity of string-quotes,
693      and recording the comment-starters seen.
694      When we reach a safe place, assume that's not in a string;
695      then step the main scan to the earliest comment-starter seen
696      an even number of string quotes away from the safe place.
697 
698      OFROM[I] is position of the earliest comment-starter seen
699      which is I+2X quotes from the comment-end.
700      PARITY is current parity of quotes from the comment end.  */
701   int string_style = -1;	/* Presumed outside of any string.  */
702   bool string_lossage = 0;
703   /* Not a real lossage: indicates that we have passed a matching comment
704      starter plus a non-matching comment-ender, meaning that any matching
705      comment-starter we might see later could be a false positive (hidden
706      inside another comment).
707      Test case:  { a (* b } c (* d *) */
708   bool comment_lossage = 0;
709   ptrdiff_t comment_end = from;
710   ptrdiff_t comment_end_byte = from_byte;
711   ptrdiff_t comstart_pos = 0;
712   ptrdiff_t comstart_byte;
713   /* Place where the containing defun starts,
714      or 0 if we didn't come across it yet.  */
715   ptrdiff_t defun_start = 0;
716   ptrdiff_t defun_start_byte = 0;
717   enum syntaxcode code;
718   ptrdiff_t nesting = 1;		/* Current comment nesting.  */
719   int c;
720   int syntax = 0;
721   unsigned short int quit_count = 0;
722 
723   /* FIXME: A }} comment-ender style leads to incorrect behavior
724      in the case of {{ c }}} because we ignore the last two chars which are
725      assumed to be comment-enders although they aren't.  */
726 
727   /* At beginning of range to scan, we're outside of strings;
728      that determines quote parity to the comment-end.  */
729   while (from != stop)
730     {
731       rarely_quit (++quit_count);
732 
733       ptrdiff_t temp_byte;
734       int prev_syntax;
735       bool com2start, com2end, comstart;
736 
737       /* Move back and examine a character.  */
738       dec_both (&from, &from_byte);
739       UPDATE_SYNTAX_TABLE_BACKWARD (from);
740 
741       prev_syntax = syntax;
742       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
743       syntax = SYNTAX_WITH_FLAGS (c);
744       code = SYNTAX (c);
745 
746       /* Check for 2-char comment markers.  */
747       com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
748 		   && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
749 		   && (comstyle
750 		       == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
751 		   && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
752 		       || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
753       com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
754 		 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
755       comstart = (com2start || code == Scomment);
756 
757       /* Nasty cases with overlapping 2-char comment markers:
758 	 - snmp-mode: -- c -- foo -- c --
759 	              --- c --
760 		      ------ c --
761 	 - c-mode:    *||*
762 		      |* *|* *|
763 		      |*| |* |*|
764 		      ///   */
765 
766       /* If a 2-char comment sequence partly overlaps with another,
767 	 we don't try to be clever.  E.g. |*| in C, or }% in modes that
768 	 have %..\n and %{..}%.  */
769       if (from > stop && (com2end || comstart))
770 	{
771 	  ptrdiff_t next = from, next_byte = from_byte;
772 	  int next_c, next_syntax;
773 	  dec_both (&next, &next_byte);
774 	  UPDATE_SYNTAX_TABLE_BACKWARD (next);
775 	  next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
776 	  next_syntax = SYNTAX_WITH_FLAGS (next_c);
777 	  if (((comstart || comnested)
778 	       && SYNTAX_FLAGS_COMEND_SECOND (syntax)
779 	       && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
780 	      || ((com2end || comnested)
781 		  && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
782 		  && (comstyle
783 		      == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
784 		  && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
785 	    goto lossage;
786 	  /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
787 	}
788 
789       if (com2start && comstart_pos == 0)
790 	/* We're looking at a comment starter.  But it might be a comment
791 	   ender as well (see snmp-mode).  The first time we see one, we
792 	   need to consider it as a comment starter,
793 	   and the subsequent times as a comment ender.  */
794 	com2end = 0;
795 
796       /* Turn a 2-char comment sequences into the appropriate syntax.  */
797       if (com2end)
798 	code = Sendcomment;
799       else if (com2start)
800 	code = Scomment;
801       /* Ignore comment starters of a different style.  */
802       else if (code == Scomment
803 	       && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
804 		   || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
805 	continue;
806 
807       /* Ignore escaped characters, except comment-enders which cannot
808          be escaped.  */
809       if ((comment_end_can_be_escaped || code != Sendcomment)
810           && char_quoted (from, from_byte))
811 	continue;
812 
813       switch (code)
814 	{
815 	case Sstring_fence:
816 	case Scomment_fence:
817 	  c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
818 	  FALLTHROUGH;
819 	case Sstring:
820 	  /* Track parity of quotes.  */
821 	  if (string_style == -1)
822 	    /* Entering a string.  */
823 	    string_style = c;
824 	  else if (string_style == c)
825 	    /* Leaving the string.  */
826 	    string_style = -1;
827 	  else
828 	    /* If we have two kinds of string delimiters.
829 	       There's no way to grok this scanning backwards.  */
830 	    string_lossage = 1;
831 	  break;
832 
833 	case Scomment:
834 	  /* We've already checked that it is the relevant comstyle.  */
835 	  if (string_style != -1 || comment_lossage || string_lossage)
836 	    /* There are odd string quotes involved, so let's be careful.
837 	       Test case in Pascal: " { " a { " } */
838 	    goto lossage;
839 
840 	  if (!comnested)
841 	    {
842 	      /* Record best comment-starter so far.  */
843 	      comstart_pos = from;
844 	      comstart_byte = from_byte;
845 	    }
846 	  else if (--nesting <= 0)
847 	    /* nested comments have to be balanced, so we don't need to
848 	       keep looking for earlier ones.  We use here the same (slightly
849 	       incorrect) reasoning as below:  since it is followed by uniform
850 	       paired string quotes, this comment-start has to be outside of
851 	       strings, else the comment-end itself would be inside a string. */
852 	    goto done;
853 	  break;
854 
855 	case Sendcomment:
856 	  if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
857 	      && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
858 		  || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
859 	    /* This is the same style of comment ender as ours. */
860 	    {
861 	      if (comnested)
862 		nesting++;
863 	      else
864 		/* Anything before that can't count because it would match
865 		   this comment-ender rather than ours.  */
866 		from = stop;	/* Break out of the loop.  */
867 	    }
868 	  else if (comstart_pos != 0 || c != '\n')
869 	    /* We're mixing comment styles here, so we'd better be careful.
870 	       The (comstart_pos != 0 || c != '\n') check is not quite correct
871 	       (we should just always set comment_lossage), but removing it
872 	       would imply that any multiline comment in C would go through
873 	       lossage, which seems overkill.
874 	       The failure should only happen in the rare cases such as
875 	         { (* } *)   */
876 	    comment_lossage = 1;
877 	  break;
878 
879 	case Sopen:
880 	  /* Assume a defun-start point is outside of strings.  */
881 	  if (open_paren_in_column_0_is_defun_start
882               && NILP (Vcomment_use_syntax_ppss)
883 	      && (from == stop
884 		  || (temp_byte = dec_bytepos (from_byte),
885 		      FETCH_CHAR (temp_byte) == '\n')))
886 	    {
887 	      defun_start = from;
888 	      defun_start_byte = from_byte;
889 	      from = stop;	/* Break out of the loop.  */
890 	    }
891 	  break;
892 
893 	default:
894 	  break;
895 	}
896     }
897 
898   if (comstart_pos == 0)
899     {
900       from = comment_end;
901       from_byte = comment_end_byte;
902       UPDATE_SYNTAX_TABLE_FORWARD (comment_end);
903     }
904   /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
905      or `done'), then we've found the beginning of the non-nested comment.  */
906   else if (1)	/* !comnested */
907     {
908       from = comstart_pos;
909       from_byte = comstart_byte;
910       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
911     }
912   else lossage:
913     {
914       struct lisp_parse_state state;
915       bool adjusted = true;
916       /* We had two kinds of string delimiters mixed up
917 	 together.  Decode this going forwards.
918 	 Scan fwd from a known safe place (beginning-of-defun)
919 	 to the one in question; this records where we
920 	 last passed a comment starter.  */
921       /* If we did not already find the defun start, find it now.  */
922       if (defun_start == 0)
923 	{
924 	  defun_start = find_defun_start (comment_end, comment_end_byte);
925 	  defun_start_byte = find_start_value_byte;
926 	  adjusted = (defun_start > BEGV);
927 	}
928       do
929 	{
930           internalize_parse_state (Qnil, &state);
931 	  scan_sexps_forward (&state,
932 			      defun_start, defun_start_byte,
933 			      comment_end, TYPE_MINIMUM (EMACS_INT),
934 			      0, 0);
935 	  defun_start = comment_end;
936 	  if (!adjusted)
937 	    {
938 	      adjusted = true;
939 	      find_start_value
940 		= CONSP (state.levelstarts) ? XFIXNUM (XCAR (state.levelstarts))
941 		: state.thislevelstart >= 0 ? state.thislevelstart
942 		: find_start_value;
943 	      find_start_value_byte = CHAR_TO_BYTE (find_start_value);
944 	    }
945 
946 	  if (state.incomment == (comnested ? 1 : -1)
947 	      && state.comstyle == comstyle)
948 	    from = state.comstr_start;
949 	  else
950 	    {
951 	      from = comment_end;
952 	      if (state.incomment)
953 		/* If comment_end is inside some other comment, maybe ours
954 		   is nested, so we need to try again from within the
955 		   surrounding comment.  Example: { a (* " *)  */
956 		{
957 		  /* FIXME: We should advance by one or two chars.  */
958 		  defun_start = state.comstr_start + 2;
959 		  defun_start_byte = CHAR_TO_BYTE (defun_start);
960 		}
961 	    }
962 	  rarely_quit (++quit_count);
963 	}
964       while (defun_start < comment_end);
965 
966       from_byte = CHAR_TO_BYTE (from);
967       UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
968     }
969 
970  done:
971   *charpos_ptr = from;
972   *bytepos_ptr = from_byte;
973 
974   return from != comment_end;
975 }
976 
977 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
978        doc: /* Return t if OBJECT is a syntax table.
979 Currently, any char-table counts as a syntax table.  */)
980   (Lisp_Object object)
981 {
982   if (CHAR_TABLE_P (object)
983       && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
984     return Qt;
985   return Qnil;
986 }
987 
988 static void
check_syntax_table(Lisp_Object obj)989 check_syntax_table (Lisp_Object obj)
990 {
991   CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
992 	      Qsyntax_table_p, obj);
993 }
994 
995 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
996        doc: /* Return the current syntax table.
997 This is the one specified by the current buffer.  */)
998   (void)
999 {
1000   return BVAR (current_buffer, syntax_table);
1001 }
1002 
1003 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
1004    Sstandard_syntax_table, 0, 0, 0,
1005        doc: /* Return the standard syntax table.
1006 This is the one used for new buffers.  */)
1007   (void)
1008 {
1009   return Vstandard_syntax_table;
1010 }
1011 
1012 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
1013        doc: /* Construct a new syntax table and return it.
1014 It is a copy of the TABLE, which defaults to the standard syntax table.  */)
1015   (Lisp_Object table)
1016 {
1017   Lisp_Object copy;
1018 
1019   if (!NILP (table))
1020     check_syntax_table (table);
1021   else
1022     table = Vstandard_syntax_table;
1023 
1024   copy = Fcopy_sequence (table);
1025 
1026   /* Only the standard syntax table should have a default element.
1027      Other syntax tables should inherit from parents instead.  */
1028   set_char_table_defalt (copy, Qnil);
1029 
1030   /* Copied syntax tables should all have parents.
1031      If we copied one with no parent, such as the standard syntax table,
1032      use the standard syntax table as the copy's parent.  */
1033   if (NILP (XCHAR_TABLE (copy)->parent))
1034     Fset_char_table_parent (copy, Vstandard_syntax_table);
1035   return copy;
1036 }
1037 
1038 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
1039        doc: /* Select a new syntax table for the current buffer.
1040 One argument, a syntax table.  */)
1041   (Lisp_Object table)
1042 {
1043   int idx;
1044   check_syntax_table (table);
1045   bset_syntax_table (current_buffer, table);
1046   /* Indicate that this buffer now has a specified syntax table.  */
1047   idx = PER_BUFFER_VAR_IDX (syntax_table);
1048   SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
1049   return table;
1050 }
1051 
1052 /* Convert a letter which signifies a syntax code
1053  into the code it signifies.
1054  This is used by modify-syntax-entry, and other things.  */
1055 
1056 unsigned char const syntax_spec_code[0400] =
1057   { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1058     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1059     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1060     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1061     Swhitespace, Scomment_fence, Sstring, 0377, Smath, 0377, 0377, Squote,
1062     Sopen, Sclose, 0377, 0377, 0377, Swhitespace, Spunct, Scharquote,
1063     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1064     0377, 0377, 0377, 0377, Scomment, 0377, Sendcomment, 0377,
1065     Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* @, A ... */
1066     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1067     0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1068     0377, 0377, 0377, 0377, Sescape, 0377, 0377, Ssymbol,
1069     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,   /* `, a, ... */
1070     0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1071     0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1072     0377, 0377, 0377, 0377, Sstring_fence, 0377, 0377, 0377
1073   };
1074 
1075 /* Indexed by syntax code, give the letter that describes it.  */
1076 
1077 char const syntax_code_spec[16] =
1078   {
1079     ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
1080     '!', '|'
1081   };
1082 
1083 /* Indexed by syntax code, give the object (cons of syntax code and
1084    nil) to be stored in syntax table.  Since these objects can be
1085    shared among syntax tables, we generate them in advance.  By
1086    sharing objects, the function `describe-syntax' can give a more
1087    compact listing.  */
1088 static Lisp_Object Vsyntax_code_object;
1089 
1090 
1091 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
1092        doc: /* Return the syntax code of CHARACTER, described by a character.
1093 For example, if CHARACTER is a word constituent, the
1094 character `w' (119) is returned.
1095 The characters that correspond to various syntax codes
1096 are listed in the documentation of `modify-syntax-entry'.
1097 
1098 If you're trying to determine the syntax of characters in the buffer,
1099 this is probably the wrong function to use, because it can't take
1100 `syntax-table' text properties into account.  Consider using
1101 `syntax-after' instead.  */)
1102   (Lisp_Object character)
1103 {
1104   int char_int;
1105   CHECK_CHARACTER (character);
1106   char_int = XFIXNUM (character);
1107   SETUP_BUFFER_SYNTAX_TABLE ();
1108   return make_fixnum (syntax_code_spec[SYNTAX (char_int)]);
1109 }
1110 
1111 DEFUN ("syntax-class-to-char", Fsyntax_class_to_char,
1112        Ssyntax_class_to_char, 1, 1, 0,
1113        doc: /* Return the syntax char of CLASS, described by an integer.
1114 For example, if SYNTAX is word constituent (the integer 2), the
1115 character `w' (119) is returned.  */)
1116   (Lisp_Object syntax)
1117 {
1118   int syn;
1119   CHECK_FIXNUM (syntax);
1120   syn = XFIXNUM (syntax);
1121 
1122   if (syn < 0 || syn >= sizeof syntax_code_spec)
1123     args_out_of_range (make_fixnum (sizeof syntax_code_spec - 1),
1124 		       syntax);
1125   return make_fixnum (syntax_code_spec[syn]);
1126 }
1127 
1128 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
1129        doc: /* Return the matching parenthesis of CHARACTER, or nil if none.  */)
1130   (Lisp_Object character)
1131 {
1132   int char_int;
1133   enum syntaxcode code;
1134   CHECK_CHARACTER (character);
1135   char_int = XFIXNUM (character);
1136   SETUP_BUFFER_SYNTAX_TABLE ();
1137   code = SYNTAX (char_int);
1138   if (code == Sopen || code == Sclose)
1139     return SYNTAX_MATCH (char_int);
1140   return Qnil;
1141 }
1142 
1143 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
1144        doc: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1145 STRING should be a string of the form allowed as argument of
1146 `modify-syntax-entry'.  The return value is a raw syntax descriptor: a
1147 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
1148 the value of a `syntax-table' text property.  */)
1149   (Lisp_Object string)
1150 {
1151   const unsigned char *p;
1152   int val;
1153   Lisp_Object match;
1154 
1155   CHECK_STRING (string);
1156 
1157   p = SDATA (string);
1158   val = syntax_spec_code[*p++];
1159   if (val == 0377)
1160     error ("Invalid syntax description letter: %c", p[-1]);
1161 
1162   if (val == Sinherit)
1163     return Qnil;
1164 
1165   if (*p)
1166     {
1167       int len, character = string_char_and_length (p, &len);
1168       XSETINT (match, character);
1169       if (XFIXNAT (match) == ' ')
1170 	match = Qnil;
1171       p += len;
1172     }
1173   else
1174     match = Qnil;
1175 
1176   while (*p)
1177     switch (*p++)
1178       {
1179       case '1':
1180 	val |= 1 << 16;
1181 	break;
1182 
1183       case '2':
1184 	val |= 1 << 17;
1185 	break;
1186 
1187       case '3':
1188 	val |= 1 << 18;
1189 	break;
1190 
1191       case '4':
1192 	val |= 1 << 19;
1193 	break;
1194 
1195       case 'p':
1196 	val |= 1 << 20;
1197 	break;
1198 
1199       case 'b':
1200 	val |= 1 << 21;
1201 	break;
1202 
1203       case 'n':
1204 	val |= 1 << 22;
1205 	break;
1206 
1207       case 'c':
1208 	val |= 1 << 23;
1209 	break;
1210       }
1211 
1212   if (val < ASIZE (Vsyntax_code_object) && NILP (match))
1213     return AREF (Vsyntax_code_object, val);
1214   else
1215     /* Since we can't use a shared object, let's make a new one.  */
1216     return Fcons (make_fixnum (val), match);
1217 }
1218 
1219 /* I really don't know why this is interactive
1220    help-form should at least be made useful whilst reading the second arg.  */
1221 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1222   "cSet syntax for character: \nsSet syntax for %s to: ",
1223        doc: /* Set syntax for character CHAR according to string NEWENTRY.
1224 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1225  the current buffer's syntax table.
1226 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1227 in the range MIN to MAX are changed.
1228 The first character of NEWENTRY should be one of the following:
1229   Space or -  whitespace syntax.    w   word constituent.
1230   _           symbol constituent.   .   punctuation.
1231   (           open-parenthesis.     )   close-parenthesis.
1232   "           string quote.         \\   escape.
1233   $           paired delimiter.     \\='   expression quote or prefix operator.
1234   <           comment starter.      >   comment ender.
1235   /           character-quote.      @   inherit from parent table.
1236   |           generic string fence. !   generic comment fence.
1237 
1238 Only single-character comment start and end sequences are represented thus.
1239 Two-character sequences are represented as described below.
1240 The second character of NEWENTRY is the matching parenthesis,
1241  used only if the first character is `(' or `)'.
1242 Any additional characters are flags.
1243 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1244  1 means CHAR is the start of a two-char comment start sequence.
1245  2 means CHAR is the second character of such a sequence.
1246  3 means CHAR is the start of a two-char comment end sequence.
1247  4 means CHAR is the second character of such a sequence.
1248 
1249 There can be several orthogonal comment sequences.  This is to support
1250 language modes such as C++.  By default, all comment sequences are of style
1251 a, but you can set the comment sequence style to b (on the second character
1252 of a comment-start, and the first character of a comment-end sequence) and/or
1253 c (on any of its chars) using this flag:
1254  b means CHAR is part of comment sequence b.
1255  c means CHAR is part of comment sequence c.
1256  n means CHAR is part of a nestable comment sequence.
1257 
1258  p means CHAR is a prefix character for `backward-prefix-chars';
1259    such characters are treated as whitespace when they occur
1260    between expressions.
1261 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE)  */)
1262   (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1263 {
1264   if (CONSP (c))
1265     {
1266       CHECK_CHARACTER_CAR (c);
1267       CHECK_CHARACTER_CDR (c);
1268     }
1269   else
1270     CHECK_CHARACTER (c);
1271 
1272   if (NILP (syntax_table))
1273     syntax_table = BVAR (current_buffer, syntax_table);
1274   else
1275     check_syntax_table (syntax_table);
1276 
1277   newentry = Fstring_to_syntax (newentry);
1278   if (CONSP (c))
1279     SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1280   else
1281     SET_RAW_SYNTAX_ENTRY (syntax_table, XFIXNUM (c), newentry);
1282 
1283   /* We clear the regexp cache, since character classes can now have
1284      different values from those in the compiled regexps.*/
1285   clear_regexp_cache ();
1286 
1287   return Qnil;
1288 }
1289 
1290 /* Dump syntax table to buffer in human-readable format */
1291 
1292 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1293        Sinternal_describe_syntax_value, 1, 1, 0,
1294        doc: /* Insert a description of the internal syntax description SYNTAX at point.  */)
1295   (Lisp_Object syntax)
1296 {
1297   int code, syntax_code;
1298   bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
1299   char str[2];
1300   Lisp_Object first, match_lisp, value = syntax;
1301 
1302   if (NILP (value))
1303     {
1304       insert_string ("default");
1305       return syntax;
1306     }
1307 
1308   if (CHAR_TABLE_P (value))
1309     {
1310       insert_string ("deeper char-table ...");
1311       return syntax;
1312     }
1313 
1314   if (!CONSP (value))
1315     {
1316       insert_string ("invalid");
1317       return syntax;
1318     }
1319 
1320   first = XCAR (value);
1321   match_lisp = XCDR (value);
1322 
1323   if (!FIXNUMP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
1324     {
1325       insert_string ("invalid");
1326       return syntax;
1327     }
1328 
1329   syntax_code = XFIXNUM (first) & INT_MAX;
1330   code = syntax_code & 0377;
1331   start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1332   start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);
1333   end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1334   end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1335   prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1336   comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1337   comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1338   comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1339 
1340   if (Smax <= code)
1341     {
1342       insert_string ("invalid");
1343       return syntax;
1344     }
1345 
1346   str[0] = syntax_code_spec[code], str[1] = 0;
1347   insert (str, 1);
1348 
1349   if (NILP (match_lisp))
1350     insert (" ", 1);
1351   else
1352     insert_char (XFIXNUM (match_lisp));
1353 
1354   if (start1)
1355     insert ("1", 1);
1356   if (start2)
1357     insert ("2", 1);
1358 
1359   if (end1)
1360     insert ("3", 1);
1361   if (end2)
1362     insert ("4", 1);
1363 
1364   if (prefix)
1365     insert ("p", 1);
1366   if (comstyleb)
1367     insert ("b", 1);
1368   if (comstylec)
1369     insert ("c", 1);
1370   if (comnested)
1371     insert ("n", 1);
1372 
1373   insert_string ("\twhich means: ");
1374 
1375   switch (code)
1376     {
1377     case Swhitespace:
1378       insert_string ("whitespace"); break;
1379     case Spunct:
1380       insert_string ("punctuation"); break;
1381     case Sword:
1382       insert_string ("word"); break;
1383     case Ssymbol:
1384       insert_string ("symbol"); break;
1385     case Sopen:
1386       insert_string ("open"); break;
1387     case Sclose:
1388       insert_string ("close"); break;
1389     case Squote:
1390       insert_string ("prefix"); break;
1391     case Sstring:
1392       insert_string ("string"); break;
1393     case Smath:
1394       insert_string ("math"); break;
1395     case Sescape:
1396       insert_string ("escape"); break;
1397     case Scharquote:
1398       insert_string ("charquote"); break;
1399     case Scomment:
1400       insert_string ("comment"); break;
1401     case Sendcomment:
1402       insert_string ("endcomment"); break;
1403     case Sinherit:
1404       insert_string ("inherit"); break;
1405     case Scomment_fence:
1406       insert_string ("comment fence"); break;
1407     case Sstring_fence:
1408       insert_string ("string fence"); break;
1409     default:
1410       insert_string ("invalid");
1411       return syntax;
1412     }
1413 
1414   if (!NILP (match_lisp))
1415     {
1416       insert_string (", matches ");
1417       insert_char (XFIXNUM (match_lisp));
1418     }
1419 
1420   if (start1)
1421     insert_string (",\n\t  is the first character of a comment-start sequence");
1422   if (start2)
1423     insert_string (",\n\t  is the second character of a comment-start sequence");
1424 
1425   if (end1)
1426     insert_string (",\n\t  is the first character of a comment-end sequence");
1427   if (end2)
1428     insert_string (",\n\t  is the second character of a comment-end sequence");
1429   if (comstyleb)
1430     insert_string (" (comment style b)");
1431   if (comstylec)
1432     insert_string (" (comment style c)");
1433   if (comnested)
1434     insert_string (" (nestable)");
1435 
1436   if (prefix)
1437     {
1438       AUTO_STRING (prefixdoc,
1439 		   ",\n\t  is a prefix character for `backward-prefix-chars'");
1440       insert1 (call1 (Qsubstitute_command_keys, prefixdoc));
1441     }
1442 
1443   return syntax;
1444 }
1445 
1446 /* Return the position across COUNT words from FROM.
1447    If that many words cannot be found before the end of the buffer, return 0.
1448    COUNT negative means scan backward and stop at word beginning.  */
1449 
1450 ptrdiff_t
scan_words(ptrdiff_t from,EMACS_INT count)1451 scan_words (ptrdiff_t from, EMACS_INT count)
1452 {
1453   ptrdiff_t beg = BEGV;
1454   ptrdiff_t end = ZV;
1455   ptrdiff_t from_byte = CHAR_TO_BYTE (from);
1456   enum syntaxcode code;
1457   int ch0, ch1;
1458   Lisp_Object func, pos;
1459 
1460   SETUP_SYNTAX_TABLE (from, clip_to_bounds (PTRDIFF_MIN, count, PTRDIFF_MAX));
1461 
1462   while (count > 0)
1463     {
1464       while (true)
1465 	{
1466 	  if (from == end)
1467 	    return 0;
1468 	  UPDATE_SYNTAX_TABLE_FORWARD (from);
1469 	  ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1470 	  code = SYNTAX (ch0);
1471 	  inc_both (&from, &from_byte);
1472 	  if (words_include_escapes
1473 	      && (code == Sescape || code == Scharquote))
1474 	    break;
1475 	  if (code == Sword)
1476 	    break;
1477 	  rarely_quit (from);
1478 	}
1479       /* Now CH0 is a character which begins a word and FROM is the
1480          position of the next character.  */
1481       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1482       if (! NILP (Ffboundp (func)))
1483 	{
1484 	  pos = call2 (func, make_fixnum (from - 1), make_fixnum (end));
1485 	  if (FIXNUMP (pos) && from < XFIXNUM (pos) && XFIXNUM (pos) <= ZV)
1486 	    {
1487 	      from = XFIXNUM (pos);
1488 	      from_byte = CHAR_TO_BYTE (from);
1489 	    }
1490 	}
1491       else
1492 	{
1493 	  while (1)
1494 	    {
1495 	      if (from == end) break;
1496 	      UPDATE_SYNTAX_TABLE_FORWARD (from);
1497 	      ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1498 	      code = SYNTAX (ch1);
1499 	      if ((code != Sword
1500 		   && (! words_include_escapes
1501 		       || (code != Sescape && code != Scharquote)))
1502 		  || word_boundary_p (ch0, ch1))
1503 		break;
1504 	      inc_both (&from, &from_byte);
1505 	      ch0 = ch1;
1506 	      rarely_quit (from);
1507 	    }
1508 	}
1509       count--;
1510     }
1511   while (count < 0)
1512     {
1513       while (true)
1514 	{
1515 	  if (from == beg)
1516 	    return 0;
1517 	  dec_both (&from, &from_byte);
1518 	  UPDATE_SYNTAX_TABLE_BACKWARD (from);
1519 	  ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1520 	  code = SYNTAX (ch1);
1521 	  if (words_include_escapes
1522 	      && (code == Sescape || code == Scharquote))
1523 	    break;
1524 	  if (code == Sword)
1525 	    break;
1526 	  rarely_quit (from);
1527 	}
1528       /* Now CH1 is a character which ends a word and FROM is the
1529          position of it.  */
1530       func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1531       if (! NILP (Ffboundp (func)))
1532  	{
1533 	  pos = call2 (func, make_fixnum (from), make_fixnum (beg));
1534 	  if (FIXNUMP (pos) && BEGV <= XFIXNUM (pos) && XFIXNUM (pos) < from)
1535 	    {
1536 	      from = XFIXNUM (pos);
1537 	      from_byte = CHAR_TO_BYTE (from);
1538 	    }
1539 	}
1540       else
1541 	{
1542 	  while (1)
1543 	    {
1544 	      if (from == beg)
1545 		break;
1546 	      dec_both (&from, &from_byte);
1547 	      UPDATE_SYNTAX_TABLE_BACKWARD (from);
1548 	      ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1549 	      code = SYNTAX (ch0);
1550 	      if ((code != Sword
1551 		   && (! words_include_escapes
1552 		       || (code != Sescape && code != Scharquote)))
1553 		  || word_boundary_p (ch0, ch1))
1554 		{
1555 		  inc_both (&from, &from_byte);
1556 		  break;
1557 		}
1558 	      ch1 = ch0;
1559 	      rarely_quit (from);
1560 	    }
1561 	}
1562       count++;
1563     }
1564 
1565   return from;
1566 }
1567 
1568 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1569        doc: /* Move point forward ARG words (backward if ARG is negative).
1570 If ARG is omitted or nil, move point forward one word.
1571 Normally returns t.
1572 If an edge of the buffer or a field boundary is reached, point is
1573 left there and the function returns nil.  Field boundaries are not
1574 noticed if `inhibit-field-text-motion' is non-nil.
1575 
1576 The word boundaries are normally determined by the buffer's syntax
1577 table and character script (according to `char-script-table'), but
1578 `find-word-boundary-function-table', such as set up by `subword-mode',
1579 can change that.  If a Lisp program needs to move by words determined
1580 strictly by the syntax table, it should use `forward-word-strictly'
1581 instead.  See Info node `(elisp) Word Motion' for details.  */)
1582   (Lisp_Object arg)
1583 {
1584   Lisp_Object tmp;
1585   ptrdiff_t orig_val, val;
1586 
1587   if (NILP (arg))
1588     XSETFASTINT (arg, 1);
1589   else
1590     CHECK_FIXNUM (arg);
1591 
1592   val = orig_val = scan_words (PT, XFIXNUM (arg));
1593   if (! orig_val)
1594     val = XFIXNUM (arg) > 0 ? ZV : BEGV;
1595 
1596   /* Avoid jumping out of an input field.  */
1597   tmp = Fconstrain_to_field (make_fixnum (val), make_fixnum (PT),
1598 			     Qnil, Qnil, Qnil);
1599   val = XFIXNAT (tmp);
1600 
1601   SET_PT (val);
1602   return val == orig_val ? Qt : Qnil;
1603 }
1604 
1605 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1606        doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1607 STRING is like the inside of a `[...]' in a regular expression
1608 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1609  (but not at the end of a range; quoting is never needed there).
1610 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1611 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1612 Char classes, e.g. `[:alpha:]', are supported.
1613 
1614 Returns the distance traveled, either zero or positive.  */)
1615   (Lisp_Object string, Lisp_Object lim)
1616 {
1617   return skip_chars (1, string, lim, 1);
1618 }
1619 
1620 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1621        doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1622 See `skip-chars-forward' for details.
1623 Returns the distance traveled, either zero or negative.  */)
1624   (Lisp_Object string, Lisp_Object lim)
1625 {
1626   return skip_chars (0, string, lim, 1);
1627 }
1628 
1629 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1630        doc: /* Move point forward across chars in specified syntax classes.
1631 SYNTAX is a string of syntax code characters.
1632 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1633 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1634 This function returns the distance traveled, either zero or positive.  */)
1635   (Lisp_Object syntax, Lisp_Object lim)
1636 {
1637   return skip_syntaxes (1, syntax, lim);
1638 }
1639 
1640 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1641        doc: /* Move point backward across chars in specified syntax classes.
1642 SYNTAX is a string of syntax code characters.
1643 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1644 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1645 This function returns either zero or a negative number, and the absolute value
1646 of this is the distance traveled.  */)
1647   (Lisp_Object syntax, Lisp_Object lim)
1648 {
1649   return skip_syntaxes (0, syntax, lim);
1650 }
1651 
1652 static Lisp_Object
skip_chars(bool forwardp,Lisp_Object string,Lisp_Object lim,bool handle_iso_classes)1653 skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1654 	    bool handle_iso_classes)
1655 {
1656   int c;
1657   char fastmap[0400];
1658   /* Store the ranges of non-ASCII characters.  */
1659   int *char_ranges UNINIT;
1660   int n_char_ranges = 0;
1661   bool negate = 0;
1662   ptrdiff_t i, i_byte;
1663   /* True if the current buffer is multibyte and the region contains
1664      non-ASCII chars.  */
1665   bool multibyte;
1666   /* True if STRING is multibyte and it contains non-ASCII chars.  */
1667   bool string_multibyte;
1668   ptrdiff_t size_byte;
1669   const unsigned char *str;
1670   int len;
1671   Lisp_Object iso_classes;
1672   USE_SAFE_ALLOCA;
1673 
1674   CHECK_STRING (string);
1675   iso_classes = Qnil;
1676 
1677   if (NILP (lim))
1678     XSETINT (lim, forwardp ? ZV : BEGV);
1679   else
1680     CHECK_FIXNUM_COERCE_MARKER (lim);
1681 
1682   /* In any case, don't allow scan outside bounds of buffer.  */
1683   if (XFIXNUM (lim) > ZV)
1684     XSETFASTINT (lim, ZV);
1685   if (XFIXNUM (lim) < BEGV)
1686     XSETFASTINT (lim, BEGV);
1687 
1688   multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1689 	       && (XFIXNUM (lim) - PT != CHAR_TO_BYTE (XFIXNUM (lim)) - PT_BYTE));
1690   string_multibyte = SBYTES (string) > SCHARS (string);
1691 
1692   memset (fastmap, 0, sizeof fastmap);
1693 
1694   str = SDATA (string);
1695   size_byte = SBYTES (string);
1696 
1697   i_byte = 0;
1698   if (i_byte < size_byte
1699       && SREF (string, 0) == '^')
1700     {
1701       negate = 1; i_byte++;
1702     }
1703 
1704   /* Find the characters specified and set their elements of fastmap.
1705      Handle backslashes and ranges specially.
1706 
1707      If STRING contains non-ASCII characters, setup char_ranges for
1708      them and use fastmap only for their leading codes.  */
1709 
1710   if (! string_multibyte)
1711     {
1712       bool string_has_eight_bit = 0;
1713 
1714       /* At first setup fastmap.  */
1715       while (i_byte < size_byte)
1716 	{
1717 	  if (handle_iso_classes)
1718 	    {
1719 	      const unsigned char *ch = str + i_byte;
1720 	      re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1721 	      if (cc == 0)
1722 		error ("Invalid ISO C character class");
1723 	      if (cc != -1)
1724 		{
1725 		  iso_classes = Fcons (make_fixnum (cc), iso_classes);
1726 		  i_byte = ch - str;
1727 		  continue;
1728 		}
1729 	    }
1730 
1731 	  c = str[i_byte++];
1732 
1733 	  if (c == '\\')
1734 	    {
1735 	      if (i_byte == size_byte)
1736 		break;
1737 
1738 	      c = str[i_byte++];
1739 	    }
1740 	  /* Treat `-' as range character only if another character
1741 	     follows.  */
1742 	  if (i_byte + 1 < size_byte
1743 	      && str[i_byte] == '-')
1744 	    {
1745 	      int c2;
1746 
1747 	      /* Skip over the dash.  */
1748 	      i_byte++;
1749 
1750 	      /* Get the end of the range.  */
1751 	      c2 = str[i_byte++];
1752 	      if (c2 == '\\'
1753 		  && i_byte < size_byte)
1754 		c2 = str[i_byte++];
1755 
1756 	      if (c <= c2)
1757 		{
1758 		  int lim2 = c2 + 1;
1759 		  while (c < lim2)
1760 		    fastmap[c++] = 1;
1761 		  if (! ASCII_CHAR_P (c2))
1762 		    string_has_eight_bit = 1;
1763 		}
1764 	    }
1765 	  else
1766 	    {
1767 	      fastmap[c] = 1;
1768 	      if (! ASCII_CHAR_P (c))
1769 		string_has_eight_bit = 1;
1770 	    }
1771 	}
1772 
1773       /* If the current range is multibyte and STRING contains
1774 	 eight-bit chars, arrange fastmap and setup char_ranges for
1775 	 the corresponding multibyte chars.  */
1776       if (multibyte && string_has_eight_bit)
1777 	{
1778 	  char *p1;
1779 	  char himap[0200 + 1];
1780 	  memcpy (himap, fastmap + 0200, 0200);
1781 	  himap[0200] = 0;
1782 	  memset (fastmap + 0200, 0, 0200);
1783 	  SAFE_NALLOCA (char_ranges, 2, 128);
1784 	  i = 0;
1785 
1786 	  while ((p1 = memchr (himap + i, 1, 0200 - i)))
1787 	    {
1788 	      /* Deduce the next range C..C2 from the next clump of 1s
1789 		 in HIMAP starting with &HIMAP[I].  HIMAP is the high
1790 		 order half of the old FASTMAP.  */
1791 	      int c2, leading_code;
1792 	      i = p1 - himap;
1793 	      c = BYTE8_TO_CHAR (i + 0200);
1794 	      i += strlen (p1);
1795 	      c2 = BYTE8_TO_CHAR (i + 0200 - 1);
1796 
1797 	      char_ranges[n_char_ranges++] = c;
1798 	      char_ranges[n_char_ranges++] = c2;
1799 	      leading_code = CHAR_LEADING_CODE (c);
1800 	      memset (fastmap + leading_code, 1,
1801 		      CHAR_LEADING_CODE (c2) - leading_code + 1);
1802 	    }
1803 	}
1804     }
1805   else				/* STRING is multibyte */
1806     {
1807       SAFE_NALLOCA (char_ranges, 2, SCHARS (string));
1808 
1809       while (i_byte < size_byte)
1810 	{
1811 	  int leading_code = str[i_byte];
1812 
1813 	  if (handle_iso_classes)
1814 	    {
1815 	      const unsigned char *ch = str + i_byte;
1816 	      re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1817 	      if (cc == 0)
1818 		error ("Invalid ISO C character class");
1819 	      if (cc != -1)
1820 		{
1821 		  iso_classes = Fcons (make_fixnum (cc), iso_classes);
1822 		  i_byte = ch - str;
1823 		  continue;
1824 		}
1825 	    }
1826 
1827 	  if (leading_code== '\\')
1828 	    {
1829 	      if (++i_byte == size_byte)
1830 		break;
1831 
1832 	      leading_code = str[i_byte];
1833 	    }
1834 	  c = string_char_and_length (str + i_byte, &len);
1835 	  i_byte += len;
1836 
1837 
1838 	  /* Treat `-' as range character only if another character
1839 	     follows.  */
1840 	  if (i_byte + 1 < size_byte
1841 	      && str[i_byte] == '-')
1842 	    {
1843 	      int c2, leading_code2;
1844 
1845 	      /* Skip over the dash.  */
1846 	      i_byte++;
1847 
1848 	      /* Get the end of the range.  */
1849 	      leading_code2 = str[i_byte];
1850 	      c2 = string_char_and_length (str + i_byte, &len);
1851 	      i_byte += len;
1852 
1853 	      if (c2 == '\\'
1854 		  && i_byte < size_byte)
1855 		{
1856 		  leading_code2 = str[i_byte];
1857 		  c2 = string_char_and_length (str + i_byte, &len);
1858 		  i_byte += len;
1859 		}
1860 
1861 	      if (c > c2)
1862 		continue;
1863 	      if (ASCII_CHAR_P (c))
1864 		{
1865 		  while (c <= c2 && c < 0x80)
1866 		    fastmap[c++] = 1;
1867 		  leading_code = CHAR_LEADING_CODE (c);
1868 		}
1869 	      if (! ASCII_CHAR_P (c))
1870 		{
1871 		  int lim2 = leading_code2 + 1;
1872 		  while (leading_code < lim2)
1873 		    fastmap[leading_code++] = 1;
1874 		  if (c <= c2)
1875 		    {
1876 		      char_ranges[n_char_ranges++] = c;
1877 		      char_ranges[n_char_ranges++] = c2;
1878 		    }
1879 		}
1880 	    }
1881 	  else
1882 	    {
1883 	      if (ASCII_CHAR_P (c))
1884 		fastmap[c] = 1;
1885 	      else
1886 		{
1887 		  fastmap[leading_code] = 1;
1888 		  char_ranges[n_char_ranges++] = c;
1889 		  char_ranges[n_char_ranges++] = c;
1890 		}
1891 	    }
1892 	}
1893 
1894       /* If the current range is unibyte and STRING contains non-ASCII
1895 	 chars, arrange fastmap for the corresponding unibyte
1896 	 chars.  */
1897 
1898       if (! multibyte && n_char_ranges > 0)
1899 	{
1900 	  memset (fastmap + 0200, 0, 0200);
1901 	  for (i = 0; i < n_char_ranges; i += 2)
1902 	    {
1903 	      int c1 = char_ranges[i];
1904 	      int lim2 = char_ranges[i + 1] + 1;
1905 
1906 	      for (; c1 < lim2; c1++)
1907 		{
1908 		  int b = CHAR_TO_BYTE_SAFE (c1);
1909 		  if (b >= 0)
1910 		    fastmap[b] = 1;
1911 		}
1912 	    }
1913 	}
1914     }
1915 
1916   /* If ^ was the first character, complement the fastmap.  */
1917   if (negate)
1918     {
1919       if (! multibyte)
1920 	for (i = 0; i < sizeof fastmap; i++)
1921 	  fastmap[i] ^= 1;
1922       else
1923 	{
1924 	  for (i = 0; i < 0200; i++)
1925 	    fastmap[i] ^= 1;
1926 	  /* All non-ASCII chars possibly match.  */
1927 	  for (; i < sizeof fastmap; i++)
1928 	    fastmap[i] = 1;
1929 	}
1930     }
1931 
1932   {
1933     ptrdiff_t start_point = PT;
1934     ptrdiff_t pos = PT;
1935     ptrdiff_t pos_byte = PT_BYTE;
1936     unsigned char *p = PT_ADDR, *endp, *stop;
1937 
1938     if (forwardp)
1939       {
1940 	endp = (XFIXNUM (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XFIXNUM (lim));
1941 	stop = (pos < GPT && GPT < XFIXNUM (lim)) ? GPT_ADDR : endp;
1942       }
1943     else
1944       {
1945 	endp = CHAR_POS_ADDR (XFIXNUM (lim));
1946 	stop = (pos >= GPT && GPT > XFIXNUM (lim)) ? GAP_END_ADDR : endp;
1947       }
1948 
1949     /* This code may look up syntax tables using functions that rely on the
1950        gl_state object.  To make sure this object is not out of date,
1951        let's initialize it manually.
1952        We ignore syntax-table text-properties for now, since that's
1953        what we've done in the past.  */
1954     SETUP_BUFFER_SYNTAX_TABLE ();
1955     if (forwardp)
1956       {
1957 	if (multibyte)
1958 	  while (1)
1959 	    {
1960 	      int nbytes;
1961 
1962 	      if (p >= stop)
1963 		{
1964 		  if (p >= endp)
1965 		    break;
1966 		  p = GAP_END_ADDR;
1967 		  stop = endp;
1968 		}
1969 	      c = string_char_and_length (p, &nbytes);
1970 	      if (! NILP (iso_classes) && in_classes (c, iso_classes))
1971 		{
1972 		  if (negate)
1973 		    break;
1974 		  else
1975 		    goto fwd_ok;
1976 		}
1977 
1978 	      if (! fastmap[*p])
1979 		break;
1980 	      if (! ASCII_CHAR_P (c))
1981 		{
1982 		  /* As we are looking at a multibyte character, we
1983 		     must look up the character in the table
1984 		     CHAR_RANGES.  If there's no data in the table,
1985 		     that character is not what we want to skip.  */
1986 
1987 		  /* The following code do the right thing even if
1988 		     n_char_ranges is zero (i.e. no data in
1989 		     CHAR_RANGES).  */
1990 		  for (i = 0; i < n_char_ranges; i += 2)
1991 		    if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1992 		      break;
1993 		  if (!(negate ^ (i < n_char_ranges)))
1994 		    break;
1995 		}
1996 	    fwd_ok:
1997 	      p += nbytes, pos++, pos_byte += nbytes;
1998 	      rarely_quit (pos);
1999 	    }
2000 	else
2001 	  while (true)
2002 	    {
2003 	      if (p >= stop)
2004 		{
2005 		  if (p >= endp)
2006 		    break;
2007 		  p = GAP_END_ADDR;
2008 		  stop = endp;
2009 		}
2010 
2011 	      if (!NILP (iso_classes) && in_classes (*p, iso_classes))
2012 		{
2013 		  if (negate)
2014 		    break;
2015 		  else
2016 		    goto fwd_unibyte_ok;
2017 		}
2018 
2019 	      if (!fastmap[*p])
2020 		break;
2021 	    fwd_unibyte_ok:
2022 	      p++, pos++, pos_byte++;
2023 	      rarely_quit (pos);
2024 	    }
2025       }
2026     else
2027       {
2028 	if (multibyte)
2029 	  while (true)
2030 	    {
2031 	      if (p <= stop)
2032 		{
2033 		  if (p <= endp)
2034 		    break;
2035 		  p = GPT_ADDR;
2036 		  stop = endp;
2037 		}
2038 	      unsigned char *prev_p = p;
2039 	      do
2040 		p--;
2041 	      while (stop <= p && ! CHAR_HEAD_P (*p));
2042 
2043 	      c = STRING_CHAR (p);
2044 
2045 	      if (! NILP (iso_classes) && in_classes (c, iso_classes))
2046 		{
2047 		  if (negate)
2048 		    break;
2049 		  else
2050 		    goto back_ok;
2051 		}
2052 
2053 	      if (! fastmap[*p])
2054 		break;
2055 	      if (! ASCII_CHAR_P (c))
2056 		{
2057 		  /* See the comment in the previous similar code.  */
2058 		  for (i = 0; i < n_char_ranges; i += 2)
2059 		    if (c >= char_ranges[i] && c <= char_ranges[i + 1])
2060 		      break;
2061 		  if (!(negate ^ (i < n_char_ranges)))
2062 		    break;
2063 		}
2064 	    back_ok:
2065 	      pos--, pos_byte -= prev_p - p;
2066 	      rarely_quit (pos);
2067 	    }
2068 	else
2069 	  while (true)
2070 	    {
2071 	      if (p <= stop)
2072 		{
2073 		  if (p <= endp)
2074 		    break;
2075 		  p = GPT_ADDR;
2076 		  stop = endp;
2077 		}
2078 
2079 	      if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
2080 		{
2081 		  if (negate)
2082 		    break;
2083 		  else
2084 		    goto back_unibyte_ok;
2085 		}
2086 
2087 	      if (!fastmap[p[-1]])
2088 		break;
2089 	    back_unibyte_ok:
2090 	      p--, pos--, pos_byte--;
2091 	      rarely_quit (pos);
2092 	    }
2093       }
2094 
2095     SET_PT_BOTH (pos, pos_byte);
2096 
2097     SAFE_FREE ();
2098     return make_fixnum (PT - start_point);
2099   }
2100 }
2101 
2102 
2103 static Lisp_Object
skip_syntaxes(bool forwardp,Lisp_Object string,Lisp_Object lim)2104 skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
2105 {
2106   int c;
2107   unsigned char fastmap[0400];
2108   bool negate = 0;
2109   ptrdiff_t i, i_byte;
2110   bool multibyte;
2111   ptrdiff_t size_byte;
2112   unsigned char *str;
2113 
2114   CHECK_STRING (string);
2115 
2116   if (NILP (lim))
2117     XSETINT (lim, forwardp ? ZV : BEGV);
2118   else
2119     CHECK_FIXNUM_COERCE_MARKER (lim);
2120 
2121   /* In any case, don't allow scan outside bounds of buffer.  */
2122   if (XFIXNUM (lim) > ZV)
2123     XSETFASTINT (lim, ZV);
2124   if (XFIXNUM (lim) < BEGV)
2125     XSETFASTINT (lim, BEGV);
2126 
2127   if (forwardp ? (PT >= XFIXNAT (lim)) : (PT <= XFIXNAT (lim)))
2128     return make_fixnum (0);
2129 
2130   multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
2131 	       && (XFIXNUM (lim) - PT != CHAR_TO_BYTE (XFIXNUM (lim)) - PT_BYTE));
2132 
2133   memset (fastmap, 0, sizeof fastmap);
2134 
2135   if (SBYTES (string) > SCHARS (string))
2136     /* As this is very rare case (syntax spec is ASCII only), don't
2137        consider efficiency.  */
2138     string = string_make_unibyte (string);
2139 
2140   str = SDATA (string);
2141   size_byte = SBYTES (string);
2142 
2143   i_byte = 0;
2144   if (i_byte < size_byte
2145       && SREF (string, 0) == '^')
2146     {
2147       negate = 1; i_byte++;
2148     }
2149 
2150   /* Find the syntaxes specified and set their elements of fastmap.  */
2151 
2152   while (i_byte < size_byte)
2153     {
2154       c = str[i_byte++];
2155       fastmap[syntax_spec_code[c]] = 1;
2156     }
2157 
2158   /* If ^ was the first character, complement the fastmap.  */
2159   if (negate)
2160     for (i = 0; i < sizeof fastmap; i++)
2161       fastmap[i] ^= 1;
2162 
2163   {
2164     ptrdiff_t start_point = PT;
2165     ptrdiff_t pos = PT;
2166     ptrdiff_t pos_byte = PT_BYTE;
2167     unsigned char *p, *endp, *stop;
2168 
2169     SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
2170 
2171     if (forwardp)
2172       {
2173 	while (true)
2174 	  {
2175 	    p = BYTE_POS_ADDR (pos_byte);
2176 	    endp = XFIXNUM (lim) == GPT ? GPT_ADDR : CHAR_POS_ADDR (XFIXNUM (lim));
2177 	    stop = pos < GPT && GPT < XFIXNUM (lim) ? GPT_ADDR : endp;
2178 
2179 	    do
2180 	      {
2181 		int nbytes;
2182 
2183 		if (p >= stop)
2184 		  {
2185 		    if (p >= endp)
2186 		      goto done;
2187 		    p = GAP_END_ADDR;
2188 		    stop = endp;
2189 		  }
2190 		if (multibyte)
2191 		  c = string_char_and_length (p, &nbytes);
2192 		else
2193 		  c = *p, nbytes = 1;
2194 		if (! fastmap[SYNTAX (c)])
2195 		  goto done;
2196 		p += nbytes, pos++, pos_byte += nbytes;
2197 		rarely_quit (pos);
2198 	      }
2199 	    while (!parse_sexp_lookup_properties
2200 		   || pos < gl_state.e_property);
2201 
2202 	    update_syntax_table_forward (pos + gl_state.offset,
2203 					 false, gl_state.object);
2204 	  }
2205       }
2206     else
2207       {
2208 	p = BYTE_POS_ADDR (pos_byte);
2209 	endp = CHAR_POS_ADDR (XFIXNUM (lim));
2210 	stop = pos >= GPT && GPT > XFIXNUM (lim) ? GAP_END_ADDR : endp;
2211 
2212 	if (multibyte)
2213 	  {
2214 	    while (true)
2215 	      {
2216 		if (p <= stop)
2217 		  {
2218 		    if (p <= endp)
2219 		      break;
2220 		    p = GPT_ADDR;
2221 		    stop = endp;
2222 		  }
2223 		UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2224 
2225 		unsigned char *prev_p = p;
2226 		do
2227 		  p--;
2228 		while (stop <= p && ! CHAR_HEAD_P (*p));
2229 
2230 		c = STRING_CHAR (p);
2231 		if (! fastmap[SYNTAX (c)])
2232 		  break;
2233 		pos--, pos_byte -= prev_p - p;
2234 		rarely_quit (pos);
2235 	      }
2236 	  }
2237 	else
2238 	  {
2239 	    while (true)
2240 	      {
2241 		if (p <= stop)
2242 		  {
2243 		    if (p <= endp)
2244 		      break;
2245 		    p = GPT_ADDR;
2246 		    stop = endp;
2247 		  }
2248 		UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2249 		if (! fastmap[SYNTAX (p[-1])])
2250 		  break;
2251 		p--, pos--, pos_byte--;
2252 		rarely_quit (pos);
2253 	      }
2254 	  }
2255       }
2256 
2257   done:
2258     SET_PT_BOTH (pos, pos_byte);
2259 
2260     return make_fixnum (PT - start_point);
2261   }
2262 }
2263 
2264 /* Return true if character C belongs to one of the ISO classes
2265    in the list ISO_CLASSES.  Each class is represented by an
2266    integer which is its type according to re_wctype.  */
2267 
2268 static bool
in_classes(int c,Lisp_Object iso_classes)2269 in_classes (int c, Lisp_Object iso_classes)
2270 {
2271   bool fits_class = 0;
2272 
2273   while (CONSP (iso_classes))
2274     {
2275       Lisp_Object elt;
2276       elt = XCAR (iso_classes);
2277       iso_classes = XCDR (iso_classes);
2278 
2279       if (re_iswctype (c, XFIXNAT (elt)))
2280 	fits_class = 1;
2281     }
2282 
2283   return fits_class;
2284 }
2285 
2286 /* Jump over a comment, assuming we are at the beginning of one.
2287    FROM is the current position.
2288    FROM_BYTE is the bytepos corresponding to FROM.
2289    Do not move past STOP (a charpos).
2290    The comment over which we have to jump is of style STYLE
2291      (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2292    NESTING should be positive to indicate the nesting at the beginning
2293      for nested comments and should be zero or negative else.
2294      ST_COMMENT_STYLE cannot be nested.
2295    PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2296      (or 0 if the search cannot start in the middle of a two-character).
2297 
2298    If successful, return true and store the charpos of the comment's
2299    end into *CHARPOS_PTR and the corresponding bytepos into
2300    *BYTEPOS_PTR.  Else, return false and store the charpos STOP into
2301    *CHARPOS_PTR, the corresponding bytepos into *BYTEPOS_PTR and the
2302    current nesting (as defined for state->incomment) in
2303    *INCOMMENT_PTR.  Should the last character scanned in an incomplete
2304    comment be a possible first character of a two character construct,
2305    we store its SYNTAX_WITH_FLAGS into *last_syntax_ptr.  Otherwise,
2306    we store Smax into *last_syntax_ptr.
2307 
2308    The comment end is the last character of the comment rather than the
2309    character just after the comment.
2310 
2311    Global syntax data is assumed to initially be valid for FROM and
2312    remains valid for forward search starting at the returned position. */
2313 
2314 static bool
forw_comment(ptrdiff_t from,ptrdiff_t from_byte,ptrdiff_t stop,EMACS_INT nesting,int style,int prev_syntax,ptrdiff_t * charpos_ptr,ptrdiff_t * bytepos_ptr,EMACS_INT * incomment_ptr,int * last_syntax_ptr)2315 forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2316 	      EMACS_INT nesting, int style, int prev_syntax,
2317 	      ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
2318 	      EMACS_INT *incomment_ptr, int *last_syntax_ptr)
2319 {
2320   unsigned short int quit_count = 0;
2321   int c, c1;
2322   enum syntaxcode code;
2323   int syntax, other_syntax;
2324 
2325   if (nesting <= 0) nesting = -1;
2326 
2327   /* Enter the loop in the middle so that we find
2328      a 2-char comment ender if we start in the middle of it.  */
2329   syntax = prev_syntax;
2330   code = syntax & 0xff;
2331   if (syntax != 0 && from < stop) goto forw_incomment;
2332 
2333   while (1)
2334     {
2335       if (from == stop)
2336 	{
2337 	  *incomment_ptr = nesting;
2338 	  *charpos_ptr = from;
2339 	  *bytepos_ptr = from_byte;
2340           *last_syntax_ptr =
2341             (code == Sescape || code == Scharquote
2342              || SYNTAX_FLAGS_COMEND_FIRST (syntax)
2343              || (nesting > 0
2344                  && SYNTAX_FLAGS_COMSTART_FIRST (syntax)))
2345             ? syntax : Smax ;
2346 	  return 0;
2347 	}
2348       c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2349       syntax = SYNTAX_WITH_FLAGS (c);
2350       code = syntax & 0xff;
2351       if (code == Sendcomment
2352 	  && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2353 	  && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2354 	      (nesting > 0 && --nesting == 0) : nesting < 0)
2355           && !(comment_end_can_be_escaped && char_quoted (from, from_byte)))
2356 	/* We have encountered a comment end of the same style
2357 	   as the comment sequence which began this comment
2358 	   section.  */
2359 	break;
2360       if (code == Scomment_fence
2361 	  && style == ST_COMMENT_STYLE)
2362 	/* We have encountered a comment end of the same style
2363 	   as the comment sequence which began this comment
2364 	   section.  */
2365 	break;
2366       if (nesting > 0
2367 	  && code == Scomment
2368 	  && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2369 	  && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2370 	/* We have encountered a nested comment of the same style
2371 	   as the comment sequence which began this comment section.  */
2372 	nesting++;
2373       if (comment_end_can_be_escaped
2374           && (code == Sescape || code == Scharquote))
2375         {
2376           inc_both (&from, &from_byte);
2377           UPDATE_SYNTAX_TABLE_FORWARD (from);
2378           if (from == stop) continue; /* Failure */
2379         }
2380       inc_both (&from, &from_byte);
2381       UPDATE_SYNTAX_TABLE_FORWARD (from);
2382 
2383     forw_incomment:
2384       if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2385 	  && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2386 	      other_syntax = SYNTAX_WITH_FLAGS (c1),
2387 	      SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2388 	  && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2389 	  && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2390 	       SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2391 	      ? nesting > 0 : nesting < 0))
2392 	{
2393 	  syntax = Smax;        /* So that "|#" (lisp) can not return
2394                                    the syntax of "#" in *last_syntax_ptr. */
2395           if (--nesting <= 0)
2396 	    /* We have encountered a comment end of the same style
2397 	       as the comment sequence which began this comment section.  */
2398 	    break;
2399 	  else
2400 	    {
2401 	      inc_both (&from, &from_byte);
2402 	      UPDATE_SYNTAX_TABLE_FORWARD (from);
2403 	    }
2404 	}
2405       if (nesting > 0
2406 	  && from < stop
2407 	  && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2408 	  && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2409 	      other_syntax = SYNTAX_WITH_FLAGS (c1),
2410 	      SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2411 	      && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2412 	  && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2413 	      SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2414 	/* We have encountered a nested comment of the same style
2415 	   as the comment sequence which began this comment section.  */
2416 	{
2417           syntax = Smax; /* So that "#|#" isn't also a comment ender. */
2418 	  inc_both (&from, &from_byte);
2419 	  UPDATE_SYNTAX_TABLE_FORWARD (from);
2420 	  nesting++;
2421 	}
2422 
2423       rarely_quit (++quit_count);
2424     }
2425   *charpos_ptr = from;
2426   *bytepos_ptr = from_byte;
2427   *last_syntax_ptr = Smax; /* Any syntactic power the last byte had is
2428                               used up. */
2429   return 1;
2430 }
2431 
2432 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2433        doc: /*
2434 Move forward across up to COUNT comments.  If COUNT is negative, move backward.
2435 Stop scanning if we find something other than a comment or whitespace.
2436 Set point to where scanning stops.
2437 If COUNT comments are found as expected, with nothing except whitespace
2438 between them, return t; otherwise return nil.  */)
2439   (Lisp_Object count)
2440 {
2441   ptrdiff_t from, from_byte, stop;
2442   int c, c1;
2443   enum syntaxcode code;
2444   int comstyle = 0;	    /* style of comment encountered */
2445   bool comnested = 0;	    /* whether the comment is nestable or not */
2446   bool found;
2447   EMACS_INT count1;
2448   ptrdiff_t out_charpos, out_bytepos;
2449   EMACS_INT dummy;
2450   int dummy2;
2451   unsigned short int quit_count = 0;
2452 
2453   CHECK_FIXNUM (count);
2454   count1 = XFIXNUM (count);
2455   stop = count1 > 0 ? ZV : BEGV;
2456 
2457   from = PT;
2458   from_byte = PT_BYTE;
2459 
2460   SETUP_SYNTAX_TABLE (from, clip_to_bounds (PTRDIFF_MIN, count1, PTRDIFF_MAX));
2461   while (count1 > 0)
2462     {
2463       do
2464 	{
2465 	  bool comstart_first;
2466 	  int syntax, other_syntax;
2467 
2468 	  if (from == stop)
2469 	    {
2470 	      SET_PT_BOTH (from, from_byte);
2471 	      return Qnil;
2472 	    }
2473 	  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2474 	  syntax = SYNTAX_WITH_FLAGS (c);
2475 	  code = SYNTAX (c);
2476 	  comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2477 	  comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2478 	  comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2479 	  inc_both (&from, &from_byte);
2480 	  UPDATE_SYNTAX_TABLE_FORWARD (from);
2481 	  if (from < stop && comstart_first
2482 	      && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2483 		  other_syntax = SYNTAX_WITH_FLAGS (c1),
2484 		  SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2485 	    {
2486 	      /* We have encountered a comment start sequence and we
2487 		 are ignoring all text inside comments.  We must record
2488 		 the comment style this sequence begins so that later,
2489 		 only a comment end of the same style actually ends
2490 		 the comment section.  */
2491 	      code = Scomment;
2492 	      comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2493 	      comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2494 	      inc_both (&from, &from_byte);
2495 	      UPDATE_SYNTAX_TABLE_FORWARD (from);
2496 	    }
2497 	  rarely_quit (++quit_count);
2498 	}
2499       while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2500 
2501       if (code == Scomment_fence)
2502 	comstyle = ST_COMMENT_STYLE;
2503       else if (code != Scomment)
2504 	{
2505 	  dec_both (&from, &from_byte);
2506 	  SET_PT_BOTH (from, from_byte);
2507 	  return Qnil;
2508 	}
2509       /* We're at the start of a comment.  */
2510       found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2511 			    &out_charpos, &out_bytepos, &dummy, &dummy2);
2512       from = out_charpos; from_byte = out_bytepos;
2513       if (!found)
2514 	{
2515 	  SET_PT_BOTH (from, from_byte);
2516 	  return Qnil;
2517 	}
2518       inc_both (&from, &from_byte);
2519       UPDATE_SYNTAX_TABLE_FORWARD (from);
2520       /* We have skipped one comment.  */
2521       count1--;
2522     }
2523 
2524   while (count1 < 0)
2525     {
2526       while (true)
2527 	{
2528 	  if (from <= stop)
2529 	    {
2530 	      SET_PT_BOTH (BEGV, BEGV_BYTE);
2531 	      return Qnil;
2532 	    }
2533 
2534 	  dec_both (&from, &from_byte);
2535 	  /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from).  */
2536 	  bool quoted = char_quoted (from, from_byte);
2537 	  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2538 	  int syntax = SYNTAX_WITH_FLAGS (c);
2539 	  code = SYNTAX (c);
2540 	  comstyle = 0;
2541 	  comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2542 	  if (code == Sendcomment)
2543 	    comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2544 	  if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2545 	      && prev_char_comend_first (from, from_byte)
2546 	      && !char_quoted (from - 1, dec_bytepos (from_byte)))
2547 	    {
2548 	      int other_syntax;
2549 	      /* We must record the comment style encountered so that
2550 		 later, we can match only the proper comment begin
2551 		 sequence of the same style.  */
2552 	      dec_both (&from, &from_byte);
2553 	      code = Sendcomment;
2554 	      /* Calling char_quoted, above, set up global syntax position
2555 		 at the new value of FROM.  */
2556 	      c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2557 	      other_syntax = SYNTAX_WITH_FLAGS (c1);
2558 	      comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2559 	      comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2560 	    }
2561 
2562 	  if (code == Scomment_fence)
2563 	    {
2564 	      /* Skip until first preceding unquoted comment_fence.  */
2565 	      bool fence_found = 0;
2566 	      ptrdiff_t ini = from, ini_byte = from_byte;
2567 
2568 	      if (from > stop)
2569 		{
2570 		  while (1)
2571 		    {
2572 		      dec_both (&from, &from_byte);
2573 		      UPDATE_SYNTAX_TABLE_BACKWARD (from);
2574 		      c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2575 		      if (SYNTAX (c) == Scomment_fence
2576 			  && !char_quoted (from, from_byte))
2577 			{
2578 			  fence_found = 1;
2579 			  break;
2580 			}
2581 		      else if (from == stop)
2582 			break;
2583 		      rarely_quit (++quit_count);
2584 		    }
2585 		}
2586 	      if (fence_found == 0)
2587 		{
2588 		  from = ini;		/* Set point to ini + 1.  */
2589 		  from_byte = ini_byte;
2590 		  goto leave;
2591 		}
2592  	      else
2593 		/* We have skipped one comment.  */
2594 		break;
2595 	    }
2596 	  else if (code == Sendcomment)
2597 	    {
2598               found = (!quoted || !comment_end_can_be_escaped)
2599                 && back_comment (from, from_byte, stop, comnested, comstyle,
2600                                  &out_charpos, &out_bytepos);
2601 	      if (!found)
2602 		{
2603 		  if (c == '\n')
2604 		    /* This end-of-line is not an end-of-comment.
2605 		       Treat it like a whitespace.
2606 		       CC-mode (and maybe others) relies on this behavior.  */
2607 		    ;
2608 		  else
2609 		    {
2610 		      /* Failure: we should go back to the end of this
2611 			 not-quite-endcomment.  */
2612 		      if (SYNTAX (c) != code)
2613 			/* It was a two-char Sendcomment.  */
2614 			inc_both (&from, &from_byte);
2615 		      goto leave;
2616 		    }
2617 		}
2618 	      else
2619 		{
2620 		  /* We have skipped one comment.  */
2621 		  from = out_charpos, from_byte = out_bytepos;
2622 		  break;
2623 		}
2624 	    }
2625 	  else if (code != Swhitespace || quoted)
2626 	    {
2627 	    leave:
2628 	      inc_both (&from, &from_byte);
2629 	      SET_PT_BOTH (from, from_byte);
2630 	      return Qnil;
2631 	    }
2632 
2633 	  rarely_quit (++quit_count);
2634 	}
2635 
2636       count1++;
2637     }
2638 
2639   SET_PT_BOTH (from, from_byte);
2640   return Qt;
2641 }
2642 
2643 /* Return syntax code of character C if C is an ASCII character
2644    or if MULTIBYTE_SYMBOL_P is false.  Otherwise, return Ssymbol.  */
2645 
2646 static enum syntaxcode
syntax_multibyte(int c,bool multibyte_symbol_p)2647 syntax_multibyte (int c, bool multibyte_symbol_p)
2648 {
2649   return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
2650 }
2651 
2652 static Lisp_Object
scan_lists(EMACS_INT from0,EMACS_INT count,EMACS_INT depth,bool sexpflag)2653 scan_lists (EMACS_INT from0, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2654 {
2655   Lisp_Object val;
2656   ptrdiff_t stop = count > 0 ? ZV : BEGV;
2657   int c, c1;
2658   int stringterm;
2659   bool quoted;
2660   bool mathexit = 0;
2661   enum syntaxcode code;
2662   EMACS_INT min_depth = depth;  /* Err out if depth gets less than this.  */
2663   int comstyle = 0;		/* Style of comment encountered.  */
2664   bool comnested = 0;		/* Whether the comment is nestable or not.  */
2665   ptrdiff_t temp_pos;
2666   EMACS_INT last_good = from0;
2667   bool found;
2668   ptrdiff_t from_byte;
2669   ptrdiff_t out_bytepos, out_charpos;
2670   EMACS_INT dummy;
2671   int dummy2;
2672   bool multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2673   unsigned short int quit_count = 0;
2674 
2675   if (depth > 0) min_depth = 0;
2676 
2677   ptrdiff_t from = clip_to_bounds (BEGV, from0, ZV);
2678 
2679   from_byte = CHAR_TO_BYTE (from);
2680 
2681   maybe_quit ();
2682 
2683   SETUP_SYNTAX_TABLE (from, clip_to_bounds (PTRDIFF_MIN, count, PTRDIFF_MAX));
2684   while (count > 0)
2685     {
2686       while (from < stop)
2687 	{
2688 	  rarely_quit (++quit_count);
2689 	  bool comstart_first, prefix;
2690 	  int syntax, other_syntax;
2691 	  UPDATE_SYNTAX_TABLE_FORWARD (from);
2692 	  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2693 	  syntax = SYNTAX_WITH_FLAGS (c);
2694 	  code = syntax_multibyte (c, multibyte_symbol_p);
2695 	  comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2696 	  comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2697 	  comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2698 	  prefix = SYNTAX_FLAGS_PREFIX (syntax);
2699 	  if (depth == min_depth)
2700 	    last_good = from;
2701 	  inc_both (&from, &from_byte);
2702 	  UPDATE_SYNTAX_TABLE_FORWARD (from);
2703 	  if (from < stop && comstart_first
2704 	      && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2705 		  other_syntax = SYNTAX_WITH_FLAGS (c),
2706 		  SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2707 	      && parse_sexp_ignore_comments)
2708 	    {
2709 	      /* We have encountered a comment start sequence and we
2710 		 are ignoring all text inside comments.  We must record
2711 		 the comment style this sequence begins so that later,
2712 		 only a comment end of the same style actually ends
2713 		 the comment section.  */
2714 	      code = Scomment;
2715 	      comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2716 	      comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2717 	      inc_both (&from, &from_byte);
2718 	      UPDATE_SYNTAX_TABLE_FORWARD (from);
2719 	    }
2720 
2721 	  if (prefix)
2722 	    continue;
2723 
2724 	  switch (code)
2725 	    {
2726 	    case Sescape:
2727 	    case Scharquote:
2728 	      if (from == stop)
2729 		goto lose;
2730 	      inc_both (&from, &from_byte);
2731 	      /* Treat following character as a word constituent.  */
2732 	      FALLTHROUGH;
2733 	    case Sword:
2734 	    case Ssymbol:
2735 	      if (depth || !sexpflag) break;
2736 	      /* This word counts as a sexp; return at end of it.  */
2737 	      while (from < stop)
2738 		{
2739 		  UPDATE_SYNTAX_TABLE_FORWARD (from);
2740 
2741 		  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2742 		  switch (syntax_multibyte (c, multibyte_symbol_p))
2743 		    {
2744 		    case Scharquote:
2745 		    case Sescape:
2746 		      inc_both (&from, &from_byte);
2747 		      if (from == stop)
2748 			goto lose;
2749 		      break;
2750 		    case Sword:
2751 		    case Ssymbol:
2752 		    case Squote:
2753 		      break;
2754 		    default:
2755 		      goto done;
2756 		    }
2757 		  inc_both (&from, &from_byte);
2758 		  rarely_quit (++quit_count);
2759 		}
2760 	      goto done;
2761 
2762 	    case Scomment_fence:
2763 	      comstyle = ST_COMMENT_STYLE;
2764 	      FALLTHROUGH;
2765 	    case Scomment:
2766 	      if (!parse_sexp_ignore_comments) break;
2767 	      UPDATE_SYNTAX_TABLE_FORWARD (from);
2768 	      found = forw_comment (from, from_byte, stop,
2769 				    comnested, comstyle, 0,
2770 				    &out_charpos, &out_bytepos, &dummy,
2771                                     &dummy2);
2772 	      from = out_charpos, from_byte = out_bytepos;
2773 	      if (!found)
2774 		{
2775 		  if (depth == 0)
2776 		    goto done;
2777 		  goto lose;
2778 		}
2779 	      inc_both (&from, &from_byte);
2780 	      UPDATE_SYNTAX_TABLE_FORWARD (from);
2781 	      break;
2782 
2783 	    case Smath:
2784 	      if (!sexpflag)
2785 		break;
2786 	      if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2787 		{
2788 		  inc_both (&from, &from_byte);
2789 		}
2790 	      if (mathexit)
2791 		{
2792 		  mathexit = 0;
2793 		  goto close1;
2794 		}
2795 	      mathexit = 1;
2796 	      FALLTHROUGH;
2797 	    case Sopen:
2798 	      if (!++depth) goto done;
2799 	      break;
2800 
2801 	    case Sclose:
2802 	    close1:
2803 	      if (!--depth) goto done;
2804 	      if (depth < min_depth)
2805 		xsignal3 (Qscan_error,
2806 			  build_string ("Containing expression ends prematurely"),
2807 			  make_fixnum (last_good), make_fixnum (from));
2808 	      break;
2809 
2810 	    case Sstring:
2811 	    case Sstring_fence:
2812 	      temp_pos = dec_bytepos (from_byte);
2813 	      stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2814 	      while (1)
2815 		{
2816 		  enum syntaxcode c_code;
2817 		  if (from >= stop)
2818 		    goto lose;
2819 		  UPDATE_SYNTAX_TABLE_FORWARD (from);
2820 		  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2821 		  c_code = syntax_multibyte (c, multibyte_symbol_p);
2822 		  if (code == Sstring
2823 		      ? c == stringterm && c_code == Sstring
2824 		      : c_code == Sstring_fence)
2825 		    break;
2826 
2827 		  if (c_code == Scharquote || c_code == Sescape)
2828 		    inc_both (&from, &from_byte);
2829 		  inc_both (&from, &from_byte);
2830 		  rarely_quit (++quit_count);
2831 		}
2832 	      inc_both (&from, &from_byte);
2833 	      if (!depth && sexpflag) goto done;
2834 	      break;
2835 	    default:
2836 	      /* Ignore whitespace, punctuation, quote, endcomment.  */
2837 	      break;
2838 	    }
2839 	}
2840 
2841       /* Reached end of buffer.  Error if within object, return nil if between */
2842       if (depth)
2843 	goto lose;
2844 
2845       return Qnil;
2846 
2847       /* End of object reached */
2848     done:
2849       count--;
2850     }
2851 
2852   while (count < 0)
2853     {
2854       while (from > stop)
2855 	{
2856 	  rarely_quit (++quit_count);
2857 	  dec_both (&from, &from_byte);
2858 	  UPDATE_SYNTAX_TABLE_BACKWARD (from);
2859 	  c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2860 	  int syntax = SYNTAX_WITH_FLAGS (c);
2861 	  code = syntax_multibyte (c, multibyte_symbol_p);
2862 	  if (depth == min_depth)
2863 	    last_good = from;
2864 	  comstyle = 0;
2865 	  comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2866 	  if (code == Sendcomment)
2867 	    comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2868 	  if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2869 	      && prev_char_comend_first (from, from_byte)
2870 	      && parse_sexp_ignore_comments)
2871 	    {
2872 	      /* We must record the comment style encountered so that
2873 		 later, we can match only the proper comment begin
2874 		 sequence of the same style.  */
2875 	      int c2, other_syntax;
2876 	      dec_both (&from, &from_byte);
2877 	      UPDATE_SYNTAX_TABLE_BACKWARD (from);
2878 	      code = Sendcomment;
2879 	      c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2880 	      other_syntax = SYNTAX_WITH_FLAGS (c2);
2881 	      comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2882 	      comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2883 	    }
2884 
2885 	  /* Quoting turns anything except a comment-ender
2886 	     into a word character.  Note that this cannot be true
2887 	     if we decremented FROM in the if-statement above.  */
2888 	  if (code != Sendcomment && char_quoted (from, from_byte))
2889 	    {
2890 	      dec_both (&from, &from_byte);
2891 	      code = Sword;
2892 	    }
2893 	  else if (SYNTAX_FLAGS_PREFIX (syntax))
2894 	    continue;
2895 
2896 	  switch (code)
2897 	    {
2898 	    case Sword:
2899 	    case Ssymbol:
2900 	    case Sescape:
2901 	    case Scharquote:
2902 	      if (depth || !sexpflag) break;
2903 	      /* This word counts as a sexp; count object finished
2904 		 after passing it.  */
2905 	      while (from > stop)
2906 		{
2907 		  temp_pos = dec_bytepos (from_byte);
2908 		  UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2909 		  c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2910 		  /* Don't allow comment-end to be quoted.  */
2911 		  if (syntax_multibyte (c1, multibyte_symbol_p) == Sendcomment)
2912 		    goto done2;
2913 		  quoted = char_quoted (from - 1, temp_pos);
2914 		  if (quoted)
2915 		    {
2916 		      dec_both (&from, &from_byte);
2917 		      temp_pos = dec_bytepos (temp_pos);
2918 		      UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2919 		    }
2920 		  c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2921 		  if (! quoted)
2922 		    switch (syntax_multibyte (c1, multibyte_symbol_p))
2923 		      {
2924 		      case Sword: case Ssymbol: case Squote: break;
2925 		      default: goto done2;
2926 		      }
2927 		  dec_both (&from, &from_byte);
2928 		  rarely_quit (++quit_count);
2929 		}
2930 	      goto done2;
2931 
2932 	    case Smath:
2933 	      if (!sexpflag)
2934 		break;
2935 	      if (from > BEGV)
2936 		{
2937 		  temp_pos = dec_bytepos (from_byte);
2938 		  UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2939 		  if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2940 		    dec_both (&from, &from_byte);
2941 		}
2942 	      if (mathexit)
2943 		{
2944 		  mathexit = 0;
2945 		  goto open2;
2946 		}
2947 	      mathexit = 1;
2948 	      FALLTHROUGH;
2949 	    case Sclose:
2950 	      if (!++depth) goto done2;
2951 	      break;
2952 
2953 	    case Sopen:
2954 	    open2:
2955 	      if (!--depth) goto done2;
2956 	      if (depth < min_depth)
2957 		xsignal3 (Qscan_error,
2958 			  build_string ("Containing expression ends prematurely"),
2959 			  make_fixnum (last_good), make_fixnum (from));
2960 	      break;
2961 
2962 	    case Sendcomment:
2963 	      if (!parse_sexp_ignore_comments)
2964 		break;
2965 	      found = back_comment (from, from_byte, stop, comnested, comstyle,
2966 				    &out_charpos, &out_bytepos);
2967 	      /* FIXME:  if !found, it really wasn't a comment-end.
2968 		 For single-char Sendcomment, we can't do much about it apart
2969 		 from skipping the char.
2970 		 For 2-char endcomments, we could try again, taking both
2971 		 chars as separate entities, but it's a lot of trouble
2972 		 for very little gain, so we don't bother either.  -sm */
2973 	      if (found)
2974 		from = out_charpos, from_byte = out_bytepos;
2975 	      break;
2976 
2977 	    case Scomment_fence:
2978 	    case Sstring_fence:
2979 	      while (1)
2980 		{
2981 		  if (from == stop)
2982 		    goto lose;
2983 		  dec_both (&from, &from_byte);
2984 		  UPDATE_SYNTAX_TABLE_BACKWARD (from);
2985 		  if (!char_quoted (from, from_byte))
2986 		    {
2987 		      c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2988 		      if (syntax_multibyte (c, multibyte_symbol_p) == code)
2989 			break;
2990 		    }
2991 		  rarely_quit (++quit_count);
2992 		}
2993 	      if (code == Sstring_fence && !depth && sexpflag) goto done2;
2994 	      break;
2995 
2996 	    case Sstring:
2997 	      stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2998 	      while (true)
2999 		{
3000 		  if (from == stop)
3001 		    goto lose;
3002 		  dec_both (&from, &from_byte);
3003 		  UPDATE_SYNTAX_TABLE_BACKWARD (from);
3004 		  if (!char_quoted (from, from_byte))
3005 		    {
3006 		      c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3007 		      if (c == stringterm
3008 			  && (syntax_multibyte (c, multibyte_symbol_p)
3009 			      == Sstring))
3010 			break;
3011 		    }
3012 		  rarely_quit (++quit_count);
3013 		}
3014 	      if (!depth && sexpflag) goto done2;
3015 	      break;
3016 	    default:
3017 	      /* Ignore whitespace, punctuation, quote, endcomment.  */
3018 	      break;
3019 	    }
3020 	}
3021 
3022       /* Reached start of buffer.  Error if within object, return nil if between */
3023       if (depth)
3024 	goto lose;
3025 
3026       return Qnil;
3027 
3028     done2:
3029       count++;
3030     }
3031 
3032 
3033   XSETFASTINT (val, from);
3034   return val;
3035 
3036  lose:
3037   xsignal3 (Qscan_error,
3038 	    build_string ("Unbalanced parentheses"),
3039 	    make_fixnum (last_good), make_fixnum (from));
3040 }
3041 
3042 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
3043        doc: /* Scan from character number FROM by COUNT lists.
3044 Scan forward if COUNT is positive, backward if COUNT is negative.
3045 Return the character number of the position thus found.
3046 
3047 A \"list", in this context, refers to a balanced parenthetical
3048 grouping, as determined by the syntax table.
3049 
3050 If DEPTH is nonzero, treat that as the nesting depth of the starting
3051 point (i.e. the starting point is DEPTH parentheses deep).  This
3052 function scans over parentheses until the depth goes to zero COUNT
3053 times.  Hence, positive DEPTH moves out that number of levels of
3054 parentheses, while negative DEPTH moves to a deeper level.
3055 
3056 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3057 
3058 If we reach the beginning or end of the accessible part of the buffer
3059 before we have scanned over COUNT lists, return nil if the depth at
3060 that point is zero, and signal an error if the depth is nonzero.  */)
3061   (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
3062 {
3063   CHECK_FIXNUM (from);
3064   CHECK_FIXNUM (count);
3065   CHECK_FIXNUM (depth);
3066 
3067   return scan_lists (XFIXNUM (from), XFIXNUM (count), XFIXNUM (depth), 0);
3068 }
3069 
3070 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
3071        doc: /* Scan from character number FROM by COUNT balanced expressions.
3072 If COUNT is negative, scan backwards.
3073 Returns the character number of the position thus found.
3074 
3075 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3076 
3077 If the beginning or end of (the accessible part of) the buffer is reached
3078 in the middle of a parenthetical grouping, an error is signaled.
3079 If the beginning or end is reached between groupings
3080 but before count is used up, nil is returned.  */)
3081   (Lisp_Object from, Lisp_Object count)
3082 {
3083   CHECK_FIXNUM (from);
3084   CHECK_FIXNUM (count);
3085 
3086   return scan_lists (XFIXNUM (from), XFIXNUM (count), 0, 1);
3087 }
3088 
3089 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
3090        0, 0, 0,
3091        doc: /* Move point backward over any number of chars with prefix syntax.
3092 This includes chars with expression prefix syntax class (\\=') and those with
3093 the prefix syntax flag (p).  */)
3094   (void)
3095 {
3096   ptrdiff_t beg = BEGV;
3097   ptrdiff_t opoint = PT;
3098   ptrdiff_t opoint_byte = PT_BYTE;
3099   ptrdiff_t pos = PT;
3100   ptrdiff_t pos_byte = PT_BYTE;
3101   int c;
3102 
3103   if (pos <= beg)
3104     {
3105       SET_PT_BOTH (opoint, opoint_byte);
3106 
3107       return Qnil;
3108     }
3109 
3110   SETUP_SYNTAX_TABLE (pos, -1);
3111 
3112   dec_both (&pos, &pos_byte);
3113 
3114   while (!char_quoted (pos, pos_byte)
3115 	 /* Previous statement updates syntax table.  */
3116 	 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
3117 	     || syntax_prefix_flag_p (c)))
3118     {
3119       opoint = pos;
3120       opoint_byte = pos_byte;
3121 
3122       if (pos <= beg)
3123         break;
3124       dec_both (&pos, &pos_byte);
3125       rarely_quit (pos);
3126     }
3127 
3128   SET_PT_BOTH (opoint, opoint_byte);
3129 
3130   return Qnil;
3131 }
3132 
3133 
3134 /* If the character at FROM_BYTE is the second part of a 2-character
3135    comment opener based on PREV_FROM_SYNTAX, update STATE and return
3136    true.  */
3137 static bool
in_2char_comment_start(struct lisp_parse_state * state,int prev_from_syntax,ptrdiff_t prev_from,ptrdiff_t from_byte)3138 in_2char_comment_start (struct lisp_parse_state *state,
3139                         int prev_from_syntax,
3140                         ptrdiff_t prev_from,
3141                         ptrdiff_t from_byte)
3142 {
3143   int c1, syntax;
3144   if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3145       && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
3146           syntax = SYNTAX_WITH_FLAGS (c1),
3147           SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3148     {
3149       /* Record the comment style we have entered so that only
3150          the comment-end sequence of the same style actually
3151          terminates the comment section.  */
3152       state->comstyle
3153         = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3154       bool comnested = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax)
3155                         | SYNTAX_FLAGS_COMMENT_NESTED (syntax));
3156       state->incomment = comnested ? 1 : -1;
3157       state->comstr_start = prev_from;
3158       return true;
3159     }
3160   return false;
3161 }
3162 
3163 /* Parse forward from FROM / FROM_BYTE to END,
3164    assuming that FROM has state STATE,
3165    and return a description of the state of the parse at END.
3166    If STOPBEFORE, stop at the start of an atom.
3167    If COMMENTSTOP is 1, stop at the start of a comment.
3168    If COMMENTSTOP is -1, stop at the start or end of a comment,
3169    after the beginning of a string, or after the end of a string.  */
3170 
3171 static void
scan_sexps_forward(struct lisp_parse_state * state,ptrdiff_t from,ptrdiff_t from_byte,ptrdiff_t end,EMACS_INT targetdepth,bool stopbefore,int commentstop)3172 scan_sexps_forward (struct lisp_parse_state *state,
3173 		    ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
3174 		    EMACS_INT targetdepth, bool stopbefore,
3175 		    int commentstop)
3176 {
3177   enum syntaxcode code;
3178   struct level { ptrdiff_t last, prev; };
3179   struct level levelstart[100];
3180   struct level *curlevel = levelstart;
3181   struct level *endlevel = levelstart + 100;
3182   EMACS_INT depth;      /* Paren depth of current scanning location.
3183 			   level - levelstart equals this except
3184 			   when the depth becomes negative.  */
3185   EMACS_INT mindepth;		/* Lowest DEPTH value seen.  */
3186   bool start_quoted = 0;	/* True means starting after a char quote.  */
3187   Lisp_Object tem;
3188   ptrdiff_t prev_from;		/* Keep one character before FROM.  */
3189   ptrdiff_t prev_from_byte;
3190   int prev_from_syntax, prev_prev_from_syntax;
3191   bool boundary_stop = commentstop == -1;
3192   bool nofence;
3193   bool found;
3194   ptrdiff_t out_bytepos, out_charpos;
3195   int temp;
3196   unsigned short int quit_count = 0;
3197 
3198   prev_from = from;
3199   prev_from_byte = from_byte;
3200   if (from != BEGV)
3201     dec_both (&prev_from, &prev_from_byte);
3202 
3203   /* Use this macro instead of `from++'.  */
3204 #define INC_FROM				\
3205 do { prev_from = from;				\
3206      prev_from_byte = from_byte; 		\
3207      temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte);	\
3208      prev_prev_from_syntax = prev_from_syntax;  \
3209      prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3210      inc_both (&from, &from_byte);		\
3211      if (from < end)				\
3212        UPDATE_SYNTAX_TABLE_FORWARD (from);	\
3213   } while (0)
3214 
3215   maybe_quit ();
3216 
3217   depth = state->depth;
3218   start_quoted = state->quoted;
3219   prev_prev_from_syntax = Smax;
3220   prev_from_syntax = state->prev_syntax;
3221 
3222   tem = state->levelstarts;
3223   while (!NILP (tem))		/* >= second enclosing sexps.  */
3224     {
3225       Lisp_Object temhd = Fcar (tem);
3226       if (RANGED_FIXNUMP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
3227         curlevel->last = XFIXNUM (temhd);
3228       if (++curlevel == endlevel)
3229         curlevel--; /* error ("Nesting too deep for parser"); */
3230       curlevel->prev = -1;
3231       curlevel->last = -1;
3232       tem = Fcdr (tem);
3233     }
3234   curlevel->prev = -1;
3235   curlevel->last = -1;
3236 
3237   state->quoted = 0;
3238   mindepth = depth;
3239 
3240   SETUP_SYNTAX_TABLE (from, 1);
3241 
3242   /* Enter the loop at a place appropriate for initial state.  */
3243 
3244   if (state->incomment)
3245     goto startincomment;
3246   if (state->instring >= 0)
3247     {
3248       nofence = state->instring != ST_STRING_STYLE;
3249       if (start_quoted)
3250 	goto startquotedinstring;
3251       goto startinstring;
3252     }
3253   else if (start_quoted)
3254     goto startquoted;
3255   else if ((from < end)
3256            && (in_2char_comment_start (state, prev_from_syntax,
3257                                        prev_from, from_byte)))
3258     {
3259       INC_FROM;
3260       prev_from_syntax = Smax; /* the syntax has already been "used up". */
3261       goto atcomment;
3262     }
3263 
3264   while (from < end)
3265     {
3266       rarely_quit (++quit_count);
3267       INC_FROM;
3268 
3269       if ((from < end)
3270           && (in_2char_comment_start (state, prev_from_syntax,
3271                                       prev_from, from_byte)))
3272         {
3273           INC_FROM;
3274           prev_from_syntax = Smax; /* the syntax has already been "used up". */
3275           goto atcomment;
3276         }
3277 
3278       if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3279 	continue;
3280       code = prev_from_syntax & 0xff;
3281       switch (code)
3282 	{
3283 	case Sescape:
3284 	case Scharquote:
3285 	  if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3286 	  curlevel->last = prev_from;
3287 	startquoted:
3288 	  if (from == end) goto endquoted;
3289 	  INC_FROM;
3290 	  goto symstarted;
3291 	  /* treat following character as a word constituent */
3292 	case Sword:
3293 	case Ssymbol:
3294 	  if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3295 	  curlevel->last = prev_from;
3296 	symstarted:
3297 	  while (from < end)
3298 	    {
3299               if (in_2char_comment_start (state, prev_from_syntax,
3300                                           prev_from, from_byte))
3301                 {
3302                   INC_FROM;
3303                   prev_from_syntax = Smax; /* the syntax has already been "used up". */
3304                   goto atcomment;
3305                 }
3306 
3307 	      int symchar = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3308               switch (SYNTAX (symchar))
3309 		{
3310 		case Scharquote:
3311 		case Sescape:
3312 		  INC_FROM;
3313 		  if (from == end) goto endquoted;
3314 		  break;
3315 		case Sword:
3316 		case Ssymbol:
3317 		case Squote:
3318 		  break;
3319 		default:
3320 		  goto symdone;
3321 		}
3322 	      INC_FROM;
3323 	      rarely_quit (++quit_count);
3324 	    }
3325 	symdone:
3326 	  curlevel->prev = curlevel->last;
3327 	  break;
3328 
3329 	case Scomment_fence:
3330           /* Record the comment style we have entered so that only
3331              the comment-end sequence of the same style actually
3332              terminates the comment section.  */
3333           state->comstyle = ST_COMMENT_STYLE;
3334           state->incomment = -1;
3335           state->comstr_start = prev_from;
3336           goto atcomment;
3337 	case Scomment:
3338           state->comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3339           state->incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3340                               1 : -1);
3341           state->comstr_start = prev_from;
3342         atcomment:
3343           if (commentstop || boundary_stop) goto done;
3344 	startincomment:
3345 	  /* The (from == BEGV) test was to enter the loop in the middle so
3346 	     that we find a 2-char comment ender even if we start in the
3347 	     middle of it.  We don't want to do that if we're just at the
3348 	     beginning of the comment (think of (*) ... (*)).  */
3349 	  found = forw_comment (from, from_byte, end,
3350 				state->incomment, state->comstyle,
3351 				from == BEGV ? 0 : prev_from_syntax,
3352 				&out_charpos, &out_bytepos, &state->incomment,
3353                                 &prev_from_syntax);
3354 	  from = out_charpos; from_byte = out_bytepos;
3355 	  /* Beware!  prev_from and friends (except prev_from_syntax)
3356 	     are invalid now.  Luckily, the `done' doesn't use them
3357 	     and the INC_FROM sets them to a sane value without
3358 	     looking at them. */
3359 	  if (!found) goto done;
3360 	  INC_FROM;
3361 	  state->incomment = 0;
3362 	  state->comstyle = 0;	/* reset the comment style */
3363 	  prev_from_syntax = Smax; /* For the comment closer */
3364           if (boundary_stop) goto done;
3365 	  break;
3366 
3367 	case Sopen:
3368 	  if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3369 	  depth++;
3370 	  /* curlevel++->last ran into compiler bug on Apollo */
3371 	  curlevel->last = prev_from;
3372 	  if (++curlevel == endlevel)
3373 	    curlevel--; /* error ("Nesting too deep for parser"); */
3374 	  curlevel->prev = -1;
3375 	  curlevel->last = -1;
3376 	  if (targetdepth == depth) goto done;
3377 	  break;
3378 
3379 	case Sclose:
3380 	  depth--;
3381 	  if (depth < mindepth)
3382 	    mindepth = depth;
3383 	  if (curlevel != levelstart)
3384 	    curlevel--;
3385 	  curlevel->prev = curlevel->last;
3386 	  if (targetdepth == depth) goto done;
3387 	  break;
3388 
3389 	case Sstring:
3390 	case Sstring_fence:
3391 	  state->comstr_start = from - 1;
3392 	  if (stopbefore) goto stop;  /* this arg means stop at sexp start */
3393 	  curlevel->last = prev_from;
3394 	  state->instring = (code == Sstring
3395 			    ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3396 			    : ST_STRING_STYLE);
3397 	  if (boundary_stop) goto done;
3398 	startinstring:
3399 	  {
3400 	    nofence = state->instring != ST_STRING_STYLE;
3401 
3402 	    while (1)
3403 	      {
3404 		int c;
3405 		enum syntaxcode c_code;
3406 
3407 		if (from >= end) goto done;
3408 		c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3409 		c_code = SYNTAX (c);
3410 
3411 		/* Check C_CODE here so that if the char has
3412 		   a syntax-table property which says it is NOT
3413 		   a string character, it does not end the string.  */
3414 		if (nofence && c == state->instring && c_code == Sstring)
3415 		  break;
3416 
3417 		switch (c_code)
3418 		  {
3419 		  case Sstring_fence:
3420 		    if (!nofence) goto string_end;
3421 		    break;
3422 
3423 		  case Scharquote:
3424 		  case Sescape:
3425 		    INC_FROM;
3426 		  startquotedinstring:
3427 		    if (from >= end) goto endquoted;
3428 		    break;
3429 
3430 		  default:
3431 		    break;
3432 		  }
3433 		INC_FROM;
3434 		rarely_quit (++quit_count);
3435 	      }
3436 	  }
3437 	string_end:
3438 	  state->instring = -1;
3439 	  curlevel->prev = curlevel->last;
3440 	  INC_FROM;
3441 	  if (boundary_stop) goto done;
3442 	  break;
3443 
3444 	case Smath:
3445 	  /* FIXME: We should do something with it.  */
3446 	  break;
3447 	default:
3448 	  /* Ignore whitespace, punctuation, quote, endcomment.  */
3449 	  break;
3450 	}
3451     }
3452   goto done;
3453 
3454  stop:   /* Here if stopping before start of sexp. */
3455   from = prev_from;    /* We have just fetched the char that starts it; */
3456   from_byte = prev_from_byte;
3457   prev_from_syntax = prev_prev_from_syntax;
3458   goto done; /* but return the position before it. */
3459 
3460  endquoted:
3461   state->quoted = 1;
3462  done:
3463   state->depth = depth;
3464   state->mindepth = mindepth;
3465   state->thislevelstart = curlevel->prev;
3466   state->prevlevelstart
3467     = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3468   state->location = from;
3469   state->location_byte = from_byte;
3470   state->levelstarts = Qnil;
3471   while (curlevel > levelstart)
3472     state->levelstarts = Fcons (make_fixnum ((--curlevel)->last),
3473                                 state->levelstarts);
3474   state->prev_syntax = (SYNTAX_FLAGS_COMSTARTEND_FIRST (prev_from_syntax)
3475                         || state->quoted) ? prev_from_syntax : Smax;
3476 }
3477 
3478 /* Convert a (lisp) parse state to the internal form used in
3479    scan_sexps_forward.  */
3480 static void
internalize_parse_state(Lisp_Object external,struct lisp_parse_state * state)3481 internalize_parse_state (Lisp_Object external, struct lisp_parse_state *state)
3482 {
3483   Lisp_Object tem;
3484 
3485   if (NILP (external))
3486     {
3487       state->depth = 0;
3488       state->instring = -1;
3489       state->incomment = 0;
3490       state->quoted = 0;
3491       state->comstyle = 0;	/* comment style a by default.  */
3492       state->comstr_start = -1;	/* no comment/string seen.  */
3493       state->levelstarts = Qnil;
3494       state->prev_syntax = Smax;
3495     }
3496   else
3497     {
3498       tem = Fcar (external);
3499       state->depth = FIXNUMP (tem) ? XFIXNUM (tem) : 0;
3500 
3501       external = Fcdr (external);
3502       external = Fcdr (external);
3503       external = Fcdr (external);
3504       tem = Fcar (external);
3505       /* Check whether we are inside string_fence-style string: */
3506       state->instring = (!NILP (tem)
3507                          ? (CHARACTERP (tem) ? XFIXNAT (tem) : ST_STRING_STYLE)
3508                          : -1);
3509 
3510       external = Fcdr (external);
3511       tem = Fcar (external);
3512       state->incomment = (!NILP (tem)
3513                           ? (FIXNUMP (tem) ? XFIXNUM (tem) : -1)
3514                           : 0);
3515 
3516       external = Fcdr (external);
3517       tem = Fcar (external);
3518       state->quoted = !NILP (tem);
3519 
3520       /* if the eighth element of the list is nil, we are in comment
3521 	 style a.  If it is non-nil, we are in comment style b */
3522       external = Fcdr (external);
3523       external = Fcdr (external);
3524       tem = Fcar (external);
3525       state->comstyle = (NILP (tem)
3526                          ? 0
3527                          : (RANGED_FIXNUMP (0, tem, ST_COMMENT_STYLE)
3528                             ? XFIXNUM (tem)
3529                             : ST_COMMENT_STYLE));
3530 
3531       external = Fcdr (external);
3532       tem = Fcar (external);
3533       state->comstr_start =
3534 	RANGED_FIXNUMP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XFIXNUM (tem) : -1;
3535       external = Fcdr (external);
3536       tem = Fcar (external);
3537       state->levelstarts = tem;
3538 
3539       external = Fcdr (external);
3540       tem = Fcar (external);
3541       state->prev_syntax = NILP (tem) ? Smax : XFIXNUM (tem);
3542     }
3543 }
3544 
3545 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3546        doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3547 Parsing stops at TO or when certain criteria are met;
3548  point is set to where parsing stops.
3549 
3550 If OLDSTATE is omitted or nil, parsing assumes that FROM is the
3551  beginning of a function.  If not, OLDSTATE should be the state at
3552  FROM.
3553 
3554 Value is a list of elements describing final state of parsing:
3555  0. depth in parens.
3556  1. character address of start of innermost containing list; nil if none.
3557  2. character address of start of last complete sexp terminated.
3558  3. non-nil if inside a string.
3559     (it is the character that will terminate the string,
3560      or t if the string should be terminated by a generic string delimiter.)
3561  4. nil if outside a comment, t if inside a non-nestable comment,
3562     else an integer (the current comment nesting).
3563  5. t if following a quote character.
3564  6. the minimum paren-depth encountered during this scan.
3565  7. style of comment, if any.
3566  8. character address of start of comment or string; nil if not in one.
3567  9. List of positions of currently open parens, outermost first.
3568 10. When the last position scanned holds the first character of a
3569     (potential) two character construct, the syntax of that position,
3570     otherwise nil.  That construct can be a two character comment
3571     delimiter or an Escaped or Char-quoted character.
3572 11..... Possible further internal information used by `parse-partial-sexp'.
3573 
3574 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3575 in parentheses becomes equal to TARGETDEPTH.
3576 Fourth arg STOPBEFORE non-nil means stop when we come to
3577  any character that starts a sexp.
3578 Fifth arg OLDSTATE is a list like what this function returns.
3579  It is used to initialize the state of the parse.  Elements number 1, 2, 6
3580  are ignored.
3581 Sixth arg COMMENTSTOP non-nil means stop after the start of a comment.
3582  If it is the symbol `syntax-table', stop after the start of a comment or a
3583  string, or after end of a comment or a string.  */)
3584   (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth,
3585    Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3586 {
3587   struct lisp_parse_state state;
3588   EMACS_INT target;
3589 
3590   if (!NILP (targetdepth))
3591     {
3592       CHECK_FIXNUM (targetdepth);
3593       target = XFIXNUM (targetdepth);
3594     }
3595   else
3596     target = TYPE_MINIMUM (EMACS_INT);	/* We won't reach this depth.  */
3597 
3598   if (fix_position (to) < fix_position (from))
3599     error ("End position is smaller than start position");
3600 
3601   validate_region (&from, &to);
3602   internalize_parse_state (oldstate, &state);
3603   scan_sexps_forward (&state, XFIXNUM (from), CHAR_TO_BYTE (XFIXNUM (from)),
3604 		      XFIXNUM (to),
3605 		      target, !NILP (stopbefore),
3606 		      (NILP (commentstop)
3607 		       ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3608 
3609   SET_PT_BOTH (state.location, state.location_byte);
3610 
3611   return
3612     Fcons (make_fixnum (state.depth),
3613 	   Fcons (state.prevlevelstart < 0
3614 		  ? Qnil : make_fixnum (state.prevlevelstart),
3615 	     Fcons (state.thislevelstart < 0
3616 		    ? Qnil : make_fixnum (state.thislevelstart),
3617 	       Fcons (state.instring >= 0
3618 		      ? (state.instring == ST_STRING_STYLE
3619 			 ? Qt : make_fixnum (state.instring)) : Qnil,
3620 		 Fcons (state.incomment < 0 ? Qt :
3621 			(state.incomment == 0 ? Qnil :
3622 			 make_fixnum (state.incomment)),
3623 		   Fcons (state.quoted ? Qt : Qnil,
3624 		     Fcons (make_fixnum (state.mindepth),
3625 		       Fcons ((state.comstyle
3626 			       ? (state.comstyle == ST_COMMENT_STYLE
3627 				  ? Qsyntax_table
3628 				  : make_fixnum (state.comstyle))
3629 			       : Qnil),
3630 		         Fcons (((state.incomment
3631                                   || (state.instring >= 0))
3632                                  ? make_fixnum (state.comstr_start)
3633                                  : Qnil),
3634 			   Fcons (state.levelstarts,
3635                              Fcons (state.prev_syntax == Smax
3636                                     ? Qnil
3637                                     : make_fixnum (state.prev_syntax),
3638                                 Qnil)))))))))));
3639 }
3640 
3641 void
init_syntax_once(void)3642 init_syntax_once (void)
3643 {
3644   register int i, c;
3645   Lisp_Object temp;
3646 
3647   /* This has to be done here, before we call Fmake_char_table.  */
3648   DEFSYM (Qsyntax_table, "syntax-table");
3649 
3650   /* Create objects which can be shared among syntax tables.  */
3651   Vsyntax_code_object = make_nil_vector (Smax);
3652   for (i = 0; i < Smax; i++)
3653     ASET (Vsyntax_code_object, i, list1 (make_fixnum (i)));
3654 
3655   /* Now we are ready to set up this property, so we can
3656      create syntax tables.  */
3657   Fput (Qsyntax_table, Qchar_table_extra_slots, make_fixnum (0));
3658 
3659   temp = AREF (Vsyntax_code_object, Swhitespace);
3660 
3661   Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3662 
3663   /* Control characters should not be whitespace.  */
3664   temp = AREF (Vsyntax_code_object, Spunct);
3665   for (i = 0; i <= ' ' - 1; i++)
3666     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3667   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3668 
3669   /* Except that a few really are whitespace.  */
3670   temp = AREF (Vsyntax_code_object, Swhitespace);
3671   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3672   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3673   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3674   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3675   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3676 
3677   temp = AREF (Vsyntax_code_object, Sword);
3678   for (i = 'a'; i <= 'z'; i++)
3679     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3680   for (i = 'A'; i <= 'Z'; i++)
3681     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3682   for (i = '0'; i <= '9'; i++)
3683     SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3684 
3685   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3686   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3687 
3688   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3689 			Fcons (make_fixnum (Sopen), make_fixnum (')')));
3690   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3691 			Fcons (make_fixnum (Sclose), make_fixnum ('(')));
3692   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3693 			Fcons (make_fixnum (Sopen), make_fixnum (']')));
3694   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3695 			Fcons (make_fixnum (Sclose), make_fixnum ('[')));
3696   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3697 			Fcons (make_fixnum (Sopen), make_fixnum ('}')));
3698   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3699 			Fcons (make_fixnum (Sclose), make_fixnum ('{')));
3700   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3701 			Fcons (make_fixnum (Sstring), Qnil));
3702   SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3703 			Fcons (make_fixnum (Sescape), Qnil));
3704 
3705   temp = AREF (Vsyntax_code_object, Ssymbol);
3706   for (i = 0; i < 10; i++)
3707     {
3708       c = "_-+*/&|<>="[i];
3709       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3710     }
3711 
3712   temp = AREF (Vsyntax_code_object, Spunct);
3713   for (i = 0; i < 12; i++)
3714     {
3715       c = ".,;:?!#@~^'`"[i];
3716       SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3717     }
3718 
3719   /* All multibyte characters have syntax `word' by default.  */
3720   temp = AREF (Vsyntax_code_object, Sword);
3721   char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3722 }
3723 
3724 void
syms_of_syntax(void)3725 syms_of_syntax (void)
3726 {
3727   DEFSYM (Qsyntax_table_p, "syntax-table-p");
3728   DEFSYM (Qsyntax_ppss, "syntax-ppss");
3729   DEFVAR_LISP ("comment-use-syntax-ppss",
3730 	       Vcomment_use_syntax_ppss,
3731 	       doc: /* Non-nil means `forward-comment' can use `syntax-ppss' internally.  */);
3732   Vcomment_use_syntax_ppss = Qt;
3733 
3734   staticpro (&Vsyntax_code_object);
3735 
3736   staticpro (&gl_state.object);
3737   staticpro (&gl_state.global_code);
3738   staticpro (&gl_state.current_syntax_table);
3739   staticpro (&gl_state.old_prop);
3740 
3741   DEFSYM (Qscan_error, "scan-error");
3742   Fput (Qscan_error, Qerror_conditions,
3743 	pure_list (Qscan_error, Qerror));
3744   Fput (Qscan_error, Qerror_message,
3745 	build_pure_c_string ("Scan error"));
3746 
3747   DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3748 	       doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace.  */);
3749 
3750   DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3751 	       doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3752 Otherwise, that text property is simply ignored.
3753 See the info node `(elisp)Syntax Properties' for a description of the
3754 `syntax-table' property.  */);
3755 
3756   DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done,
3757 	      doc: /* Position up to which syntax-table properties have been set.  */);
3758   syntax_propertize__done = -1;
3759   DEFSYM (Qinternal__syntax_propertize, "internal--syntax-propertize");
3760   Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
3761 
3762   words_include_escapes = 0;
3763   DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3764 	       doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words.  */);
3765 
3766   DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3767 	       doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol.  */);
3768   multibyte_syntax_as_symbol = 0;
3769 
3770   DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3771 	       open_paren_in_column_0_is_defun_start,
3772 	       doc: /* Non-nil means an open paren in column 0 denotes the start of a defun.  */);
3773   open_paren_in_column_0_is_defun_start = 1;
3774 
3775 
3776   DEFVAR_LISP ("find-word-boundary-function-table",
3777 	       Vfind_word_boundary_function_table,
3778 	       doc: /*
3779 Char table of functions to search for the word boundary.
3780 Each function is called with two arguments; POS and LIMIT.
3781 POS and LIMIT are character positions in the current buffer.
3782 
3783 If POS is less than LIMIT, POS is at the first character of a word,
3784 and the return value of a function should be a position after the
3785 last character of that word.
3786 
3787 If POS is not less than LIMIT, POS is at the last character of a word,
3788 and the return value of a function should be a position at the first
3789 character of that word.
3790 
3791 In both cases, LIMIT bounds the search. */);
3792   Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3793 
3794   DEFVAR_BOOL ("comment-end-can-be-escaped", comment_end_can_be_escaped,
3795                doc: /* Non-nil means an escaped ender inside a comment doesn't end the comment.  */);
3796   comment_end_can_be_escaped = false;
3797   DEFSYM (Qcomment_end_can_be_escaped, "comment-end-can-be-escaped");
3798   Fmake_variable_buffer_local (Qcomment_end_can_be_escaped);
3799 
3800   defsubr (&Ssyntax_table_p);
3801   defsubr (&Ssyntax_table);
3802   defsubr (&Sstandard_syntax_table);
3803   defsubr (&Scopy_syntax_table);
3804   defsubr (&Sset_syntax_table);
3805   defsubr (&Schar_syntax);
3806   defsubr (&Ssyntax_class_to_char);
3807   defsubr (&Smatching_paren);
3808   defsubr (&Sstring_to_syntax);
3809   defsubr (&Smodify_syntax_entry);
3810   defsubr (&Sinternal_describe_syntax_value);
3811 
3812   defsubr (&Sforward_word);
3813 
3814   defsubr (&Sskip_chars_forward);
3815   defsubr (&Sskip_chars_backward);
3816   defsubr (&Sskip_syntax_forward);
3817   defsubr (&Sskip_syntax_backward);
3818 
3819   defsubr (&Sforward_comment);
3820   defsubr (&Sscan_lists);
3821   defsubr (&Sscan_sexps);
3822   defsubr (&Sbackward_prefix_chars);
3823   defsubr (&Sparse_partial_sexp);
3824 }
3825