1 /* GNU Emacs routines to deal with syntax tables; also word and list parsing.
2 Copyright (C) 1985, 1987, 1993-1995, 1997-1999, 2001-2021 Free
3 Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software: you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation, either version 3 of the License, or (at
10 your option) any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs. If not, see <https://www.gnu.org/licenses/>. */
19
20 #include <config.h>
21
22 #include "lisp.h"
23 #include "character.h"
24 #include "buffer.h"
25 #include "regex-emacs.h"
26 #include "syntax.h"
27 #include "intervals.h"
28 #include "category.h"
29
30 /* Make syntax table lookup grant data in gl_state. */
31 #define SYNTAX(c) syntax_property (c, 1)
32 #define SYNTAX_ENTRY(c) syntax_property_entry (c, 1)
33 #define SYNTAX_WITH_FLAGS(c) syntax_property_with_flags (c, 1)
34
35 /* Eight single-bit flags have the following meanings:
36 1. This character is the first of a two-character comment-start sequence.
37 2. This character is the second of a two-character comment-start sequence.
38 3. This character is the first of a two-character comment-end sequence.
39 4. This character is the second of a two-character comment-end sequence.
40 5. This character is a prefix, for backward-prefix-chars.
41 6. The char is part of a delimiter for comments of style "b".
42 7. This character is part of a nestable comment sequence.
43 8. The char is part of a delimiter for comments of style "c".
44 Note that any two-character sequence whose first character has flag 1
45 and whose second character has flag 2 will be interpreted as a comment start.
46
47 Bits 6 and 8 discriminate among different comment styles.
48 Languages such as C++ allow two orthogonal syntax start/end pairs
49 and bit 6 determines whether a comment-end or Scommentend
50 ends style a or b. Comment markers can start style a, b, c, or bc.
51 Style a is always the default.
52 For 2-char comment markers, the style b flag is looked up only on the second
53 char of the comment marker and on the first char of the comment ender.
54 For style c (like the nested flag), the flag can be placed on any of
55 the chars. */
56
57 /* These functions extract specific flags from an integer
58 that holds the syntax code and the flags. */
59
60 static bool
SYNTAX_FLAGS_COMSTART_FIRST(int flags)61 SYNTAX_FLAGS_COMSTART_FIRST (int flags)
62 {
63 return (flags >> 16) & 1;
64 }
65 static bool
SYNTAX_FLAGS_COMSTART_SECOND(int flags)66 SYNTAX_FLAGS_COMSTART_SECOND (int flags)
67 {
68 return (flags >> 17) & 1;
69 }
70 static bool
SYNTAX_FLAGS_COMEND_FIRST(int flags)71 SYNTAX_FLAGS_COMEND_FIRST (int flags)
72 {
73 return (flags >> 18) & 1;
74 }
75 static bool
SYNTAX_FLAGS_COMEND_SECOND(int flags)76 SYNTAX_FLAGS_COMEND_SECOND (int flags)
77 {
78 return (flags >> 19) & 1;
79 }
80 static bool
SYNTAX_FLAGS_COMSTARTEND_FIRST(int flags)81 SYNTAX_FLAGS_COMSTARTEND_FIRST (int flags)
82 {
83 return (flags & 0x50000) != 0;
84 }
85 static bool
SYNTAX_FLAGS_PREFIX(int flags)86 SYNTAX_FLAGS_PREFIX (int flags)
87 {
88 return (flags >> 20) & 1;
89 }
90 static bool
SYNTAX_FLAGS_COMMENT_STYLEB(int flags)91 SYNTAX_FLAGS_COMMENT_STYLEB (int flags)
92 {
93 return (flags >> 21) & 1;
94 }
95 static bool
SYNTAX_FLAGS_COMMENT_STYLEC(int flags)96 SYNTAX_FLAGS_COMMENT_STYLEC (int flags)
97 {
98 return (flags >> 23) & 1;
99 }
100 static int
SYNTAX_FLAGS_COMMENT_STYLEC2(int flags)101 SYNTAX_FLAGS_COMMENT_STYLEC2 (int flags)
102 {
103 return (flags >> 22) & 2; /* SYNTAX_FLAGS_COMMENT_STYLEC (flags) * 2 */
104 }
105 static bool
SYNTAX_FLAGS_COMMENT_NESTED(int flags)106 SYNTAX_FLAGS_COMMENT_NESTED (int flags)
107 {
108 return (flags >> 22) & 1;
109 }
110
111 /* FLAGS should be the flags of the main char of the comment marker, e.g.
112 the second for comstart and the first for comend. */
113 static int
SYNTAX_FLAGS_COMMENT_STYLE(int flags,int other_flags)114 SYNTAX_FLAGS_COMMENT_STYLE (int flags, int other_flags)
115 {
116 return (SYNTAX_FLAGS_COMMENT_STYLEB (flags)
117 | SYNTAX_FLAGS_COMMENT_STYLEC2 (flags)
118 | SYNTAX_FLAGS_COMMENT_STYLEC2 (other_flags));
119 }
120
121 /* Extract a particular flag for a given character. */
122
123 static bool
SYNTAX_COMEND_FIRST(int c)124 SYNTAX_COMEND_FIRST (int c)
125 {
126 return SYNTAX_FLAGS_COMEND_FIRST (SYNTAX_WITH_FLAGS (c));
127 }
128
129 /* We use these constants in place for comment-style and
130 string-ender-char to distinguish comments/strings started by
131 comment_fence and string_fence codes. */
132
133 enum
134 {
135 ST_COMMENT_STYLE = 256 + 1,
136 ST_STRING_STYLE = 256 + 2
137 };
138
139 /* This is the internal form of the parse state used in parse-partial-sexp. */
140
141 struct lisp_parse_state
142 {
143 EMACS_INT depth; /* Depth at end of parsing. */
144 int instring; /* -1 if not within string, else desired terminator. */
145 EMACS_INT incomment; /* -1 if in unnestable comment else comment nesting */
146 int comstyle; /* comment style a=0, or b=1, or ST_COMMENT_STYLE. */
147 bool quoted; /* True if just after an escape char at end of parsing. */
148 EMACS_INT mindepth; /* Minimum depth seen while scanning. */
149 /* Char number of most recent start-of-expression at current level */
150 ptrdiff_t thislevelstart;
151 /* Char number of start of containing expression */
152 ptrdiff_t prevlevelstart;
153 ptrdiff_t location; /* Char number at which parsing stopped. */
154 ptrdiff_t location_byte; /* Corresponding byte position. */
155 ptrdiff_t comstr_start; /* Position of last comment/string starter. */
156 Lisp_Object levelstarts; /* Char numbers of starts-of-expression
157 of levels (starting from outermost). */
158 int prev_syntax; /* Syntax of previous position scanned, when
159 that position (potentially) holds the first char
160 of a 2-char construct, i.e. comment delimiter
161 or Sescape, etc. Smax otherwise. */
162 };
163
164 /* These variables are a cache for finding the start of a defun.
165 find_start_pos is the place for which the defun start was found.
166 find_start_value is the defun start position found for it.
167 find_start_value_byte is the corresponding byte position.
168 find_start_buffer is the buffer it was found in.
169 find_start_begv is the BEGV value when it was found.
170 find_start_modiff is the value of MODIFF when it was found. */
171
172 static ptrdiff_t find_start_pos;
173 static ptrdiff_t find_start_value;
174 static ptrdiff_t find_start_value_byte;
175 static struct buffer *find_start_buffer;
176 static ptrdiff_t find_start_begv;
177 static modiff_count find_start_modiff;
178
179
180 static Lisp_Object skip_chars (bool, Lisp_Object, Lisp_Object, bool);
181 static Lisp_Object skip_syntaxes (bool, Lisp_Object, Lisp_Object);
182 static Lisp_Object scan_lists (EMACS_INT, EMACS_INT, EMACS_INT, bool);
183 static void scan_sexps_forward (struct lisp_parse_state *,
184 ptrdiff_t, ptrdiff_t, ptrdiff_t, EMACS_INT,
185 bool, int);
186 static void internalize_parse_state (Lisp_Object, struct lisp_parse_state *);
187 static bool in_classes (int, Lisp_Object);
188 static void parse_sexp_propertize (ptrdiff_t charpos);
189
190 /* This setter is used only in this file, so it can be private. */
191 static void
bset_syntax_table(struct buffer * b,Lisp_Object val)192 bset_syntax_table (struct buffer *b, Lisp_Object val)
193 {
194 b->syntax_table_ = val;
195 }
196
197 /* Whether the syntax of the character C has the prefix flag set. */
198 bool
syntax_prefix_flag_p(int c)199 syntax_prefix_flag_p (int c)
200 {
201 return SYNTAX_FLAGS_PREFIX (SYNTAX_WITH_FLAGS (c));
202 }
203
204 struct gl_state_s gl_state; /* Global state of syntax parser. */
205
206 enum { INTERVALS_AT_ONCE = 10 }; /* 1 + max-number of intervals
207 to scan to property-change. */
208
209 /* Set the syntax entry VAL for char C in table TABLE. */
210
211 static void
SET_RAW_SYNTAX_ENTRY(Lisp_Object table,int c,Lisp_Object val)212 SET_RAW_SYNTAX_ENTRY (Lisp_Object table, int c, Lisp_Object val)
213 {
214 CHAR_TABLE_SET (table, c, val);
215 }
216
217 /* Set the syntax entry VAL for char-range RANGE in table TABLE.
218 RANGE is a cons (FROM . TO) specifying the range of characters. */
219
220 static void
SET_RAW_SYNTAX_ENTRY_RANGE(Lisp_Object table,Lisp_Object range,Lisp_Object val)221 SET_RAW_SYNTAX_ENTRY_RANGE (Lisp_Object table, Lisp_Object range,
222 Lisp_Object val)
223 {
224 Fset_char_table_range (table, range, val);
225 }
226
227 /* Extract the information from the entry for character C
228 in the current syntax table. */
229
230 static Lisp_Object
SYNTAX_MATCH(int c)231 SYNTAX_MATCH (int c)
232 {
233 Lisp_Object ent = SYNTAX_ENTRY (c);
234 return CONSP (ent) ? XCDR (ent) : Qnil;
235 }
236
237 /* This should be called with FROM at the start of forward
238 search, or after the last position of the backward search. It
239 makes sure that the first char is picked up with correct table, so
240 one does not need to call UPDATE_SYNTAX_TABLE immediately after the
241 call.
242 Sign of COUNT gives the direction of the search.
243 */
244
245 static void
SETUP_SYNTAX_TABLE(ptrdiff_t from,ptrdiff_t count)246 SETUP_SYNTAX_TABLE (ptrdiff_t from, ptrdiff_t count)
247 {
248 SETUP_BUFFER_SYNTAX_TABLE ();
249 gl_state.b_property = BEGV;
250 gl_state.e_property = ZV + 1;
251 gl_state.object = Qnil;
252 gl_state.offset = 0;
253 if (parse_sexp_lookup_properties)
254 {
255 if (count > 0)
256 update_syntax_table_forward (from, true, Qnil);
257 else if (from > BEGV)
258 {
259 update_syntax_table (from - 1, count, true, Qnil);
260 parse_sexp_propertize (from - 1);
261 }
262 }
263 }
264
265 /* Same as above, but in OBJECT. If OBJECT is nil, use current buffer.
266 If it is t (which is only used in fast_c_string_match_ignore_case),
267 ignore properties altogether.
268
269 This is meant for regex-emacs.c to use. For buffers, regex-emacs.c
270 passes arguments to the UPDATE_SYNTAX_TABLE functions which are
271 relative to BEGV. So if it is a buffer, we set the offset field to
272 BEGV. */
273
274 void
SETUP_SYNTAX_TABLE_FOR_OBJECT(Lisp_Object object,ptrdiff_t from,ptrdiff_t count)275 SETUP_SYNTAX_TABLE_FOR_OBJECT (Lisp_Object object,
276 ptrdiff_t from, ptrdiff_t count)
277 {
278 SETUP_BUFFER_SYNTAX_TABLE ();
279 gl_state.object = object;
280 if (BUFFERP (gl_state.object))
281 {
282 struct buffer *buf = XBUFFER (gl_state.object);
283 gl_state.b_property = 1;
284 gl_state.e_property = BUF_ZV (buf) - BUF_BEGV (buf) + 1;
285 gl_state.offset = BUF_BEGV (buf) - 1;
286 }
287 else if (NILP (gl_state.object))
288 {
289 gl_state.b_property = 1;
290 gl_state.e_property = ZV - BEGV + 1;
291 gl_state.offset = BEGV - 1;
292 }
293 else if (EQ (gl_state.object, Qt))
294 {
295 gl_state.b_property = 0;
296 gl_state.e_property = PTRDIFF_MAX;
297 gl_state.offset = 0;
298 }
299 else
300 {
301 gl_state.b_property = 0;
302 gl_state.e_property = 1 + SCHARS (gl_state.object);
303 gl_state.offset = 0;
304 }
305 if (parse_sexp_lookup_properties)
306 update_syntax_table (from + gl_state.offset - (count <= 0),
307 count, 1, gl_state.object);
308 }
309
310 /* Update gl_state to an appropriate interval which contains CHARPOS. The
311 sign of COUNT gives the relative position of CHARPOS wrt the previously
312 valid interval. If INIT, only [be]_property fields of gl_state are
313 valid at start, the rest is filled basing on OBJECT.
314
315 `gl_state.*_i' are the intervals, and CHARPOS is further in the search
316 direction than the intervals - or in an interval. We update the
317 current syntax-table basing on the property of this interval, and
318 update the interval to start further than CHARPOS - or be
319 NULL. We also update lim_property to be the next value of
320 charpos to call this subroutine again - or be before/after the
321 start/end of OBJECT. */
322
323 void
update_syntax_table(ptrdiff_t charpos,EMACS_INT count,bool init,Lisp_Object object)324 update_syntax_table (ptrdiff_t charpos, EMACS_INT count, bool init,
325 Lisp_Object object)
326 {
327 Lisp_Object tmp_table;
328 int cnt = 0;
329 bool invalidate = true;
330 INTERVAL i;
331
332 if (init)
333 {
334 gl_state.old_prop = Qnil;
335 gl_state.start = gl_state.b_property;
336 gl_state.stop = gl_state.e_property;
337 i = interval_of (charpos, object);
338 gl_state.backward_i = gl_state.forward_i = i;
339 invalidate = false;
340 if (!i)
341 return;
342 i = gl_state.forward_i;
343 gl_state.b_property = i->position - gl_state.offset;
344 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
345 }
346 else
347 {
348 i = count > 0 ? gl_state.forward_i : gl_state.backward_i;
349
350 /* We are guaranteed to be called with CHARPOS either in i,
351 or further off. */
352 if (!i)
353 error ("Error in syntax_table logic for to-the-end intervals");
354 else if (charpos < i->position) /* Move left. */
355 {
356 if (count > 0)
357 error ("Error in syntax_table logic for intervals <-");
358 /* Update the interval. */
359 i = update_interval (i, charpos);
360 if (INTERVAL_LAST_POS (i) != gl_state.b_property)
361 {
362 invalidate = false;
363 gl_state.forward_i = i;
364 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
365 }
366 }
367 else if (charpos >= INTERVAL_LAST_POS (i)) /* Move right. */
368 {
369 if (count < 0)
370 error ("Error in syntax_table logic for intervals ->");
371 /* Update the interval. */
372 i = update_interval (i, charpos);
373 if (i->position != gl_state.e_property)
374 {
375 invalidate = false;
376 gl_state.backward_i = i;
377 gl_state.b_property = i->position - gl_state.offset;
378 }
379 }
380 }
381
382 tmp_table = textget (i->plist, Qsyntax_table);
383
384 if (invalidate)
385 invalidate = !EQ (tmp_table, gl_state.old_prop); /* Need to invalidate? */
386
387 if (invalidate) /* Did not get to adjacent interval. */
388 { /* with the same table => */
389 /* invalidate the old range. */
390 if (count > 0)
391 {
392 gl_state.backward_i = i;
393 gl_state.b_property = i->position - gl_state.offset;
394 }
395 else
396 {
397 gl_state.forward_i = i;
398 gl_state.e_property = INTERVAL_LAST_POS (i) - gl_state.offset;
399 }
400 }
401
402 if (!EQ (tmp_table, gl_state.old_prop))
403 {
404 gl_state.current_syntax_table = tmp_table;
405 gl_state.old_prop = tmp_table;
406 if (EQ (Fsyntax_table_p (tmp_table), Qt))
407 {
408 gl_state.use_global = 0;
409 }
410 else if (CONSP (tmp_table))
411 {
412 gl_state.use_global = 1;
413 gl_state.global_code = tmp_table;
414 }
415 else
416 {
417 gl_state.use_global = 0;
418 gl_state.current_syntax_table = BVAR (current_buffer, syntax_table);
419 }
420 }
421
422 while (i)
423 {
424 if (cnt && !EQ (tmp_table, textget (i->plist, Qsyntax_table)))
425 {
426 if (count > 0)
427 {
428 gl_state.e_property = i->position - gl_state.offset;
429 gl_state.forward_i = i;
430 }
431 else
432 {
433 gl_state.b_property
434 = i->position + LENGTH (i) - gl_state.offset;
435 gl_state.backward_i = i;
436 }
437 return;
438 }
439 else if (cnt == INTERVALS_AT_ONCE)
440 {
441 if (count > 0)
442 {
443 gl_state.e_property
444 = i->position + LENGTH (i) - gl_state.offset
445 /* e_property at EOB is not set to ZV but to ZV+1, so that
446 we can do INC(from);UPDATE_SYNTAX_TABLE_FORWARD without
447 having to check eob between the two. */
448 + (next_interval (i) ? 0 : 1);
449 gl_state.forward_i = i;
450 }
451 else
452 {
453 gl_state.b_property = i->position - gl_state.offset;
454 gl_state.backward_i = i;
455 }
456 return;
457 }
458 cnt++;
459 i = count > 0 ? next_interval (i) : previous_interval (i);
460 }
461 eassert (i == NULL); /* This property goes to the end. */
462 if (count > 0)
463 {
464 gl_state.e_property = gl_state.stop;
465 gl_state.forward_i = i;
466 }
467 else
468 gl_state.b_property = gl_state.start;
469 }
470
471 static void
parse_sexp_propertize(ptrdiff_t charpos)472 parse_sexp_propertize (ptrdiff_t charpos)
473 {
474 EMACS_INT zv = ZV;
475 if (syntax_propertize__done <= charpos
476 && syntax_propertize__done < zv)
477 {
478 modiff_count modiffs = CHARS_MODIFF;
479 safe_call1 (Qinternal__syntax_propertize,
480 make_fixnum (min (zv, 1 + charpos)));
481 if (modiffs != CHARS_MODIFF)
482 error ("internal--syntax-propertize modified the buffer!");
483 if (syntax_propertize__done <= charpos
484 && syntax_propertize__done < zv)
485 error ("internal--syntax-propertize did not move"
486 " syntax-propertize--done");
487 SETUP_SYNTAX_TABLE (charpos, 1);
488 }
489 else if (gl_state.e_property > syntax_propertize__done)
490 {
491 gl_state.e_property = syntax_propertize__done;
492 gl_state.e_property_truncated = true;
493 }
494 else if (gl_state.e_property_truncated
495 && gl_state.e_property < syntax_propertize__done)
496 { /* When moving backward, e_property might be set without resetting
497 e_property_truncated, so the e_property_truncated flag may
498 occasionally be left raised spuriously. This should be rare. */
499 gl_state.e_property_truncated = false;
500 update_syntax_table_forward (charpos, false, Qnil);
501 }
502 }
503
504 void
update_syntax_table_forward(ptrdiff_t charpos,bool init,Lisp_Object object)505 update_syntax_table_forward (ptrdiff_t charpos, bool init,
506 Lisp_Object object)
507 {
508 if (gl_state.e_property_truncated)
509 {
510 eassert (NILP (object));
511 eassert (charpos >= gl_state.e_property);
512 parse_sexp_propertize (charpos);
513 }
514 else
515 {
516 update_syntax_table (charpos, 1, init, object);
517 if (NILP (object) && gl_state.e_property > syntax_propertize__done)
518 parse_sexp_propertize (charpos);
519 }
520 }
521
522 /* Returns true if char at CHARPOS is quoted.
523 Global syntax-table data should be set up already to be good at CHARPOS
524 or after. On return global syntax data is good for lookup at CHARPOS. */
525
526 static bool
char_quoted(ptrdiff_t charpos,ptrdiff_t bytepos)527 char_quoted (ptrdiff_t charpos, ptrdiff_t bytepos)
528 {
529 enum syntaxcode code;
530 ptrdiff_t beg = BEGV;
531 bool quoted = 0;
532 ptrdiff_t orig = charpos;
533
534 while (charpos > beg)
535 {
536 int c;
537 dec_both (&charpos, &bytepos);
538
539 UPDATE_SYNTAX_TABLE_BACKWARD (charpos);
540 c = FETCH_CHAR_AS_MULTIBYTE (bytepos);
541 code = SYNTAX (c);
542 if (! (code == Scharquote || code == Sescape))
543 break;
544
545 quoted = !quoted;
546 }
547
548 UPDATE_SYNTAX_TABLE (orig);
549 return quoted;
550 }
551
552 /* Return the bytepos one character before BYTEPOS.
553 We assume that BYTEPOS is not at the start of the buffer. */
554
555 static ptrdiff_t
dec_bytepos(ptrdiff_t bytepos)556 dec_bytepos (ptrdiff_t bytepos)
557 {
558 return (bytepos
559 - (!NILP (BVAR (current_buffer, enable_multibyte_characters))
560 ? prev_char_len (bytepos) : 1));
561 }
562
563 /* Return a defun-start position before POS and not too far before.
564 It should be the last one before POS, or nearly the last.
565
566 When open_paren_in_column_0_is_defun_start is nonzero,
567 only the beginning of the buffer is treated as a defun-start.
568
569 We record the information about where the scan started
570 and what its result was, so that another call in the same area
571 can return the same value very quickly.
572
573 There is no promise at which position the global syntax data is
574 valid on return from the subroutine, so the caller should explicitly
575 update the global data. */
576
577 static ptrdiff_t
find_defun_start(ptrdiff_t pos,ptrdiff_t pos_byte)578 find_defun_start (ptrdiff_t pos, ptrdiff_t pos_byte)
579 {
580 ptrdiff_t opoint = PT, opoint_byte = PT_BYTE;
581
582 /* Use previous finding, if it's valid and applies to this inquiry. */
583 if (current_buffer == find_start_buffer
584 /* Reuse the defun-start even if POS is a little farther on.
585 POS might be in the next defun, but that's ok.
586 Our value may not be the best possible, but will still be usable. */
587 && pos <= find_start_pos + 1000
588 && pos >= find_start_value
589 && BEGV == find_start_begv
590 && MODIFF == find_start_modiff)
591 return find_start_value;
592
593 if (!NILP (Vcomment_use_syntax_ppss))
594 {
595 modiff_count modiffs = CHARS_MODIFF;
596 Lisp_Object ppss = call1 (Qsyntax_ppss, make_fixnum (pos));
597 if (modiffs != CHARS_MODIFF)
598 error ("syntax-ppss modified the buffer!");
599 TEMP_SET_PT_BOTH (opoint, opoint_byte);
600 Lisp_Object boc = Fnth (make_fixnum (8), ppss);
601 if (FIXNUMP (boc))
602 {
603 find_start_value = XFIXNUM (boc);
604 find_start_value_byte = CHAR_TO_BYTE (find_start_value);
605 }
606 else
607 {
608 find_start_value = pos;
609 find_start_value_byte = pos_byte;
610 }
611 goto found;
612 }
613 if (!open_paren_in_column_0_is_defun_start)
614 {
615 find_start_value = BEGV;
616 find_start_value_byte = BEGV_BYTE;
617 goto found;
618 }
619
620 /* Back up to start of line. */
621 scan_newline (pos, pos_byte, BEGV, BEGV_BYTE, -1, 1);
622
623 /* We optimize syntax-table lookup for rare updates. Thus we accept
624 only those `^\s(' which are good in global _and_ text-property
625 syntax-tables. */
626 SETUP_BUFFER_SYNTAX_TABLE ();
627 while (PT > BEGV)
628 {
629 /* Open-paren at start of line means we may have found our
630 defun-start. */
631 int c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
632 if (SYNTAX (c) == Sopen)
633 {
634 SETUP_SYNTAX_TABLE (PT + 1, -1); /* Try again... */
635 c = FETCH_CHAR_AS_MULTIBYTE (PT_BYTE);
636 if (SYNTAX (c) == Sopen)
637 break;
638 /* Now fallback to the default value. */
639 SETUP_BUFFER_SYNTAX_TABLE ();
640 }
641 /* Move to beg of previous line. */
642 scan_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, -2, 1);
643 }
644
645 /* Record what we found, for the next try. */
646 find_start_value = PT;
647 find_start_value_byte = PT_BYTE;
648 TEMP_SET_PT_BOTH (opoint, opoint_byte);
649
650 found:
651 find_start_buffer = current_buffer;
652 find_start_modiff = MODIFF;
653 find_start_begv = BEGV;
654 find_start_pos = pos;
655
656 return find_start_value;
657 }
658
659 /* Return the SYNTAX_COMEND_FIRST of the character before POS, POS_BYTE. */
660
661 static bool
prev_char_comend_first(ptrdiff_t pos,ptrdiff_t pos_byte)662 prev_char_comend_first (ptrdiff_t pos, ptrdiff_t pos_byte)
663 {
664 int c;
665 bool val;
666
667 dec_both (&pos, &pos_byte);
668 UPDATE_SYNTAX_TABLE_BACKWARD (pos);
669 c = FETCH_CHAR (pos_byte);
670 val = SYNTAX_COMEND_FIRST (c);
671 UPDATE_SYNTAX_TABLE_FORWARD (pos + 1);
672 return val;
673 }
674
675 /* Check whether charpos FROM is at the end of a comment.
676 FROM_BYTE is the bytepos corresponding to FROM.
677 Do not move back before STOP.
678
679 Return true if we find a comment ending at FROM/FROM_BYTE.
680
681 If successful, store the charpos of the comment's beginning
682 into *CHARPOS_PTR, and the bytepos into *BYTEPOS_PTR.
683
684 Global syntax data remains valid for backward search starting at
685 the returned value (or at FROM, if the search was not successful). */
686
687 static bool
back_comment(ptrdiff_t from,ptrdiff_t from_byte,ptrdiff_t stop,bool comnested,int comstyle,ptrdiff_t * charpos_ptr,ptrdiff_t * bytepos_ptr)688 back_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
689 bool comnested, int comstyle, ptrdiff_t *charpos_ptr,
690 ptrdiff_t *bytepos_ptr)
691 {
692 /* Look back, counting the parity of string-quotes,
693 and recording the comment-starters seen.
694 When we reach a safe place, assume that's not in a string;
695 then step the main scan to the earliest comment-starter seen
696 an even number of string quotes away from the safe place.
697
698 OFROM[I] is position of the earliest comment-starter seen
699 which is I+2X quotes from the comment-end.
700 PARITY is current parity of quotes from the comment end. */
701 int string_style = -1; /* Presumed outside of any string. */
702 bool string_lossage = 0;
703 /* Not a real lossage: indicates that we have passed a matching comment
704 starter plus a non-matching comment-ender, meaning that any matching
705 comment-starter we might see later could be a false positive (hidden
706 inside another comment).
707 Test case: { a (* b } c (* d *) */
708 bool comment_lossage = 0;
709 ptrdiff_t comment_end = from;
710 ptrdiff_t comment_end_byte = from_byte;
711 ptrdiff_t comstart_pos = 0;
712 ptrdiff_t comstart_byte;
713 /* Place where the containing defun starts,
714 or 0 if we didn't come across it yet. */
715 ptrdiff_t defun_start = 0;
716 ptrdiff_t defun_start_byte = 0;
717 enum syntaxcode code;
718 ptrdiff_t nesting = 1; /* Current comment nesting. */
719 int c;
720 int syntax = 0;
721 unsigned short int quit_count = 0;
722
723 /* FIXME: A }} comment-ender style leads to incorrect behavior
724 in the case of {{ c }}} because we ignore the last two chars which are
725 assumed to be comment-enders although they aren't. */
726
727 /* At beginning of range to scan, we're outside of strings;
728 that determines quote parity to the comment-end. */
729 while (from != stop)
730 {
731 rarely_quit (++quit_count);
732
733 ptrdiff_t temp_byte;
734 int prev_syntax;
735 bool com2start, com2end, comstart;
736
737 /* Move back and examine a character. */
738 dec_both (&from, &from_byte);
739 UPDATE_SYNTAX_TABLE_BACKWARD (from);
740
741 prev_syntax = syntax;
742 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
743 syntax = SYNTAX_WITH_FLAGS (c);
744 code = SYNTAX (c);
745
746 /* Check for 2-char comment markers. */
747 com2start = (SYNTAX_FLAGS_COMSTART_FIRST (syntax)
748 && SYNTAX_FLAGS_COMSTART_SECOND (prev_syntax)
749 && (comstyle
750 == SYNTAX_FLAGS_COMMENT_STYLE (prev_syntax, syntax))
751 && (SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax)
752 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested);
753 com2end = (SYNTAX_FLAGS_COMEND_FIRST (syntax)
754 && SYNTAX_FLAGS_COMEND_SECOND (prev_syntax));
755 comstart = (com2start || code == Scomment);
756
757 /* Nasty cases with overlapping 2-char comment markers:
758 - snmp-mode: -- c -- foo -- c --
759 --- c --
760 ------ c --
761 - c-mode: *||*
762 |* *|* *|
763 |*| |* |*|
764 /// */
765
766 /* If a 2-char comment sequence partly overlaps with another,
767 we don't try to be clever. E.g. |*| in C, or }% in modes that
768 have %..\n and %{..}%. */
769 if (from > stop && (com2end || comstart))
770 {
771 ptrdiff_t next = from, next_byte = from_byte;
772 int next_c, next_syntax;
773 dec_both (&next, &next_byte);
774 UPDATE_SYNTAX_TABLE_BACKWARD (next);
775 next_c = FETCH_CHAR_AS_MULTIBYTE (next_byte);
776 next_syntax = SYNTAX_WITH_FLAGS (next_c);
777 if (((comstart || comnested)
778 && SYNTAX_FLAGS_COMEND_SECOND (syntax)
779 && SYNTAX_FLAGS_COMEND_FIRST (next_syntax))
780 || ((com2end || comnested)
781 && SYNTAX_FLAGS_COMSTART_SECOND (syntax)
782 && (comstyle
783 == SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_syntax))
784 && SYNTAX_FLAGS_COMSTART_FIRST (next_syntax)))
785 goto lossage;
786 /* UPDATE_SYNTAX_TABLE_FORWARD (next + 1); */
787 }
788
789 if (com2start && comstart_pos == 0)
790 /* We're looking at a comment starter. But it might be a comment
791 ender as well (see snmp-mode). The first time we see one, we
792 need to consider it as a comment starter,
793 and the subsequent times as a comment ender. */
794 com2end = 0;
795
796 /* Turn a 2-char comment sequences into the appropriate syntax. */
797 if (com2end)
798 code = Sendcomment;
799 else if (com2start)
800 code = Scomment;
801 /* Ignore comment starters of a different style. */
802 else if (code == Scomment
803 && (comstyle != SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0)
804 || SYNTAX_FLAGS_COMMENT_NESTED (syntax) != comnested))
805 continue;
806
807 /* Ignore escaped characters, except comment-enders which cannot
808 be escaped. */
809 if ((comment_end_can_be_escaped || code != Sendcomment)
810 && char_quoted (from, from_byte))
811 continue;
812
813 switch (code)
814 {
815 case Sstring_fence:
816 case Scomment_fence:
817 c = (code == Sstring_fence ? ST_STRING_STYLE : ST_COMMENT_STYLE);
818 FALLTHROUGH;
819 case Sstring:
820 /* Track parity of quotes. */
821 if (string_style == -1)
822 /* Entering a string. */
823 string_style = c;
824 else if (string_style == c)
825 /* Leaving the string. */
826 string_style = -1;
827 else
828 /* If we have two kinds of string delimiters.
829 There's no way to grok this scanning backwards. */
830 string_lossage = 1;
831 break;
832
833 case Scomment:
834 /* We've already checked that it is the relevant comstyle. */
835 if (string_style != -1 || comment_lossage || string_lossage)
836 /* There are odd string quotes involved, so let's be careful.
837 Test case in Pascal: " { " a { " } */
838 goto lossage;
839
840 if (!comnested)
841 {
842 /* Record best comment-starter so far. */
843 comstart_pos = from;
844 comstart_byte = from_byte;
845 }
846 else if (--nesting <= 0)
847 /* nested comments have to be balanced, so we don't need to
848 keep looking for earlier ones. We use here the same (slightly
849 incorrect) reasoning as below: since it is followed by uniform
850 paired string quotes, this comment-start has to be outside of
851 strings, else the comment-end itself would be inside a string. */
852 goto done;
853 break;
854
855 case Sendcomment:
856 if (SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == comstyle
857 && ((com2end && SYNTAX_FLAGS_COMMENT_NESTED (prev_syntax))
858 || SYNTAX_FLAGS_COMMENT_NESTED (syntax)) == comnested)
859 /* This is the same style of comment ender as ours. */
860 {
861 if (comnested)
862 nesting++;
863 else
864 /* Anything before that can't count because it would match
865 this comment-ender rather than ours. */
866 from = stop; /* Break out of the loop. */
867 }
868 else if (comstart_pos != 0 || c != '\n')
869 /* We're mixing comment styles here, so we'd better be careful.
870 The (comstart_pos != 0 || c != '\n') check is not quite correct
871 (we should just always set comment_lossage), but removing it
872 would imply that any multiline comment in C would go through
873 lossage, which seems overkill.
874 The failure should only happen in the rare cases such as
875 { (* } *) */
876 comment_lossage = 1;
877 break;
878
879 case Sopen:
880 /* Assume a defun-start point is outside of strings. */
881 if (open_paren_in_column_0_is_defun_start
882 && NILP (Vcomment_use_syntax_ppss)
883 && (from == stop
884 || (temp_byte = dec_bytepos (from_byte),
885 FETCH_CHAR (temp_byte) == '\n')))
886 {
887 defun_start = from;
888 defun_start_byte = from_byte;
889 from = stop; /* Break out of the loop. */
890 }
891 break;
892
893 default:
894 break;
895 }
896 }
897
898 if (comstart_pos == 0)
899 {
900 from = comment_end;
901 from_byte = comment_end_byte;
902 UPDATE_SYNTAX_TABLE_FORWARD (comment_end);
903 }
904 /* If comstart_pos is set and we get here (ie. didn't jump to `lossage'
905 or `done'), then we've found the beginning of the non-nested comment. */
906 else if (1) /* !comnested */
907 {
908 from = comstart_pos;
909 from_byte = comstart_byte;
910 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
911 }
912 else lossage:
913 {
914 struct lisp_parse_state state;
915 bool adjusted = true;
916 /* We had two kinds of string delimiters mixed up
917 together. Decode this going forwards.
918 Scan fwd from a known safe place (beginning-of-defun)
919 to the one in question; this records where we
920 last passed a comment starter. */
921 /* If we did not already find the defun start, find it now. */
922 if (defun_start == 0)
923 {
924 defun_start = find_defun_start (comment_end, comment_end_byte);
925 defun_start_byte = find_start_value_byte;
926 adjusted = (defun_start > BEGV);
927 }
928 do
929 {
930 internalize_parse_state (Qnil, &state);
931 scan_sexps_forward (&state,
932 defun_start, defun_start_byte,
933 comment_end, TYPE_MINIMUM (EMACS_INT),
934 0, 0);
935 defun_start = comment_end;
936 if (!adjusted)
937 {
938 adjusted = true;
939 find_start_value
940 = CONSP (state.levelstarts) ? XFIXNUM (XCAR (state.levelstarts))
941 : state.thislevelstart >= 0 ? state.thislevelstart
942 : find_start_value;
943 find_start_value_byte = CHAR_TO_BYTE (find_start_value);
944 }
945
946 if (state.incomment == (comnested ? 1 : -1)
947 && state.comstyle == comstyle)
948 from = state.comstr_start;
949 else
950 {
951 from = comment_end;
952 if (state.incomment)
953 /* If comment_end is inside some other comment, maybe ours
954 is nested, so we need to try again from within the
955 surrounding comment. Example: { a (* " *) */
956 {
957 /* FIXME: We should advance by one or two chars. */
958 defun_start = state.comstr_start + 2;
959 defun_start_byte = CHAR_TO_BYTE (defun_start);
960 }
961 }
962 rarely_quit (++quit_count);
963 }
964 while (defun_start < comment_end);
965
966 from_byte = CHAR_TO_BYTE (from);
967 UPDATE_SYNTAX_TABLE_FORWARD (from - 1);
968 }
969
970 done:
971 *charpos_ptr = from;
972 *bytepos_ptr = from_byte;
973
974 return from != comment_end;
975 }
976
977 DEFUN ("syntax-table-p", Fsyntax_table_p, Ssyntax_table_p, 1, 1, 0,
978 doc: /* Return t if OBJECT is a syntax table.
979 Currently, any char-table counts as a syntax table. */)
980 (Lisp_Object object)
981 {
982 if (CHAR_TABLE_P (object)
983 && EQ (XCHAR_TABLE (object)->purpose, Qsyntax_table))
984 return Qt;
985 return Qnil;
986 }
987
988 static void
check_syntax_table(Lisp_Object obj)989 check_syntax_table (Lisp_Object obj)
990 {
991 CHECK_TYPE (CHAR_TABLE_P (obj) && EQ (XCHAR_TABLE (obj)->purpose, Qsyntax_table),
992 Qsyntax_table_p, obj);
993 }
994
995 DEFUN ("syntax-table", Fsyntax_table, Ssyntax_table, 0, 0, 0,
996 doc: /* Return the current syntax table.
997 This is the one specified by the current buffer. */)
998 (void)
999 {
1000 return BVAR (current_buffer, syntax_table);
1001 }
1002
1003 DEFUN ("standard-syntax-table", Fstandard_syntax_table,
1004 Sstandard_syntax_table, 0, 0, 0,
1005 doc: /* Return the standard syntax table.
1006 This is the one used for new buffers. */)
1007 (void)
1008 {
1009 return Vstandard_syntax_table;
1010 }
1011
1012 DEFUN ("copy-syntax-table", Fcopy_syntax_table, Scopy_syntax_table, 0, 1, 0,
1013 doc: /* Construct a new syntax table and return it.
1014 It is a copy of the TABLE, which defaults to the standard syntax table. */)
1015 (Lisp_Object table)
1016 {
1017 Lisp_Object copy;
1018
1019 if (!NILP (table))
1020 check_syntax_table (table);
1021 else
1022 table = Vstandard_syntax_table;
1023
1024 copy = Fcopy_sequence (table);
1025
1026 /* Only the standard syntax table should have a default element.
1027 Other syntax tables should inherit from parents instead. */
1028 set_char_table_defalt (copy, Qnil);
1029
1030 /* Copied syntax tables should all have parents.
1031 If we copied one with no parent, such as the standard syntax table,
1032 use the standard syntax table as the copy's parent. */
1033 if (NILP (XCHAR_TABLE (copy)->parent))
1034 Fset_char_table_parent (copy, Vstandard_syntax_table);
1035 return copy;
1036 }
1037
1038 DEFUN ("set-syntax-table", Fset_syntax_table, Sset_syntax_table, 1, 1, 0,
1039 doc: /* Select a new syntax table for the current buffer.
1040 One argument, a syntax table. */)
1041 (Lisp_Object table)
1042 {
1043 int idx;
1044 check_syntax_table (table);
1045 bset_syntax_table (current_buffer, table);
1046 /* Indicate that this buffer now has a specified syntax table. */
1047 idx = PER_BUFFER_VAR_IDX (syntax_table);
1048 SET_PER_BUFFER_VALUE_P (current_buffer, idx, 1);
1049 return table;
1050 }
1051
1052 /* Convert a letter which signifies a syntax code
1053 into the code it signifies.
1054 This is used by modify-syntax-entry, and other things. */
1055
1056 unsigned char const syntax_spec_code[0400] =
1057 { 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1058 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1059 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1060 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1061 Swhitespace, Scomment_fence, Sstring, 0377, Smath, 0377, 0377, Squote,
1062 Sopen, Sclose, 0377, 0377, 0377, Swhitespace, Spunct, Scharquote,
1063 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1064 0377, 0377, 0377, 0377, Scomment, 0377, Sendcomment, 0377,
1065 Sinherit, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* @, A ... */
1066 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1067 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1068 0377, 0377, 0377, 0377, Sescape, 0377, 0377, Ssymbol,
1069 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377, /* `, a, ... */
1070 0377, 0377, 0377, 0377, 0377, 0377, 0377, 0377,
1071 0377, 0377, 0377, 0377, 0377, 0377, 0377, Sword,
1072 0377, 0377, 0377, 0377, Sstring_fence, 0377, 0377, 0377
1073 };
1074
1075 /* Indexed by syntax code, give the letter that describes it. */
1076
1077 char const syntax_code_spec[16] =
1078 {
1079 ' ', '.', 'w', '_', '(', ')', '\'', '\"', '$', '\\', '/', '<', '>', '@',
1080 '!', '|'
1081 };
1082
1083 /* Indexed by syntax code, give the object (cons of syntax code and
1084 nil) to be stored in syntax table. Since these objects can be
1085 shared among syntax tables, we generate them in advance. By
1086 sharing objects, the function `describe-syntax' can give a more
1087 compact listing. */
1088 static Lisp_Object Vsyntax_code_object;
1089
1090
1091 DEFUN ("char-syntax", Fchar_syntax, Schar_syntax, 1, 1, 0,
1092 doc: /* Return the syntax code of CHARACTER, described by a character.
1093 For example, if CHARACTER is a word constituent, the
1094 character `w' (119) is returned.
1095 The characters that correspond to various syntax codes
1096 are listed in the documentation of `modify-syntax-entry'.
1097
1098 If you're trying to determine the syntax of characters in the buffer,
1099 this is probably the wrong function to use, because it can't take
1100 `syntax-table' text properties into account. Consider using
1101 `syntax-after' instead. */)
1102 (Lisp_Object character)
1103 {
1104 int char_int;
1105 CHECK_CHARACTER (character);
1106 char_int = XFIXNUM (character);
1107 SETUP_BUFFER_SYNTAX_TABLE ();
1108 return make_fixnum (syntax_code_spec[SYNTAX (char_int)]);
1109 }
1110
1111 DEFUN ("syntax-class-to-char", Fsyntax_class_to_char,
1112 Ssyntax_class_to_char, 1, 1, 0,
1113 doc: /* Return the syntax char of CLASS, described by an integer.
1114 For example, if SYNTAX is word constituent (the integer 2), the
1115 character `w' (119) is returned. */)
1116 (Lisp_Object syntax)
1117 {
1118 int syn;
1119 CHECK_FIXNUM (syntax);
1120 syn = XFIXNUM (syntax);
1121
1122 if (syn < 0 || syn >= sizeof syntax_code_spec)
1123 args_out_of_range (make_fixnum (sizeof syntax_code_spec - 1),
1124 syntax);
1125 return make_fixnum (syntax_code_spec[syn]);
1126 }
1127
1128 DEFUN ("matching-paren", Fmatching_paren, Smatching_paren, 1, 1, 0,
1129 doc: /* Return the matching parenthesis of CHARACTER, or nil if none. */)
1130 (Lisp_Object character)
1131 {
1132 int char_int;
1133 enum syntaxcode code;
1134 CHECK_CHARACTER (character);
1135 char_int = XFIXNUM (character);
1136 SETUP_BUFFER_SYNTAX_TABLE ();
1137 code = SYNTAX (char_int);
1138 if (code == Sopen || code == Sclose)
1139 return SYNTAX_MATCH (char_int);
1140 return Qnil;
1141 }
1142
1143 DEFUN ("string-to-syntax", Fstring_to_syntax, Sstring_to_syntax, 1, 1, 0,
1144 doc: /* Convert a syntax descriptor STRING into a raw syntax descriptor.
1145 STRING should be a string of the form allowed as argument of
1146 `modify-syntax-entry'. The return value is a raw syntax descriptor: a
1147 cons cell (CODE . MATCHING-CHAR) which can be used, for example, as
1148 the value of a `syntax-table' text property. */)
1149 (Lisp_Object string)
1150 {
1151 const unsigned char *p;
1152 int val;
1153 Lisp_Object match;
1154
1155 CHECK_STRING (string);
1156
1157 p = SDATA (string);
1158 val = syntax_spec_code[*p++];
1159 if (val == 0377)
1160 error ("Invalid syntax description letter: %c", p[-1]);
1161
1162 if (val == Sinherit)
1163 return Qnil;
1164
1165 if (*p)
1166 {
1167 int len, character = string_char_and_length (p, &len);
1168 XSETINT (match, character);
1169 if (XFIXNAT (match) == ' ')
1170 match = Qnil;
1171 p += len;
1172 }
1173 else
1174 match = Qnil;
1175
1176 while (*p)
1177 switch (*p++)
1178 {
1179 case '1':
1180 val |= 1 << 16;
1181 break;
1182
1183 case '2':
1184 val |= 1 << 17;
1185 break;
1186
1187 case '3':
1188 val |= 1 << 18;
1189 break;
1190
1191 case '4':
1192 val |= 1 << 19;
1193 break;
1194
1195 case 'p':
1196 val |= 1 << 20;
1197 break;
1198
1199 case 'b':
1200 val |= 1 << 21;
1201 break;
1202
1203 case 'n':
1204 val |= 1 << 22;
1205 break;
1206
1207 case 'c':
1208 val |= 1 << 23;
1209 break;
1210 }
1211
1212 if (val < ASIZE (Vsyntax_code_object) && NILP (match))
1213 return AREF (Vsyntax_code_object, val);
1214 else
1215 /* Since we can't use a shared object, let's make a new one. */
1216 return Fcons (make_fixnum (val), match);
1217 }
1218
1219 /* I really don't know why this is interactive
1220 help-form should at least be made useful whilst reading the second arg. */
1221 DEFUN ("modify-syntax-entry", Fmodify_syntax_entry, Smodify_syntax_entry, 2, 3,
1222 "cSet syntax for character: \nsSet syntax for %s to: ",
1223 doc: /* Set syntax for character CHAR according to string NEWENTRY.
1224 The syntax is changed only for table SYNTAX-TABLE, which defaults to
1225 the current buffer's syntax table.
1226 CHAR may be a cons (MIN . MAX), in which case, syntaxes of all characters
1227 in the range MIN to MAX are changed.
1228 The first character of NEWENTRY should be one of the following:
1229 Space or - whitespace syntax. w word constituent.
1230 _ symbol constituent. . punctuation.
1231 ( open-parenthesis. ) close-parenthesis.
1232 " string quote. \\ escape.
1233 $ paired delimiter. \\=' expression quote or prefix operator.
1234 < comment starter. > comment ender.
1235 / character-quote. @ inherit from parent table.
1236 | generic string fence. ! generic comment fence.
1237
1238 Only single-character comment start and end sequences are represented thus.
1239 Two-character sequences are represented as described below.
1240 The second character of NEWENTRY is the matching parenthesis,
1241 used only if the first character is `(' or `)'.
1242 Any additional characters are flags.
1243 Defined flags are the characters 1, 2, 3, 4, b, p, and n.
1244 1 means CHAR is the start of a two-char comment start sequence.
1245 2 means CHAR is the second character of such a sequence.
1246 3 means CHAR is the start of a two-char comment end sequence.
1247 4 means CHAR is the second character of such a sequence.
1248
1249 There can be several orthogonal comment sequences. This is to support
1250 language modes such as C++. By default, all comment sequences are of style
1251 a, but you can set the comment sequence style to b (on the second character
1252 of a comment-start, and the first character of a comment-end sequence) and/or
1253 c (on any of its chars) using this flag:
1254 b means CHAR is part of comment sequence b.
1255 c means CHAR is part of comment sequence c.
1256 n means CHAR is part of a nestable comment sequence.
1257
1258 p means CHAR is a prefix character for `backward-prefix-chars';
1259 such characters are treated as whitespace when they occur
1260 between expressions.
1261 usage: (modify-syntax-entry CHAR NEWENTRY &optional SYNTAX-TABLE) */)
1262 (Lisp_Object c, Lisp_Object newentry, Lisp_Object syntax_table)
1263 {
1264 if (CONSP (c))
1265 {
1266 CHECK_CHARACTER_CAR (c);
1267 CHECK_CHARACTER_CDR (c);
1268 }
1269 else
1270 CHECK_CHARACTER (c);
1271
1272 if (NILP (syntax_table))
1273 syntax_table = BVAR (current_buffer, syntax_table);
1274 else
1275 check_syntax_table (syntax_table);
1276
1277 newentry = Fstring_to_syntax (newentry);
1278 if (CONSP (c))
1279 SET_RAW_SYNTAX_ENTRY_RANGE (syntax_table, c, newentry);
1280 else
1281 SET_RAW_SYNTAX_ENTRY (syntax_table, XFIXNUM (c), newentry);
1282
1283 /* We clear the regexp cache, since character classes can now have
1284 different values from those in the compiled regexps.*/
1285 clear_regexp_cache ();
1286
1287 return Qnil;
1288 }
1289
1290 /* Dump syntax table to buffer in human-readable format */
1291
1292 DEFUN ("internal-describe-syntax-value", Finternal_describe_syntax_value,
1293 Sinternal_describe_syntax_value, 1, 1, 0,
1294 doc: /* Insert a description of the internal syntax description SYNTAX at point. */)
1295 (Lisp_Object syntax)
1296 {
1297 int code, syntax_code;
1298 bool start1, start2, end1, end2, prefix, comstyleb, comstylec, comnested;
1299 char str[2];
1300 Lisp_Object first, match_lisp, value = syntax;
1301
1302 if (NILP (value))
1303 {
1304 insert_string ("default");
1305 return syntax;
1306 }
1307
1308 if (CHAR_TABLE_P (value))
1309 {
1310 insert_string ("deeper char-table ...");
1311 return syntax;
1312 }
1313
1314 if (!CONSP (value))
1315 {
1316 insert_string ("invalid");
1317 return syntax;
1318 }
1319
1320 first = XCAR (value);
1321 match_lisp = XCDR (value);
1322
1323 if (!FIXNUMP (first) || !(NILP (match_lisp) || CHARACTERP (match_lisp)))
1324 {
1325 insert_string ("invalid");
1326 return syntax;
1327 }
1328
1329 syntax_code = XFIXNUM (first) & INT_MAX;
1330 code = syntax_code & 0377;
1331 start1 = SYNTAX_FLAGS_COMSTART_FIRST (syntax_code);
1332 start2 = SYNTAX_FLAGS_COMSTART_SECOND (syntax_code);
1333 end1 = SYNTAX_FLAGS_COMEND_FIRST (syntax_code);
1334 end2 = SYNTAX_FLAGS_COMEND_SECOND (syntax_code);
1335 prefix = SYNTAX_FLAGS_PREFIX (syntax_code);
1336 comstyleb = SYNTAX_FLAGS_COMMENT_STYLEB (syntax_code);
1337 comstylec = SYNTAX_FLAGS_COMMENT_STYLEC (syntax_code);
1338 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax_code);
1339
1340 if (Smax <= code)
1341 {
1342 insert_string ("invalid");
1343 return syntax;
1344 }
1345
1346 str[0] = syntax_code_spec[code], str[1] = 0;
1347 insert (str, 1);
1348
1349 if (NILP (match_lisp))
1350 insert (" ", 1);
1351 else
1352 insert_char (XFIXNUM (match_lisp));
1353
1354 if (start1)
1355 insert ("1", 1);
1356 if (start2)
1357 insert ("2", 1);
1358
1359 if (end1)
1360 insert ("3", 1);
1361 if (end2)
1362 insert ("4", 1);
1363
1364 if (prefix)
1365 insert ("p", 1);
1366 if (comstyleb)
1367 insert ("b", 1);
1368 if (comstylec)
1369 insert ("c", 1);
1370 if (comnested)
1371 insert ("n", 1);
1372
1373 insert_string ("\twhich means: ");
1374
1375 switch (code)
1376 {
1377 case Swhitespace:
1378 insert_string ("whitespace"); break;
1379 case Spunct:
1380 insert_string ("punctuation"); break;
1381 case Sword:
1382 insert_string ("word"); break;
1383 case Ssymbol:
1384 insert_string ("symbol"); break;
1385 case Sopen:
1386 insert_string ("open"); break;
1387 case Sclose:
1388 insert_string ("close"); break;
1389 case Squote:
1390 insert_string ("prefix"); break;
1391 case Sstring:
1392 insert_string ("string"); break;
1393 case Smath:
1394 insert_string ("math"); break;
1395 case Sescape:
1396 insert_string ("escape"); break;
1397 case Scharquote:
1398 insert_string ("charquote"); break;
1399 case Scomment:
1400 insert_string ("comment"); break;
1401 case Sendcomment:
1402 insert_string ("endcomment"); break;
1403 case Sinherit:
1404 insert_string ("inherit"); break;
1405 case Scomment_fence:
1406 insert_string ("comment fence"); break;
1407 case Sstring_fence:
1408 insert_string ("string fence"); break;
1409 default:
1410 insert_string ("invalid");
1411 return syntax;
1412 }
1413
1414 if (!NILP (match_lisp))
1415 {
1416 insert_string (", matches ");
1417 insert_char (XFIXNUM (match_lisp));
1418 }
1419
1420 if (start1)
1421 insert_string (",\n\t is the first character of a comment-start sequence");
1422 if (start2)
1423 insert_string (",\n\t is the second character of a comment-start sequence");
1424
1425 if (end1)
1426 insert_string (",\n\t is the first character of a comment-end sequence");
1427 if (end2)
1428 insert_string (",\n\t is the second character of a comment-end sequence");
1429 if (comstyleb)
1430 insert_string (" (comment style b)");
1431 if (comstylec)
1432 insert_string (" (comment style c)");
1433 if (comnested)
1434 insert_string (" (nestable)");
1435
1436 if (prefix)
1437 {
1438 AUTO_STRING (prefixdoc,
1439 ",\n\t is a prefix character for `backward-prefix-chars'");
1440 insert1 (call1 (Qsubstitute_command_keys, prefixdoc));
1441 }
1442
1443 return syntax;
1444 }
1445
1446 /* Return the position across COUNT words from FROM.
1447 If that many words cannot be found before the end of the buffer, return 0.
1448 COUNT negative means scan backward and stop at word beginning. */
1449
1450 ptrdiff_t
scan_words(ptrdiff_t from,EMACS_INT count)1451 scan_words (ptrdiff_t from, EMACS_INT count)
1452 {
1453 ptrdiff_t beg = BEGV;
1454 ptrdiff_t end = ZV;
1455 ptrdiff_t from_byte = CHAR_TO_BYTE (from);
1456 enum syntaxcode code;
1457 int ch0, ch1;
1458 Lisp_Object func, pos;
1459
1460 SETUP_SYNTAX_TABLE (from, clip_to_bounds (PTRDIFF_MIN, count, PTRDIFF_MAX));
1461
1462 while (count > 0)
1463 {
1464 while (true)
1465 {
1466 if (from == end)
1467 return 0;
1468 UPDATE_SYNTAX_TABLE_FORWARD (from);
1469 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1470 code = SYNTAX (ch0);
1471 inc_both (&from, &from_byte);
1472 if (words_include_escapes
1473 && (code == Sescape || code == Scharquote))
1474 break;
1475 if (code == Sword)
1476 break;
1477 rarely_quit (from);
1478 }
1479 /* Now CH0 is a character which begins a word and FROM is the
1480 position of the next character. */
1481 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch0);
1482 if (! NILP (Ffboundp (func)))
1483 {
1484 pos = call2 (func, make_fixnum (from - 1), make_fixnum (end));
1485 if (FIXNUMP (pos) && from < XFIXNUM (pos) && XFIXNUM (pos) <= ZV)
1486 {
1487 from = XFIXNUM (pos);
1488 from_byte = CHAR_TO_BYTE (from);
1489 }
1490 }
1491 else
1492 {
1493 while (1)
1494 {
1495 if (from == end) break;
1496 UPDATE_SYNTAX_TABLE_FORWARD (from);
1497 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1498 code = SYNTAX (ch1);
1499 if ((code != Sword
1500 && (! words_include_escapes
1501 || (code != Sescape && code != Scharquote)))
1502 || word_boundary_p (ch0, ch1))
1503 break;
1504 inc_both (&from, &from_byte);
1505 ch0 = ch1;
1506 rarely_quit (from);
1507 }
1508 }
1509 count--;
1510 }
1511 while (count < 0)
1512 {
1513 while (true)
1514 {
1515 if (from == beg)
1516 return 0;
1517 dec_both (&from, &from_byte);
1518 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1519 ch1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1520 code = SYNTAX (ch1);
1521 if (words_include_escapes
1522 && (code == Sescape || code == Scharquote))
1523 break;
1524 if (code == Sword)
1525 break;
1526 rarely_quit (from);
1527 }
1528 /* Now CH1 is a character which ends a word and FROM is the
1529 position of it. */
1530 func = CHAR_TABLE_REF (Vfind_word_boundary_function_table, ch1);
1531 if (! NILP (Ffboundp (func)))
1532 {
1533 pos = call2 (func, make_fixnum (from), make_fixnum (beg));
1534 if (FIXNUMP (pos) && BEGV <= XFIXNUM (pos) && XFIXNUM (pos) < from)
1535 {
1536 from = XFIXNUM (pos);
1537 from_byte = CHAR_TO_BYTE (from);
1538 }
1539 }
1540 else
1541 {
1542 while (1)
1543 {
1544 if (from == beg)
1545 break;
1546 dec_both (&from, &from_byte);
1547 UPDATE_SYNTAX_TABLE_BACKWARD (from);
1548 ch0 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
1549 code = SYNTAX (ch0);
1550 if ((code != Sword
1551 && (! words_include_escapes
1552 || (code != Sescape && code != Scharquote)))
1553 || word_boundary_p (ch0, ch1))
1554 {
1555 inc_both (&from, &from_byte);
1556 break;
1557 }
1558 ch1 = ch0;
1559 rarely_quit (from);
1560 }
1561 }
1562 count++;
1563 }
1564
1565 return from;
1566 }
1567
1568 DEFUN ("forward-word", Fforward_word, Sforward_word, 0, 1, "^p",
1569 doc: /* Move point forward ARG words (backward if ARG is negative).
1570 If ARG is omitted or nil, move point forward one word.
1571 Normally returns t.
1572 If an edge of the buffer or a field boundary is reached, point is
1573 left there and the function returns nil. Field boundaries are not
1574 noticed if `inhibit-field-text-motion' is non-nil.
1575
1576 The word boundaries are normally determined by the buffer's syntax
1577 table and character script (according to `char-script-table'), but
1578 `find-word-boundary-function-table', such as set up by `subword-mode',
1579 can change that. If a Lisp program needs to move by words determined
1580 strictly by the syntax table, it should use `forward-word-strictly'
1581 instead. See Info node `(elisp) Word Motion' for details. */)
1582 (Lisp_Object arg)
1583 {
1584 Lisp_Object tmp;
1585 ptrdiff_t orig_val, val;
1586
1587 if (NILP (arg))
1588 XSETFASTINT (arg, 1);
1589 else
1590 CHECK_FIXNUM (arg);
1591
1592 val = orig_val = scan_words (PT, XFIXNUM (arg));
1593 if (! orig_val)
1594 val = XFIXNUM (arg) > 0 ? ZV : BEGV;
1595
1596 /* Avoid jumping out of an input field. */
1597 tmp = Fconstrain_to_field (make_fixnum (val), make_fixnum (PT),
1598 Qnil, Qnil, Qnil);
1599 val = XFIXNAT (tmp);
1600
1601 SET_PT (val);
1602 return val == orig_val ? Qt : Qnil;
1603 }
1604
1605 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
1606 doc: /* Move point forward, stopping before a char not in STRING, or at pos LIM.
1607 STRING is like the inside of a `[...]' in a regular expression
1608 except that `]' is never special and `\\' quotes `^', `-' or `\\'
1609 (but not at the end of a range; quoting is never needed there).
1610 Thus, with arg "a-zA-Z", this skips letters stopping before first nonletter.
1611 With arg "^a-zA-Z", skips nonletters stopping before first letter.
1612 Char classes, e.g. `[:alpha:]', are supported.
1613
1614 Returns the distance traveled, either zero or positive. */)
1615 (Lisp_Object string, Lisp_Object lim)
1616 {
1617 return skip_chars (1, string, lim, 1);
1618 }
1619
1620 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
1621 doc: /* Move point backward, stopping after a char not in STRING, or at pos LIM.
1622 See `skip-chars-forward' for details.
1623 Returns the distance traveled, either zero or negative. */)
1624 (Lisp_Object string, Lisp_Object lim)
1625 {
1626 return skip_chars (0, string, lim, 1);
1627 }
1628
1629 DEFUN ("skip-syntax-forward", Fskip_syntax_forward, Sskip_syntax_forward, 1, 2, 0,
1630 doc: /* Move point forward across chars in specified syntax classes.
1631 SYNTAX is a string of syntax code characters.
1632 Stop before a char whose syntax is not in SYNTAX, or at position LIM.
1633 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1634 This function returns the distance traveled, either zero or positive. */)
1635 (Lisp_Object syntax, Lisp_Object lim)
1636 {
1637 return skip_syntaxes (1, syntax, lim);
1638 }
1639
1640 DEFUN ("skip-syntax-backward", Fskip_syntax_backward, Sskip_syntax_backward, 1, 2, 0,
1641 doc: /* Move point backward across chars in specified syntax classes.
1642 SYNTAX is a string of syntax code characters.
1643 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.
1644 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.
1645 This function returns either zero or a negative number, and the absolute value
1646 of this is the distance traveled. */)
1647 (Lisp_Object syntax, Lisp_Object lim)
1648 {
1649 return skip_syntaxes (0, syntax, lim);
1650 }
1651
1652 static Lisp_Object
skip_chars(bool forwardp,Lisp_Object string,Lisp_Object lim,bool handle_iso_classes)1653 skip_chars (bool forwardp, Lisp_Object string, Lisp_Object lim,
1654 bool handle_iso_classes)
1655 {
1656 int c;
1657 char fastmap[0400];
1658 /* Store the ranges of non-ASCII characters. */
1659 int *char_ranges UNINIT;
1660 int n_char_ranges = 0;
1661 bool negate = 0;
1662 ptrdiff_t i, i_byte;
1663 /* True if the current buffer is multibyte and the region contains
1664 non-ASCII chars. */
1665 bool multibyte;
1666 /* True if STRING is multibyte and it contains non-ASCII chars. */
1667 bool string_multibyte;
1668 ptrdiff_t size_byte;
1669 const unsigned char *str;
1670 int len;
1671 Lisp_Object iso_classes;
1672 USE_SAFE_ALLOCA;
1673
1674 CHECK_STRING (string);
1675 iso_classes = Qnil;
1676
1677 if (NILP (lim))
1678 XSETINT (lim, forwardp ? ZV : BEGV);
1679 else
1680 CHECK_FIXNUM_COERCE_MARKER (lim);
1681
1682 /* In any case, don't allow scan outside bounds of buffer. */
1683 if (XFIXNUM (lim) > ZV)
1684 XSETFASTINT (lim, ZV);
1685 if (XFIXNUM (lim) < BEGV)
1686 XSETFASTINT (lim, BEGV);
1687
1688 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
1689 && (XFIXNUM (lim) - PT != CHAR_TO_BYTE (XFIXNUM (lim)) - PT_BYTE));
1690 string_multibyte = SBYTES (string) > SCHARS (string);
1691
1692 memset (fastmap, 0, sizeof fastmap);
1693
1694 str = SDATA (string);
1695 size_byte = SBYTES (string);
1696
1697 i_byte = 0;
1698 if (i_byte < size_byte
1699 && SREF (string, 0) == '^')
1700 {
1701 negate = 1; i_byte++;
1702 }
1703
1704 /* Find the characters specified and set their elements of fastmap.
1705 Handle backslashes and ranges specially.
1706
1707 If STRING contains non-ASCII characters, setup char_ranges for
1708 them and use fastmap only for their leading codes. */
1709
1710 if (! string_multibyte)
1711 {
1712 bool string_has_eight_bit = 0;
1713
1714 /* At first setup fastmap. */
1715 while (i_byte < size_byte)
1716 {
1717 if (handle_iso_classes)
1718 {
1719 const unsigned char *ch = str + i_byte;
1720 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1721 if (cc == 0)
1722 error ("Invalid ISO C character class");
1723 if (cc != -1)
1724 {
1725 iso_classes = Fcons (make_fixnum (cc), iso_classes);
1726 i_byte = ch - str;
1727 continue;
1728 }
1729 }
1730
1731 c = str[i_byte++];
1732
1733 if (c == '\\')
1734 {
1735 if (i_byte == size_byte)
1736 break;
1737
1738 c = str[i_byte++];
1739 }
1740 /* Treat `-' as range character only if another character
1741 follows. */
1742 if (i_byte + 1 < size_byte
1743 && str[i_byte] == '-')
1744 {
1745 int c2;
1746
1747 /* Skip over the dash. */
1748 i_byte++;
1749
1750 /* Get the end of the range. */
1751 c2 = str[i_byte++];
1752 if (c2 == '\\'
1753 && i_byte < size_byte)
1754 c2 = str[i_byte++];
1755
1756 if (c <= c2)
1757 {
1758 int lim2 = c2 + 1;
1759 while (c < lim2)
1760 fastmap[c++] = 1;
1761 if (! ASCII_CHAR_P (c2))
1762 string_has_eight_bit = 1;
1763 }
1764 }
1765 else
1766 {
1767 fastmap[c] = 1;
1768 if (! ASCII_CHAR_P (c))
1769 string_has_eight_bit = 1;
1770 }
1771 }
1772
1773 /* If the current range is multibyte and STRING contains
1774 eight-bit chars, arrange fastmap and setup char_ranges for
1775 the corresponding multibyte chars. */
1776 if (multibyte && string_has_eight_bit)
1777 {
1778 char *p1;
1779 char himap[0200 + 1];
1780 memcpy (himap, fastmap + 0200, 0200);
1781 himap[0200] = 0;
1782 memset (fastmap + 0200, 0, 0200);
1783 SAFE_NALLOCA (char_ranges, 2, 128);
1784 i = 0;
1785
1786 while ((p1 = memchr (himap + i, 1, 0200 - i)))
1787 {
1788 /* Deduce the next range C..C2 from the next clump of 1s
1789 in HIMAP starting with &HIMAP[I]. HIMAP is the high
1790 order half of the old FASTMAP. */
1791 int c2, leading_code;
1792 i = p1 - himap;
1793 c = BYTE8_TO_CHAR (i + 0200);
1794 i += strlen (p1);
1795 c2 = BYTE8_TO_CHAR (i + 0200 - 1);
1796
1797 char_ranges[n_char_ranges++] = c;
1798 char_ranges[n_char_ranges++] = c2;
1799 leading_code = CHAR_LEADING_CODE (c);
1800 memset (fastmap + leading_code, 1,
1801 CHAR_LEADING_CODE (c2) - leading_code + 1);
1802 }
1803 }
1804 }
1805 else /* STRING is multibyte */
1806 {
1807 SAFE_NALLOCA (char_ranges, 2, SCHARS (string));
1808
1809 while (i_byte < size_byte)
1810 {
1811 int leading_code = str[i_byte];
1812
1813 if (handle_iso_classes)
1814 {
1815 const unsigned char *ch = str + i_byte;
1816 re_wctype_t cc = re_wctype_parse (&ch, size_byte - i_byte);
1817 if (cc == 0)
1818 error ("Invalid ISO C character class");
1819 if (cc != -1)
1820 {
1821 iso_classes = Fcons (make_fixnum (cc), iso_classes);
1822 i_byte = ch - str;
1823 continue;
1824 }
1825 }
1826
1827 if (leading_code== '\\')
1828 {
1829 if (++i_byte == size_byte)
1830 break;
1831
1832 leading_code = str[i_byte];
1833 }
1834 c = string_char_and_length (str + i_byte, &len);
1835 i_byte += len;
1836
1837
1838 /* Treat `-' as range character only if another character
1839 follows. */
1840 if (i_byte + 1 < size_byte
1841 && str[i_byte] == '-')
1842 {
1843 int c2, leading_code2;
1844
1845 /* Skip over the dash. */
1846 i_byte++;
1847
1848 /* Get the end of the range. */
1849 leading_code2 = str[i_byte];
1850 c2 = string_char_and_length (str + i_byte, &len);
1851 i_byte += len;
1852
1853 if (c2 == '\\'
1854 && i_byte < size_byte)
1855 {
1856 leading_code2 = str[i_byte];
1857 c2 = string_char_and_length (str + i_byte, &len);
1858 i_byte += len;
1859 }
1860
1861 if (c > c2)
1862 continue;
1863 if (ASCII_CHAR_P (c))
1864 {
1865 while (c <= c2 && c < 0x80)
1866 fastmap[c++] = 1;
1867 leading_code = CHAR_LEADING_CODE (c);
1868 }
1869 if (! ASCII_CHAR_P (c))
1870 {
1871 int lim2 = leading_code2 + 1;
1872 while (leading_code < lim2)
1873 fastmap[leading_code++] = 1;
1874 if (c <= c2)
1875 {
1876 char_ranges[n_char_ranges++] = c;
1877 char_ranges[n_char_ranges++] = c2;
1878 }
1879 }
1880 }
1881 else
1882 {
1883 if (ASCII_CHAR_P (c))
1884 fastmap[c] = 1;
1885 else
1886 {
1887 fastmap[leading_code] = 1;
1888 char_ranges[n_char_ranges++] = c;
1889 char_ranges[n_char_ranges++] = c;
1890 }
1891 }
1892 }
1893
1894 /* If the current range is unibyte and STRING contains non-ASCII
1895 chars, arrange fastmap for the corresponding unibyte
1896 chars. */
1897
1898 if (! multibyte && n_char_ranges > 0)
1899 {
1900 memset (fastmap + 0200, 0, 0200);
1901 for (i = 0; i < n_char_ranges; i += 2)
1902 {
1903 int c1 = char_ranges[i];
1904 int lim2 = char_ranges[i + 1] + 1;
1905
1906 for (; c1 < lim2; c1++)
1907 {
1908 int b = CHAR_TO_BYTE_SAFE (c1);
1909 if (b >= 0)
1910 fastmap[b] = 1;
1911 }
1912 }
1913 }
1914 }
1915
1916 /* If ^ was the first character, complement the fastmap. */
1917 if (negate)
1918 {
1919 if (! multibyte)
1920 for (i = 0; i < sizeof fastmap; i++)
1921 fastmap[i] ^= 1;
1922 else
1923 {
1924 for (i = 0; i < 0200; i++)
1925 fastmap[i] ^= 1;
1926 /* All non-ASCII chars possibly match. */
1927 for (; i < sizeof fastmap; i++)
1928 fastmap[i] = 1;
1929 }
1930 }
1931
1932 {
1933 ptrdiff_t start_point = PT;
1934 ptrdiff_t pos = PT;
1935 ptrdiff_t pos_byte = PT_BYTE;
1936 unsigned char *p = PT_ADDR, *endp, *stop;
1937
1938 if (forwardp)
1939 {
1940 endp = (XFIXNUM (lim) == GPT) ? GPT_ADDR : CHAR_POS_ADDR (XFIXNUM (lim));
1941 stop = (pos < GPT && GPT < XFIXNUM (lim)) ? GPT_ADDR : endp;
1942 }
1943 else
1944 {
1945 endp = CHAR_POS_ADDR (XFIXNUM (lim));
1946 stop = (pos >= GPT && GPT > XFIXNUM (lim)) ? GAP_END_ADDR : endp;
1947 }
1948
1949 /* This code may look up syntax tables using functions that rely on the
1950 gl_state object. To make sure this object is not out of date,
1951 let's initialize it manually.
1952 We ignore syntax-table text-properties for now, since that's
1953 what we've done in the past. */
1954 SETUP_BUFFER_SYNTAX_TABLE ();
1955 if (forwardp)
1956 {
1957 if (multibyte)
1958 while (1)
1959 {
1960 int nbytes;
1961
1962 if (p >= stop)
1963 {
1964 if (p >= endp)
1965 break;
1966 p = GAP_END_ADDR;
1967 stop = endp;
1968 }
1969 c = string_char_and_length (p, &nbytes);
1970 if (! NILP (iso_classes) && in_classes (c, iso_classes))
1971 {
1972 if (negate)
1973 break;
1974 else
1975 goto fwd_ok;
1976 }
1977
1978 if (! fastmap[*p])
1979 break;
1980 if (! ASCII_CHAR_P (c))
1981 {
1982 /* As we are looking at a multibyte character, we
1983 must look up the character in the table
1984 CHAR_RANGES. If there's no data in the table,
1985 that character is not what we want to skip. */
1986
1987 /* The following code do the right thing even if
1988 n_char_ranges is zero (i.e. no data in
1989 CHAR_RANGES). */
1990 for (i = 0; i < n_char_ranges; i += 2)
1991 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
1992 break;
1993 if (!(negate ^ (i < n_char_ranges)))
1994 break;
1995 }
1996 fwd_ok:
1997 p += nbytes, pos++, pos_byte += nbytes;
1998 rarely_quit (pos);
1999 }
2000 else
2001 while (true)
2002 {
2003 if (p >= stop)
2004 {
2005 if (p >= endp)
2006 break;
2007 p = GAP_END_ADDR;
2008 stop = endp;
2009 }
2010
2011 if (!NILP (iso_classes) && in_classes (*p, iso_classes))
2012 {
2013 if (negate)
2014 break;
2015 else
2016 goto fwd_unibyte_ok;
2017 }
2018
2019 if (!fastmap[*p])
2020 break;
2021 fwd_unibyte_ok:
2022 p++, pos++, pos_byte++;
2023 rarely_quit (pos);
2024 }
2025 }
2026 else
2027 {
2028 if (multibyte)
2029 while (true)
2030 {
2031 if (p <= stop)
2032 {
2033 if (p <= endp)
2034 break;
2035 p = GPT_ADDR;
2036 stop = endp;
2037 }
2038 unsigned char *prev_p = p;
2039 do
2040 p--;
2041 while (stop <= p && ! CHAR_HEAD_P (*p));
2042
2043 c = STRING_CHAR (p);
2044
2045 if (! NILP (iso_classes) && in_classes (c, iso_classes))
2046 {
2047 if (negate)
2048 break;
2049 else
2050 goto back_ok;
2051 }
2052
2053 if (! fastmap[*p])
2054 break;
2055 if (! ASCII_CHAR_P (c))
2056 {
2057 /* See the comment in the previous similar code. */
2058 for (i = 0; i < n_char_ranges; i += 2)
2059 if (c >= char_ranges[i] && c <= char_ranges[i + 1])
2060 break;
2061 if (!(negate ^ (i < n_char_ranges)))
2062 break;
2063 }
2064 back_ok:
2065 pos--, pos_byte -= prev_p - p;
2066 rarely_quit (pos);
2067 }
2068 else
2069 while (true)
2070 {
2071 if (p <= stop)
2072 {
2073 if (p <= endp)
2074 break;
2075 p = GPT_ADDR;
2076 stop = endp;
2077 }
2078
2079 if (! NILP (iso_classes) && in_classes (p[-1], iso_classes))
2080 {
2081 if (negate)
2082 break;
2083 else
2084 goto back_unibyte_ok;
2085 }
2086
2087 if (!fastmap[p[-1]])
2088 break;
2089 back_unibyte_ok:
2090 p--, pos--, pos_byte--;
2091 rarely_quit (pos);
2092 }
2093 }
2094
2095 SET_PT_BOTH (pos, pos_byte);
2096
2097 SAFE_FREE ();
2098 return make_fixnum (PT - start_point);
2099 }
2100 }
2101
2102
2103 static Lisp_Object
skip_syntaxes(bool forwardp,Lisp_Object string,Lisp_Object lim)2104 skip_syntaxes (bool forwardp, Lisp_Object string, Lisp_Object lim)
2105 {
2106 int c;
2107 unsigned char fastmap[0400];
2108 bool negate = 0;
2109 ptrdiff_t i, i_byte;
2110 bool multibyte;
2111 ptrdiff_t size_byte;
2112 unsigned char *str;
2113
2114 CHECK_STRING (string);
2115
2116 if (NILP (lim))
2117 XSETINT (lim, forwardp ? ZV : BEGV);
2118 else
2119 CHECK_FIXNUM_COERCE_MARKER (lim);
2120
2121 /* In any case, don't allow scan outside bounds of buffer. */
2122 if (XFIXNUM (lim) > ZV)
2123 XSETFASTINT (lim, ZV);
2124 if (XFIXNUM (lim) < BEGV)
2125 XSETFASTINT (lim, BEGV);
2126
2127 if (forwardp ? (PT >= XFIXNAT (lim)) : (PT <= XFIXNAT (lim)))
2128 return make_fixnum (0);
2129
2130 multibyte = (!NILP (BVAR (current_buffer, enable_multibyte_characters))
2131 && (XFIXNUM (lim) - PT != CHAR_TO_BYTE (XFIXNUM (lim)) - PT_BYTE));
2132
2133 memset (fastmap, 0, sizeof fastmap);
2134
2135 if (SBYTES (string) > SCHARS (string))
2136 /* As this is very rare case (syntax spec is ASCII only), don't
2137 consider efficiency. */
2138 string = string_make_unibyte (string);
2139
2140 str = SDATA (string);
2141 size_byte = SBYTES (string);
2142
2143 i_byte = 0;
2144 if (i_byte < size_byte
2145 && SREF (string, 0) == '^')
2146 {
2147 negate = 1; i_byte++;
2148 }
2149
2150 /* Find the syntaxes specified and set their elements of fastmap. */
2151
2152 while (i_byte < size_byte)
2153 {
2154 c = str[i_byte++];
2155 fastmap[syntax_spec_code[c]] = 1;
2156 }
2157
2158 /* If ^ was the first character, complement the fastmap. */
2159 if (negate)
2160 for (i = 0; i < sizeof fastmap; i++)
2161 fastmap[i] ^= 1;
2162
2163 {
2164 ptrdiff_t start_point = PT;
2165 ptrdiff_t pos = PT;
2166 ptrdiff_t pos_byte = PT_BYTE;
2167 unsigned char *p, *endp, *stop;
2168
2169 SETUP_SYNTAX_TABLE (pos, forwardp ? 1 : -1);
2170
2171 if (forwardp)
2172 {
2173 while (true)
2174 {
2175 p = BYTE_POS_ADDR (pos_byte);
2176 endp = XFIXNUM (lim) == GPT ? GPT_ADDR : CHAR_POS_ADDR (XFIXNUM (lim));
2177 stop = pos < GPT && GPT < XFIXNUM (lim) ? GPT_ADDR : endp;
2178
2179 do
2180 {
2181 int nbytes;
2182
2183 if (p >= stop)
2184 {
2185 if (p >= endp)
2186 goto done;
2187 p = GAP_END_ADDR;
2188 stop = endp;
2189 }
2190 if (multibyte)
2191 c = string_char_and_length (p, &nbytes);
2192 else
2193 c = *p, nbytes = 1;
2194 if (! fastmap[SYNTAX (c)])
2195 goto done;
2196 p += nbytes, pos++, pos_byte += nbytes;
2197 rarely_quit (pos);
2198 }
2199 while (!parse_sexp_lookup_properties
2200 || pos < gl_state.e_property);
2201
2202 update_syntax_table_forward (pos + gl_state.offset,
2203 false, gl_state.object);
2204 }
2205 }
2206 else
2207 {
2208 p = BYTE_POS_ADDR (pos_byte);
2209 endp = CHAR_POS_ADDR (XFIXNUM (lim));
2210 stop = pos >= GPT && GPT > XFIXNUM (lim) ? GAP_END_ADDR : endp;
2211
2212 if (multibyte)
2213 {
2214 while (true)
2215 {
2216 if (p <= stop)
2217 {
2218 if (p <= endp)
2219 break;
2220 p = GPT_ADDR;
2221 stop = endp;
2222 }
2223 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2224
2225 unsigned char *prev_p = p;
2226 do
2227 p--;
2228 while (stop <= p && ! CHAR_HEAD_P (*p));
2229
2230 c = STRING_CHAR (p);
2231 if (! fastmap[SYNTAX (c)])
2232 break;
2233 pos--, pos_byte -= prev_p - p;
2234 rarely_quit (pos);
2235 }
2236 }
2237 else
2238 {
2239 while (true)
2240 {
2241 if (p <= stop)
2242 {
2243 if (p <= endp)
2244 break;
2245 p = GPT_ADDR;
2246 stop = endp;
2247 }
2248 UPDATE_SYNTAX_TABLE_BACKWARD (pos - 1);
2249 if (! fastmap[SYNTAX (p[-1])])
2250 break;
2251 p--, pos--, pos_byte--;
2252 rarely_quit (pos);
2253 }
2254 }
2255 }
2256
2257 done:
2258 SET_PT_BOTH (pos, pos_byte);
2259
2260 return make_fixnum (PT - start_point);
2261 }
2262 }
2263
2264 /* Return true if character C belongs to one of the ISO classes
2265 in the list ISO_CLASSES. Each class is represented by an
2266 integer which is its type according to re_wctype. */
2267
2268 static bool
in_classes(int c,Lisp_Object iso_classes)2269 in_classes (int c, Lisp_Object iso_classes)
2270 {
2271 bool fits_class = 0;
2272
2273 while (CONSP (iso_classes))
2274 {
2275 Lisp_Object elt;
2276 elt = XCAR (iso_classes);
2277 iso_classes = XCDR (iso_classes);
2278
2279 if (re_iswctype (c, XFIXNAT (elt)))
2280 fits_class = 1;
2281 }
2282
2283 return fits_class;
2284 }
2285
2286 /* Jump over a comment, assuming we are at the beginning of one.
2287 FROM is the current position.
2288 FROM_BYTE is the bytepos corresponding to FROM.
2289 Do not move past STOP (a charpos).
2290 The comment over which we have to jump is of style STYLE
2291 (either SYNTAX_FLAGS_COMMENT_STYLE (foo) or ST_COMMENT_STYLE).
2292 NESTING should be positive to indicate the nesting at the beginning
2293 for nested comments and should be zero or negative else.
2294 ST_COMMENT_STYLE cannot be nested.
2295 PREV_SYNTAX is the SYNTAX_WITH_FLAGS of the previous character
2296 (or 0 if the search cannot start in the middle of a two-character).
2297
2298 If successful, return true and store the charpos of the comment's
2299 end into *CHARPOS_PTR and the corresponding bytepos into
2300 *BYTEPOS_PTR. Else, return false and store the charpos STOP into
2301 *CHARPOS_PTR, the corresponding bytepos into *BYTEPOS_PTR and the
2302 current nesting (as defined for state->incomment) in
2303 *INCOMMENT_PTR. Should the last character scanned in an incomplete
2304 comment be a possible first character of a two character construct,
2305 we store its SYNTAX_WITH_FLAGS into *last_syntax_ptr. Otherwise,
2306 we store Smax into *last_syntax_ptr.
2307
2308 The comment end is the last character of the comment rather than the
2309 character just after the comment.
2310
2311 Global syntax data is assumed to initially be valid for FROM and
2312 remains valid for forward search starting at the returned position. */
2313
2314 static bool
forw_comment(ptrdiff_t from,ptrdiff_t from_byte,ptrdiff_t stop,EMACS_INT nesting,int style,int prev_syntax,ptrdiff_t * charpos_ptr,ptrdiff_t * bytepos_ptr,EMACS_INT * incomment_ptr,int * last_syntax_ptr)2315 forw_comment (ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t stop,
2316 EMACS_INT nesting, int style, int prev_syntax,
2317 ptrdiff_t *charpos_ptr, ptrdiff_t *bytepos_ptr,
2318 EMACS_INT *incomment_ptr, int *last_syntax_ptr)
2319 {
2320 unsigned short int quit_count = 0;
2321 int c, c1;
2322 enum syntaxcode code;
2323 int syntax, other_syntax;
2324
2325 if (nesting <= 0) nesting = -1;
2326
2327 /* Enter the loop in the middle so that we find
2328 a 2-char comment ender if we start in the middle of it. */
2329 syntax = prev_syntax;
2330 code = syntax & 0xff;
2331 if (syntax != 0 && from < stop) goto forw_incomment;
2332
2333 while (1)
2334 {
2335 if (from == stop)
2336 {
2337 *incomment_ptr = nesting;
2338 *charpos_ptr = from;
2339 *bytepos_ptr = from_byte;
2340 *last_syntax_ptr =
2341 (code == Sescape || code == Scharquote
2342 || SYNTAX_FLAGS_COMEND_FIRST (syntax)
2343 || (nesting > 0
2344 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)))
2345 ? syntax : Smax ;
2346 return 0;
2347 }
2348 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2349 syntax = SYNTAX_WITH_FLAGS (c);
2350 code = syntax & 0xff;
2351 if (code == Sendcomment
2352 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style
2353 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ?
2354 (nesting > 0 && --nesting == 0) : nesting < 0)
2355 && !(comment_end_can_be_escaped && char_quoted (from, from_byte)))
2356 /* We have encountered a comment end of the same style
2357 as the comment sequence which began this comment
2358 section. */
2359 break;
2360 if (code == Scomment_fence
2361 && style == ST_COMMENT_STYLE)
2362 /* We have encountered a comment end of the same style
2363 as the comment sequence which began this comment
2364 section. */
2365 break;
2366 if (nesting > 0
2367 && code == Scomment
2368 && SYNTAX_FLAGS_COMMENT_NESTED (syntax)
2369 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0) == style)
2370 /* We have encountered a nested comment of the same style
2371 as the comment sequence which began this comment section. */
2372 nesting++;
2373 if (comment_end_can_be_escaped
2374 && (code == Sescape || code == Scharquote))
2375 {
2376 inc_both (&from, &from_byte);
2377 UPDATE_SYNTAX_TABLE_FORWARD (from);
2378 if (from == stop) continue; /* Failure */
2379 }
2380 inc_both (&from, &from_byte);
2381 UPDATE_SYNTAX_TABLE_FORWARD (from);
2382
2383 forw_incomment:
2384 if (from < stop && SYNTAX_FLAGS_COMEND_FIRST (syntax)
2385 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2386 other_syntax = SYNTAX_WITH_FLAGS (c1),
2387 SYNTAX_FLAGS_COMEND_SECOND (other_syntax))
2388 && SYNTAX_FLAGS_COMMENT_STYLE (syntax, other_syntax) == style
2389 && ((SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2390 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax))
2391 ? nesting > 0 : nesting < 0))
2392 {
2393 syntax = Smax; /* So that "|#" (lisp) can not return
2394 the syntax of "#" in *last_syntax_ptr. */
2395 if (--nesting <= 0)
2396 /* We have encountered a comment end of the same style
2397 as the comment sequence which began this comment section. */
2398 break;
2399 else
2400 {
2401 inc_both (&from, &from_byte);
2402 UPDATE_SYNTAX_TABLE_FORWARD (from);
2403 }
2404 }
2405 if (nesting > 0
2406 && from < stop
2407 && SYNTAX_FLAGS_COMSTART_FIRST (syntax)
2408 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2409 other_syntax = SYNTAX_WITH_FLAGS (c1),
2410 SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax) == style
2411 && SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2412 && (SYNTAX_FLAGS_COMMENT_NESTED (syntax) ||
2413 SYNTAX_FLAGS_COMMENT_NESTED (other_syntax)))
2414 /* We have encountered a nested comment of the same style
2415 as the comment sequence which began this comment section. */
2416 {
2417 syntax = Smax; /* So that "#|#" isn't also a comment ender. */
2418 inc_both (&from, &from_byte);
2419 UPDATE_SYNTAX_TABLE_FORWARD (from);
2420 nesting++;
2421 }
2422
2423 rarely_quit (++quit_count);
2424 }
2425 *charpos_ptr = from;
2426 *bytepos_ptr = from_byte;
2427 *last_syntax_ptr = Smax; /* Any syntactic power the last byte had is
2428 used up. */
2429 return 1;
2430 }
2431
2432 DEFUN ("forward-comment", Fforward_comment, Sforward_comment, 1, 1, 0,
2433 doc: /*
2434 Move forward across up to COUNT comments. If COUNT is negative, move backward.
2435 Stop scanning if we find something other than a comment or whitespace.
2436 Set point to where scanning stops.
2437 If COUNT comments are found as expected, with nothing except whitespace
2438 between them, return t; otherwise return nil. */)
2439 (Lisp_Object count)
2440 {
2441 ptrdiff_t from, from_byte, stop;
2442 int c, c1;
2443 enum syntaxcode code;
2444 int comstyle = 0; /* style of comment encountered */
2445 bool comnested = 0; /* whether the comment is nestable or not */
2446 bool found;
2447 EMACS_INT count1;
2448 ptrdiff_t out_charpos, out_bytepos;
2449 EMACS_INT dummy;
2450 int dummy2;
2451 unsigned short int quit_count = 0;
2452
2453 CHECK_FIXNUM (count);
2454 count1 = XFIXNUM (count);
2455 stop = count1 > 0 ? ZV : BEGV;
2456
2457 from = PT;
2458 from_byte = PT_BYTE;
2459
2460 SETUP_SYNTAX_TABLE (from, clip_to_bounds (PTRDIFF_MIN, count1, PTRDIFF_MAX));
2461 while (count1 > 0)
2462 {
2463 do
2464 {
2465 bool comstart_first;
2466 int syntax, other_syntax;
2467
2468 if (from == stop)
2469 {
2470 SET_PT_BOTH (from, from_byte);
2471 return Qnil;
2472 }
2473 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2474 syntax = SYNTAX_WITH_FLAGS (c);
2475 code = SYNTAX (c);
2476 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2477 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2478 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2479 inc_both (&from, &from_byte);
2480 UPDATE_SYNTAX_TABLE_FORWARD (from);
2481 if (from < stop && comstart_first
2482 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2483 other_syntax = SYNTAX_WITH_FLAGS (c1),
2484 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax)))
2485 {
2486 /* We have encountered a comment start sequence and we
2487 are ignoring all text inside comments. We must record
2488 the comment style this sequence begins so that later,
2489 only a comment end of the same style actually ends
2490 the comment section. */
2491 code = Scomment;
2492 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2493 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2494 inc_both (&from, &from_byte);
2495 UPDATE_SYNTAX_TABLE_FORWARD (from);
2496 }
2497 rarely_quit (++quit_count);
2498 }
2499 while (code == Swhitespace || (code == Sendcomment && c == '\n'));
2500
2501 if (code == Scomment_fence)
2502 comstyle = ST_COMMENT_STYLE;
2503 else if (code != Scomment)
2504 {
2505 dec_both (&from, &from_byte);
2506 SET_PT_BOTH (from, from_byte);
2507 return Qnil;
2508 }
2509 /* We're at the start of a comment. */
2510 found = forw_comment (from, from_byte, stop, comnested, comstyle, 0,
2511 &out_charpos, &out_bytepos, &dummy, &dummy2);
2512 from = out_charpos; from_byte = out_bytepos;
2513 if (!found)
2514 {
2515 SET_PT_BOTH (from, from_byte);
2516 return Qnil;
2517 }
2518 inc_both (&from, &from_byte);
2519 UPDATE_SYNTAX_TABLE_FORWARD (from);
2520 /* We have skipped one comment. */
2521 count1--;
2522 }
2523
2524 while (count1 < 0)
2525 {
2526 while (true)
2527 {
2528 if (from <= stop)
2529 {
2530 SET_PT_BOTH (BEGV, BEGV_BYTE);
2531 return Qnil;
2532 }
2533
2534 dec_both (&from, &from_byte);
2535 /* char_quoted does UPDATE_SYNTAX_TABLE_BACKWARD (from). */
2536 bool quoted = char_quoted (from, from_byte);
2537 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2538 int syntax = SYNTAX_WITH_FLAGS (c);
2539 code = SYNTAX (c);
2540 comstyle = 0;
2541 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2542 if (code == Sendcomment)
2543 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2544 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2545 && prev_char_comend_first (from, from_byte)
2546 && !char_quoted (from - 1, dec_bytepos (from_byte)))
2547 {
2548 int other_syntax;
2549 /* We must record the comment style encountered so that
2550 later, we can match only the proper comment begin
2551 sequence of the same style. */
2552 dec_both (&from, &from_byte);
2553 code = Sendcomment;
2554 /* Calling char_quoted, above, set up global syntax position
2555 at the new value of FROM. */
2556 c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2557 other_syntax = SYNTAX_WITH_FLAGS (c1);
2558 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2559 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2560 }
2561
2562 if (code == Scomment_fence)
2563 {
2564 /* Skip until first preceding unquoted comment_fence. */
2565 bool fence_found = 0;
2566 ptrdiff_t ini = from, ini_byte = from_byte;
2567
2568 if (from > stop)
2569 {
2570 while (1)
2571 {
2572 dec_both (&from, &from_byte);
2573 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2574 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2575 if (SYNTAX (c) == Scomment_fence
2576 && !char_quoted (from, from_byte))
2577 {
2578 fence_found = 1;
2579 break;
2580 }
2581 else if (from == stop)
2582 break;
2583 rarely_quit (++quit_count);
2584 }
2585 }
2586 if (fence_found == 0)
2587 {
2588 from = ini; /* Set point to ini + 1. */
2589 from_byte = ini_byte;
2590 goto leave;
2591 }
2592 else
2593 /* We have skipped one comment. */
2594 break;
2595 }
2596 else if (code == Sendcomment)
2597 {
2598 found = (!quoted || !comment_end_can_be_escaped)
2599 && back_comment (from, from_byte, stop, comnested, comstyle,
2600 &out_charpos, &out_bytepos);
2601 if (!found)
2602 {
2603 if (c == '\n')
2604 /* This end-of-line is not an end-of-comment.
2605 Treat it like a whitespace.
2606 CC-mode (and maybe others) relies on this behavior. */
2607 ;
2608 else
2609 {
2610 /* Failure: we should go back to the end of this
2611 not-quite-endcomment. */
2612 if (SYNTAX (c) != code)
2613 /* It was a two-char Sendcomment. */
2614 inc_both (&from, &from_byte);
2615 goto leave;
2616 }
2617 }
2618 else
2619 {
2620 /* We have skipped one comment. */
2621 from = out_charpos, from_byte = out_bytepos;
2622 break;
2623 }
2624 }
2625 else if (code != Swhitespace || quoted)
2626 {
2627 leave:
2628 inc_both (&from, &from_byte);
2629 SET_PT_BOTH (from, from_byte);
2630 return Qnil;
2631 }
2632
2633 rarely_quit (++quit_count);
2634 }
2635
2636 count1++;
2637 }
2638
2639 SET_PT_BOTH (from, from_byte);
2640 return Qt;
2641 }
2642
2643 /* Return syntax code of character C if C is an ASCII character
2644 or if MULTIBYTE_SYMBOL_P is false. Otherwise, return Ssymbol. */
2645
2646 static enum syntaxcode
syntax_multibyte(int c,bool multibyte_symbol_p)2647 syntax_multibyte (int c, bool multibyte_symbol_p)
2648 {
2649 return ASCII_CHAR_P (c) || !multibyte_symbol_p ? SYNTAX (c) : Ssymbol;
2650 }
2651
2652 static Lisp_Object
scan_lists(EMACS_INT from0,EMACS_INT count,EMACS_INT depth,bool sexpflag)2653 scan_lists (EMACS_INT from0, EMACS_INT count, EMACS_INT depth, bool sexpflag)
2654 {
2655 Lisp_Object val;
2656 ptrdiff_t stop = count > 0 ? ZV : BEGV;
2657 int c, c1;
2658 int stringterm;
2659 bool quoted;
2660 bool mathexit = 0;
2661 enum syntaxcode code;
2662 EMACS_INT min_depth = depth; /* Err out if depth gets less than this. */
2663 int comstyle = 0; /* Style of comment encountered. */
2664 bool comnested = 0; /* Whether the comment is nestable or not. */
2665 ptrdiff_t temp_pos;
2666 EMACS_INT last_good = from0;
2667 bool found;
2668 ptrdiff_t from_byte;
2669 ptrdiff_t out_bytepos, out_charpos;
2670 EMACS_INT dummy;
2671 int dummy2;
2672 bool multibyte_symbol_p = sexpflag && multibyte_syntax_as_symbol;
2673 unsigned short int quit_count = 0;
2674
2675 if (depth > 0) min_depth = 0;
2676
2677 ptrdiff_t from = clip_to_bounds (BEGV, from0, ZV);
2678
2679 from_byte = CHAR_TO_BYTE (from);
2680
2681 maybe_quit ();
2682
2683 SETUP_SYNTAX_TABLE (from, clip_to_bounds (PTRDIFF_MIN, count, PTRDIFF_MAX));
2684 while (count > 0)
2685 {
2686 while (from < stop)
2687 {
2688 rarely_quit (++quit_count);
2689 bool comstart_first, prefix;
2690 int syntax, other_syntax;
2691 UPDATE_SYNTAX_TABLE_FORWARD (from);
2692 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2693 syntax = SYNTAX_WITH_FLAGS (c);
2694 code = syntax_multibyte (c, multibyte_symbol_p);
2695 comstart_first = SYNTAX_FLAGS_COMSTART_FIRST (syntax);
2696 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2697 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2698 prefix = SYNTAX_FLAGS_PREFIX (syntax);
2699 if (depth == min_depth)
2700 last_good = from;
2701 inc_both (&from, &from_byte);
2702 UPDATE_SYNTAX_TABLE_FORWARD (from);
2703 if (from < stop && comstart_first
2704 && (c = FETCH_CHAR_AS_MULTIBYTE (from_byte),
2705 other_syntax = SYNTAX_WITH_FLAGS (c),
2706 SYNTAX_FLAGS_COMSTART_SECOND (other_syntax))
2707 && parse_sexp_ignore_comments)
2708 {
2709 /* We have encountered a comment start sequence and we
2710 are ignoring all text inside comments. We must record
2711 the comment style this sequence begins so that later,
2712 only a comment end of the same style actually ends
2713 the comment section. */
2714 code = Scomment;
2715 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2716 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2717 inc_both (&from, &from_byte);
2718 UPDATE_SYNTAX_TABLE_FORWARD (from);
2719 }
2720
2721 if (prefix)
2722 continue;
2723
2724 switch (code)
2725 {
2726 case Sescape:
2727 case Scharquote:
2728 if (from == stop)
2729 goto lose;
2730 inc_both (&from, &from_byte);
2731 /* Treat following character as a word constituent. */
2732 FALLTHROUGH;
2733 case Sword:
2734 case Ssymbol:
2735 if (depth || !sexpflag) break;
2736 /* This word counts as a sexp; return at end of it. */
2737 while (from < stop)
2738 {
2739 UPDATE_SYNTAX_TABLE_FORWARD (from);
2740
2741 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2742 switch (syntax_multibyte (c, multibyte_symbol_p))
2743 {
2744 case Scharquote:
2745 case Sescape:
2746 inc_both (&from, &from_byte);
2747 if (from == stop)
2748 goto lose;
2749 break;
2750 case Sword:
2751 case Ssymbol:
2752 case Squote:
2753 break;
2754 default:
2755 goto done;
2756 }
2757 inc_both (&from, &from_byte);
2758 rarely_quit (++quit_count);
2759 }
2760 goto done;
2761
2762 case Scomment_fence:
2763 comstyle = ST_COMMENT_STYLE;
2764 FALLTHROUGH;
2765 case Scomment:
2766 if (!parse_sexp_ignore_comments) break;
2767 UPDATE_SYNTAX_TABLE_FORWARD (from);
2768 found = forw_comment (from, from_byte, stop,
2769 comnested, comstyle, 0,
2770 &out_charpos, &out_bytepos, &dummy,
2771 &dummy2);
2772 from = out_charpos, from_byte = out_bytepos;
2773 if (!found)
2774 {
2775 if (depth == 0)
2776 goto done;
2777 goto lose;
2778 }
2779 inc_both (&from, &from_byte);
2780 UPDATE_SYNTAX_TABLE_FORWARD (from);
2781 break;
2782
2783 case Smath:
2784 if (!sexpflag)
2785 break;
2786 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (from_byte))
2787 {
2788 inc_both (&from, &from_byte);
2789 }
2790 if (mathexit)
2791 {
2792 mathexit = 0;
2793 goto close1;
2794 }
2795 mathexit = 1;
2796 FALLTHROUGH;
2797 case Sopen:
2798 if (!++depth) goto done;
2799 break;
2800
2801 case Sclose:
2802 close1:
2803 if (!--depth) goto done;
2804 if (depth < min_depth)
2805 xsignal3 (Qscan_error,
2806 build_string ("Containing expression ends prematurely"),
2807 make_fixnum (last_good), make_fixnum (from));
2808 break;
2809
2810 case Sstring:
2811 case Sstring_fence:
2812 temp_pos = dec_bytepos (from_byte);
2813 stringterm = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2814 while (1)
2815 {
2816 enum syntaxcode c_code;
2817 if (from >= stop)
2818 goto lose;
2819 UPDATE_SYNTAX_TABLE_FORWARD (from);
2820 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2821 c_code = syntax_multibyte (c, multibyte_symbol_p);
2822 if (code == Sstring
2823 ? c == stringterm && c_code == Sstring
2824 : c_code == Sstring_fence)
2825 break;
2826
2827 if (c_code == Scharquote || c_code == Sescape)
2828 inc_both (&from, &from_byte);
2829 inc_both (&from, &from_byte);
2830 rarely_quit (++quit_count);
2831 }
2832 inc_both (&from, &from_byte);
2833 if (!depth && sexpflag) goto done;
2834 break;
2835 default:
2836 /* Ignore whitespace, punctuation, quote, endcomment. */
2837 break;
2838 }
2839 }
2840
2841 /* Reached end of buffer. Error if within object, return nil if between */
2842 if (depth)
2843 goto lose;
2844
2845 return Qnil;
2846
2847 /* End of object reached */
2848 done:
2849 count--;
2850 }
2851
2852 while (count < 0)
2853 {
2854 while (from > stop)
2855 {
2856 rarely_quit (++quit_count);
2857 dec_both (&from, &from_byte);
2858 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2859 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2860 int syntax = SYNTAX_WITH_FLAGS (c);
2861 code = syntax_multibyte (c, multibyte_symbol_p);
2862 if (depth == min_depth)
2863 last_good = from;
2864 comstyle = 0;
2865 comnested = SYNTAX_FLAGS_COMMENT_NESTED (syntax);
2866 if (code == Sendcomment)
2867 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (syntax, 0);
2868 if (from > stop && SYNTAX_FLAGS_COMEND_SECOND (syntax)
2869 && prev_char_comend_first (from, from_byte)
2870 && parse_sexp_ignore_comments)
2871 {
2872 /* We must record the comment style encountered so that
2873 later, we can match only the proper comment begin
2874 sequence of the same style. */
2875 int c2, other_syntax;
2876 dec_both (&from, &from_byte);
2877 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2878 code = Sendcomment;
2879 c2 = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2880 other_syntax = SYNTAX_WITH_FLAGS (c2);
2881 comstyle = SYNTAX_FLAGS_COMMENT_STYLE (other_syntax, syntax);
2882 comnested |= SYNTAX_FLAGS_COMMENT_NESTED (other_syntax);
2883 }
2884
2885 /* Quoting turns anything except a comment-ender
2886 into a word character. Note that this cannot be true
2887 if we decremented FROM in the if-statement above. */
2888 if (code != Sendcomment && char_quoted (from, from_byte))
2889 {
2890 dec_both (&from, &from_byte);
2891 code = Sword;
2892 }
2893 else if (SYNTAX_FLAGS_PREFIX (syntax))
2894 continue;
2895
2896 switch (code)
2897 {
2898 case Sword:
2899 case Ssymbol:
2900 case Sescape:
2901 case Scharquote:
2902 if (depth || !sexpflag) break;
2903 /* This word counts as a sexp; count object finished
2904 after passing it. */
2905 while (from > stop)
2906 {
2907 temp_pos = dec_bytepos (from_byte);
2908 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2909 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2910 /* Don't allow comment-end to be quoted. */
2911 if (syntax_multibyte (c1, multibyte_symbol_p) == Sendcomment)
2912 goto done2;
2913 quoted = char_quoted (from - 1, temp_pos);
2914 if (quoted)
2915 {
2916 dec_both (&from, &from_byte);
2917 temp_pos = dec_bytepos (temp_pos);
2918 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2919 }
2920 c1 = FETCH_CHAR_AS_MULTIBYTE (temp_pos);
2921 if (! quoted)
2922 switch (syntax_multibyte (c1, multibyte_symbol_p))
2923 {
2924 case Sword: case Ssymbol: case Squote: break;
2925 default: goto done2;
2926 }
2927 dec_both (&from, &from_byte);
2928 rarely_quit (++quit_count);
2929 }
2930 goto done2;
2931
2932 case Smath:
2933 if (!sexpflag)
2934 break;
2935 if (from > BEGV)
2936 {
2937 temp_pos = dec_bytepos (from_byte);
2938 UPDATE_SYNTAX_TABLE_BACKWARD (from - 1);
2939 if (from != stop && c == FETCH_CHAR_AS_MULTIBYTE (temp_pos))
2940 dec_both (&from, &from_byte);
2941 }
2942 if (mathexit)
2943 {
2944 mathexit = 0;
2945 goto open2;
2946 }
2947 mathexit = 1;
2948 FALLTHROUGH;
2949 case Sclose:
2950 if (!++depth) goto done2;
2951 break;
2952
2953 case Sopen:
2954 open2:
2955 if (!--depth) goto done2;
2956 if (depth < min_depth)
2957 xsignal3 (Qscan_error,
2958 build_string ("Containing expression ends prematurely"),
2959 make_fixnum (last_good), make_fixnum (from));
2960 break;
2961
2962 case Sendcomment:
2963 if (!parse_sexp_ignore_comments)
2964 break;
2965 found = back_comment (from, from_byte, stop, comnested, comstyle,
2966 &out_charpos, &out_bytepos);
2967 /* FIXME: if !found, it really wasn't a comment-end.
2968 For single-char Sendcomment, we can't do much about it apart
2969 from skipping the char.
2970 For 2-char endcomments, we could try again, taking both
2971 chars as separate entities, but it's a lot of trouble
2972 for very little gain, so we don't bother either. -sm */
2973 if (found)
2974 from = out_charpos, from_byte = out_bytepos;
2975 break;
2976
2977 case Scomment_fence:
2978 case Sstring_fence:
2979 while (1)
2980 {
2981 if (from == stop)
2982 goto lose;
2983 dec_both (&from, &from_byte);
2984 UPDATE_SYNTAX_TABLE_BACKWARD (from);
2985 if (!char_quoted (from, from_byte))
2986 {
2987 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2988 if (syntax_multibyte (c, multibyte_symbol_p) == code)
2989 break;
2990 }
2991 rarely_quit (++quit_count);
2992 }
2993 if (code == Sstring_fence && !depth && sexpflag) goto done2;
2994 break;
2995
2996 case Sstring:
2997 stringterm = FETCH_CHAR_AS_MULTIBYTE (from_byte);
2998 while (true)
2999 {
3000 if (from == stop)
3001 goto lose;
3002 dec_both (&from, &from_byte);
3003 UPDATE_SYNTAX_TABLE_BACKWARD (from);
3004 if (!char_quoted (from, from_byte))
3005 {
3006 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3007 if (c == stringterm
3008 && (syntax_multibyte (c, multibyte_symbol_p)
3009 == Sstring))
3010 break;
3011 }
3012 rarely_quit (++quit_count);
3013 }
3014 if (!depth && sexpflag) goto done2;
3015 break;
3016 default:
3017 /* Ignore whitespace, punctuation, quote, endcomment. */
3018 break;
3019 }
3020 }
3021
3022 /* Reached start of buffer. Error if within object, return nil if between */
3023 if (depth)
3024 goto lose;
3025
3026 return Qnil;
3027
3028 done2:
3029 count++;
3030 }
3031
3032
3033 XSETFASTINT (val, from);
3034 return val;
3035
3036 lose:
3037 xsignal3 (Qscan_error,
3038 build_string ("Unbalanced parentheses"),
3039 make_fixnum (last_good), make_fixnum (from));
3040 }
3041
3042 DEFUN ("scan-lists", Fscan_lists, Sscan_lists, 3, 3, 0,
3043 doc: /* Scan from character number FROM by COUNT lists.
3044 Scan forward if COUNT is positive, backward if COUNT is negative.
3045 Return the character number of the position thus found.
3046
3047 A \"list", in this context, refers to a balanced parenthetical
3048 grouping, as determined by the syntax table.
3049
3050 If DEPTH is nonzero, treat that as the nesting depth of the starting
3051 point (i.e. the starting point is DEPTH parentheses deep). This
3052 function scans over parentheses until the depth goes to zero COUNT
3053 times. Hence, positive DEPTH moves out that number of levels of
3054 parentheses, while negative DEPTH moves to a deeper level.
3055
3056 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3057
3058 If we reach the beginning or end of the accessible part of the buffer
3059 before we have scanned over COUNT lists, return nil if the depth at
3060 that point is zero, and signal an error if the depth is nonzero. */)
3061 (Lisp_Object from, Lisp_Object count, Lisp_Object depth)
3062 {
3063 CHECK_FIXNUM (from);
3064 CHECK_FIXNUM (count);
3065 CHECK_FIXNUM (depth);
3066
3067 return scan_lists (XFIXNUM (from), XFIXNUM (count), XFIXNUM (depth), 0);
3068 }
3069
3070 DEFUN ("scan-sexps", Fscan_sexps, Sscan_sexps, 2, 2, 0,
3071 doc: /* Scan from character number FROM by COUNT balanced expressions.
3072 If COUNT is negative, scan backwards.
3073 Returns the character number of the position thus found.
3074
3075 Comments are ignored if `parse-sexp-ignore-comments' is non-nil.
3076
3077 If the beginning or end of (the accessible part of) the buffer is reached
3078 in the middle of a parenthetical grouping, an error is signaled.
3079 If the beginning or end is reached between groupings
3080 but before count is used up, nil is returned. */)
3081 (Lisp_Object from, Lisp_Object count)
3082 {
3083 CHECK_FIXNUM (from);
3084 CHECK_FIXNUM (count);
3085
3086 return scan_lists (XFIXNUM (from), XFIXNUM (count), 0, 1);
3087 }
3088
3089 DEFUN ("backward-prefix-chars", Fbackward_prefix_chars, Sbackward_prefix_chars,
3090 0, 0, 0,
3091 doc: /* Move point backward over any number of chars with prefix syntax.
3092 This includes chars with expression prefix syntax class (\\=') and those with
3093 the prefix syntax flag (p). */)
3094 (void)
3095 {
3096 ptrdiff_t beg = BEGV;
3097 ptrdiff_t opoint = PT;
3098 ptrdiff_t opoint_byte = PT_BYTE;
3099 ptrdiff_t pos = PT;
3100 ptrdiff_t pos_byte = PT_BYTE;
3101 int c;
3102
3103 if (pos <= beg)
3104 {
3105 SET_PT_BOTH (opoint, opoint_byte);
3106
3107 return Qnil;
3108 }
3109
3110 SETUP_SYNTAX_TABLE (pos, -1);
3111
3112 dec_both (&pos, &pos_byte);
3113
3114 while (!char_quoted (pos, pos_byte)
3115 /* Previous statement updates syntax table. */
3116 && ((c = FETCH_CHAR_AS_MULTIBYTE (pos_byte), SYNTAX (c) == Squote)
3117 || syntax_prefix_flag_p (c)))
3118 {
3119 opoint = pos;
3120 opoint_byte = pos_byte;
3121
3122 if (pos <= beg)
3123 break;
3124 dec_both (&pos, &pos_byte);
3125 rarely_quit (pos);
3126 }
3127
3128 SET_PT_BOTH (opoint, opoint_byte);
3129
3130 return Qnil;
3131 }
3132
3133
3134 /* If the character at FROM_BYTE is the second part of a 2-character
3135 comment opener based on PREV_FROM_SYNTAX, update STATE and return
3136 true. */
3137 static bool
in_2char_comment_start(struct lisp_parse_state * state,int prev_from_syntax,ptrdiff_t prev_from,ptrdiff_t from_byte)3138 in_2char_comment_start (struct lisp_parse_state *state,
3139 int prev_from_syntax,
3140 ptrdiff_t prev_from,
3141 ptrdiff_t from_byte)
3142 {
3143 int c1, syntax;
3144 if (SYNTAX_FLAGS_COMSTART_FIRST (prev_from_syntax)
3145 && (c1 = FETCH_CHAR_AS_MULTIBYTE (from_byte),
3146 syntax = SYNTAX_WITH_FLAGS (c1),
3147 SYNTAX_FLAGS_COMSTART_SECOND (syntax)))
3148 {
3149 /* Record the comment style we have entered so that only
3150 the comment-end sequence of the same style actually
3151 terminates the comment section. */
3152 state->comstyle
3153 = SYNTAX_FLAGS_COMMENT_STYLE (syntax, prev_from_syntax);
3154 bool comnested = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax)
3155 | SYNTAX_FLAGS_COMMENT_NESTED (syntax));
3156 state->incomment = comnested ? 1 : -1;
3157 state->comstr_start = prev_from;
3158 return true;
3159 }
3160 return false;
3161 }
3162
3163 /* Parse forward from FROM / FROM_BYTE to END,
3164 assuming that FROM has state STATE,
3165 and return a description of the state of the parse at END.
3166 If STOPBEFORE, stop at the start of an atom.
3167 If COMMENTSTOP is 1, stop at the start of a comment.
3168 If COMMENTSTOP is -1, stop at the start or end of a comment,
3169 after the beginning of a string, or after the end of a string. */
3170
3171 static void
scan_sexps_forward(struct lisp_parse_state * state,ptrdiff_t from,ptrdiff_t from_byte,ptrdiff_t end,EMACS_INT targetdepth,bool stopbefore,int commentstop)3172 scan_sexps_forward (struct lisp_parse_state *state,
3173 ptrdiff_t from, ptrdiff_t from_byte, ptrdiff_t end,
3174 EMACS_INT targetdepth, bool stopbefore,
3175 int commentstop)
3176 {
3177 enum syntaxcode code;
3178 struct level { ptrdiff_t last, prev; };
3179 struct level levelstart[100];
3180 struct level *curlevel = levelstart;
3181 struct level *endlevel = levelstart + 100;
3182 EMACS_INT depth; /* Paren depth of current scanning location.
3183 level - levelstart equals this except
3184 when the depth becomes negative. */
3185 EMACS_INT mindepth; /* Lowest DEPTH value seen. */
3186 bool start_quoted = 0; /* True means starting after a char quote. */
3187 Lisp_Object tem;
3188 ptrdiff_t prev_from; /* Keep one character before FROM. */
3189 ptrdiff_t prev_from_byte;
3190 int prev_from_syntax, prev_prev_from_syntax;
3191 bool boundary_stop = commentstop == -1;
3192 bool nofence;
3193 bool found;
3194 ptrdiff_t out_bytepos, out_charpos;
3195 int temp;
3196 unsigned short int quit_count = 0;
3197
3198 prev_from = from;
3199 prev_from_byte = from_byte;
3200 if (from != BEGV)
3201 dec_both (&prev_from, &prev_from_byte);
3202
3203 /* Use this macro instead of `from++'. */
3204 #define INC_FROM \
3205 do { prev_from = from; \
3206 prev_from_byte = from_byte; \
3207 temp = FETCH_CHAR_AS_MULTIBYTE (prev_from_byte); \
3208 prev_prev_from_syntax = prev_from_syntax; \
3209 prev_from_syntax = SYNTAX_WITH_FLAGS (temp); \
3210 inc_both (&from, &from_byte); \
3211 if (from < end) \
3212 UPDATE_SYNTAX_TABLE_FORWARD (from); \
3213 } while (0)
3214
3215 maybe_quit ();
3216
3217 depth = state->depth;
3218 start_quoted = state->quoted;
3219 prev_prev_from_syntax = Smax;
3220 prev_from_syntax = state->prev_syntax;
3221
3222 tem = state->levelstarts;
3223 while (!NILP (tem)) /* >= second enclosing sexps. */
3224 {
3225 Lisp_Object temhd = Fcar (tem);
3226 if (RANGED_FIXNUMP (PTRDIFF_MIN, temhd, PTRDIFF_MAX))
3227 curlevel->last = XFIXNUM (temhd);
3228 if (++curlevel == endlevel)
3229 curlevel--; /* error ("Nesting too deep for parser"); */
3230 curlevel->prev = -1;
3231 curlevel->last = -1;
3232 tem = Fcdr (tem);
3233 }
3234 curlevel->prev = -1;
3235 curlevel->last = -1;
3236
3237 state->quoted = 0;
3238 mindepth = depth;
3239
3240 SETUP_SYNTAX_TABLE (from, 1);
3241
3242 /* Enter the loop at a place appropriate for initial state. */
3243
3244 if (state->incomment)
3245 goto startincomment;
3246 if (state->instring >= 0)
3247 {
3248 nofence = state->instring != ST_STRING_STYLE;
3249 if (start_quoted)
3250 goto startquotedinstring;
3251 goto startinstring;
3252 }
3253 else if (start_quoted)
3254 goto startquoted;
3255 else if ((from < end)
3256 && (in_2char_comment_start (state, prev_from_syntax,
3257 prev_from, from_byte)))
3258 {
3259 INC_FROM;
3260 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3261 goto atcomment;
3262 }
3263
3264 while (from < end)
3265 {
3266 rarely_quit (++quit_count);
3267 INC_FROM;
3268
3269 if ((from < end)
3270 && (in_2char_comment_start (state, prev_from_syntax,
3271 prev_from, from_byte)))
3272 {
3273 INC_FROM;
3274 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3275 goto atcomment;
3276 }
3277
3278 if (SYNTAX_FLAGS_PREFIX (prev_from_syntax))
3279 continue;
3280 code = prev_from_syntax & 0xff;
3281 switch (code)
3282 {
3283 case Sescape:
3284 case Scharquote:
3285 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3286 curlevel->last = prev_from;
3287 startquoted:
3288 if (from == end) goto endquoted;
3289 INC_FROM;
3290 goto symstarted;
3291 /* treat following character as a word constituent */
3292 case Sword:
3293 case Ssymbol:
3294 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3295 curlevel->last = prev_from;
3296 symstarted:
3297 while (from < end)
3298 {
3299 if (in_2char_comment_start (state, prev_from_syntax,
3300 prev_from, from_byte))
3301 {
3302 INC_FROM;
3303 prev_from_syntax = Smax; /* the syntax has already been "used up". */
3304 goto atcomment;
3305 }
3306
3307 int symchar = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3308 switch (SYNTAX (symchar))
3309 {
3310 case Scharquote:
3311 case Sescape:
3312 INC_FROM;
3313 if (from == end) goto endquoted;
3314 break;
3315 case Sword:
3316 case Ssymbol:
3317 case Squote:
3318 break;
3319 default:
3320 goto symdone;
3321 }
3322 INC_FROM;
3323 rarely_quit (++quit_count);
3324 }
3325 symdone:
3326 curlevel->prev = curlevel->last;
3327 break;
3328
3329 case Scomment_fence:
3330 /* Record the comment style we have entered so that only
3331 the comment-end sequence of the same style actually
3332 terminates the comment section. */
3333 state->comstyle = ST_COMMENT_STYLE;
3334 state->incomment = -1;
3335 state->comstr_start = prev_from;
3336 goto atcomment;
3337 case Scomment:
3338 state->comstyle = SYNTAX_FLAGS_COMMENT_STYLE (prev_from_syntax, 0);
3339 state->incomment = (SYNTAX_FLAGS_COMMENT_NESTED (prev_from_syntax) ?
3340 1 : -1);
3341 state->comstr_start = prev_from;
3342 atcomment:
3343 if (commentstop || boundary_stop) goto done;
3344 startincomment:
3345 /* The (from == BEGV) test was to enter the loop in the middle so
3346 that we find a 2-char comment ender even if we start in the
3347 middle of it. We don't want to do that if we're just at the
3348 beginning of the comment (think of (*) ... (*)). */
3349 found = forw_comment (from, from_byte, end,
3350 state->incomment, state->comstyle,
3351 from == BEGV ? 0 : prev_from_syntax,
3352 &out_charpos, &out_bytepos, &state->incomment,
3353 &prev_from_syntax);
3354 from = out_charpos; from_byte = out_bytepos;
3355 /* Beware! prev_from and friends (except prev_from_syntax)
3356 are invalid now. Luckily, the `done' doesn't use them
3357 and the INC_FROM sets them to a sane value without
3358 looking at them. */
3359 if (!found) goto done;
3360 INC_FROM;
3361 state->incomment = 0;
3362 state->comstyle = 0; /* reset the comment style */
3363 prev_from_syntax = Smax; /* For the comment closer */
3364 if (boundary_stop) goto done;
3365 break;
3366
3367 case Sopen:
3368 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3369 depth++;
3370 /* curlevel++->last ran into compiler bug on Apollo */
3371 curlevel->last = prev_from;
3372 if (++curlevel == endlevel)
3373 curlevel--; /* error ("Nesting too deep for parser"); */
3374 curlevel->prev = -1;
3375 curlevel->last = -1;
3376 if (targetdepth == depth) goto done;
3377 break;
3378
3379 case Sclose:
3380 depth--;
3381 if (depth < mindepth)
3382 mindepth = depth;
3383 if (curlevel != levelstart)
3384 curlevel--;
3385 curlevel->prev = curlevel->last;
3386 if (targetdepth == depth) goto done;
3387 break;
3388
3389 case Sstring:
3390 case Sstring_fence:
3391 state->comstr_start = from - 1;
3392 if (stopbefore) goto stop; /* this arg means stop at sexp start */
3393 curlevel->last = prev_from;
3394 state->instring = (code == Sstring
3395 ? (FETCH_CHAR_AS_MULTIBYTE (prev_from_byte))
3396 : ST_STRING_STYLE);
3397 if (boundary_stop) goto done;
3398 startinstring:
3399 {
3400 nofence = state->instring != ST_STRING_STYLE;
3401
3402 while (1)
3403 {
3404 int c;
3405 enum syntaxcode c_code;
3406
3407 if (from >= end) goto done;
3408 c = FETCH_CHAR_AS_MULTIBYTE (from_byte);
3409 c_code = SYNTAX (c);
3410
3411 /* Check C_CODE here so that if the char has
3412 a syntax-table property which says it is NOT
3413 a string character, it does not end the string. */
3414 if (nofence && c == state->instring && c_code == Sstring)
3415 break;
3416
3417 switch (c_code)
3418 {
3419 case Sstring_fence:
3420 if (!nofence) goto string_end;
3421 break;
3422
3423 case Scharquote:
3424 case Sescape:
3425 INC_FROM;
3426 startquotedinstring:
3427 if (from >= end) goto endquoted;
3428 break;
3429
3430 default:
3431 break;
3432 }
3433 INC_FROM;
3434 rarely_quit (++quit_count);
3435 }
3436 }
3437 string_end:
3438 state->instring = -1;
3439 curlevel->prev = curlevel->last;
3440 INC_FROM;
3441 if (boundary_stop) goto done;
3442 break;
3443
3444 case Smath:
3445 /* FIXME: We should do something with it. */
3446 break;
3447 default:
3448 /* Ignore whitespace, punctuation, quote, endcomment. */
3449 break;
3450 }
3451 }
3452 goto done;
3453
3454 stop: /* Here if stopping before start of sexp. */
3455 from = prev_from; /* We have just fetched the char that starts it; */
3456 from_byte = prev_from_byte;
3457 prev_from_syntax = prev_prev_from_syntax;
3458 goto done; /* but return the position before it. */
3459
3460 endquoted:
3461 state->quoted = 1;
3462 done:
3463 state->depth = depth;
3464 state->mindepth = mindepth;
3465 state->thislevelstart = curlevel->prev;
3466 state->prevlevelstart
3467 = (curlevel == levelstart) ? -1 : (curlevel - 1)->last;
3468 state->location = from;
3469 state->location_byte = from_byte;
3470 state->levelstarts = Qnil;
3471 while (curlevel > levelstart)
3472 state->levelstarts = Fcons (make_fixnum ((--curlevel)->last),
3473 state->levelstarts);
3474 state->prev_syntax = (SYNTAX_FLAGS_COMSTARTEND_FIRST (prev_from_syntax)
3475 || state->quoted) ? prev_from_syntax : Smax;
3476 }
3477
3478 /* Convert a (lisp) parse state to the internal form used in
3479 scan_sexps_forward. */
3480 static void
internalize_parse_state(Lisp_Object external,struct lisp_parse_state * state)3481 internalize_parse_state (Lisp_Object external, struct lisp_parse_state *state)
3482 {
3483 Lisp_Object tem;
3484
3485 if (NILP (external))
3486 {
3487 state->depth = 0;
3488 state->instring = -1;
3489 state->incomment = 0;
3490 state->quoted = 0;
3491 state->comstyle = 0; /* comment style a by default. */
3492 state->comstr_start = -1; /* no comment/string seen. */
3493 state->levelstarts = Qnil;
3494 state->prev_syntax = Smax;
3495 }
3496 else
3497 {
3498 tem = Fcar (external);
3499 state->depth = FIXNUMP (tem) ? XFIXNUM (tem) : 0;
3500
3501 external = Fcdr (external);
3502 external = Fcdr (external);
3503 external = Fcdr (external);
3504 tem = Fcar (external);
3505 /* Check whether we are inside string_fence-style string: */
3506 state->instring = (!NILP (tem)
3507 ? (CHARACTERP (tem) ? XFIXNAT (tem) : ST_STRING_STYLE)
3508 : -1);
3509
3510 external = Fcdr (external);
3511 tem = Fcar (external);
3512 state->incomment = (!NILP (tem)
3513 ? (FIXNUMP (tem) ? XFIXNUM (tem) : -1)
3514 : 0);
3515
3516 external = Fcdr (external);
3517 tem = Fcar (external);
3518 state->quoted = !NILP (tem);
3519
3520 /* if the eighth element of the list is nil, we are in comment
3521 style a. If it is non-nil, we are in comment style b */
3522 external = Fcdr (external);
3523 external = Fcdr (external);
3524 tem = Fcar (external);
3525 state->comstyle = (NILP (tem)
3526 ? 0
3527 : (RANGED_FIXNUMP (0, tem, ST_COMMENT_STYLE)
3528 ? XFIXNUM (tem)
3529 : ST_COMMENT_STYLE));
3530
3531 external = Fcdr (external);
3532 tem = Fcar (external);
3533 state->comstr_start =
3534 RANGED_FIXNUMP (PTRDIFF_MIN, tem, PTRDIFF_MAX) ? XFIXNUM (tem) : -1;
3535 external = Fcdr (external);
3536 tem = Fcar (external);
3537 state->levelstarts = tem;
3538
3539 external = Fcdr (external);
3540 tem = Fcar (external);
3541 state->prev_syntax = NILP (tem) ? Smax : XFIXNUM (tem);
3542 }
3543 }
3544
3545 DEFUN ("parse-partial-sexp", Fparse_partial_sexp, Sparse_partial_sexp, 2, 6, 0,
3546 doc: /* Parse Lisp syntax starting at FROM until TO; return status of parse at TO.
3547 Parsing stops at TO or when certain criteria are met;
3548 point is set to where parsing stops.
3549
3550 If OLDSTATE is omitted or nil, parsing assumes that FROM is the
3551 beginning of a function. If not, OLDSTATE should be the state at
3552 FROM.
3553
3554 Value is a list of elements describing final state of parsing:
3555 0. depth in parens.
3556 1. character address of start of innermost containing list; nil if none.
3557 2. character address of start of last complete sexp terminated.
3558 3. non-nil if inside a string.
3559 (it is the character that will terminate the string,
3560 or t if the string should be terminated by a generic string delimiter.)
3561 4. nil if outside a comment, t if inside a non-nestable comment,
3562 else an integer (the current comment nesting).
3563 5. t if following a quote character.
3564 6. the minimum paren-depth encountered during this scan.
3565 7. style of comment, if any.
3566 8. character address of start of comment or string; nil if not in one.
3567 9. List of positions of currently open parens, outermost first.
3568 10. When the last position scanned holds the first character of a
3569 (potential) two character construct, the syntax of that position,
3570 otherwise nil. That construct can be a two character comment
3571 delimiter or an Escaped or Char-quoted character.
3572 11..... Possible further internal information used by `parse-partial-sexp'.
3573
3574 If third arg TARGETDEPTH is non-nil, parsing stops if the depth
3575 in parentheses becomes equal to TARGETDEPTH.
3576 Fourth arg STOPBEFORE non-nil means stop when we come to
3577 any character that starts a sexp.
3578 Fifth arg OLDSTATE is a list like what this function returns.
3579 It is used to initialize the state of the parse. Elements number 1, 2, 6
3580 are ignored.
3581 Sixth arg COMMENTSTOP non-nil means stop after the start of a comment.
3582 If it is the symbol `syntax-table', stop after the start of a comment or a
3583 string, or after end of a comment or a string. */)
3584 (Lisp_Object from, Lisp_Object to, Lisp_Object targetdepth,
3585 Lisp_Object stopbefore, Lisp_Object oldstate, Lisp_Object commentstop)
3586 {
3587 struct lisp_parse_state state;
3588 EMACS_INT target;
3589
3590 if (!NILP (targetdepth))
3591 {
3592 CHECK_FIXNUM (targetdepth);
3593 target = XFIXNUM (targetdepth);
3594 }
3595 else
3596 target = TYPE_MINIMUM (EMACS_INT); /* We won't reach this depth. */
3597
3598 if (fix_position (to) < fix_position (from))
3599 error ("End position is smaller than start position");
3600
3601 validate_region (&from, &to);
3602 internalize_parse_state (oldstate, &state);
3603 scan_sexps_forward (&state, XFIXNUM (from), CHAR_TO_BYTE (XFIXNUM (from)),
3604 XFIXNUM (to),
3605 target, !NILP (stopbefore),
3606 (NILP (commentstop)
3607 ? 0 : (EQ (commentstop, Qsyntax_table) ? -1 : 1)));
3608
3609 SET_PT_BOTH (state.location, state.location_byte);
3610
3611 return
3612 Fcons (make_fixnum (state.depth),
3613 Fcons (state.prevlevelstart < 0
3614 ? Qnil : make_fixnum (state.prevlevelstart),
3615 Fcons (state.thislevelstart < 0
3616 ? Qnil : make_fixnum (state.thislevelstart),
3617 Fcons (state.instring >= 0
3618 ? (state.instring == ST_STRING_STYLE
3619 ? Qt : make_fixnum (state.instring)) : Qnil,
3620 Fcons (state.incomment < 0 ? Qt :
3621 (state.incomment == 0 ? Qnil :
3622 make_fixnum (state.incomment)),
3623 Fcons (state.quoted ? Qt : Qnil,
3624 Fcons (make_fixnum (state.mindepth),
3625 Fcons ((state.comstyle
3626 ? (state.comstyle == ST_COMMENT_STYLE
3627 ? Qsyntax_table
3628 : make_fixnum (state.comstyle))
3629 : Qnil),
3630 Fcons (((state.incomment
3631 || (state.instring >= 0))
3632 ? make_fixnum (state.comstr_start)
3633 : Qnil),
3634 Fcons (state.levelstarts,
3635 Fcons (state.prev_syntax == Smax
3636 ? Qnil
3637 : make_fixnum (state.prev_syntax),
3638 Qnil)))))))))));
3639 }
3640
3641 void
init_syntax_once(void)3642 init_syntax_once (void)
3643 {
3644 register int i, c;
3645 Lisp_Object temp;
3646
3647 /* This has to be done here, before we call Fmake_char_table. */
3648 DEFSYM (Qsyntax_table, "syntax-table");
3649
3650 /* Create objects which can be shared among syntax tables. */
3651 Vsyntax_code_object = make_nil_vector (Smax);
3652 for (i = 0; i < Smax; i++)
3653 ASET (Vsyntax_code_object, i, list1 (make_fixnum (i)));
3654
3655 /* Now we are ready to set up this property, so we can
3656 create syntax tables. */
3657 Fput (Qsyntax_table, Qchar_table_extra_slots, make_fixnum (0));
3658
3659 temp = AREF (Vsyntax_code_object, Swhitespace);
3660
3661 Vstandard_syntax_table = Fmake_char_table (Qsyntax_table, temp);
3662
3663 /* Control characters should not be whitespace. */
3664 temp = AREF (Vsyntax_code_object, Spunct);
3665 for (i = 0; i <= ' ' - 1; i++)
3666 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3667 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 0177, temp);
3668
3669 /* Except that a few really are whitespace. */
3670 temp = AREF (Vsyntax_code_object, Swhitespace);
3671 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ' ', temp);
3672 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\t', temp);
3673 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\n', temp);
3674 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 015, temp);
3675 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, 014, temp);
3676
3677 temp = AREF (Vsyntax_code_object, Sword);
3678 for (i = 'a'; i <= 'z'; i++)
3679 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3680 for (i = 'A'; i <= 'Z'; i++)
3681 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3682 for (i = '0'; i <= '9'; i++)
3683 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, i, temp);
3684
3685 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '$', temp);
3686 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '%', temp);
3687
3688 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '(',
3689 Fcons (make_fixnum (Sopen), make_fixnum (')')));
3690 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ')',
3691 Fcons (make_fixnum (Sclose), make_fixnum ('(')));
3692 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '[',
3693 Fcons (make_fixnum (Sopen), make_fixnum (']')));
3694 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, ']',
3695 Fcons (make_fixnum (Sclose), make_fixnum ('[')));
3696 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '{',
3697 Fcons (make_fixnum (Sopen), make_fixnum ('}')));
3698 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '}',
3699 Fcons (make_fixnum (Sclose), make_fixnum ('{')));
3700 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '"',
3701 Fcons (make_fixnum (Sstring), Qnil));
3702 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, '\\',
3703 Fcons (make_fixnum (Sescape), Qnil));
3704
3705 temp = AREF (Vsyntax_code_object, Ssymbol);
3706 for (i = 0; i < 10; i++)
3707 {
3708 c = "_-+*/&|<>="[i];
3709 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3710 }
3711
3712 temp = AREF (Vsyntax_code_object, Spunct);
3713 for (i = 0; i < 12; i++)
3714 {
3715 c = ".,;:?!#@~^'`"[i];
3716 SET_RAW_SYNTAX_ENTRY (Vstandard_syntax_table, c, temp);
3717 }
3718
3719 /* All multibyte characters have syntax `word' by default. */
3720 temp = AREF (Vsyntax_code_object, Sword);
3721 char_table_set_range (Vstandard_syntax_table, 0x80, MAX_CHAR, temp);
3722 }
3723
3724 void
syms_of_syntax(void)3725 syms_of_syntax (void)
3726 {
3727 DEFSYM (Qsyntax_table_p, "syntax-table-p");
3728 DEFSYM (Qsyntax_ppss, "syntax-ppss");
3729 DEFVAR_LISP ("comment-use-syntax-ppss",
3730 Vcomment_use_syntax_ppss,
3731 doc: /* Non-nil means `forward-comment' can use `syntax-ppss' internally. */);
3732 Vcomment_use_syntax_ppss = Qt;
3733
3734 staticpro (&Vsyntax_code_object);
3735
3736 staticpro (&gl_state.object);
3737 staticpro (&gl_state.global_code);
3738 staticpro (&gl_state.current_syntax_table);
3739 staticpro (&gl_state.old_prop);
3740
3741 DEFSYM (Qscan_error, "scan-error");
3742 Fput (Qscan_error, Qerror_conditions,
3743 pure_list (Qscan_error, Qerror));
3744 Fput (Qscan_error, Qerror_message,
3745 build_pure_c_string ("Scan error"));
3746
3747 DEFVAR_BOOL ("parse-sexp-ignore-comments", parse_sexp_ignore_comments,
3748 doc: /* Non-nil means `forward-sexp', etc., should treat comments as whitespace. */);
3749
3750 DEFVAR_BOOL ("parse-sexp-lookup-properties", parse_sexp_lookup_properties,
3751 doc: /* Non-nil means `forward-sexp', etc., obey `syntax-table' property.
3752 Otherwise, that text property is simply ignored.
3753 See the info node `(elisp)Syntax Properties' for a description of the
3754 `syntax-table' property. */);
3755
3756 DEFVAR_INT ("syntax-propertize--done", syntax_propertize__done,
3757 doc: /* Position up to which syntax-table properties have been set. */);
3758 syntax_propertize__done = -1;
3759 DEFSYM (Qinternal__syntax_propertize, "internal--syntax-propertize");
3760 Fmake_variable_buffer_local (intern ("syntax-propertize--done"));
3761
3762 words_include_escapes = 0;
3763 DEFVAR_BOOL ("words-include-escapes", words_include_escapes,
3764 doc: /* Non-nil means `forward-word', etc., should treat escape chars part of words. */);
3765
3766 DEFVAR_BOOL ("multibyte-syntax-as-symbol", multibyte_syntax_as_symbol,
3767 doc: /* Non-nil means `scan-sexps' treats all multibyte characters as symbol. */);
3768 multibyte_syntax_as_symbol = 0;
3769
3770 DEFVAR_BOOL ("open-paren-in-column-0-is-defun-start",
3771 open_paren_in_column_0_is_defun_start,
3772 doc: /* Non-nil means an open paren in column 0 denotes the start of a defun. */);
3773 open_paren_in_column_0_is_defun_start = 1;
3774
3775
3776 DEFVAR_LISP ("find-word-boundary-function-table",
3777 Vfind_word_boundary_function_table,
3778 doc: /*
3779 Char table of functions to search for the word boundary.
3780 Each function is called with two arguments; POS and LIMIT.
3781 POS and LIMIT are character positions in the current buffer.
3782
3783 If POS is less than LIMIT, POS is at the first character of a word,
3784 and the return value of a function should be a position after the
3785 last character of that word.
3786
3787 If POS is not less than LIMIT, POS is at the last character of a word,
3788 and the return value of a function should be a position at the first
3789 character of that word.
3790
3791 In both cases, LIMIT bounds the search. */);
3792 Vfind_word_boundary_function_table = Fmake_char_table (Qnil, Qnil);
3793
3794 DEFVAR_BOOL ("comment-end-can-be-escaped", comment_end_can_be_escaped,
3795 doc: /* Non-nil means an escaped ender inside a comment doesn't end the comment. */);
3796 comment_end_can_be_escaped = false;
3797 DEFSYM (Qcomment_end_can_be_escaped, "comment-end-can-be-escaped");
3798 Fmake_variable_buffer_local (Qcomment_end_can_be_escaped);
3799
3800 defsubr (&Ssyntax_table_p);
3801 defsubr (&Ssyntax_table);
3802 defsubr (&Sstandard_syntax_table);
3803 defsubr (&Scopy_syntax_table);
3804 defsubr (&Sset_syntax_table);
3805 defsubr (&Schar_syntax);
3806 defsubr (&Ssyntax_class_to_char);
3807 defsubr (&Smatching_paren);
3808 defsubr (&Sstring_to_syntax);
3809 defsubr (&Smodify_syntax_entry);
3810 defsubr (&Sinternal_describe_syntax_value);
3811
3812 defsubr (&Sforward_word);
3813
3814 defsubr (&Sskip_chars_forward);
3815 defsubr (&Sskip_chars_backward);
3816 defsubr (&Sskip_syntax_forward);
3817 defsubr (&Sskip_syntax_backward);
3818
3819 defsubr (&Sforward_comment);
3820 defsubr (&Sscan_lists);
3821 defsubr (&Sscan_sexps);
3822 defsubr (&Sbackward_prefix_chars);
3823 defsubr (&Sparse_partial_sexp);
3824 }
3825