1 /* Extended regular expression matching and search library, version
2    0.12.  (Implements POSIX draft P1003.2/D11.2, except for some of the
3    internationalization features.)
4 
5    Copyright (C) 1993,94,95,96,97,98,99,2000 Free Software Foundation, Inc.
6 
7    This program is free software; you can redistribute it and/or modify
8    it under the terms of the GNU General Public License as published by
9    the Free Software Foundation; either version 2, or (at your option)
10    any later version.
11 
12    This program is distributed in the hope that it will be useful,
13    but WITHOUT ANY WARRANTY; without even the implied warranty of
14    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.	 See the
15    GNU General Public License for more details.
16 
17    You should have received a copy of the GNU General Public License
18    along with this program; if not, write to the Free Software
19    Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
20    USA.	 */
21 
22 /* TODO:
23    - structure the opcode space into opcode+flag.
24    - merge with glibc's regex.[ch].
25    - replace (succeed_n + jump_n + set_number_at) with something that doesn't
26      need to modify the compiled regexp so that re_match can be reentrant.
27    - get rid of on_failure_jump_smart by doing the optimization in re_comp
28      rather than at run-time, so that re_match can be reentrant.
29 */
30 
31 /* AIX requires this to be the first thing in the file. */
32 #if defined _AIX && !defined REGEX_MALLOC
33   #pragma alloca
34 #endif
35 
36 #undef	_GNU_SOURCE
37 #define _GNU_SOURCE
38 
39 #ifdef HAVE_CONFIG_H
40 # include <config.h>
41 #endif
42 
43 #if defined STDC_HEADERS && !defined emacs
44 # include <stddef.h>
45 #else
46 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
47 # include <sys/types.h>
48 #endif
49 
50 /* Whether to use ISO C Amendment 1 wide char functions.
51    Those should not be used for Emacs since it uses its own.  */
52 #if defined _LIBC
53 #define WIDE_CHAR_SUPPORT 1
54 #else
55 #define WIDE_CHAR_SUPPORT \
56 	(HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC && !emacs)
57 #endif
58 
59 /* For platform which support the ISO C amendement 1 functionality we
60    support user defined character classes.  */
61 #if WIDE_CHAR_SUPPORT
62 /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
63 # include <wchar.h>
64 # include <wctype.h>
65 #endif
66 
67 #ifdef _LIBC
68 /* We have to keep the namespace clean.  */
69 # define regfree(preg) __regfree (preg)
70 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
71 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
72 # define regerror(errcode, preg, errbuf, errbuf_size) \
73 	__regerror(errcode, preg, errbuf, errbuf_size)
74 # define re_set_registers(bu, re, nu, st, en) \
75 	__re_set_registers (bu, re, nu, st, en)
76 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
77 	__re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
78 # define re_match(bufp, string, size, pos, regs) \
79 	__re_match (bufp, string, size, pos, regs)
80 # define re_search(bufp, string, size, startpos, range, regs) \
81 	__re_search (bufp, string, size, startpos, range, regs)
82 # define re_compile_pattern(pattern, length, bufp) \
83 	__re_compile_pattern (pattern, length, bufp)
84 # define re_set_syntax(syntax) __re_set_syntax (syntax)
85 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
86 	__re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
87 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
88 
89 /* Make sure we call libc's function even if the user overrides them.  */
90 # define btowc __btowc
91 # define iswctype __iswctype
92 # define wctype __wctype
93 
94 # define WEAK_ALIAS(a,b) weak_alias (a, b)
95 
96 /* We are also using some library internals.  */
97 # include <locale/localeinfo.h>
98 # include <locale/elem-hash.h>
99 # include <langinfo.h>
100 #else
101 # define WEAK_ALIAS(a,b)
102 #endif
103 
104 /* This is for other GNU distributions with internationalized messages.  */
105 #if HAVE_LIBINTL_H || defined _LIBC
106 # include <libintl.h>
107 #else
108 # define gettext(msgid) (msgid)
109 #endif
110 
111 #ifndef gettext_noop
112 /* This define is so xgettext can find the internationalizable
113    strings.  */
114 # define gettext_noop(String) String
115 #endif
116 
117 /* The `emacs' switch turns on certain matching commands
118    that make sense only in Emacs. */
119 #ifdef emacs
120 
121 # include "lisp.h"
122 # include "buffer.h"
123 
124 /* Make syntax table lookup grant data in gl_state.  */
125 # define SYNTAX_ENTRY_VIA_PROPERTY
126 
127 # include "syntax.h"
128 # include "charset.h"
129 # include "category.h"
130 
131 # ifdef malloc
132 #  undef malloc
133 # endif
134 # define malloc xmalloc
135 # ifdef realloc
136 #  undef realloc
137 # endif
138 # define realloc xrealloc
139 # ifdef free
140 #  undef free
141 # endif
142 # define free xfree
143 
144 /* Converts the pointer to the char to BEG-based offset from the start.	 */
145 # define PTR_TO_OFFSET(d) POS_AS_IN_BUFFER (POINTER_TO_OFFSET (d))
146 # define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
147 
148 # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
149 # define RE_STRING_CHAR(p, s) \
150   (multibyte ? (STRING_CHAR (p, s)) : (*(p)))
151 # define RE_STRING_CHAR_AND_LENGTH(p, s, len) \
152   (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p)))
153 
154 /* Set C a (possibly multibyte) character before P.  P points into a
155    string which is the virtual concatenation of STR1 (which ends at
156    END1) or STR2 (which ends at END2).  */
157 # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)		\
158   do {									\
159     if (multibyte)							\
160        {						    		\
161 	 re_char *dtemp = (p) == (str2) ? (end1) : (p);		    	\
162 	 re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
163 	 while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp));		\
164 	 c = STRING_CHAR (dtemp, (p) - dtemp);				\
165        }						    		\
166      else						    		\
167        (c = ((p) == (str2) ? (end1) : (p))[-1]);			\
168   } while (0)
169 
170 
171 #else  /* not emacs */
172 
173 /* If we are not linking with Emacs proper,
174    we can't use the relocating allocator
175    even if config.h says that we can.  */
176 # undef REL_ALLOC
177 
178 # if defined STDC_HEADERS || defined _LIBC
179 #  include <stdlib.h>
180 # else
181 char *malloc ();
182 char *realloc ();
183 # endif
184 
185 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
186    If nothing else has been done, use the method below.  */
187 # ifdef INHIBIT_STRING_HEADER
188 #  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
189 #   if !defined bzero && !defined bcopy
190 #    undef INHIBIT_STRING_HEADER
191 #   endif
192 #  endif
193 # endif
194 
195 /* This is the normal way of making sure we have memcpy, memcmp and bzero.
196    This is used in most programs--a few other programs avoid this
197    by defining INHIBIT_STRING_HEADER.  */
198 # ifndef INHIBIT_STRING_HEADER
199 #  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
200 #   include <string.h>
201 #   ifndef bzero
202 #    ifndef _LIBC
203 #     define bzero(s, n)	(memset (s, '\0', n), (s))
204 #    else
205 #     define bzero(s, n)	__bzero (s, n)
206 #    endif
207 #   endif
208 #  else
209 #   include <strings.h>
210 #   ifndef memcmp
211 #    define memcmp(s1, s2, n)	bcmp (s1, s2, n)
212 #   endif
213 #   ifndef memcpy
214 #    define memcpy(d, s, n)	(bcopy (s, d, n), (d))
215 #   endif
216 #  endif
217 # endif
218 
219 /* Define the syntax stuff for \<, \>, etc.  */
220 
221 /* Sword must be nonzero for the wordchar pattern commands in re_match_2.  */
222 enum syntaxcode { Swhitespace = 0, Sword = 1 };
223 
224 # ifdef SWITCH_ENUM_BUG
225 #  define SWITCH_ENUM_CAST(x) ((int)(x))
226 # else
227 #  define SWITCH_ENUM_CAST(x) (x)
228 # endif
229 
230 /* Dummy macros for non-Emacs environments.  */
231 # define BASE_LEADING_CODE_P(c) (0)
232 # define CHAR_CHARSET(c) 0
233 # define CHARSET_LEADING_CODE_BASE(c) 0
234 # define MAX_MULTIBYTE_LENGTH 1
235 # define RE_MULTIBYTE_P(x) 0
236 # define WORD_BOUNDARY_P(c1, c2) (0)
237 # define CHAR_HEAD_P(p) (1)
238 # define SINGLE_BYTE_CHAR_P(c) (1)
239 # define SAME_CHARSET_P(c1, c2) (1)
240 # define MULTIBYTE_FORM_LENGTH(p, s) (1)
241 # define STRING_CHAR(p, s) (*(p))
242 # define RE_STRING_CHAR STRING_CHAR
243 # define CHAR_STRING(c, s) (*(s) = (c), 1)
244 # define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
245 # define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH
246 # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
247   (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
248 # define MAKE_CHAR(charset, c1, c2) (c1)
249 #endif /* not emacs */
250 
251 #ifndef RE_TRANSLATE
252 # define RE_TRANSLATE(TBL, C) ((unsigned char)(TBL)[C])
253 # define RE_TRANSLATE_P(TBL) (TBL)
254 #endif
255 
256 /* Get the interface, including the syntax bits.  */
257 #include "regex.h"
258 
259 /* isalpha etc. are used for the character classes.  */
260 #include <ctype.h>
261 
262 #ifdef emacs
263 
264 /* 1 if C is an ASCII character.  */
265 # define IS_REAL_ASCII(c) ((c) < 0200)
266 
267 /* 1 if C is a unibyte character.  */
268 # define ISUNIBYTE(c) (SINGLE_BYTE_CHAR_P ((c)))
269 
270 /* The Emacs definitions should not be directly affected by locales.  */
271 
272 /* In Emacs, these are only used for single-byte characters.  */
273 # define ISDIGIT(c) ((c) >= '0' && (c) <= '9')
274 # define ISCNTRL(c) ((c) < ' ')
275 # define ISXDIGIT(c) (((c) >= '0' && (c) <= '9')		\
276 		     || ((c) >= 'a' && (c) <= 'f')	\
277 		     || ((c) >= 'A' && (c) <= 'F'))
278 
279 /* This is only used for single-byte characters.  */
280 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
281 
282 /* The rest must handle multibyte characters.  */
283 
284 # define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c)				\
285 		    ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237)	\
286 		    : 1)
287 
288 # define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c)				\
289 		    ? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237)	\
290 		    : 1)
291 
292 # define ISALNUM(c) (IS_REAL_ASCII (c)			\
293 		    ? (((c) >= 'a' && (c) <= 'z')	\
294 		       || ((c) >= 'A' && (c) <= 'Z')	\
295 		       || ((c) >= '0' && (c) <= '9'))	\
296 		    : SYNTAX (c) == Sword)
297 
298 # define ISALPHA(c) (IS_REAL_ASCII (c)			\
299 		    ? (((c) >= 'a' && (c) <= 'z')	\
300 		       || ((c) >= 'A' && (c) <= 'Z'))	\
301 		    : SYNTAX (c) == Sword)
302 
303 # define ISLOWER(c) (LOWERCASEP (c))
304 
305 # define ISPUNCT(c) (IS_REAL_ASCII (c)				\
306 		    ? ((c) > ' ' && (c) < 0177			\
307 		       && !(((c) >= 'a' && (c) <= 'z')		\
308 		            || ((c) >= 'A' && (c) <= 'Z')	\
309 		            || ((c) >= '0' && (c) <= '9')))	\
310 		    : SYNTAX (c) != Sword)
311 
312 # define ISSPACE(c) (SYNTAX (c) == Swhitespace)
313 
314 # define ISUPPER(c) (UPPERCASEP (c))
315 
316 # define ISWORD(c) (SYNTAX (c) == Sword)
317 
318 #else /* not emacs */
319 
320 /* Jim Meyering writes:
321 
322    "... Some ctype macros are valid only for character codes that
323    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
324    using /bin/cc or gcc but without giving an ansi option).  So, all
325    ctype uses should be through macros like ISPRINT...  If
326    STDC_HEADERS is defined, then autoconf has verified that the ctype
327    macros don't need to be guarded with references to isascii. ...
328    Defining isascii to 1 should let any compiler worth its salt
329    eliminate the && through constant folding."
330    Solaris defines some of these symbols so we must undefine them first.  */
331 
332 # undef ISASCII
333 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
334 #  define ISASCII(c) 1
335 # else
336 #  define ISASCII(c) isascii(c)
337 # endif
338 
339 /* 1 if C is an ASCII character.  */
340 # define IS_REAL_ASCII(c) ((c) < 0200)
341 
342 /* This distinction is not meaningful, except in Emacs.  */
343 # define ISUNIBYTE(c) 1
344 
345 # ifdef isblank
346 #  define ISBLANK(c) (ISASCII (c) && isblank (c))
347 # else
348 #  define ISBLANK(c) ((c) == ' ' || (c) == '\t')
349 # endif
350 # ifdef isgraph
351 #  define ISGRAPH(c) (ISASCII (c) && isgraph (c))
352 # else
353 #  define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
354 # endif
355 
356 # undef ISPRINT
357 # define ISPRINT(c) (ISASCII (c) && isprint (c))
358 # define ISDIGIT(c) (ISASCII (c) && isdigit (c))
359 # define ISALNUM(c) (ISASCII (c) && isalnum (c))
360 # define ISALPHA(c) (ISASCII (c) && isalpha (c))
361 # define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
362 # define ISLOWER(c) (ISASCII (c) && islower (c))
363 # define ISPUNCT(c) (ISASCII (c) && ispunct (c))
364 # define ISSPACE(c) (ISASCII (c) && isspace (c))
365 # define ISUPPER(c) (ISASCII (c) && isupper (c))
366 # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
367 
368 # define ISWORD(c) ISALPHA(c)
369 
370 # ifdef _tolower
371 #  define TOLOWER(c) _tolower(c)
372 # else
373 #  define TOLOWER(c) tolower(c)
374 # endif
375 
376 /* How many characters in the character set.  */
377 # define CHAR_SET_SIZE 256
378 
379 # ifdef SYNTAX_TABLE
380 
381 extern char *re_syntax_table;
382 
383 # else /* not SYNTAX_TABLE */
384 
385 static char re_syntax_table[CHAR_SET_SIZE];
386 
387 static void
init_syntax_once()388 init_syntax_once ()
389 {
390    register int c;
391    static int done = 0;
392 
393    if (done)
394      return;
395 
396    bzero (re_syntax_table, sizeof re_syntax_table);
397 
398    for (c = 0; c < CHAR_SET_SIZE; ++c)
399      if (ISALNUM (c))
400 	re_syntax_table[c] = Sword;
401 
402    re_syntax_table['_'] = Sword;
403 
404    done = 1;
405 }
406 
407 # endif /* not SYNTAX_TABLE */
408 
409 # define SYNTAX(c) re_syntax_table[(c)]
410 
411 #endif /* not emacs */
412 
413 #ifndef NULL
414 # define NULL (void *)0
415 #endif
416 
417 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
418    since ours (we hope) works properly with all combinations of
419    machines, compilers, `char' and `unsigned char' argument types.
420    (Per Bothner suggested the basic approach.)  */
421 #undef SIGN_EXTEND_CHAR
422 #if __STDC__
423 # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
424 #else  /* not __STDC__ */
425 /* As in Harbison and Steele.  */
426 # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
427 #endif
428 
429 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
430    use `alloca' instead of `malloc'.  This is because using malloc in
431    re_search* or re_match* could cause memory leaks when C-g is used in
432    Emacs; also, malloc is slower and causes storage fragmentation.  On
433    the other hand, malloc is more portable, and easier to debug.
434 
435    Because we sometimes use alloca, some routines have to be macros,
436    not functions -- `alloca'-allocated space disappears at the end of the
437    function it is called in.  */
438 
439 #ifdef REGEX_MALLOC
440 
441 # define REGEX_ALLOCATE malloc
442 # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
443 # define REGEX_FREE free
444 
445 #else /* not REGEX_MALLOC  */
446 
447 /* Emacs already defines alloca, sometimes.  */
448 # ifndef alloca
449 
450 /* Make alloca work the best possible way.  */
451 #  ifdef __GNUC__
452 #   define alloca __builtin_alloca
453 #  else /* not __GNUC__ */
454 #   if HAVE_ALLOCA_H
455 #    include <alloca.h>
456 #   endif /* HAVE_ALLOCA_H */
457 #  endif /* not __GNUC__ */
458 
459 # endif /* not alloca */
460 
461 # define REGEX_ALLOCATE alloca
462 
463 /* Assumes a `char *destination' variable.  */
464 # define REGEX_REALLOCATE(source, osize, nsize)				\
465   (destination = (char *) alloca (nsize),				\
466    memcpy (destination, source, osize))
467 
468 /* No need to do anything to free, after alloca.  */
469 # define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
470 
471 #endif /* not REGEX_MALLOC */
472 
473 /* Define how to allocate the failure stack.  */
474 
475 #if defined REL_ALLOC && defined REGEX_MALLOC
476 
477 # define REGEX_ALLOCATE_STACK(size)				\
478   r_alloc (&failure_stack_ptr, (size))
479 # define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
480   r_re_alloc (&failure_stack_ptr, (nsize))
481 # define REGEX_FREE_STACK(ptr)					\
482   r_alloc_free (&failure_stack_ptr)
483 
484 #else /* not using relocating allocator */
485 
486 # ifdef REGEX_MALLOC
487 
488 #  define REGEX_ALLOCATE_STACK malloc
489 #  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
490 #  define REGEX_FREE_STACK free
491 
492 # else /* not REGEX_MALLOC */
493 
494 #  define REGEX_ALLOCATE_STACK alloca
495 
496 #  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
497    REGEX_REALLOCATE (source, osize, nsize)
498 /* No need to explicitly free anything.	 */
499 #  define REGEX_FREE_STACK(arg) ((void)0)
500 
501 # endif /* not REGEX_MALLOC */
502 #endif /* not using relocating allocator */
503 
504 
505 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
506    `string1' or just past its end.  This works if PTR is NULL, which is
507    a good thing.  */
508 #define FIRST_STRING_P(ptr)					\
509   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
510 
511 /* (Re)Allocate N items of type T using malloc, or fail.  */
512 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
513 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
514 #define RETALLOC_IF(addr, n, t) \
515   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
516 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
517 
518 #define BYTEWIDTH 8 /* In bits.  */
519 
520 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
521 
522 #undef MAX
523 #undef MIN
524 #define MAX(a, b) ((a) > (b) ? (a) : (b))
525 #define MIN(a, b) ((a) < (b) ? (a) : (b))
526 
527 /* Type of source-pattern and string chars.  */
528 typedef const unsigned char re_char;
529 
530 typedef char boolean;
531 #define false 0
532 #define true 1
533 
534 static int re_match_2_internal _RE_ARGS ((struct re_pattern_buffer *bufp,
535 					re_char *string1, int size1,
536 					re_char *string2, int size2,
537 					int pos,
538 					struct re_registers *regs,
539 					int stop));
540 
541 /* These are the command codes that appear in compiled regular
542    expressions.  Some opcodes are followed by argument bytes.  A
543    command code can specify any interpretation whatsoever for its
544    arguments.  Zero bytes may appear in the compiled regular expression.  */
545 
546 typedef enum
547 {
548   no_op = 0,
549 
550   /* Succeed right away--no more backtracking.  */
551   succeed,
552 
553 	/* Followed by one byte giving n, then by n literal bytes.  */
554   exactn,
555 
556 	/* Matches any (more or less) character.  */
557   anychar,
558 
559 	/* Matches any one char belonging to specified set.  First
560 	   following byte is number of bitmap bytes.  Then come bytes
561 	   for a bitmap saying which chars are in.  Bits in each byte
562 	   are ordered low-bit-first.  A character is in the set if its
563 	   bit is 1.  A character too large to have a bit in the map is
564 	   automatically not in the set.
565 
566 	   If the length byte has the 0x80 bit set, then that stuff
567 	   is followed by a range table:
568 	       2 bytes of flags for character sets (low 8 bits, high 8 bits)
569 		   See RANGE_TABLE_WORK_BITS below.
570 	       2 bytes, the number of pairs that follow (upto 32767)
571 	       pairs, each 2 multibyte characters,
572 		   each multibyte character represented as 3 bytes.  */
573   charset,
574 
575 	/* Same parameters as charset, but match any character that is
576 	   not one of those specified.  */
577   charset_not,
578 
579 	/* Start remembering the text that is matched, for storing in a
580 	   register.  Followed by one byte with the register number, in
581 	   the range 0 to one less than the pattern buffer's re_nsub
582 	   field.  */
583   start_memory,
584 
585 	/* Stop remembering the text that is matched and store it in a
586 	   memory register.  Followed by one byte with the register
587 	   number, in the range 0 to one less than `re_nsub' in the
588 	   pattern buffer.  */
589   stop_memory,
590 
591 	/* Match a duplicate of something remembered. Followed by one
592 	   byte containing the register number.  */
593   duplicate,
594 
595 	/* Fail unless at beginning of line.  */
596   begline,
597 
598 	/* Fail unless at end of line.  */
599   endline,
600 
601 	/* Succeeds if at beginning of buffer (if emacs) or at beginning
602 	   of string to be matched (if not).  */
603   begbuf,
604 
605 	/* Analogously, for end of buffer/string.  */
606   endbuf,
607 
608 	/* Followed by two byte relative address to which to jump.  */
609   jump,
610 
611 	/* Followed by two-byte relative address of place to resume at
612 	   in case of failure.	*/
613   on_failure_jump,
614 
615 	/* Like on_failure_jump, but pushes a placeholder instead of the
616 	   current string position when executed.  */
617   on_failure_keep_string_jump,
618 
619 	/* Just like `on_failure_jump', except that it checks that we
620 	   don't get stuck in an infinite loop (matching an empty string
621 	   indefinitely).  */
622   on_failure_jump_loop,
623 
624 	/* Just like `on_failure_jump_loop', except that it checks for
625 	   a different kind of loop (the kind that shows up with non-greedy
626 	   operators).  This operation has to be immediately preceded
627 	   by a `no_op'.  */
628   on_failure_jump_nastyloop,
629 
630 	/* A smart `on_failure_jump' used for greedy * and + operators.
631 	   It analyses the loop before which it is put and if the
632 	   loop does not require backtracking, it changes itself to
633 	   `on_failure_keep_string_jump' and short-circuits the loop,
634 	   else it just defaults to changing itself into `on_failure_jump'.
635 	   It assumes that it is pointing to just past a `jump'.  */
636   on_failure_jump_smart,
637 
638 	/* Followed by two-byte relative address and two-byte number n.
639 	   After matching N times, jump to the address upon failure.
640 	   Does not work if N starts at 0: use on_failure_jump_loop
641 	   instead.  */
642   succeed_n,
643 
644 	/* Followed by two-byte relative address, and two-byte number n.
645 	   Jump to the address N times, then fail.  */
646   jump_n,
647 
648 	/* Set the following two-byte relative address to the
649 	   subsequent two-byte number.	The address *includes* the two
650 	   bytes of number.  */
651   set_number_at,
652 
653   wordbeg,	/* Succeeds if at word beginning.  */
654   wordend,	/* Succeeds if at word end.  */
655 
656   wordbound,	/* Succeeds if at a word boundary.  */
657   notwordbound,	/* Succeeds if not at a word boundary.	*/
658 
659 	/* Matches any character whose syntax is specified.  Followed by
660 	   a byte which contains a syntax code, e.g., Sword.  */
661   syntaxspec,
662 
663 	/* Matches any character whose syntax is not that specified.  */
664   notsyntaxspec
665 
666 #ifdef emacs
667   ,before_dot,	/* Succeeds if before point.  */
668   at_dot,	/* Succeeds if at point.  */
669   after_dot,	/* Succeeds if after point.  */
670 
671   /* Matches any character whose category-set contains the specified
672      category.	The operator is followed by a byte which contains a
673      category code (mnemonic ASCII character).	*/
674   categoryspec,
675 
676   /* Matches any character whose category-set does not contain the
677      specified category.  The operator is followed by a byte which
678      contains the category code (mnemonic ASCII character).  */
679   notcategoryspec
680 #endif /* emacs */
681 } re_opcode_t;
682 
683 /* Common operations on the compiled pattern.  */
684 
685 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
686 
687 #define STORE_NUMBER(destination, number)				\
688   do {									\
689     (destination)[0] = (number) & 0377;					\
690     (destination)[1] = (number) >> 8;					\
691   } while (0)
692 
693 /* Same as STORE_NUMBER, except increment DESTINATION to
694    the byte after where the number is stored.  Therefore, DESTINATION
695    must be an lvalue.  */
696 
697 #define STORE_NUMBER_AND_INCR(destination, number)			\
698   do {									\
699     STORE_NUMBER (destination, number);					\
700     (destination) += 2;							\
701   } while (0)
702 
703 /* Put into DESTINATION a number stored in two contiguous bytes starting
704    at SOURCE.  */
705 
706 #define EXTRACT_NUMBER(destination, source)				\
707   do {									\
708     (destination) = *(source) & 0377;					\
709     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
710   } while (0)
711 
712 #ifdef DEBUG
713 static void extract_number _RE_ARGS ((int *dest, re_char *source));
714 static void
extract_number(dest,source)715 extract_number (dest, source)
716     int *dest;
717     re_char *source;
718 {
719   int temp = SIGN_EXTEND_CHAR (*(source + 1));
720   *dest = *source & 0377;
721   *dest += temp << 8;
722 }
723 
724 # ifndef EXTRACT_MACROS /* To debug the macros.  */
725 #  undef EXTRACT_NUMBER
726 #  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
727 # endif /* not EXTRACT_MACROS */
728 
729 #endif /* DEBUG */
730 
731 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
732    SOURCE must be an lvalue.  */
733 
734 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
735   do {									\
736     EXTRACT_NUMBER (destination, source);				\
737     (source) += 2;							\
738   } while (0)
739 
740 #ifdef DEBUG
741 static void extract_number_and_incr _RE_ARGS ((int *destination,
742 					       re_char **source));
743 static void
extract_number_and_incr(destination,source)744 extract_number_and_incr (destination, source)
745     int *destination;
746     re_char **source;
747 {
748   extract_number (destination, *source);
749   *source += 2;
750 }
751 
752 # ifndef EXTRACT_MACROS
753 #  undef EXTRACT_NUMBER_AND_INCR
754 #  define EXTRACT_NUMBER_AND_INCR(dest, src) \
755   extract_number_and_incr (&dest, &src)
756 # endif /* not EXTRACT_MACROS */
757 
758 #endif /* DEBUG */
759 
760 /* Store a multibyte character in three contiguous bytes starting
761    DESTINATION, and increment DESTINATION to the byte after where the
762    character is stored.	 Therefore, DESTINATION must be an lvalue.  */
763 
764 #define STORE_CHARACTER_AND_INCR(destination, character)	\
765   do {								\
766     (destination)[0] = (character) & 0377;			\
767     (destination)[1] = ((character) >> 8) & 0377;		\
768     (destination)[2] = (character) >> 16;			\
769     (destination) += 3;						\
770   } while (0)
771 
772 /* Put into DESTINATION a character stored in three contiguous bytes
773    starting at SOURCE.	*/
774 
775 #define EXTRACT_CHARACTER(destination, source)	\
776   do {						\
777     (destination) = ((source)[0]		\
778 		     | ((source)[1] << 8)	\
779 		     | ((source)[2] << 16));	\
780   } while (0)
781 
782 
783 /* Macros for charset. */
784 
785 /* Size of bitmap of charset P in bytes.  P is a start of charset,
786    i.e. *P is (re_opcode_t) charset or (re_opcode_t) charset_not.  */
787 #define CHARSET_BITMAP_SIZE(p) ((p)[1] & 0x7F)
788 
789 /* Nonzero if charset P has range table.  */
790 #define CHARSET_RANGE_TABLE_EXISTS_P(p)	 ((p)[1] & 0x80)
791 
792 /* Return the address of range table of charset P.  But not the start
793    of table itself, but the before where the number of ranges is
794    stored.  `2 +' means to skip re_opcode_t and size of bitmap,
795    and the 2 bytes of flags at the start of the range table.  */
796 #define CHARSET_RANGE_TABLE(p) (&(p)[4 + CHARSET_BITMAP_SIZE (p)])
797 
798 /* Extract the bit flags that start a range table.  */
799 #define CHARSET_RANGE_TABLE_BITS(p)		\
800   ((p)[2 + CHARSET_BITMAP_SIZE (p)]		\
801    + (p)[3 + CHARSET_BITMAP_SIZE (p)] * 0x100)
802 
803 /* Test if C is listed in the bitmap of charset P.  */
804 #define CHARSET_LOOKUP_BITMAP(p, c)				\
805   ((c) < CHARSET_BITMAP_SIZE (p) * BYTEWIDTH			\
806    && (p)[2 + (c) / BYTEWIDTH] & (1 << ((c) % BYTEWIDTH)))
807 
808 /* Return the address of end of RANGE_TABLE.  COUNT is number of
809    ranges (which is a pair of (start, end)) in the RANGE_TABLE.	 `* 2'
810    is start of range and end of range.	`* 3' is size of each start
811    and end.  */
812 #define CHARSET_RANGE_TABLE_END(range_table, count)	\
813   ((range_table) + (count) * 2 * 3)
814 
815 /* Test if C is in RANGE_TABLE.	 A flag NOT is negated if C is in.
816    COUNT is number of ranges in RANGE_TABLE.  */
817 #define CHARSET_LOOKUP_RANGE_TABLE_RAW(not, c, range_table, count)	\
818   do									\
819     {									\
820       re_wchar_t range_start, range_end;				\
821       re_char *p;							\
822       re_char *range_table_end						\
823 	= CHARSET_RANGE_TABLE_END ((range_table), (count));		\
824 									\
825       for (p = (range_table); p < range_table_end; p += 2 * 3)		\
826 	{								\
827 	  EXTRACT_CHARACTER (range_start, p);				\
828 	  EXTRACT_CHARACTER (range_end, p + 3);				\
829 									\
830 	  if (range_start <= (c) && (c) <= range_end)			\
831 	    {								\
832 	      (not) = !(not);						\
833 	      break;							\
834 	    }								\
835 	}								\
836     }									\
837   while (0)
838 
839 /* Test if C is in range table of CHARSET.  The flag NOT is negated if
840    C is listed in it.  */
841 #define CHARSET_LOOKUP_RANGE_TABLE(not, c, charset)			\
842   do									\
843     {									\
844       /* Number of ranges in range table. */				\
845       int count;							\
846       re_char *range_table = CHARSET_RANGE_TABLE (charset);		\
847       									\
848       EXTRACT_NUMBER_AND_INCR (count, range_table);			\
849       CHARSET_LOOKUP_RANGE_TABLE_RAW ((not), (c), range_table, count);	\
850     }									\
851   while (0)
852 
853 /* If DEBUG is defined, Regex prints many voluminous messages about what
854    it is doing (if the variable `debug' is nonzero).  If linked with the
855    main program in `iregex.c', you can enter patterns and strings
856    interactively.  And if linked with the main program in `main.c' and
857    the other test files, you can run the already-written tests.  */
858 
859 #ifdef DEBUG
860 
861 /* We use standard I/O for debugging.  */
862 # include <stdio.h>
863 
864 /* It is useful to test things that ``must'' be true when debugging.  */
865 # include <assert.h>
866 
867 static int debug = -100000;
868 
869 # define DEBUG_STATEMENT(e) e
870 # define DEBUG_PRINT1(x) if (debug > 0) printf (x)
871 # define DEBUG_PRINT2(x1, x2) if (debug > 0) printf (x1, x2)
872 # define DEBUG_PRINT3(x1, x2, x3) if (debug > 0) printf (x1, x2, x3)
873 # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug > 0) printf (x1, x2, x3, x4)
874 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)				\
875   if (debug > 0) print_partial_compiled_pattern (s, e)
876 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
877   if (debug > 0) print_double_string (w, s1, sz1, s2, sz2)
878 
879 
880 /* Print the fastmap in human-readable form.  */
881 
882 void
print_fastmap(fastmap)883 print_fastmap (fastmap)
884     char *fastmap;
885 {
886   unsigned was_a_range = 0;
887   unsigned i = 0;
888 
889   while (i < (1 << BYTEWIDTH))
890     {
891       if (fastmap[i++])
892 	{
893 	  was_a_range = 0;
894 	  putchar (i - 1);
895 	  while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
896 	    {
897 	      was_a_range = 1;
898 	      i++;
899 	    }
900 	  if (was_a_range)
901 	    {
902 	      printf ("-");
903 	      putchar (i - 1);
904 	    }
905 	}
906     }
907   putchar ('\n');
908 }
909 
910 
911 /* Print a compiled pattern string in human-readable form, starting at
912    the START pointer into it and ending just before the pointer END.  */
913 
914 void
print_partial_compiled_pattern(start,end)915 print_partial_compiled_pattern (start, end)
916     re_char *start;
917     re_char *end;
918 {
919   int mcnt, mcnt2;
920   re_char *p = start;
921   re_char *pend = end;
922 
923   if (start == NULL)
924     {
925       printf ("(null)\n");
926       return;
927     }
928 
929   /* Loop over pattern commands.  */
930   while (p < pend)
931     {
932       printf ("%d:\t", p - start);
933 
934       switch ((re_opcode_t) *p++)
935 	{
936 	case no_op:
937 	  printf ("/no_op");
938 	  break;
939 
940 	case succeed:
941 	  printf ("/succeed");
942 	  break;
943 
944 	case exactn:
945 	  mcnt = *p++;
946 	  printf ("/exactn/%d", mcnt);
947 	  do
948 	    {
949 	      putchar ('/');
950 	      putchar (*p++);
951 	    }
952 	  while (--mcnt);
953 	  break;
954 
955 	case start_memory:
956 	  printf ("/start_memory/%d", *p++);
957 	  break;
958 
959 	case stop_memory:
960 	  printf ("/stop_memory/%d", *p++);
961 	  break;
962 
963 	case duplicate:
964 	  printf ("/duplicate/%d", *p++);
965 	  break;
966 
967 	case anychar:
968 	  printf ("/anychar");
969 	  break;
970 
971 	case charset:
972 	case charset_not:
973 	  {
974 	    register int c, last = -100;
975 	    register int in_range = 0;
976 	    int length = CHARSET_BITMAP_SIZE (p - 1);
977 	    int has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
978 
979 	    printf ("/charset [%s",
980 		    (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
981 
982 	    assert (p + *p < pend);
983 
984 	    for (c = 0; c < 256; c++)
985 	      if (c / 8 < length
986 		  && (p[1 + (c/8)] & (1 << (c % 8))))
987 		{
988 		  /* Are we starting a range?  */
989 		  if (last + 1 == c && ! in_range)
990 		    {
991 		      putchar ('-');
992 		      in_range = 1;
993 		    }
994 		  /* Have we broken a range?  */
995 		  else if (last + 1 != c && in_range)
996 		    {
997 		      putchar (last);
998 		      in_range = 0;
999 		    }
1000 
1001 		  if (! in_range)
1002 		    putchar (c);
1003 
1004 		  last = c;
1005 	      }
1006 
1007 	    if (in_range)
1008 	      putchar (last);
1009 
1010 	    putchar (']');
1011 
1012 	    p += 1 + length;
1013 
1014 	    if (has_range_table)
1015 	      {
1016 		int count;
1017 		printf ("has-range-table");
1018 
1019 		/* ??? Should print the range table; for now, just skip it.  */
1020 		p += 2;		/* skip range table bits */
1021 		EXTRACT_NUMBER_AND_INCR (count, p);
1022 		p = CHARSET_RANGE_TABLE_END (p, count);
1023 	      }
1024 	  }
1025 	  break;
1026 
1027 	case begline:
1028 	  printf ("/begline");
1029 	  break;
1030 
1031 	case endline:
1032 	  printf ("/endline");
1033 	  break;
1034 
1035 	case on_failure_jump:
1036 	  extract_number_and_incr (&mcnt, &p);
1037 	  printf ("/on_failure_jump to %d", p + mcnt - start);
1038 	  break;
1039 
1040 	case on_failure_keep_string_jump:
1041 	  extract_number_and_incr (&mcnt, &p);
1042 	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
1043 	  break;
1044 
1045 	case on_failure_jump_nastyloop:
1046 	  extract_number_and_incr (&mcnt, &p);
1047 	  printf ("/on_failure_jump_nastyloop to %d", p + mcnt - start);
1048 	  break;
1049 
1050 	case on_failure_jump_loop:
1051 	  extract_number_and_incr (&mcnt, &p);
1052 	  printf ("/on_failure_jump_loop to %d", p + mcnt - start);
1053 	  break;
1054 
1055 	case on_failure_jump_smart:
1056 	  extract_number_and_incr (&mcnt, &p);
1057 	  printf ("/on_failure_jump_smart to %d", p + mcnt - start);
1058 	  break;
1059 
1060 	case jump:
1061 	  extract_number_and_incr (&mcnt, &p);
1062 	  printf ("/jump to %d", p + mcnt - start);
1063 	  break;
1064 
1065 	case succeed_n:
1066 	  extract_number_and_incr (&mcnt, &p);
1067 	  extract_number_and_incr (&mcnt2, &p);
1068 	  printf ("/succeed_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
1069 	  break;
1070 
1071 	case jump_n:
1072 	  extract_number_and_incr (&mcnt, &p);
1073 	  extract_number_and_incr (&mcnt2, &p);
1074 	  printf ("/jump_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
1075 	  break;
1076 
1077 	case set_number_at:
1078 	  extract_number_and_incr (&mcnt, &p);
1079 	  extract_number_and_incr (&mcnt2, &p);
1080 	  printf ("/set_number_at location %d to %d", p - 2 + mcnt - start, mcnt2);
1081 	  break;
1082 
1083 	case wordbound:
1084 	  printf ("/wordbound");
1085 	  break;
1086 
1087 	case notwordbound:
1088 	  printf ("/notwordbound");
1089 	  break;
1090 
1091 	case wordbeg:
1092 	  printf ("/wordbeg");
1093 	  break;
1094 
1095 	case wordend:
1096 	  printf ("/wordend");
1097 
1098 	case syntaxspec:
1099 	  printf ("/syntaxspec");
1100 	  mcnt = *p++;
1101 	  printf ("/%d", mcnt);
1102 	  break;
1103 
1104 	case notsyntaxspec:
1105 	  printf ("/notsyntaxspec");
1106 	  mcnt = *p++;
1107 	  printf ("/%d", mcnt);
1108 	  break;
1109 
1110 # ifdef emacs
1111 	case before_dot:
1112 	  printf ("/before_dot");
1113 	  break;
1114 
1115 	case at_dot:
1116 	  printf ("/at_dot");
1117 	  break;
1118 
1119 	case after_dot:
1120 	  printf ("/after_dot");
1121 	  break;
1122 
1123 	case categoryspec:
1124 	  printf ("/categoryspec");
1125 	  mcnt = *p++;
1126 	  printf ("/%d", mcnt);
1127 	  break;
1128 
1129 	case notcategoryspec:
1130 	  printf ("/notcategoryspec");
1131 	  mcnt = *p++;
1132 	  printf ("/%d", mcnt);
1133 	  break;
1134 # endif /* emacs */
1135 
1136 	case begbuf:
1137 	  printf ("/begbuf");
1138 	  break;
1139 
1140 	case endbuf:
1141 	  printf ("/endbuf");
1142 	  break;
1143 
1144 	default:
1145 	  printf ("?%d", *(p-1));
1146 	}
1147 
1148       putchar ('\n');
1149     }
1150 
1151   printf ("%d:\tend of pattern.\n", p - start);
1152 }
1153 
1154 
1155 void
print_compiled_pattern(bufp)1156 print_compiled_pattern (bufp)
1157     struct re_pattern_buffer *bufp;
1158 {
1159   re_char *buffer = bufp->buffer;
1160 
1161   print_partial_compiled_pattern (buffer, buffer + bufp->used);
1162   printf ("%ld bytes used/%ld bytes allocated.\n",
1163 	  bufp->used, bufp->allocated);
1164 
1165   if (bufp->fastmap_accurate && bufp->fastmap)
1166     {
1167       printf ("fastmap: ");
1168       print_fastmap (bufp->fastmap);
1169     }
1170 
1171   printf ("re_nsub: %d\t", bufp->re_nsub);
1172   printf ("regs_alloc: %d\t", bufp->regs_allocated);
1173   printf ("can_be_null: %d\t", bufp->can_be_null);
1174   printf ("no_sub: %d\t", bufp->no_sub);
1175   printf ("not_bol: %d\t", bufp->not_bol);
1176   printf ("not_eol: %d\t", bufp->not_eol);
1177   printf ("syntax: %lx\n", bufp->syntax);
1178   fflush (stdout);
1179   /* Perhaps we should print the translate table?  */
1180 }
1181 
1182 
1183 void
print_double_string(where,string1,size1,string2,size2)1184 print_double_string (where, string1, size1, string2, size2)
1185     re_char *where;
1186     re_char *string1;
1187     re_char *string2;
1188     int size1;
1189     int size2;
1190 {
1191   int this_char;
1192 
1193   if (where == NULL)
1194     printf ("(null)");
1195   else
1196     {
1197       if (FIRST_STRING_P (where))
1198 	{
1199 	  for (this_char = where - string1; this_char < size1; this_char++)
1200 	    putchar (string1[this_char]);
1201 
1202 	  where = string2;
1203 	}
1204 
1205       for (this_char = where - string2; this_char < size2; this_char++)
1206 	putchar (string2[this_char]);
1207     }
1208 }
1209 
1210 #else /* not DEBUG */
1211 
1212 # undef assert
1213 # define assert(e)
1214 
1215 # define DEBUG_STATEMENT(e)
1216 # define DEBUG_PRINT1(x)
1217 # define DEBUG_PRINT2(x1, x2)
1218 # define DEBUG_PRINT3(x1, x2, x3)
1219 # define DEBUG_PRINT4(x1, x2, x3, x4)
1220 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
1221 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
1222 
1223 #endif /* not DEBUG */
1224 
1225 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
1226    also be assigned to arbitrarily: each pattern buffer stores its own
1227    syntax, so it can be changed between regex compilations.  */
1228 /* This has no initializer because initialized variables in Emacs
1229    become read-only after dumping.  */
1230 reg_syntax_t re_syntax_options;
1231 
1232 
1233 /* Specify the precise syntax of regexps for compilation.  This provides
1234    for compatibility for various utilities which historically have
1235    different, incompatible syntaxes.
1236 
1237    The argument SYNTAX is a bit mask comprised of the various bits
1238    defined in regex.h.  We return the old syntax.  */
1239 
1240 reg_syntax_t
re_set_syntax(syntax)1241 re_set_syntax (syntax)
1242     reg_syntax_t syntax;
1243 {
1244   reg_syntax_t ret = re_syntax_options;
1245 
1246   re_syntax_options = syntax;
1247   return ret;
1248 }
1249 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
1250 
1251 /* This table gives an error message for each of the error codes listed
1252    in regex.h.  Obviously the order here has to be same as there.
1253    POSIX doesn't require that we do anything for REG_NOERROR,
1254    but why not be nice?  */
1255 
1256 static const char *re_error_msgid[] =
1257   {
1258     gettext_noop ("Success"),	/* REG_NOERROR */
1259     gettext_noop ("No match"),	/* REG_NOMATCH */
1260     gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1261     gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1262     gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1263     gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1264     gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1265     gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
1266     gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1267     gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1268     gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1269     gettext_noop ("Invalid range end"),	/* REG_ERANGE */
1270     gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1271     gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1272     gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1273     gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1274     gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1275   };
1276 
1277 /* Avoiding alloca during matching, to placate r_alloc.  */
1278 
1279 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1280    searching and matching functions should not call alloca.  On some
1281    systems, alloca is implemented in terms of malloc, and if we're
1282    using the relocating allocator routines, then malloc could cause a
1283    relocation, which might (if the strings being searched are in the
1284    ralloc heap) shift the data out from underneath the regexp
1285    routines.
1286 
1287    Here's another reason to avoid allocation: Emacs
1288    processes input from X in a signal handler; processing X input may
1289    call malloc; if input arrives while a matching routine is calling
1290    malloc, then we're scrod.  But Emacs can't just block input while
1291    calling matching routines; then we don't notice interrupts when
1292    they come in.  So, Emacs blocks input around all regexp calls
1293    except the matching calls, which it leaves unprotected, in the
1294    faith that they will not malloc.  */
1295 
1296 /* Normally, this is fine.  */
1297 #define MATCH_MAY_ALLOCATE
1298 
1299 /* When using GNU C, we are not REALLY using the C alloca, no matter
1300    what config.h may say.  So don't take precautions for it.  */
1301 #ifdef __GNUC__
1302 # undef C_ALLOCA
1303 #endif
1304 
1305 /* The match routines may not allocate if (1) they would do it with malloc
1306    and (2) it's not safe for them to use malloc.
1307    Note that if REL_ALLOC is defined, matching would not use malloc for the
1308    failure stack, but we would still use it for the register vectors;
1309    so REL_ALLOC should not affect this.  */
1310 #if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1311 # undef MATCH_MAY_ALLOCATE
1312 #endif
1313 
1314 
1315 /* Failure stack declarations and macros; both re_compile_fastmap and
1316    re_match_2 use a failure stack.  These have to be macros because of
1317    REGEX_ALLOCATE_STACK.  */
1318 
1319 
1320 /* Approximate number of failure points for which to initially allocate space
1321    when matching.  If this number is exceeded, we allocate more
1322    space, so it is not a hard limit.  */
1323 #ifndef INIT_FAILURE_ALLOC
1324 # define INIT_FAILURE_ALLOC 20
1325 #endif
1326 
1327 /* Roughly the maximum number of failure points on the stack.  Would be
1328    exactly that if always used TYPICAL_FAILURE_SIZE items each time we failed.
1329    This is a variable only so users of regex can assign to it; we never
1330    change it ourselves.  We always multiply it by TYPICAL_FAILURE_SIZE
1331    before using it, so it should probably be a byte-count instead.  */
1332 # if defined MATCH_MAY_ALLOCATE
1333 /* Note that 4400 was enough to cause a crash on Alpha OSF/1,
1334    whose default stack limit is 2mb.  In order for a larger
1335    value to work reliably, you have to try to make it accord
1336    with the process stack limit.  */
1337 size_t re_max_failures = 40000;
1338 # else
1339 size_t re_max_failures = 4000;
1340 # endif
1341 
1342 union fail_stack_elt
1343 {
1344   re_char *pointer;
1345   /* This should be the biggest `int' that's no bigger than a pointer.  */
1346   long integer;
1347 };
1348 
1349 typedef union fail_stack_elt fail_stack_elt_t;
1350 
1351 typedef struct
1352 {
1353   fail_stack_elt_t *stack;
1354   size_t size;
1355   size_t avail;	/* Offset of next open position.  */
1356   size_t frame;	/* Offset of the cur constructed frame.  */
1357 } fail_stack_type;
1358 
1359 #define FAIL_STACK_EMPTY()     (fail_stack.frame == 0)
1360 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1361 
1362 
1363 /* Define macros to initialize and free the failure stack.
1364    Do `return -2' if the alloc fails.  */
1365 
1366 #ifdef MATCH_MAY_ALLOCATE
1367 # define INIT_FAIL_STACK()						\
1368   do {									\
1369     fail_stack.stack = (fail_stack_elt_t *)				\
1370       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * TYPICAL_FAILURE_SIZE	\
1371 			    * sizeof (fail_stack_elt_t));		\
1372 									\
1373     if (fail_stack.stack == NULL)					\
1374       return -2;							\
1375 									\
1376     fail_stack.size = INIT_FAILURE_ALLOC;				\
1377     fail_stack.avail = 0;						\
1378     fail_stack.frame = 0;						\
1379   } while (0)
1380 
1381 # define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1382 #else
1383 # define INIT_FAIL_STACK()						\
1384   do {									\
1385     fail_stack.avail = 0;						\
1386     fail_stack.frame = 0;						\
1387   } while (0)
1388 
1389 # define RESET_FAIL_STACK() ((void)0)
1390 #endif
1391 
1392 
1393 /* Double the size of FAIL_STACK, up to a limit
1394    which allows approximately `re_max_failures' items.
1395 
1396    Return 1 if succeeds, and 0 if either ran out of memory
1397    allocating space for it or it was already too large.
1398 
1399    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1400 
1401 /* Factor to increase the failure stack size by
1402    when we increase it.
1403    This used to be 2, but 2 was too wasteful
1404    because the old discarded stacks added up to as much space
1405    were as ultimate, maximum-size stack.  */
1406 #define FAIL_STACK_GROWTH_FACTOR 4
1407 
1408 #define GROW_FAIL_STACK(fail_stack)					\
1409   (((fail_stack).size * sizeof (fail_stack_elt_t)			\
1410     >= re_max_failures * TYPICAL_FAILURE_SIZE)				\
1411    ? 0									\
1412    : ((fail_stack).stack						\
1413       = (fail_stack_elt_t *)						\
1414 	REGEX_REALLOCATE_STACK ((fail_stack).stack,			\
1415 	  (fail_stack).size * sizeof (fail_stack_elt_t),		\
1416 	  MIN (re_max_failures * TYPICAL_FAILURE_SIZE,			\
1417 	       ((fail_stack).size * sizeof (fail_stack_elt_t)		\
1418 		* FAIL_STACK_GROWTH_FACTOR))),				\
1419 									\
1420       (fail_stack).stack == NULL					\
1421       ? 0								\
1422       : ((fail_stack).size						\
1423 	 = (MIN (re_max_failures * TYPICAL_FAILURE_SIZE,		\
1424 		 ((fail_stack).size * sizeof (fail_stack_elt_t)		\
1425 		  * FAIL_STACK_GROWTH_FACTOR))				\
1426 	    / sizeof (fail_stack_elt_t)),				\
1427 	 1)))
1428 
1429 
1430 /* Push a pointer value onto the failure stack.
1431    Assumes the variable `fail_stack'.  Probably should only
1432    be called from within `PUSH_FAILURE_POINT'.  */
1433 #define PUSH_FAILURE_POINTER(item)					\
1434   fail_stack.stack[fail_stack.avail++].pointer = (item)
1435 
1436 /* This pushes an integer-valued item onto the failure stack.
1437    Assumes the variable `fail_stack'.  Probably should only
1438    be called from within `PUSH_FAILURE_POINT'.  */
1439 #define PUSH_FAILURE_INT(item)					\
1440   fail_stack.stack[fail_stack.avail++].integer = (item)
1441 
1442 /* Push a fail_stack_elt_t value onto the failure stack.
1443    Assumes the variable `fail_stack'.  Probably should only
1444    be called from within `PUSH_FAILURE_POINT'.  */
1445 #define PUSH_FAILURE_ELT(item)					\
1446   fail_stack.stack[fail_stack.avail++] =  (item)
1447 
1448 /* These three POP... operations complement the three PUSH... operations.
1449    All assume that `fail_stack' is nonempty.  */
1450 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1451 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1452 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1453 
1454 /* Individual items aside from the registers.  */
1455 #define NUM_NONREG_ITEMS 3
1456 
1457 /* Used to examine the stack (to detect infinite loops).  */
1458 #define FAILURE_PAT(h) fail_stack.stack[(h) - 1].pointer
1459 #define FAILURE_STR(h) (fail_stack.stack[(h) - 2].pointer)
1460 #define NEXT_FAILURE_HANDLE(h) fail_stack.stack[(h) - 3].integer
1461 #define TOP_FAILURE_HANDLE() fail_stack.frame
1462 
1463 
1464 #define ENSURE_FAIL_STACK(space)					\
1465 while (REMAINING_AVAIL_SLOTS <= space) {				\
1466   if (!GROW_FAIL_STACK (fail_stack))					\
1467     return -2;								\
1468   DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n", (fail_stack).size);\
1469   DEBUG_PRINT2 ("	 slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1470 }
1471 
1472 /* Push register NUM onto the stack.  */
1473 #define PUSH_FAILURE_REG(num)						\
1474 do {									\
1475   char *destination;							\
1476   ENSURE_FAIL_STACK(3);							\
1477   DEBUG_PRINT4 ("    Push reg %d (spanning %p -> %p)\n",		\
1478 		num, regstart[num], regend[num]);			\
1479   PUSH_FAILURE_POINTER (regstart[num]);					\
1480   PUSH_FAILURE_POINTER (regend[num]);					\
1481   PUSH_FAILURE_INT (num);						\
1482 } while (0)
1483 
1484 /* Change the counter's value to VAL, but make sure that it will
1485    be reset when backtracking.  */
1486 #define PUSH_NUMBER(ptr,val)						\
1487 do {									\
1488   char *destination;							\
1489   int c;								\
1490   ENSURE_FAIL_STACK(3);							\
1491   EXTRACT_NUMBER (c, ptr);						\
1492   DEBUG_PRINT4 ("    Push number %p = %d -> %d\n", ptr, c, val);	\
1493   PUSH_FAILURE_INT (c);							\
1494   PUSH_FAILURE_POINTER (ptr);						\
1495   PUSH_FAILURE_INT (-1);						\
1496   STORE_NUMBER (ptr, val);						\
1497 } while (0)
1498 
1499 /* Pop a saved register off the stack.  */
1500 #define POP_FAILURE_REG_OR_COUNT()					\
1501 do {									\
1502   int reg = POP_FAILURE_INT ();						\
1503   if (reg == -1)							\
1504     {									\
1505       /* It's a counter.  */						\
1506       /* Here, we discard `const', making re_match non-reentrant.  */	\
1507       unsigned char *ptr = (unsigned char*) POP_FAILURE_POINTER ();	\
1508       reg = POP_FAILURE_INT ();						\
1509       STORE_NUMBER (ptr, reg);						\
1510       DEBUG_PRINT3 ("     Pop counter %p = %d\n", ptr, reg);		\
1511     }									\
1512   else									\
1513     {									\
1514       regend[reg] = POP_FAILURE_POINTER ();				\
1515       regstart[reg] = POP_FAILURE_POINTER ();				\
1516       DEBUG_PRINT4 ("     Pop reg %d (spanning %p -> %p)\n",		\
1517 		    reg, regstart[reg], regend[reg]);			\
1518     }									\
1519 } while (0)
1520 
1521 /* Check that we are not stuck in an infinite loop.  */
1522 #define CHECK_INFINITE_LOOP(pat_cur, string_place)			\
1523 do {									\
1524   int failure = TOP_FAILURE_HANDLE();					\
1525   /* Check for infinite matching loops */				\
1526   while (failure > 0 &&							\
1527 	 (FAILURE_STR (failure) == string_place				\
1528 	  || FAILURE_STR (failure) == NULL))				\
1529     {									\
1530       assert (FAILURE_PAT (failure) >= bufp->buffer			\
1531 	      && FAILURE_PAT (failure) <= bufp->buffer + bufp->used);	\
1532       if (FAILURE_PAT (failure) == pat_cur)				\
1533 	goto fail;							\
1534       DEBUG_PRINT2 ("  Other pattern: %p\n", FAILURE_PAT (failure));	\
1535       failure = NEXT_FAILURE_HANDLE(failure);				\
1536     }									\
1537   DEBUG_PRINT2 ("  Other string: %p\n", FAILURE_STR (failure));		\
1538 } while (0)
1539 
1540 /* Push the information about the state we will need
1541    if we ever fail back to it.
1542 
1543    Requires variables fail_stack, regstart, regend and
1544    num_regs be declared.  GROW_FAIL_STACK requires `destination' be
1545    declared.
1546 
1547    Does `return FAILURE_CODE' if runs out of memory.  */
1548 
1549 #define PUSH_FAILURE_POINT(pattern, string_place)			\
1550 do {									\
1551   char *destination;							\
1552   /* Must be int, so when we don't save any registers, the arithmetic	\
1553      of 0 + -1 isn't done as unsigned.  */				\
1554   									\
1555   DEBUG_STATEMENT (nfailure_points_pushed++);				\
1556   DEBUG_PRINT1 ("\nPUSH_FAILURE_POINT:\n");				\
1557   DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);	\
1558   DEBUG_PRINT2 ("			size: %d\n", (fail_stack).size);\
1559   									\
1560   ENSURE_FAIL_STACK (NUM_NONREG_ITEMS);					\
1561   									\
1562   DEBUG_PRINT1 ("\n");							\
1563   									\
1564   DEBUG_PRINT2 ("  Push frame index: %d\n", fail_stack.frame);		\
1565   PUSH_FAILURE_INT (fail_stack.frame);					\
1566   									\
1567   DEBUG_PRINT2 ("  Push string %p: `", string_place);			\
1568   DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\
1569   DEBUG_PRINT1 ("'\n");							\
1570   PUSH_FAILURE_POINTER (string_place);					\
1571   									\
1572   DEBUG_PRINT2 ("  Push pattern %p: ", pattern);			\
1573   DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern, pend);			\
1574   PUSH_FAILURE_POINTER (pattern);					\
1575   									\
1576   /* Close the frame by moving the frame pointer past it.  */		\
1577   fail_stack.frame = fail_stack.avail;					\
1578 } while (0)
1579 
1580 /* Estimate the size of data pushed by a typical failure stack entry.
1581    An estimate is all we need, because all we use this for
1582    is to choose a limit for how big to make the failure stack.  */
1583 /* BEWARE, the value `20' is hard-coded in emacs.c:main().  */
1584 #define TYPICAL_FAILURE_SIZE 20
1585 
1586 /* How many items can still be added to the stack without overflowing it.  */
1587 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1588 
1589 
1590 /* Pops what PUSH_FAIL_STACK pushes.
1591 
1592    We restore into the parameters, all of which should be lvalues:
1593      STR -- the saved data position.
1594      PAT -- the saved pattern position.
1595      REGSTART, REGEND -- arrays of string positions.
1596 
1597    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1598    `pend', `string1', `size1', `string2', and `size2'.	*/
1599 
1600 #define POP_FAILURE_POINT(str, pat)                                     \
1601 do {									\
1602   assert (!FAIL_STACK_EMPTY ());					\
1603 									\
1604   /* Remove failure points and point to how many regs pushed.  */	\
1605   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1606   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1607   DEBUG_PRINT2 ("		     size: %d\n", fail_stack.size);	\
1608 									\
1609   /* Pop the saved registers.  */					\
1610   while (fail_stack.frame < fail_stack.avail)				\
1611     POP_FAILURE_REG_OR_COUNT ();					\
1612 									\
1613   pat = POP_FAILURE_POINTER ();				\
1614   DEBUG_PRINT2 ("  Popping pattern %p: ", pat);				\
1615   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1616 									\
1617   /* If the saved string location is NULL, it came from an		\
1618      on_failure_keep_string_jump opcode, and we want to throw away the	\
1619      saved NULL, thus retaining our current position in the string.  */	\
1620   str = POP_FAILURE_POINTER ();						\
1621   DEBUG_PRINT2 ("  Popping string %p: `", str);				\
1622   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1623   DEBUG_PRINT1 ("'\n");							\
1624 									\
1625   fail_stack.frame = POP_FAILURE_INT ();				\
1626   DEBUG_PRINT2 ("  Popping  frame index: %d\n", fail_stack.frame);	\
1627 									\
1628   assert (fail_stack.avail >= 0);					\
1629   assert (fail_stack.frame <= fail_stack.avail);			\
1630 									\
1631   DEBUG_STATEMENT (nfailure_points_popped++);				\
1632 } while (0) /* POP_FAILURE_POINT */
1633 
1634 
1635 
1636 /* Registers are set to a sentinel when they haven't yet matched.  */
1637 #define REG_UNSET(e) ((e) == NULL)
1638 
1639 /* Subroutine declarations and macros for regex_compile.  */
1640 
1641 static reg_errcode_t regex_compile _RE_ARGS ((re_char *pattern, size_t size,
1642 					      reg_syntax_t syntax,
1643 					      struct re_pattern_buffer *bufp));
1644 static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1645 static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1646 				 int arg1, int arg2));
1647 static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1648 				  int arg, unsigned char *end));
1649 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1650 				  int arg1, int arg2, unsigned char *end));
1651 static boolean at_begline_loc_p _RE_ARGS ((re_char *pattern,
1652 					   re_char *p,
1653 					   reg_syntax_t syntax));
1654 static boolean at_endline_loc_p _RE_ARGS ((re_char *p,
1655 					   re_char *pend,
1656 					   reg_syntax_t syntax));
1657 static re_char *skip_one_char _RE_ARGS ((re_char *p));
1658 static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
1659 				    char *fastmap, const int multibyte));
1660 
1661 /* Fetch the next character in the uncompiled pattern---translating it
1662    if necessary.  */
1663 #define PATFETCH(c)							\
1664   do {									\
1665     PATFETCH_RAW (c);							\
1666     c = TRANSLATE (c);							\
1667   } while (0)
1668 
1669 /* Fetch the next character in the uncompiled pattern, with no
1670    translation.  */
1671 #define PATFETCH_RAW(c)							\
1672   do {									\
1673     int len;								\
1674     if (p == pend) return REG_EEND;					\
1675     c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len);			\
1676     p += len;								\
1677   } while (0)
1678 
1679 
1680 /* If `translate' is non-null, return translate[D], else just D.  We
1681    cast the subscript to translate because some data is declared as
1682    `char *', to avoid warnings when a string constant is passed.  But
1683    when we use a character as a subscript we must make it unsigned.  */
1684 #ifndef TRANSLATE
1685 # define TRANSLATE(d) \
1686   (RE_TRANSLATE_P (translate) ? RE_TRANSLATE (translate, (d)) : (d))
1687 #endif
1688 
1689 
1690 /* Macros for outputting the compiled pattern into `buffer'.  */
1691 
1692 /* If the buffer isn't allocated when it comes in, use this.  */
1693 #define INIT_BUF_SIZE  32
1694 
1695 /* Make sure we have at least N more bytes of space in buffer.  */
1696 #define GET_BUFFER_SPACE(n)						\
1697     while ((size_t) (b - bufp->buffer + (n)) > bufp->allocated)		\
1698       EXTEND_BUFFER ()
1699 
1700 /* Make sure we have one more byte of buffer space and then add C to it.  */
1701 #define BUF_PUSH(c)							\
1702   do {									\
1703     GET_BUFFER_SPACE (1);						\
1704     *b++ = (unsigned char) (c);						\
1705   } while (0)
1706 
1707 
1708 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1709 #define BUF_PUSH_2(c1, c2)						\
1710   do {									\
1711     GET_BUFFER_SPACE (2);						\
1712     *b++ = (unsigned char) (c1);					\
1713     *b++ = (unsigned char) (c2);					\
1714   } while (0)
1715 
1716 
1717 /* As with BUF_PUSH_2, except for three bytes.  */
1718 #define BUF_PUSH_3(c1, c2, c3)						\
1719   do {									\
1720     GET_BUFFER_SPACE (3);						\
1721     *b++ = (unsigned char) (c1);					\
1722     *b++ = (unsigned char) (c2);					\
1723     *b++ = (unsigned char) (c3);					\
1724   } while (0)
1725 
1726 
1727 /* Store a jump with opcode OP at LOC to location TO.  We store a
1728    relative address offset by the three bytes the jump itself occupies.  */
1729 #define STORE_JUMP(op, loc, to) \
1730   store_op1 (op, loc, (to) - (loc) - 3)
1731 
1732 /* Likewise, for a two-argument jump.  */
1733 #define STORE_JUMP2(op, loc, to, arg) \
1734   store_op2 (op, loc, (to) - (loc) - 3, arg)
1735 
1736 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1737 #define INSERT_JUMP(op, loc, to) \
1738   insert_op1 (op, loc, (to) - (loc) - 3, b)
1739 
1740 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1741 #define INSERT_JUMP2(op, loc, to, arg) \
1742   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1743 
1744 
1745 /* This is not an arbitrary limit: the arguments which represent offsets
1746    into the pattern are two bytes long.  So if 2^16 bytes turns out to
1747    be too small, many things would have to change.  */
1748 /* Any other compiler which, like MSC, has allocation limit below 2^16
1749    bytes will have to use approach similar to what was done below for
1750    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1751    reallocating to 0 bytes.  Such thing is not going to work too well.
1752    You have been warned!!  */
1753 #if defined _MSC_VER  && !defined WIN32
1754 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.  */
1755 # define MAX_BUF_SIZE  65500L
1756 #else
1757 # define MAX_BUF_SIZE (1L << 16)
1758 #endif
1759 
1760 /* Extend the buffer by twice its current size via realloc and
1761    reset the pointers that pointed into the old block to point to the
1762    correct places in the new one.  If extending the buffer results in it
1763    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1764 #if __BOUNDED_POINTERS__
1765 # define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
1766 # define MOVE_BUFFER_POINTER(P) \
1767   (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
1768 # define ELSE_EXTEND_BUFFER_HIGH_BOUND		\
1769   else						\
1770     {						\
1771       SET_HIGH_BOUND (b);			\
1772       SET_HIGH_BOUND (begalt);			\
1773       if (fixup_alt_jump)			\
1774 	SET_HIGH_BOUND (fixup_alt_jump);	\
1775       if (laststart)				\
1776 	SET_HIGH_BOUND (laststart);		\
1777       if (pending_exact)			\
1778 	SET_HIGH_BOUND (pending_exact);		\
1779     }
1780 #else
1781 # define MOVE_BUFFER_POINTER(P) (P) += incr
1782 # define ELSE_EXTEND_BUFFER_HIGH_BOUND
1783 #endif
1784 #define EXTEND_BUFFER()							\
1785   do {									\
1786     re_char *old_buffer = bufp->buffer;					\
1787     if (bufp->allocated == MAX_BUF_SIZE)				\
1788       return REG_ESIZE;							\
1789     bufp->allocated <<= 1;						\
1790     if (bufp->allocated > MAX_BUF_SIZE)					\
1791       bufp->allocated = MAX_BUF_SIZE;					\
1792     RETALLOC (bufp->buffer, bufp->allocated, unsigned char);		\
1793     if (bufp->buffer == NULL)						\
1794       return REG_ESPACE;						\
1795     /* If the buffer moved, move all the pointers into it.  */		\
1796     if (old_buffer != bufp->buffer)					\
1797       {									\
1798 	int incr = bufp->buffer - old_buffer;				\
1799 	MOVE_BUFFER_POINTER (b);					\
1800 	MOVE_BUFFER_POINTER (begalt);					\
1801 	if (fixup_alt_jump)						\
1802 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
1803 	if (laststart)							\
1804 	  MOVE_BUFFER_POINTER (laststart);				\
1805 	if (pending_exact)						\
1806 	  MOVE_BUFFER_POINTER (pending_exact);				\
1807       }									\
1808     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
1809   } while (0)
1810 
1811 
1812 /* Since we have one byte reserved for the register number argument to
1813    {start,stop}_memory, the maximum number of groups we can report
1814    things about is what fits in that byte.  */
1815 #define MAX_REGNUM 255
1816 
1817 /* But patterns can have more than `MAX_REGNUM' registers.  We just
1818    ignore the excess.  */
1819 typedef unsigned regnum_t;
1820 
1821 
1822 /* Macros for the compile stack.  */
1823 
1824 /* Since offsets can go either forwards or backwards, this type needs to
1825    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1826 /* int may be not enough when sizeof(int) == 2.  */
1827 typedef long pattern_offset_t;
1828 
1829 typedef struct
1830 {
1831   pattern_offset_t begalt_offset;
1832   pattern_offset_t fixup_alt_jump;
1833   pattern_offset_t laststart_offset;
1834   regnum_t regnum;
1835 } compile_stack_elt_t;
1836 
1837 
1838 typedef struct
1839 {
1840   compile_stack_elt_t *stack;
1841   unsigned size;
1842   unsigned avail;			/* Offset of next open position.  */
1843 } compile_stack_type;
1844 
1845 
1846 #define INIT_COMPILE_STACK_SIZE 32
1847 
1848 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1849 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1850 
1851 /* The next available element.  */
1852 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1853 
1854 
1855 /* Structure to manage work area for range table.  */
1856 struct range_table_work_area
1857 {
1858   int *table;			/* actual work area.  */
1859   int allocated;		/* allocated size for work area in bytes.  */
1860   int used;			/* actually used size in words.	 */
1861   int bits;			/* flag to record character classes */
1862 };
1863 
1864 /* Make sure that WORK_AREA can hold more N multibyte characters.  */
1865 #define EXTEND_RANGE_TABLE_WORK_AREA(work_area, n)			  \
1866   do {									  \
1867     if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated)  \
1868       {									  \
1869 	(work_area).allocated += 16 * sizeof (int);			  \
1870 	if ((work_area).table)						  \
1871 	  (work_area).table						  \
1872 	    = (int *) realloc ((work_area).table, (work_area).allocated); \
1873 	else								  \
1874 	  (work_area).table						  \
1875 	    = (int *) malloc ((work_area).allocated);			  \
1876 	if ((work_area).table == 0)					  \
1877 	  FREE_STACK_RETURN (REG_ESPACE);				  \
1878       }									  \
1879   } while (0)
1880 
1881 #define SET_RANGE_TABLE_WORK_AREA_BIT(work_area, bit)		\
1882   (work_area).bits |= (bit)
1883 
1884 /* Bits used to implement the multibyte-part of the various character classes
1885    such as [:alnum:] in a charset's range table.  */
1886 #define BIT_WORD	0x1
1887 #define BIT_LOWER	0x2
1888 #define BIT_PUNCT	0x4
1889 #define BIT_SPACE	0x8
1890 #define BIT_UPPER	0x10
1891 #define BIT_MULTIBYTE	0x20
1892 
1893 /* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
1894 #define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)	\
1895   do {									\
1896     EXTEND_RANGE_TABLE_WORK_AREA ((work_area), 2);			\
1897     (work_area).table[(work_area).used++] = (range_start);		\
1898     (work_area).table[(work_area).used++] = (range_end);		\
1899   } while (0)
1900 
1901 /* Free allocated memory for WORK_AREA.	 */
1902 #define FREE_RANGE_TABLE_WORK_AREA(work_area)	\
1903   do {						\
1904     if ((work_area).table)			\
1905       free ((work_area).table);			\
1906   } while (0)
1907 
1908 #define CLEAR_RANGE_TABLE_WORK_USED(work_area) ((work_area).used = 0, (work_area).bits = 0)
1909 #define RANGE_TABLE_WORK_USED(work_area) ((work_area).used)
1910 #define RANGE_TABLE_WORK_BITS(work_area) ((work_area).bits)
1911 #define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
1912 
1913 
1914 /* Set the bit for character C in a list.  */
1915 #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
1916 
1917 
1918 /* Get the next unsigned number in the uncompiled pattern.  */
1919 #define GET_UNSIGNED_NUMBER(num)					\
1920  do { if (p != pend)							\
1921      {									\
1922        PATFETCH (c);							\
1923        while ('0' <= c && c <= '9')					\
1924 	 {								\
1925 	   if (num < 0)							\
1926 	      num = 0;							\
1927 	   num = num * 10 + c - '0';					\
1928 	   if (p == pend)						\
1929 	      break;							\
1930 	   PATFETCH (c);						\
1931 	 }								\
1932        }								\
1933     } while (0)
1934 
1935 #if WIDE_CHAR_SUPPORT
1936 /* The GNU C library provides support for user-defined character classes
1937    and the functions from ISO C amendement 1.  */
1938 # ifdef CHARCLASS_NAME_MAX
1939 #  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1940 # else
1941 /* This shouldn't happen but some implementation might still have this
1942    problem.  Use a reasonable default value.  */
1943 #  define CHAR_CLASS_MAX_LENGTH 256
1944 # endif
1945 typedef wctype_t re_wctype_t;
1946 typedef wchar_t re_wchar_t;
1947 # define re_wctype wctype
1948 # define re_iswctype iswctype
1949 # define re_wctype_to_bit(cc) 0
1950 #else
1951 # define CHAR_CLASS_MAX_LENGTH  9 /* Namely, `multibyte'.  */
1952 # define btowc(c) c
1953 
1954 /* Character classes.  */
1955 typedef enum { RECC_ERROR = 0,
1956 	       RECC_ALNUM, RECC_ALPHA, RECC_WORD,
1957 	       RECC_GRAPH, RECC_PRINT,
1958 	       RECC_LOWER, RECC_UPPER,
1959 	       RECC_PUNCT, RECC_CNTRL,
1960 	       RECC_DIGIT, RECC_XDIGIT,
1961 	       RECC_BLANK, RECC_SPACE,
1962 	       RECC_MULTIBYTE, RECC_NONASCII,
1963 	       RECC_ASCII, RECC_UNIBYTE
1964 } re_wctype_t;
1965 
1966 typedef int re_wchar_t;
1967 
1968 /* Map a string to the char class it names (if any).  */
1969 static re_wctype_t
re_wctype(str)1970 re_wctype (str)
1971      re_char *str;
1972 {
1973   const char *string = str;
1974   if      (STREQ (string, "alnum"))	return RECC_ALNUM;
1975   else if (STREQ (string, "alpha"))	return RECC_ALPHA;
1976   else if (STREQ (string, "word"))	return RECC_WORD;
1977   else if (STREQ (string, "ascii"))	return RECC_ASCII;
1978   else if (STREQ (string, "nonascii"))	return RECC_NONASCII;
1979   else if (STREQ (string, "graph"))	return RECC_GRAPH;
1980   else if (STREQ (string, "lower"))	return RECC_LOWER;
1981   else if (STREQ (string, "print"))	return RECC_PRINT;
1982   else if (STREQ (string, "punct"))	return RECC_PUNCT;
1983   else if (STREQ (string, "space"))	return RECC_SPACE;
1984   else if (STREQ (string, "upper"))	return RECC_UPPER;
1985   else if (STREQ (string, "unibyte"))	return RECC_UNIBYTE;
1986   else if (STREQ (string, "multibyte"))	return RECC_MULTIBYTE;
1987   else if (STREQ (string, "digit"))	return RECC_DIGIT;
1988   else if (STREQ (string, "xdigit"))	return RECC_XDIGIT;
1989   else if (STREQ (string, "cntrl"))	return RECC_CNTRL;
1990   else if (STREQ (string, "blank"))	return RECC_BLANK;
1991   else return 0;
1992 }
1993 
1994 /* True iff CH is in the char class CC.  */
1995 static boolean
re_iswctype(ch,cc)1996 re_iswctype (ch, cc)
1997      int ch;
1998      re_wctype_t cc;
1999 {
2000   switch (cc)
2001     {
2002     case RECC_ALNUM: return ISALNUM (ch);
2003     case RECC_ALPHA: return ISALPHA (ch);
2004     case RECC_BLANK: return ISBLANK (ch);
2005     case RECC_CNTRL: return ISCNTRL (ch);
2006     case RECC_DIGIT: return ISDIGIT (ch);
2007     case RECC_GRAPH: return ISGRAPH (ch);
2008     case RECC_LOWER: return ISLOWER (ch);
2009     case RECC_PRINT: return ISPRINT (ch);
2010     case RECC_PUNCT: return ISPUNCT (ch);
2011     case RECC_SPACE: return ISSPACE (ch);
2012     case RECC_UPPER: return ISUPPER (ch);
2013     case RECC_XDIGIT: return ISXDIGIT (ch);
2014     case RECC_ASCII: return IS_REAL_ASCII (ch);
2015     case RECC_NONASCII: return !IS_REAL_ASCII (ch);
2016     case RECC_UNIBYTE: return ISUNIBYTE (ch);
2017     case RECC_MULTIBYTE: return !ISUNIBYTE (ch);
2018     case RECC_WORD: return ISWORD (ch);
2019     case RECC_ERROR: return false;
2020     default:
2021       abort();
2022     }
2023 }
2024 
2025 /* Return a bit-pattern to use in the range-table bits to match multibyte
2026    chars of class CC.  */
2027 static int
re_wctype_to_bit(cc)2028 re_wctype_to_bit (cc)
2029      re_wctype_t cc;
2030 {
2031   switch (cc)
2032     {
2033     case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH:
2034     case RECC_MULTIBYTE: return BIT_MULTIBYTE;
2035     case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD;
2036     case RECC_LOWER: return BIT_LOWER;
2037     case RECC_UPPER: return BIT_UPPER;
2038     case RECC_PUNCT: return BIT_PUNCT;
2039     case RECC_SPACE: return BIT_SPACE;
2040     case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
2041     case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
2042     default:
2043       abort();
2044     }
2045 }
2046 #endif
2047 
2048 /* Explicit quit checking is only used on NTemacs.  */
2049 #if defined WINDOWSNT && defined emacs && defined QUIT
2050 extern int immediate_quit;
2051 # define IMMEDIATE_QUIT_CHECK			\
2052     do {					\
2053       if (immediate_quit) QUIT;			\
2054     } while (0)
2055 #else
2056 # define IMMEDIATE_QUIT_CHECK    ((void)0)
2057 #endif
2058 
2059 #ifndef MATCH_MAY_ALLOCATE
2060 
2061 /* If we cannot allocate large objects within re_match_2_internal,
2062    we make the fail stack and register vectors global.
2063    The fail stack, we grow to the maximum size when a regexp
2064    is compiled.
2065    The register vectors, we adjust in size each time we
2066    compile a regexp, according to the number of registers it needs.  */
2067 
2068 static fail_stack_type fail_stack;
2069 
2070 /* Size with which the following vectors are currently allocated.
2071    That is so we can make them bigger as needed,
2072    but never make them smaller.  */
2073 static int regs_allocated_size;
2074 
2075 static re_char **     regstart, **     regend;
2076 static re_char **best_regstart, **best_regend;
2077 
2078 /* Make the register vectors big enough for NUM_REGS registers,
2079    but don't make them smaller.  */
2080 
2081 static
regex_grow_registers(num_regs)2082 regex_grow_registers (num_regs)
2083      int num_regs;
2084 {
2085   if (num_regs > regs_allocated_size)
2086     {
2087       RETALLOC_IF (regstart,	 num_regs, re_char *);
2088       RETALLOC_IF (regend,	 num_regs, re_char *);
2089       RETALLOC_IF (best_regstart, num_regs, re_char *);
2090       RETALLOC_IF (best_regend,	 num_regs, re_char *);
2091 
2092       regs_allocated_size = num_regs;
2093     }
2094 }
2095 
2096 #endif /* not MATCH_MAY_ALLOCATE */
2097 
2098 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
2099 						 compile_stack,
2100 						 regnum_t regnum));
2101 
2102 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2103    Returns one of error codes defined in `regex.h', or zero for success.
2104 
2105    Assumes the `allocated' (and perhaps `buffer') and `translate'
2106    fields are set in BUFP on entry.
2107 
2108    If it succeeds, results are put in BUFP (if it returns an error, the
2109    contents of BUFP are undefined):
2110      `buffer' is the compiled pattern;
2111      `syntax' is set to SYNTAX;
2112      `used' is set to the length of the compiled pattern;
2113      `fastmap_accurate' is zero;
2114      `re_nsub' is the number of subexpressions in PATTERN;
2115      `not_bol' and `not_eol' are zero;
2116 
2117    The `fastmap' field is neither examined nor set.  */
2118 
2119 /* Insert the `jump' from the end of last alternative to "here".
2120    The space for the jump has already been allocated. */
2121 #define FIXUP_ALT_JUMP()						\
2122 do {									\
2123   if (fixup_alt_jump)							\
2124     STORE_JUMP (jump, fixup_alt_jump, b);				\
2125 } while (0)
2126 
2127 
2128 /* Return, freeing storage we allocated.  */
2129 #define FREE_STACK_RETURN(value)		\
2130   do {							\
2131     FREE_RANGE_TABLE_WORK_AREA (range_table_work);	\
2132     free (compile_stack.stack);				\
2133     return value;					\
2134   } while (0)
2135 
2136 static reg_errcode_t
regex_compile(pattern,size,syntax,bufp)2137 regex_compile (pattern, size, syntax, bufp)
2138      re_char *pattern;
2139      size_t size;
2140      reg_syntax_t syntax;
2141      struct re_pattern_buffer *bufp;
2142 {
2143   /* We fetch characters from PATTERN here.  */
2144   register re_wchar_t c, c1;
2145 
2146   /* A random temporary spot in PATTERN.  */
2147   re_char *p1;
2148 
2149   /* Points to the end of the buffer, where we should append.  */
2150   register unsigned char *b;
2151 
2152   /* Keeps track of unclosed groups.  */
2153   compile_stack_type compile_stack;
2154 
2155   /* Points to the current (ending) position in the pattern.  */
2156 #ifdef AIX
2157   /* `const' makes AIX compiler fail.  */
2158   unsigned char *p = pattern;
2159 #else
2160   re_char *p = pattern;
2161 #endif
2162   re_char *pend = pattern + size;
2163 
2164   /* How to translate the characters in the pattern.  */
2165   RE_TRANSLATE_TYPE translate = bufp->translate;
2166 
2167   /* Address of the count-byte of the most recently inserted `exactn'
2168      command.  This makes it possible to tell if a new exact-match
2169      character can be added to that command or if the character requires
2170      a new `exactn' command.  */
2171   unsigned char *pending_exact = 0;
2172 
2173   /* Address of start of the most recently finished expression.
2174      This tells, e.g., postfix * where to find the start of its
2175      operand.  Reset at the beginning of groups and alternatives.  */
2176   unsigned char *laststart = 0;
2177 
2178   /* Address of beginning of regexp, or inside of last group.  */
2179   unsigned char *begalt;
2180 
2181   /* Place in the uncompiled pattern (i.e., the {) to
2182      which to go back if the interval is invalid.  */
2183   re_char *beg_interval;
2184 
2185   /* Address of the place where a forward jump should go to the end of
2186      the containing expression.	 Each alternative of an `or' -- except the
2187      last -- ends with a forward jump of this sort.  */
2188   unsigned char *fixup_alt_jump = 0;
2189 
2190   /* Counts open-groups as they are encountered.  Remembered for the
2191      matching close-group on the compile stack, so the same register
2192      number is put in the stop_memory as the start_memory.  */
2193   regnum_t regnum = 0;
2194 
2195   /* Work area for range table of charset.  */
2196   struct range_table_work_area range_table_work;
2197 
2198   /* If the object matched can contain multibyte characters.  */
2199   const boolean multibyte = RE_MULTIBYTE_P (bufp);
2200 
2201 #ifdef DEBUG
2202   debug++;
2203   DEBUG_PRINT1 ("\nCompiling pattern: ");
2204   if (debug > 0)
2205     {
2206       unsigned debug_count;
2207 
2208       for (debug_count = 0; debug_count < size; debug_count++)
2209 	putchar (pattern[debug_count]);
2210       putchar ('\n');
2211     }
2212 #endif /* DEBUG */
2213 
2214   /* Initialize the compile stack.  */
2215   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2216   if (compile_stack.stack == NULL)
2217     return REG_ESPACE;
2218 
2219   compile_stack.size = INIT_COMPILE_STACK_SIZE;
2220   compile_stack.avail = 0;
2221 
2222   range_table_work.table = 0;
2223   range_table_work.allocated = 0;
2224 
2225   /* Initialize the pattern buffer.  */
2226   bufp->syntax = syntax;
2227   bufp->fastmap_accurate = 0;
2228   bufp->not_bol = bufp->not_eol = 0;
2229 
2230   /* Set `used' to zero, so that if we return an error, the pattern
2231      printer (for debugging) will think there's no pattern.  We reset it
2232      at the end.  */
2233   bufp->used = 0;
2234 
2235   /* Always count groups, whether or not bufp->no_sub is set.  */
2236   bufp->re_nsub = 0;
2237 
2238 #if !defined emacs && !defined SYNTAX_TABLE
2239   /* Initialize the syntax table.  */
2240    init_syntax_once ();
2241 #endif
2242 
2243   if (bufp->allocated == 0)
2244     {
2245       if (bufp->buffer)
2246 	{ /* If zero allocated, but buffer is non-null, try to realloc
2247 	     enough space.  This loses if buffer's address is bogus, but
2248 	     that is the user's responsibility.	 */
2249 	  RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
2250 	}
2251       else
2252 	{ /* Caller did not allocate a buffer.	Do it for them.	 */
2253 	  bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
2254 	}
2255       if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
2256 
2257       bufp->allocated = INIT_BUF_SIZE;
2258     }
2259 
2260   begalt = b = bufp->buffer;
2261 
2262   /* Loop through the uncompiled pattern until we're at the end.  */
2263   while (p != pend)
2264     {
2265       PATFETCH (c);
2266 
2267       switch (c)
2268 	{
2269 	case '^':
2270 	  {
2271 	    if (   /* If at start of pattern, it's an operator.	 */
2272 		   p == pattern + 1
2273 		   /* If context independent, it's an operator.	 */
2274 		|| syntax & RE_CONTEXT_INDEP_ANCHORS
2275 		   /* Otherwise, depends on what's come before.	 */
2276 		|| at_begline_loc_p (pattern, p, syntax))
2277 	      BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? begbuf : begline);
2278 	    else
2279 	      goto normal_char;
2280 	  }
2281 	  break;
2282 
2283 
2284 	case '$':
2285 	  {
2286 	    if (   /* If at end of pattern, it's an operator.  */
2287 		   p == pend
2288 		   /* If context independent, it's an operator.	 */
2289 		|| syntax & RE_CONTEXT_INDEP_ANCHORS
2290 		   /* Otherwise, depends on what's next.  */
2291 		|| at_endline_loc_p (p, pend, syntax))
2292 	       BUF_PUSH ((syntax & RE_NO_NEWLINE_ANCHOR) ? endbuf : endline);
2293 	     else
2294 	       goto normal_char;
2295 	   }
2296 	   break;
2297 
2298 
2299 	case '+':
2300 	case '?':
2301 	  if ((syntax & RE_BK_PLUS_QM)
2302 	      || (syntax & RE_LIMITED_OPS))
2303 	    goto normal_char;
2304 	handle_plus:
2305 	case '*':
2306 	  /* If there is no previous pattern... */
2307 	  if (!laststart)
2308 	    {
2309 	      if (syntax & RE_CONTEXT_INVALID_OPS)
2310 		FREE_STACK_RETURN (REG_BADRPT);
2311 	      else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2312 		goto normal_char;
2313 	    }
2314 
2315 	  {
2316 	    /* 1 means zero (many) matches is allowed.	*/
2317 	    boolean zero_times_ok = 0, many_times_ok = 0;
2318 	    boolean greedy = 1;
2319 
2320 	    /* If there is a sequence of repetition chars, collapse it
2321 	       down to just one (the right one).  We can't combine
2322 	       interval operators with these because of, e.g., `a{2}*',
2323 	       which should only match an even number of `a's.	*/
2324 
2325 	    for (;;)
2326 	      {
2327 		if ((syntax & RE_FRUGAL)
2328 		    && c == '?' && (zero_times_ok || many_times_ok))
2329 		  greedy = 0;
2330 		else
2331 		  {
2332 		    zero_times_ok |= c != '+';
2333 		    many_times_ok |= c != '?';
2334 		  }
2335 
2336 		if (p == pend)
2337 		  break;
2338 		else if (*p == '*'
2339 			 || (!(syntax & RE_BK_PLUS_QM)
2340 			     && (*p == '+' || *p == '?')))
2341 		  ;
2342 		else if (syntax & RE_BK_PLUS_QM	 && *p == '\\')
2343 		  {
2344 		    if (p+1 == pend)
2345 		      FREE_STACK_RETURN (REG_EESCAPE);
2346 		    if (p[1] == '+' || p[1] == '?')
2347 		      PATFETCH (c); /* Gobble up the backslash.  */
2348 		    else
2349 		      break;
2350 		  }
2351 		else
2352 		  break;
2353 		/* If we get here, we found another repeat character.  */
2354 		PATFETCH (c);
2355 	       }
2356 
2357 	    /* Star, etc. applied to an empty pattern is equivalent
2358 	       to an empty pattern.  */
2359 	    if (!laststart || laststart == b)
2360 	      break;
2361 
2362 	    /* Now we know whether or not zero matches is allowed
2363 	       and also whether or not two or more matches is allowed.	*/
2364 	    if (greedy)
2365 	      {
2366 		if (many_times_ok)
2367 		  {
2368 		    boolean simple = skip_one_char (laststart) == b;
2369 		    unsigned int startoffset = 0;
2370 		    re_opcode_t ofj =
2371 		      /* Check if the loop can match the empty string.  */
2372 		      (simple || !analyse_first (laststart, b, NULL, 0)) ?
2373 		      on_failure_jump : on_failure_jump_loop;
2374 		    assert (skip_one_char (laststart) <= b);
2375 
2376 		    if (!zero_times_ok && simple)
2377 		      { /* Since simple * loops can be made faster by using
2378 		    	   on_failure_keep_string_jump, we turn simple P+
2379 		    	   into PP* if P is simple.  */
2380 		    	unsigned char *p1, *p2;
2381 		    	startoffset = b - laststart;
2382 		    	GET_BUFFER_SPACE (startoffset);
2383 		    	p1 = b; p2 = laststart;
2384 		    	while (p2 < p1)
2385 		    	  *b++ = *p2++;
2386 		    	zero_times_ok = 1;
2387 		      }
2388 
2389 		    GET_BUFFER_SPACE (6);
2390 		    if (!zero_times_ok)
2391 		      /* A + loop.  */
2392 		      STORE_JUMP (ofj, b, b + 6);
2393 		    else
2394 		      /* Simple * loops can use on_failure_keep_string_jump
2395 			 depending on what follows.  But since we don't know
2396 			 that yet, we leave the decision up to
2397 			 on_failure_jump_smart.  */
2398 		      INSERT_JUMP (simple ? on_failure_jump_smart : ofj,
2399 				   laststart + startoffset, b + 6);
2400 		    b += 3;
2401 		    STORE_JUMP (jump, b, laststart + startoffset);
2402 		    b += 3;
2403 		  }
2404 		else
2405 		  {
2406 		    /* A simple ? pattern.  */
2407 		    assert (zero_times_ok);
2408 		    GET_BUFFER_SPACE (3);
2409 		    INSERT_JUMP (on_failure_jump, laststart, b + 3);
2410 		    b += 3;
2411 		  }
2412 	      }
2413 	    else		/* not greedy */
2414 	      { /* I wish the greedy and non-greedy cases could be merged. */
2415 
2416 		GET_BUFFER_SPACE (7); /* We might use less.  */
2417 		if (many_times_ok)
2418 		  {
2419 		    boolean emptyp = analyse_first (laststart, b, NULL, 0);
2420 
2421 		    /* The non-greedy multiple match looks like a repeat..until:
2422 		       we only need a conditional jump at the end of the loop */
2423 		    if (emptyp) BUF_PUSH (no_op);
2424 		    STORE_JUMP (emptyp ? on_failure_jump_nastyloop
2425 				: on_failure_jump, b, laststart);
2426 		    b += 3;
2427 		    if (zero_times_ok)
2428 		      {
2429 			/* The repeat...until naturally matches one or more.
2430 			   To also match zero times, we need to first jump to
2431 			   the end of the loop (its conditional jump). */
2432 			INSERT_JUMP (jump, laststart, b);
2433 			b += 3;
2434 		      }
2435 		  }
2436 		else
2437 		  {
2438 		    /* non-greedy a?? */
2439 		    INSERT_JUMP (jump, laststart, b + 3);
2440 		    b += 3;
2441 		    INSERT_JUMP (on_failure_jump, laststart, laststart + 6);
2442 		    b += 3;
2443 		  }
2444 	      }
2445 	  }
2446 	  pending_exact = 0;
2447 	  break;
2448 
2449 
2450 	case '.':
2451 	  laststart = b;
2452 	  BUF_PUSH (anychar);
2453 	  break;
2454 
2455 
2456 	case '[':
2457 	  {
2458 	    CLEAR_RANGE_TABLE_WORK_USED (range_table_work);
2459 
2460 	    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2461 
2462 	    /* Ensure that we have enough space to push a charset: the
2463 	       opcode, the length count, and the bitset; 34 bytes in all.  */
2464 	    GET_BUFFER_SPACE (34);
2465 
2466 	    laststart = b;
2467 
2468 	    /* We test `*p == '^' twice, instead of using an if
2469 	       statement, so we only need one BUF_PUSH.	 */
2470 	    BUF_PUSH (*p == '^' ? charset_not : charset);
2471 	    if (*p == '^')
2472 	      p++;
2473 
2474 	    /* Remember the first position in the bracket expression.  */
2475 	    p1 = p;
2476 
2477 	    /* Push the number of bytes in the bitmap.	*/
2478 	    BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2479 
2480 	    /* Clear the whole map.  */
2481 	    bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2482 
2483 	    /* charset_not matches newline according to a syntax bit.  */
2484 	    if ((re_opcode_t) b[-2] == charset_not
2485 		&& (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2486 	      SET_LIST_BIT ('\n');
2487 
2488 	    /* Read in characters and ranges, setting map bits.	 */
2489 	    for (;;)
2490 	      {
2491 		boolean escaped_char = false;
2492 		const unsigned char *p2 = p;
2493 
2494 		if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2495 
2496 		PATFETCH (c);
2497 
2498 		/* \ might escape characters inside [...] and [^...].  */
2499 		if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2500 		  {
2501 		    if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2502 
2503 		    PATFETCH (c);
2504 		    escaped_char = true;
2505 		  }
2506 		else
2507 		  {
2508 		    /* Could be the end of the bracket expression.	If it's
2509 		       not (i.e., when the bracket expression is `[]' so
2510 		       far), the ']' character bit gets set way below.  */
2511 		    if (c == ']' && p2 != p1)
2512 		      break;
2513 		  }
2514 
2515 		/* What should we do for the character which is
2516 		   greater than 0x7F, but not BASE_LEADING_CODE_P?
2517 		   XXX */
2518 
2519 		/* See if we're at the beginning of a possible character
2520 		   class.  */
2521 
2522 		if (!escaped_char &&
2523 		    syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2524 		  {
2525 		    /* Leave room for the null.	 */
2526 		    unsigned char str[CHAR_CLASS_MAX_LENGTH + 1];
2527 		    const unsigned char *class_beg;
2528 
2529 		    PATFETCH (c);
2530 		    c1 = 0;
2531 		    class_beg = p;
2532 
2533 		    /* If pattern is `[[:'.  */
2534 		    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2535 
2536 		    for (;;)
2537 		      {
2538 		        PATFETCH (c);
2539 		        if ((c == ':' && *p == ']') || p == pend)
2540 		          break;
2541 			if (c1 < CHAR_CLASS_MAX_LENGTH)
2542 			  str[c1++] = c;
2543 			else
2544 			  /* This is in any case an invalid class name.  */
2545 			  str[0] = '\0';
2546 		      }
2547 		    str[c1] = '\0';
2548 
2549 		    /* If isn't a word bracketed by `[:' and `:]':
2550 		       undo the ending character, the letters, and
2551 		       leave the leading `:' and `[' (but set bits for
2552 		       them).  */
2553 		    if (c == ':' && *p == ']')
2554 		      {
2555 			int ch;
2556 			re_wctype_t cc;
2557 
2558 			cc = re_wctype (str);
2559 
2560 			if (cc == 0)
2561 			  FREE_STACK_RETURN (REG_ECTYPE);
2562 
2563                         /* Throw away the ] at the end of the character
2564                            class.  */
2565                         PATFETCH (c);
2566 
2567                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2568 
2569 			/* Most character classes in a multibyte match
2570 			   just set a flag.  Exceptions are is_blank,
2571 			   is_digit, is_cntrl, and is_xdigit, since
2572 			   they can only match ASCII characters.  We
2573 			   don't need to handle them for multibyte.
2574 			   They are distinguished by a negative wctype.  */
2575 
2576 			if (multibyte)
2577 			  SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work,
2578 							 re_wctype_to_bit (cc));
2579 
2580                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2581 			  {
2582 			    int translated = TRANSLATE (ch);
2583 			    if (re_iswctype (btowc (ch), cc))
2584 			      SET_LIST_BIT (translated);
2585 			  }
2586 
2587 			/* Repeat the loop. */
2588 			continue;
2589 		      }
2590 		    else
2591 		      {
2592 			/* Go back to right after the "[:".  */
2593 			p = class_beg;
2594 			SET_LIST_BIT ('[');
2595 
2596 			/* Because the `:' may starts the range, we
2597 			   can't simply set bit and repeat the loop.
2598 			   Instead, just set it to C and handle below.	*/
2599 			c = ':';
2600 		      }
2601 		  }
2602 
2603 		if (p < pend && p[0] == '-' && p[1] != ']')
2604 		  {
2605 
2606 		    /* Discard the `-'. */
2607 		    PATFETCH (c1);
2608 
2609 		    /* Fetch the character which ends the range. */
2610 		    PATFETCH (c1);
2611 
2612 		    if (SINGLE_BYTE_CHAR_P (c))
2613 		      {
2614 			if (! SINGLE_BYTE_CHAR_P (c1))
2615 			  {
2616 			    /* Handle a range starting with a
2617 			       character of less than 256, and ending
2618 			       with a character of not less than 256.
2619 			       Split that into two ranges, the low one
2620 			       ending at 0377, and the high one
2621 			       starting at the smallest character in
2622 			       the charset of C1 and ending at C1.  */
2623 			    int charset = CHAR_CHARSET (c1);
2624 			    int c2 = MAKE_CHAR (charset, 0, 0);
2625 
2626 			    SET_RANGE_TABLE_WORK_AREA (range_table_work,
2627 						       c2, c1);
2628 			    c1 = 0377;
2629 			  }
2630 		      }
2631 		    else if (!SAME_CHARSET_P (c, c1))
2632 		      FREE_STACK_RETURN (REG_ERANGE);
2633 		  }
2634 		else
2635 		  /* Range from C to C. */
2636 		  c1 = c;
2637 
2638 		/* Set the range ... */
2639 		if (SINGLE_BYTE_CHAR_P (c))
2640 		  /* ... into bitmap.  */
2641 		  {
2642 		    re_wchar_t this_char;
2643 		    int range_start = c, range_end = c1;
2644 
2645 		    /* If the start is after the end, the range is empty.  */
2646 		    if (range_start > range_end)
2647 		      {
2648 			if (syntax & RE_NO_EMPTY_RANGES)
2649 			  FREE_STACK_RETURN (REG_ERANGE);
2650 			/* Else, repeat the loop.  */
2651 		      }
2652 		    else
2653 		      {
2654 			for (this_char = range_start; this_char <= range_end;
2655 			     this_char++)
2656 			  SET_LIST_BIT (TRANSLATE (this_char));
2657 		      }
2658 		  }
2659 		else
2660 		  /* ... into range table.  */
2661 		  SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
2662 	      }
2663 
2664 	    /* Discard any (non)matching list bytes that are all 0 at the
2665 	       end of the map.	Decrease the map-length byte too.  */
2666 	    while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2667 	      b[-1]--;
2668 	    b += b[-1];
2669 
2670 	    /* Build real range table from work area.  */
2671 	    if (RANGE_TABLE_WORK_USED (range_table_work)
2672 		|| RANGE_TABLE_WORK_BITS (range_table_work))
2673 	      {
2674 		int i;
2675 		int used = RANGE_TABLE_WORK_USED (range_table_work);
2676 
2677 		/* Allocate space for COUNT + RANGE_TABLE.  Needs two
2678 		   bytes for flags, two for COUNT, and three bytes for
2679 		   each character. */
2680 		GET_BUFFER_SPACE (4 + used * 3);
2681 
2682 		/* Indicate the existence of range table.  */
2683 		laststart[1] |= 0x80;
2684 
2685 		/* Store the character class flag bits into the range table.
2686 		   If not in emacs, these flag bits are always 0.  */
2687 		*b++ = RANGE_TABLE_WORK_BITS (range_table_work) & 0xff;
2688 		*b++ = RANGE_TABLE_WORK_BITS (range_table_work) >> 8;
2689 
2690 		STORE_NUMBER_AND_INCR (b, used / 2);
2691 		for (i = 0; i < used; i++)
2692 		  STORE_CHARACTER_AND_INCR
2693 		    (b, RANGE_TABLE_WORK_ELT (range_table_work, i));
2694 	      }
2695 	  }
2696 	  break;
2697 
2698 
2699 	case '(':
2700 	  if (syntax & RE_NO_BK_PARENS)
2701 	    goto handle_open;
2702 	  else
2703 	    goto normal_char;
2704 
2705 
2706 	case ')':
2707 	  if (syntax & RE_NO_BK_PARENS)
2708 	    goto handle_close;
2709 	  else
2710 	    goto normal_char;
2711 
2712 
2713 	case '\n':
2714 	  if (syntax & RE_NEWLINE_ALT)
2715 	    goto handle_alt;
2716 	  else
2717 	    goto normal_char;
2718 
2719 
2720 	case '|':
2721 	  if (syntax & RE_NO_BK_VBAR)
2722 	    goto handle_alt;
2723 	  else
2724 	    goto normal_char;
2725 
2726 
2727 	case '{':
2728 	   if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2729 	     goto handle_interval;
2730 	   else
2731 	     goto normal_char;
2732 
2733 
2734 	case '\\':
2735 	  if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2736 
2737 	  /* Do not translate the character after the \, so that we can
2738 	     distinguish, e.g., \B from \b, even if we normally would
2739 	     translate, e.g., B to b.  */
2740 	  PATFETCH_RAW (c);
2741 
2742 	  switch (c)
2743 	    {
2744 	    case '(':
2745 	      if (syntax & RE_NO_BK_PARENS)
2746 		goto normal_backslash;
2747 
2748 	    handle_open:
2749 	      {
2750 		int shy = 0;
2751 		if (p+1 < pend)
2752 		  {
2753 		    /* Look for a special (?...) construct */
2754 		    if ((syntax & RE_SHY_GROUPS) && *p == '?')
2755 		      {
2756 			PATFETCH (c); /* Gobble up the '?'.  */
2757 			PATFETCH (c);
2758 			switch (c)
2759 			  {
2760 			  case ':': shy = 1; break;
2761 			  default:
2762 			    /* Only (?:...) is supported right now. */
2763 			    FREE_STACK_RETURN (REG_BADPAT);
2764 			  }
2765 		      }
2766 		  }
2767 
2768 		if (!shy)
2769 		  {
2770 		    bufp->re_nsub++;
2771 		    regnum++;
2772 		  }
2773 
2774 		if (COMPILE_STACK_FULL)
2775 		  {
2776 		    RETALLOC (compile_stack.stack, compile_stack.size << 1,
2777 			      compile_stack_elt_t);
2778 		    if (compile_stack.stack == NULL) return REG_ESPACE;
2779 
2780 		    compile_stack.size <<= 1;
2781 		  }
2782 
2783 		/* These are the values to restore when we hit end of this
2784 		   group.	 They are all relative offsets, so that if the
2785 		   whole pattern moves because of realloc, they will still
2786 		   be valid.  */
2787 		COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2788 		COMPILE_STACK_TOP.fixup_alt_jump
2789 		  = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2790 		COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2791 		COMPILE_STACK_TOP.regnum = shy ? -regnum : regnum;
2792 
2793 		/* Do not push a
2794 		   start_memory for groups beyond the last one we can
2795 		   represent in the compiled pattern.  */
2796 		if (regnum <= MAX_REGNUM && !shy)
2797 		  BUF_PUSH_2 (start_memory, regnum);
2798 
2799 		compile_stack.avail++;
2800 
2801 		fixup_alt_jump = 0;
2802 		laststart = 0;
2803 		begalt = b;
2804 		/* If we've reached MAX_REGNUM groups, then this open
2805 		   won't actually generate any code, so we'll have to
2806 		   clear pending_exact explicitly.  */
2807 		pending_exact = 0;
2808 		break;
2809 	      }
2810 
2811 	    case ')':
2812 	      if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2813 
2814 	      if (COMPILE_STACK_EMPTY)
2815 		{
2816 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2817 		    goto normal_backslash;
2818 		  else
2819 		    FREE_STACK_RETURN (REG_ERPAREN);
2820 		}
2821 
2822 	    handle_close:
2823 	      FIXUP_ALT_JUMP ();
2824 
2825 	      /* See similar code for backslashed left paren above.  */
2826 	      if (COMPILE_STACK_EMPTY)
2827 		{
2828 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2829 		    goto normal_char;
2830 		  else
2831 		    FREE_STACK_RETURN (REG_ERPAREN);
2832 		}
2833 
2834 	      /* Since we just checked for an empty stack above, this
2835 		 ``can't happen''.  */
2836 	      assert (compile_stack.avail != 0);
2837 	      {
2838 		/* We don't just want to restore into `regnum', because
2839 		   later groups should continue to be numbered higher,
2840 		   as in `(ab)c(de)' -- the second group is #2.	 */
2841 		regnum_t this_group_regnum;
2842 
2843 		compile_stack.avail--;
2844 		begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2845 		fixup_alt_jump
2846 		  = COMPILE_STACK_TOP.fixup_alt_jump
2847 		    ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2848 		    : 0;
2849 		laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2850 		this_group_regnum = COMPILE_STACK_TOP.regnum;
2851 		/* If we've reached MAX_REGNUM groups, then this open
2852 		   won't actually generate any code, so we'll have to
2853 		   clear pending_exact explicitly.  */
2854 		pending_exact = 0;
2855 
2856 		/* We're at the end of the group, so now we know how many
2857 		   groups were inside this one.	 */
2858 		if (this_group_regnum <= MAX_REGNUM && this_group_regnum > 0)
2859 		  BUF_PUSH_2 (stop_memory, this_group_regnum);
2860 	      }
2861 	      break;
2862 
2863 
2864 	    case '|':					/* `\|'.  */
2865 	      if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2866 		goto normal_backslash;
2867 	    handle_alt:
2868 	      if (syntax & RE_LIMITED_OPS)
2869 		goto normal_char;
2870 
2871 	      /* Insert before the previous alternative a jump which
2872 		 jumps to this alternative if the former fails.	 */
2873 	      GET_BUFFER_SPACE (3);
2874 	      INSERT_JUMP (on_failure_jump, begalt, b + 6);
2875 	      pending_exact = 0;
2876 	      b += 3;
2877 
2878 	      /* The alternative before this one has a jump after it
2879 		 which gets executed if it gets matched.  Adjust that
2880 		 jump so it will jump to this alternative's analogous
2881 		 jump (put in below, which in turn will jump to the next
2882 		 (if any) alternative's such jump, etc.).  The last such
2883 		 jump jumps to the correct final destination.  A picture:
2884 			  _____ _____
2885 			  |   | |   |
2886 			  |   v |   v
2887 			 a | b	 | c
2888 
2889 		 If we are at `b', then fixup_alt_jump right now points to a
2890 		 three-byte space after `a'.  We'll put in the jump, set
2891 		 fixup_alt_jump to right after `b', and leave behind three
2892 		 bytes which we'll fill in when we get to after `c'.  */
2893 
2894 	      FIXUP_ALT_JUMP ();
2895 
2896 	      /* Mark and leave space for a jump after this alternative,
2897 		 to be filled in later either by next alternative or
2898 		 when know we're at the end of a series of alternatives.  */
2899 	      fixup_alt_jump = b;
2900 	      GET_BUFFER_SPACE (3);
2901 	      b += 3;
2902 
2903 	      laststart = 0;
2904 	      begalt = b;
2905 	      break;
2906 
2907 
2908 	    case '{':
2909 	      /* If \{ is a literal.  */
2910 	      if (!(syntax & RE_INTERVALS)
2911 		     /* If we're at `\{' and it's not the open-interval
2912 			operator.  */
2913 		  || (syntax & RE_NO_BK_BRACES))
2914 		goto normal_backslash;
2915 
2916 	    handle_interval:
2917 	      {
2918 		/* If got here, then the syntax allows intervals.  */
2919 
2920 		/* At least (most) this many matches must be made.  */
2921 		int lower_bound = 0, upper_bound = -1;
2922 
2923 		beg_interval = p;
2924 
2925 		if (p == pend)
2926 		  FREE_STACK_RETURN (REG_EBRACE);
2927 
2928 		GET_UNSIGNED_NUMBER (lower_bound);
2929 
2930 		if (c == ',')
2931 		  GET_UNSIGNED_NUMBER (upper_bound);
2932 		else
2933 		  /* Interval such as `{1}' => match exactly once. */
2934 		  upper_bound = lower_bound;
2935 
2936 		if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2937 		    || (upper_bound >= 0 && lower_bound > upper_bound))
2938 		  FREE_STACK_RETURN (REG_BADBR);
2939 
2940 		if (!(syntax & RE_NO_BK_BRACES))
2941 		  {
2942 		    if (c != '\\')
2943 		      FREE_STACK_RETURN (REG_BADBR);
2944 
2945 		    PATFETCH (c);
2946 		  }
2947 
2948 		if (c != '}')
2949 		  FREE_STACK_RETURN (REG_BADBR);
2950 
2951 		/* We just parsed a valid interval.  */
2952 
2953 		/* If it's invalid to have no preceding re.  */
2954 		if (!laststart)
2955 		  {
2956 		    if (syntax & RE_CONTEXT_INVALID_OPS)
2957 		      FREE_STACK_RETURN (REG_BADRPT);
2958 		    else if (syntax & RE_CONTEXT_INDEP_OPS)
2959 		      laststart = b;
2960 		    else
2961 		      goto unfetch_interval;
2962 		  }
2963 
2964 		 if (upper_bound == 0)
2965 		   /* If the upper bound is zero, just drop the sub pattern
2966 		      altogether.  */
2967 		   b = laststart;
2968 		 else if (lower_bound == 1 && upper_bound == 1)
2969 		   /* Just match it once: nothing to do here.  */
2970 		   ;
2971 
2972 		 /* Otherwise, we have a nontrivial interval.  When
2973 		    we're all done, the pattern will look like:
2974 		      set_number_at <jump count> <upper bound>
2975 		      set_number_at <succeed_n count> <lower bound>
2976 		      succeed_n <after jump addr> <succeed_n count>
2977 		      <body of loop>
2978 		      jump_n <succeed_n addr> <jump count>
2979 		    (The upper bound and `jump_n' are omitted if
2980 		    `upper_bound' is 1, though.)  */
2981 		 else
2982 		   { /* If the upper bound is > 1, we need to insert
2983 			more at the end of the loop.  */
2984 		     unsigned int nbytes = (upper_bound < 0 ? 3
2985 					    : upper_bound > 1 ? 5 : 0);
2986 		     unsigned int startoffset = 0;
2987 
2988 		     GET_BUFFER_SPACE (20); /* We might use less.  */
2989 
2990 		     if (lower_bound == 0)
2991 		       {
2992 			 /* A succeed_n that starts with 0 is really a
2993 			    a simple on_failure_jump_loop.  */
2994 			 INSERT_JUMP (on_failure_jump_loop, laststart,
2995 				      b + 3 + nbytes);
2996 			 b += 3;
2997 		       }
2998 		     else
2999 		       {
3000 			 /* Initialize lower bound of the `succeed_n', even
3001 			    though it will be set during matching by its
3002 			    attendant `set_number_at' (inserted next),
3003 			    because `re_compile_fastmap' needs to know.
3004 			    Jump to the `jump_n' we might insert below.  */
3005 			 INSERT_JUMP2 (succeed_n, laststart,
3006 				       b + 5 + nbytes,
3007 				       lower_bound);
3008 			 b += 5;
3009 
3010 			 /* Code to initialize the lower bound.  Insert
3011 			    before the `succeed_n'.	 The `5' is the last two
3012 			    bytes of this `set_number_at', plus 3 bytes of
3013 			    the following `succeed_n'.  */
3014 			 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
3015 			 b += 5;
3016 			 startoffset += 5;
3017 		       }
3018 
3019 		     if (upper_bound < 0)
3020 		       {
3021 			 /* A negative upper bound stands for infinity,
3022 			    in which case it degenerates to a plain jump.  */
3023 			 STORE_JUMP (jump, b, laststart + startoffset);
3024 			 b += 3;
3025 		       }
3026 		     else if (upper_bound > 1)
3027 		       { /* More than one repetition is allowed, so
3028 			    append a backward jump to the `succeed_n'
3029 			    that starts this interval.
3030 
3031 			    When we've reached this during matching,
3032 			    we'll have matched the interval once, so
3033 			    jump back only `upper_bound - 1' times.  */
3034 			 STORE_JUMP2 (jump_n, b, laststart + startoffset,
3035 				      upper_bound - 1);
3036 			 b += 5;
3037 
3038 			 /* The location we want to set is the second
3039 			    parameter of the `jump_n'; that is `b-2' as
3040 			    an absolute address.  `laststart' will be
3041 			    the `set_number_at' we're about to insert;
3042 			    `laststart+3' the number to set, the source
3043 			    for the relative address.  But we are
3044 			    inserting into the middle of the pattern --
3045 			    so everything is getting moved up by 5.
3046 			    Conclusion: (b - 2) - (laststart + 3) + 5,
3047 			    i.e., b - laststart.
3048 
3049 			    We insert this at the beginning of the loop
3050 			    so that if we fail during matching, we'll
3051 			    reinitialize the bounds.  */
3052 			 insert_op2 (set_number_at, laststart, b - laststart,
3053 				     upper_bound - 1, b);
3054 			 b += 5;
3055 		       }
3056 		   }
3057 		pending_exact = 0;
3058 		beg_interval = NULL;
3059 	      }
3060 	      break;
3061 
3062 	    unfetch_interval:
3063 	      /* If an invalid interval, match the characters as literals.  */
3064 	       assert (beg_interval);
3065 	       p = beg_interval;
3066 	       beg_interval = NULL;
3067 
3068 	       /* normal_char and normal_backslash need `c'.  */
3069 	       c = '{';
3070 
3071 	       if (!(syntax & RE_NO_BK_BRACES))
3072 		 {
3073 		   assert (p > pattern && p[-1] == '\\');
3074 		   goto normal_backslash;
3075 		 }
3076 	       else
3077 		 goto normal_char;
3078 
3079 #ifdef emacs
3080 	    /* There is no way to specify the before_dot and after_dot
3081 	       operators.  rms says this is ok.	 --karl	 */
3082 	    case '=':
3083 	      BUF_PUSH (at_dot);
3084 	      break;
3085 
3086 	    case 's':
3087 	      laststart = b;
3088 	      PATFETCH (c);
3089 	      BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
3090 	      break;
3091 
3092 	    case 'S':
3093 	      laststart = b;
3094 	      PATFETCH (c);
3095 	      BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
3096 	      break;
3097 
3098 	    case 'c':
3099 	      laststart = b;
3100 	      PATFETCH_RAW (c);
3101 	      BUF_PUSH_2 (categoryspec, c);
3102 	      break;
3103 
3104 	    case 'C':
3105 	      laststart = b;
3106 	      PATFETCH_RAW (c);
3107 	      BUF_PUSH_2 (notcategoryspec, c);
3108 	      break;
3109 #endif /* emacs */
3110 
3111 
3112 	    case 'w':
3113 	      if (syntax & RE_NO_GNU_OPS)
3114 		goto normal_char;
3115 	      laststart = b;
3116 	      BUF_PUSH_2 (syntaxspec, Sword);
3117 	      break;
3118 
3119 
3120 	    case 'W':
3121 	      if (syntax & RE_NO_GNU_OPS)
3122 		goto normal_char;
3123 	      laststart = b;
3124 	      BUF_PUSH_2 (notsyntaxspec, Sword);
3125 	      break;
3126 
3127 
3128 	    case '<':
3129 	      if (syntax & RE_NO_GNU_OPS)
3130 		goto normal_char;
3131 	      BUF_PUSH (wordbeg);
3132 	      break;
3133 
3134 	    case '>':
3135 	      if (syntax & RE_NO_GNU_OPS)
3136 		goto normal_char;
3137 	      BUF_PUSH (wordend);
3138 	      break;
3139 
3140 	    case 'b':
3141 	      if (syntax & RE_NO_GNU_OPS)
3142 		goto normal_char;
3143 	      BUF_PUSH (wordbound);
3144 	      break;
3145 
3146 	    case 'B':
3147 	      if (syntax & RE_NO_GNU_OPS)
3148 		goto normal_char;
3149 	      BUF_PUSH (notwordbound);
3150 	      break;
3151 
3152 	    case '`':
3153 	      if (syntax & RE_NO_GNU_OPS)
3154 		goto normal_char;
3155 	      BUF_PUSH (begbuf);
3156 	      break;
3157 
3158 	    case '\'':
3159 	      if (syntax & RE_NO_GNU_OPS)
3160 		goto normal_char;
3161 	      BUF_PUSH (endbuf);
3162 	      break;
3163 
3164 	    case '1': case '2': case '3': case '4': case '5':
3165 	    case '6': case '7': case '8': case '9':
3166 	      {
3167 		regnum_t reg;
3168 
3169 		if (syntax & RE_NO_BK_REFS)
3170 		  goto normal_backslash;
3171 
3172 		reg = c - '0';
3173 
3174 		/* Can't back reference to a subexpression before its end.  */
3175 		if (reg > regnum || group_in_compile_stack (compile_stack, reg))
3176 		  FREE_STACK_RETURN (REG_ESUBREG);
3177 
3178 		laststart = b;
3179 		BUF_PUSH_2 (duplicate, reg);
3180 	      }
3181 	      break;
3182 
3183 
3184 	    case '+':
3185 	    case '?':
3186 	      if (syntax & RE_BK_PLUS_QM)
3187 		goto handle_plus;
3188 	      else
3189 		goto normal_backslash;
3190 
3191 	    default:
3192 	    normal_backslash:
3193 	      /* You might think it would be useful for \ to mean
3194 		 not to translate; but if we don't translate it
3195 		 it will never match anything.  */
3196 	      c = TRANSLATE (c);
3197 	      goto normal_char;
3198 	    }
3199 	  break;
3200 
3201 
3202 	default:
3203 	/* Expects the character in `c'.  */
3204 	normal_char:
3205 	      /* If no exactn currently being built.  */
3206 	  if (!pending_exact
3207 
3208 	      /* If last exactn not at current position.  */
3209 	      || pending_exact + *pending_exact + 1 != b
3210 
3211 	      /* We have only one byte following the exactn for the count.  */
3212 	      || *pending_exact >= (1 << BYTEWIDTH) - MAX_MULTIBYTE_LENGTH
3213 
3214 	      /* If followed by a repetition operator.	*/
3215 	      || (p != pend && (*p == '*' || *p == '^'))
3216 	      || ((syntax & RE_BK_PLUS_QM)
3217 		  ? p + 1 < pend && *p == '\\' && (p[1] == '+' || p[1] == '?')
3218 		  : p != pend && (*p == '+' || *p == '?'))
3219 	      || ((syntax & RE_INTERVALS)
3220 		  && ((syntax & RE_NO_BK_BRACES)
3221 		      ? p != pend && *p == '{'
3222 		      : p + 1 < pend && p[0] == '\\' && p[1] == '{')))
3223 	    {
3224 	      /* Start building a new exactn.  */
3225 
3226 	      laststart = b;
3227 
3228 	      BUF_PUSH_2 (exactn, 0);
3229 	      pending_exact = b - 1;
3230 	    }
3231 
3232 	  GET_BUFFER_SPACE (MAX_MULTIBYTE_LENGTH);
3233 	  {
3234 	    int len;
3235 
3236 	    if (multibyte)
3237 	      len = CHAR_STRING (c, b);
3238 	    else
3239 	      *b = c, len = 1;
3240 	    b += len;
3241 	    (*pending_exact) += len;
3242 	  }
3243 
3244 	  break;
3245 	} /* switch (c) */
3246     } /* while p != pend */
3247 
3248 
3249   /* Through the pattern now.  */
3250 
3251   FIXUP_ALT_JUMP ();
3252 
3253   if (!COMPILE_STACK_EMPTY)
3254     FREE_STACK_RETURN (REG_EPAREN);
3255 
3256   /* If we don't want backtracking, force success
3257      the first time we reach the end of the compiled pattern.  */
3258   if (syntax & RE_NO_POSIX_BACKTRACKING)
3259     BUF_PUSH (succeed);
3260 
3261   free (compile_stack.stack);
3262 
3263   /* We have succeeded; set the length of the buffer.  */
3264   bufp->used = b - bufp->buffer;
3265 
3266 #ifdef DEBUG
3267   if (debug > 0)
3268     {
3269       re_compile_fastmap (bufp);
3270       DEBUG_PRINT1 ("\nCompiled pattern: \n");
3271       print_compiled_pattern (bufp);
3272     }
3273   debug--;
3274 #endif /* DEBUG */
3275 
3276 #ifndef MATCH_MAY_ALLOCATE
3277   /* Initialize the failure stack to the largest possible stack.  This
3278      isn't necessary unless we're trying to avoid calling alloca in
3279      the search and match routines.  */
3280   {
3281     int num_regs = bufp->re_nsub + 1;
3282 
3283     if (fail_stack.size < re_max_failures * TYPICAL_FAILURE_SIZE)
3284       {
3285 	fail_stack.size = re_max_failures * TYPICAL_FAILURE_SIZE;
3286 
3287 	if (! fail_stack.stack)
3288 	  fail_stack.stack
3289 	    = (fail_stack_elt_t *) malloc (fail_stack.size
3290 					   * sizeof (fail_stack_elt_t));
3291 	else
3292 	  fail_stack.stack
3293 	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
3294 					    (fail_stack.size
3295 					     * sizeof (fail_stack_elt_t)));
3296       }
3297 
3298     regex_grow_registers (num_regs);
3299   }
3300 #endif /* not MATCH_MAY_ALLOCATE */
3301 
3302   return REG_NOERROR;
3303 } /* regex_compile */
3304 
3305 /* Subroutines for `regex_compile'.  */
3306 
3307 /* Store OP at LOC followed by two-byte integer parameter ARG.	*/
3308 
3309 static void
store_op1(op,loc,arg)3310 store_op1 (op, loc, arg)
3311     re_opcode_t op;
3312     unsigned char *loc;
3313     int arg;
3314 {
3315   *loc = (unsigned char) op;
3316   STORE_NUMBER (loc + 1, arg);
3317 }
3318 
3319 
3320 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3321 
3322 static void
store_op2(op,loc,arg1,arg2)3323 store_op2 (op, loc, arg1, arg2)
3324     re_opcode_t op;
3325     unsigned char *loc;
3326     int arg1, arg2;
3327 {
3328   *loc = (unsigned char) op;
3329   STORE_NUMBER (loc + 1, arg1);
3330   STORE_NUMBER (loc + 3, arg2);
3331 }
3332 
3333 
3334 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
3335    for OP followed by two-byte integer parameter ARG.  */
3336 
3337 static void
insert_op1(op,loc,arg,end)3338 insert_op1 (op, loc, arg, end)
3339     re_opcode_t op;
3340     unsigned char *loc;
3341     int arg;
3342     unsigned char *end;
3343 {
3344   register unsigned char *pfrom = end;
3345   register unsigned char *pto = end + 3;
3346 
3347   while (pfrom != loc)
3348     *--pto = *--pfrom;
3349 
3350   store_op1 (op, loc, arg);
3351 }
3352 
3353 
3354 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3355 
3356 static void
insert_op2(op,loc,arg1,arg2,end)3357 insert_op2 (op, loc, arg1, arg2, end)
3358     re_opcode_t op;
3359     unsigned char *loc;
3360     int arg1, arg2;
3361     unsigned char *end;
3362 {
3363   register unsigned char *pfrom = end;
3364   register unsigned char *pto = end + 5;
3365 
3366   while (pfrom != loc)
3367     *--pto = *--pfrom;
3368 
3369   store_op2 (op, loc, arg1, arg2);
3370 }
3371 
3372 
3373 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3374    after an alternative or a begin-subexpression.  We assume there is at
3375    least one character before the ^.  */
3376 
3377 static boolean
at_begline_loc_p(pattern,p,syntax)3378 at_begline_loc_p (pattern, p, syntax)
3379     re_char *pattern, *p;
3380     reg_syntax_t syntax;
3381 {
3382   re_char *prev = p - 2;
3383   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3384 
3385   return
3386        /* After a subexpression?  */
3387        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3388        /* After an alternative?	 */
3389     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash))
3390        /* After a shy subexpression?  */
3391     || ((syntax & RE_SHY_GROUPS) && prev - 2 >= pattern
3392 	&& prev[-1] == '?' && prev[-2] == '('
3393 	&& (syntax & RE_NO_BK_PARENS
3394 	    || (prev - 3 >= pattern && prev[-3] == '\\')));
3395 }
3396 
3397 
3398 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3399    at least one character after the $, i.e., `P < PEND'.  */
3400 
3401 static boolean
at_endline_loc_p(p,pend,syntax)3402 at_endline_loc_p (p, pend, syntax)
3403     re_char *p, *pend;
3404     reg_syntax_t syntax;
3405 {
3406   re_char *next = p;
3407   boolean next_backslash = *next == '\\';
3408   re_char *next_next = p + 1 < pend ? p + 1 : 0;
3409 
3410   return
3411        /* Before a subexpression?  */
3412        (syntax & RE_NO_BK_PARENS ? *next == ')'
3413 	: next_backslash && next_next && *next_next == ')')
3414        /* Before an alternative?  */
3415     || (syntax & RE_NO_BK_VBAR ? *next == '|'
3416 	: next_backslash && next_next && *next_next == '|');
3417 }
3418 
3419 
3420 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3421    false if it's not.  */
3422 
3423 static boolean
group_in_compile_stack(compile_stack,regnum)3424 group_in_compile_stack (compile_stack, regnum)
3425     compile_stack_type compile_stack;
3426     regnum_t regnum;
3427 {
3428   int this_element;
3429 
3430   for (this_element = compile_stack.avail - 1;
3431        this_element >= 0;
3432        this_element--)
3433     if (compile_stack.stack[this_element].regnum == regnum)
3434       return true;
3435 
3436   return false;
3437 }
3438 
3439 /* analyse_first.
3440    If fastmap is non-NULL, go through the pattern and fill fastmap
3441    with all the possible leading chars.  If fastmap is NULL, don't
3442    bother filling it up (obviously) and only return whether the
3443    pattern could potentially match the empty string.
3444 
3445    Return 1  if p..pend might match the empty string.
3446    Return 0  if p..pend matches at least one char.
3447    Return -1 if fastmap was not updated accurately.  */
3448 
3449 static int
analyse_first(p,pend,fastmap,multibyte)3450 analyse_first (p, pend, fastmap, multibyte)
3451      re_char *p, *pend;
3452      char *fastmap;
3453      const int multibyte;
3454 {
3455   int j, k;
3456   boolean not;
3457 
3458   /* If all elements for base leading-codes in fastmap is set, this
3459      flag is set true.	*/
3460   boolean match_any_multibyte_characters = false;
3461 
3462   assert (p);
3463 
3464   /* The loop below works as follows:
3465      - It has a working-list kept in the PATTERN_STACK and which basically
3466        starts by only containing a pointer to the first operation.
3467      - If the opcode we're looking at is a match against some set of
3468        chars, then we add those chars to the fastmap and go on to the
3469        next work element from the worklist (done via `break').
3470      - If the opcode is a control operator on the other hand, we either
3471        ignore it (if it's meaningless at this point, such as `start_memory')
3472        or execute it (if it's a jump).  If the jump has several destinations
3473        (i.e. `on_failure_jump'), then we push the other destination onto the
3474        worklist.
3475      We guarantee termination by ignoring backward jumps (more or less),
3476      so that `p' is monotonically increasing.  More to the point, we
3477      never set `p' (or push) anything `<= p1'.  */
3478 
3479   while (p < pend)
3480     {
3481       /* `p1' is used as a marker of how far back a `on_failure_jump'
3482 	 can go without being ignored.  It is normally equal to `p'
3483 	 (which prevents any backward `on_failure_jump') except right
3484 	 after a plain `jump', to allow patterns such as:
3485 	    0: jump 10
3486 	    3..9: <body>
3487 	    10: on_failure_jump 3
3488 	 as used for the *? operator.  */
3489       re_char *p1 = p;
3490 
3491       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3492 	{
3493 	case succeed:
3494 	  return 1;
3495 	  continue;
3496 
3497 	case duplicate:
3498 	  /* If the first character has to match a backreference, that means
3499 	     that the group was empty (since it already matched).  Since this
3500 	     is the only case that interests us here, we can assume that the
3501 	     backreference must match the empty string.  */
3502 	  p++;
3503 	  continue;
3504 
3505 
3506       /* Following are the cases which match a character.  These end
3507 	 with `break'.	*/
3508 
3509 	case exactn:
3510 	  if (fastmap)
3511 	    {
3512 	      int c = RE_STRING_CHAR (p + 1, pend - p);
3513 
3514 	      if (SINGLE_BYTE_CHAR_P (c))
3515 		fastmap[c] = 1;
3516 	      else
3517 		fastmap[p[1]] = 1;
3518 	    }
3519 	  break;
3520 
3521 
3522 	case anychar:
3523 	  /* We could put all the chars except for \n (and maybe \0)
3524 	     but we don't bother since it is generally not worth it.  */
3525 	  if (!fastmap) break;
3526 	  return -1;
3527 
3528 
3529 	case charset_not:
3530 	  /* Chars beyond end of bitmap are possible matches.
3531 	     All the single-byte codes can occur in multibyte buffers.
3532 	     So any that are not listed in the charset
3533 	     are possible matches, even in multibyte buffers.  */
3534 	  if (!fastmap) break;
3535 	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
3536 	       j < (1 << BYTEWIDTH); j++)
3537 	    fastmap[j] = 1;
3538 	  /* Fallthrough */
3539 	case charset:
3540 	  if (!fastmap) break;
3541 	  not = (re_opcode_t) *(p - 1) == charset_not;
3542 	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
3543 	       j >= 0; j--)
3544 	    if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
3545 	      fastmap[j] = 1;
3546 
3547 	  if ((not && multibyte)
3548 	      /* Any character set can possibly contain a character
3549 		 which doesn't match the specified set of characters.  */
3550 	      || (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3551 		  && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
3552 	    /* If we can match a character class, we can match
3553 	       any character set.  */
3554 	    {
3555 	    set_fastmap_for_multibyte_characters:
3556 	      if (match_any_multibyte_characters == false)
3557 		{
3558 		  for (j = 0x80; j < 0xA0; j++)	/* XXX */
3559 		    if (BASE_LEADING_CODE_P (j))
3560 		      fastmap[j] = 1;
3561 		  match_any_multibyte_characters = true;
3562 		}
3563 	    }
3564 
3565 	  else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
3566 		   && match_any_multibyte_characters == false)
3567 	    {
3568 	      /* Set fastmap[I] 1 where I is a base leading code of each
3569 		 multibyte character in the range table. */
3570 	      int c, count;
3571 
3572 	      /* Make P points the range table.  `+ 2' is to skip flag
3573 		 bits for a character class.  */
3574 	      p += CHARSET_BITMAP_SIZE (&p[-2]) + 2;
3575 
3576 	      /* Extract the number of ranges in range table into COUNT.  */
3577 	      EXTRACT_NUMBER_AND_INCR (count, p);
3578 	      for (; count > 0; count--, p += 2 * 3) /* XXX */
3579 		{
3580 		  /* Extract the start of each range.  */
3581 		  EXTRACT_CHARACTER (c, p);
3582 		  j = CHAR_CHARSET (c);
3583 		  fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
3584 		}
3585 	    }
3586 	  break;
3587 
3588 	case syntaxspec:
3589 	case notsyntaxspec:
3590 	  if (!fastmap) break;
3591 #ifndef emacs
3592 	  not = (re_opcode_t)p[-1] == notsyntaxspec;
3593 	  k = *p++;
3594 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3595 	    if ((SYNTAX (j) == (enum syntaxcode) k) ^ not)
3596 	      fastmap[j] = 1;
3597 	  break;
3598 #else  /* emacs */
3599 	  /* This match depends on text properties.  These end with
3600 	     aborting optimizations.  */
3601 	  return -1;
3602 
3603 	case categoryspec:
3604 	case notcategoryspec:
3605 	  if (!fastmap) break;
3606 	  not = (re_opcode_t)p[-1] == notcategoryspec;
3607 	  k = *p++;
3608 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3609 	    if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
3610 	      fastmap[j] = 1;
3611 
3612 	  if (multibyte)
3613 	    /* Any character set can possibly contain a character
3614 	       whose category is K (or not).  */
3615 	    goto set_fastmap_for_multibyte_characters;
3616 	  break;
3617 
3618       /* All cases after this match the empty string.  These end with
3619 	 `continue'.  */
3620 
3621 	case before_dot:
3622 	case at_dot:
3623 	case after_dot:
3624 #endif /* !emacs */
3625 	case no_op:
3626 	case begline:
3627 	case endline:
3628 	case begbuf:
3629 	case endbuf:
3630 	case wordbound:
3631 	case notwordbound:
3632 	case wordbeg:
3633 	case wordend:
3634 	  continue;
3635 
3636 
3637 	case jump:
3638 	  EXTRACT_NUMBER_AND_INCR (j, p);
3639 	  if (j < 0)
3640 	    /* Backward jumps can only go back to code that we've already
3641 	       visited.  `re_compile' should make sure this is true.  */
3642 	    break;
3643 	  p += j;
3644 	  switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
3645 	    {
3646 	    case on_failure_jump:
3647 	    case on_failure_keep_string_jump:
3648 	    case on_failure_jump_loop:
3649 	    case on_failure_jump_nastyloop:
3650 	    case on_failure_jump_smart:
3651 	      p++;
3652 	      break;
3653 	    default:
3654 	      continue;
3655 	    };
3656 	  /* Keep `p1' to allow the `on_failure_jump' we are jumping to
3657 	     to jump back to "just after here".  */
3658 	  /* Fallthrough */
3659 
3660 	case on_failure_jump:
3661 	case on_failure_keep_string_jump:
3662 	case on_failure_jump_nastyloop:
3663 	case on_failure_jump_loop:
3664 	case on_failure_jump_smart:
3665 	  EXTRACT_NUMBER_AND_INCR (j, p);
3666 	  if (p + j <= p1)
3667 	    ; /* Backward jump to be ignored.  */
3668 	  else
3669 	    { /* We have to look down both arms.
3670 		 We first go down the "straight" path so as to minimize
3671 		 stack usage when going through alternatives.  */
3672 	      int r = analyse_first (p, pend, fastmap, multibyte);
3673 	      if (r) return r;
3674 	      p += j;
3675 	    }
3676 	  continue;
3677 
3678 
3679 	case jump_n:
3680 	  /* This code simply does not properly handle forward jump_n.  */
3681 	  DEBUG_STATEMENT (EXTRACT_NUMBER (j, p); assert (j < 0));
3682 	  p += 4;
3683 	  /* jump_n can either jump or fall through.  The (backward) jump
3684 	     case has already been handled, so we only need to look at the
3685 	     fallthrough case.  */
3686 	  continue;
3687 
3688 	case succeed_n:
3689 	  /* If N == 0, it should be an on_failure_jump_loop instead.  */
3690 	  DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0));
3691 	  p += 4;
3692 	  /* We only care about one iteration of the loop, so we don't
3693 	     need to consider the case where this behaves like an
3694 	     on_failure_jump.  */
3695 	  continue;
3696 
3697 
3698 	case set_number_at:
3699 	  p += 4;
3700 	  continue;
3701 
3702 
3703 	case start_memory:
3704 	case stop_memory:
3705 	  p += 1;
3706 	  continue;
3707 
3708 
3709 	default:
3710 	  abort (); /* We have listed all the cases.  */
3711 	} /* switch *p++ */
3712 
3713       /* Getting here means we have found the possible starting
3714 	 characters for one path of the pattern -- and that the empty
3715 	 string does not match.	 We need not follow this path further.  */
3716       return 0;
3717     } /* while p */
3718 
3719   /* We reached the end without matching anything.  */
3720   return 1;
3721 
3722 } /* analyse_first */
3723 
3724 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3725    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3726    characters can start a string that matches the pattern.  This fastmap
3727    is used by re_search to skip quickly over impossible starting points.
3728 
3729    Character codes above (1 << BYTEWIDTH) are not represented in the
3730    fastmap, but the leading codes are represented.  Thus, the fastmap
3731    indicates which character sets could start a match.
3732 
3733    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3734    area as BUFP->fastmap.
3735 
3736    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3737    the pattern buffer.
3738 
3739    Returns 0 if we succeed, -2 if an internal error.   */
3740 
3741 int
re_compile_fastmap(bufp)3742 re_compile_fastmap (bufp)
3743      struct re_pattern_buffer *bufp;
3744 {
3745   char *fastmap = bufp->fastmap;
3746   int analysis;
3747 
3748   assert (fastmap && bufp->buffer);
3749 
3750   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.	*/
3751   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
3752 
3753   analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
3754 			    fastmap, RE_MULTIBYTE_P (bufp));
3755   bufp->can_be_null = (analysis != 0);
3756   return 0;
3757 } /* re_compile_fastmap */
3758 
3759 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3760    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3761    this memory for recording register information.  STARTS and ENDS
3762    must be allocated using the malloc library routine, and must each
3763    be at least NUM_REGS * sizeof (regoff_t) bytes long.
3764 
3765    If NUM_REGS == 0, then subsequent matches should allocate their own
3766    register data.
3767 
3768    Unless this function is called, the first search or match using
3769    PATTERN_BUFFER will allocate its own register data, without
3770    freeing the old data.  */
3771 
3772 void
re_set_registers(bufp,regs,num_regs,starts,ends)3773 re_set_registers (bufp, regs, num_regs, starts, ends)
3774     struct re_pattern_buffer *bufp;
3775     struct re_registers *regs;
3776     unsigned num_regs;
3777     regoff_t *starts, *ends;
3778 {
3779   if (num_regs)
3780     {
3781       bufp->regs_allocated = REGS_REALLOCATE;
3782       regs->num_regs = num_regs;
3783       regs->start = starts;
3784       regs->end = ends;
3785     }
3786   else
3787     {
3788       bufp->regs_allocated = REGS_UNALLOCATED;
3789       regs->num_regs = 0;
3790       regs->start = regs->end = (regoff_t *) 0;
3791     }
3792 }
3793 WEAK_ALIAS (__re_set_registers, re_set_registers)
3794 
3795 /* Searching routines.	*/
3796 
3797 /* Like re_search_2, below, but only one string is specified, and
3798    doesn't let you say where to stop matching. */
3799 
3800 int
3801 re_search (bufp, string, size, startpos, range, regs)
3802      struct re_pattern_buffer *bufp;
3803      const char *string;
3804      int size, startpos, range;
3805      struct re_registers *regs;
3806 {
3807   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3808 		      regs, size);
3809 }
3810 WEAK_ALIAS (__re_search, re_search)
3811 
3812 /* End address of virtual concatenation of string.  */
3813 #define STOP_ADDR_VSTRING(P)				\
3814   (((P) >= size1 ? string2 + size2 : string1 + size1))
3815 
3816 /* Address of POS in the concatenation of virtual string. */
3817 #define POS_ADDR_VSTRING(POS)					\
3818   (((POS) >= size1 ? string2 - size1 : string1) + (POS))
3819 
3820 /* Using the compiled pattern in BUFP->buffer, first tries to match the
3821    virtual concatenation of STRING1 and STRING2, starting first at index
3822    STARTPOS, then at STARTPOS + 1, and so on.
3823 
3824    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3825 
3826    RANGE is how far to scan while trying to match.  RANGE = 0 means try
3827    only at STARTPOS; in general, the last start tried is STARTPOS +
3828    RANGE.
3829 
3830    In REGS, return the indices of the virtual concatenation of STRING1
3831    and STRING2 that matched the entire BUFP->buffer and its contained
3832    subexpressions.
3833 
3834    Do not consider matching one past the index STOP in the virtual
3835    concatenation of STRING1 and STRING2.
3836 
3837    We return either the position in the strings at which the match was
3838    found, -1 if no match, or -2 if error (such as failure
3839    stack overflow).  */
3840 
3841 int
3842 re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
3843      struct re_pattern_buffer *bufp;
3844      const char *str1, *str2;
3845      int size1, size2;
3846      int startpos;
3847      int range;
3848      struct re_registers *regs;
3849      int stop;
3850 {
3851   int val;
3852   re_char *string1 = (re_char*) str1;
3853   re_char *string2 = (re_char*) str2;
3854   register char *fastmap = bufp->fastmap;
3855   register RE_TRANSLATE_TYPE translate = bufp->translate;
3856   int total_size = size1 + size2;
3857   int endpos = startpos + range;
3858   boolean anchored_start;
3859 
3860   /* Nonzero if we have to concern multibyte character.	 */
3861   const boolean multibyte = RE_MULTIBYTE_P (bufp);
3862 
3863   /* Check for out-of-range STARTPOS.  */
3864   if (startpos < 0 || startpos > total_size)
3865     return -1;
3866 
3867   /* Fix up RANGE if it might eventually take us outside
3868      the virtual concatenation of STRING1 and STRING2.
3869      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3870   if (endpos < 0)
3871     range = 0 - startpos;
3872   else if (endpos > total_size)
3873     range = total_size - startpos;
3874 
3875   /* If the search isn't to be a backwards one, don't waste time in a
3876      search for a pattern anchored at beginning of buffer.  */
3877   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3878     {
3879       if (startpos > 0)
3880 	return -1;
3881       else
3882 	range = 0;
3883     }
3884 
3885 #ifdef emacs
3886   /* In a forward search for something that starts with \=.
3887      don't keep searching past point.  */
3888   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3889     {
3890       range = PT_BYTE - BEGV_BYTE - startpos;
3891       if (range < 0)
3892 	return -1;
3893     }
3894 #endif /* emacs */
3895 
3896   /* Update the fastmap now if not correct already.  */
3897   if (fastmap && !bufp->fastmap_accurate)
3898     re_compile_fastmap (bufp);
3899 
3900   /* See whether the pattern is anchored.  */
3901   anchored_start = (bufp->buffer[0] == begline);
3902 
3903 #ifdef emacs
3904   gl_state.object = re_match_object;
3905   {
3906     int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (startpos));
3907 
3908     SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
3909   }
3910 #endif
3911 
3912   /* Loop through the string, looking for a place to start matching.  */
3913   for (;;)
3914     {
3915       /* If the pattern is anchored,
3916 	 skip quickly past places we cannot match.
3917 	 We don't bother to treat startpos == 0 specially
3918 	 because that case doesn't repeat.  */
3919       if (anchored_start && startpos > 0)
3920 	{
3921 	  if (! ((startpos <= size1 ? string1[startpos - 1]
3922 		  : string2[startpos - size1 - 1])
3923 		 == '\n'))
3924 	    goto advance;
3925 	}
3926 
3927       /* If a fastmap is supplied, skip quickly over characters that
3928 	 cannot be the start of a match.  If the pattern can match the
3929 	 null string, however, we don't need to skip characters; we want
3930 	 the first null string.	 */
3931       if (fastmap && startpos < total_size && !bufp->can_be_null)
3932 	{
3933 	  register re_char *d;
3934 	  register re_wchar_t buf_ch;
3935 
3936 	  d = POS_ADDR_VSTRING (startpos);
3937 
3938 	  if (range > 0)	/* Searching forwards.	*/
3939 	    {
3940 	      register int lim = 0;
3941 	      int irange = range;
3942 
3943 	      if (startpos < size1 && startpos + range >= size1)
3944 		lim = range - (size1 - startpos);
3945 
3946 	      /* Written out as an if-else to avoid testing `translate'
3947 		 inside the loop.  */
3948 	      if (RE_TRANSLATE_P (translate))
3949 		{
3950 		  if (multibyte)
3951 		    while (range > lim)
3952 		      {
3953 			int buf_charlen;
3954 
3955 			buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
3956 							 buf_charlen);
3957 
3958 			buf_ch = RE_TRANSLATE (translate, buf_ch);
3959 			if (buf_ch >= 0400
3960 			    || fastmap[buf_ch])
3961 			  break;
3962 
3963 			range -= buf_charlen;
3964 			d += buf_charlen;
3965 		      }
3966 		  else
3967 		    while (range > lim
3968 			   && !fastmap[RE_TRANSLATE (translate, *d)])
3969 		      {
3970 			d++;
3971 			range--;
3972 		      }
3973 		}
3974 	      else
3975 		while (range > lim && !fastmap[*d])
3976 		  {
3977 		    d++;
3978 		    range--;
3979 		  }
3980 
3981 	      startpos += irange - range;
3982 	    }
3983 	  else				/* Searching backwards.	 */
3984 	    {
3985 	      int room = (startpos >= size1
3986 			  ? size2 + size1 - startpos
3987 			  : size1 - startpos);
3988 	      buf_ch = RE_STRING_CHAR (d, room);
3989 	      buf_ch = TRANSLATE (buf_ch);
3990 
3991 	      if (! (buf_ch >= 0400
3992 		     || fastmap[buf_ch]))
3993 		goto advance;
3994 	    }
3995 	}
3996 
3997       /* If can't match the null string, and that's all we have left, fail.  */
3998       if (range >= 0 && startpos == total_size && fastmap
3999 	  && !bufp->can_be_null)
4000 	return -1;
4001 
4002       val = re_match_2_internal (bufp, string1, size1, string2, size2,
4003 				 startpos, regs, stop);
4004 #ifndef REGEX_MALLOC
4005 # ifdef C_ALLOCA
4006       alloca (0);
4007 # endif
4008 #endif
4009 
4010       if (val >= 0)
4011 	return startpos;
4012 
4013       if (val == -2)
4014 	return -2;
4015 
4016     advance:
4017       if (!range)
4018 	break;
4019       else if (range > 0)
4020 	{
4021 	  /* Update STARTPOS to the next character boundary.  */
4022 	  if (multibyte)
4023 	    {
4024 	      re_char *p = POS_ADDR_VSTRING (startpos);
4025 	      re_char *pend = STOP_ADDR_VSTRING (startpos);
4026 	      int len = MULTIBYTE_FORM_LENGTH (p, pend - p);
4027 
4028 	      range -= len;
4029 	      if (range < 0)
4030 		break;
4031 	      startpos += len;
4032 	    }
4033 	  else
4034 	    {
4035 	      range--;
4036 	      startpos++;
4037 	    }
4038 	}
4039       else
4040 	{
4041 	  range++;
4042 	  startpos--;
4043 
4044 	  /* Update STARTPOS to the previous character boundary.  */
4045 	  if (multibyte)
4046 	    {
4047 	      re_char *p = POS_ADDR_VSTRING (startpos);
4048 	      int len = 0;
4049 
4050 	      /* Find the head of multibyte form.  */
4051 	      while (!CHAR_HEAD_P (*p))
4052 		p--, len++;
4053 
4054 	      /* Adjust it. */
4055 #if 0				/* XXX */
4056 	      if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1))
4057 		;
4058 	      else
4059 #endif
4060 		{
4061 		  range += len;
4062 		  if (range > 0)
4063 		    break;
4064 
4065 		  startpos -= len;
4066 		}
4067 	    }
4068 	}
4069     }
4070   return -1;
4071 } /* re_search_2 */
4072 WEAK_ALIAS (__re_search_2, re_search_2)
4073 
4074 /* Declarations and macros for re_match_2.  */
4075 
4076 static int bcmp_translate _RE_ARGS((re_char *s1, re_char *s2,
4077 				    register int len,
4078 				    RE_TRANSLATE_TYPE translate,
4079 				    const int multibyte));
4080 
4081 /* This converts PTR, a pointer into one of the search strings `string1'
4082    and `string2' into an offset from the beginning of that string.  */
4083 #define POINTER_TO_OFFSET(ptr)			\
4084   (FIRST_STRING_P (ptr)				\
4085    ? ((regoff_t) ((ptr) - string1))		\
4086    : ((regoff_t) ((ptr) - string2 + size1)))
4087 
4088 /* Call before fetching a character with *d.  This switches over to
4089    string2 if necessary.
4090    Check re_match_2_internal for a discussion of why end_match_2 might
4091    not be within string2 (but be equal to end_match_1 instead).  */
4092 #define PREFETCH()							\
4093   while (d == dend)							\
4094     {									\
4095       /* End of string2 => fail.  */					\
4096       if (dend == end_match_2)						\
4097 	goto fail;							\
4098       /* End of string1 => advance to string2.  */			\
4099       d = string2;							\
4100       dend = end_match_2;						\
4101     }
4102 
4103 /* Call before fetching a char with *d if you already checked other limits.
4104    This is meant for use in lookahead operations like wordend, etc..
4105    where we might need to look at parts of the string that might be
4106    outside of the LIMITs (i.e past `stop').  */
4107 #define PREFETCH_NOLIMIT()						\
4108   if (d == end1)							\
4109      {									\
4110        d = string2;							\
4111        dend = end_match_2;						\
4112      }									\
4113 
4114 /* Test if at very beginning or at very end of the virtual concatenation
4115    of `string1' and `string2'.	If only one string, it's `string2'.  */
4116 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4117 #define AT_STRINGS_END(d) ((d) == end2)
4118 
4119 
4120 /* Test if D points to a character which is word-constituent.  We have
4121    two special cases to check for: if past the end of string1, look at
4122    the first character in string2; and if before the beginning of
4123    string2, look at the last character in string1.  */
4124 #define WORDCHAR_P(d)							\
4125   (SYNTAX ((d) == end1 ? *string2					\
4126 	   : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
4127    == Sword)
4128 
4129 /* Disabled due to a compiler bug -- see comment at case wordbound */
4130 
4131 /* The comment at case wordbound is following one, but we don't use
4132    AT_WORD_BOUNDARY anymore to support multibyte form.
4133 
4134    The DEC Alpha C compiler 3.x generates incorrect code for the
4135    test	 WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
4136    AT_WORD_BOUNDARY, so this code is disabled.	Expanding the
4137    macro and introducing temporary variables works around the bug.  */
4138 
4139 #if 0
4140 /* Test if the character before D and the one at D differ with respect
4141    to being word-constituent.  */
4142 #define AT_WORD_BOUNDARY(d)						\
4143   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
4144    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
4145 #endif
4146 
4147 /* Free everything we malloc.  */
4148 #ifdef MATCH_MAY_ALLOCATE
4149 # define FREE_VAR(var) if (var) { REGEX_FREE (var); var = NULL; } else
4150 # define FREE_VARIABLES()						\
4151   do {									\
4152     REGEX_FREE_STACK (fail_stack.stack);				\
4153     FREE_VAR (regstart);						\
4154     FREE_VAR (regend);							\
4155     FREE_VAR (best_regstart);						\
4156     FREE_VAR (best_regend);						\
4157   } while (0)
4158 #else
4159 # define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
4160 #endif /* not MATCH_MAY_ALLOCATE */
4161 
4162 
4163 /* Optimization routines.  */
4164 
4165 /* If the operation is a match against one or more chars,
4166    return a pointer to the next operation, else return NULL.  */
4167 static re_char *
skip_one_char(p)4168 skip_one_char (p)
4169      re_char *p;
4170 {
4171   switch (SWITCH_ENUM_CAST (*p++))
4172     {
4173     case anychar:
4174       break;
4175 
4176     case exactn:
4177       p += *p + 1;
4178       break;
4179 
4180     case charset_not:
4181     case charset:
4182       if (CHARSET_RANGE_TABLE_EXISTS_P (p - 1))
4183 	{
4184 	  int mcnt;
4185 	  p = CHARSET_RANGE_TABLE (p - 1);
4186 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
4187 	  p = CHARSET_RANGE_TABLE_END (p, mcnt);
4188 	}
4189       else
4190 	p += 1 + CHARSET_BITMAP_SIZE (p - 1);
4191       break;
4192 
4193     case syntaxspec:
4194     case notsyntaxspec:
4195 #ifdef emacs
4196     case categoryspec:
4197     case notcategoryspec:
4198 #endif /* emacs */
4199       p++;
4200       break;
4201 
4202     default:
4203       p = NULL;
4204     }
4205   return p;
4206 }
4207 
4208 
4209 /* Jump over non-matching operations.  */
4210 static unsigned char *
skip_noops(p,pend)4211 skip_noops (p, pend)
4212      unsigned char *p, *pend;
4213 {
4214   int mcnt;
4215   while (p < pend)
4216     {
4217       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p))
4218 	{
4219 	case start_memory:
4220 	case stop_memory:
4221 	  p += 2; break;
4222 	case no_op:
4223 	  p += 1; break;
4224 	case jump:
4225 	  p += 1;
4226 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
4227 	  p += mcnt;
4228 	  break;
4229 	default:
4230 	  return p;
4231 	}
4232     }
4233   assert (p == pend);
4234   return p;
4235 }
4236 
4237 /* Non-zero if "p1 matches something" implies "p2 fails".  */
4238 static int
mutually_exclusive_p(bufp,p1,p2)4239 mutually_exclusive_p (bufp, p1, p2)
4240      struct re_pattern_buffer *bufp;
4241      unsigned char *p1, *p2;
4242 {
4243   re_opcode_t op2;
4244   const boolean multibyte = RE_MULTIBYTE_P (bufp);
4245   unsigned char *pend = bufp->buffer + bufp->used;
4246 
4247   assert (p1 >= bufp->buffer && p1 < pend
4248 	  && p2 >= bufp->buffer && p2 <= pend);
4249 
4250   /* Skip over open/close-group commands.
4251      If what follows this loop is a ...+ construct,
4252      look at what begins its body, since we will have to
4253      match at least one of that.  */
4254   p2 = skip_noops (p2, pend);
4255   /* The same skip can be done for p1, except that this function
4256      is only used in the case where p1 is a simple match operator.  */
4257   /* p1 = skip_noops (p1, pend); */
4258 
4259   assert (p1 >= bufp->buffer && p1 < pend
4260 	  && p2 >= bufp->buffer && p2 <= pend);
4261 
4262   op2 = p2 == pend ? succeed : *p2;
4263 
4264   switch (SWITCH_ENUM_CAST (op2))
4265     {
4266     case succeed:
4267     case endbuf:
4268       /* If we're at the end of the pattern, we can change.  */
4269       if (skip_one_char (p1))
4270 	{
4271 	  DEBUG_PRINT1 ("  End of pattern: fast loop.\n");
4272 	  return 1;
4273 	}
4274       break;
4275 
4276     case endline:
4277     case exactn:
4278       {
4279 	register re_wchar_t c
4280 	  = (re_opcode_t) *p2 == endline ? '\n'
4281 	  : RE_STRING_CHAR (p2 + 2, pend - p2 - 2);
4282 
4283 	if ((re_opcode_t) *p1 == exactn)
4284 	  {
4285 	    if (c != RE_STRING_CHAR (p1 + 2, pend - p1 - 2))
4286 	      {
4287 		DEBUG_PRINT3 ("  '%c' != '%c' => fast loop.\n", c, p1[2]);
4288 		return 1;
4289 	      }
4290 	  }
4291 
4292 	else if ((re_opcode_t) *p1 == charset
4293 		 || (re_opcode_t) *p1 == charset_not)
4294 	  {
4295 	    int not = (re_opcode_t) *p1 == charset_not;
4296 
4297 	    /* Test if C is listed in charset (or charset_not)
4298 	       at `p1'.  */
4299 	    if (SINGLE_BYTE_CHAR_P (c))
4300 	      {
4301 		if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH
4302 		    && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4303 		  not = !not;
4304 	      }
4305 	    else if (CHARSET_RANGE_TABLE_EXISTS_P (p1))
4306 	      CHARSET_LOOKUP_RANGE_TABLE (not, c, p1);
4307 
4308 	    /* `not' is equal to 1 if c would match, which means
4309 	       that we can't change to pop_failure_jump.  */
4310 	    if (!not)
4311 	      {
4312 		DEBUG_PRINT1 ("	 No match => fast loop.\n");
4313 		return 1;
4314 	      }
4315 	  }
4316 	else if ((re_opcode_t) *p1 == anychar
4317 		 && c == '\n')
4318 	  {
4319 	    DEBUG_PRINT1 ("   . != \\n => fast loop.\n");
4320 	    return 1;
4321 	  }
4322       }
4323       break;
4324 
4325     case charset:
4326       {
4327 	if ((re_opcode_t) *p1 == exactn)
4328 	  /* Reuse the code above.  */
4329 	  return mutually_exclusive_p (bufp, p2, p1);
4330 
4331       /* It is hard to list up all the character in charset
4332 	 P2 if it includes multibyte character.  Give up in
4333 	 such case.  */
4334       else if (!multibyte || !CHARSET_RANGE_TABLE_EXISTS_P (p2))
4335 	{
4336 	  /* Now, we are sure that P2 has no range table.
4337 	     So, for the size of bitmap in P2, `p2[1]' is
4338 	     enough.	But P1 may have range table, so the
4339 	     size of bitmap table of P1 is extracted by
4340 	     using macro `CHARSET_BITMAP_SIZE'.
4341 
4342 	     Since we know that all the character listed in
4343 	     P2 is ASCII, it is enough to test only bitmap
4344 	     table of P1.  */
4345 
4346 	  if ((re_opcode_t) *p1 == charset)
4347 	    {
4348 	      int idx;
4349 	      /* We win if the charset inside the loop
4350 		 has no overlap with the one after the loop.  */
4351 	      for (idx = 0;
4352 		   (idx < (int) p2[1]
4353 		    && idx < CHARSET_BITMAP_SIZE (p1));
4354 		   idx++)
4355 		if ((p2[2 + idx] & p1[2 + idx]) != 0)
4356 		  break;
4357 
4358 	      if (idx == p2[1]
4359 		  || idx == CHARSET_BITMAP_SIZE (p1))
4360 		{
4361 		  DEBUG_PRINT1 ("	 No match => fast loop.\n");
4362 		  return 1;
4363 		}
4364 	    }
4365 	  else if ((re_opcode_t) *p1 == charset_not)
4366 	    {
4367 	      int idx;
4368 	      /* We win if the charset_not inside the loop lists
4369 		 every character listed in the charset after.	 */
4370 	      for (idx = 0; idx < (int) p2[1]; idx++)
4371 		if (! (p2[2 + idx] == 0
4372 		       || (idx < CHARSET_BITMAP_SIZE (p1)
4373 			   && ((p2[2 + idx] & ~ p1[2 + idx]) == 0))))
4374 		  break;
4375 
4376 		if (idx == p2[1])
4377 		  {
4378 		    DEBUG_PRINT1 ("	 No match => fast loop.\n");
4379 		    return 1;
4380 		  }
4381 	      }
4382 	  }
4383       }
4384       break;
4385 
4386     case charset_not:
4387       switch (SWITCH_ENUM_CAST (*p1))
4388 	{
4389 	case exactn:
4390 	case charset:
4391 	  /* Reuse the code above.  */
4392 	  return mutually_exclusive_p (bufp, p2, p1);
4393 	case charset_not:
4394 	  /* When we have two charset_not, it's very unlikely that
4395 	     they don't overlap.  The union of the two sets of excluded
4396 	     chars should cover all possible chars, which, as a matter of
4397 	     fact, is virtually impossible in multibyte buffers.  */
4398 	  ;
4399 	}
4400       break;
4401 
4402     case wordend:
4403     case notsyntaxspec:
4404       return ((re_opcode_t) *p1 == syntaxspec
4405 	      && p1[1] == (op2 == wordend ? Sword : p2[1]));
4406 
4407     case wordbeg:
4408     case syntaxspec:
4409       return ((re_opcode_t) *p1 == notsyntaxspec
4410 	      && p1[1] == (op2 == wordend ? Sword : p2[1]));
4411 
4412     case wordbound:
4413       return (((re_opcode_t) *p1 == notsyntaxspec
4414 	       || (re_opcode_t) *p1 == syntaxspec)
4415 	      && p1[1] == Sword);
4416 
4417 #ifdef emacs
4418     case categoryspec:
4419       return ((re_opcode_t) *p1 == notcategoryspec && p1[1] == p2[1]);
4420     case notcategoryspec:
4421       return ((re_opcode_t) *p1 == categoryspec && p1[1] == p2[1]);
4422 #endif /* emacs */
4423 
4424     default:
4425       ;
4426     }
4427 
4428   /* Safe default.  */
4429   return 0;
4430 }
4431 
4432 
4433 /* Matching routines.  */
4434 
4435 #ifndef emacs	/* Emacs never uses this.  */
4436 /* re_match is like re_match_2 except it takes only a single string.  */
4437 
4438 int
re_match(bufp,string,size,pos,regs)4439 re_match (bufp, string, size, pos, regs)
4440      struct re_pattern_buffer *bufp;
4441      const char *string;
4442      int size, pos;
4443      struct re_registers *regs;
4444 {
4445   int result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, size,
4446 				    pos, regs, size);
4447 # if defined C_ALLOCA && !defined REGEX_MALLOC
4448   alloca (0);
4449 # endif
4450   return result;
4451 }
4452 WEAK_ALIAS (__re_match, re_match)
4453 #endif /* not emacs */
4454 
4455 #ifdef emacs
4456 /* In Emacs, this is the string or buffer in which we
4457    are matching.  It is used for looking up syntax properties.	*/
4458 Lisp_Object re_match_object;
4459 #endif
4460 
4461 /* re_match_2 matches the compiled pattern in BUFP against the
4462    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4463    and SIZE2, respectively).  We start matching at POS, and stop
4464    matching at STOP.
4465 
4466    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4467    store offsets for the substring each group matched in REGS.	See the
4468    documentation for exactly how many groups we fill.
4469 
4470    We return -1 if no match, -2 if an internal error (such as the
4471    failure stack overflowing).	Otherwise, we return the length of the
4472    matched substring.  */
4473 
4474 int
4475 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4476      struct re_pattern_buffer *bufp;
4477      const char *string1, *string2;
4478      int size1, size2;
4479      int pos;
4480      struct re_registers *regs;
4481      int stop;
4482 {
4483   int result;
4484 
4485 #ifdef emacs
4486   int charpos;
4487   gl_state.object = re_match_object;
4488   charpos = SYNTAX_TABLE_BYTE_TO_CHAR (POS_AS_IN_BUFFER (pos));
4489   SETUP_SYNTAX_TABLE_FOR_OBJECT (re_match_object, charpos, 1);
4490 #endif
4491 
4492   result = re_match_2_internal (bufp, (re_char*) string1, size1,
4493 				(re_char*) string2, size2,
4494 				pos, regs, stop);
4495 #if defined C_ALLOCA && !defined REGEX_MALLOC
4496   alloca (0);
4497 #endif
4498   return result;
4499 }
4500 WEAK_ALIAS (__re_match_2, re_match_2)
4501 
4502 /* This is a separate function so that we can force an alloca cleanup
4503    afterwards.	*/
4504 static int
4505 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4506      struct re_pattern_buffer *bufp;
4507      re_char *string1, *string2;
4508      int size1, size2;
4509      int pos;
4510      struct re_registers *regs;
4511      int stop;
4512 {
4513   /* General temporaries.  */
4514   int mcnt;
4515   size_t reg;
4516   boolean not;
4517 
4518   /* Just past the end of the corresponding string.  */
4519   re_char *end1, *end2;
4520 
4521   /* Pointers into string1 and string2, just past the last characters in
4522      each to consider matching.	 */
4523   re_char *end_match_1, *end_match_2;
4524 
4525   /* Where we are in the data, and the end of the current string.  */
4526   re_char *d, *dend;
4527 
4528   /* Used sometimes to remember where we were before starting matching
4529      an operator so that we can go back in case of failure.  This "atomic"
4530      behavior of matching opcodes is indispensable to the correctness
4531      of the on_failure_keep_string_jump optimization.  */
4532   re_char *dfail;
4533 
4534   /* Where we are in the pattern, and the end of the pattern.  */
4535   re_char *p = bufp->buffer;
4536   re_char *pend = p + bufp->used;
4537 
4538   /* We use this to map every character in the string.	*/
4539   RE_TRANSLATE_TYPE translate = bufp->translate;
4540 
4541   /* Nonzero if we have to concern multibyte character.	 */
4542   const boolean multibyte = RE_MULTIBYTE_P (bufp);
4543 
4544   /* Failure point stack.  Each place that can handle a failure further
4545      down the line pushes a failure point on this stack.  It consists of
4546      regstart, and regend for all registers corresponding to
4547      the subexpressions we're currently inside, plus the number of such
4548      registers, and, finally, two char *'s.  The first char * is where
4549      to resume scanning the pattern; the second one is where to resume
4550      scanning the strings.	*/
4551 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.	 */
4552   fail_stack_type fail_stack;
4553 #endif
4554 #ifdef DEBUG
4555   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4556 #endif
4557 
4558 #if defined REL_ALLOC && defined REGEX_MALLOC
4559   /* This holds the pointer to the failure stack, when
4560      it is allocated relocatably.  */
4561   fail_stack_elt_t *failure_stack_ptr;
4562 #endif
4563 
4564   /* We fill all the registers internally, independent of what we
4565      return, for use in backreferences.	 The number here includes
4566      an element for register zero.  */
4567   size_t num_regs = bufp->re_nsub + 1;
4568 
4569   /* Information on the contents of registers. These are pointers into
4570      the input strings; they record just what was matched (on this
4571      attempt) by a subexpression part of the pattern, that is, the
4572      regnum-th regstart pointer points to where in the pattern we began
4573      matching and the regnum-th regend points to right after where we
4574      stopped matching the regnum-th subexpression.  (The zeroth register
4575      keeps track of what the whole pattern matches.)  */
4576 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4577   re_char **regstart, **regend;
4578 #endif
4579 
4580   /* The following record the register info as found in the above
4581      variables when we find a match better than any we've seen before.
4582      This happens as we backtrack through the failure points, which in
4583      turn happens only if we have not yet matched the entire string. */
4584   unsigned best_regs_set = false;
4585 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4586   re_char **best_regstart, **best_regend;
4587 #endif
4588 
4589   /* Logically, this is `best_regend[0]'.  But we don't want to have to
4590      allocate space for that if we're not allocating space for anything
4591      else (see below).	Also, we never need info about register 0 for
4592      any of the other register vectors, and it seems rather a kludge to
4593      treat `best_regend' differently than the rest.  So we keep track of
4594      the end of the best match so far in a separate variable.  We
4595      initialize this to NULL so that when we backtrack the first time
4596      and need to test it, it's not garbage.  */
4597   re_char *match_end = NULL;
4598 
4599 #ifdef DEBUG
4600   /* Counts the total number of registers pushed.  */
4601   unsigned num_regs_pushed = 0;
4602 #endif
4603 
4604   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
4605 
4606   INIT_FAIL_STACK ();
4607 
4608 #ifdef MATCH_MAY_ALLOCATE
4609   /* Do not bother to initialize all the register variables if there are
4610      no groups in the pattern, as it takes a fair amount of time.  If
4611      there are groups, we include space for register 0 (the whole
4612      pattern), even though we never use it, since it simplifies the
4613      array indexing.  We should fix this.  */
4614   if (bufp->re_nsub)
4615     {
4616       regstart = REGEX_TALLOC (num_regs, re_char *);
4617       regend = REGEX_TALLOC (num_regs, re_char *);
4618       best_regstart = REGEX_TALLOC (num_regs, re_char *);
4619       best_regend = REGEX_TALLOC (num_regs, re_char *);
4620 
4621       if (!(regstart && regend && best_regstart && best_regend))
4622 	{
4623 	  FREE_VARIABLES ();
4624 	  return -2;
4625 	}
4626     }
4627   else
4628     {
4629       /* We must initialize all our variables to NULL, so that
4630 	 `FREE_VARIABLES' doesn't try to free them.  */
4631       regstart = regend = best_regstart = best_regend = NULL;
4632     }
4633 #endif /* MATCH_MAY_ALLOCATE */
4634 
4635   /* The starting position is bogus.  */
4636   if (pos < 0 || pos > size1 + size2)
4637     {
4638       FREE_VARIABLES ();
4639       return -1;
4640     }
4641 
4642   /* Initialize subexpression text positions to -1 to mark ones that no
4643      start_memory/stop_memory has been seen for. Also initialize the
4644      register information struct.  */
4645   for (reg = 1; reg < num_regs; reg++)
4646     regstart[reg] = regend[reg] = NULL;
4647 
4648   /* We move `string1' into `string2' if the latter's empty -- but not if
4649      `string1' is null.	 */
4650   if (size2 == 0 && string1 != NULL)
4651     {
4652       string2 = string1;
4653       size2 = size1;
4654       string1 = 0;
4655       size1 = 0;
4656     }
4657   end1 = string1 + size1;
4658   end2 = string2 + size2;
4659 
4660   /* `p' scans through the pattern as `d' scans through the data.
4661      `dend' is the end of the input string that `d' points within.  `d'
4662      is advanced into the following input string whenever necessary, but
4663      this happens before fetching; therefore, at the beginning of the
4664      loop, `d' can be pointing at the end of a string, but it cannot
4665      equal `string2'.  */
4666   if (pos >= size1)
4667     {
4668       /* Only match within string2.  */
4669       d = string2 + pos - size1;
4670       dend = end_match_2 = string2 + stop - size1;
4671       end_match_1 = end1;	/* Just to give it a value.  */
4672     }
4673   else
4674     {
4675       if (stop < size1)
4676 	{
4677 	  /* Only match within string1.  */
4678 	  end_match_1 = string1 + stop;
4679 	  /* BEWARE!
4680 	     When we reach end_match_1, PREFETCH normally switches to string2.
4681 	     But in the present case, this means that just doing a PREFETCH
4682 	     makes us jump from `stop' to `gap' within the string.
4683 	     What we really want here is for the search to stop as
4684 	     soon as we hit end_match_1.  That's why we set end_match_2
4685 	     to end_match_1 (since PREFETCH fails as soon as we hit
4686 	     end_match_2).  */
4687 	  end_match_2 = end_match_1;
4688 	}
4689       else
4690 	{ /* It's important to use this code when stop == size so that
4691 	     moving `d' from end1 to string2 will not prevent the d == dend
4692 	     check from catching the end of string.  */
4693 	  end_match_1 = end1;
4694 	  end_match_2 = string2 + stop - size1;
4695 	}
4696       d = string1 + pos;
4697       dend = end_match_1;
4698     }
4699 
4700   DEBUG_PRINT1 ("The compiled pattern is: ");
4701   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4702   DEBUG_PRINT1 ("The string to match is: `");
4703   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4704   DEBUG_PRINT1 ("'\n");
4705 
4706   /* This loops over pattern commands.	It exits by returning from the
4707      function if the match is complete, or it drops through if the match
4708      fails at this starting point in the input data.  */
4709   for (;;)
4710     {
4711       DEBUG_PRINT2 ("\n%p: ", p);
4712 
4713       if (p == pend)
4714 	{ /* End of pattern means we might have succeeded.  */
4715 	  DEBUG_PRINT1 ("end of pattern ... ");
4716 
4717 	  /* If we haven't matched the entire string, and we want the
4718 	     longest match, try backtracking.  */
4719 	  if (d != end_match_2)
4720 	    {
4721 	      /* 1 if this match ends in the same string (string1 or string2)
4722 		 as the best previous match.  */
4723 	      boolean same_str_p = (FIRST_STRING_P (match_end)
4724 				    == FIRST_STRING_P (d));
4725 	      /* 1 if this match is the best seen so far.  */
4726 	      boolean best_match_p;
4727 
4728 	      /* AIX compiler got confused when this was combined
4729 		 with the previous declaration.	 */
4730 	      if (same_str_p)
4731 		best_match_p = d > match_end;
4732 	      else
4733 		best_match_p = !FIRST_STRING_P (d);
4734 
4735 	      DEBUG_PRINT1 ("backtracking.\n");
4736 
4737 	      if (!FAIL_STACK_EMPTY ())
4738 		{ /* More failure points to try.  */
4739 
4740 		  /* If exceeds best match so far, save it.  */
4741 		  if (!best_regs_set || best_match_p)
4742 		    {
4743 		      best_regs_set = true;
4744 		      match_end = d;
4745 
4746 		      DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4747 
4748 		      for (reg = 1; reg < num_regs; reg++)
4749 			{
4750 			  best_regstart[reg] = regstart[reg];
4751 			  best_regend[reg] = regend[reg];
4752 			}
4753 		    }
4754 		  goto fail;
4755 		}
4756 
4757 	      /* If no failure points, don't restore garbage.  And if
4758 		 last match is real best match, don't restore second
4759 		 best one. */
4760 	      else if (best_regs_set && !best_match_p)
4761 		{
4762 		restore_best_regs:
4763 		  /* Restore best match.  It may happen that `dend ==
4764 		     end_match_1' while the restored d is in string2.
4765 		     For example, the pattern `x.*y.*z' against the
4766 		     strings `x-' and `y-z-', if the two strings are
4767 		     not consecutive in memory.	 */
4768 		  DEBUG_PRINT1 ("Restoring best registers.\n");
4769 
4770 		  d = match_end;
4771 		  dend = ((d >= string1 && d <= end1)
4772 			   ? end_match_1 : end_match_2);
4773 
4774 		  for (reg = 1; reg < num_regs; reg++)
4775 		    {
4776 		      regstart[reg] = best_regstart[reg];
4777 		      regend[reg] = best_regend[reg];
4778 		    }
4779 		}
4780 	    } /* d != end_match_2 */
4781 
4782 	succeed_label:
4783 	  DEBUG_PRINT1 ("Accepting match.\n");
4784 
4785 	  /* If caller wants register contents data back, do it.  */
4786 	  if (regs && !bufp->no_sub)
4787 	    {
4788 	      /* Have the register data arrays been allocated?	*/
4789 	      if (bufp->regs_allocated == REGS_UNALLOCATED)
4790 		{ /* No.  So allocate them with malloc.	 We need one
4791 		     extra element beyond `num_regs' for the `-1' marker
4792 		     GNU code uses.  */
4793 		  regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4794 		  regs->start = TALLOC (regs->num_regs, regoff_t);
4795 		  regs->end = TALLOC (regs->num_regs, regoff_t);
4796 		  if (regs->start == NULL || regs->end == NULL)
4797 		    {
4798 		      FREE_VARIABLES ();
4799 		      return -2;
4800 		    }
4801 		  bufp->regs_allocated = REGS_REALLOCATE;
4802 		}
4803 	      else if (bufp->regs_allocated == REGS_REALLOCATE)
4804 		{ /* Yes.  If we need more elements than were already
4805 		     allocated, reallocate them.  If we need fewer, just
4806 		     leave it alone.  */
4807 		  if (regs->num_regs < num_regs + 1)
4808 		    {
4809 		      regs->num_regs = num_regs + 1;
4810 		      RETALLOC (regs->start, regs->num_regs, regoff_t);
4811 		      RETALLOC (regs->end, regs->num_regs, regoff_t);
4812 		      if (regs->start == NULL || regs->end == NULL)
4813 			{
4814 			  FREE_VARIABLES ();
4815 			  return -2;
4816 			}
4817 		    }
4818 		}
4819 	      else
4820 		{
4821 		  /* These braces fend off a "empty body in an else-statement"
4822 		     warning under GCC when assert expands to nothing.	*/
4823 		  assert (bufp->regs_allocated == REGS_FIXED);
4824 		}
4825 
4826 	      /* Convert the pointer data in `regstart' and `regend' to
4827 		 indices.  Register zero has to be set differently,
4828 		 since we haven't kept track of any info for it.  */
4829 	      if (regs->num_regs > 0)
4830 		{
4831 		  regs->start[0] = pos;
4832 		  regs->end[0] = POINTER_TO_OFFSET (d);
4833 		}
4834 
4835 	      /* Go through the first `min (num_regs, regs->num_regs)'
4836 		 registers, since that is all we initialized.  */
4837 	      for (reg = 1; reg < MIN (num_regs, regs->num_regs); reg++)
4838 		{
4839 		  if (REG_UNSET (regstart[reg]) || REG_UNSET (regend[reg]))
4840 		    regs->start[reg] = regs->end[reg] = -1;
4841 		  else
4842 		    {
4843 		      regs->start[reg]
4844 			= (regoff_t) POINTER_TO_OFFSET (regstart[reg]);
4845 		      regs->end[reg]
4846 			= (regoff_t) POINTER_TO_OFFSET (regend[reg]);
4847 		    }
4848 		}
4849 
4850 	      /* If the regs structure we return has more elements than
4851 		 were in the pattern, set the extra elements to -1.  If
4852 		 we (re)allocated the registers, this is the case,
4853 		 because we always allocate enough to have at least one
4854 		 -1 at the end.	 */
4855 	      for (reg = num_regs; reg < regs->num_regs; reg++)
4856 		regs->start[reg] = regs->end[reg] = -1;
4857 	    } /* regs && !bufp->no_sub */
4858 
4859 	  DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4860 			nfailure_points_pushed, nfailure_points_popped,
4861 			nfailure_points_pushed - nfailure_points_popped);
4862 	  DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4863 
4864 	  mcnt = POINTER_TO_OFFSET (d) - pos;
4865 
4866 	  DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4867 
4868 	  FREE_VARIABLES ();
4869 	  return mcnt;
4870 	}
4871 
4872       /* Otherwise match next pattern command.	*/
4873       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4874 	{
4875 	/* Ignore these.  Used to ignore the n of succeed_n's which
4876 	   currently have n == 0.  */
4877 	case no_op:
4878 	  DEBUG_PRINT1 ("EXECUTING no_op.\n");
4879 	  break;
4880 
4881 	case succeed:
4882 	  DEBUG_PRINT1 ("EXECUTING succeed.\n");
4883 	  goto succeed_label;
4884 
4885 	/* Match the next n pattern characters exactly.	 The following
4886 	   byte in the pattern defines n, and the n bytes after that
4887 	   are the characters to match.	 */
4888 	case exactn:
4889 	  mcnt = *p++;
4890 	  DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4891 
4892 	  /* Remember the start point to rollback upon failure.  */
4893 	  dfail = d;
4894 
4895 	  /* This is written out as an if-else so we don't waste time
4896 	     testing `translate' inside the loop.  */
4897 	  if (RE_TRANSLATE_P (translate))
4898 	    {
4899 	      if (multibyte)
4900 		do
4901 		  {
4902 		    int pat_charlen, buf_charlen;
4903 		    unsigned int pat_ch, buf_ch;
4904 
4905 		    PREFETCH ();
4906 		    pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
4907 		    buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4908 
4909 		    if (RE_TRANSLATE (translate, buf_ch)
4910 			!= pat_ch)
4911 		      {
4912 			d = dfail;
4913 			goto fail;
4914 		      }
4915 
4916 		    p += pat_charlen;
4917 		    d += buf_charlen;
4918 		    mcnt -= pat_charlen;
4919 		  }
4920 		while (mcnt > 0);
4921 	      else
4922 		do
4923 		  {
4924 		    PREFETCH ();
4925 		    if (RE_TRANSLATE (translate, *d) != *p++)
4926 		      {
4927 			d = dfail;
4928 			goto fail;
4929 		      }
4930 		    d++;
4931 		  }
4932 		while (--mcnt);
4933 	    }
4934 	  else
4935 	    {
4936 	      do
4937 		{
4938 		  PREFETCH ();
4939 		  if (*d++ != *p++)
4940 		    {
4941 		      d = dfail;
4942 		      goto fail;
4943 		    }
4944 		}
4945 	      while (--mcnt);
4946 	    }
4947 	  break;
4948 
4949 
4950 	/* Match any character except possibly a newline or a null.  */
4951 	case anychar:
4952 	  {
4953 	    int buf_charlen;
4954 	    re_wchar_t buf_ch;
4955 
4956 	    DEBUG_PRINT1 ("EXECUTING anychar.\n");
4957 
4958 	    PREFETCH ();
4959 	    buf_ch = RE_STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
4960 	    buf_ch = TRANSLATE (buf_ch);
4961 
4962 	    if ((!(bufp->syntax & RE_DOT_NEWLINE)
4963 		 && buf_ch == '\n')
4964 		|| ((bufp->syntax & RE_DOT_NOT_NULL)
4965 		    && buf_ch == '\000'))
4966 	      goto fail;
4967 
4968 	    DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4969 	    d += buf_charlen;
4970 	  }
4971 	  break;
4972 
4973 
4974 	case charset:
4975 	case charset_not:
4976 	  {
4977 	    register unsigned int c;
4978 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
4979 	    int len;
4980 
4981 	    /* Start of actual range_table, or end of bitmap if there is no
4982 	       range table.  */
4983 	    re_char *range_table;
4984 
4985 	    /* Nonzero if there is a range table.  */
4986 	    int range_table_exists;
4987 
4988 	    /* Number of ranges of range table.  This is not included
4989 	       in the initial byte-length of the command.  */
4990 	    int count = 0;
4991 
4992 	    DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4993 
4994 	    range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
4995 
4996 	    if (range_table_exists)
4997 	      {
4998 		range_table = CHARSET_RANGE_TABLE (&p[-1]); /* Past the bitmap.  */
4999 		EXTRACT_NUMBER_AND_INCR (count, range_table);
5000 	      }
5001 
5002 	    PREFETCH ();
5003 	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
5004 	    c = TRANSLATE (c); /* The character to match.  */
5005 
5006 	    if (SINGLE_BYTE_CHAR_P (c))
5007 	      {			/* Lookup bitmap.  */
5008 		/* Cast to `unsigned' instead of `unsigned char' in
5009 		   case the bit list is a full 32 bytes long.  */
5010 		if (c < (unsigned) (CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH)
5011 		    && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
5012 		  not = !not;
5013 	      }
5014 #ifdef emacs
5015 	    else if (range_table_exists)
5016 	      {
5017 		int class_bits = CHARSET_RANGE_TABLE_BITS (&p[-1]);
5018 
5019 		if (  (class_bits & BIT_LOWER && ISLOWER (c))
5020 		    | (class_bits & BIT_MULTIBYTE)
5021 		    | (class_bits & BIT_PUNCT && ISPUNCT (c))
5022 		    | (class_bits & BIT_SPACE && ISSPACE (c))
5023 		    | (class_bits & BIT_UPPER && ISUPPER (c))
5024 		    | (class_bits & BIT_WORD  && ISWORD (c)))
5025 		  not = !not;
5026 		else
5027 		  CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
5028 	      }
5029 #endif /* emacs */
5030 
5031 	    if (range_table_exists)
5032 	      p = CHARSET_RANGE_TABLE_END (range_table, count);
5033 	    else
5034 	      p += CHARSET_BITMAP_SIZE (&p[-1]) + 1;
5035 
5036 	    if (!not) goto fail;
5037 
5038 	    d += len;
5039 	    break;
5040 	  }
5041 
5042 
5043 	/* The beginning of a group is represented by start_memory.
5044 	   The argument is the register number.  The text
5045 	   matched within the group is recorded (in the internal
5046 	   registers data structure) under the register number.	 */
5047 	case start_memory:
5048 	  DEBUG_PRINT2 ("EXECUTING start_memory %d:\n", *p);
5049 
5050 	  /* In case we need to undo this operation (via backtracking).  */
5051 	  PUSH_FAILURE_REG ((unsigned int)*p);
5052 
5053 	  regstart[*p] = d;
5054 	  regend[*p] = NULL;	/* probably unnecessary.  -sm  */
5055 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
5056 
5057 	  /* Move past the register number and inner group count.  */
5058 	  p += 1;
5059 	  break;
5060 
5061 
5062 	/* The stop_memory opcode represents the end of a group.  Its
5063 	   argument is the same as start_memory's: the register number.  */
5064 	case stop_memory:
5065 	  DEBUG_PRINT2 ("EXECUTING stop_memory %d:\n", *p);
5066 
5067 	  assert (!REG_UNSET (regstart[*p]));
5068 	  /* Strictly speaking, there should be code such as:
5069 
5070 		assert (REG_UNSET (regend[*p]));
5071 		PUSH_FAILURE_REGSTOP ((unsigned int)*p);
5072 
5073 	     But the only info to be pushed is regend[*p] and it is known to
5074 	     be UNSET, so there really isn't anything to push.
5075 	     Not pushing anything, on the other hand deprives us from the
5076 	     guarantee that regend[*p] is UNSET since undoing this operation
5077 	     will not reset its value properly.  This is not important since
5078 	     the value will only be read on the next start_memory or at
5079 	     the very end and both events can only happen if this stop_memory
5080 	     is *not* undone.  */
5081 
5082 	  regend[*p] = d;
5083 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
5084 
5085 	  /* Move past the register number and the inner group count.  */
5086 	  p += 1;
5087 	  break;
5088 
5089 
5090 	/* \<digit> has been turned into a `duplicate' command which is
5091 	   followed by the numeric value of <digit> as the register number.  */
5092 	case duplicate:
5093 	  {
5094 	    register re_char *d2, *dend2;
5095 	    int regno = *p++;	/* Get which register to match against.	 */
5096 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
5097 
5098 	    /* Can't back reference a group which we've never matched.	*/
5099 	    if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
5100 	      goto fail;
5101 
5102 	    /* Where in input to try to start matching.	 */
5103 	    d2 = regstart[regno];
5104 
5105 	    /* Remember the start point to rollback upon failure.  */
5106 	    dfail = d;
5107 
5108 	    /* Where to stop matching; if both the place to start and
5109 	       the place to stop matching are in the same string, then
5110 	       set to the place to stop, otherwise, for now have to use
5111 	       the end of the first string.  */
5112 
5113 	    dend2 = ((FIRST_STRING_P (regstart[regno])
5114 		      == FIRST_STRING_P (regend[regno]))
5115 		     ? regend[regno] : end_match_1);
5116 	    for (;;)
5117 	      {
5118 		/* If necessary, advance to next segment in register
5119 		   contents.  */
5120 		while (d2 == dend2)
5121 		  {
5122 		    if (dend2 == end_match_2) break;
5123 		    if (dend2 == regend[regno]) break;
5124 
5125 		    /* End of string1 => advance to string2. */
5126 		    d2 = string2;
5127 		    dend2 = regend[regno];
5128 		  }
5129 		/* At end of register contents => success */
5130 		if (d2 == dend2) break;
5131 
5132 		/* If necessary, advance to next segment in data.  */
5133 		PREFETCH ();
5134 
5135 		/* How many characters left in this segment to match.  */
5136 		mcnt = dend - d;
5137 
5138 		/* Want how many consecutive characters we can match in
5139 		   one shot, so, if necessary, adjust the count.  */
5140 		if (mcnt > dend2 - d2)
5141 		  mcnt = dend2 - d2;
5142 
5143 		/* Compare that many; failure if mismatch, else move
5144 		   past them.  */
5145 		if (RE_TRANSLATE_P (translate)
5146 		    ? bcmp_translate (d, d2, mcnt, translate, multibyte)
5147 		    : memcmp (d, d2, mcnt))
5148 		  {
5149 		    d = dfail;
5150 		    goto fail;
5151 		  }
5152 		d += mcnt, d2 += mcnt;
5153 	      }
5154 	  }
5155 	  break;
5156 
5157 
5158 	/* begline matches the empty string at the beginning of the string
5159 	   (unless `not_bol' is set in `bufp'), and after newlines.  */
5160 	case begline:
5161 	  DEBUG_PRINT1 ("EXECUTING begline.\n");
5162 
5163 	  if (AT_STRINGS_BEG (d))
5164 	    {
5165 	      if (!bufp->not_bol) break;
5166 	    }
5167 	  else
5168 	    {
5169 	      unsigned char c;
5170 	      GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
5171 	      if (c == '\n')
5172 		break;
5173 	    }
5174 	  /* In all other cases, we fail.  */
5175 	  goto fail;
5176 
5177 
5178 	/* endline is the dual of begline.  */
5179 	case endline:
5180 	  DEBUG_PRINT1 ("EXECUTING endline.\n");
5181 
5182 	  if (AT_STRINGS_END (d))
5183 	    {
5184 	      if (!bufp->not_eol) break;
5185 	    }
5186 	  else
5187 	    {
5188 	      PREFETCH_NOLIMIT ();
5189 	      if (*d == '\n')
5190 		break;
5191 	    }
5192 	  goto fail;
5193 
5194 
5195 	/* Match at the very beginning of the data.  */
5196 	case begbuf:
5197 	  DEBUG_PRINT1 ("EXECUTING begbuf.\n");
5198 	  if (AT_STRINGS_BEG (d))
5199 	    break;
5200 	  goto fail;
5201 
5202 
5203 	/* Match at the very end of the data.  */
5204 	case endbuf:
5205 	  DEBUG_PRINT1 ("EXECUTING endbuf.\n");
5206 	  if (AT_STRINGS_END (d))
5207 	    break;
5208 	  goto fail;
5209 
5210 
5211 	/* on_failure_keep_string_jump is used to optimize `.*\n'.  It
5212 	   pushes NULL as the value for the string on the stack.  Then
5213 	   `POP_FAILURE_POINT' will keep the current value for the
5214 	   string, instead of restoring it.  To see why, consider
5215 	   matching `foo\nbar' against `.*\n'.	The .* matches the foo;
5216 	   then the . fails against the \n.  But the next thing we want
5217 	   to do is match the \n against the \n; if we restored the
5218 	   string value, we would be back at the foo.
5219 
5220 	   Because this is used only in specific cases, we don't need to
5221 	   check all the things that `on_failure_jump' does, to make
5222 	   sure the right things get saved on the stack.  Hence we don't
5223 	   share its code.  The only reason to push anything on the
5224 	   stack at all is that otherwise we would have to change
5225 	   `anychar's code to do something besides goto fail in this
5226 	   case; that seems worse than this.  */
5227 	case on_failure_keep_string_jump:
5228 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5229 	  DEBUG_PRINT3 ("EXECUTING on_failure_keep_string_jump %d (to %p):\n",
5230 			mcnt, p + mcnt);
5231 
5232 	  PUSH_FAILURE_POINT (p - 3, NULL);
5233 	  break;
5234 
5235 	  /* A nasty loop is introduced by the non-greedy *? and +?.
5236 	     With such loops, the stack only ever contains one failure point
5237 	     at a time, so that a plain on_failure_jump_loop kind of
5238 	     cycle detection cannot work.  Worse yet, such a detection
5239 	     can not only fail to detect a cycle, but it can also wrongly
5240 	     detect a cycle (between different instantiations of the same
5241 	     loop.
5242 	     So the method used for those nasty loops is a little different:
5243 	     We use a special cycle-detection-stack-frame which is pushed
5244 	     when the on_failure_jump_nastyloop failure-point is *popped*.
5245 	     This special frame thus marks the beginning of one iteration
5246 	     through the loop and we can hence easily check right here
5247 	     whether something matched between the beginning and the end of
5248 	     the loop.  */
5249 	case on_failure_jump_nastyloop:
5250 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5251 	  DEBUG_PRINT3 ("EXECUTING on_failure_jump_nastyloop %d (to %p):\n",
5252 			mcnt, p + mcnt);
5253 
5254 	  assert ((re_opcode_t)p[-4] == no_op);
5255 	  CHECK_INFINITE_LOOP (p - 4, d);
5256 	  PUSH_FAILURE_POINT (p - 3, d);
5257 	  break;
5258 
5259 
5260 	  /* Simple loop detecting on_failure_jump:  just check on the
5261 	     failure stack if the same spot was already hit earlier.  */
5262 	case on_failure_jump_loop:
5263 	on_failure:
5264 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5265 	  DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n",
5266 			mcnt, p + mcnt);
5267 
5268 	  CHECK_INFINITE_LOOP (p - 3, d);
5269 	  PUSH_FAILURE_POINT (p - 3, d);
5270 	  break;
5271 
5272 
5273 	/* Uses of on_failure_jump:
5274 
5275 	   Each alternative starts with an on_failure_jump that points
5276 	   to the beginning of the next alternative.  Each alternative
5277 	   except the last ends with a jump that in effect jumps past
5278 	   the rest of the alternatives.  (They really jump to the
5279 	   ending jump of the following alternative, because tensioning
5280 	   these jumps is a hassle.)
5281 
5282 	   Repeats start with an on_failure_jump that points past both
5283 	   the repetition text and either the following jump or
5284 	   pop_failure_jump back to this on_failure_jump.  */
5285 	case on_failure_jump:
5286 	  IMMEDIATE_QUIT_CHECK;
5287 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5288 	  DEBUG_PRINT3 ("EXECUTING on_failure_jump %d (to %p):\n",
5289 			mcnt, p + mcnt);
5290 
5291 	  PUSH_FAILURE_POINT (p -3, d);
5292 	  break;
5293 
5294 	/* This operation is used for greedy *.
5295 	   Compare the beginning of the repeat with what in the
5296 	   pattern follows its end. If we can establish that there
5297 	   is nothing that they would both match, i.e., that we
5298 	   would have to backtrack because of (as in, e.g., `a*a')
5299 	   then we can use a non-backtracking loop based on
5300 	   on_failure_keep_string_jump instead of on_failure_jump.  */
5301 	case on_failure_jump_smart:
5302 	  IMMEDIATE_QUIT_CHECK;
5303 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
5304 	  DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
5305 			mcnt, p + mcnt);
5306 	  {
5307 	    re_char *p1 = p; /* Next operation.  */
5308 	    /* Here, we discard `const', making re_match non-reentrant.  */
5309 	    unsigned char *p2 = (unsigned char*) p + mcnt; /* Jump dest.  */
5310 	    unsigned char *p3 = (unsigned char*) p - 3; /* opcode location.  */
5311 
5312 	    p -= 3;		/* Reset so that we will re-execute the
5313 				   instruction once it's been changed. */
5314 
5315 	    EXTRACT_NUMBER (mcnt, p2 - 2);
5316 
5317 	    /* Ensure this is a indeed the trivial kind of loop
5318 	       we are expecting.  */
5319 	    assert (skip_one_char (p1) == p2 - 3);
5320 	    assert ((re_opcode_t) p2[-3] == jump && p2 + mcnt == p);
5321 	    DEBUG_STATEMENT (debug += 2);
5322 	    if (mutually_exclusive_p (bufp, p1, p2))
5323 	      {
5324 		/* Use a fast `on_failure_keep_string_jump' loop.  */
5325 		DEBUG_PRINT1 ("  smart exclusive => fast loop.\n");
5326 		*p3 = (unsigned char) on_failure_keep_string_jump;
5327 		STORE_NUMBER (p2 - 2, mcnt + 3);
5328 	      }
5329 	    else
5330 	      {
5331 		/* Default to a safe `on_failure_jump' loop.  */
5332 		DEBUG_PRINT1 ("  smart default => slow loop.\n");
5333 		*p3 = (unsigned char) on_failure_jump;
5334 	      }
5335 	    DEBUG_STATEMENT (debug -= 2);
5336 	  }
5337 	  break;
5338 
5339 	/* Unconditionally jump (without popping any failure points).  */
5340 	case jump:
5341 	unconditional_jump:
5342 	  IMMEDIATE_QUIT_CHECK;
5343 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
5344 	  DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
5345 	  p += mcnt;				/* Do the jump.	 */
5346 	  DEBUG_PRINT2 ("(to %p).\n", p);
5347 	  break;
5348 
5349 
5350 	/* Have to succeed matching what follows at least n times.
5351 	   After that, handle like `on_failure_jump'.  */
5352 	case succeed_n:
5353 	  /* Signedness doesn't matter since we only compare MCNT to 0.  */
5354 	  EXTRACT_NUMBER (mcnt, p + 2);
5355 	  DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
5356 
5357 	  /* Originally, mcnt is how many times we HAVE to succeed.  */
5358 	  if (mcnt != 0)
5359 	    {
5360 	      /* Here, we discard `const', making re_match non-reentrant.  */
5361 	      unsigned char *p2 = (unsigned char*) p + 2; /* counter loc.  */
5362 	      mcnt--;
5363 	      p += 4;
5364 	      PUSH_NUMBER (p2, mcnt);
5365 	    }
5366 	  else
5367 	    /* The two bytes encoding mcnt == 0 are two no_op opcodes.  */
5368 	    goto on_failure;
5369 	  break;
5370 
5371 	case jump_n:
5372 	  /* Signedness doesn't matter since we only compare MCNT to 0.  */
5373 	  EXTRACT_NUMBER (mcnt, p + 2);
5374 	  DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5375 
5376 	  /* Originally, this is how many times we CAN jump.  */
5377 	  if (mcnt != 0)
5378 	    {
5379 	       /* Here, we discard `const', making re_match non-reentrant.  */
5380 	      unsigned char *p2 = (unsigned char*) p + 2; /* counter loc.  */
5381 	      mcnt--;
5382 	      PUSH_NUMBER (p2, mcnt);
5383 	      goto unconditional_jump;
5384 	    }
5385 	  /* If don't have to jump any more, skip over the rest of command.  */
5386 	  else
5387 	    p += 4;
5388 	  break;
5389 
5390 	case set_number_at:
5391 	  {
5392 	    unsigned char *p2;	/* Location of the counter.  */
5393 	    DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5394 
5395 	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
5396 	    /* Here, we discard `const', making re_match non-reentrant.  */
5397 	    p2 = (unsigned char*) p + mcnt;
5398 	    /* Signedness doesn't matter since we only copy MCNT's bits .  */
5399 	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
5400 	    DEBUG_PRINT3 ("  Setting %p to %d.\n", p2, mcnt);
5401 	    PUSH_NUMBER (p2, mcnt);
5402 	    break;
5403 	  }
5404 
5405 	case wordbound:
5406 	case notwordbound:
5407 	  not = (re_opcode_t) *(p - 1) == notwordbound;
5408 	  DEBUG_PRINT2 ("EXECUTING %swordbound.\n", not?"not":"");
5409 
5410 	  /* We SUCCEED (or FAIL) in one of the following cases: */
5411 
5412 	  /* Case 1: D is at the beginning or the end of string.  */
5413 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5414 	    not = !not;
5415 	  else
5416 	    {
5417 	      /* C1 is the character before D, S1 is the syntax of C1, C2
5418 		 is the character at D, and S2 is the syntax of C2.  */
5419 	      re_wchar_t c1, c2;
5420 	      int s1, s2;
5421 #ifdef emacs
5422 	      int offset = PTR_TO_OFFSET (d - 1);
5423 	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5424 	      UPDATE_SYNTAX_TABLE (charpos);
5425 #endif
5426 	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5427 	      s1 = SYNTAX (c1);
5428 #ifdef emacs
5429 	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
5430 #endif
5431 	      PREFETCH_NOLIMIT ();
5432 	      c2 = RE_STRING_CHAR (d, dend - d);
5433 	      s2 = SYNTAX (c2);
5434 
5435 	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
5436 		  ((s1 == Sword) != (s2 == Sword))
5437 		  /* Case 3: Both of S1 and S2 are Sword, and macro
5438 		     WORD_BOUNDARY_P (C1, C2) returns nonzero.	*/
5439 		  || ((s1 == Sword) && WORD_BOUNDARY_P (c1, c2)))
5440 		not = !not;
5441 	    }
5442 	  if (not)
5443 	    break;
5444 	  else
5445 	    goto fail;
5446 
5447 	case wordbeg:
5448 	  DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5449 
5450 	  /* We FAIL in one of the following cases: */
5451 
5452 	  /* Case 1: D is at the end of string.	 */
5453 	  if (AT_STRINGS_END (d))
5454 	    goto fail;
5455 	  else
5456 	    {
5457 	      /* C1 is the character before D, S1 is the syntax of C1, C2
5458 		 is the character at D, and S2 is the syntax of C2.  */
5459 	      re_wchar_t c1, c2;
5460 	      int s1, s2;
5461 #ifdef emacs
5462 	      int offset = PTR_TO_OFFSET (d);
5463 	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5464 	      UPDATE_SYNTAX_TABLE (charpos);
5465 #endif
5466 	      PREFETCH ();
5467 	      c2 = RE_STRING_CHAR (d, dend - d);
5468 	      s2 = SYNTAX (c2);
5469 
5470 	      /* Case 2: S2 is not Sword. */
5471 	      if (s2 != Sword)
5472 		goto fail;
5473 
5474 	      /* Case 3: D is not at the beginning of string ... */
5475 	      if (!AT_STRINGS_BEG (d))
5476 		{
5477 		  GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5478 #ifdef emacs
5479 		  UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
5480 #endif
5481 		  s1 = SYNTAX (c1);
5482 
5483 		  /* ... and S1 is Sword, and WORD_BOUNDARY_P (C1, C2)
5484 		     returns 0.	 */
5485 		  if ((s1 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5486 		    goto fail;
5487 		}
5488 	    }
5489 	  break;
5490 
5491 	case wordend:
5492 	  DEBUG_PRINT1 ("EXECUTING wordend.\n");
5493 
5494 	  /* We FAIL in one of the following cases: */
5495 
5496 	  /* Case 1: D is at the beginning of string.  */
5497 	  if (AT_STRINGS_BEG (d))
5498 	    goto fail;
5499 	  else
5500 	    {
5501 	      /* C1 is the character before D, S1 is the syntax of C1, C2
5502 		 is the character at D, and S2 is the syntax of C2.  */
5503 	      re_wchar_t c1, c2;
5504 	      int s1, s2;
5505 #ifdef emacs
5506 	      int offset = PTR_TO_OFFSET (d) - 1;
5507 	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5508 	      UPDATE_SYNTAX_TABLE (charpos);
5509 #endif
5510 	      GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
5511 	      s1 = SYNTAX (c1);
5512 
5513 	      /* Case 2: S1 is not Sword.  */
5514 	      if (s1 != Sword)
5515 		goto fail;
5516 
5517 	      /* Case 3: D is not at the end of string ... */
5518 	      if (!AT_STRINGS_END (d))
5519 		{
5520 		  PREFETCH_NOLIMIT ();
5521 		  c2 = RE_STRING_CHAR (d, dend - d);
5522 #ifdef emacs
5523 		  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
5524 #endif
5525 		  s2 = SYNTAX (c2);
5526 
5527 		  /* ... and S2 is Sword, and WORD_BOUNDARY_P (C1, C2)
5528 		     returns 0.	 */
5529 		  if ((s2 == Sword) && !WORD_BOUNDARY_P (c1, c2))
5530 	  goto fail;
5531 		}
5532 	    }
5533 	  break;
5534 
5535 	case syntaxspec:
5536 	case notsyntaxspec:
5537 	  not = (re_opcode_t) *(p - 1) == notsyntaxspec;
5538 	  mcnt = *p++;
5539 	  DEBUG_PRINT3 ("EXECUTING %ssyntaxspec %d.\n", not?"not":"", mcnt);
5540 	  PREFETCH ();
5541 #ifdef emacs
5542 	  {
5543 	    int offset = PTR_TO_OFFSET (d);
5544 	    int pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
5545 	    UPDATE_SYNTAX_TABLE (pos1);
5546 	  }
5547 #endif
5548 	  {
5549 	    int len;
5550 	    re_wchar_t c;
5551 
5552 	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
5553 
5554 	    if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
5555 	      goto fail;
5556 	    d += len;
5557 	  }
5558 	  break;
5559 
5560 #ifdef emacs
5561 	case before_dot:
5562 	  DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5563 	  if (PTR_BYTE_POS (d) >= PT_BYTE)
5564 	    goto fail;
5565 	  break;
5566 
5567 	case at_dot:
5568 	  DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5569 	  if (PTR_BYTE_POS (d) != PT_BYTE)
5570 	    goto fail;
5571 	  break;
5572 
5573 	case after_dot:
5574 	  DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5575 	  if (PTR_BYTE_POS (d) <= PT_BYTE)
5576 	    goto fail;
5577 	  break;
5578 
5579 	case categoryspec:
5580 	case notcategoryspec:
5581 	  not = (re_opcode_t) *(p - 1) == notcategoryspec;
5582 	  mcnt = *p++;
5583 	  DEBUG_PRINT3 ("EXECUTING %scategoryspec %d.\n", not?"not":"", mcnt);
5584 	  PREFETCH ();
5585 	  {
5586 	    int len;
5587 	    re_wchar_t c;
5588 
5589 	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
5590 
5591 	    if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
5592 	      goto fail;
5593 	    d += len;
5594 	  }
5595 	  break;
5596 
5597 #endif /* emacs */
5598 
5599 	default:
5600 	  abort ();
5601 	}
5602       continue;  /* Successfully executed one pattern command; keep going.  */
5603 
5604 
5605     /* We goto here if a matching operation fails. */
5606     fail:
5607       IMMEDIATE_QUIT_CHECK;
5608       if (!FAIL_STACK_EMPTY ())
5609 	{
5610 	  re_char *str, *pat;
5611 	  /* A restart point is known.  Restore to that state.  */
5612 	  DEBUG_PRINT1 ("\nFAIL:\n");
5613 	  POP_FAILURE_POINT (str, pat);
5614 	  switch (SWITCH_ENUM_CAST ((re_opcode_t) *pat++))
5615 	    {
5616 	    case on_failure_keep_string_jump:
5617 	      assert (str == NULL);
5618 	      goto continue_failure_jump;
5619 
5620 	    case on_failure_jump_nastyloop:
5621 	      assert ((re_opcode_t)pat[-2] == no_op);
5622 	      PUSH_FAILURE_POINT (pat - 2, str);
5623 	      /* Fallthrough */
5624 
5625 	    case on_failure_jump_loop:
5626 	    case on_failure_jump:
5627 	    case succeed_n:
5628 	      d = str;
5629 	    continue_failure_jump:
5630 	      EXTRACT_NUMBER_AND_INCR (mcnt, pat);
5631 	      p = pat + mcnt;
5632 	      break;
5633 
5634 	    case no_op:
5635 	      /* A special frame used for nastyloops. */
5636 	      goto fail;
5637 
5638 	    default:
5639 	      abort();
5640 	    }
5641 
5642 	  assert (p >= bufp->buffer && p <= pend);
5643 
5644 	  if (d >= string1 && d <= end1)
5645 	    dend = end_match_1;
5646 	}
5647       else
5648 	break;   /* Matching at this starting point really fails.  */
5649     } /* for (;;) */
5650 
5651   if (best_regs_set)
5652     goto restore_best_regs;
5653 
5654   FREE_VARIABLES ();
5655 
5656   return -1;         			/* Failure to match.  */
5657 } /* re_match_2 */
5658 
5659 /* Subroutine definitions for re_match_2.  */
5660 
5661 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5662    bytes; nonzero otherwise.  */
5663 
5664 static int
bcmp_translate(s1,s2,len,translate,multibyte)5665 bcmp_translate (s1, s2, len, translate, multibyte)
5666      re_char *s1, *s2;
5667      register int len;
5668      RE_TRANSLATE_TYPE translate;
5669      const int multibyte;
5670 {
5671   register re_char *p1 = s1, *p2 = s2;
5672   re_char *p1_end = s1 + len;
5673   re_char *p2_end = s2 + len;
5674 
5675   /* FIXME: Checking both p1 and p2 presumes that the two strings might have
5676      different lengths, but relying on a single `len' would break this. -sm  */
5677   while (p1 < p1_end && p2 < p2_end)
5678     {
5679       int p1_charlen, p2_charlen;
5680       re_wchar_t p1_ch, p2_ch;
5681 
5682       p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
5683       p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
5684 
5685       if (RE_TRANSLATE (translate, p1_ch)
5686 	  != RE_TRANSLATE (translate, p2_ch))
5687 	return 1;
5688 
5689       p1 += p1_charlen, p2 += p2_charlen;
5690     }
5691 
5692   if (p1 != p1_end || p2 != p2_end)
5693     return 1;
5694 
5695   return 0;
5696 }
5697 
5698 /* Entry points for GNU code.  */
5699 
5700 /* re_compile_pattern is the GNU regular expression compiler: it
5701    compiles PATTERN (of length SIZE) and puts the result in BUFP.
5702    Returns 0 if the pattern was valid, otherwise an error string.
5703 
5704    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5705    are set in BUFP on entry.
5706 
5707    We call regex_compile to do the actual compilation.  */
5708 
5709 const char *
re_compile_pattern(pattern,length,bufp)5710 re_compile_pattern (pattern, length, bufp)
5711      const char *pattern;
5712      size_t length;
5713      struct re_pattern_buffer *bufp;
5714 {
5715   reg_errcode_t ret;
5716 
5717   /* GNU code is written to assume at least RE_NREGS registers will be set
5718      (and at least one extra will be -1).  */
5719   bufp->regs_allocated = REGS_UNALLOCATED;
5720 
5721   /* And GNU code determines whether or not to get register information
5722      by passing null for the REGS argument to re_match, etc., not by
5723      setting no_sub.  */
5724   bufp->no_sub = 0;
5725 
5726   ret = regex_compile ((re_char*) pattern, length, re_syntax_options, bufp);
5727 
5728   if (!ret)
5729     return NULL;
5730   return gettext (re_error_msgid[(int) ret]);
5731 }
5732 WEAK_ALIAS (__re_compile_pattern, re_compile_pattern)
5733 
5734 /* Entry points compatible with 4.2 BSD regex library.  We don't define
5735    them unless specifically requested.  */
5736 
5737 #if defined _REGEX_RE_COMP || defined _LIBC
5738 
5739 /* BSD has one and only one pattern buffer.  */
5740 static struct re_pattern_buffer re_comp_buf;
5741 
5742 char *
5743 # ifdef _LIBC
5744 /* Make these definitions weak in libc, so POSIX programs can redefine
5745    these names if they don't use our functions, and still use
5746    regcomp/regexec below without link errors.  */
5747 weak_function
5748 # endif
re_comp(s)5749 re_comp (s)
5750     const char *s;
5751 {
5752   reg_errcode_t ret;
5753 
5754   if (!s)
5755     {
5756       if (!re_comp_buf.buffer)
5757 	/* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5758 	return (char *) gettext ("No previous regular expression");
5759       return 0;
5760     }
5761 
5762   if (!re_comp_buf.buffer)
5763     {
5764       re_comp_buf.buffer = (unsigned char *) malloc (200);
5765       if (re_comp_buf.buffer == NULL)
5766 	/* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5767 	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5768       re_comp_buf.allocated = 200;
5769 
5770       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5771       if (re_comp_buf.fastmap == NULL)
5772 	/* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5773 	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5774     }
5775 
5776   /* Since `re_exec' always passes NULL for the `regs' argument, we
5777      don't need to initialize the pattern buffer fields which affect it.  */
5778 
5779   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5780 
5781   if (!ret)
5782     return NULL;
5783 
5784   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5785   return (char *) gettext (re_error_msgid[(int) ret]);
5786 }
5787 
5788 
5789 int
5790 # ifdef _LIBC
5791 weak_function
5792 # endif
re_exec(s)5793 re_exec (s)
5794     const char *s;
5795 {
5796   const int len = strlen (s);
5797   return
5798     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5799 }
5800 #endif /* _REGEX_RE_COMP */
5801 
5802 /* POSIX.2 functions.  Don't define these for Emacs.  */
5803 
5804 #ifndef emacs
5805 
5806 /* regcomp takes a regular expression as a string and compiles it.
5807 
5808    PREG is a regex_t *.  We do not expect any fields to be initialized,
5809    since POSIX says we shouldn't.  Thus, we set
5810 
5811      `buffer' to the compiled pattern;
5812      `used' to the length of the compiled pattern;
5813      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5814        REG_EXTENDED bit in CFLAGS is set; otherwise, to
5815        RE_SYNTAX_POSIX_BASIC;
5816      `fastmap' to an allocated space for the fastmap;
5817      `fastmap_accurate' to zero;
5818      `re_nsub' to the number of subexpressions in PATTERN.
5819 
5820    PATTERN is the address of the pattern string.
5821 
5822    CFLAGS is a series of bits which affect compilation.
5823 
5824      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5825      use POSIX basic syntax.
5826 
5827      If REG_NEWLINE is set, then . and [^...] don't match newline.
5828      Also, regexec will try a match beginning after every newline.
5829 
5830      If REG_ICASE is set, then we considers upper- and lowercase
5831      versions of letters to be equivalent when matching.
5832 
5833      If REG_NOSUB is set, then when PREG is passed to regexec, that
5834      routine will report only success or failure, and nothing about the
5835      registers.
5836 
5837    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
5838    the return codes and their meanings.)  */
5839 
5840 int
regcomp(preg,pattern,cflags)5841 regcomp (preg, pattern, cflags)
5842     regex_t *__restrict preg;
5843     const char *__restrict pattern;
5844     int cflags;
5845 {
5846   reg_errcode_t ret;
5847   reg_syntax_t syntax
5848     = (cflags & REG_EXTENDED) ?
5849       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5850 
5851   /* regex_compile will allocate the space for the compiled pattern.  */
5852   preg->buffer = 0;
5853   preg->allocated = 0;
5854   preg->used = 0;
5855 
5856   /* Try to allocate space for the fastmap.  */
5857   preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
5858 
5859   if (cflags & REG_ICASE)
5860     {
5861       unsigned i;
5862 
5863       preg->translate
5864 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5865 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
5866       if (preg->translate == NULL)
5867 	return (int) REG_ESPACE;
5868 
5869       /* Map uppercase characters to corresponding lowercase ones.  */
5870       for (i = 0; i < CHAR_SET_SIZE; i++)
5871 	preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
5872     }
5873   else
5874     preg->translate = NULL;
5875 
5876   /* If REG_NEWLINE is set, newlines are treated differently.  */
5877   if (cflags & REG_NEWLINE)
5878     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5879       syntax &= ~RE_DOT_NEWLINE;
5880       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5881     }
5882   else
5883     syntax |= RE_NO_NEWLINE_ANCHOR;
5884 
5885   preg->no_sub = !!(cflags & REG_NOSUB);
5886 
5887   /* POSIX says a null character in the pattern terminates it, so we
5888      can use strlen here in compiling the pattern.  */
5889   ret = regex_compile ((re_char*) pattern, strlen (pattern), syntax, preg);
5890 
5891   /* POSIX doesn't distinguish between an unmatched open-group and an
5892      unmatched close-group: both are REG_EPAREN.  */
5893   if (ret == REG_ERPAREN)
5894     ret = REG_EPAREN;
5895 
5896   if (ret == REG_NOERROR && preg->fastmap)
5897     { /* Compute the fastmap now, since regexec cannot modify the pattern
5898 	 buffer.  */
5899       re_compile_fastmap (preg);
5900       if (preg->can_be_null)
5901 	{ /* The fastmap can't be used anyway.  */
5902 	  free (preg->fastmap);
5903 	  preg->fastmap = NULL;
5904 	}
5905     }
5906   return (int) ret;
5907 }
5908 WEAK_ALIAS (__regcomp, regcomp)
5909 
5910 
5911 /* regexec searches for a given pattern, specified by PREG, in the
5912    string STRING.
5913 
5914    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5915    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5916    least NMATCH elements, and we set them to the offsets of the
5917    corresponding matched substrings.
5918 
5919    EFLAGS specifies `execution flags' which affect matching: if
5920    REG_NOTBOL is set, then ^ does not match at the beginning of the
5921    string; if REG_NOTEOL is set, then $ does not match at the end.
5922 
5923    We return 0 if we find a match and REG_NOMATCH if not.  */
5924 
5925 int
5926 regexec (preg, string, nmatch, pmatch, eflags)
5927     const regex_t *__restrict preg;
5928     const char *__restrict string;
5929     size_t nmatch;
5930     regmatch_t pmatch[];
5931     int eflags;
5932 {
5933   int ret;
5934   struct re_registers regs;
5935   regex_t private_preg;
5936   int len = strlen (string);
5937   boolean want_reg_info = !preg->no_sub && nmatch > 0 && pmatch;
5938 
5939   private_preg = *preg;
5940 
5941   private_preg.not_bol = !!(eflags & REG_NOTBOL);
5942   private_preg.not_eol = !!(eflags & REG_NOTEOL);
5943 
5944   /* The user has told us exactly how many registers to return
5945      information about, via `nmatch'.  We have to pass that on to the
5946      matching routines.  */
5947   private_preg.regs_allocated = REGS_FIXED;
5948 
5949   if (want_reg_info)
5950     {
5951       regs.num_regs = nmatch;
5952       regs.start = TALLOC (nmatch * 2, regoff_t);
5953       if (regs.start == NULL)
5954 	return (int) REG_NOMATCH;
5955       regs.end = regs.start + nmatch;
5956     }
5957 
5958   /* Instead of using not_eol to implement REG_NOTEOL, we could simply
5959      pass (&private_preg, string, len + 1, 0, len, ...) pretending the string
5960      was a little bit longer but still only matching the real part.
5961      This works because the `endline' will check for a '\n' and will find a
5962      '\0', correctly deciding that this is not the end of a line.
5963      But it doesn't work out so nicely for REG_NOTBOL, since we don't have
5964      a convenient '\0' there.  For all we know, the string could be preceded
5965      by '\n' which would throw things off.  */
5966 
5967   /* Perform the searching operation.  */
5968   ret = re_search (&private_preg, string, len,
5969 		   /* start: */ 0, /* range: */ len,
5970 		   want_reg_info ? &regs : (struct re_registers *) 0);
5971 
5972   /* Copy the register information to the POSIX structure.  */
5973   if (want_reg_info)
5974     {
5975       if (ret >= 0)
5976 	{
5977 	  unsigned r;
5978 
5979 	  for (r = 0; r < nmatch; r++)
5980 	    {
5981 	      pmatch[r].rm_so = regs.start[r];
5982 	      pmatch[r].rm_eo = regs.end[r];
5983 	    }
5984 	}
5985 
5986       /* If we needed the temporary register info, free the space now.  */
5987       free (regs.start);
5988     }
5989 
5990   /* We want zero return to mean success, unlike `re_search'.  */
5991   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5992 }
5993 WEAK_ALIAS (__regexec, regexec)
5994 
5995 
5996 /* Returns a message corresponding to an error code, ERRCODE, returned
5997    from either regcomp or regexec.   We don't use PREG here.  */
5998 
5999 size_t
6000 regerror (errcode, preg, errbuf, errbuf_size)
6001     int errcode;
6002     const regex_t *preg;
6003     char *errbuf;
6004     size_t errbuf_size;
6005 {
6006   const char *msg;
6007   size_t msg_size;
6008 
6009   if (errcode < 0
6010       || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
6011     /* Only error codes returned by the rest of the code should be passed
6012        to this routine.  If we are given anything else, or if other regex
6013        code generates an invalid error code, then the program has a bug.
6014        Dump core so we can fix it.  */
6015     abort ();
6016 
6017   msg = gettext (re_error_msgid[errcode]);
6018 
6019   msg_size = strlen (msg) + 1; /* Includes the null.  */
6020 
6021   if (errbuf_size != 0)
6022     {
6023       if (msg_size > errbuf_size)
6024 	{
6025 	  strncpy (errbuf, msg, errbuf_size - 1);
6026 	  errbuf[errbuf_size - 1] = 0;
6027 	}
6028       else
6029 	strcpy (errbuf, msg);
6030     }
6031 
6032   return msg_size;
6033 }
6034 WEAK_ALIAS (__regerror, regerror)
6035 
6036 
6037 /* Free dynamically allocated space used by PREG.  */
6038 
6039 void
6040 regfree (preg)
6041     regex_t *preg;
6042 {
6043   if (preg->buffer != NULL)
6044     free (preg->buffer);
6045   preg->buffer = NULL;
6046 
6047   preg->allocated = 0;
6048   preg->used = 0;
6049 
6050   if (preg->fastmap != NULL)
6051     free (preg->fastmap);
6052   preg->fastmap = NULL;
6053   preg->fastmap_accurate = 0;
6054 
6055   if (preg->translate != NULL)
6056     free (preg->translate);
6057   preg->translate = NULL;
6058 }
6059 WEAK_ALIAS (__regfree, regfree)
6060 
6061 #endif /* not emacs  */
6062