1 /* Extended regular expression matching and search library,
2    version 0.12.
3    (Implements POSIX draft P1003.2/D11.2, except for some of the
4    internationalization features.)
5 
6    Copyright (C) 1993,1994,1995,1996,1997,1999,2000 Free Software Foundation, Inc.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2, or (at your option)
11    any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software Foundation,
20    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
21 
22 /* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
23    Last change: Dec 1, 2000 by okabe katsuyuki */
24 
25 /* AIX requires this to be the first thing in the file. */
26 #if defined (_AIX) && !defined (REGEX_MALLOC)
27   #pragma alloca
28 #endif
29 
30 #undef	_GNU_SOURCE
31 #define _GNU_SOURCE
32 
33 #ifdef HAVE_CONFIG_H
34 #include <config.h>
35 #endif
36 
37 #if defined(STDC_HEADERS) && !defined(emacs)
38 #include <stddef.h>
39 #else
40 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
41 #include <sys/types.h>
42 #endif
43 
44 /* For platform which support the ISO C amendement 1 functionality we
45    support user defined character classes.  */
46 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
47 # include <wctype.h>
48 # include <wchar.h>
49 #endif
50 
51 /* This is for other GNU distributions with internationalized messages.  */
52 #if HAVE_LIBINTL_H || defined (_LIBC)
53 # include <libintl.h>
54 #else
55 # define gettext(msgid) (msgid)
56 #endif
57 
58 #ifndef gettext_noop
59 /* This define is so xgettext can find the internationalizable
60    strings.  */
61 #define gettext_noop(String) String
62 #endif
63 
64 /* The `emacs' switch turns on certain matching commands
65    that make sense only in Emacs. */
66 #ifdef emacs
67 
68 #include "lisp.h"
69 #include "buffer.h"
70 #include "syntax.h"
71 
72 #else  /* not emacs */
73 
74 /* If we are not linking with Emacs proper,
75    we can't use the relocating allocator
76    even if config.h says that we can.  */
77 #undef REL_ALLOC
78 
79 #if defined (STDC_HEADERS) || defined (_LIBC)
80 #include <stdlib.h>
81 #else
82 char *malloc ();
83 char *realloc ();
84 #endif
85 
86 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
87    If nothing else has been done, use the method below.  */
88 #ifdef INHIBIT_STRING_HEADER
89 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
90 #if !defined (bzero) && !defined (bcopy)
91 #undef INHIBIT_STRING_HEADER
92 #endif
93 #endif
94 #endif
95 
96 /* This is the normal way of making sure we have a bcopy and a bzero.
97    This is used in most programs--a few other programs avoid this
98    by defining INHIBIT_STRING_HEADER.  */
99 #ifndef INHIBIT_STRING_HEADER
100 #if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
101 #include <string.h>
102 #ifndef bcmp
103 #define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
104 #endif
105 #ifndef bcopy
106 #define bcopy(s, d, n)	memmove ((d), (s), (n))
107 #endif
108 #ifndef bzero
109 #define bzero(s, n)	memset ((s), 0, (n))
110 #endif
111 #else
112 #include <strings.h>
113 #endif
114 #endif
115 
116 /* Define the syntax stuff for \<, \>, etc.  */
117 
118 /* This must be nonzero for the wordchar and notwordchar pattern
119    commands in re_match_2.  */
120 #ifndef Sword
121 #define Sword 1
122 #endif
123 
124 #ifdef SWITCH_ENUM_BUG
125 #define SWITCH_ENUM_CAST(x) ((int)(x))
126 #else
127 #define SWITCH_ENUM_CAST(x) (x)
128 #endif
129 
130 #endif /* not emacs */
131 
132 /* Get the interface, including the syntax bits.  */
133 #include "regex.h"
134 #include "mbc.h"
135 
136 /* isalpha etc. are used for the character classes.  */
137 #include <ctype.h>
138 
139 /* Jim Meyering writes:
140 
141    "... Some ctype macros are valid only for character codes that
142    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
143    using /bin/cc or gcc but without giving an ansi option).  So, all
144    ctype uses should be through macros like ISPRINT...  If
145    STDC_HEADERS is defined, then autoconf has verified that the ctype
146    macros don't need to be guarded with references to isascii. ...
147    Defining isascii to 1 should let any compiler worth its salt
148    eliminate the && through constant folding."  */
149 
150 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
151 #define ISASCII(c) 1
152 #else
153 #define ISASCII(c) isascii(c)
154 #endif
155 
156 #ifdef isblank
157 #define ISBLANK(c) (ISASCII (c) && isblank (c))
158 #else
159 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
160 #endif
161 #ifdef isgraph
162 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
163 #else
164 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
165 #endif
166 
167 #define ISPRINT(c) (ISASCII (c) && isprint (c))
168 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
169 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
170 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
171 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
172 #define ISLOWER(c) (ISASCII (c) && islower (c))
173 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
174 #define ISSPACE(c) (ISASCII (c) && isspace (c))
175 #define ISUPPER(c) (ISASCII (c) && isupper (c))
176 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
177 
178 #ifndef NULL
179 #define NULL (void *)0
180 #endif
181 
182 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
183    since ours (we hope) works properly with all combinations of
184    machines, compilers, `char' and `unsigned char' argument types.
185    (Per Bothner suggested the basic approach.)  */
186 #undef SIGN_EXTEND_CHAR
187 #if __STDC__  || defined(MSDOS)
188 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
189 #else  /* not __STDC__ */
190 /* As in Harbison and Steele.  */
191 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
192 #endif
193 
194 #ifndef emacs
195 /* How many characters in the character set.  */
196 #define CHAR_SET_SIZE 256
197 
198 #ifdef SYNTAX_TABLE
199 
200 extern char *re_syntax_table;
201 
202 #else /* not SYNTAX_TABLE */
203 
204 static char re_syntax_table[CHAR_SET_SIZE];
205 
206 static void
init_syntax_once()207 init_syntax_once ()
208 {
209    register int c;
210    static int done = 0;
211 
212    if (done)
213      return;
214    bzero (re_syntax_table, sizeof re_syntax_table);
215 
216    for (c = 0; c < CHAR_SET_SIZE; c++)
217      if (ISALNUM (c))
218 	re_syntax_table[c] = Sword;
219 
220    re_syntax_table['_'] = Sword;
221 
222    done = 1;
223 }
224 
225 #endif /* not SYNTAX_TABLE */
226 
227 #define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
228 
229 #endif /* emacs */
230 
231 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
232    use `alloca' instead of `malloc'.  This is because using malloc in
233    re_search* or re_match* could cause memory leaks when C-g is used in
234    Emacs; also, malloc is slower and causes storage fragmentation.  On
235    the other hand, malloc is more portable, and easier to debug.
236 
237    Because we sometimes use alloca, some routines have to be macros,
238    not functions -- `alloca'-allocated space disappears at the end of the
239    function it is called in.  */
240 
241 #ifdef REGEX_MALLOC
242 
243 #define REGEX_ALLOCATE malloc
244 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
245 #define REGEX_FREE free
246 
247 #else /* not REGEX_MALLOC  */
248 
249 /* Emacs already defines alloca, sometimes.  */
250 #ifndef alloca
251 
252 /* Make alloca work the best possible way.  */
253 #ifdef __GNUC__
254 #define alloca __builtin_alloca
255 #else /* not __GNUC__ */
256 #if HAVE_ALLOCA_H
257 #include <alloca.h>
258 #else /* not __GNUC__ or HAVE_ALLOCA_H */
259 #if 0 /* It is a bad idea to declare alloca.  We always cast the result.  */
260 #ifndef _AIX /* Already did AIX, up at the top.  */
261 #ifdef __TURBOC__
262 #include <malloc.h>
263 #else
264 char *alloca ();
265 #endif
266 #endif /* not _AIX */
267 #endif
268 #endif /* not HAVE_ALLOCA_H */
269 #endif /* not __GNUC__ */
270 
271 #endif /* not alloca */
272 
273 #define REGEX_ALLOCATE alloca
274 
275 /* Assumes a `char *destination' variable.  */
276 #define REGEX_REALLOCATE(source, osize, nsize)				\
277   (destination = (char *) alloca (nsize),				\
278    bcopy (source, destination, osize),					\
279    destination)
280 
281 /* No need to do anything to free, after alloca.  */
282 #define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
283 
284 #endif /* not REGEX_MALLOC */
285 
286 /* Define how to allocate the failure stack.  */
287 
288 #if defined (REL_ALLOC) && defined (REGEX_MALLOC)
289 
290 #define REGEX_ALLOCATE_STACK(size)				\
291   r_alloc (&failure_stack_ptr, (size))
292 #define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
293   r_re_alloc (&failure_stack_ptr, (nsize))
294 #define REGEX_FREE_STACK(ptr)					\
295   r_alloc_free (&failure_stack_ptr)
296 
297 #else /* not using relocating allocator */
298 
299 #ifdef REGEX_MALLOC
300 
301 #define REGEX_ALLOCATE_STACK malloc
302 #define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
303 #define REGEX_FREE_STACK free
304 
305 #else /* not REGEX_MALLOC */
306 
307 #define REGEX_ALLOCATE_STACK alloca
308 
309 #define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
310    REGEX_REALLOCATE (source, osize, nsize)
311 /* No need to explicitly free anything.  */
312 #define REGEX_FREE_STACK(arg)
313 
314 #endif /* not REGEX_MALLOC */
315 #endif /* not using relocating allocator */
316 
317 
318 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
319    `string1' or just past its end.  This works if PTR is NULL, which is
320    a good thing.  */
321 #define FIRST_STRING_P(ptr) 					\
322   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
323 
324 /* (Re)Allocate N items of type T using malloc, or fail.  */
325 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
326 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
327 #define RETALLOC_IF(addr, n, t) \
328   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
329 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
330 
331 #define BYTEWIDTH 8 /* In bits.  */
332 
333 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
334 
335 #undef MAX
336 #undef MIN
337 #define MAX(a, b) ((a) > (b) ? (a) : (b))
338 #define MIN(a, b) ((a) < (b) ? (a) : (b))
339 
340 typedef char boolean;
341 #define false 0
342 #define true 1
343 
344 static int re_match_2_internal ();
345 
346 /* These are the command codes that appear in compiled regular
347    expressions.  Some opcodes are followed by argument bytes.  A
348    command code can specify any interpretation whatsoever for its
349    arguments.  Zero bytes may appear in the compiled regular expression.  */
350 
351 typedef enum
352 {
353   no_op = 0,
354 
355   /* Succeed right away--no more backtracking.  */
356   succeed,
357 
358         /* Followed by one byte giving n, then by n literal bytes.  */
359   exactn,
360 
361         /* Matches any (more or less) character.  */
362   anychar,
363 
364         /* Matches any one char belonging to specified set.  First
365            following byte is number of bitmap bytes.  Then come bytes
366            for a bitmap saying which chars are in.  Bits in each byte
367            are ordered low-bit-first.  A character is in the set if its
368            bit is 1.  A character too large to have a bit in the map is
369            automatically not in the set.  */
370   charset,
371 
372         /* Same parameters as charset, but match any character that is
373            not one of those specified.  */
374   charset_not,
375 
376         /* Start remembering the text that is matched, for storing in a
377            register.  Followed by one byte with the register number, in
378            the range 0 to one less than the pattern buffer's re_nsub
379            field.  Then followed by one byte with the number of groups
380            inner to this one.  (This last has to be part of the
381            start_memory only because we need it in the on_failure_jump
382            of re_match_2.)  */
383   start_memory,
384 
385         /* Stop remembering the text that is matched and store it in a
386            memory register.  Followed by one byte with the register
387            number, in the range 0 to one less than `re_nsub' in the
388            pattern buffer, and one byte with the number of inner groups,
389            just like `start_memory'.  (We need the number of inner
390            groups here because we don't have any easy way of finding the
391            corresponding start_memory when we're at a stop_memory.)  */
392   stop_memory,
393 
394         /* Match a duplicate of something remembered. Followed by one
395            byte containing the register number.  */
396   duplicate,
397 
398         /* Fail unless at beginning of line.  */
399   begline,
400 
401         /* Fail unless at end of line.  */
402   endline,
403 
404         /* Succeeds if at beginning of buffer (if emacs) or at beginning
405            of string to be matched (if not).  */
406   begbuf,
407 
408         /* Analogously, for end of buffer/string.  */
409   endbuf,
410 
411         /* Followed by two byte relative address to which to jump.  */
412   jump,
413 
414 	/* Same as jump, but marks the end of an alternative.  */
415   jump_past_alt,
416 
417         /* Followed by two-byte relative address of place to resume at
418            in case of failure.  */
419   on_failure_jump,
420 
421         /* Like on_failure_jump, but pushes a placeholder instead of the
422            current string position when executed.  */
423   on_failure_keep_string_jump,
424 
425         /* Throw away latest failure point and then jump to following
426            two-byte relative address.  */
427   pop_failure_jump,
428 
429         /* Change to pop_failure_jump if know won't have to backtrack to
430            match; otherwise change to jump.  This is used to jump
431            back to the beginning of a repeat.  If what follows this jump
432            clearly won't match what the repeat does, such that we can be
433            sure that there is no use backtracking out of repetitions
434            already matched, then we change it to a pop_failure_jump.
435            Followed by two-byte address.  */
436   maybe_pop_jump,
437 
438         /* Jump to following two-byte address, and push a dummy failure
439            point. This failure point will be thrown away if an attempt
440            is made to use it for a failure.  A `+' construct makes this
441            before the first repeat.  Also used as an intermediary kind
442            of jump when compiling an alternative.  */
443   dummy_failure_jump,
444 
445 	/* Push a dummy failure point and continue.  Used at the end of
446 	   alternatives.  */
447   push_dummy_failure,
448 
449         /* Followed by two-byte relative address and two-byte number n.
450            After matching N times, jump to the address upon failure.  */
451   succeed_n,
452 
453         /* Followed by two-byte relative address, and two-byte number n.
454            Jump to the address N times, then fail.  */
455   jump_n,
456 
457         /* Set the following two-byte relative address to the
458            subsequent two-byte number.  The address *includes* the two
459            bytes of number.  */
460   set_number_at,
461 
462   wordchar,	/* Matches any word-constituent character.  */
463   notwordchar,	/* Matches any char that is not a word-constituent.  */
464 
465   wordbeg,	/* Succeeds if at word beginning.  */
466   wordend,	/* Succeeds if at word end.  */
467 
468   wordbound,	/* Succeeds if at a word boundary.  */
469   notwordbound	/* Succeeds if not at a word boundary.  */
470 
471 #ifdef emacs
472   ,before_dot,	/* Succeeds if before point.  */
473   at_dot,	/* Succeeds if at point.  */
474   after_dot,	/* Succeeds if after point.  */
475 
476 	/* Matches any character whose syntax is specified.  Followed by
477            a byte which contains a syntax code, e.g., Sword.  */
478   syntaxspec,
479 
480 	/* Matches any character whose syntax is not that specified.  */
481   notsyntaxspec
482 #endif /* emacs */
483 } re_opcode_t;
484 
485 /* Common operations on the compiled pattern.  */
486 
487 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
488 
489 #define STORE_NUMBER(destination, number)				\
490   do {									\
491     (destination)[0] = (number) & 0377;					\
492     (destination)[1] = (number) >> 8;					\
493   } while (0)
494 
495 /* Same as STORE_NUMBER, except increment DESTINATION to
496    the byte after where the number is stored.  Therefore, DESTINATION
497    must be an lvalue.  */
498 
499 #define STORE_NUMBER_AND_INCR(destination, number)			\
500   do {									\
501     STORE_NUMBER (destination, number);					\
502     (destination) += 2;							\
503   } while (0)
504 
505 /* Put into DESTINATION a number stored in two contiguous bytes starting
506    at SOURCE.  */
507 
508 #define EXTRACT_NUMBER(destination, source)				\
509   do {									\
510     (destination) = *(source) & 0377;					\
511     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
512   } while (0)
513 
514 #ifdef DEBUG
515 static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
516 static void
extract_number(dest,source)517 extract_number (dest, source)
518     int *dest;
519     unsigned char *source;
520 {
521   int temp = SIGN_EXTEND_CHAR (*(source + 1));
522   *dest = *source & 0377;
523   *dest += temp << 8;
524 }
525 
526 #ifndef EXTRACT_MACROS /* To debug the macros.  */
527 #undef EXTRACT_NUMBER
528 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
529 #endif /* not EXTRACT_MACROS */
530 
531 #endif /* DEBUG */
532 
533 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
534    SOURCE must be an lvalue.  */
535 
536 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
537   do {									\
538     EXTRACT_NUMBER (destination, source);				\
539     (source) += 2; 							\
540   } while (0)
541 
542 #ifdef DEBUG
543 static void extract_number_and_incr _RE_ARGS ((int *destination,
544 					       unsigned char **source));
545 static void
extract_number_and_incr(destination,source)546 extract_number_and_incr (destination, source)
547     int *destination;
548     unsigned char **source;
549 {
550   extract_number (destination, *source);
551   *source += 2;
552 }
553 
554 #ifndef EXTRACT_MACROS
555 #undef EXTRACT_NUMBER_AND_INCR
556 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
557   extract_number_and_incr (&dest, &src)
558 #endif /* not EXTRACT_MACROS */
559 
560 #endif /* DEBUG */
561 
562 #define STORE_MBC(p, wc) do { \
563   unsigned long _wc = (wc); \
564   (p)[3] = (unsigned char) _wc; \
565   (p)[2] = (unsigned char) (_wc >>= 8);	\
566   (p)[1] = (unsigned char) (_wc >>= 8);	\
567   (p)[0] = (unsigned char) (_wc >> 8); \
568 } while (0)
569 #define STORE_MBC_AND_INCR(p, wc) do { \
570   STORE_MBC((p), (wc));	\
571   (p) += 4; \
572 } while (0)
573 
574 #define EXTRACT_MBC(p) \
575   ((((mbchar_t) (unsigned char) (p)[0]) << 24) \
576    | (((mbchar_t) (unsigned char) (p)[1]) << 16) \
577    | (((mbchar_t) (unsigned char) (p)[2]) << 8)	\
578    | ((mbchar_t) (unsigned char) (p)[3]))
579 #define EXTRACT_MBC_AND_INCR(p) ((p) += 4, EXTRACT_MBC(p - 4))
580 
581 #define EXTRACT_UNSIGNED(p) \
582   ((unsigned char) (p)[0] | (unsigned char) (p)[1] << 8)
583 #define EXTRACT_UNSIGNED_AND_INCR(p) \
584   ((p) += 2, (unsigned char) (p)[-2] | (unsigned char) (p)[-1] << 8)
585 
586 /* If DEBUG is defined, Regex prints many voluminous messages about what
587    it is doing (if the variable `debug' is nonzero).  If linked with the
588    main program in `iregex.c', you can enter patterns and strings
589    interactively.  And if linked with the main program in `main.c' and
590    the other test files, you can run the already-written tests.  */
591 
592 #ifdef DEBUG
593 
594 /* We use standard I/O for debugging.  */
595 #include <stdio.h>
596 
597 /* It is useful to test things that ``must'' be true when debugging.  */
598 #include <assert.h>
599 
600 static int debug = 0;
601 
602 #define DEBUG_STATEMENT(e) e
603 #define DEBUG_PRINT1(x) if (debug) printf (x)
604 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
605 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
606 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
607 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
608   if (debug) print_partial_compiled_pattern (s, e)
609 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
610   if (debug) print_double_string (w, s1, sz1, s2, sz2)
611 
612 
613 /* Print the fastmap in human-readable form.  */
614 
615 void
print_fastmap(fastmap)616 print_fastmap (fastmap)
617     char *fastmap;
618 {
619   unsigned was_a_range = 0;
620   unsigned i = 0;
621 
622   while (i < (1 << BYTEWIDTH))
623     {
624       if (fastmap[i++])
625 	{
626 	  was_a_range = 0;
627           putchar (i - 1);
628           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
629             {
630               was_a_range = 1;
631               i++;
632             }
633 	  if (was_a_range)
634             {
635               printf ("-");
636               putchar (i - 1);
637             }
638         }
639     }
640   putchar ('\n');
641 }
642 
643 
644 /* Print a compiled pattern string in human-readable form, starting at
645    the START pointer into it and ending just before the pointer END.  */
646 
647 void
print_partial_compiled_pattern(start,end)648 print_partial_compiled_pattern (start, end)
649     unsigned char *start;
650     unsigned char *end;
651 {
652   int mcnt, mcnt2;
653   unsigned char *p1;
654   unsigned char *p = start;
655   unsigned char *pend = end;
656 
657   if (start == NULL)
658     {
659       printf ("(null)\n");
660       return;
661     }
662 
663   /* Loop over pattern commands.  */
664   while (p < pend)
665     {
666       printf ("%d:\t", p - start);
667 
668       switch ((re_opcode_t) *p++)
669 	{
670         case no_op:
671           printf ("/no_op");
672           break;
673 
674 	case exactn:
675 	  mcnt = *p++;
676           printf ("/exactn/%d", mcnt);
677           do
678 	    {
679 	      size_t n;
680               putchar ('/');
681 	      if (ismbchar (*p) && (n = mbclen(*p)) <= mcnt) {
682 		printf ("/%.*s", n, (char *) p);
683 		p += n;
684 		mcnt -= (n - 1);
685 		continue;
686 	      }
687 	      putchar (*p++);
688             }
689           while (--mcnt);
690           break;
691 
692 	case start_memory:
693           mcnt = *p++;
694           printf ("/start_memory/%d/%d", mcnt, *p++);
695           break;
696 
697 	case stop_memory:
698           mcnt = *p++;
699 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
700           break;
701 
702 	case duplicate:
703 	  printf ("/duplicate/%d", *p++);
704 	  break;
705 
706 	case anychar:
707 	  printf ("/anychar");
708 	  break;
709 
710 	case charset:
711         case charset_not:
712           {
713             register int c, last = -100;
714 	    register int in_range = 0;
715 
716 	    printf ("/charset [%s",
717 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
718 
719             assert (p + *p < pend);
720 
721             for (c = 0; c < 256; c++)
722 	      if (c / 8 < *p
723 		  && (p[1 + (c/8)] & (1 << (c % 8))))
724 		{
725 		  /* Are we starting a range?  */
726 		  if (last + 1 == c && ! in_range)
727 		    {
728 		      putchar ('-');
729 		      in_range = 1;
730 		    }
731 		  /* Have we broken a range?  */
732 		  else if (last + 1 != c && in_range)
733               {
734 		      putchar (last);
735 		      in_range = 0;
736 		    }
737 
738 		  if (! in_range)
739 		    putchar (c);
740 
741 		  last = c;
742               }
743 
744 	    if (in_range)
745 	      putchar (last);
746 
747 	    p += 1 + *p;
748 	    {
749 	      unsigned short i, size;
750 	      char buf0[8];
751 	      char buf1[8];
752 
753 	      size = EXTRACT_UNSIGNED_AND_INCR (p);
754 	      for (i = 0; i < size; i++) {
755 		bzero (buf0, sizeof buf0);
756 		bzero (buf1, sizeof buf1);
757 		mbcstore (buf0, EXTRACT_MBC_AND_INCR (p));
758 		mbcstore (buf1, EXTRACT_MBC_AND_INCR (p));
759 		printf ("%s-%s", buf0, buf1);
760 	      }
761 	    }
762 	    putchar (']');
763 	  }
764 	  break;
765 
766 	case begline:
767 	  printf ("/begline");
768           break;
769 
770 	case endline:
771           printf ("/endline");
772           break;
773 
774 	case on_failure_jump:
775           extract_number_and_incr (&mcnt, &p);
776   	  printf ("/on_failure_jump to %d", p + mcnt - start);
777           break;
778 
779 	case on_failure_keep_string_jump:
780           extract_number_and_incr (&mcnt, &p);
781   	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
782           break;
783 
784 	case dummy_failure_jump:
785           extract_number_and_incr (&mcnt, &p);
786   	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
787           break;
788 
789 	case push_dummy_failure:
790           printf ("/push_dummy_failure");
791           break;
792 
793         case maybe_pop_jump:
794           extract_number_and_incr (&mcnt, &p);
795   	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
796 	  break;
797 
798         case pop_failure_jump:
799 	  extract_number_and_incr (&mcnt, &p);
800   	  printf ("/pop_failure_jump to %d", p + mcnt - start);
801 	  break;
802 
803         case jump_past_alt:
804 	  extract_number_and_incr (&mcnt, &p);
805   	  printf ("/jump_past_alt to %d", p + mcnt - start);
806 	  break;
807 
808         case jump:
809 	  extract_number_and_incr (&mcnt, &p);
810   	  printf ("/jump to %d", p + mcnt - start);
811 	  break;
812 
813         case succeed_n:
814           extract_number_and_incr (&mcnt, &p);
815 	  p1 = p + mcnt;
816           extract_number_and_incr (&mcnt2, &p);
817 	  printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
818           break;
819 
820         case jump_n:
821           extract_number_and_incr (&mcnt, &p);
822 	  p1 = p + mcnt;
823           extract_number_and_incr (&mcnt2, &p);
824 	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
825           break;
826 
827         case set_number_at:
828           extract_number_and_incr (&mcnt, &p);
829 	  p1 = p + mcnt;
830           extract_number_and_incr (&mcnt2, &p);
831 	  printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
832           break;
833 
834         case wordbound:
835 	  printf ("/wordbound");
836 	  break;
837 
838 	case notwordbound:
839 	  printf ("/notwordbound");
840           break;
841 
842 	case wordbeg:
843 	  printf ("/wordbeg");
844 	  break;
845 
846 	case wordend:
847 	  printf ("/wordend");
848 
849 #ifdef emacs
850 	case before_dot:
851 	  printf ("/before_dot");
852           break;
853 
854 	case at_dot:
855 	  printf ("/at_dot");
856           break;
857 
858 	case after_dot:
859 	  printf ("/after_dot");
860           break;
861 
862 	case syntaxspec:
863           printf ("/syntaxspec");
864 	  mcnt = *p++;
865 	  printf ("/%d", mcnt);
866           break;
867 
868 	case notsyntaxspec:
869           printf ("/notsyntaxspec");
870 	  mcnt = *p++;
871 	  printf ("/%d", mcnt);
872 	  break;
873 #endif /* emacs */
874 
875 	case wordchar:
876 	  printf ("/wordchar");
877           break;
878 
879 	case notwordchar:
880 	  printf ("/notwordchar");
881           break;
882 
883 	case begbuf:
884 	  printf ("/begbuf");
885           break;
886 
887 	case endbuf:
888 	  printf ("/endbuf");
889           break;
890 
891         default:
892           printf ("?%d", *(p-1));
893 	}
894 
895       putchar ('\n');
896     }
897 
898   printf ("%d:\tend of pattern.\n", p - start);
899 }
900 
901 
902 void
print_compiled_pattern(bufp)903 print_compiled_pattern (bufp)
904     struct re_pattern_buffer *bufp;
905 {
906   unsigned char *buffer = bufp->buffer;
907 
908   print_partial_compiled_pattern (buffer, buffer + bufp->used);
909   printf ("%ld bytes used/%ld bytes allocated.\n",
910 	  bufp->used, bufp->allocated);
911 
912   if (bufp->fastmap_accurate && bufp->fastmap)
913     {
914       printf ("fastmap: ");
915       print_fastmap (bufp->fastmap);
916     }
917 
918   printf ("re_nsub: %d\t", bufp->re_nsub);
919   printf ("regs_alloc: %d\t", bufp->regs_allocated);
920   printf ("can_be_null: %d\t", bufp->can_be_null);
921   printf ("newline_anchor: %d\n", bufp->newline_anchor);
922   printf ("no_sub: %d\t", bufp->no_sub);
923   printf ("not_bol: %d\t", bufp->not_bol);
924   printf ("not_eol: %d\t", bufp->not_eol);
925   printf ("syntax: %lx\n", bufp->syntax);
926   /* Perhaps we should print the translate table?  */
927 }
928 
929 
930 void
print_double_string(where,string1,size1,string2,size2)931 print_double_string (where, string1, size1, string2, size2)
932     const char *where;
933     const char *string1;
934     const char *string2;
935     int size1;
936     int size2;
937 {
938   int this_char;
939 
940   if (where == NULL)
941     printf ("(null)");
942   else
943     {
944       if (FIRST_STRING_P (where))
945         {
946           for (this_char = where - string1; this_char < size1; this_char++)
947             putchar (string1[this_char]);
948 
949           where = string2;
950         }
951 
952       for (this_char = where - string2; this_char < size2; this_char++)
953         putchar (string2[this_char]);
954     }
955 }
956 
957 void
printchar(c)958 printchar (c)
959      int c;
960 {
961   putc (c, stderr);
962 }
963 
964 #else /* not DEBUG */
965 
966 #undef assert
967 #define assert(e)
968 
969 #define DEBUG_STATEMENT(e)
970 #define DEBUG_PRINT1(x)
971 #define DEBUG_PRINT2(x1, x2)
972 #define DEBUG_PRINT3(x1, x2, x3)
973 #define DEBUG_PRINT4(x1, x2, x3, x4)
974 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
975 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
976 
977 #endif /* not DEBUG */
978 
979 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
980    also be assigned to arbitrarily: each pattern buffer stores its own
981    syntax, so it can be changed between regex compilations.  */
982 /* This has no initializer because initialized variables in Emacs
983    become read-only after dumping.  */
984 reg_syntax_t re_syntax_options;
985 
986 
987 /* Specify the precise syntax of regexps for compilation.  This provides
988    for compatibility for various utilities which historically have
989    different, incompatible syntaxes.
990 
991    The argument SYNTAX is a bit mask comprised of the various bits
992    defined in regex.h.  We return the old syntax.  */
993 
994 reg_syntax_t
re_set_syntax(syntax)995 re_set_syntax (syntax)
996     reg_syntax_t syntax;
997 {
998   reg_syntax_t ret = re_syntax_options;
999 
1000   re_syntax_options = syntax;
1001 #ifdef DEBUG
1002   if (syntax & RE_DEBUG)
1003     debug = 1;
1004   else if (debug) /* was on but now is not */
1005     debug = 0;
1006 #endif /* DEBUG */
1007   return ret;
1008 }
1009 
1010 /* This table gives an error message for each of the error codes listed
1011    in regex.h.  Obviously the order here has to be same as there.
1012    POSIX doesn't require that we do anything for REG_NOERROR,
1013    but why not be nice?  */
1014 
1015 static const char *re_error_msgid[] =
1016   {
1017     gettext_noop ("Success"),	/* REG_NOERROR */
1018     gettext_noop ("No match"),	/* REG_NOMATCH */
1019     gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1020     gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1021     gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1022     gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1023     gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1024     gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
1025     gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1026     gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1027     gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1028     gettext_noop ("Invalid range end"),	/* REG_ERANGE */
1029     gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1030     gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1031     gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1032     gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1033     gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1034   };
1035 
1036 /* Avoiding alloca during matching, to placate r_alloc.  */
1037 
1038 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1039    searching and matching functions should not call alloca.  On some
1040    systems, alloca is implemented in terms of malloc, and if we're
1041    using the relocating allocator routines, then malloc could cause a
1042    relocation, which might (if the strings being searched are in the
1043    ralloc heap) shift the data out from underneath the regexp
1044    routines.
1045 
1046    Here's another reason to avoid allocation: Emacs
1047    processes input from X in a signal handler; processing X input may
1048    call malloc; if input arrives while a matching routine is calling
1049    malloc, then we're scrod.  But Emacs can't just block input while
1050    calling matching routines; then we don't notice interrupts when
1051    they come in.  So, Emacs blocks input around all regexp calls
1052    except the matching calls, which it leaves unprotected, in the
1053    faith that they will not malloc.  */
1054 
1055 /* Normally, this is fine.  */
1056 #define MATCH_MAY_ALLOCATE
1057 
1058 /* When using GNU C, we are not REALLY using the C alloca, no matter
1059    what config.h may say.  So don't take precautions for it.  */
1060 #ifdef __GNUC__
1061 #undef C_ALLOCA
1062 #endif
1063 
1064 /* The match routines may not allocate if (1) they would do it with malloc
1065    and (2) it's not safe for them to use malloc.
1066    Note that if REL_ALLOC is defined, matching would not use malloc for the
1067    failure stack, but we would still use it for the register vectors;
1068    so REL_ALLOC should not affect this.  */
1069 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
1070 #undef MATCH_MAY_ALLOCATE
1071 #endif
1072 
1073 
1074 /* Failure stack declarations and macros; both re_compile_fastmap and
1075    re_match_2 use a failure stack.  These have to be macros because of
1076    REGEX_ALLOCATE_STACK.  */
1077 
1078 
1079 /* Number of failure points for which to initially allocate space
1080    when matching.  If this number is exceeded, we allocate more
1081    space, so it is not a hard limit.  */
1082 #ifndef INIT_FAILURE_ALLOC
1083 #define INIT_FAILURE_ALLOC 5
1084 #endif
1085 
1086 /* Roughly the maximum number of failure points on the stack.  Would be
1087    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1088    This is a variable only so users of regex can assign to it; we never
1089    change it ourselves.  */
1090 
1091 #ifndef INT_IS_16BIT
1092 #ifndef WIN32
1093 #if defined (_MSC_VER) || defined (__TURBOC__)
1094 #define INT_IS_16BIT
1095 #endif
1096 #endif
1097 #endif
1098 
1099 #ifdef INT_IS_16BIT
1100 
1101 #if defined (MATCH_MAY_ALLOCATE)
1102 /* 4400 was enough to cause a crash on Alpha OSF/1,
1103    whose default stack limit is 2mb.  */
1104 long int re_max_failures = 4000;
1105 #else
1106 long int re_max_failures = 2000;
1107 #endif
1108 
1109 union fail_stack_elt
1110 {
1111   unsigned char *pointer;
1112   long int integer;
1113 };
1114 
1115 typedef union fail_stack_elt fail_stack_elt_t;
1116 
1117 typedef struct
1118 {
1119   fail_stack_elt_t *stack;
1120   unsigned long int size;
1121   unsigned long int avail;		/* Offset of next open position.  */
1122 } fail_stack_type;
1123 
1124 #else /* not INT_IS_16BIT */
1125 
1126 #if defined (MATCH_MAY_ALLOCATE)
1127 /* 4400 was enough to cause a crash on Alpha OSF/1,
1128    whose default stack limit is 2mb.  */
1129 int re_max_failures = 20000;
1130 #else
1131 int re_max_failures = 2000;
1132 #endif
1133 
1134 union fail_stack_elt
1135 {
1136   unsigned char *pointer;
1137   int integer;
1138 };
1139 
1140 typedef union fail_stack_elt fail_stack_elt_t;
1141 
1142 typedef struct
1143 {
1144   fail_stack_elt_t *stack;
1145   unsigned size;
1146   unsigned avail;			/* Offset of next open position.  */
1147 } fail_stack_type;
1148 
1149 #endif /* INT_IS_16BIT */
1150 
1151 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1152 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1153 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1154 
1155 
1156 /* Define macros to initialize and free the failure stack.
1157    Do `return -2' if the alloc fails.  */
1158 
1159 #ifdef MATCH_MAY_ALLOCATE
1160 #define INIT_FAIL_STACK()						\
1161   do {									\
1162     fail_stack.stack = (fail_stack_elt_t *)				\
1163       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
1164 									\
1165     if (fail_stack.stack == NULL)					\
1166       return -2;							\
1167 									\
1168     fail_stack.size = INIT_FAILURE_ALLOC;				\
1169     fail_stack.avail = 0;						\
1170   } while (0)
1171 
1172 #define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1173 #else
1174 #define INIT_FAIL_STACK()						\
1175   do {									\
1176     fail_stack.avail = 0;						\
1177   } while (0)
1178 
1179 #define RESET_FAIL_STACK()
1180 #endif
1181 
1182 
1183 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1184 
1185    Return 1 if succeeds, and 0 if either ran out of memory
1186    allocating space for it or it was already too large.
1187 
1188    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1189 
1190 #define DOUBLE_FAIL_STACK(fail_stack)					\
1191   ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
1192    ? 0									\
1193    : ((fail_stack).stack = (fail_stack_elt_t *)				\
1194         REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
1195           (fail_stack).size * sizeof (fail_stack_elt_t),		\
1196           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
1197 									\
1198       (fail_stack).stack == NULL					\
1199       ? 0								\
1200       : ((fail_stack).size <<= 1, 					\
1201          1)))
1202 
1203 
1204 /* Push pointer POINTER on FAIL_STACK.
1205    Return 1 if was able to do so and 0 if ran out of memory allocating
1206    space to do so.  */
1207 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
1208   ((FAIL_STACK_FULL ()							\
1209     && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
1210    ? 0									\
1211    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
1212       1))
1213 
1214 /* Push a pointer value onto the failure stack.
1215    Assumes the variable `fail_stack'.  Probably should only
1216    be called from within `PUSH_FAILURE_POINT'.  */
1217 #define PUSH_FAILURE_POINTER(item)					\
1218   fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1219 
1220 /* This pushes an integer-valued item onto the failure stack.
1221    Assumes the variable `fail_stack'.  Probably should only
1222    be called from within `PUSH_FAILURE_POINT'.  */
1223 #define PUSH_FAILURE_INT(item)					\
1224   fail_stack.stack[fail_stack.avail++].integer = (item)
1225 
1226 /* Push a fail_stack_elt_t value onto the failure stack.
1227    Assumes the variable `fail_stack'.  Probably should only
1228    be called from within `PUSH_FAILURE_POINT'.  */
1229 #define PUSH_FAILURE_ELT(item)					\
1230   fail_stack.stack[fail_stack.avail++] =  (item)
1231 
1232 /* These three POP... operations complement the three PUSH... operations.
1233    All assume that `fail_stack' is nonempty.  */
1234 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1235 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1236 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1237 
1238 /* Used to omit pushing failure point id's when we're not debugging.  */
1239 #ifdef DEBUG
1240 #define DEBUG_PUSH PUSH_FAILURE_INT
1241 #define DEBUG_POP(item_addr) (item_addr)->integer = POP_FAILURE_INT ()
1242 #else
1243 #define DEBUG_PUSH(item)
1244 #define DEBUG_POP(item_addr)
1245 #endif
1246 
1247 
1248 /* Push the information about the state we will need
1249    if we ever fail back to it.
1250 
1251    Requires variables fail_stack, regstart, regend, reg_info, and
1252    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
1253    declared.
1254 
1255    Does `return FAILURE_CODE' if runs out of memory.  */
1256 
1257 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1258   do {									\
1259     char *destination;							\
1260     /* Must be int, so when we don't save any registers, the arithmetic	\
1261        of 0 + -1 isn't done as unsigned.  */				\
1262     /* Can't be int, since there is not a shred of a guarantee that int	\
1263        is wide enough to hold a value of something to which pointer can	\
1264        be assigned */							\
1265     s_reg_t this_reg;							\
1266     									\
1267     DEBUG_STATEMENT (failure_id++);					\
1268     DEBUG_STATEMENT (nfailure_points_pushed++);				\
1269     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1270     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1271     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1272 									\
1273     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
1274     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
1275 									\
1276     /* Ensure we have enough space allocated for what we will push.  */	\
1277     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1278       {									\
1279         if (!DOUBLE_FAIL_STACK (fail_stack))				\
1280           return failure_code;						\
1281 									\
1282         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1283 		       (fail_stack).size);				\
1284         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1285       }									\
1286 									\
1287     /* Push the info, starting with the registers.  */			\
1288     DEBUG_PRINT1 ("\n");						\
1289 									\
1290     if (1)								\
1291       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1292 	   this_reg++)							\
1293 	{								\
1294 	  DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);		\
1295 	  DEBUG_STATEMENT (num_regs_pushed++);				\
1296 									\
1297 	  DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);	\
1298 	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1299 									\
1300 	  DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
1301 	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1302 									\
1303 	  DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
1304 	  DEBUG_PRINT2 (" match_null=%d",				\
1305 			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1306 	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1307 	  DEBUG_PRINT2 (" matched_something=%d",			\
1308 			MATCHED_SOMETHING (reg_info[this_reg]));	\
1309 	  DEBUG_PRINT2 (" ever_matched=%d",				\
1310 			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1311 	  DEBUG_PRINT1 ("\n");						\
1312 	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1313 	}								\
1314 									\
1315     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
1316     PUSH_FAILURE_INT (lowest_active_reg);				\
1317 									\
1318     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
1319     PUSH_FAILURE_INT (highest_active_reg);				\
1320 									\
1321     DEBUG_PRINT2 ("  Pushing pattern 0x%x:\n", pattern_place);		\
1322     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1323     PUSH_FAILURE_POINTER (pattern_place);				\
1324 									\
1325     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
1326     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1327 				 size2);				\
1328     DEBUG_PRINT1 ("'\n");						\
1329     PUSH_FAILURE_POINTER (string_place);				\
1330 									\
1331     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1332     DEBUG_PUSH (failure_id);						\
1333   } while (0)
1334 
1335 /* This is the number of items that are pushed and popped on the stack
1336    for each register.  */
1337 #define NUM_REG_ITEMS  3
1338 
1339 /* Individual items aside from the registers.  */
1340 #ifdef DEBUG
1341 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1342 #else
1343 #define NUM_NONREG_ITEMS 4
1344 #endif
1345 
1346 /* We push at most this many items on the stack.  */
1347 /* We used to use (num_regs - 1), which is the number of registers
1348    this regexp will save; but that was changed to 5
1349    to avoid stack overflow for a regexp with lots of parens.  */
1350 #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1351 
1352 /* We actually push this many items.  */
1353 #define NUM_FAILURE_ITEMS				\
1354   (((0							\
1355      ? 0 : highest_active_reg - lowest_active_reg + 1)	\
1356     * NUM_REG_ITEMS)					\
1357    + NUM_NONREG_ITEMS)
1358 
1359 /* How many items can still be added to the stack without overflowing it.  */
1360 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1361 
1362 
1363 /* Pops what PUSH_FAIL_STACK pushes.
1364 
1365    We restore into the parameters, all of which should be lvalues:
1366      STR -- the saved data position.
1367      PAT -- the saved pattern position.
1368      LOW_REG, HIGH_REG -- the highest and lowest active registers.
1369      REGSTART, REGEND -- arrays of string positions.
1370      REG_INFO -- array of information about each subexpression.
1371 
1372    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1373    `pend', `string1', `size1', `string2', and `size2'.  */
1374 
1375 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1376 {									\
1377   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
1378   s_reg_t this_reg;							\
1379   const unsigned char *string_temp;					\
1380 									\
1381   assert (!FAIL_STACK_EMPTY ());					\
1382 									\
1383   /* Remove failure points and point to how many regs pushed.  */	\
1384   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1385   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1386   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
1387 									\
1388   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1389 									\
1390   DEBUG_POP (&failure_id);						\
1391   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1392 									\
1393   /* If the saved string location is NULL, it came from an		\
1394      on_failure_keep_string_jump opcode, and we want to throw away the	\
1395      saved NULL, thus retaining our current position in the string.  */	\
1396   string_temp = POP_FAILURE_POINTER ();					\
1397   if (string_temp != NULL)						\
1398     str = (const char *) string_temp;					\
1399 									\
1400   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
1401   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1402   DEBUG_PRINT1 ("'\n");							\
1403 									\
1404   pat = (unsigned char *) POP_FAILURE_POINTER ();			\
1405   DEBUG_PRINT2 ("  Popping pattern 0x%x:\n", pat);			\
1406   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1407 									\
1408   /* Restore register info.  */						\
1409   high_reg = (active_reg_t) POP_FAILURE_INT ();				\
1410   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
1411 									\
1412   low_reg = (active_reg_t) POP_FAILURE_INT ();				\
1413   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
1414 									\
1415   if (1)								\
1416     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
1417       {									\
1418 	DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);		\
1419 									\
1420 	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1421 	DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);	\
1422 									\
1423 	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1424 	DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
1425 									\
1426 	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1427 	DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);	\
1428       }									\
1429   else									\
1430     {									\
1431       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1432 	{								\
1433 	  reg_info[this_reg].word.integer = 0;				\
1434 	  regend[this_reg] = 0;						\
1435 	  regstart[this_reg] = 0;					\
1436 	}								\
1437       highest_active_reg = high_reg;					\
1438     }									\
1439 									\
1440   set_regs_matched_done = 0;						\
1441   DEBUG_STATEMENT (nfailure_points_popped++);				\
1442 } /* POP_FAILURE_POINT */
1443 
1444 
1445 
1446 /* Structure for per-register (a.k.a. per-group) information.
1447    Other register information, such as the
1448    starting and ending positions (which are addresses), and the list of
1449    inner groups (which is a bits list) are maintained in separate
1450    variables.
1451 
1452    We are making a (strictly speaking) nonportable assumption here: that
1453    the compiler will pack our bit fields into something that fits into
1454    the type of `word', i.e., is something that fits into one item on the
1455    failure stack.  */
1456 
1457 
1458 /* Declarations and macros for re_match_2.  */
1459 
1460 typedef union
1461 {
1462   fail_stack_elt_t word;
1463   struct
1464   {
1465       /* This field is one if this group can match the empty string,
1466          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1467 #define MATCH_NULL_UNSET_VALUE 3
1468     unsigned match_null_string_p : 2;
1469     unsigned is_active : 1;
1470     unsigned matched_something : 1;
1471     unsigned ever_matched_something : 1;
1472   } bits;
1473 } register_info_type;
1474 
1475 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1476 #define IS_ACTIVE(R)  ((R).bits.is_active)
1477 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1478 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1479 
1480 
1481 /* Call this when have matched a real character; it sets `matched' flags
1482    for the subexpressions which we are currently inside.  Also records
1483    that those subexprs have matched.  */
1484 #define SET_REGS_MATCHED()						\
1485   do									\
1486     {									\
1487       if (!set_regs_matched_done)					\
1488 	{								\
1489 	  active_reg_t r;						\
1490 	  set_regs_matched_done = 1;					\
1491 	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1492 	    {								\
1493 	      MATCHED_SOMETHING (reg_info[r])				\
1494 		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1495 		= 1;							\
1496 	    }								\
1497 	}								\
1498     }									\
1499   while (0)
1500 
1501 /* Registers are set to a sentinel when they haven't yet matched.  */
1502 static char reg_unset_dummy;
1503 #define REG_UNSET_VALUE (&reg_unset_dummy)
1504 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1505 
1506 /* Subroutine declarations and macros for regex_compile.  */
1507 
1508 static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1509 					      reg_syntax_t syntax,
1510 					      struct re_pattern_buffer *bufp));
1511 static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1512 static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1513 				 int arg1, int arg2));
1514 static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1515 				  int arg, unsigned char *end));
1516 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1517 				  int arg1, int arg2, unsigned char *end));
1518 static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1519 					   reg_syntax_t syntax));
1520 static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1521 					   reg_syntax_t syntax));
1522 #if 0 /* We don't use this. */
1523 static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
1524 					      const char *pend,
1525 					      char *translate,
1526 					      reg_syntax_t syntax,
1527 					      unsigned char *b));
1528 #endif
1529 
1530 /* Fetch the next character in the uncompiled pattern---translating it
1531    if necessary.  Also cast from a signed character in the constant
1532    string passed to us by the user to an unsigned char that we can use
1533    as an array index (in, e.g., `translate').  */
1534 #ifndef PATFETCH
1535 #define PATFETCH(c)							\
1536   do {if (p == pend) return REG_EEND;					\
1537     c = (unsigned char) *p++;						\
1538     if (translate && !ismbchar (c))					\
1539       c = (unsigned char) translate[(unsigned char) (c)];		\
1540   } while (0)
1541 #endif
1542 
1543 /* Fetch the next character in the uncompiled pattern, with no
1544    translation.  */
1545 #define PATFETCH_RAW(c)							\
1546   do {if (p == pend) return REG_EEND;					\
1547     c = (unsigned char) *p++; 						\
1548   } while (0)
1549 
1550 /* Go backwards one character in the pattern.  */
1551 #define PATUNFETCH p--
1552 
1553 #define PATFETCH_MBC(wc)						\
1554   do {if (p == pend || p + mbclen(*p) > pend) return REG_EEND;		\
1555     { char *end; wc = mbcfetch (p, &end); p = end; }			\
1556     if (translate && wc < (1 << BYTEWIDTH))				\
1557       wc = (mbchar_t) translate[(unsigned char) (wc)];		\
1558   } while (0)
1559 
1560 
1561 
1562 /* If `translate' is non-null, return translate[D], else just D.  We
1563    cast the subscript to translate because some data is declared as
1564    `char *', to avoid warnings when a string constant is passed.  But
1565    when we use a character as a subscript we must make it unsigned.  */
1566 #ifndef TRANSLATE
1567 #define TRANSLATE(d) (translate						\
1568 		      ? (unsigned char) translate[(unsigned char) (d)]	\
1569 		      : (d))
1570 #endif
1571 
1572 
1573 /* Macros for outputting the compiled pattern into `buffer'.  */
1574 
1575 /* If the buffer isn't allocated when it comes in, use this.  */
1576 #define INIT_BUF_SIZE  32
1577 
1578 /* Make sure we have at least N more bytes of space in buffer.  */
1579 #define GET_BUFFER_SPACE(n)						\
1580     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
1581       EXTEND_BUFFER ()
1582 
1583 /* Make sure we have one more byte of buffer space and then add C to it.  */
1584 #define BUF_PUSH(c)							\
1585   do {									\
1586     GET_BUFFER_SPACE (1);						\
1587     *b++ = (unsigned char) (c);						\
1588   } while (0)
1589 
1590 
1591 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1592 #define BUF_PUSH_2(c1, c2)						\
1593   do {									\
1594     GET_BUFFER_SPACE (2);						\
1595     *b++ = (unsigned char) (c1);					\
1596     *b++ = (unsigned char) (c2);					\
1597   } while (0)
1598 
1599 
1600 /* As with BUF_PUSH_2, except for three bytes.  */
1601 #define BUF_PUSH_3(c1, c2, c3)						\
1602   do {									\
1603     GET_BUFFER_SPACE (3);						\
1604     *b++ = (unsigned char) (c1);					\
1605     *b++ = (unsigned char) (c2);					\
1606     *b++ = (unsigned char) (c3);					\
1607   } while (0)
1608 
1609 
1610 /* Store a jump with opcode OP at LOC to location TO.  We store a
1611    relative address offset by the three bytes the jump itself occupies.  */
1612 #define STORE_JUMP(op, loc, to) \
1613   store_op1 (op, loc, (int) ((to) - (loc) - 3))
1614 
1615 /* Likewise, for a two-argument jump.  */
1616 #define STORE_JUMP2(op, loc, to, arg) \
1617   store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
1618 
1619 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1620 #define INSERT_JUMP(op, loc, to) \
1621   insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
1622 
1623 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1624 #define INSERT_JUMP2(op, loc, to, arg) \
1625   insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
1626 
1627 
1628 /* This is not an arbitrary limit: the arguments which represent offsets
1629    into the pattern are two bytes long.  So if 2^16 bytes turns out to
1630    be too small, many things would have to change.  */
1631 /* Any other compiler which, like MSC, has allocation limit below 2^16
1632    bytes will have to use approach similar to what was done below for
1633    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1634    reallocating to 0 bytes.  Such thing is not going to work too well.
1635    You have been warned!!  */
1636 #if (defined(_MSC_VER) || defined(__TURBOC__)) && !defined(WIN32)
1637 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1638    The REALLOC define eliminates a flurry of conversion warnings,
1639    but is not required. */
1640 #define MAX_BUF_SIZE  65500L
1641 #define REALLOC(p,s) realloc ((p), (size_t) (s))
1642 #else
1643 #define MAX_BUF_SIZE (1L << 16)
1644 #define REALLOC(p,s) realloc ((p), (s))
1645 #endif
1646 
1647 /* Extend the buffer by twice its current size via realloc and
1648    reset the pointers that pointed into the old block to point to the
1649    correct places in the new one.  If extending the buffer results in it
1650    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1651 #if __BOUNDED_POINTERS__
1652 # define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
1653 # define MOVE_BUFFER_POINTER(P) \
1654   (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
1655 # define ELSE_EXTEND_BUFFER_HIGH_BOUND		\
1656   else						\
1657     {						\
1658       SET_HIGH_BOUND (b);			\
1659       SET_HIGH_BOUND (begalt);			\
1660       if (fixup_alt_jump)			\
1661 	SET_HIGH_BOUND (fixup_alt_jump);	\
1662       if (laststart)				\
1663 	SET_HIGH_BOUND (laststart);		\
1664       if (pending_exact)			\
1665 	SET_HIGH_BOUND (pending_exact);		\
1666     }
1667 #else
1668 # define MOVE_BUFFER_POINTER(P) (P) += incr
1669 # define ELSE_EXTEND_BUFFER_HIGH_BOUND
1670 #endif
1671 #define EXTEND_BUFFER()							\
1672   do { 									\
1673     unsigned char *old_buffer = bufp->buffer;				\
1674     if (bufp->allocated == MAX_BUF_SIZE) 				\
1675       return REG_ESIZE;							\
1676     bufp->allocated <<= 1;						\
1677     if (bufp->allocated > MAX_BUF_SIZE)					\
1678       bufp->allocated = MAX_BUF_SIZE; 					\
1679     bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
1680     if (bufp->buffer == NULL)						\
1681       return REG_ESPACE;						\
1682     /* If the buffer moved, move all the pointers into it.  */		\
1683     if (old_buffer != bufp->buffer)					\
1684       {									\
1685 	int incr = bufp->buffer - old_buffer;				\
1686 	MOVE_BUFFER_POINTER (b);					\
1687 	MOVE_BUFFER_POINTER (begalt);					\
1688         if (fixup_alt_jump)						\
1689 	  MOVE_BUFFER_POINTER (fixup_alt_jump);				\
1690         if (laststart)							\
1691 	  MOVE_BUFFER_POINTER (laststart);				\
1692         if (pending_exact)						\
1693 	  MOVE_BUFFER_POINTER (pending_exact);				\
1694       }									\
1695     ELSE_EXTEND_BUFFER_HIGH_BOUND					\
1696   } while (0)
1697 
1698 
1699 /* Since we have one byte reserved for the register number argument to
1700    {start,stop}_memory, the maximum number of groups we can report
1701    things about is what fits in that byte.  */
1702 #define MAX_REGNUM 255
1703 
1704 /* But patterns can have more than `MAX_REGNUM' registers.  We just
1705    ignore the excess.  */
1706 typedef unsigned regnum_t;
1707 
1708 
1709 /* Macros for the compile stack.  */
1710 
1711 /* Since offsets can go either forwards or backwards, this type needs to
1712    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1713 /* int may be not enough when sizeof(int) == 2.  */
1714 typedef long pattern_offset_t;
1715 
1716 typedef struct
1717 {
1718   pattern_offset_t begalt_offset;
1719   pattern_offset_t fixup_alt_jump;
1720   pattern_offset_t inner_group_offset;
1721   pattern_offset_t laststart_offset;
1722   regnum_t regnum;
1723 } compile_stack_elt_t;
1724 
1725 
1726 typedef struct
1727 {
1728   compile_stack_elt_t *stack;
1729   unsigned size;
1730   unsigned avail;			/* Offset of next open position.  */
1731 } compile_stack_type;
1732 
1733 
1734 #define INIT_COMPILE_STACK_SIZE 32
1735 
1736 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1737 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1738 
1739 /* The next available element.  */
1740 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1741 
1742 
1743 /* Set the bit for character C in a list.  */
1744 #define SET_LIST_BIT(c)                               \
1745   (b[((unsigned char) (c)) / BYTEWIDTH]               \
1746    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1747 
1748 
1749 /* Get the next unsigned number in the uncompiled pattern.  */
1750 #define GET_UNSIGNED_NUMBER(num) 					\
1751   { if (p != pend)							\
1752      {									\
1753        PATFETCH (c); 							\
1754        while (ISDIGIT (c)) 						\
1755          { 								\
1756            if (num < 0)							\
1757               num = 0;							\
1758            num = num * 10 + c - '0'; 					\
1759            if (p == pend) 						\
1760               break; 							\
1761            PATFETCH (c);						\
1762          } 								\
1763        } 								\
1764     }
1765 
1766 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
1767 /* The GNU C library provides support for user-defined character classes
1768    and the functions from ISO C amendement 1.  */
1769 # ifdef CHARCLASS_NAME_MAX
1770 #  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1771 # else
1772 /* This shouldn't happen but some implementation might still have this
1773    problem.  Use a reasonable default value.  */
1774 #  define CHAR_CLASS_MAX_LENGTH 256
1775 # endif
1776 
1777 # define IS_CHAR_CLASS(string) wctype (string)
1778 #else
1779 # define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1780 
1781 # define IS_CHAR_CLASS(string)						\
1782    (STREQ (string, "alpha") || STREQ (string, "upper")			\
1783     || STREQ (string, "lower") || STREQ (string, "digit")		\
1784     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1785     || STREQ (string, "space") || STREQ (string, "print")		\
1786     || STREQ (string, "punct") || STREQ (string, "graph")		\
1787     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1788 #endif
1789 
1790 /* Handle charset(_not)?.
1791 
1792    Structure of charset(_not)? in compiled pattern.
1793 
1794      struct {
1795        unsinged char id;		charset(_not)?
1796        unsigned char sbc_size;
1797        unsigned char sbc_map[sbc_size];	same as original up to here.
1798        unsigned short mbc_size;		number of intervals.
1799        struct {
1800          uint32_t beg;			beginning of interval.
1801 	 uint32_t end;			end of interval.
1802        } intervals[mbc_size];
1803      }; */
1804 #define UTF8_CHARSET_SIZE 128
1805 
1806 static reg_errcode_t
1807 #if defined(__STDC__) || defined(MSDOS)
set_list_bits(mbchar_t c1,mbchar_t c2,reg_syntax_t syntax,unsigned char * b,const char * translate)1808 set_list_bits (mbchar_t c1, mbchar_t c2,
1809 	       reg_syntax_t syntax, unsigned char *b, const char *translate)
1810 #else
1811 set_list_bits (c1, c2, syntax, b, translate)
1812      mbchar_t c1, c2;
1813      reg_syntax_t syntax;
1814      unsigned char *b;
1815      const char *translate;
1816 #endif
1817 {
1818   unsigned char sbc_size = b[-1];
1819   unsigned short mbc_size = EXTRACT_UNSIGNED (&b[sbc_size]);
1820   mbchar_t beg, end, upb;
1821   boolean isutf8 = current_mbctype == MBCTYPE_UTF8;
1822   size_t charset_size;
1823 
1824   if (c1 > c2)
1825     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
1826   charset_size = isutf8 ? UTF8_CHARSET_SIZE : (1 << BYTEWIDTH);
1827   if (c1 < charset_size) {
1828     upb = c2;
1829     if (charset_size <= upb)
1830       upb = charset_size - 1;	/* The last single-byte char */
1831     if (sbc_size <= upb / BYTEWIDTH) {
1832       /* Allocate maximum size so it never happens again.  */
1833       /* NOTE: memcpy() would not work here.  */
1834       bcopy (&b[sbc_size], &b[charset_size / BYTEWIDTH], 2 + mbc_size*8);
1835       bzero (&b[sbc_size], charset_size / BYTEWIDTH - sbc_size);
1836       b[-1] = sbc_size = charset_size / BYTEWIDTH;
1837     }
1838     if (!translate) {
1839       for (; c1 <= upb; c1++)
1840 	if (!ismbchar (c1))
1841 	  SET_LIST_BIT (c1);
1842     }
1843     else
1844       for (; c1 <= upb; c1++)
1845 	if (!ismbchar (c1))
1846 	  SET_LIST_BIT (TRANSLATE (c1));
1847     if (c2 < charset_size)
1848       return REG_NOERROR;
1849     c1 = isutf8 ? 0x80 : 0x8000 ;	/* The first wide char */
1850   }
1851   b = &b[sbc_size + 2];
1852 
1853   /*                             intervals[beg]
1854          ��----------��          ��----------��
1855                          c1
1856                        ��----------------------��
1857 
1858      ��ޤΤ褦�ʶ�֤Υ���ǥå��� beg ����ꤹ��. */
1859   for (beg = 0, upb = mbc_size; beg < upb; ) {
1860     mbchar_t mid = (beg + upb) >> 1;
1861 
1862     if (c1 - 1 > EXTRACT_MBC (&b[mid*8 + 4]))
1863       beg = mid + 1;
1864     else
1865       upb = mid;
1866   }
1867 
1868   /*                        intervals[end]
1869            ��-------��      ��----------��
1870                         c2
1871          ��---------------��
1872 
1873      ��ޤΤ褦�ʶ�֤Υ���ǥå��� end ����ꤹ��. */
1874   for (end = beg, upb = mbc_size; end < upb; ) {
1875     mbchar_t mid = (end + upb) >> 1;
1876 
1877     if (c2 >= EXTRACT_MBC (&b[mid*8]) - 1)
1878       end = mid + 1;
1879     else
1880       upb = mid;
1881   }
1882 
1883   if (beg != end) {
1884     /* ��¸�ζ�֤��ʤ��Ȥ�1�����礹����,
1885        ��֤λ���, ������������. */
1886     if (c1 > EXTRACT_MBC (&b[beg*8]))
1887       c1 = EXTRACT_MBC (&b[beg*8]);
1888     if (c2 < EXTRACT_MBC (&b[end*8 - 4]))
1889       c2 = EXTRACT_MBC (&b[end*8 - 4]);
1890   }
1891   if (end < mbc_size && end != beg + 1)
1892     /* �ɲä�����֤θ��˴�¸�ζ�֤��ư����. */
1893     /* NOTE: memcpy() would not work here.  */
1894     bcopy (&b[end*8], &b[(beg + 1)*8], (mbc_size - end)*8);
1895   STORE_MBC (&b[beg*8 + 0], c1);
1896   STORE_MBC (&b[beg*8 + 4], c2);
1897   mbc_size += beg + 1 - end;
1898   STORE_NUMBER (&b[-2], mbc_size);
1899   return REG_NOERROR;
1900 }
1901 
1902 static int
1903 #if defined(__STDC__) || defined(MSDOS)
is_in_list(mbchar_t c,const unsigned char * b)1904 is_in_list (mbchar_t c, const unsigned char *b)
1905 #else
1906 is_in_list (c, b)
1907      mbchar_t c;
1908      const unsigned char *b;
1909 #endif
1910 {
1911   unsigned short size;
1912   int in = (re_opcode_t) b[-1] == charset_not;
1913   boolean isutf8 = current_mbctype == MBCTYPE_UTF8;
1914 
1915   size = *b++;
1916   if (c < (isutf8 ? UTF8_CHARSET_SIZE : (1 << BYTEWIDTH))) {
1917     if (c / BYTEWIDTH < size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH)
1918       in = !in;
1919   }
1920   else {
1921     mbchar_t i, j;
1922 
1923     b += size + 2;
1924     size = EXTRACT_UNSIGNED (&b[-2]);
1925 
1926     /*                        intervals[i]
1927              ��-------��      ��--------��
1928                         c
1929                       ��----------------��
1930 
1931      ��ޤΤ褦�ʶ�֤Υ���ǥå��� i ����ꤹ��. */
1932     for (i = 0, j = size; i < j; ) {
1933       mbchar_t k = (i + j) >> 1;
1934 
1935       if (c > EXTRACT_MBC (&b[k*8 + 4]))
1936 	i = k + 1;
1937       else
1938 	j = k;
1939     }
1940     if (i < size && EXTRACT_MBC (&b[i*8]) <= c
1941 	/* [...] ����, ̵���ʥޥ���Х���ʸ�����������.  �����Ǥϴ�ñ��
1942 	   ���ᣲ�Х����ܤ� '\n' �ޤ��� '\0' ������̵���Ȥ���.  [^...]
1943 	   �ξ���, �դ�̵���ʥޥ���Х���ʸ����ޥå�������.  */
1944 	&& (((unsigned char) c != '\n' && (unsigned char) c != '\0') || isutf8))
1945       in = !in;
1946   }
1947   return in;
1948 }
1949 
1950 static boolean
1951 #if defined(__STDC__) || defined(MSDOS)
is_mblist_nooverlap(const unsigned char * b1,const unsigned char * b2)1952 is_mblist_nooverlap (const unsigned char *b1, const unsigned char *b2)
1953 #else
1954 is_mblist_nooverlap (b1, b2)
1955      const unsigned char *b1;
1956      const unsigned char *b2;
1957 #endif
1958 {
1959   unsigned short size1;
1960   mbchar_t c11;
1961   mbchar_t c12;
1962   unsigned short size2;
1963   mbchar_t c21;
1964   mbchar_t c22;
1965   unsigned short i;
1966   unsigned short j;
1967 
1968   b1 = &b1[2];
1969   size1 = EXTRACT_UNSIGNED (&b1[-2]);
1970   b2 = &b2[2];
1971   size2 = EXTRACT_UNSIGNED (&b2[-2]);
1972   for (i = 0; i < size1; i++)
1973     {
1974       c11 = EXTRACT_MBC (&b1[i * 8]);
1975       c12 = EXTRACT_MBC (&b1[i * 8 + 4]);
1976       for (j = 0; j < size2; j++)
1977 	{
1978 	  c21 = EXTRACT_MBC (&b2[j * 8]);
1979 	  c22 = EXTRACT_MBC (&b2[j * 8 + 4]);
1980 	  if (! ((c22 < c11) || (c12 < c21)))
1981 	    return false;
1982 	}
1983     }
1984   return true;
1985 }
1986 
1987 static boolean
1988 #if defined(__STDC__) || defined(MSDOS)
is_invmblist_nooverlap(const unsigned char * b1,const unsigned char * b2)1989 is_invmblist_nooverlap (const unsigned char *b1, const unsigned char *b2)
1990 #else
1991 is_invmblist_nooverlap (b1, b2)
1992      const unsigned char *b1;
1993      const unsigned char *b2;
1994 #endif
1995 {
1996   unsigned short size2;
1997   unsigned char *inv_b2;
1998   unsigned short inv_size2;
1999   boolean isutf8 = current_mbctype == MBCTYPE_UTF8;
2000   unsigned short i;
2001   mbchar_t c2;
2002   mbchar_t c;
2003   boolean ret;
2004 
2005   b2 = &b2[2];
2006   size2 = EXTRACT_UNSIGNED (&b2[-2]);
2007   inv_b2 = REGEX_ALLOCATE (2 + 8 * (size2 + 2));
2008   inv_b2 = &inv_b2[2];
2009 
2010   c = isutf8 ? 0x80 : 0x8000;	/* The first wide char */
2011   inv_size2 = 0;
2012   for (i = 0; i < size2; i++) {
2013     c2 = EXTRACT_MBC (&b2[i * 8]) - 1;
2014     if (c <= c2) {
2015       STORE_MBC (&inv_b2[inv_size2 * 8], c);
2016       STORE_MBC (&inv_b2[inv_size2 * 8 + 4], c2);
2017       inv_size2++;
2018     }
2019     c = EXTRACT_MBC (&b2[i * 8 + 4]) + 1;
2020   }
2021   c2 = 0x7fffffffL;
2022   STORE_MBC (&inv_b2[inv_size2 * 8], c);
2023   STORE_MBC (&inv_b2[inv_size2 * 8 + 4], c2);
2024   inv_size2++;
2025   STORE_NUMBER (&inv_b2[-2], inv_size2);
2026   inv_b2 = &inv_b2[-2];
2027 
2028   ret = is_mblist_nooverlap(b1, inv_b2);
2029 
2030   REGEX_FREE (inv_b2);
2031 
2032   return ret;
2033 }
2034 
2035 #ifndef MATCH_MAY_ALLOCATE
2036 
2037 /* If we cannot allocate large objects within re_match_2_internal,
2038    we make the fail stack and register vectors global.
2039    The fail stack, we grow to the maximum size when a regexp
2040    is compiled.
2041    The register vectors, we adjust in size each time we
2042    compile a regexp, according to the number of registers it needs.  */
2043 
2044 static fail_stack_type fail_stack;
2045 
2046 /* Size with which the following vectors are currently allocated.
2047    That is so we can make them bigger as needed,
2048    but never make them smaller.  */
2049 static int regs_allocated_size;
2050 
2051 static const char **     regstart, **     regend;
2052 static const char ** old_regstart, ** old_regend;
2053 static const char **best_regstart, **best_regend;
2054 static register_info_type *reg_info;
2055 static const char **reg_dummy;
2056 static register_info_type *reg_info_dummy;
2057 
2058 /* Make the register vectors big enough for NUM_REGS registers,
2059    but don't make them smaller.  */
2060 
2061 static
regex_grow_registers(num_regs)2062 regex_grow_registers (num_regs)
2063      int num_regs;
2064 {
2065   if (num_regs > regs_allocated_size)
2066     {
2067       RETALLOC_IF (regstart,	 num_regs, const char *);
2068       RETALLOC_IF (regend,	 num_regs, const char *);
2069       RETALLOC_IF (old_regstart, num_regs, const char *);
2070       RETALLOC_IF (old_regend,	 num_regs, const char *);
2071       RETALLOC_IF (best_regstart, num_regs, const char *);
2072       RETALLOC_IF (best_regend,	 num_regs, const char *);
2073       RETALLOC_IF (reg_info,	 num_regs, register_info_type);
2074       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
2075       RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
2076 
2077       regs_allocated_size = num_regs;
2078     }
2079 }
2080 
2081 #endif /* not MATCH_MAY_ALLOCATE */
2082 
2083 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
2084 						 compile_stack,
2085 						 regnum_t regnum));
2086 
2087 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
2088    Returns one of error codes defined in `regex.h', or zero for success.
2089 
2090    Assumes the `allocated' (and perhaps `buffer') and `translate'
2091    fields are set in BUFP on entry.
2092 
2093    If it succeeds, results are put in BUFP (if it returns an error, the
2094    contents of BUFP are undefined):
2095      `buffer' is the compiled pattern;
2096      `syntax' is set to SYNTAX;
2097      `used' is set to the length of the compiled pattern;
2098      `fastmap_accurate' is zero;
2099      `re_nsub' is the number of subexpressions in PATTERN;
2100      `not_bol' and `not_eol' are zero;
2101 
2102    The `fastmap' and `newline_anchor' fields are neither
2103    examined nor set.  */
2104 
2105 /* Return, freeing storage we allocated.  */
2106 #define FREE_STACK_RETURN(value)		\
2107   return (free (compile_stack.stack), value)
2108 
2109 static reg_errcode_t
regex_compile(pattern,size,syntax,bufp)2110 regex_compile (pattern, size, syntax, bufp)
2111      const char *pattern;
2112      size_t size;
2113      reg_syntax_t syntax;
2114      struct re_pattern_buffer *bufp;
2115 {
2116   /* We fetch characters from PATTERN here.  Even though PATTERN is
2117      `char *' (i.e., signed), we declare these variables as unsigned, so
2118      they can be reliably used as array indices.  */
2119   register unsigned char c, c1;
2120 
2121   /* A random temporary spot in PATTERN.  */
2122   const char *p1;
2123 
2124   /* Points to the end of the buffer, where we should append.  */
2125   register unsigned char *b;
2126 
2127   /* Keeps track of unclosed groups.  */
2128   compile_stack_type compile_stack;
2129 
2130   /* Points to the current (ending) position in the pattern.  */
2131   const char *p = pattern;
2132   const char *pend = pattern + size;
2133 
2134   /* How to translate the characters in the pattern.  */
2135   RE_TRANSLATE_TYPE translate = bufp->translate;
2136 
2137   /* Address of the count-byte of the most recently inserted `exactn'
2138      command.  This makes it possible to tell if a new exact-match
2139      character can be added to that command or if the character requires
2140      a new `exactn' command.  */
2141   unsigned char *pending_exact = 0;
2142 
2143   /* Address of start of the most recently finished expression.
2144      This tells, e.g., postfix * where to find the start of its
2145      operand.  Reset at the beginning of groups and alternatives.  */
2146   unsigned char *laststart = 0;
2147 
2148   /* Address of beginning of regexp, or inside of last group.  */
2149   unsigned char *begalt;
2150 
2151   /* Place in the uncompiled pattern (i.e., the {) to
2152      which to go back if the interval is invalid.  */
2153   const char *beg_interval;
2154 
2155   /* Address of the place where a forward jump should go to the end of
2156      the containing expression.  Each alternative of an `or' -- except the
2157      last -- ends with a forward jump of this sort.  */
2158   unsigned char *fixup_alt_jump = 0;
2159 
2160   /* Counts open-groups as they are encountered.  Remembered for the
2161      matching close-group on the compile stack, so the same register
2162      number is put in the stop_memory as the start_memory.  */
2163   regnum_t regnum = 0;
2164 
2165 #ifdef DEBUG
2166   DEBUG_PRINT1 ("\nCompiling pattern: ");
2167   if (debug)
2168     {
2169       unsigned debug_count;
2170 
2171       for (debug_count = 0; debug_count < size; debug_count++)
2172         putchar (pattern[debug_count]);
2173       putchar ('\n');
2174     }
2175 #endif /* DEBUG */
2176 
2177   /* Initialize the compile stack.  */
2178   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
2179   if (compile_stack.stack == NULL)
2180     return REG_ESPACE;
2181 
2182   compile_stack.size = INIT_COMPILE_STACK_SIZE;
2183   compile_stack.avail = 0;
2184 
2185   /* Initialize the pattern buffer.  */
2186   bufp->syntax = syntax;
2187   bufp->fastmap_accurate = 0;
2188   bufp->not_bol = bufp->not_eol = 0;
2189 
2190   /* Set `used' to zero, so that if we return an error, the pattern
2191      printer (for debugging) will think there's no pattern.  We reset it
2192      at the end.  */
2193   bufp->used = 0;
2194 
2195   /* Always count groups, whether or not bufp->no_sub is set.  */
2196   bufp->re_nsub = 0;
2197 
2198 #if !defined (emacs) && !defined (SYNTAX_TABLE)
2199   /* Initialize the syntax table.  */
2200    init_syntax_once ();
2201 #endif
2202 
2203   if (bufp->allocated == 0)
2204     {
2205       if (bufp->buffer)
2206 	{ /* If zero allocated, but buffer is non-null, try to realloc
2207              enough space.  This loses if buffer's address is bogus, but
2208              that is the user's responsibility.  */
2209           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
2210         }
2211       else
2212         { /* Caller did not allocate a buffer.  Do it for them.  */
2213           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
2214         }
2215       if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
2216 
2217       bufp->allocated = INIT_BUF_SIZE;
2218     }
2219 
2220   begalt = b = bufp->buffer;
2221 
2222   /* Loop through the uncompiled pattern until we're at the end.  */
2223   while (p != pend)
2224     {
2225       PATFETCH (c);
2226 
2227       switch (c)
2228         {
2229         case '^':
2230           {
2231             if (   /* If at start of pattern, it's an operator.  */
2232                    p == pattern + 1
2233                    /* If context independent, it's an operator.  */
2234                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2235                    /* Otherwise, depends on what's come before.  */
2236                 || at_begline_loc_p (pattern, p, syntax))
2237               BUF_PUSH (begline);
2238             else
2239               goto normal_char;
2240           }
2241           break;
2242 
2243 
2244         case '$':
2245           {
2246             if (   /* If at end of pattern, it's an operator.  */
2247                    p == pend
2248                    /* If context independent, it's an operator.  */
2249                 || syntax & RE_CONTEXT_INDEP_ANCHORS
2250                    /* Otherwise, depends on what's next.  */
2251                 || at_endline_loc_p (p, pend, syntax))
2252                BUF_PUSH (endline);
2253              else
2254                goto normal_char;
2255            }
2256            break;
2257 
2258 
2259 	case '+':
2260         case '?':
2261           if ((syntax & RE_BK_PLUS_QM)
2262               || (syntax & RE_LIMITED_OPS))
2263             goto normal_char;
2264         handle_plus:
2265         case '*':
2266           /* If there is no previous pattern... */
2267           if (!laststart)
2268             {
2269               if (syntax & RE_CONTEXT_INVALID_OPS)
2270                 FREE_STACK_RETURN (REG_BADRPT);
2271               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
2272                 goto normal_char;
2273             }
2274 
2275           {
2276             /* Are we optimizing this jump?  */
2277             boolean keep_string_p = false;
2278 
2279             /* 1 means zero (many) matches is allowed.  */
2280             char zero_times_ok = 0, many_times_ok = 0;
2281 
2282             /* If there is a sequence of repetition chars, collapse it
2283                down to just one (the right one).  We can't combine
2284                interval operators with these because of, e.g., `a{2}*',
2285                which should only match an even number of `a's.  */
2286 
2287             for (;;)
2288               {
2289                 zero_times_ok |= c != '+';
2290                 many_times_ok |= c != '?';
2291 
2292                 if (p == pend)
2293                   break;
2294 
2295                 PATFETCH (c);
2296 
2297                 if (c == '*'
2298                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2299                   ;
2300 
2301                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2302                   {
2303                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2304 
2305                     PATFETCH (c1);
2306                     if (!(c1 == '+' || c1 == '?'))
2307                       {
2308                         PATUNFETCH;
2309                         PATUNFETCH;
2310                         break;
2311                       }
2312 
2313                     c = c1;
2314                   }
2315                 else
2316                   {
2317                     PATUNFETCH;
2318                     break;
2319                   }
2320 
2321                 /* If we get here, we found another repeat character.  */
2322                }
2323 
2324             /* Star, etc. applied to an empty pattern is equivalent
2325                to an empty pattern.  */
2326             if (!laststart)
2327               break;
2328 
2329             /* Now we know whether or not zero matches is allowed
2330                and also whether or not two or more matches is allowed.  */
2331             if (many_times_ok)
2332               { /* More than one repetition is allowed, so put in at the
2333                    end a backward relative jump from `b' to before the next
2334                    jump we're going to put in below (which jumps from
2335                    laststart to after this jump).
2336 
2337                    But if we are at the `*' in the exact sequence `.*\n',
2338                    insert an unconditional jump backwards to the .,
2339                    instead of the beginning of the loop.  This way we only
2340                    push a failure point once, instead of every time
2341                    through the loop.  */
2342                 assert (p - 1 > pattern);
2343 
2344                 /* Allocate the space for the jump.  */
2345                 GET_BUFFER_SPACE (3);
2346 
2347                 /* We know we are not at the first character of the pattern,
2348                    because laststart was nonzero.  And we've already
2349                    incremented `p', by the way, to be the character after
2350                    the `*'.  Do we have to do something analogous here
2351                    for null bytes, because of RE_DOT_NOT_NULL?  */
2352                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2353 		    && zero_times_ok
2354                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2355                     && !(syntax & RE_DOT_NEWLINE))
2356                   { /* We have .*\n.  */
2357                     STORE_JUMP (jump, b, laststart);
2358                     keep_string_p = true;
2359                   }
2360                 else
2361                   /* Anything else.  */
2362                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2363 
2364                 /* We've added more stuff to the buffer.  */
2365                 b += 3;
2366               }
2367 
2368             /* On failure, jump from laststart to b + 3, which will be the
2369                end of the buffer after this jump is inserted.  */
2370             GET_BUFFER_SPACE (3);
2371             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2372                                        : on_failure_jump,
2373                          laststart, b + 3);
2374             pending_exact = 0;
2375             b += 3;
2376 
2377             if (!zero_times_ok)
2378               {
2379                 /* At least one repetition is required, so insert a
2380                    `dummy_failure_jump' before the initial
2381                    `on_failure_jump' instruction of the loop. This
2382                    effects a skip over that instruction the first time
2383                    we hit that loop.  */
2384                 GET_BUFFER_SPACE (3);
2385                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2386                 b += 3;
2387               }
2388             }
2389 	  break;
2390 
2391 
2392 	case '.':
2393           laststart = b;
2394           BUF_PUSH (anychar);
2395           break;
2396 
2397 
2398         case '[':
2399           {
2400             boolean had_char_class = false;
2401 	    mbchar_t c, c1;
2402 	    long last_char = -1L;
2403 
2404             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2405 
2406             /* Ensure that we have enough space to push a charset: the
2407                opcode, the length count, and the bitset; 34 bytes in all.  */
2408 	    /* + 2 + 8 for mbcharset(_not)? with just one interval.  */
2409 	    GET_BUFFER_SPACE (34 + 2 + 8);
2410 
2411             laststart = b;
2412 
2413             /* We test `*p == '^' twice, instead of using an if
2414                statement, so we only need one BUF_PUSH.  */
2415             BUF_PUSH (*p == '^' ? charset_not : charset);
2416             if (*p == '^')
2417               p++;
2418 
2419             /* Remember the first position in the bracket expression.  */
2420             p1 = p;
2421 
2422             /* Push the number of bytes in the bitmap.  */
2423             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2424 
2425             /* Clear the whole map.  */
2426 	    bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
2427 
2428             /* charset_not matches newline according to a syntax bit.  */
2429             if ((re_opcode_t) b[-2] == charset_not
2430                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2431               SET_LIST_BIT ('\n');
2432 
2433             /* Read in characters and ranges, setting map bits.  */
2434             for (;;)
2435               {
2436 		int size;
2437 
2438                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2439 
2440 		if ((size = EXTRACT_UNSIGNED (&b[(1 << BYTEWIDTH) / BYTEWIDTH])))
2441 		  /* Ensure the space is enough to hold another interval
2442 		     of multi-byte chars in charset(_not)?.  */
2443 		  GET_BUFFER_SPACE (32 + 2 + size*8 + 8);
2444 
2445 		PATFETCH_RAW (c);
2446 
2447                 /* \ might escape characters inside [...] and [^...].  */
2448                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2449                   {
2450                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2451 
2452 		    PATFETCH_RAW (c1);
2453 		    if (ismbchar (c1)) {
2454 		      PATUNFETCH;
2455 		      PATFETCH_MBC (c1);
2456 		      (void) set_list_bits (c1, c1, syntax, b, translate);
2457 		      last_char = c1;
2458 		      continue;
2459 		    }
2460 		    SET_LIST_BIT (TRANSLATE (c1));
2461 		    last_char = c1;
2462                     continue;
2463                   }
2464 
2465                 /* Could be the end of the bracket expression.  If it's
2466                    not (i.e., when the bracket expression is `[]' so
2467                    far), the ']' character bit gets set way below.  */
2468                 if (c == ']' && p != p1 + 1)
2469                   break;
2470 
2471                 /* Look ahead to see if it's a range when the last thing
2472                    was a character class.  */
2473                 if (had_char_class && c == '-' && *p != ']')
2474                   FREE_STACK_RETURN (REG_ERANGE);
2475 
2476 		if (ismbchar (c)) {
2477 		  PATUNFETCH;
2478 		  PATFETCH_MBC (c);
2479 		}
2480 
2481                 /* Look ahead to see if it's a range when the last thing
2482                    was a character: if this is a hyphen not at the
2483                    beginning or the end of a list, then it's the range
2484                    operator.  */
2485                 if (c == '-'
2486 #if 0 /* The original was: */
2487                     && !(p - 2 >= pattern && p[-2] == '[')
2488                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2489 #else /* I wonder why he did not write like this.
2490 	 Have we got any problems?  */
2491 		    && p != p1 + 1
2492 #endif
2493                     && *p != ']')
2494                   {
2495 		    reg_errcode_t ret;
2496 
2497 		    assert (last_char >= 0);
2498 		    PATFETCH_RAW (c1);
2499 		    if (ismbchar (c1)) {
2500 		      PATUNFETCH;
2501 		      PATFETCH_MBC (c1);
2502 		    }
2503 		    ret = set_list_bits (last_char, c1, syntax, b, translate);
2504 		    last_char = c1;
2505                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2506                   }
2507 
2508                 else if (p[0] == '-' && p[1] != ']')
2509                   { /* This handles ranges made up of characters only.  */
2510                     reg_errcode_t ret;
2511 
2512 		    /* Move past the `-'.  */
2513 		    PATFETCH_RAW (c1);
2514 
2515 		    PATFETCH_RAW (c1);
2516 		    if (ismbchar (c1)) {
2517 		      PATUNFETCH;
2518 		      PATFETCH_MBC (c1);
2519 		    }
2520 		    ret = set_list_bits (c, c1, syntax, b, translate);
2521 		    last_char = c1;
2522                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2523                   }
2524 
2525                 /* See if we're at the beginning of a possible character
2526                    class.  */
2527 
2528                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2529                   { /* Leave room for the null.  */
2530                     char str[CHAR_CLASS_MAX_LENGTH + 1];
2531 
2532 		    PATFETCH_RAW (c);
2533                     c1 = 0;
2534 
2535                     /* If pattern is `[[:'.  */
2536                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2537 
2538                     for (;;)
2539                       {
2540                         PATFETCH (c);
2541                         if (c == ':' || c == ']' || p == pend
2542                             || c1 == CHAR_CLASS_MAX_LENGTH)
2543                           break;
2544                         str[c1++] = c;
2545                       }
2546                     str[c1] = '\0';
2547 
2548                     /* If isn't a word bracketed by `[:' and:`]':
2549                        undo the ending character, the letters, and leave
2550                        the leading `:' and `[' (but set bits for them).  */
2551                     if (c == ':' && *p == ']')
2552                       {
2553 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
2554                         boolean is_lower = STREQ (str, "lower");
2555                         boolean is_upper = STREQ (str, "upper");
2556 			wctype_t wt;
2557                         int ch;
2558 
2559 			wt = wctype (str);
2560 			if (wt == 0)
2561 			  FREE_STACK_RETURN (REG_ECTYPE);
2562 
2563                         /* Throw away the ] at the end of the character
2564                            class.  */
2565                         PATFETCH (c);
2566 
2567                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2568 
2569                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2570 			  {
2571 			    if (iswctype (btowc (ch), wt))
2572 			      SET_LIST_BIT (ch);
2573 
2574 			    if (translate && (is_upper || is_lower)
2575 				&& (ISUPPER (ch) || ISLOWER (ch)))
2576 			      SET_LIST_BIT (ch);
2577 			  }
2578 
2579                         had_char_class = true;
2580 #else
2581                         int ch;
2582                         boolean is_alnum = STREQ (str, "alnum");
2583                         boolean is_alpha = STREQ (str, "alpha");
2584                         boolean is_blank = STREQ (str, "blank");
2585                         boolean is_cntrl = STREQ (str, "cntrl");
2586                         boolean is_digit = STREQ (str, "digit");
2587                         boolean is_graph = STREQ (str, "graph");
2588                         boolean is_lower = STREQ (str, "lower");
2589                         boolean is_print = STREQ (str, "print");
2590                         boolean is_punct = STREQ (str, "punct");
2591                         boolean is_space = STREQ (str, "space");
2592                         boolean is_upper = STREQ (str, "upper");
2593                         boolean is_xdigit = STREQ (str, "xdigit");
2594 
2595                         if (!IS_CHAR_CLASS (str))
2596 			  FREE_STACK_RETURN (REG_ECTYPE);
2597 
2598                         /* Throw away the ] at the end of the character
2599                            class.  */
2600                         PATFETCH (c);
2601 
2602                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2603 
2604                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2605                           {
2606 			    /* This was split into 3 if's to
2607 			       avoid an arbitrary limit in some compiler.  */
2608                             if (   (is_alnum  && ISALNUM (ch))
2609                                 || (is_alpha  && ISALPHA (ch))
2610                                 || (is_blank  && ISBLANK (ch))
2611                                 || (is_cntrl  && ISCNTRL (ch)))
2612 			      SET_LIST_BIT (ch);
2613 			    if (   (is_digit  && ISDIGIT (ch))
2614                                 || (is_graph  && ISGRAPH (ch))
2615                                 || (is_lower  && ISLOWER (ch))
2616                                 || (is_print  && ISPRINT (ch)))
2617 			      SET_LIST_BIT (ch);
2618 			    if (   (is_punct  && ISPUNCT (ch))
2619                                 || (is_space  && ISSPACE (ch))
2620                                 || (is_upper  && ISUPPER (ch))
2621                                 || (is_xdigit && ISXDIGIT (ch)))
2622 			      SET_LIST_BIT (ch);
2623 			    if (   translate && (is_upper || is_lower)
2624 				&& (ISUPPER (ch) || ISLOWER (ch)))
2625 			      SET_LIST_BIT (ch);
2626                           }
2627                         had_char_class = true;
2628 #endif	/* libc || wctype.h */
2629 #ifdef DEBUG
2630 			last_char = -1;
2631 #endif
2632                       }
2633                     else
2634                       {
2635                         c1++;
2636                         while (c1--)
2637                           PATUNFETCH;
2638 #if 0 /* The original was: */
2639                         SET_LIST_BIT ('[');
2640                         SET_LIST_BIT (':');
2641 #else /* I think this is the right way.  */
2642 			SET_LIST_BIT (TRANSLATE ('['));
2643 			SET_LIST_BIT (TRANSLATE (':'));
2644 #endif
2645                         had_char_class = false;
2646 			last_char = ':';
2647                       }
2648                   }
2649                 else
2650                   {
2651                     had_char_class = false;
2652 		    (void) set_list_bits (c, c, syntax, b, translate);
2653 		    last_char = c;
2654                   }
2655               }
2656 
2657             /* Discard any (non)matching list bytes that are all 0 at the
2658                end of the map.  Decrease the map-length byte too.  */
2659             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2660               b[-1]--;
2661 	    if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
2662 	      bcopy (&b[(1 << BYTEWIDTH) / BYTEWIDTH], &b[b[-1]],
2663 		     2 + EXTRACT_UNSIGNED (&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
2664 	    b += b[-1] + 2 + EXTRACT_UNSIGNED (&b[b[-1]])*8;
2665 	    break;
2666           }
2667           break;
2668 
2669 
2670 	case '(':
2671           if (syntax & RE_NO_BK_PARENS)
2672             goto handle_open;
2673           else
2674             goto normal_char;
2675 
2676 
2677         case ')':
2678           if (syntax & RE_NO_BK_PARENS)
2679             goto handle_close;
2680           else
2681             goto normal_char;
2682 
2683 
2684         case '\n':
2685           if (syntax & RE_NEWLINE_ALT)
2686             goto handle_alt;
2687           else
2688             goto normal_char;
2689 
2690 
2691 	case '|':
2692           if (syntax & RE_NO_BK_VBAR)
2693             goto handle_alt;
2694           else
2695             goto normal_char;
2696 
2697 
2698         case '{':
2699            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2700              goto handle_interval;
2701            else
2702              goto normal_char;
2703 
2704 
2705         case '\\':
2706           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2707 
2708           /* Do not translate the character after the \, so that we can
2709              distinguish, e.g., \B from \b, even if we normally would
2710              translate, e.g., B to b.  */
2711           PATFETCH_RAW (c);
2712 
2713           switch (c)
2714             {
2715             case '(':
2716               if (syntax & RE_NO_BK_PARENS)
2717                 goto normal_backslash;
2718 
2719             handle_open:
2720               bufp->re_nsub++;
2721               regnum++;
2722 
2723               if (COMPILE_STACK_FULL)
2724                 {
2725                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
2726                             compile_stack_elt_t);
2727                   if (compile_stack.stack == NULL) return REG_ESPACE;
2728 
2729                   compile_stack.size <<= 1;
2730                 }
2731 
2732               /* These are the values to restore when we hit end of this
2733                  group.  They are all relative offsets, so that if the
2734                  whole pattern moves because of realloc, they will still
2735                  be valid.  */
2736               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2737               COMPILE_STACK_TOP.fixup_alt_jump
2738                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2739               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2740               COMPILE_STACK_TOP.regnum = regnum;
2741 
2742               /* We will eventually replace the 0 with the number of
2743                  groups inner to this one.  But do not push a
2744                  start_memory for groups beyond the last one we can
2745                  represent in the compiled pattern.  */
2746               if (regnum <= MAX_REGNUM)
2747                 {
2748                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2749                   BUF_PUSH_3 (start_memory, regnum, 0);
2750                 }
2751 
2752               compile_stack.avail++;
2753 
2754               fixup_alt_jump = 0;
2755               laststart = 0;
2756               begalt = b;
2757 	      /* If we've reached MAX_REGNUM groups, then this open
2758 		 won't actually generate any code, so we'll have to
2759 		 clear pending_exact explicitly.  */
2760 	      pending_exact = 0;
2761               break;
2762 
2763 
2764             case ')':
2765               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2766 
2767               if (COMPILE_STACK_EMPTY)
2768                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2769                   goto normal_backslash;
2770                 else
2771                   FREE_STACK_RETURN (REG_ERPAREN);
2772 
2773             handle_close:
2774               if (fixup_alt_jump)
2775                 { /* Push a dummy failure point at the end of the
2776                      alternative for a possible future
2777                      `pop_failure_jump' to pop.  See comments at
2778                      `push_dummy_failure' in `re_match_2'.  */
2779                   BUF_PUSH (push_dummy_failure);
2780 
2781                   /* We allocated space for this jump when we assigned
2782                      to `fixup_alt_jump', in the `handle_alt' case below.  */
2783                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2784                 }
2785 
2786               /* See similar code for backslashed left paren above.  */
2787               if (COMPILE_STACK_EMPTY)
2788                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2789                   goto normal_char;
2790                 else
2791                   FREE_STACK_RETURN (REG_ERPAREN);
2792 
2793               /* Since we just checked for an empty stack above, this
2794                  ``can't happen''.  */
2795               assert (compile_stack.avail != 0);
2796               {
2797                 /* We don't just want to restore into `regnum', because
2798                    later groups should continue to be numbered higher,
2799                    as in `(ab)c(de)' -- the second group is #2.  */
2800                 regnum_t this_group_regnum;
2801 
2802                 compile_stack.avail--;
2803                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2804                 fixup_alt_jump
2805                   = COMPILE_STACK_TOP.fixup_alt_jump
2806                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2807                     : 0;
2808                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2809                 this_group_regnum = COMPILE_STACK_TOP.regnum;
2810 		/* If we've reached MAX_REGNUM groups, then this open
2811 		   won't actually generate any code, so we'll have to
2812 		   clear pending_exact explicitly.  */
2813 		pending_exact = 0;
2814 
2815                 /* We're at the end of the group, so now we know how many
2816                    groups were inside this one.  */
2817                 if (this_group_regnum <= MAX_REGNUM)
2818                   {
2819                     unsigned char *inner_group_loc
2820                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2821 
2822                     *inner_group_loc = regnum - this_group_regnum;
2823                     BUF_PUSH_3 (stop_memory, this_group_regnum,
2824                                 regnum - this_group_regnum);
2825                   }
2826               }
2827               break;
2828 
2829 
2830             case '|':					/* `\|'.  */
2831               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2832                 goto normal_backslash;
2833             handle_alt:
2834               if (syntax & RE_LIMITED_OPS)
2835                 goto normal_char;
2836 
2837               /* Insert before the previous alternative a jump which
2838                  jumps to this alternative if the former fails.  */
2839               GET_BUFFER_SPACE (3);
2840               INSERT_JUMP (on_failure_jump, begalt, b + 6);
2841               pending_exact = 0;
2842               b += 3;
2843 
2844               /* The alternative before this one has a jump after it
2845                  which gets executed if it gets matched.  Adjust that
2846                  jump so it will jump to this alternative's analogous
2847                  jump (put in below, which in turn will jump to the next
2848                  (if any) alternative's such jump, etc.).  The last such
2849                  jump jumps to the correct final destination.  A picture:
2850                           _____ _____
2851                           |   | |   |
2852                           |   v |   v
2853                          a | b   | c
2854 
2855                  If we are at `b', then fixup_alt_jump right now points to a
2856                  three-byte space after `a'.  We'll put in the jump, set
2857                  fixup_alt_jump to right after `b', and leave behind three
2858                  bytes which we'll fill in when we get to after `c'.  */
2859 
2860               if (fixup_alt_jump)
2861                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2862 
2863               /* Mark and leave space for a jump after this alternative,
2864                  to be filled in later either by next alternative or
2865                  when know we're at the end of a series of alternatives.  */
2866               fixup_alt_jump = b;
2867               GET_BUFFER_SPACE (3);
2868               b += 3;
2869 
2870               laststart = 0;
2871               begalt = b;
2872               break;
2873 
2874 
2875             case '{':
2876               /* If \{ is a literal.  */
2877               if (!(syntax & RE_INTERVALS)
2878                      /* If we're at `\{' and it's not the open-interval
2879                         operator.  */
2880                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2881                   || (p - 2 == pattern  &&  p == pend))
2882                 goto normal_backslash;
2883 
2884             handle_interval:
2885               {
2886                 /* If got here, then the syntax allows intervals.  */
2887 
2888                 /* At least (most) this many matches must be made.  */
2889                 int lower_bound = -1, upper_bound = -1;
2890 
2891                 beg_interval = p - 1;
2892 
2893                 if (p == pend)
2894                   {
2895                     if (syntax & RE_NO_BK_BRACES)
2896                       goto unfetch_interval;
2897                     else
2898                       FREE_STACK_RETURN (REG_EBRACE);
2899                   }
2900 
2901                 GET_UNSIGNED_NUMBER (lower_bound);
2902 
2903                 if (c == ',')
2904                   {
2905                     GET_UNSIGNED_NUMBER (upper_bound);
2906                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2907                   }
2908                 else
2909                   /* Interval such as `{1}' => match exactly once. */
2910                   upper_bound = lower_bound;
2911 
2912                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2913                     || lower_bound > upper_bound)
2914                   {
2915                     if (syntax & RE_NO_BK_BRACES)
2916                       goto unfetch_interval;
2917                     else
2918                       FREE_STACK_RETURN (REG_BADBR);
2919                   }
2920 
2921                 if (!(syntax & RE_NO_BK_BRACES))
2922                   {
2923                     if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2924 
2925                     PATFETCH (c);
2926                   }
2927 
2928                 if (c != '}')
2929                   {
2930                     if (syntax & RE_NO_BK_BRACES)
2931                       goto unfetch_interval;
2932                     else
2933                       FREE_STACK_RETURN (REG_BADBR);
2934                   }
2935 
2936                 /* We just parsed a valid interval.  */
2937 
2938                 /* If it's invalid to have no preceding re.  */
2939                 if (!laststart)
2940                   {
2941                     if (syntax & RE_CONTEXT_INVALID_OPS)
2942                       FREE_STACK_RETURN (REG_BADRPT);
2943                     else if (syntax & RE_CONTEXT_INDEP_OPS)
2944                       laststart = b;
2945                     else
2946                       goto unfetch_interval;
2947                   }
2948 
2949                 /* If the upper bound is zero, don't want to succeed at
2950                    all; jump from `laststart' to `b + 3', which will be
2951                    the end of the buffer after we insert the jump.  */
2952                  if (upper_bound == 0)
2953                    {
2954                      GET_BUFFER_SPACE (3);
2955                      INSERT_JUMP (jump, laststart, b + 3);
2956                      b += 3;
2957                    }
2958 
2959                  /* Otherwise, we have a nontrivial interval.  When
2960                     we're all done, the pattern will look like:
2961                       set_number_at <jump count> <upper bound>
2962                       set_number_at <succeed_n count> <lower bound>
2963                       succeed_n <after jump addr> <succeed_n count>
2964                       <body of loop>
2965                       jump_n <succeed_n addr> <jump count>
2966                     (The upper bound and `jump_n' are omitted if
2967                     `upper_bound' is 1, though.)  */
2968                  else
2969                    { /* If the upper bound is > 1, we need to insert
2970                         more at the end of the loop.  */
2971                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
2972 
2973                      GET_BUFFER_SPACE (nbytes);
2974 
2975                      /* Initialize lower bound of the `succeed_n', even
2976                         though it will be set during matching by its
2977                         attendant `set_number_at' (inserted next),
2978                         because `re_compile_fastmap' needs to know.
2979                         Jump to the `jump_n' we might insert below.  */
2980                      INSERT_JUMP2 (succeed_n, laststart,
2981                                    b + 5 + (upper_bound > 1) * 5,
2982                                    lower_bound);
2983                      b += 5;
2984 
2985                      /* Code to initialize the lower bound.  Insert
2986                         before the `succeed_n'.  The `5' is the last two
2987                         bytes of this `set_number_at', plus 3 bytes of
2988                         the following `succeed_n'.  */
2989                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2990                      b += 5;
2991 
2992                      if (upper_bound > 1)
2993                        { /* More than one repetition is allowed, so
2994                             append a backward jump to the `succeed_n'
2995                             that starts this interval.
2996 
2997                             When we've reached this during matching,
2998                             we'll have matched the interval once, so
2999                             jump back only `upper_bound - 1' times.  */
3000                          STORE_JUMP2 (jump_n, b, laststart + 5,
3001                                       upper_bound - 1);
3002                          b += 5;
3003 
3004                          /* The location we want to set is the second
3005                             parameter of the `jump_n'; that is `b-2' as
3006                             an absolute address.  `laststart' will be
3007                             the `set_number_at' we're about to insert;
3008                             `laststart+3' the number to set, the source
3009                             for the relative address.  But we are
3010                             inserting into the middle of the pattern --
3011                             so everything is getting moved up by 5.
3012                             Conclusion: (b - 2) - (laststart + 3) + 5,
3013                             i.e., b - laststart.
3014 
3015                             We insert this at the beginning of the loop
3016                             so that if we fail during matching, we'll
3017                             reinitialize the bounds.  */
3018                          insert_op2 (set_number_at, laststart, b - laststart,
3019                                      upper_bound - 1, b);
3020                          b += 5;
3021                        }
3022                    }
3023                 pending_exact = 0;
3024                 beg_interval = NULL;
3025               }
3026               break;
3027 
3028             unfetch_interval:
3029               /* If an invalid interval, match the characters as literals.  */
3030                assert (beg_interval);
3031                p = beg_interval;
3032                beg_interval = NULL;
3033 
3034                /* normal_char and normal_backslash need `c'.  */
3035                PATFETCH (c);
3036 
3037                if (!(syntax & RE_NO_BK_BRACES))
3038                  {
3039                    if (p > pattern  &&  p[-1] == '\\')
3040                      goto normal_backslash;
3041                  }
3042                goto normal_char;
3043 
3044 #ifdef emacs
3045             /* There is no way to specify the before_dot and after_dot
3046                operators.  rms says this is ok.  --karl  */
3047             case '=':
3048               BUF_PUSH (at_dot);
3049               break;
3050 
3051             case 's':
3052               laststart = b;
3053               PATFETCH (c);
3054               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
3055               break;
3056 
3057             case 'S':
3058               laststart = b;
3059               PATFETCH (c);
3060               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
3061               break;
3062 #endif /* emacs */
3063 
3064 
3065             case 'w':
3066 	      if (re_syntax_options & RE_NO_GNU_OPS)
3067 		goto normal_char;
3068               laststart = b;
3069               BUF_PUSH (wordchar);
3070               break;
3071 
3072 
3073             case 'W':
3074 	      if (re_syntax_options & RE_NO_GNU_OPS)
3075 		goto normal_char;
3076               laststart = b;
3077               BUF_PUSH (notwordchar);
3078               break;
3079 
3080 
3081             case '<':
3082 	      if (re_syntax_options & RE_NO_GNU_OPS)
3083 		goto normal_char;
3084               BUF_PUSH (wordbeg);
3085               break;
3086 
3087             case '>':
3088 	      if (re_syntax_options & RE_NO_GNU_OPS)
3089 		goto normal_char;
3090               BUF_PUSH (wordend);
3091               break;
3092 
3093             case 'b':
3094 	      if (re_syntax_options & RE_NO_GNU_OPS)
3095 		goto normal_char;
3096               BUF_PUSH (wordbound);
3097               break;
3098 
3099             case 'B':
3100 	      if (re_syntax_options & RE_NO_GNU_OPS)
3101 		goto normal_char;
3102               BUF_PUSH (notwordbound);
3103               break;
3104 
3105             case '`':
3106 	      if (re_syntax_options & RE_NO_GNU_OPS)
3107 		goto normal_char;
3108               BUF_PUSH (begbuf);
3109               break;
3110 
3111             case '\'':
3112 	      if (re_syntax_options & RE_NO_GNU_OPS)
3113 		goto normal_char;
3114               BUF_PUSH (endbuf);
3115               break;
3116 
3117             case '1': case '2': case '3': case '4': case '5':
3118             case '6': case '7': case '8': case '9':
3119               if (syntax & RE_NO_BK_REFS)
3120                 goto normal_char;
3121 
3122               c1 = c - '0';
3123 
3124               if (c1 > regnum)
3125                 FREE_STACK_RETURN (REG_ESUBREG);
3126 
3127               /* Can't back reference to a subexpression if inside of it.  */
3128               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
3129                 goto normal_char;
3130 
3131               laststart = b;
3132               BUF_PUSH_2 (duplicate, c1);
3133               break;
3134 
3135 
3136             case '+':
3137             case '?':
3138               if (syntax & RE_BK_PLUS_QM)
3139                 goto handle_plus;
3140               else
3141                 goto normal_backslash;
3142 
3143             default:
3144             normal_backslash:
3145               /* You might think it would be useful for \ to mean
3146                  not to translate; but if we don't translate it
3147                  it will never match anything.  */
3148 	      if (!ismbchar (c))
3149 		c = TRANSLATE (c);
3150               goto normal_char;
3151             }
3152           break;
3153 
3154 
3155 	default:
3156         /* Expects the character in `c'.  */
3157 	normal_char:
3158 
3159 	  p1 = p;
3160 	  if (ismbchar (c)) {
3161 	    p += mbclen (c) - 1;
3162 	    if (p > pend)
3163 	      return REG_EEND;
3164 	  }
3165 
3166 	      /* If no exactn currently being built.  */
3167           if (!pending_exact
3168 
3169               /* If last exactn not at current position.  */
3170               || pending_exact + *pending_exact + 1 != b
3171 
3172               /* We have only one byte following the exactn for the count.  */
3173 	      || *pending_exact >= (1 << BYTEWIDTH) - mbclen(c)
3174 
3175               /* If followed by a repetition operator.  */
3176               || *p == '*' || *p == '^'
3177 	      || ((syntax & RE_BK_PLUS_QM)
3178 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
3179 		  : (*p == '+' || *p == '?'))
3180 	      || ((syntax & RE_INTERVALS)
3181                   && ((syntax & RE_NO_BK_BRACES)
3182 		      ? *p == '{'
3183                       : (p[0] == '\\' && p[1] == '{'))))
3184 	    {
3185 	      /* Start building a new exactn.  */
3186 
3187               laststart = b;
3188 
3189 	      BUF_PUSH_2 (exactn, 0);
3190 	      pending_exact = b - 1;
3191             }
3192 
3193 	  BUF_PUSH (c);
3194           (*pending_exact)++;
3195 	  while (p1 < p) {
3196 	    BUF_PUSH (*p1++);
3197 	    (*pending_exact)++;
3198 	  }
3199 	  break;
3200         } /* switch (c) */
3201     } /* while p != pend */
3202 
3203 
3204   /* Through the pattern now.  */
3205 
3206   if (fixup_alt_jump)
3207     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
3208 
3209   if (!COMPILE_STACK_EMPTY)
3210     FREE_STACK_RETURN (REG_EPAREN);
3211 
3212   /* If we don't want backtracking, force success
3213      the first time we reach the end of the compiled pattern.  */
3214   if (syntax & RE_NO_POSIX_BACKTRACKING)
3215     BUF_PUSH (succeed);
3216 
3217   free (compile_stack.stack);
3218 
3219   /* We have succeeded; set the length of the buffer.  */
3220   bufp->used = b - bufp->buffer;
3221 
3222 #ifdef DEBUG
3223   if (debug)
3224     {
3225       DEBUG_PRINT1 ("\nCompiled pattern: \n");
3226       print_compiled_pattern (bufp);
3227     }
3228 #endif /* DEBUG */
3229 
3230 #ifndef MATCH_MAY_ALLOCATE
3231   /* Initialize the failure stack to the largest possible stack.  This
3232      isn't necessary unless we're trying to avoid calling alloca in
3233      the search and match routines.  */
3234   {
3235     int num_regs = bufp->re_nsub + 1;
3236 
3237     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
3238        is strictly greater than re_max_failures, the largest possible stack
3239        is 2 * re_max_failures failure points.  */
3240     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
3241       {
3242 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
3243 
3244 #ifdef emacs
3245 	if (! fail_stack.stack)
3246 	  fail_stack.stack
3247 	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
3248 					    * sizeof (fail_stack_elt_t));
3249 	else
3250 	  fail_stack.stack
3251 	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
3252 					     (fail_stack.size
3253 					      * sizeof (fail_stack_elt_t)));
3254 #else /* not emacs */
3255 	if (! fail_stack.stack)
3256 	  fail_stack.stack
3257 	    = (fail_stack_elt_t *) malloc (fail_stack.size
3258 					   * sizeof (fail_stack_elt_t));
3259 	else
3260 	  fail_stack.stack
3261 	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
3262 					    (fail_stack.size
3263 					     * sizeof (fail_stack_elt_t)));
3264 #endif /* not emacs */
3265       }
3266 
3267     regex_grow_registers (num_regs);
3268   }
3269 #endif /* not MATCH_MAY_ALLOCATE */
3270 
3271   return REG_NOERROR;
3272 } /* regex_compile */
3273 
3274 /* Subroutines for `regex_compile'.  */
3275 
3276 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
3277 
3278 static void
store_op1(op,loc,arg)3279 store_op1 (op, loc, arg)
3280     re_opcode_t op;
3281     unsigned char *loc;
3282     int arg;
3283 {
3284   *loc = (unsigned char) op;
3285   STORE_NUMBER (loc + 1, arg);
3286 }
3287 
3288 
3289 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
3290 
3291 static void
store_op2(op,loc,arg1,arg2)3292 store_op2 (op, loc, arg1, arg2)
3293     re_opcode_t op;
3294     unsigned char *loc;
3295     int arg1, arg2;
3296 {
3297   *loc = (unsigned char) op;
3298   STORE_NUMBER (loc + 1, arg1);
3299   STORE_NUMBER (loc + 3, arg2);
3300 }
3301 
3302 
3303 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
3304    for OP followed by two-byte integer parameter ARG.  */
3305 
3306 static void
insert_op1(op,loc,arg,end)3307 insert_op1 (op, loc, arg, end)
3308     re_opcode_t op;
3309     unsigned char *loc;
3310     int arg;
3311     unsigned char *end;
3312 {
3313   register unsigned char *pfrom = end;
3314   register unsigned char *pto = end + 3;
3315 
3316   while (pfrom != loc)
3317     *--pto = *--pfrom;
3318 
3319   store_op1 (op, loc, arg);
3320 }
3321 
3322 
3323 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
3324 
3325 static void
insert_op2(op,loc,arg1,arg2,end)3326 insert_op2 (op, loc, arg1, arg2, end)
3327     re_opcode_t op;
3328     unsigned char *loc;
3329     int arg1, arg2;
3330     unsigned char *end;
3331 {
3332   register unsigned char *pfrom = end;
3333   register unsigned char *pto = end + 5;
3334 
3335   while (pfrom != loc)
3336     *--pto = *--pfrom;
3337 
3338   store_op2 (op, loc, arg1, arg2);
3339 }
3340 
3341 
3342 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
3343    after an alternative or a begin-subexpression.  We assume there is at
3344    least one character before the ^.  */
3345 
3346 static boolean
at_begline_loc_p(pattern,p,syntax)3347 at_begline_loc_p (pattern, p, syntax)
3348     const char *pattern, *p;
3349     reg_syntax_t syntax;
3350 {
3351   const char *prev = p - 2;
3352   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
3353 
3354   return
3355        /* After a subexpression?  */
3356        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
3357        /* After an alternative?  */
3358     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3359 }
3360 
3361 
3362 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3363    at least one character after the $, i.e., `P < PEND'.  */
3364 
3365 static boolean
at_endline_loc_p(p,pend,syntax)3366 at_endline_loc_p (p, pend, syntax)
3367     const char *p, *pend;
3368     reg_syntax_t syntax;
3369 {
3370   const char *next = p;
3371   boolean next_backslash = *next == '\\';
3372   const char *next_next = p + 1 < pend ? p + 1 : 0;
3373 
3374   return
3375        /* Before a subexpression?  */
3376        (syntax & RE_NO_BK_PARENS ? *next == ')'
3377         : next_backslash && next_next && *next_next == ')')
3378        /* Before an alternative?  */
3379     || (syntax & RE_NO_BK_VBAR ? *next == '|'
3380         : next_backslash && next_next && *next_next == '|');
3381 }
3382 
3383 
3384 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3385    false if it's not.  */
3386 
3387 static boolean
group_in_compile_stack(compile_stack,regnum)3388 group_in_compile_stack (compile_stack, regnum)
3389     compile_stack_type compile_stack;
3390     regnum_t regnum;
3391 {
3392   int this_element;
3393 
3394   for (this_element = compile_stack.avail - 1;
3395        this_element >= 0;
3396        this_element--)
3397     if (compile_stack.stack[this_element].regnum == regnum)
3398       return true;
3399 
3400   return false;
3401 }
3402 
3403 
3404 #if 0 /* We use set_list_bits() now.  */
3405 /* Read the ending character of a range (in a bracket expression) from the
3406    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3407    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3408    Then we set the translation of all bits between the starting and
3409    ending characters (inclusive) in the compiled pattern B.
3410 
3411    Return an error code.
3412 
3413    We use these short variable names so we can use the same macros as
3414    `regex_compile' itself.  */
3415 
3416 static reg_errcode_t
3417 compile_range (p_ptr, pend, translate, syntax, b)
3418     const char **p_ptr, *pend;
3419     RE_TRANSLATE_TYPE translate;
3420     reg_syntax_t syntax;
3421     unsigned char *b;
3422 {
3423   unsigned this_char;
3424 
3425   const char *p = *p_ptr;
3426   unsigned int range_start, range_end;
3427 
3428   if (p == pend)
3429     return REG_ERANGE;
3430 
3431   /* Even though the pattern is a signed `char *', we need to fetch
3432      with unsigned char *'s; if the high bit of the pattern character
3433      is set, the range endpoints will be negative if we fetch using a
3434      signed char *.
3435 
3436      We also want to fetch the endpoints without translating them; the
3437      appropriate translation is done in the bit-setting loop below.  */
3438   /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3439   range_start = ((const unsigned char *) p)[-2];
3440   range_end   = ((const unsigned char *) p)[0];
3441 
3442   /* Have to increment the pointer into the pattern string, so the
3443      caller isn't still at the ending character.  */
3444   (*p_ptr)++;
3445 
3446   /* If the start is after the end, the range is empty.  */
3447   if (range_start > range_end)
3448     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3449 
3450   /* Here we see why `this_char' has to be larger than an `unsigned
3451      char' -- the range is inclusive, so if `range_end' == 0xff
3452      (assuming 8-bit characters), we would otherwise go into an infinite
3453      loop, since all characters <= 0xff.  */
3454   for (this_char = range_start; this_char <= range_end; this_char++)
3455     {
3456       SET_LIST_BIT (TRANSLATE (this_char));
3457     }
3458 
3459   return REG_NOERROR;
3460 }
3461 #endif
3462 
3463 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3464    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3465    characters can start a string that matches the pattern.  This fastmap
3466    is used by re_search to skip quickly over impossible starting points.
3467 
3468    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3469    area as BUFP->fastmap.
3470 
3471    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3472    the pattern buffer.
3473 
3474    Returns 0 if we succeed, -2 if an internal error.   */
3475 
3476 int
re_compile_fastmap(bufp)3477 re_compile_fastmap (bufp)
3478      struct re_pattern_buffer *bufp;
3479 {
3480   int j, k;
3481 #ifdef MATCH_MAY_ALLOCATE
3482   fail_stack_type fail_stack;
3483 #endif
3484 #ifndef REGEX_MALLOC
3485   char *destination;
3486 #endif
3487 
3488   register char *fastmap = bufp->fastmap;
3489   unsigned char *pattern = bufp->buffer;
3490   unsigned char *p = pattern;
3491   register unsigned char *pend = pattern + bufp->used;
3492 
3493 #ifdef REL_ALLOC
3494   /* This holds the pointer to the failure stack, when
3495      it is allocated relocatably.  */
3496   fail_stack_elt_t *failure_stack_ptr;
3497 #endif
3498 
3499   /* Assume that each path through the pattern can be null until
3500      proven otherwise.  We set this false at the bottom of switch
3501      statement, to which we get only if a particular path doesn't
3502      match the empty string.  */
3503   boolean path_can_be_null = true;
3504 
3505   /* We aren't doing a `succeed_n' to begin with.  */
3506   boolean succeed_n_p = false;
3507 
3508   assert (fastmap != NULL && p != NULL);
3509 
3510   INIT_FAIL_STACK ();
3511   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
3512   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
3513   bufp->can_be_null = 0;
3514 
3515   while (1)
3516     {
3517       if (p == pend || *p == succeed)
3518 	{
3519 	  /* We have reached the (effective) end of pattern.  */
3520 	  if (!FAIL_STACK_EMPTY ())
3521 	    {
3522 	      bufp->can_be_null |= path_can_be_null;
3523 
3524 	      /* Reset for next path.  */
3525 	      path_can_be_null = true;
3526 
3527 	      p = fail_stack.stack[--fail_stack.avail].pointer;
3528 
3529 	      continue;
3530 	    }
3531 	  else
3532 	    break;
3533 	}
3534 
3535       /* We should never be about to go beyond the end of the pattern.  */
3536       assert (p < pend);
3537 
3538       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3539 	{
3540 
3541         /* I guess the idea here is to simply not bother with a fastmap
3542            if a backreference is used, since it's too hard to figure out
3543            the fastmap for the corresponding group.  Setting
3544            `can_be_null' stops `re_search_2' from using the fastmap, so
3545            that is all we do.  */
3546 	case duplicate:
3547 	  bufp->can_be_null = 1;
3548           goto done;
3549 
3550 
3551       /* Following are the cases which match a character.  These end
3552          with `break'.  */
3553 
3554 	case exactn:
3555           fastmap[p[1]] = 1;
3556 	  break;
3557 
3558 
3559         case charset:
3560 	  /* NOTE: Charset for single-byte chars never contain
3561 		   multi-byte char.  See set_list_bits().  */
3562           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3563 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3564               fastmap[j] = 1;
3565 	  {
3566 	    unsigned short size;
3567 	    unsigned char c, end;
3568 	    char buf[8], bufend[8];
3569 
3570 	    p += p[-1] + 2;
3571 	    size = EXTRACT_UNSIGNED (&p[-2]);
3572 	    for (j = 0; j < size; j++)
3573 	      {
3574 		/* set bits for 1st bytes of multi-byte chars.  */
3575 		mbcstore (buf, EXTRACT_MBC (&p[j*8]));
3576 		mbcstore (bufend, EXTRACT_MBC (&p[j*8 + 4]));
3577 		for (c = (unsigned char) buf[0], end = (unsigned char) bufend[0];
3578 		     c <= end;
3579 		     c++)
3580 		  /* NOTE: Charset for multi-byte chars might contain
3581 		           single-byte chars.  We must reject them. */
3582 		  if (ismbchar (c))
3583 		    fastmap[c] = 1;
3584 	      }
3585 	  }
3586 	  break;
3587 
3588 
3589 	case charset_not:
3590 	  /* S: set of all single-byte chars.
3591 	     M: set of all first bytes that can start multi-byte chars.
3592 	     s: any set of single-byte chars.
3593 	     m: any set of first bytes that can start multi-byte chars.
3594 
3595 	     We assume S+M = U.
3596 	       ___      _   _
3597 	       s+m = (S*s+M*m).  */
3598 	  /* Chars beyond end of map must be allowed.  */
3599 	  /* NOTE: Charset_not for single-byte chars might contain
3600 		   multi-byte chars.  See set_list_bits(). */
3601 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3602 	    if (!ismbchar (j))
3603 	      fastmap[j] = 1;
3604 
3605 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3606 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3607 	      if (!ismbchar (j))
3608 		fastmap[j] = 1;
3609 	  {
3610 	    unsigned short size;
3611 	    unsigned c, beg;
3612 	    char buf[8];
3613 
3614 	    p += p[-1] + 2;
3615 	    size = EXTRACT_UNSIGNED (&p[-2]);
3616 	    c = 0x00;
3617 	    for (j = 0; j < size; j++) {
3618 	      mbcstore (buf, EXTRACT_MBC (&p[j*8 + 0]));
3619 	      for (beg = (unsigned char) buf[0]; c <= beg; c++)
3620 		if (ismbchar (c))
3621 		  fastmap[c] = 1;
3622 	      mbcstore (buf, EXTRACT_MBC (&p[j*8 + 4]));
3623 	      c = (unsigned char) buf[0];
3624 	    }
3625 	    for (beg = 0xff; c <= beg; c++)
3626 	      if (ismbchar (c))
3627 		fastmap[c] = 1;
3628 	  }
3629           break;
3630 
3631 
3632 	case wordchar:
3633 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3634 	    if (SYNTAX (j) == Sword)
3635 	      fastmap[j] = 1;
3636 	  break;
3637 
3638 
3639 	case notwordchar:
3640 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3641 	    if (SYNTAX (j) != Sword)
3642 	      fastmap[j] = 1;
3643 	  break;
3644 
3645 
3646         case anychar:
3647 	  {
3648 	    int fastmap_newline = fastmap['\n'];
3649 
3650 	    /* `.' matches anything ...  */
3651 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
3652 	      fastmap[j] = 1;
3653 
3654 	    /* ... except perhaps newline.  */
3655 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
3656 	      fastmap['\n'] = fastmap_newline;
3657 
3658 	    /* Return if we have already set `can_be_null'; if we have,
3659 	       then the fastmap is irrelevant.  Something's wrong here.  */
3660 	    else if (bufp->can_be_null)
3661 	      goto done;
3662 
3663 	    /* Otherwise, have to check alternative paths.  */
3664 	    break;
3665 	  }
3666 
3667 #ifdef emacs
3668         case syntaxspec:
3669 	  k = *p++;
3670 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3671 	    if (SYNTAX (j) == (enum syntaxcode) k)
3672 	      fastmap[j] = 1;
3673 	  break;
3674 
3675 
3676 	case notsyntaxspec:
3677 	  k = *p++;
3678 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3679 	    if (SYNTAX (j) != (enum syntaxcode) k)
3680 	      fastmap[j] = 1;
3681 	  break;
3682 
3683 
3684       /* All cases after this match the empty string.  These end with
3685          `continue'.  */
3686 
3687 
3688 	case before_dot:
3689 	case at_dot:
3690 	case after_dot:
3691           continue;
3692 #endif /* emacs */
3693 
3694 
3695         case no_op:
3696         case begline:
3697         case endline:
3698 	case begbuf:
3699 	case endbuf:
3700 	case wordbound:
3701 	case notwordbound:
3702 	case wordbeg:
3703 	case wordend:
3704         case push_dummy_failure:
3705           continue;
3706 
3707 
3708 	case jump_n:
3709         case pop_failure_jump:
3710 	case maybe_pop_jump:
3711 	case jump:
3712         case jump_past_alt:
3713 	case dummy_failure_jump:
3714           EXTRACT_NUMBER_AND_INCR (j, p);
3715 	  p += j;
3716 	  if (j > 0)
3717 	    continue;
3718 
3719           /* Jump backward implies we just went through the body of a
3720              loop and matched nothing.  Opcode jumped to should be
3721              `on_failure_jump' or `succeed_n'.  Just treat it like an
3722              ordinary jump.  For a * loop, it has pushed its failure
3723              point already; if so, discard that as redundant.  */
3724           if ((re_opcode_t) *p != on_failure_jump
3725 	      && (re_opcode_t) *p != succeed_n)
3726 	    continue;
3727 
3728           p++;
3729           EXTRACT_NUMBER_AND_INCR (j, p);
3730           p += j;
3731 
3732           /* If what's on the stack is where we are now, pop it.  */
3733           if (!FAIL_STACK_EMPTY ()
3734 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3735             fail_stack.avail--;
3736 
3737           continue;
3738 
3739 
3740         case on_failure_jump:
3741         case on_failure_keep_string_jump:
3742 	handle_on_failure_jump:
3743           EXTRACT_NUMBER_AND_INCR (j, p);
3744 
3745           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3746              end of the pattern.  We don't want to push such a point,
3747              since when we restore it above, entering the switch will
3748              increment `p' past the end of the pattern.  We don't need
3749              to push such a point since we obviously won't find any more
3750              fastmap entries beyond `pend'.  Such a pattern can match
3751              the null string, though.  */
3752           if (p + j < pend)
3753             {
3754               if (!PUSH_PATTERN_OP (p + j, fail_stack))
3755 		{
3756 		  RESET_FAIL_STACK ();
3757 		  return -2;
3758 		}
3759             }
3760           else
3761             bufp->can_be_null = 1;
3762 
3763           if (succeed_n_p)
3764             {
3765               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
3766               succeed_n_p = false;
3767 	    }
3768 
3769           continue;
3770 
3771 
3772 	case succeed_n:
3773           /* Get to the number of times to succeed.  */
3774           p += 2;
3775 
3776           /* Increment p past the n for when k != 0.  */
3777           EXTRACT_NUMBER_AND_INCR (k, p);
3778           if (k == 0)
3779 	    {
3780               p -= 4;
3781   	      succeed_n_p = true;  /* Spaghetti code alert.  */
3782               goto handle_on_failure_jump;
3783             }
3784           continue;
3785 
3786 
3787 	case set_number_at:
3788           p += 4;
3789           continue;
3790 
3791 
3792 	case start_memory:
3793         case stop_memory:
3794 	  p += 2;
3795 	  continue;
3796 
3797 
3798 	default:
3799           abort (); /* We have listed all the cases.  */
3800         } /* switch *p++ */
3801 
3802       /* Getting here means we have found the possible starting
3803          characters for one path of the pattern -- and that the empty
3804          string does not match.  We need not follow this path further.
3805          Instead, look at the next alternative (remembered on the
3806          stack), or quit if no more.  The test at the top of the loop
3807          does these things.  */
3808       path_can_be_null = false;
3809       p = pend;
3810     } /* while p */
3811 
3812   /* Set `can_be_null' for the last path (also the first path, if the
3813      pattern is empty).  */
3814   bufp->can_be_null |= path_can_be_null;
3815 
3816  done:
3817   RESET_FAIL_STACK ();
3818   return 0;
3819 } /* re_compile_fastmap */
3820 
3821 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3822    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3823    this memory for recording register information.  STARTS and ENDS
3824    must be allocated using the malloc library routine, and must each
3825    be at least NUM_REGS * sizeof (regoff_t) bytes long.
3826 
3827    If NUM_REGS == 0, then subsequent matches should allocate their own
3828    register data.
3829 
3830    Unless this function is called, the first search or match using
3831    PATTERN_BUFFER will allocate its own register data, without
3832    freeing the old data.  */
3833 
3834 void
re_set_registers(bufp,regs,num_regs,starts,ends)3835 re_set_registers (bufp, regs, num_regs, starts, ends)
3836     struct re_pattern_buffer *bufp;
3837     struct re_registers *regs;
3838     unsigned num_regs;
3839     regoff_t *starts, *ends;
3840 {
3841   if (num_regs)
3842     {
3843       bufp->regs_allocated = REGS_REALLOCATE;
3844       regs->num_regs = num_regs;
3845       regs->start = starts;
3846       regs->end = ends;
3847     }
3848   else
3849     {
3850       bufp->regs_allocated = REGS_UNALLOCATED;
3851       regs->num_regs = 0;
3852       regs->start = regs->end = (regoff_t *) 0;
3853     }
3854 }
3855 
3856 /* Searching routines.  */
3857 
3858 /* Like re_search_2, below, but only one string is specified, and
3859    doesn't let you say where to stop matching. */
3860 
3861 int
re_search(bufp,string,size,startpos,range,regs)3862 re_search (bufp, string, size, startpos, range, regs)
3863      struct re_pattern_buffer *bufp;
3864      const char *string;
3865      int size, startpos, range;
3866      struct re_registers *regs;
3867 {
3868   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3869 		      regs, size);
3870 }
3871 
3872 
3873 /* Using the compiled pattern in BUFP->buffer, first tries to match the
3874    virtual concatenation of STRING1 and STRING2, starting first at index
3875    STARTPOS, then at STARTPOS + 1, and so on.
3876 
3877    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3878 
3879    RANGE is how far to scan while trying to match.  RANGE = 0 means try
3880    only at STARTPOS; in general, the last start tried is STARTPOS +
3881    RANGE.
3882 
3883    In REGS, return the indices of the virtual concatenation of STRING1
3884    and STRING2 that matched the entire BUFP->buffer and its contained
3885    subexpressions.
3886 
3887    Do not consider matching one past the index STOP in the virtual
3888    concatenation of STRING1 and STRING2.
3889 
3890    We return either the position in the strings at which the match was
3891    found, -1 if no match, or -2 if error (such as failure
3892    stack overflow).  */
3893 
3894 int
re_search_2(bufp,string1,size1,string2,size2,startpos,range,regs,stop)3895 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3896      struct re_pattern_buffer *bufp;
3897      const char *string1, *string2;
3898      int size1, size2;
3899      int startpos;
3900      int range;
3901      struct re_registers *regs;
3902      int stop;
3903 {
3904   int val;
3905   register char *fastmap = bufp->fastmap;
3906   register RE_TRANSLATE_TYPE translate = bufp->translate;
3907   int total_size = size1 + size2;
3908   int endpos = startpos + range;
3909 
3910   /* Check for out-of-range STARTPOS.  */
3911   if (startpos < 0 || startpos > total_size)
3912     return -1;
3913 
3914   /* Fix up RANGE if it might eventually take us outside
3915      the virtual concatenation of STRING1 and STRING2.
3916      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3917   if (endpos < 0)
3918     range = 0 - startpos;
3919   else if (endpos > total_size)
3920     range = total_size - startpos;
3921 
3922   /* If the search isn't to be a backwards one, don't waste time in a
3923      search for a pattern that must be anchored.  */
3924   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3925     {
3926       if (startpos > 0)
3927 	return -1;
3928       else
3929 	range = 1;
3930     }
3931 
3932 #ifdef emacs
3933   /* In a forward search for something that starts with \=.
3934      don't keep searching past point.  */
3935   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3936     {
3937       range = PT - startpos;
3938       if (range <= 0)
3939 	return -1;
3940     }
3941 #endif /* emacs */
3942 
3943   /* Update the fastmap now if not correct already.  */
3944   if (fastmap && !bufp->fastmap_accurate)
3945     if (re_compile_fastmap (bufp) == -2)
3946       return -2;
3947 
3948   /* Loop through the string, looking for a place to start matching.  */
3949   for (;;)
3950     {
3951       /* If a fastmap is supplied, skip quickly over characters that
3952          cannot be the start of a match.  If the pattern can match the
3953          null string, however, we don't need to skip characters; we want
3954          the first null string.  */
3955       if (fastmap && startpos < total_size && !bufp->can_be_null)
3956 	{
3957 	  if (range > 0)	/* Searching forwards.  */
3958 	    {
3959 	      register const char *d;
3960 	      register int lim = 0;
3961 	      int irange = range;
3962 	      unsigned char c;
3963 
3964               if (startpos < size1 && startpos + range >= size1)
3965                 lim = range - (size1 - startpos);
3966 
3967 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3968 
3969               /* Written out as an if-else to avoid testing `translate'
3970                  inside the loop.  */
3971 	      if (translate)
3972 		while (range > lim) {
3973 		  c = *d++;
3974 		  if (ismbchar (c)) {
3975 		    size_t n;
3976 		    if (fastmap[c])
3977 		      break;
3978 		    d += (n = mbclen (c)) - 1;
3979 		    range -= n;
3980 		    continue;
3981 		  }
3982 		  if (fastmap[(unsigned char) translate[c]])
3983 		    break;
3984                   range--;
3985 		}
3986 	      else
3987 		while (range > lim && (c = *d++, !fastmap[c])) {
3988 		  if (ismbchar (c)) {
3989 		    size_t n;
3990 		    d += (n = mbclen (c)) - 1;
3991 		    range -= n;
3992 		    continue;
3993 		  }
3994                   range--;
3995 		}
3996 
3997 	      startpos += irange - range;
3998 	    }
3999 	  else				/* Searching backwards.  */
4000 	    {
4001 	      register char c = (size1 == 0 || startpos >= size1
4002                                  ? string2[startpos - size1]
4003                                  : string1[startpos]);
4004 
4005 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
4006 		goto advance;
4007 	    }
4008 	}
4009 
4010       /* If can't match the null string, and that's all we have left, fail.  */
4011       if (range >= 0 && startpos == total_size && fastmap
4012           && !bufp->can_be_null)
4013 	return -1;
4014 
4015       val = re_match_2_internal (bufp, string1, size1, string2, size2,
4016 				 startpos, regs, stop);
4017 #ifndef REGEX_MALLOC
4018 #ifdef C_ALLOCA
4019       alloca (0);
4020 #endif
4021 #endif
4022 
4023       if (val >= 0)
4024 	return startpos;
4025 
4026       if (val == -2)
4027 	return -2;
4028 
4029     advance:
4030       if (!range)
4031         break;
4032       else if (range > 0)
4033         {
4034 	  const char *d = ((startpos >= size1 ? string2 - size1 : string1)
4035 			   + startpos);
4036 
4037 	  if (ismbchar (*d)) {
4038 	    size_t n;
4039 	    range -= (n = mbclen (*d) - 1);
4040 	    startpos += n;
4041 	    if (range <= 0)
4042 	      break;
4043 	  }
4044           range--, startpos++;
4045         }
4046       else
4047         {
4048           range++, startpos--;
4049 	  {
4050 	    const char *s, *d, *p;
4051 
4052 	    if (startpos < size1)
4053 	      s = string1, d = string1 + startpos;
4054 	    else
4055 	      s = string2, d = string2 + startpos - size1;
4056 	    if (current_mbctype == MBCTYPE_UTF8) {
4057 	      for (p = d; p > s && !ismbchar(*p) && !((unsigned char) *p < 0x80); p--) {
4058 		range++;
4059 		startpos--;
4060 	      }
4061 	      if (range > 0)
4062 		break;
4063 	    } else {
4064 	      for (p = d; p-- > s && ismbchar(*p); )
4065 		/* --p >= s ���� 80[12]?86 ��ư���ʤ���ǽ��������.  (huge
4066 		   model �ʳ���, s �Υ��ե��åȤ� 0 ���ä����.)  */
4067 		;
4068 	      if (!((d - p) & 1)) {
4069 		if (!range)
4070 		  break;
4071 		range++, startpos--;
4072 	      }
4073 	    }
4074 	  }
4075         }
4076     }
4077   return -1;
4078 } /* re_search_2 */
4079 
4080 /* This converts PTR, a pointer into one of the search strings `string1'
4081    and `string2' into an offset from the beginning of that string.  */
4082 #define POINTER_TO_OFFSET(ptr)			\
4083   (FIRST_STRING_P (ptr)				\
4084    ? ((regoff_t) ((ptr) - string1))		\
4085    : ((regoff_t) ((ptr) - string2 + size1)))
4086 
4087 /* Macros for dealing with the split strings in re_match_2.  */
4088 
4089 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
4090 
4091 /* Call before fetching a character with *d.  This switches over to
4092    string2 if necessary.  */
4093 #define PREFETCH()							\
4094   while (d == dend)						    	\
4095     {									\
4096       /* End of string2 => fail.  */					\
4097       if (dend == end_match_2) 						\
4098         goto fail;							\
4099       /* End of string1 => advance to string2.  */ 			\
4100       d = string2;						        \
4101       dend = end_match_2;						\
4102     }
4103 
4104 
4105 /* Test if at very beginning or at very end of the virtual concatenation
4106    of `string1' and `string2'.  If only one string, it's `string2'.  */
4107 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
4108 #define AT_STRINGS_END(d) ((d) == end2)
4109 
4110 
4111 /* Test if D points to a character which is word-constituent.  We have
4112    two special cases to check for: if past the end of string1, look at
4113    the first character in string2; and if before the beginning of
4114    string2, look at the last character in string1.  */
4115 #define WORDCHAR_P(d)							\
4116   (SYNTAX ((d) == end1 ? *string2					\
4117            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
4118    == Sword)
4119 
4120 /* Disabled due to a compiler bug -- see comment at case wordbound */
4121 #if 0
4122 /* Test if the character before D and the one at D differ with respect
4123    to being word-constituent.  */
4124 #define AT_WORD_BOUNDARY(d)						\
4125   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
4126    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
4127 #endif
4128 
4129 /* Free everything we malloc.  */
4130 #ifdef MATCH_MAY_ALLOCATE
4131 #define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
4132 #define FREE_VARIABLES()						\
4133   do {									\
4134     REGEX_FREE_STACK (fail_stack.stack);				\
4135     FREE_VAR (regstart);						\
4136     FREE_VAR (regend);							\
4137     FREE_VAR (old_regstart);						\
4138     FREE_VAR (old_regend);						\
4139     FREE_VAR (best_regstart);						\
4140     FREE_VAR (best_regend);						\
4141     FREE_VAR (reg_info);						\
4142     FREE_VAR (reg_dummy);						\
4143     FREE_VAR (reg_info_dummy);						\
4144   } while (0)
4145 #else
4146 #define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
4147 #endif /* not MATCH_MAY_ALLOCATE */
4148 
4149 /* These values must meet several constraints.  They must not be valid
4150    register values; since we have a limit of 255 registers (because
4151    we use only one byte in the pattern for the register number), we can
4152    use numbers larger than 255.  They must differ by 1, because of
4153    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
4154    be larger than the value for the highest register, so we do not try
4155    to actually save any registers when none are active.  */
4156 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
4157 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
4158 
4159 /* Matching routines.  */
4160 
4161 #ifndef emacs   /* Emacs never uses this.  */
4162 /* re_match is like re_match_2 except it takes only a single string.  */
4163 
4164 int
re_match(bufp,string,size,pos,regs)4165 re_match (bufp, string, size, pos, regs)
4166      struct re_pattern_buffer *bufp;
4167      const char *string;
4168      int size, pos;
4169      struct re_registers *regs;
4170 {
4171   int result = re_match_2_internal (bufp, NULL, 0, string, size,
4172 				    pos, regs, size);
4173 #ifndef REGEX_MALLOC
4174 #ifdef C_ALLOCA
4175   alloca (0);
4176 #endif
4177 #endif
4178   return result;
4179 }
4180 #endif /* not emacs */
4181 
4182 static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
4183 						    unsigned char *end,
4184 						register_info_type *reg_info));
4185 static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
4186 						  unsigned char *end,
4187 						register_info_type *reg_info));
4188 static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
4189 							unsigned char *end,
4190 						register_info_type *reg_info));
4191 static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
4192 				     int len, char *translate));
4193 
4194 /* re_match_2 matches the compiled pattern in BUFP against the
4195    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
4196    and SIZE2, respectively).  We start matching at POS, and stop
4197    matching at STOP.
4198 
4199    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
4200    store offsets for the substring each group matched in REGS.  See the
4201    documentation for exactly how many groups we fill.
4202 
4203    We return -1 if no match, -2 if an internal error (such as the
4204    failure stack overflowing).  Otherwise, we return the length of the
4205    matched substring.  */
4206 
4207 int
re_match_2(bufp,string1,size1,string2,size2,pos,regs,stop)4208 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
4209      struct re_pattern_buffer *bufp;
4210      const char *string1, *string2;
4211      int size1, size2;
4212      int pos;
4213      struct re_registers *regs;
4214      int stop;
4215 {
4216   int result = re_match_2_internal (bufp, string1, size1, string2, size2,
4217 				    pos, regs, stop);
4218 #ifndef REGEX_MALLOC
4219 #ifdef C_ALLOCA
4220   alloca (0);
4221 #endif
4222 #endif
4223   return result;
4224 }
4225 
4226 /* This is a separate function so that we can force an alloca cleanup
4227    afterwards.  */
4228 static int
re_match_2_internal(bufp,string1,size1,string2,size2,pos,regs,stop)4229 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
4230      struct re_pattern_buffer *bufp;
4231      const char *string1, *string2;
4232      int size1, size2;
4233      int pos;
4234      struct re_registers *regs;
4235      int stop;
4236 {
4237   /* General temporaries.  */
4238   int mcnt;
4239   unsigned char *p1;
4240 
4241   /* Just past the end of the corresponding string.  */
4242   const char *end1, *end2;
4243 
4244   /* Pointers into string1 and string2, just past the last characters in
4245      each to consider matching.  */
4246   const char *end_match_1, *end_match_2;
4247 
4248   /* Where we are in the data, and the end of the current string.  */
4249   const char *d, *dend;
4250 
4251   /* Where we are in the pattern, and the end of the pattern.  */
4252   unsigned char *p = bufp->buffer;
4253   register unsigned char *pend = p + bufp->used;
4254 
4255   /* Mark the opcode just after a start_memory, so we can test for an
4256      empty subpattern when we get to the stop_memory.  */
4257   unsigned char *just_past_start_mem = 0;
4258 
4259   /* We use this to map every character in the string.  */
4260   RE_TRANSLATE_TYPE translate = bufp->translate;
4261 
4262   /* Failure point stack.  Each place that can handle a failure further
4263      down the line pushes a failure point on this stack.  It consists of
4264      restart, regend, and reg_info for all registers corresponding to
4265      the subexpressions we're currently inside, plus the number of such
4266      registers, and, finally, two char *'s.  The first char * is where
4267      to resume scanning the pattern; the second one is where to resume
4268      scanning the strings.  If the latter is zero, the failure point is
4269      a ``dummy''; if a failure happens and the failure point is a dummy,
4270      it gets discarded and the next next one is tried.  */
4271 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
4272   fail_stack_type fail_stack;
4273 #endif
4274 #ifdef DEBUG
4275   static unsigned failure_id = 0;
4276   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
4277 #endif
4278 
4279 #ifdef REL_ALLOC
4280   /* This holds the pointer to the failure stack, when
4281      it is allocated relocatably.  */
4282   fail_stack_elt_t *failure_stack_ptr;
4283 #endif
4284 
4285   /* We fill all the registers internally, independent of what we
4286      return, for use in backreferences.  The number here includes
4287      an element for register zero.  */
4288   size_t num_regs = bufp->re_nsub + 1;
4289 
4290   /* The currently active registers.  */
4291   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4292   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4293 
4294   /* Information on the contents of registers. These are pointers into
4295      the input strings; they record just what was matched (on this
4296      attempt) by a subexpression part of the pattern, that is, the
4297      regnum-th regstart pointer points to where in the pattern we began
4298      matching and the regnum-th regend points to right after where we
4299      stopped matching the regnum-th subexpression.  (The zeroth register
4300      keeps track of what the whole pattern matches.)  */
4301 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4302   const char **regstart, **regend;
4303 #endif
4304 
4305   /* If a group that's operated upon by a repetition operator fails to
4306      match anything, then the register for its start will need to be
4307      restored because it will have been set to wherever in the string we
4308      are when we last see its open-group operator.  Similarly for a
4309      register's end.  */
4310 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4311   const char **old_regstart, **old_regend;
4312 #endif
4313 
4314   /* The is_active field of reg_info helps us keep track of which (possibly
4315      nested) subexpressions we are currently in. The matched_something
4316      field of reg_info[reg_num] helps us tell whether or not we have
4317      matched any of the pattern so far this time through the reg_num-th
4318      subexpression.  These two fields get reset each time through any
4319      loop their register is in.  */
4320 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
4321   register_info_type *reg_info;
4322 #endif
4323 
4324   /* The following record the register info as found in the above
4325      variables when we find a match better than any we've seen before.
4326      This happens as we backtrack through the failure points, which in
4327      turn happens only if we have not yet matched the entire string. */
4328   unsigned best_regs_set = false;
4329 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4330   const char **best_regstart, **best_regend;
4331 #endif
4332 
4333   /* Logically, this is `best_regend[0]'.  But we don't want to have to
4334      allocate space for that if we're not allocating space for anything
4335      else (see below).  Also, we never need info about register 0 for
4336      any of the other register vectors, and it seems rather a kludge to
4337      treat `best_regend' differently than the rest.  So we keep track of
4338      the end of the best match so far in a separate variable.  We
4339      initialize this to NULL so that when we backtrack the first time
4340      and need to test it, it's not garbage.  */
4341   const char *match_end = NULL;
4342 
4343   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
4344   int set_regs_matched_done = 0;
4345 
4346   /* Used when we pop values we don't care about.  */
4347 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
4348   const char **reg_dummy;
4349   register_info_type *reg_info_dummy;
4350 #endif
4351 
4352 #ifdef DEBUG
4353   /* Counts the total number of registers pushed.  */
4354   unsigned num_regs_pushed = 0;
4355 #endif
4356 
4357   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
4358 
4359   INIT_FAIL_STACK ();
4360 
4361 #ifdef MATCH_MAY_ALLOCATE
4362   /* Do not bother to initialize all the register variables if there are
4363      no groups in the pattern, as it takes a fair amount of time.  If
4364      there are groups, we include space for register 0 (the whole
4365      pattern), even though we never use it, since it simplifies the
4366      array indexing.  We should fix this.  */
4367   if (bufp->re_nsub)
4368     {
4369       regstart = REGEX_TALLOC (num_regs, const char *);
4370       regend = REGEX_TALLOC (num_regs, const char *);
4371       old_regstart = REGEX_TALLOC (num_regs, const char *);
4372       old_regend = REGEX_TALLOC (num_regs, const char *);
4373       best_regstart = REGEX_TALLOC (num_regs, const char *);
4374       best_regend = REGEX_TALLOC (num_regs, const char *);
4375       reg_info = REGEX_TALLOC (num_regs, register_info_type);
4376       reg_dummy = REGEX_TALLOC (num_regs, const char *);
4377       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
4378 
4379       if (!(regstart && regend && old_regstart && old_regend && reg_info
4380             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
4381         {
4382           FREE_VARIABLES ();
4383           return -2;
4384         }
4385     }
4386   else
4387     {
4388       /* We must initialize all our variables to NULL, so that
4389          `FREE_VARIABLES' doesn't try to free them.  */
4390       regstart = regend = old_regstart = old_regend = best_regstart
4391         = best_regend = reg_dummy = NULL;
4392       reg_info = reg_info_dummy = (register_info_type *) NULL;
4393     }
4394 #endif /* MATCH_MAY_ALLOCATE */
4395 
4396   /* The starting position is bogus.  */
4397   if (pos < 0 || pos > size1 + size2)
4398     {
4399       FREE_VARIABLES ();
4400       return -1;
4401     }
4402 
4403   /* Initialize subexpression text positions to -1 to mark ones that no
4404      start_memory/stop_memory has been seen for. Also initialize the
4405      register information struct.  */
4406   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4407     {
4408       regstart[mcnt] = regend[mcnt]
4409         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
4410 
4411       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
4412       IS_ACTIVE (reg_info[mcnt]) = 0;
4413       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4414       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
4415     }
4416 
4417   /* We move `string1' into `string2' if the latter's empty -- but not if
4418      `string1' is null.  */
4419   if (size2 == 0 && string1 != NULL)
4420     {
4421       string2 = string1;
4422       size2 = size1;
4423       string1 = 0;
4424       size1 = 0;
4425     }
4426   end1 = string1 + size1;
4427   end2 = string2 + size2;
4428 
4429   /* Compute where to stop matching, within the two strings.  */
4430   if (stop <= size1)
4431     {
4432       end_match_1 = string1 + stop;
4433       end_match_2 = string2;
4434     }
4435   else
4436     {
4437       end_match_1 = end1;
4438       end_match_2 = string2 + stop - size1;
4439     }
4440 
4441   /* `p' scans through the pattern as `d' scans through the data.
4442      `dend' is the end of the input string that `d' points within.  `d'
4443      is advanced into the following input string whenever necessary, but
4444      this happens before fetching; therefore, at the beginning of the
4445      loop, `d' can be pointing at the end of a string, but it cannot
4446      equal `string2'.  */
4447   if (size1 > 0 && pos <= size1)
4448     {
4449       d = string1 + pos;
4450       dend = end_match_1;
4451     }
4452   else
4453     {
4454       d = string2 + pos - size1;
4455       dend = end_match_2;
4456     }
4457 
4458   DEBUG_PRINT1 ("The compiled pattern is:\n");
4459   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
4460   DEBUG_PRINT1 ("The string to match is: `");
4461   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
4462   DEBUG_PRINT1 ("'\n");
4463 
4464   /* This loops over pattern commands.  It exits by returning from the
4465      function if the match is complete, or it drops through if the match
4466      fails at this starting point in the input data.  */
4467   for (;;)
4468     {
4469 #if defined (_LIBC) || defined (MSDOS)
4470       DEBUG_PRINT2 ("\n%p: ", p);
4471 #else
4472       DEBUG_PRINT2 ("\n0x%x: ", p);
4473 #endif
4474 
4475       if (p == pend)
4476 	{ /* End of pattern means we might have succeeded.  */
4477           DEBUG_PRINT1 ("end of pattern ... ");
4478 
4479 	  /* If we haven't matched the entire string, and we want the
4480              longest match, try backtracking.  */
4481           if (d != end_match_2)
4482 	    {
4483 	      /* 1 if this match ends in the same string (string1 or string2)
4484 		 as the best previous match.  */
4485 	      boolean same_str_p = (FIRST_STRING_P (match_end)
4486 				    == MATCHING_IN_FIRST_STRING);
4487 	      /* 1 if this match is the best seen so far.  */
4488 	      boolean best_match_p;
4489 
4490 	      /* AIX compiler got confused when this was combined
4491 		 with the previous declaration.  */
4492 	      if (same_str_p)
4493 		best_match_p = d > match_end;
4494 	      else
4495 		best_match_p = !MATCHING_IN_FIRST_STRING;
4496 
4497               DEBUG_PRINT1 ("backtracking.\n");
4498 
4499               if (!FAIL_STACK_EMPTY ())
4500                 { /* More failure points to try.  */
4501 
4502                   /* If exceeds best match so far, save it.  */
4503                   if (!best_regs_set || best_match_p)
4504                     {
4505                       best_regs_set = true;
4506                       match_end = d;
4507 
4508                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4509 
4510                       for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4511                         {
4512                           best_regstart[mcnt] = regstart[mcnt];
4513                           best_regend[mcnt] = regend[mcnt];
4514                         }
4515                     }
4516                   goto fail;
4517                 }
4518 
4519               /* If no failure points, don't restore garbage.  And if
4520                  last match is real best match, don't restore second
4521                  best one. */
4522               else if (best_regs_set && !best_match_p)
4523                 {
4524   	        restore_best_regs:
4525                   /* Restore best match.  It may happen that `dend ==
4526                      end_match_1' while the restored d is in string2.
4527                      For example, the pattern `x.*y.*z' against the
4528                      strings `x-' and `y-z-', if the two strings are
4529                      not consecutive in memory.  */
4530                   DEBUG_PRINT1 ("Restoring best registers.\n");
4531 
4532                   d = match_end;
4533                   dend = ((d >= string1 && d <= end1)
4534 		           ? end_match_1 : end_match_2);
4535 
4536 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4537 		    {
4538 		      regstart[mcnt] = best_regstart[mcnt];
4539 		      regend[mcnt] = best_regend[mcnt];
4540 		    }
4541                 }
4542             } /* d != end_match_2 */
4543 
4544 	succeed_label:
4545           DEBUG_PRINT1 ("Accepting match.\n");
4546 
4547           /* If caller wants register contents data back, do it.  */
4548           if (regs && !bufp->no_sub)
4549 	    {
4550               /* Have the register data arrays been allocated?  */
4551               if (bufp->regs_allocated == REGS_UNALLOCATED)
4552                 { /* No.  So allocate them with malloc.  We need one
4553                      extra element beyond `num_regs' for the `-1' marker
4554                      GNU code uses.  */
4555                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4556                   regs->start = TALLOC (regs->num_regs, regoff_t);
4557                   regs->end = TALLOC (regs->num_regs, regoff_t);
4558                   if (regs->start == NULL || regs->end == NULL)
4559 		    {
4560 		      FREE_VARIABLES ();
4561 		      return -2;
4562 		    }
4563                   bufp->regs_allocated = REGS_REALLOCATE;
4564                 }
4565               else if (bufp->regs_allocated == REGS_REALLOCATE)
4566                 { /* Yes.  If we need more elements than were already
4567                      allocated, reallocate them.  If we need fewer, just
4568                      leave it alone.  */
4569                   if (regs->num_regs < num_regs + 1)
4570                     {
4571                       regs->num_regs = num_regs + 1;
4572                       RETALLOC (regs->start, regs->num_regs, regoff_t);
4573                       RETALLOC (regs->end, regs->num_regs, regoff_t);
4574                       if (regs->start == NULL || regs->end == NULL)
4575 			{
4576 			  FREE_VARIABLES ();
4577 			  return -2;
4578 			}
4579                     }
4580                 }
4581               else
4582 		{
4583 		  /* These braces fend off a "empty body in an else-statement"
4584 		     warning under GCC when assert expands to nothing.  */
4585 		  assert (bufp->regs_allocated == REGS_FIXED);
4586 		}
4587 
4588               /* Convert the pointer data in `regstart' and `regend' to
4589                  indices.  Register zero has to be set differently,
4590                  since we haven't kept track of any info for it.  */
4591               if (regs->num_regs > 0)
4592                 {
4593                   regs->start[0] = pos;
4594                   regs->end[0] = (MATCHING_IN_FIRST_STRING
4595 				  ? ((regoff_t) (d - string1))
4596 			          : ((regoff_t) (d - string2 + size1)));
4597                 }
4598 
4599               /* Go through the first `min (num_regs, regs->num_regs)'
4600                  registers, since that is all we initialized.  */
4601 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4602 		   mcnt++)
4603 		{
4604                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4605                     regs->start[mcnt] = regs->end[mcnt] = -1;
4606                   else
4607                     {
4608 		      regs->start[mcnt]
4609 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4610                       regs->end[mcnt]
4611 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4612                     }
4613 		}
4614 
4615               /* If the regs structure we return has more elements than
4616                  were in the pattern, set the extra elements to -1.  If
4617                  we (re)allocated the registers, this is the case,
4618                  because we always allocate enough to have at least one
4619                  -1 at the end.  */
4620               for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
4621                 regs->start[mcnt] = regs->end[mcnt] = -1;
4622 	    } /* regs && !bufp->no_sub */
4623 
4624           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4625                         nfailure_points_pushed, nfailure_points_popped,
4626                         nfailure_points_pushed - nfailure_points_popped);
4627           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4628 
4629           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4630 			    ? string1
4631 			    : string2 - size1);
4632 
4633           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4634 
4635           FREE_VARIABLES ();
4636           return mcnt;
4637         }
4638 
4639       /* Otherwise match next pattern command.  */
4640       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4641 	{
4642         /* Ignore these.  Used to ignore the n of succeed_n's which
4643            currently have n == 0.  */
4644         case no_op:
4645           DEBUG_PRINT1 ("EXECUTING no_op.\n");
4646           break;
4647 
4648 	case succeed:
4649           DEBUG_PRINT1 ("EXECUTING succeed.\n");
4650 	  goto succeed_label;
4651 
4652         /* Match the next n pattern characters exactly.  The following
4653            byte in the pattern defines n, and the n bytes after that
4654            are the characters to match.  */
4655 	case exactn:
4656 	  mcnt = *p++;
4657           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4658 
4659           /* This is written out as an if-else so we don't waste time
4660              testing `translate' inside the loop.  */
4661           if (translate)
4662 	    {
4663 	      do
4664 		{
4665 		  unsigned char c;
4666 
4667 		  PREFETCH ();
4668 		  c = *d++;
4669 		  if (ismbchar (c)) {
4670 		    size_t n;
4671 		    if (c != (unsigned char) *p++)
4672 		      goto fail;
4673 		    for (n = mbclen (c) - 1; n > 0; n--)
4674 		      if (!--mcnt	/* �ѥ���������������ѥ��뤵
4675 					   ��Ƥ���¤�, ���Υ����å���
4676 					   ��Ĺ����ǰ�Τ���.  */
4677 			  || d == dend
4678 			  || (unsigned char) *d++ != (unsigned char) *p++)
4679 			goto fail;
4680 		    continue;
4681 		  }
4682 		  if ((unsigned char) translate[c] != (unsigned char) *p++)
4683                     goto fail;
4684 		}
4685 	      while (--mcnt);
4686 	    }
4687 	  else
4688 	    {
4689 	      do
4690 		{
4691 		  PREFETCH ();
4692 #if 0
4693 		  /* ¾����ʬ�Ǥ�, string1 �� string2 �˥ޥ���Х���ʸ��
4694 		     ���٤�Τ�����Ƥ��ʤ�.  ���Τ��Ȥ�®�٤����ˤ���
4695 		     ������å��������, ������ `#if 0' �� `#if 1' ��
4696 		     �Ѥ��뤳��.  */
4697 		  {
4698 		    unsigned char c;
4699 		    size_t n;
4700 
4701 		    c = (unsigned char) *d++;
4702 		    if (c != (unsigned char) *p++)
4703 		      goto fail;
4704 		    if (ismbchar (c))
4705 		      for (n = mbclen (c) - 1; n > 0; n--)
4706 			if (!--mcnt
4707 			    || d == dend
4708 			    || (unsigned char) *d++ != (unsigned char) *p++)
4709 			  goto fail;
4710 		  }
4711 #else
4712 		  if (*d++ != (char) *p++) goto fail;
4713 #endif
4714 		}
4715 	      while (--mcnt);
4716 	    }
4717 	  SET_REGS_MATCHED ();
4718           break;
4719 
4720 
4721         /* Match any character except possibly a newline or a null.  */
4722 	case anychar:
4723           DEBUG_PRINT1 ("EXECUTING anychar.\n");
4724 
4725           PREFETCH ();
4726 	  if (ismbchar (*d)) {
4727 	    size_t i, n = mbclen (*d);
4728 	    const char *p;
4729 
4730 	    if (d + n > dend)
4731 	      goto fail;
4732 	    for (p = d, i = n - 1; i > 0; i--)
4733 	      if (*++p == '\n' || *p == '\0')
4734 		/* ̵���ʥޥ���Х���ʸ���ˤϥޥå������ʤ�.  �����Ǥ�, ��
4735 		   ñ�Τ��ᣲ�Х����ܰʹߤ� '\n', '\0' �Τ�Τ�����̵����
4736 		   ����.  */
4737 		goto fail;
4738 
4739 	    SET_REGS_MATCHED ();
4740 	    DEBUG_PRINT2 ("  Matched `%ld'.\n", (long) mbcfetch (d, NULL));
4741 	    d += n;
4742 	    break;
4743 	  }
4744 
4745           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4746               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4747 	    goto fail;
4748 
4749           SET_REGS_MATCHED ();
4750           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4751           d++;
4752 	  break;
4753 
4754 
4755 	case charset:
4756 	case charset_not:
4757 	  {
4758 	    register mbchar_t c;
4759 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
4760 
4761             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4762 
4763 	    PREFETCH ();
4764 	    c = (unsigned char) *d;
4765 	    if (ismbchar (c)) {
4766 	      if (d + mbclen (c) > dend)
4767 		goto fail;
4768 	      c = mbcfetch (d, NULL);
4769 	    }
4770 	    else
4771 	      c = TRANSLATE (c); /* The character to match.  */
4772 
4773 	    not = is_in_list (c, p);
4774 
4775 	    p += 1 + *p + 2 + EXTRACT_UNSIGNED (&p[1 + *p])*8;
4776 
4777 	    if (!not) goto fail;
4778 
4779 	    SET_REGS_MATCHED ();
4780 	    d += mbclen (*d);
4781 	    break;
4782 	  }
4783 
4784 
4785         /* The beginning of a group is represented by start_memory.
4786            The arguments are the register number in the next byte, and the
4787            number of groups inner to this one in the next.  The text
4788            matched within the group is recorded (in the internal
4789            registers data structure) under the register number.  */
4790         case start_memory:
4791 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4792 
4793           /* Find out if this group can match the empty string.  */
4794 	  p1 = p;		/* To send to group_match_null_string_p.  */
4795 
4796           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4797             REG_MATCH_NULL_STRING_P (reg_info[*p])
4798               = group_match_null_string_p (&p1, pend, reg_info);
4799 
4800           /* Save the position in the string where we were the last time
4801              we were at this open-group operator in case the group is
4802              operated upon by a repetition operator, e.g., with `(a*)*b'
4803              against `ab'; then we want to ignore where we are now in
4804              the string in case this attempt to match fails.  */
4805           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4806                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4807                              : regstart[*p];
4808 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
4809 			 POINTER_TO_OFFSET (old_regstart[*p]));
4810 
4811           regstart[*p] = d;
4812 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4813 
4814           IS_ACTIVE (reg_info[*p]) = 1;
4815           MATCHED_SOMETHING (reg_info[*p]) = 0;
4816 
4817 	  /* Clear this whenever we change the register activity status.  */
4818 	  set_regs_matched_done = 0;
4819 
4820           /* This is the new highest active register.  */
4821           highest_active_reg = *p;
4822 
4823           /* If nothing was active before, this is the new lowest active
4824              register.  */
4825           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4826             lowest_active_reg = *p;
4827 
4828           /* Move past the register number and inner group count.  */
4829           p += 2;
4830 	  just_past_start_mem = p;
4831 
4832           break;
4833 
4834 
4835         /* The stop_memory opcode represents the end of a group.  Its
4836            arguments are the same as start_memory's: the register
4837            number, and the number of inner groups.  */
4838 	case stop_memory:
4839 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4840 
4841           /* We need to save the string position the last time we were at
4842              this close-group operator in case the group is operated
4843              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4844              against `aba'; then we want to ignore where we are now in
4845              the string in case this attempt to match fails.  */
4846           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4847                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
4848 			   : regend[*p];
4849 	  DEBUG_PRINT2 ("      old_regend: %d\n",
4850 			 POINTER_TO_OFFSET (old_regend[*p]));
4851 
4852           regend[*p] = d;
4853 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4854 
4855           /* This register isn't active anymore.  */
4856           IS_ACTIVE (reg_info[*p]) = 0;
4857 
4858 	  /* Clear this whenever we change the register activity status.  */
4859 	  set_regs_matched_done = 0;
4860 
4861           /* If this was the only register active, nothing is active
4862              anymore.  */
4863           if (lowest_active_reg == highest_active_reg)
4864             {
4865               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4866               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4867             }
4868           else
4869             { /* We must scan for the new highest active register, since
4870                  it isn't necessarily one less than now: consider
4871                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4872                  new highest active register is 1.  */
4873               unsigned char r = *p - 1;
4874               while (r > 0 && !IS_ACTIVE (reg_info[r]))
4875                 r--;
4876 
4877               /* If we end up at register zero, that means that we saved
4878                  the registers as the result of an `on_failure_jump', not
4879                  a `start_memory', and we jumped to past the innermost
4880                  `stop_memory'.  For example, in ((.)*) we save
4881                  registers 1 and 2 as a result of the *, but when we pop
4882                  back to the second ), we are at the stop_memory 1.
4883                  Thus, nothing is active.  */
4884 	      if (r == 0)
4885                 {
4886                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4887                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4888                 }
4889               else
4890                 highest_active_reg = r;
4891             }
4892 
4893           /* If just failed to match something this time around with a
4894              group that's operated on by a repetition operator, try to
4895              force exit from the ``loop'', and restore the register
4896              information for this group that we had before trying this
4897              last match.  */
4898           if ((!MATCHED_SOMETHING (reg_info[*p])
4899                || just_past_start_mem == p - 1)
4900 	      && (p + 2) < pend)
4901             {
4902               boolean is_a_jump_n = false;
4903 
4904               p1 = p + 2;
4905               mcnt = 0;
4906               switch ((re_opcode_t) *p1++)
4907                 {
4908                   case jump_n:
4909 		    is_a_jump_n = true;
4910                   case pop_failure_jump:
4911 		  case maybe_pop_jump:
4912 		  case jump:
4913 		  case dummy_failure_jump:
4914                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4915 		    if (is_a_jump_n)
4916 		      p1 += 2;
4917                     break;
4918 
4919                   default:
4920                     /* do nothing */ ;
4921                 }
4922 	      p1 += mcnt;
4923 
4924               /* If the next operation is a jump backwards in the pattern
4925 	         to an on_failure_jump right before the start_memory
4926                  corresponding to this stop_memory, exit from the loop
4927                  by forcing a failure after pushing on the stack the
4928                  on_failure_jump's jump in the pattern, and d.  */
4929               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4930                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4931 		{
4932                   /* If this group ever matched anything, then restore
4933                      what its registers were before trying this last
4934                      failed match, e.g., with `(a*)*b' against `ab' for
4935                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
4936                      against `aba' for regend[3].
4937 
4938                      Also restore the registers for inner groups for,
4939                      e.g., `((a*)(b*))*' against `aba' (register 3 would
4940                      otherwise get trashed).  */
4941 
4942                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4943 		    {
4944 		      unsigned r;
4945 
4946                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4947 
4948 		      /* Restore this and inner groups' (if any) registers.  */
4949                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4950 			   r++)
4951                         {
4952                           regstart[r] = old_regstart[r];
4953 
4954                           /* xx why this test?  */
4955                           if (old_regend[r] >= regstart[r])
4956                             regend[r] = old_regend[r];
4957                         }
4958                     }
4959 		  p1++;
4960                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4961                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4962 
4963                   goto fail;
4964                 }
4965             }
4966 
4967           /* Move past the register number and the inner group count.  */
4968           p += 2;
4969           break;
4970 
4971 
4972 	/* \<digit> has been turned into a `duplicate' command which is
4973            followed by the numeric value of <digit> as the register number.  */
4974         case duplicate:
4975 	  {
4976 	    register const char *d2, *dend2;
4977 	    int regno = *p++;   /* Get which register to match against.  */
4978 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4979 
4980 	    /* Can't back reference a group which we've never matched.  */
4981             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4982               goto fail;
4983 
4984             /* Where in input to try to start matching.  */
4985             d2 = regstart[regno];
4986 
4987             /* Where to stop matching; if both the place to start and
4988                the place to stop matching are in the same string, then
4989                set to the place to stop, otherwise, for now have to use
4990                the end of the first string.  */
4991 
4992             dend2 = ((FIRST_STRING_P (regstart[regno])
4993 		      == FIRST_STRING_P (regend[regno]))
4994 		     ? regend[regno] : end_match_1);
4995 	    for (;;)
4996 	      {
4997 		/* If necessary, advance to next segment in register
4998                    contents.  */
4999 		while (d2 == dend2)
5000 		  {
5001 		    if (dend2 == end_match_2) break;
5002 		    if (dend2 == regend[regno]) break;
5003 
5004                     /* End of string1 => advance to string2. */
5005                     d2 = string2;
5006                     dend2 = regend[regno];
5007 		  }
5008 		/* At end of register contents => success */
5009 		if (d2 == dend2) break;
5010 
5011 		/* If necessary, advance to next segment in data.  */
5012 		PREFETCH ();
5013 
5014 		/* How many characters left in this segment to match.  */
5015 		mcnt = dend - d;
5016 
5017 		/* Want how many consecutive characters we can match in
5018                    one shot, so, if necessary, adjust the count.  */
5019                 if (mcnt > dend2 - d2)
5020 		  mcnt = dend2 - d2;
5021 
5022 		/* Compare that many; failure if mismatch, else move
5023                    past them.  */
5024 		if (translate
5025                     ? bcmp_translate (d, d2, mcnt, translate)
5026                     : bcmp (d, d2, mcnt))
5027 		  goto fail;
5028 		d += mcnt, d2 += mcnt;
5029 
5030 		/* Do this because we've match some characters.  */
5031 		SET_REGS_MATCHED ();
5032 	      }
5033 	  }
5034 	  break;
5035 
5036 
5037         /* begline matches the empty string at the beginning of the string
5038            (unless `not_bol' is set in `bufp'), and, if
5039            `newline_anchor' is set, after newlines.  */
5040 	case begline:
5041           DEBUG_PRINT1 ("EXECUTING begline.\n");
5042 
5043           if (AT_STRINGS_BEG (d))
5044             {
5045               if (!bufp->not_bol) break;
5046             }
5047           else if (d[-1] == '\n' && bufp->newline_anchor)
5048             {
5049               break;
5050             }
5051           /* In all other cases, we fail.  */
5052           goto fail;
5053 
5054 
5055         /* endline is the dual of begline.  */
5056 	case endline:
5057           DEBUG_PRINT1 ("EXECUTING endline.\n");
5058 
5059           if (AT_STRINGS_END (d))
5060             {
5061               if (!bufp->not_eol) break;
5062             }
5063 
5064           /* We have to ``prefetch'' the next character.  */
5065           else if ((d == end1 ? *string2 : *d) == '\n'
5066                    && bufp->newline_anchor)
5067             {
5068               break;
5069             }
5070           goto fail;
5071 
5072 
5073 	/* Match at the very beginning of the data.  */
5074         case begbuf:
5075           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
5076           if (AT_STRINGS_BEG (d))
5077             break;
5078           goto fail;
5079 
5080 
5081 	/* Match at the very end of the data.  */
5082         case endbuf:
5083           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
5084 	  if (AT_STRINGS_END (d))
5085 	    break;
5086           goto fail;
5087 
5088 
5089         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
5090            pushes NULL as the value for the string on the stack.  Then
5091            `pop_failure_point' will keep the current value for the
5092            string, instead of restoring it.  To see why, consider
5093            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
5094            then the . fails against the \n.  But the next thing we want
5095            to do is match the \n against the \n; if we restored the
5096            string value, we would be back at the foo.
5097 
5098            Because this is used only in specific cases, we don't need to
5099            check all the things that `on_failure_jump' does, to make
5100            sure the right things get saved on the stack.  Hence we don't
5101            share its code.  The only reason to push anything on the
5102            stack at all is that otherwise we would have to change
5103            `anychar's code to do something besides goto fail in this
5104            case; that seems worse than this.  */
5105         case on_failure_keep_string_jump:
5106           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
5107 
5108           EXTRACT_NUMBER_AND_INCR (mcnt, p);
5109 #if defined (_LIBC) || defined (MSDOS)
5110           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
5111 #else
5112           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
5113 #endif
5114 
5115           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
5116           break;
5117 
5118 
5119 	/* Uses of on_failure_jump:
5120 
5121            Each alternative starts with an on_failure_jump that points
5122            to the beginning of the next alternative.  Each alternative
5123            except the last ends with a jump that in effect jumps past
5124            the rest of the alternatives.  (They really jump to the
5125            ending jump of the following alternative, because tensioning
5126            these jumps is a hassle.)
5127 
5128            Repeats start with an on_failure_jump that points past both
5129            the repetition text and either the following jump or
5130            pop_failure_jump back to this on_failure_jump.  */
5131 	case on_failure_jump:
5132         on_failure:
5133           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
5134 
5135           EXTRACT_NUMBER_AND_INCR (mcnt, p);
5136 #if defined (_LIBC) || defined (MSDOS)
5137           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
5138 #else
5139           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
5140 #endif
5141 
5142           /* If this on_failure_jump comes right before a group (i.e.,
5143              the original * applied to a group), save the information
5144              for that group and all inner ones, so that if we fail back
5145              to this point, the group's information will be correct.
5146              For example, in \(a*\)*\1, we need the preceding group,
5147              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
5148 
5149           /* We can't use `p' to check ahead because we push
5150              a failure point to `p + mcnt' after we do this.  */
5151           p1 = p;
5152 
5153           /* We need to skip no_op's before we look for the
5154              start_memory in case this on_failure_jump is happening as
5155              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
5156              against aba.  */
5157           while (p1 < pend && (re_opcode_t) *p1 == no_op)
5158             p1++;
5159 
5160           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
5161             {
5162               /* We have a new highest active register now.  This will
5163                  get reset at the start_memory we are about to get to,
5164                  but we will have saved all the registers relevant to
5165                  this repetition op, as described above.  */
5166               highest_active_reg = *(p1 + 1) + *(p1 + 2);
5167               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
5168                 lowest_active_reg = *(p1 + 1);
5169             }
5170 
5171           DEBUG_PRINT1 (":\n");
5172           PUSH_FAILURE_POINT (p + mcnt, d, -2);
5173           break;
5174 
5175 
5176         /* A smart repeat ends with `maybe_pop_jump'.
5177 	   We change it to either `pop_failure_jump' or `jump'.  */
5178         case maybe_pop_jump:
5179           EXTRACT_NUMBER_AND_INCR (mcnt, p);
5180           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
5181           {
5182 	    register unsigned char *p2 = p;
5183 
5184             /* Compare the beginning of the repeat with what in the
5185                pattern follows its end. If we can establish that there
5186                is nothing that they would both match, i.e., that we
5187                would have to backtrack because of (as in, e.g., `a*a')
5188                then we can change to pop_failure_jump, because we'll
5189                never have to backtrack.
5190 
5191                This is not true in the case of alternatives: in
5192                `(a|ab)*' we do need to backtrack to the `ab' alternative
5193                (e.g., if the string was `ab').  But instead of trying to
5194                detect that here, the alternative has put on a dummy
5195                failure point which is what we will end up popping.  */
5196 
5197 	    /* Skip over open/close-group commands.
5198 	       If what follows this loop is a ...+ construct,
5199 	       look at what begins its body, since we will have to
5200 	       match at least one of that.  */
5201 	    while (1)
5202 	      {
5203 		if (p2 + 2 < pend
5204 		    && ((re_opcode_t) *p2 == stop_memory
5205 			|| (re_opcode_t) *p2 == start_memory))
5206 		  p2 += 3;
5207 		else if (p2 + 6 < pend
5208 			 && (re_opcode_t) *p2 == dummy_failure_jump)
5209 		  p2 += 6;
5210 		else
5211 		  break;
5212 	      }
5213 
5214 	    p1 = p + mcnt;
5215 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
5216 	       to the `maybe_finalize_jump' of this case.  Examine what
5217 	       follows.  */
5218 
5219             /* If we're at the end of the pattern, we can change.  */
5220             if (p2 == pend)
5221 	      {
5222 		/* Consider what happens when matching ":\(.*\)"
5223 		   against ":/".  I don't really understand this code
5224 		   yet.  */
5225   	        p[-3] = (unsigned char) pop_failure_jump;
5226                 DEBUG_PRINT1
5227                   ("  End of pattern: change to `pop_failure_jump'.\n");
5228               }
5229 
5230             else if ((re_opcode_t) *p2 == exactn
5231 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
5232 	      {
5233 		register mbchar_t c
5234                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
5235 
5236                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
5237                   {
5238   		    p[-3] = (unsigned char) pop_failure_jump;
5239                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
5240                                   c, p1[5]);
5241                   }
5242 
5243 		else if ((re_opcode_t) p1[3] == charset
5244 			 || (re_opcode_t) p1[3] == charset_not)
5245 		  {
5246 		    if (ismbchar (c))
5247 		      c = mbcfetch (&p2[2], NULL);
5248 
5249 		    /* `is_in_list()' is TRUE if c would match, which means
5250                         that we can't change to pop_failure_jump.  */
5251 		    if (!is_in_list (c, p1 + 4))
5252                       {
5253   		        p[-3] = (unsigned char) pop_failure_jump;
5254                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
5255                       }
5256 		  }
5257 	      }
5258             else if ((re_opcode_t) *p2 == charset)
5259 	      {
5260 #ifdef DEBUG
5261 		register mbchar_t c
5262 		  = *p2 == (unsigned char) endline ? '\n' : p2[2];
5263 #endif
5264 
5265 		mbchar_t wc = mbcfetch (&p1[5], NULL);
5266 		if ((re_opcode_t) p1[3] == exactn && !is_in_list (wc, p2 + 1))
5267                   {
5268   		    p[-3] = (unsigned char) pop_failure_jump;
5269                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
5270                                   c, p1[5]);
5271                   }
5272 
5273 		else if ((re_opcode_t) p1[3] == charset_not)
5274 		  {
5275 		    int idx;
5276 		    /* We win if the charset_not inside the loop
5277 		       lists every character listed in the charset after.  */
5278 		    for (idx = 0; idx < (int) p2[1]; idx++)
5279 		      if (! (p2[2 + idx] == 0
5280 			     || (idx < (int) p1[4]
5281 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
5282 			break;
5283 
5284 		    if (idx == p2[1]
5285 			&& is_invmblist_nooverlap (&p2[2 + p2[1]], &p1[5 + p1[4]]))
5286                       {
5287   		        p[-3] = (unsigned char) pop_failure_jump;
5288                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
5289                       }
5290 		  }
5291 		else if ((re_opcode_t) p1[3] == charset)
5292 		  {
5293 		    int idx;
5294 		    /* We win if the charset inside the loop
5295 		       has no overlap with the one after the loop.  */
5296 		    for (idx = 0;
5297 			 idx < (int) p2[1] && idx < (int) p1[4];
5298 			 idx++)
5299 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
5300 			break;
5301 
5302 		    if ((idx == p2[1] || idx == p1[4])
5303 			&& is_mblist_nooverlap (&p2[2 + p2[1]], &p1[5 + p1[4]]))
5304                       {
5305   		        p[-3] = (unsigned char) pop_failure_jump;
5306                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
5307                       }
5308 		  }
5309 	      }
5310 	  }
5311 	  p -= 2;		/* Point at relative address again.  */
5312 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
5313 	    {
5314 	      p[-1] = (unsigned char) jump;
5315               DEBUG_PRINT1 ("  Match => jump.\n");
5316 	      goto unconditional_jump;
5317 	    }
5318         /* Note fall through.  */
5319 
5320 
5321 	/* The end of a simple repeat has a pop_failure_jump back to
5322            its matching on_failure_jump, where the latter will push a
5323            failure point.  The pop_failure_jump takes off failure
5324            points put on by this pop_failure_jump's matching
5325            on_failure_jump; we got through the pattern to here from the
5326            matching on_failure_jump, so didn't fail.  */
5327         case pop_failure_jump:
5328           {
5329             /* We need to pass separate storage for the lowest and
5330                highest registers, even though we don't care about the
5331                actual values.  Otherwise, we will restore only one
5332                register from the stack, since lowest will == highest in
5333                `pop_failure_point'.  */
5334             active_reg_t dummy_low_reg, dummy_high_reg;
5335             unsigned char *pdummy;
5336             const char *sdummy;
5337 
5338             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
5339             POP_FAILURE_POINT (sdummy, pdummy,
5340                                dummy_low_reg, dummy_high_reg,
5341                                reg_dummy, reg_dummy, reg_info_dummy);
5342           }
5343 	  /* Note fall through.  */
5344 
5345 	unconditional_jump:
5346 #if defined (_LIBC) || defined (MSDOS)
5347 	  DEBUG_PRINT2 ("\n%p: ", p);
5348 #else
5349 	  DEBUG_PRINT2 ("\n0x%x: ", p);
5350 #endif
5351           /* Note fall through.  */
5352 
5353         /* Unconditionally jump (without popping any failure points).  */
5354         case jump:
5355 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
5356           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
5357 	  p += mcnt;				/* Do the jump.  */
5358 #if defined (_LIBC) || defined (MSDOS)
5359           DEBUG_PRINT2 ("(to %p).\n", p);
5360 #else
5361           DEBUG_PRINT2 ("(to 0x%x).\n", p);
5362 #endif
5363 	  break;
5364 
5365 
5366         /* We need this opcode so we can detect where alternatives end
5367            in `group_match_null_string_p' et al.  */
5368         case jump_past_alt:
5369           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
5370           goto unconditional_jump;
5371 
5372 
5373         /* Normally, the on_failure_jump pushes a failure point, which
5374            then gets popped at pop_failure_jump.  We will end up at
5375            pop_failure_jump, also, and with a pattern of, say, `a+', we
5376            are skipping over the on_failure_jump, so we have to push
5377            something meaningless for pop_failure_jump to pop.  */
5378         case dummy_failure_jump:
5379           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
5380           /* It doesn't matter what we push for the string here.  What
5381              the code at `fail' tests is the value for the pattern.  */
5382           PUSH_FAILURE_POINT (0, 0, -2);
5383           goto unconditional_jump;
5384 
5385 
5386         /* At the end of an alternative, we need to push a dummy failure
5387            point in case we are followed by a `pop_failure_jump', because
5388            we don't want the failure point for the alternative to be
5389            popped.  For example, matching `(a|ab)*' against `aab'
5390            requires that we match the `ab' alternative.  */
5391         case push_dummy_failure:
5392           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
5393           /* See comments just above at `dummy_failure_jump' about the
5394              two zeroes.  */
5395           PUSH_FAILURE_POINT (0, 0, -2);
5396           break;
5397 
5398         /* Have to succeed matching what follows at least n times.
5399            After that, handle like `on_failure_jump'.  */
5400         case succeed_n:
5401           EXTRACT_NUMBER (mcnt, p + 2);
5402           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
5403 
5404           assert (mcnt >= 0);
5405           /* Originally, this is how many times we HAVE to succeed.  */
5406           if (mcnt > 0)
5407             {
5408                mcnt--;
5409 	       p += 2;
5410                STORE_NUMBER_AND_INCR (p, mcnt);
5411 #if defined (_LIBC) || defined (MSDOS)
5412                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
5413 #else
5414                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
5415 #endif
5416             }
5417 	  else if (mcnt == 0)
5418             {
5419 #if defined (_LIBC) || defined (MSDOS)
5420               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
5421 #else
5422               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
5423 #endif
5424 	      p[2] = (unsigned char) no_op;
5425               p[3] = (unsigned char) no_op;
5426               goto on_failure;
5427             }
5428           break;
5429 
5430         case jump_n:
5431           EXTRACT_NUMBER (mcnt, p + 2);
5432           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
5433 
5434           /* Originally, this is how many times we CAN jump.  */
5435           if (mcnt)
5436             {
5437                mcnt--;
5438                STORE_NUMBER (p + 2, mcnt);
5439 #if defined (_LIBC) || defined (MSDOS)
5440                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
5441 #else
5442                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
5443 #endif
5444 	       goto unconditional_jump;
5445             }
5446           /* If don't have to jump any more, skip over the rest of command.  */
5447 	  else
5448 	    p += 4;
5449           break;
5450 
5451 	case set_number_at:
5452 	  {
5453             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
5454 
5455             EXTRACT_NUMBER_AND_INCR (mcnt, p);
5456             p1 = p + mcnt;
5457             EXTRACT_NUMBER_AND_INCR (mcnt, p);
5458 #if defined (_LIBC) || defined (MSDOS)
5459             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
5460 #else
5461             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
5462 #endif
5463 	    STORE_NUMBER (p1, mcnt);
5464             break;
5465           }
5466 
5467 #if 0
5468 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
5469 	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
5470 	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
5471 	   macro and introducing temporary variables works around the bug.  */
5472 
5473 	case wordbound:
5474 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5475 	  if (AT_WORD_BOUNDARY (d))
5476 	    break;
5477 	  goto fail;
5478 
5479 	case notwordbound:
5480 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5481 	  if (AT_WORD_BOUNDARY (d))
5482 	    goto fail;
5483 	  break;
5484 #else
5485 	case wordbound:
5486 	{
5487 	  boolean prevchar, thischar;
5488 
5489 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
5490 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5491 	    break;
5492 
5493 	  prevchar = WORDCHAR_P (d - 1);
5494 	  thischar = WORDCHAR_P (d);
5495 	  if (prevchar != thischar)
5496 	    break;
5497 	  goto fail;
5498 	}
5499 
5500       case notwordbound:
5501 	{
5502 	  boolean prevchar, thischar;
5503 
5504 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
5505 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
5506 	    goto fail;
5507 
5508 	  prevchar = WORDCHAR_P (d - 1);
5509 	  thischar = WORDCHAR_P (d);
5510 	  if (prevchar != thischar)
5511 	    goto fail;
5512 	  break;
5513 	}
5514 #endif
5515 
5516 	case wordbeg:
5517           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5518 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
5519 	    break;
5520           goto fail;
5521 
5522 	case wordend:
5523           DEBUG_PRINT1 ("EXECUTING wordend.\n");
5524 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
5525               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
5526 	    break;
5527           goto fail;
5528 
5529 #ifdef emacs
5530   	case before_dot:
5531           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5532  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
5533   	    goto fail;
5534   	  break;
5535 
5536   	case at_dot:
5537           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5538  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
5539   	    goto fail;
5540   	  break;
5541 
5542   	case after_dot:
5543           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5544           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5545   	    goto fail;
5546   	  break;
5547 
5548 	case syntaxspec:
5549           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5550 	  mcnt = *p++;
5551 	  goto matchsyntax;
5552 
5553         case wordchar:
5554           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5555 	  mcnt = (int) Sword;
5556         matchsyntax:
5557 	  PREFETCH ();
5558 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5559 	  d++;
5560 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5561 	    goto fail;
5562           SET_REGS_MATCHED ();
5563 	  break;
5564 
5565 	case notsyntaxspec:
5566           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5567 	  mcnt = *p++;
5568 	  goto matchnotsyntax;
5569 
5570         case notwordchar:
5571           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5572 	  mcnt = (int) Sword;
5573         matchnotsyntax:
5574 	  PREFETCH ();
5575 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5576 	  d++;
5577 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5578 	    goto fail;
5579 	  SET_REGS_MATCHED ();
5580           break;
5581 
5582 #else /* not emacs */
5583 	case wordchar:
5584           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5585 	  PREFETCH ();
5586           if (!WORDCHAR_P (d))
5587             goto fail;
5588 	  SET_REGS_MATCHED ();
5589           d++;
5590 	  break;
5591 
5592 	case notwordchar:
5593           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5594 	  PREFETCH ();
5595 	  if (WORDCHAR_P (d))
5596             goto fail;
5597           SET_REGS_MATCHED ();
5598           d++;
5599 	  break;
5600 #endif /* not emacs */
5601 
5602         default:
5603           abort ();
5604 	}
5605       continue;  /* Successfully executed one pattern command; keep going.  */
5606 
5607 
5608     /* We goto here if a matching operation fails. */
5609     fail:
5610       if (!FAIL_STACK_EMPTY ())
5611 	{ /* A restart point is known.  Restore to that state.  */
5612           DEBUG_PRINT1 ("\nFAIL:\n");
5613           POP_FAILURE_POINT (d, p,
5614                              lowest_active_reg, highest_active_reg,
5615                              regstart, regend, reg_info);
5616 
5617           /* If this failure point is a dummy, try the next one.  */
5618           if (!p)
5619 	    goto fail;
5620 
5621           /* If we failed to the end of the pattern, don't examine *p.  */
5622 	  assert (p <= pend);
5623           if (p < pend)
5624             {
5625               boolean is_a_jump_n = false;
5626 
5627               /* If failed to a backwards jump that's part of a repetition
5628                  loop, need to pop this failure point and use the next one.  */
5629               switch ((re_opcode_t) *p)
5630                 {
5631                 case jump_n:
5632                   is_a_jump_n = true;
5633                 case maybe_pop_jump:
5634                 case pop_failure_jump:
5635                 case jump:
5636                   p1 = p + 1;
5637                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5638                   p1 += mcnt;
5639 
5640                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5641                       || (!is_a_jump_n
5642                           && (re_opcode_t) *p1 == on_failure_jump))
5643                     goto fail;
5644                   break;
5645                 default:
5646                   /* do nothing */ ;
5647                 }
5648             }
5649 
5650           if (d >= string1 && d <= end1)
5651 	    dend = end_match_1;
5652         }
5653       else
5654         break;   /* Matching at this starting point really fails.  */
5655     } /* for (;;) */
5656 
5657   if (best_regs_set)
5658     goto restore_best_regs;
5659 
5660   FREE_VARIABLES ();
5661 
5662   return -1;         			/* Failure to match.  */
5663 } /* re_match_2 */
5664 
5665 /* Subroutine definitions for re_match_2.  */
5666 
5667 
5668 /* We are passed P pointing to a register number after a start_memory.
5669 
5670    Return true if the pattern up to the corresponding stop_memory can
5671    match the empty string, and false otherwise.
5672 
5673    If we find the matching stop_memory, sets P to point to one past its number.
5674    Otherwise, sets P to an undefined byte less than or equal to END.
5675 
5676    We don't handle duplicates properly (yet).  */
5677 
5678 static boolean
group_match_null_string_p(p,end,reg_info)5679 group_match_null_string_p (p, end, reg_info)
5680     unsigned char **p, *end;
5681     register_info_type *reg_info;
5682 {
5683   int mcnt;
5684   /* Point to after the args to the start_memory.  */
5685   unsigned char *p1 = *p + 2;
5686 
5687   while (p1 < end)
5688     {
5689       /* Skip over opcodes that can match nothing, and return true or
5690 	 false, as appropriate, when we get to one that can't, or to the
5691          matching stop_memory.  */
5692 
5693       switch ((re_opcode_t) *p1)
5694         {
5695         /* Could be either a loop or a series of alternatives.  */
5696         case on_failure_jump:
5697           p1++;
5698           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5699 
5700           /* If the next operation is not a jump backwards in the
5701 	     pattern.  */
5702 
5703 	  if (mcnt >= 0)
5704 	    {
5705               /* Go through the on_failure_jumps of the alternatives,
5706                  seeing if any of the alternatives cannot match nothing.
5707                  The last alternative starts with only a jump,
5708                  whereas the rest start with on_failure_jump and end
5709                  with a jump, e.g., here is the pattern for `a|b|c':
5710 
5711                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5712                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5713                  /exactn/1/c
5714 
5715                  So, we have to first go through the first (n-1)
5716                  alternatives and then deal with the last one separately.  */
5717 
5718 
5719               /* Deal with the first (n-1) alternatives, which start
5720                  with an on_failure_jump (see above) that jumps to right
5721                  past a jump_past_alt.  */
5722 
5723               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5724                 {
5725                   /* `mcnt' holds how many bytes long the alternative
5726                      is, including the ending `jump_past_alt' and
5727                      its number.  */
5728 
5729                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5730 				                      reg_info))
5731                     return false;
5732 
5733                   /* Move to right after this alternative, including the
5734 		     jump_past_alt.  */
5735                   p1 += mcnt;
5736 
5737                   /* Break if it's the beginning of an n-th alternative
5738                      that doesn't begin with an on_failure_jump.  */
5739                   if ((re_opcode_t) *p1 != on_failure_jump)
5740                     break;
5741 
5742 		  /* Still have to check that it's not an n-th
5743 		     alternative that starts with an on_failure_jump.  */
5744 		  p1++;
5745                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5746                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5747                     {
5748 		      /* Get to the beginning of the n-th alternative.  */
5749                       p1 -= 3;
5750                       break;
5751                     }
5752                 }
5753 
5754               /* Deal with the last alternative: go back and get number
5755                  of the `jump_past_alt' just before it.  `mcnt' contains
5756                  the length of the alternative.  */
5757               EXTRACT_NUMBER (mcnt, p1 - 2);
5758 
5759               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5760                 return false;
5761 
5762               p1 += mcnt;	/* Get past the n-th alternative.  */
5763             } /* if mcnt > 0 */
5764           break;
5765 
5766 
5767         case stop_memory:
5768 	  assert (p1[1] == **p);
5769           *p = p1 + 2;
5770           return true;
5771 
5772 
5773         default:
5774           if (!common_op_match_null_string_p (&p1, end, reg_info))
5775             return false;
5776         }
5777     } /* while p1 < end */
5778 
5779   return false;
5780 } /* group_match_null_string_p */
5781 
5782 
5783 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5784    It expects P to be the first byte of a single alternative and END one
5785    byte past the last. The alternative can contain groups.  */
5786 
5787 static boolean
alt_match_null_string_p(p,end,reg_info)5788 alt_match_null_string_p (p, end, reg_info)
5789     unsigned char *p, *end;
5790     register_info_type *reg_info;
5791 {
5792   int mcnt;
5793   unsigned char *p1 = p;
5794 
5795   while (p1 < end)
5796     {
5797       /* Skip over opcodes that can match nothing, and break when we get
5798          to one that can't.  */
5799 
5800       switch ((re_opcode_t) *p1)
5801         {
5802 	/* It's a loop.  */
5803         case on_failure_jump:
5804           p1++;
5805           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5806           p1 += mcnt;
5807           break;
5808 
5809 	default:
5810           if (!common_op_match_null_string_p (&p1, end, reg_info))
5811             return false;
5812         }
5813     }  /* while p1 < end */
5814 
5815   return true;
5816 } /* alt_match_null_string_p */
5817 
5818 
5819 /* Deals with the ops common to group_match_null_string_p and
5820    alt_match_null_string_p.
5821 
5822    Sets P to one after the op and its arguments, if any.  */
5823 
5824 static boolean
common_op_match_null_string_p(p,end,reg_info)5825 common_op_match_null_string_p (p, end, reg_info)
5826     unsigned char **p, *end;
5827     register_info_type *reg_info;
5828 {
5829   int mcnt;
5830   boolean ret;
5831   int reg_no;
5832   unsigned char *p1 = *p;
5833 
5834   switch ((re_opcode_t) *p1++)
5835     {
5836     case no_op:
5837     case begline:
5838     case endline:
5839     case begbuf:
5840     case endbuf:
5841     case wordbeg:
5842     case wordend:
5843     case wordbound:
5844     case notwordbound:
5845 #ifdef emacs
5846     case before_dot:
5847     case at_dot:
5848     case after_dot:
5849 #endif
5850       break;
5851 
5852     case start_memory:
5853       reg_no = *p1;
5854       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5855       ret = group_match_null_string_p (&p1, end, reg_info);
5856 
5857       /* Have to set this here in case we're checking a group which
5858          contains a group and a back reference to it.  */
5859 
5860       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5861         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5862 
5863       if (!ret)
5864         return false;
5865       break;
5866 
5867     /* If this is an optimized succeed_n for zero times, make the jump.  */
5868     case jump:
5869       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5870       if (mcnt >= 0)
5871         p1 += mcnt;
5872       else
5873         return false;
5874       break;
5875 
5876     case succeed_n:
5877       /* Get to the number of times to succeed.  */
5878       p1 += 2;
5879       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5880 
5881       if (mcnt == 0)
5882         {
5883           p1 -= 4;
5884           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5885           p1 += mcnt;
5886         }
5887       else
5888         return false;
5889       break;
5890 
5891     case duplicate:
5892       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5893         return false;
5894       break;
5895 
5896     case set_number_at:
5897       p1 += 4;
5898 
5899     default:
5900       /* All other opcodes mean we cannot match the empty string.  */
5901       return false;
5902   }
5903 
5904   *p = p1;
5905   return true;
5906 } /* common_op_match_null_string_p */
5907 
5908 
5909 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5910    bytes; nonzero otherwise.  */
5911 
5912 static int
bcmp_translate(s1,s2,len,translate)5913 bcmp_translate (s1, s2, len, translate)
5914      const char *s1, *s2;
5915      register int len;
5916      RE_TRANSLATE_TYPE translate;
5917 {
5918   register const unsigned char *p1 = (const unsigned char *) s1;
5919   register const unsigned char *p2 = (const unsigned char *) s2;
5920   unsigned char c;
5921 
5922   while (len)
5923     {
5924       c = *p1++;
5925       if (ismbchar(c)) {
5926 	size_t n;
5927 	if (c != *p2++)
5928 	  return 1;
5929 	for (n = mbclen (c) - 1; n > 0; n--)
5930 	  if (!--len || *p1++ != *p2++)
5931 	    return 1;
5932       }
5933       else
5934 	if (translate[c] != translate[*p2++])
5935 	  return 1;
5936       len--;
5937     }
5938   return 0;
5939 }
5940 
5941 /* Entry points for GNU code.  */
5942 
5943 /* re_compile_pattern is the GNU regular expression compiler: it
5944    compiles PATTERN (of length SIZE) and puts the result in BUFP.
5945    Returns 0 if the pattern was valid, otherwise an error string.
5946 
5947    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5948    are set in BUFP on entry.
5949 
5950    We call regex_compile to do the actual compilation.  */
5951 
5952 const char *
re_compile_pattern(pattern,length,bufp)5953 re_compile_pattern (pattern, length, bufp)
5954      const char *pattern;
5955      size_t length;
5956      struct re_pattern_buffer *bufp;
5957 {
5958   reg_errcode_t ret;
5959 
5960   /* GNU code is written to assume at least RE_NREGS registers will be set
5961      (and at least one extra will be -1).  */
5962   bufp->regs_allocated = REGS_UNALLOCATED;
5963 
5964   /* And GNU code determines whether or not to get register information
5965      by passing null for the REGS argument to re_match, etc., not by
5966      setting no_sub.  */
5967   bufp->no_sub = 0;
5968 
5969   /* Match anchors at newline.  */
5970   bufp->newline_anchor = 1;
5971 
5972   ret = regex_compile (pattern, length, re_syntax_options, bufp);
5973 
5974   if (!ret)
5975     return NULL;
5976   return gettext (re_error_msgid[(int) ret]);
5977 }
5978 
5979 /* Entry points compatible with 4.2 BSD regex library.  We don't define
5980    them unless specifically requested.  */
5981 
5982 #if defined (_REGEX_RE_COMP) || defined (_LIBC)
5983 
5984 /* BSD has one and only one pattern buffer.  */
5985 static struct re_pattern_buffer re_comp_buf;
5986 
5987 char *
5988 #ifdef _LIBC
5989 /* Make these definitions weak in libc, so POSIX programs can redefine
5990    these names if they don't use our functions, and still use
5991    regcomp/regexec below without link errors.  */
5992 weak_function
5993 #endif
re_comp(s)5994 re_comp (s)
5995     const char *s;
5996 {
5997   reg_errcode_t ret;
5998 
5999   if (!s)
6000     {
6001       if (!re_comp_buf.buffer)
6002 	return gettext ("No previous regular expression");
6003       return 0;
6004     }
6005 
6006   if (!re_comp_buf.buffer)
6007     {
6008       re_comp_buf.buffer = (unsigned char *) malloc (200);
6009       if (re_comp_buf.buffer == NULL)
6010         return gettext (re_error_msgid[(int) REG_ESPACE]);
6011       re_comp_buf.allocated = 200;
6012 
6013       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
6014       if (re_comp_buf.fastmap == NULL)
6015 	return gettext (re_error_msgid[(int) REG_ESPACE]);
6016     }
6017 
6018   /* Since `re_exec' always passes NULL for the `regs' argument, we
6019      don't need to initialize the pattern buffer fields which affect it.  */
6020 
6021   /* Match anchors at newlines.  */
6022   re_comp_buf.newline_anchor = 1;
6023 
6024   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
6025 
6026   if (!ret)
6027     return NULL;
6028 
6029   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
6030   return (char *) gettext (re_error_msgid[(int) ret]);
6031 }
6032 
6033 
6034 int
6035 #ifdef _LIBC
6036 weak_function
6037 #endif
re_exec(s)6038 re_exec (s)
6039     const char *s;
6040 {
6041   const int len = strlen (s);
6042   return
6043     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
6044 }
6045 
6046 #endif /* _REGEX_RE_COMP */
6047 
6048 /* POSIX.2 functions.  Don't define these for Emacs.  */
6049 
6050 #ifndef emacs
6051 
6052 /* regcomp takes a regular expression as a string and compiles it.
6053 
6054    PREG is a regex_t *.  We do not expect any fields to be initialized,
6055    since POSIX says we shouldn't.  Thus, we set
6056 
6057      `buffer' to the compiled pattern;
6058      `used' to the length of the compiled pattern;
6059      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
6060        REG_EXTENDED bit in CFLAGS is set; otherwise, to
6061        RE_SYNTAX_POSIX_BASIC;
6062      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
6063      `fastmap' and `fastmap_accurate' to zero;
6064      `re_nsub' to the number of subexpressions in PATTERN.
6065 
6066    PATTERN is the address of the pattern string.
6067 
6068    CFLAGS is a series of bits which affect compilation.
6069 
6070      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
6071      use POSIX basic syntax.
6072 
6073      If REG_NEWLINE is set, then . and [^...] don't match newline.
6074      Also, regexec will try a match beginning after every newline.
6075 
6076      If REG_ICASE is set, then we considers upper- and lowercase
6077      versions of letters to be equivalent when matching.
6078 
6079      If REG_NOSUB is set, then when PREG is passed to regexec, that
6080      routine will report only success or failure, and nothing about the
6081      registers.
6082 
6083    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
6084    the return codes and their meanings.)  */
6085 
6086 #ifdef __APPLE__
6087 __private_extern__
6088 #endif
6089 int
regcomp(preg,pattern,cflags)6090 regcomp (preg, pattern, cflags)
6091     regex_t *preg;
6092     const char *pattern;
6093     int cflags;
6094 {
6095   reg_errcode_t ret;
6096   reg_syntax_t syntax
6097     = (cflags & REG_EXTENDED) ?
6098       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
6099 
6100   /* regex_compile will allocate the space for the compiled pattern.  */
6101   preg->buffer = 0;
6102   preg->allocated = 0;
6103   preg->used = 0;
6104 
6105   /* Don't bother to use a fastmap when searching.  This simplifies the
6106      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
6107      characters after newlines into the fastmap.  This way, we just try
6108      every character.  */
6109   preg->fastmap = 0;
6110 
6111   if (cflags & REG_ICASE)
6112     {
6113       unsigned i;
6114 
6115       preg->translate
6116 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
6117 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
6118       if (preg->translate == NULL)
6119         return (int) REG_ESPACE;
6120 
6121       /* Map uppercase characters to corresponding lowercase ones.  */
6122       for (i = 0; i < CHAR_SET_SIZE; i++)
6123         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
6124     }
6125   else
6126     preg->translate = NULL;
6127 
6128   /* If REG_NEWLINE is set, newlines are treated differently.  */
6129   if (cflags & REG_NEWLINE)
6130     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
6131       syntax &= ~RE_DOT_NEWLINE;
6132       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
6133       /* It also changes the matching behavior.  */
6134       preg->newline_anchor = 1;
6135     }
6136   else
6137     preg->newline_anchor = 0;
6138 
6139   preg->no_sub = !!(cflags & REG_NOSUB);
6140 
6141   /* POSIX says a null character in the pattern terminates it, so we
6142      can use strlen here in compiling the pattern.  */
6143   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
6144 
6145   /* POSIX doesn't distinguish between an unmatched open-group and an
6146      unmatched close-group: both are REG_EPAREN.  */
6147   if (ret == REG_ERPAREN) ret = REG_EPAREN;
6148 
6149   return (int) ret;
6150 }
6151 
6152 
6153 /* regexec searches for a given pattern, specified by PREG, in the
6154    string STRING.
6155 
6156    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
6157    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
6158    least NMATCH elements, and we set them to the offsets of the
6159    corresponding matched substrings.
6160 
6161    EFLAGS specifies `execution flags' which affect matching: if
6162    REG_NOTBOL is set, then ^ does not match at the beginning of the
6163    string; if REG_NOTEOL is set, then $ does not match at the end.
6164 
6165    We return 0 if we find a match and REG_NOMATCH if not.  */
6166 
6167 #ifdef __APPLE__
6168 __private_extern__
6169 #endif
6170 int
regexec(preg,string,nmatch,pmatch,eflags)6171 regexec (preg, string, nmatch, pmatch, eflags)
6172     const regex_t *preg;
6173     const char *string;
6174     size_t nmatch;
6175     regmatch_t pmatch[];
6176     int eflags;
6177 {
6178   int ret;
6179   struct re_registers regs;
6180   regex_t private_preg;
6181   int len = strlen (string);
6182   boolean want_reg_info = !preg->no_sub && nmatch > 0;
6183 
6184   private_preg = *preg;
6185 
6186   private_preg.not_bol = !!(eflags & REG_NOTBOL);
6187   private_preg.not_eol = !!(eflags & REG_NOTEOL);
6188 
6189   /* The user has told us exactly how many registers to return
6190      information about, via `nmatch'.  We have to pass that on to the
6191      matching routines.  */
6192   private_preg.regs_allocated = REGS_FIXED;
6193 
6194   if (want_reg_info)
6195     {
6196       regs.num_regs = nmatch;
6197       regs.start = TALLOC (nmatch, regoff_t);
6198       regs.end = TALLOC (nmatch, regoff_t);
6199       if (regs.start == NULL || regs.end == NULL)
6200         return (int) REG_NOMATCH;
6201     }
6202 
6203   /* Perform the searching operation.  */
6204   ret = re_search (&private_preg, string, len,
6205                    /* start: */ 0, /* range: */ len,
6206                    want_reg_info ? &regs : (struct re_registers *) 0);
6207 
6208   /* Copy the register information to the POSIX structure.  */
6209   if (want_reg_info)
6210     {
6211       if (ret >= 0)
6212         {
6213           unsigned r;
6214 
6215           for (r = 0; r < nmatch; r++)
6216             {
6217               pmatch[r].rm_so = regs.start[r];
6218               pmatch[r].rm_eo = regs.end[r];
6219             }
6220         }
6221 
6222       /* If we needed the temporary register info, free the space now.  */
6223       free (regs.start);
6224       free (regs.end);
6225     }
6226 
6227   /* We want zero return to mean success, unlike `re_search'.  */
6228   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
6229 }
6230 
6231 
6232 /* Returns a message corresponding to an error code, ERRCODE, returned
6233    from either regcomp or regexec.   We don't use PREG here.  */
6234 
6235 size_t
regerror(errcode,preg,errbuf,errbuf_size)6236 regerror (errcode, preg, errbuf, errbuf_size)
6237     int errcode;
6238     const regex_t *preg;
6239     char *errbuf;
6240     size_t errbuf_size;
6241 {
6242   const char *msg;
6243   size_t msg_size;
6244 
6245   if (errcode < 0
6246       || errcode >= (int) (sizeof (re_error_msgid)
6247 			   / sizeof (re_error_msgid[0])))
6248     /* Only error codes returned by the rest of the code should be passed
6249        to this routine.  If we are given anything else, or if other regex
6250        code generates an invalid error code, then the program has a bug.
6251        Dump core so we can fix it.  */
6252     abort ();
6253 
6254   msg = gettext (re_error_msgid[errcode]);
6255 
6256   msg_size = strlen (msg) + 1; /* Includes the null.  */
6257 
6258   if (errbuf_size != 0)
6259     {
6260       if (msg_size > errbuf_size)
6261         {
6262           strncpy (errbuf, msg, errbuf_size - 1);
6263           errbuf[errbuf_size - 1] = 0;
6264         }
6265       else
6266         strcpy (errbuf, msg);
6267     }
6268 
6269   return msg_size;
6270 }
6271 
6272 
6273 /* Free dynamically allocated space used by PREG.  */
6274 
6275 #ifdef __APPLE__
6276 __private_extern__
6277 #endif
6278 void
regfree(preg)6279 regfree (preg)
6280     regex_t *preg;
6281 {
6282   if (preg->buffer != NULL)
6283     free (preg->buffer);
6284   preg->buffer = NULL;
6285 
6286   preg->allocated = 0;
6287   preg->used = 0;
6288 
6289   if (preg->fastmap != NULL)
6290     free (preg->fastmap);
6291   preg->fastmap = NULL;
6292   preg->fastmap_accurate = 0;
6293 
6294   if (preg->translate != NULL)
6295     free (preg->translate);
6296   preg->translate = NULL;
6297 }
6298 
6299 #endif /* not emacs  */
6300