xref: /openbsd/gnu/usr.bin/gcc/gcc/fixinc/gnu-regex.c (revision c87b03e5)
1 /* Extended regular expression matching and search library,
2    version 0.12.
3    (Implements POSIX draft P1003.2/D11.2, except for some of the
4    internationalization features.)
5    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998
6    Free Software Foundation, Inc.
7 
8    NOTE: The canonical source of this file is maintained with the
9    GNU C Library.  Bugs can be reported to bug-glibc@prep.ai.mit.edu.
10 
11    This program is free software; you can redistribute it and/or modify it
12    under the terms of the GNU General Public License as published by the
13    Free Software Foundation; either version 2, or (at your option) any
14    later version.
15 
16    This program is distributed in the hope that it will be useful,
17    but WITHOUT ANY WARRANTY; without even the implied warranty of
18    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19    GNU General Public License for more details.
20 
21    You should have received a copy of the GNU General Public License
22    along with this program; if not, write to the Free Software Foundation,
23    Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.  */
24 
25 #ifdef HAVE_CONFIG_H
26 # include <config.h>
27 #endif
28 
29 /* GCC LOCAL: we don't need NLS here.  */
30 #undef ENABLE_NLS
31 /* GCC LOCAL: to handle defining alloca.  */
32 #include "libiberty.h"
33 
34 /* Do not use a C alloca, we will leak memory and crash.  */
35 #ifdef C_ALLOCA
36 # define REGEX_MALLOC
37 #endif
38 
39 /* AIX requires this to be the first thing in the file. */
40 #if defined _AIX && !defined REGEX_MALLOC
41   #pragma alloca
42 #endif
43 
44 #ifndef PARAMS
45 # if defined __GNUC__ || (defined __STDC__ && __STDC__)
46 #  define PARAMS(args) args
47 # else
48 #  define PARAMS(args) ()
49 # endif  /* GCC.  */
50 #endif  /* Not PARAMS.  */
51 
52 #if defined STDC_HEADERS && !defined emacs
53 # include <stddef.h>
54 #else
55 /* We need this for `gnu-regex.h', and perhaps for the Emacs include files.  */
56 # include <sys/types.h>
57 #endif
58 
59 /* For platform which support the ISO C amendement 1 functionality we
60    support user defined character classes.  */
61 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
62  /* Solaris 2.5 has a bug: <wchar.h> must be included before <wctype.h>.  */
63 # include <wchar.h>
64 # include <wctype.h>
65 #endif
66 
67 /* This is for other GNU distributions with internationalized messages.  */
68 /* GCC LOCAL: ../intl will handle this for us */
69 #ifdef ENABLE_NLS
70 # include <libintl.h>
71 #else
72 # define gettext(msgid) (msgid)
73 #endif
74 
75 #ifndef gettext_noop
76 /* This define is so xgettext can find the internationalizable
77    strings.  */
78 # define gettext_noop(String) String
79 #endif
80 
81 # if !defined(volatile) && !defined(HAVE_VOLATILE)
82 #  define volatile
83 # endif
84 
85 /* If we are not linking with Emacs proper,
86    we can't use the relocating allocator
87    even if config.h says that we can.  */
88 # undef REL_ALLOC
89 
90 # if defined STDC_HEADERS || defined _LIBC
91 #  include <stdlib.h>
92 # else
93 char *malloc ();
94 char *realloc ();
95 # endif
96 
97 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
98    If nothing else has been done, use the method below.  */
99 # ifdef INHIBIT_STRING_HEADER
100 #  if !(defined HAVE_BZERO && defined HAVE_BCOPY)
101 #   if !defined bzero && !defined bcopy
102 #    undef INHIBIT_STRING_HEADER
103 #   endif
104 #  endif
105 # endif
106 
107 /* This is the normal way of making sure we have a bcopy and a bzero.
108    This is used in most programs--a few other programs avoid this
109    by defining INHIBIT_STRING_HEADER.  */
110 # ifndef INHIBIT_STRING_HEADER
111 #  if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
112 #   include <string.h>
113 #   ifndef bzero
114 #    ifndef _LIBC
115 #     define bzero(s, n)	(memset (s, '\0', n), (s))
116 #    else
117 #     define bzero(s, n)	__bzero (s, n)
118 #    endif
119 #   endif
120 #  else
121 #   include <strings.h>
122 #   ifndef memcmp
123 #    define memcmp(s1, s2, n)	bcmp (s1, s2, n)
124 #   endif
125 #   ifndef memcpy
126 #    define memcpy(d, s, n)	(bcopy (s, d, n), (d))
127 #   endif
128 #  endif
129 # endif
130 
131 /* Define the syntax stuff for \<, \>, etc.  */
132 
133 /* This must be nonzero for the wordchar and notwordchar pattern
134    commands in re_match_2.  */
135 # ifndef Sword
136 #  define Sword 1
137 # endif
138 
139 # ifdef SWITCH_ENUM_BUG
140 #  define SWITCH_ENUM_CAST(x) ((int)(x))
141 # else
142 #  define SWITCH_ENUM_CAST(x) (x)
143 # endif
144 
145 /* How many characters in the character set.  */
146 # define CHAR_SET_SIZE 256
147 
148 # ifdef SYNTAX_TABLE
149 
150 extern char *re_syntax_table;
151 
152 # else /* not SYNTAX_TABLE */
153 
154 static char re_syntax_table[CHAR_SET_SIZE];
155 
156 static void init_syntax_once PARAMS ((void));
157 
158 static void
init_syntax_once()159 init_syntax_once ()
160 {
161    register int c;
162    static int done = 0;
163 
164    if (done)
165      return;
166 
167    bzero (re_syntax_table, sizeof re_syntax_table);
168 
169    for (c = 'a'; c <= 'z'; c++)
170      re_syntax_table[c] = Sword;
171 
172    for (c = 'A'; c <= 'Z'; c++)
173      re_syntax_table[c] = Sword;
174 
175    for (c = '0'; c <= '9'; c++)
176      re_syntax_table[c] = Sword;
177 
178    re_syntax_table['_'] = Sword;
179 
180    done = 1;
181 }
182 
183 # endif /* not SYNTAX_TABLE */
184 
185 # define SYNTAX(c) re_syntax_table[c]
186 
187 /* Get the interface, including the syntax bits.  */
188 /* GCC LOCAL: call it gnu-regex.h, not regex.h, to avoid name conflicts */
189 #include "gnu-regex.h"
190 
191 /* ISALPHA etc. are used for the character classes.  */
192 /* GCC LOCAL: use libiberty's safe-ctype.h, don't bother defining
193    wrapper macros ourselves.  */
194 #include <safe-ctype.h>
195 
196 #ifndef NULL
197 # define NULL (void *)0
198 #endif
199 
200 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
201    since ours (we hope) works properly with all combinations of
202    machines, compilers, `char' and `unsigned char' argument types.
203    (Per Bothner suggested the basic approach.)  */
204 #undef SIGN_EXTEND_CHAR
205 #if __STDC__
206 # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
207 #else  /* not __STDC__ */
208 /* As in Harbison and Steele.  */
209 # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
210 #endif
211 
212 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
213    use `alloca' instead of `malloc'.  This is because using malloc in
214    re_search* or re_match* could cause memory leaks when C-g is used in
215    Emacs; also, malloc is slower and causes storage fragmentation.  On
216    the other hand, malloc is more portable, and easier to debug.
217 
218    Because we sometimes use alloca, some routines have to be macros,
219    not functions -- `alloca'-allocated space disappears at the end of the
220    function it is called in.  */
221 
222 #ifdef REGEX_MALLOC
223 
224 # define REGEX_ALLOCATE malloc
225 # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
226 # define REGEX_FREE free
227 
228 #else /* not REGEX_MALLOC  */
229 
230 /* Emacs already defines alloca, sometimes.  */
231 # ifndef alloca
232 
233 /* Make alloca work the best possible way.  */
234 #  ifdef __GNUC__
235 #   define alloca __builtin_alloca
236 #  else /* not __GNUC__ */
237 #   if HAVE_ALLOCA_H
238 #    include <alloca.h>
239 #   endif /* HAVE_ALLOCA_H */
240 #  endif /* not __GNUC__ */
241 
242 # endif /* not alloca */
243 
244 # define REGEX_ALLOCATE alloca
245 
246 /* Assumes a `char *destination' variable.  */
247 # define REGEX_REALLOCATE(source, osize, nsize)				\
248   (destination = (char *) alloca (nsize),				\
249    memcpy (destination, source, osize))
250 
251 /* No need to do anything to free, after alloca.  */
252 # define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
253 
254 #endif /* not REGEX_MALLOC */
255 
256 /* Define how to allocate the failure stack.  */
257 
258 #if defined REL_ALLOC && defined REGEX_MALLOC
259 
260 # define REGEX_ALLOCATE_STACK(size)				\
261   r_alloc (&failure_stack_ptr, (size))
262 # define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
263   r_re_alloc (&failure_stack_ptr, (nsize))
264 # define REGEX_FREE_STACK(ptr)					\
265   r_alloc_free (&failure_stack_ptr)
266 
267 #else /* not using relocating allocator */
268 
269 # ifdef REGEX_MALLOC
270 
271 #  define REGEX_ALLOCATE_STACK malloc
272 #  define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
273 #  define REGEX_FREE_STACK free
274 
275 # else /* not REGEX_MALLOC */
276 
277 #  define REGEX_ALLOCATE_STACK alloca
278 
279 #  define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
280    REGEX_REALLOCATE (source, osize, nsize)
281 /* No need to explicitly free anything.  */
282 #  define REGEX_FREE_STACK(arg)
283 
284 # endif /* not REGEX_MALLOC */
285 #endif /* not using relocating allocator */
286 
287 
288 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
289    `string1' or just past its end.  This works if PTR is NULL, which is
290    a good thing.  */
291 #define FIRST_STRING_P(ptr) 					\
292   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
293 
294 /* (Re)Allocate N items of type T using malloc, or fail.  */
295 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
296 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
297 #define RETALLOC_IF(addr, n, t) \
298   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
299 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
300 
301 #define BYTEWIDTH 8 /* In bits.  */
302 
303 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
304 
305 #undef MAX
306 #undef MIN
307 #define MAX(a, b) ((a) > (b) ? (a) : (b))
308 #define MIN(a, b) ((a) < (b) ? (a) : (b))
309 
310 typedef char boolean;
311 #define false 0
312 #define true 1
313 
314 static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
315 					const char *string1, int size1,
316 					const char *string2, int size2,
317 					int pos,
318 					struct re_registers *regs,
319 					int stop));
320 
321 /* These are the command codes that appear in compiled regular
322    expressions.  Some opcodes are followed by argument bytes.  A
323    command code can specify any interpretation whatsoever for its
324    arguments.  Zero bytes may appear in the compiled regular expression.  */
325 
326 typedef enum
327 {
328   no_op = 0,
329 
330   /* Succeed right away--no more backtracking.  */
331   succeed,
332 
333         /* Followed by one byte giving n, then by n literal bytes.  */
334   exactn,
335 
336         /* Matches any (more or less) character.  */
337   anychar,
338 
339         /* Matches any one char belonging to specified set.  First
340            following byte is number of bitmap bytes.  Then come bytes
341            for a bitmap saying which chars are in.  Bits in each byte
342            are ordered low-bit-first.  A character is in the set if its
343            bit is 1.  A character too large to have a bit in the map is
344            automatically not in the set.  */
345   charset,
346 
347         /* Same parameters as charset, but match any character that is
348            not one of those specified.  */
349   charset_not,
350 
351         /* Start remembering the text that is matched, for storing in a
352            register.  Followed by one byte with the register number, in
353            the range 0 to one less than the pattern buffer's re_nsub
354            field.  Then followed by one byte with the number of groups
355            inner to this one.  (This last has to be part of the
356            start_memory only because we need it in the on_failure_jump
357            of re_match_2.)  */
358   start_memory,
359 
360         /* Stop remembering the text that is matched and store it in a
361            memory register.  Followed by one byte with the register
362            number, in the range 0 to one less than `re_nsub' in the
363            pattern buffer, and one byte with the number of inner groups,
364            just like `start_memory'.  (We need the number of inner
365            groups here because we don't have any easy way of finding the
366            corresponding start_memory when we're at a stop_memory.)  */
367   stop_memory,
368 
369         /* Match a duplicate of something remembered. Followed by one
370            byte containing the register number.  */
371   duplicate,
372 
373         /* Fail unless at beginning of line.  */
374   begline,
375 
376         /* Fail unless at end of line.  */
377   endline,
378 
379         /* Succeeds if at beginning of buffer (if emacs) or at beginning
380            of string to be matched (if not).  */
381   begbuf,
382 
383         /* Analogously, for end of buffer/string.  */
384   endbuf,
385 
386         /* Followed by two byte relative address to which to jump.  */
387   jump,
388 
389 	/* Same as jump, but marks the end of an alternative.  */
390   jump_past_alt,
391 
392         /* Followed by two-byte relative address of place to resume at
393            in case of failure.  */
394   on_failure_jump,
395 
396         /* Like on_failure_jump, but pushes a placeholder instead of the
397            current string position when executed.  */
398   on_failure_keep_string_jump,
399 
400         /* Throw away latest failure point and then jump to following
401            two-byte relative address.  */
402   pop_failure_jump,
403 
404         /* Change to pop_failure_jump if know won't have to backtrack to
405            match; otherwise change to jump.  This is used to jump
406            back to the beginning of a repeat.  If what follows this jump
407            clearly won't match what the repeat does, such that we can be
408            sure that there is no use backtracking out of repetitions
409            already matched, then we change it to a pop_failure_jump.
410            Followed by two-byte address.  */
411   maybe_pop_jump,
412 
413         /* Jump to following two-byte address, and push a dummy failure
414            point. This failure point will be thrown away if an attempt
415            is made to use it for a failure.  A `+' construct makes this
416            before the first repeat.  Also used as an intermediary kind
417            of jump when compiling an alternative.  */
418   dummy_failure_jump,
419 
420 	/* Push a dummy failure point and continue.  Used at the end of
421 	   alternatives.  */
422   push_dummy_failure,
423 
424         /* Followed by two-byte relative address and two-byte number n.
425            After matching N times, jump to the address upon failure.  */
426   succeed_n,
427 
428         /* Followed by two-byte relative address, and two-byte number n.
429            Jump to the address N times, then fail.  */
430   jump_n,
431 
432         /* Set the following two-byte relative address to the
433            subsequent two-byte number.  The address *includes* the two
434            bytes of number.  */
435   set_number_at,
436 
437   wordchar,	/* Matches any word-constituent character.  */
438   notwordchar,	/* Matches any char that is not a word-constituent.  */
439 
440   wordbeg,	/* Succeeds if at word beginning.  */
441   wordend,	/* Succeeds if at word end.  */
442 
443   wordbound,	/* Succeeds if at a word boundary.  */
444   notwordbound	/* Succeeds if not at a word boundary.  */
445 
446 #ifdef emacs
447   ,before_dot,	/* Succeeds if before point.  */
448   at_dot,	/* Succeeds if at point.  */
449   after_dot,	/* Succeeds if after point.  */
450 
451 	/* Matches any character whose syntax is specified.  Followed by
452            a byte which contains a syntax code, e.g., Sword.  */
453   syntaxspec,
454 
455 	/* Matches any character whose syntax is not that specified.  */
456   notsyntaxspec
457 #endif /* emacs */
458 } re_opcode_t;
459 
460 /* Common operations on the compiled pattern.  */
461 
462 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
463 
464 #define STORE_NUMBER(destination, number)				\
465   do {									\
466     (destination)[0] = (number) & 0377;					\
467     (destination)[1] = (number) >> 8;					\
468   } while (0)
469 
470 /* Same as STORE_NUMBER, except increment DESTINATION to
471    the byte after where the number is stored.  Therefore, DESTINATION
472    must be an lvalue.  */
473 
474 #define STORE_NUMBER_AND_INCR(destination, number)			\
475   do {									\
476     STORE_NUMBER (destination, number);					\
477     (destination) += 2;							\
478   } while (0)
479 
480 /* Put into DESTINATION a number stored in two contiguous bytes starting
481    at SOURCE.  */
482 
483 #define EXTRACT_NUMBER(destination, source)				\
484   do {									\
485     (destination) = *(source) & 0377;					\
486     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
487   } while (0)
488 
489 #ifdef DEBUG
490 static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
491 static void
extract_number(dest,source)492 extract_number (dest, source)
493     int *dest;
494     unsigned char *source;
495 {
496   int temp = SIGN_EXTEND_CHAR (*(source + 1));
497   *dest = *source & 0377;
498   *dest += temp << 8;
499 }
500 
501 # ifndef EXTRACT_MACROS /* To debug the macros.  */
502 #  undef EXTRACT_NUMBER
503 #  define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
504 # endif /* not EXTRACT_MACROS */
505 
506 #endif /* DEBUG */
507 
508 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
509    SOURCE must be an lvalue.  */
510 
511 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
512   do {									\
513     EXTRACT_NUMBER (destination, source);				\
514     (source) += 2; 							\
515   } while (0)
516 
517 #ifdef DEBUG
518 static void extract_number_and_incr _RE_ARGS ((int *destination,
519 					       unsigned char **source));
520 static void
extract_number_and_incr(destination,source)521 extract_number_and_incr (destination, source)
522     int *destination;
523     unsigned char **source;
524 {
525   extract_number (destination, *source);
526   *source += 2;
527 }
528 
529 # ifndef EXTRACT_MACROS
530 #  undef EXTRACT_NUMBER_AND_INCR
531 #  define EXTRACT_NUMBER_AND_INCR(dest, src) \
532   extract_number_and_incr (&dest, &src)
533 # endif /* not EXTRACT_MACROS */
534 
535 #endif /* DEBUG */
536 
537 /* If DEBUG is defined, Regex prints many voluminous messages about what
538    it is doing (if the variable `debug' is nonzero).  If linked with the
539    main program in `iregex.c', you can enter patterns and strings
540    interactively.  And if linked with the main program in `main.c' and
541    the other test files, you can run the already-written tests.  */
542 
543 #ifdef DEBUG
544 
545 /* We use standard I/O for debugging.  */
546 # include <stdio.h>
547 
548 /* It is useful to test things that ``must'' be true when debugging.  */
549 # include <assert.h>
550 
551 static int debug = 0;
552 
553 # define DEBUG_STATEMENT(e) e
554 # define DEBUG_PRINT1(x) if (debug) printf (x)
555 # define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
556 # define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
557 # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
558 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
559   if (debug) print_partial_compiled_pattern (s, e)
560 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
561   if (debug) print_double_string (w, s1, sz1, s2, sz2)
562 
563 
564 /* Print the fastmap in human-readable form.  */
565 
566 void
print_fastmap(fastmap)567 print_fastmap (fastmap)
568     char *fastmap;
569 {
570   unsigned was_a_range = 0;
571   unsigned i = 0;
572 
573   while (i < (1 << BYTEWIDTH))
574     {
575       if (fastmap[i++])
576 	{
577 	  was_a_range = 0;
578           putchar (i - 1);
579           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
580             {
581               was_a_range = 1;
582               i++;
583             }
584 	  if (was_a_range)
585             {
586               printf ("-");
587               putchar (i - 1);
588             }
589         }
590     }
591   putchar ('\n');
592 }
593 
594 
595 /* Print a compiled pattern string in human-readable form, starting at
596    the START pointer into it and ending just before the pointer END.  */
597 
598 void
print_partial_compiled_pattern(start,end)599 print_partial_compiled_pattern (start, end)
600     unsigned char *start;
601     unsigned char *end;
602 {
603   int mcnt, mcnt2;
604   unsigned char *p1;
605   unsigned char *p = start;
606   unsigned char *pend = end;
607 
608   if (start == NULL)
609     {
610       printf ("(null)\n");
611       return;
612     }
613 
614   /* Loop over pattern commands.  */
615   while (p < pend)
616     {
617       printf ("%d:\t", p - start);
618 
619       switch ((re_opcode_t) *p++)
620 	{
621         case no_op:
622           printf ("/no_op");
623           break;
624 
625 	case exactn:
626 	  mcnt = *p++;
627           printf ("/exactn/%d", mcnt);
628           do
629 	    {
630               putchar ('/');
631 	      putchar (*p++);
632             }
633           while (--mcnt);
634           break;
635 
636 	case start_memory:
637           mcnt = *p++;
638           printf ("/start_memory/%d/%d", mcnt, *p++);
639           break;
640 
641 	case stop_memory:
642           mcnt = *p++;
643 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
644           break;
645 
646 	case duplicate:
647 	  printf ("/duplicate/%d", *p++);
648 	  break;
649 
650 	case anychar:
651 	  printf ("/anychar");
652 	  break;
653 
654 	case charset:
655         case charset_not:
656           {
657             register int c, last = -100;
658 	    register int in_range = 0;
659 
660 	    printf ("/charset [%s",
661 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
662 
663             assert (p + *p < pend);
664 
665             for (c = 0; c < 256; c++)
666 	      if (c / 8 < *p
667 		  && (p[1 + (c/8)] & (1 << (c % 8))))
668 		{
669 		  /* Are we starting a range?  */
670 		  if (last + 1 == c && ! in_range)
671 		    {
672 		      putchar ('-');
673 		      in_range = 1;
674 		    }
675 		  /* Have we broken a range?  */
676 		  else if (last + 1 != c && in_range)
677               {
678 		      putchar (last);
679 		      in_range = 0;
680 		    }
681 
682 		  if (! in_range)
683 		    putchar (c);
684 
685 		  last = c;
686               }
687 
688 	    if (in_range)
689 	      putchar (last);
690 
691 	    putchar (']');
692 
693 	    p += 1 + *p;
694 	  }
695 	  break;
696 
697 	case begline:
698 	  printf ("/begline");
699           break;
700 
701 	case endline:
702           printf ("/endline");
703           break;
704 
705 	case on_failure_jump:
706           extract_number_and_incr (&mcnt, &p);
707   	  printf ("/on_failure_jump to %d", p + mcnt - start);
708           break;
709 
710 	case on_failure_keep_string_jump:
711           extract_number_and_incr (&mcnt, &p);
712   	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
713           break;
714 
715 	case dummy_failure_jump:
716           extract_number_and_incr (&mcnt, &p);
717   	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
718           break;
719 
720 	case push_dummy_failure:
721           printf ("/push_dummy_failure");
722           break;
723 
724         case maybe_pop_jump:
725           extract_number_and_incr (&mcnt, &p);
726   	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
727 	  break;
728 
729         case pop_failure_jump:
730 	  extract_number_and_incr (&mcnt, &p);
731   	  printf ("/pop_failure_jump to %d", p + mcnt - start);
732 	  break;
733 
734         case jump_past_alt:
735 	  extract_number_and_incr (&mcnt, &p);
736   	  printf ("/jump_past_alt to %d", p + mcnt - start);
737 	  break;
738 
739         case jump:
740 	  extract_number_and_incr (&mcnt, &p);
741   	  printf ("/jump to %d", p + mcnt - start);
742 	  break;
743 
744         case succeed_n:
745           extract_number_and_incr (&mcnt, &p);
746 	  p1 = p + mcnt;
747           extract_number_and_incr (&mcnt2, &p);
748 	  printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
749           break;
750 
751         case jump_n:
752           extract_number_and_incr (&mcnt, &p);
753 	  p1 = p + mcnt;
754           extract_number_and_incr (&mcnt2, &p);
755 	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
756           break;
757 
758         case set_number_at:
759           extract_number_and_incr (&mcnt, &p);
760 	  p1 = p + mcnt;
761           extract_number_and_incr (&mcnt2, &p);
762 	  printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
763           break;
764 
765         case wordbound:
766 	  printf ("/wordbound");
767 	  break;
768 
769 	case notwordbound:
770 	  printf ("/notwordbound");
771           break;
772 
773 	case wordbeg:
774 	  printf ("/wordbeg");
775 	  break;
776 
777 	case wordend:
778 	  printf ("/wordend");
779 
780 # ifdef emacs
781 	case before_dot:
782 	  printf ("/before_dot");
783           break;
784 
785 	case at_dot:
786 	  printf ("/at_dot");
787           break;
788 
789 	case after_dot:
790 	  printf ("/after_dot");
791           break;
792 
793 	case syntaxspec:
794           printf ("/syntaxspec");
795 	  mcnt = *p++;
796 	  printf ("/%d", mcnt);
797           break;
798 
799 	case notsyntaxspec:
800           printf ("/notsyntaxspec");
801 	  mcnt = *p++;
802 	  printf ("/%d", mcnt);
803 	  break;
804 # endif /* emacs */
805 
806 	case wordchar:
807 	  printf ("/wordchar");
808           break;
809 
810 	case notwordchar:
811 	  printf ("/notwordchar");
812           break;
813 
814 	case begbuf:
815 	  printf ("/begbuf");
816           break;
817 
818 	case endbuf:
819 	  printf ("/endbuf");
820           break;
821 
822         default:
823           printf ("?%d", *(p-1));
824 	}
825 
826       putchar ('\n');
827     }
828 
829   printf ("%d:\tend of pattern.\n", p - start);
830 }
831 
832 
833 void
print_compiled_pattern(bufp)834 print_compiled_pattern (bufp)
835     struct re_pattern_buffer *bufp;
836 {
837   unsigned char *buffer = bufp->buffer;
838 
839   print_partial_compiled_pattern (buffer, buffer + bufp->used);
840   printf ("%ld bytes used/%ld bytes allocated.\n",
841 	  bufp->used, bufp->allocated);
842 
843   if (bufp->fastmap_accurate && bufp->fastmap)
844     {
845       printf ("fastmap: ");
846       print_fastmap (bufp->fastmap);
847     }
848 
849   printf ("re_nsub: %d\t", bufp->re_nsub);
850   printf ("regs_alloc: %d\t", bufp->regs_allocated);
851   printf ("can_be_null: %d\t", bufp->can_be_null);
852   printf ("newline_anchor: %d\n", bufp->newline_anchor);
853   printf ("no_sub: %d\t", bufp->no_sub);
854   printf ("not_bol: %d\t", bufp->not_bol);
855   printf ("not_eol: %d\t", bufp->not_eol);
856   printf ("syntax: %lx\n", bufp->syntax);
857   /* Perhaps we should print the translate table?  */
858 }
859 
860 
861 void
print_double_string(where,string1,size1,string2,size2)862 print_double_string (where, string1, size1, string2, size2)
863     const char *where;
864     const char *string1;
865     const char *string2;
866     int size1;
867     int size2;
868 {
869   int this_char;
870 
871   if (where == NULL)
872     printf ("(null)");
873   else
874     {
875       if (FIRST_STRING_P (where))
876         {
877           for (this_char = where - string1; this_char < size1; this_char++)
878             putchar (string1[this_char]);
879 
880           where = string2;
881         }
882 
883       for (this_char = where - string2; this_char < size2; this_char++)
884         putchar (string2[this_char]);
885     }
886 }
887 
888 void
printchar(c)889 printchar (c)
890      int c;
891 {
892   putc (c, stderr);
893 }
894 
895 #else /* not DEBUG */
896 
897 # undef assert
898 # define assert(e)
899 
900 # define DEBUG_STATEMENT(e)
901 # define DEBUG_PRINT1(x)
902 # define DEBUG_PRINT2(x1, x2)
903 # define DEBUG_PRINT3(x1, x2, x3)
904 # define DEBUG_PRINT4(x1, x2, x3, x4)
905 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
906 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
907 
908 #endif /* not DEBUG */
909 
910 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
911    also be assigned to arbitrarily: each pattern buffer stores its own
912    syntax, so it can be changed between regex compilations.  */
913 /* This has no initializer because initialized variables in Emacs
914    become read-only after dumping.  */
915 reg_syntax_t re_syntax_options;
916 
917 
918 /* Specify the precise syntax of regexps for compilation.  This provides
919    for compatibility for various utilities which historically have
920    different, incompatible syntaxes.
921 
922    The argument SYNTAX is a bit mask comprised of the various bits
923    defined in gnu-regex.h.  We return the old syntax.  */
924 
925 reg_syntax_t
re_set_syntax(syntax)926 re_set_syntax (syntax)
927     reg_syntax_t syntax;
928 {
929   reg_syntax_t ret = re_syntax_options;
930 
931   re_syntax_options = syntax;
932 #ifdef DEBUG
933   if (syntax & RE_DEBUG)
934     debug = 1;
935   else if (debug) /* was on but now is not */
936     debug = 0;
937 #endif /* DEBUG */
938   return ret;
939 }
940 #ifdef _LIBC
941 weak_alias (__re_set_syntax, re_set_syntax)
942 #endif
943 
944 /* This table gives an error message for each of the error codes listed
945    in gnu-regex.h.  Obviously the order here has to be same as there.
946    POSIX doesn't require that we do anything for REG_NOERROR,
947    but why not be nice?  */
948 
949 static const char *const re_error_msgid[] =
950   {
951     gettext_noop ("Success"),	/* REG_NOERROR */
952     gettext_noop ("No match"),	/* REG_NOMATCH */
953     gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
954     gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
955     gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
956     gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
957     gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
958     gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
959     gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
960     gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
961     gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
962     gettext_noop ("Invalid range end"),	/* REG_ERANGE */
963     gettext_noop ("Memory exhausted"), /* REG_ESPACE */
964     gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
965     gettext_noop ("Premature end of regular expression"), /* REG_EEND */
966     gettext_noop ("Regular expression too big"), /* REG_ESIZE */
967     gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
968   };
969 
970 /* Avoiding alloca during matching, to placate r_alloc.  */
971 
972 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
973    searching and matching functions should not call alloca.  On some
974    systems, alloca is implemented in terms of malloc, and if we're
975    using the relocating allocator routines, then malloc could cause a
976    relocation, which might (if the strings being searched are in the
977    ralloc heap) shift the data out from underneath the regexp
978    routines.
979 
980    Here's another reason to avoid allocation: Emacs
981    processes input from X in a signal handler; processing X input may
982    call malloc; if input arrives while a matching routine is calling
983    malloc, then we're scrod.  But Emacs can't just block input while
984    calling matching routines; then we don't notice interrupts when
985    they come in.  So, Emacs blocks input around all regexp calls
986    except the matching calls, which it leaves unprotected, in the
987    faith that they will not malloc.  */
988 
989 /* Normally, this is fine.  */
990 #define MATCH_MAY_ALLOCATE
991 
992 /* When using GNU C, we are not REALLY using the C alloca, no matter
993    what config.h may say.  So don't take precautions for it.  */
994 #ifdef __GNUC__
995 # undef C_ALLOCA
996 #endif
997 
998 /* The match routines may not allocate if (1) they would do it with malloc
999    and (2) it's not safe for them to use malloc.
1000    Note that if REL_ALLOC is defined, matching would not use malloc for the
1001    failure stack, but we would still use it for the register vectors;
1002    so REL_ALLOC should not affect this.  */
1003 #if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
1004 # undef MATCH_MAY_ALLOCATE
1005 #endif
1006 
1007 
1008 /* Failure stack declarations and macros; both re_compile_fastmap and
1009    re_match_2 use a failure stack.  These have to be macros because of
1010    REGEX_ALLOCATE_STACK.  */
1011 
1012 
1013 /* Number of failure points for which to initially allocate space
1014    when matching.  If this number is exceeded, we allocate more
1015    space, so it is not a hard limit.  */
1016 #ifndef INIT_FAILURE_ALLOC
1017 # define INIT_FAILURE_ALLOC 5
1018 #endif
1019 
1020 /* Roughly the maximum number of failure points on the stack.  Would be
1021    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1022    This is a variable only so users of regex can assign to it; we never
1023    change it ourselves.  */
1024 
1025 #ifdef INT_IS_16BIT
1026 
1027 # if defined MATCH_MAY_ALLOCATE
1028 /* 4400 was enough to cause a crash on Alpha OSF/1,
1029    whose default stack limit is 2mb.  */
1030 long int re_max_failures = 4000;
1031 # else
1032 long int re_max_failures = 2000;
1033 # endif
1034 
1035 union fail_stack_elt
1036 {
1037   unsigned char *pointer;
1038   long int integer;
1039 };
1040 
1041 typedef union fail_stack_elt fail_stack_elt_t;
1042 
1043 typedef struct
1044 {
1045   fail_stack_elt_t *stack;
1046   unsigned long int size;
1047   unsigned long int avail;		/* Offset of next open position.  */
1048 } fail_stack_type;
1049 
1050 #else /* not INT_IS_16BIT */
1051 
1052 # if defined MATCH_MAY_ALLOCATE
1053 /* 4400 was enough to cause a crash on Alpha OSF/1,
1054    whose default stack limit is 2mb.  */
1055 int re_max_failures = 20000;
1056 # else
1057 int re_max_failures = 2000;
1058 # endif
1059 
1060 union fail_stack_elt
1061 {
1062   unsigned char *pointer;
1063   int integer;
1064 };
1065 
1066 typedef union fail_stack_elt fail_stack_elt_t;
1067 
1068 typedef struct
1069 {
1070   fail_stack_elt_t *stack;
1071   unsigned size;
1072   unsigned avail;			/* Offset of next open position.  */
1073 } fail_stack_type;
1074 
1075 #endif /* INT_IS_16BIT */
1076 
1077 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1078 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1079 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1080 
1081 
1082 /* Define macros to initialize and free the failure stack.
1083    Do `return -2' if the alloc fails.  */
1084 
1085 #ifdef MATCH_MAY_ALLOCATE
1086 # define INIT_FAIL_STACK()						\
1087   do {									\
1088     fail_stack.stack = (fail_stack_elt_t *)				\
1089       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
1090 									\
1091     if (fail_stack.stack == NULL)					\
1092       return -2;							\
1093 									\
1094     fail_stack.size = INIT_FAILURE_ALLOC;				\
1095     fail_stack.avail = 0;						\
1096   } while (0)
1097 
1098 # define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1099 #else
1100 # define INIT_FAIL_STACK()						\
1101   do {									\
1102     fail_stack.avail = 0;						\
1103   } while (0)
1104 
1105 # define RESET_FAIL_STACK()
1106 #endif
1107 
1108 
1109 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1110 
1111    Return 1 if succeeds, and 0 if either ran out of memory
1112    allocating space for it or it was already too large.
1113 
1114    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1115 
1116 #define DOUBLE_FAIL_STACK(fail_stack)					\
1117   ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
1118    ? 0									\
1119    : ((fail_stack).stack = (fail_stack_elt_t *)				\
1120         REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
1121           (fail_stack).size * sizeof (fail_stack_elt_t),		\
1122           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
1123 									\
1124       (fail_stack).stack == NULL					\
1125       ? 0								\
1126       : ((fail_stack).size <<= 1, 					\
1127          1)))
1128 
1129 
1130 /* Push pointer POINTER on FAIL_STACK.
1131    Return 1 if was able to do so and 0 if ran out of memory allocating
1132    space to do so.  */
1133 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
1134   ((FAIL_STACK_FULL ()							\
1135     && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
1136    ? 0									\
1137    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
1138       1))
1139 
1140 /* Push a pointer value onto the failure stack.
1141    Assumes the variable `fail_stack'.  Probably should only
1142    be called from within `PUSH_FAILURE_POINT'.  */
1143 #define PUSH_FAILURE_POINTER(item)					\
1144   fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1145 
1146 /* This pushes an integer-valued item onto the failure stack.
1147    Assumes the variable `fail_stack'.  Probably should only
1148    be called from within `PUSH_FAILURE_POINT'.  */
1149 #define PUSH_FAILURE_INT(item)					\
1150   fail_stack.stack[fail_stack.avail++].integer = (item)
1151 
1152 /* Push a fail_stack_elt_t value onto the failure stack.
1153    Assumes the variable `fail_stack'.  Probably should only
1154    be called from within `PUSH_FAILURE_POINT'.  */
1155 #define PUSH_FAILURE_ELT(item)					\
1156   fail_stack.stack[fail_stack.avail++] =  (item)
1157 
1158 /* These three POP... operations complement the three PUSH... operations.
1159    All assume that `fail_stack' is nonempty.  */
1160 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1161 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1162 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1163 
1164 /* Used to omit pushing failure point id's when we're not debugging.  */
1165 #ifdef DEBUG
1166 # define DEBUG_PUSH PUSH_FAILURE_INT
1167 # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1168 #else
1169 # define DEBUG_PUSH(item)
1170 # define DEBUG_POP(item_addr)
1171 #endif
1172 
1173 
1174 /* Push the information about the state we will need
1175    if we ever fail back to it.
1176 
1177    Requires variables fail_stack, regstart, regend, reg_info, and
1178    num_regs_pushed be declared.  DOUBLE_FAIL_STACK requires `destination'
1179    be declared.
1180 
1181    Does `return FAILURE_CODE' if runs out of memory.  */
1182 
1183 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1184   do {									\
1185     char *destination;							\
1186     /* Must be int, so when we don't save any registers, the arithmetic	\
1187        of 0 + -1 isn't done as unsigned.  */				\
1188     /* Can't be int, since there is not a shred of a guarantee that int	\
1189        is wide enough to hold a value of something to which pointer can	\
1190        be assigned */							\
1191     active_reg_t this_reg;						\
1192     									\
1193     DEBUG_STATEMENT (failure_id++);					\
1194     DEBUG_STATEMENT (nfailure_points_pushed++);				\
1195     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1196     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1197     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1198 									\
1199     DEBUG_PRINT2 ("  slots needed: %ld\n", NUM_FAILURE_ITEMS);		\
1200     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
1201 									\
1202     /* Ensure we have enough space allocated for what we will push.  */	\
1203     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1204       {									\
1205         if (!DOUBLE_FAIL_STACK (fail_stack))				\
1206           return failure_code;						\
1207 									\
1208         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1209 		       (fail_stack).size);				\
1210         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1211       }									\
1212 									\
1213     /* Push the info, starting with the registers.  */			\
1214     DEBUG_PRINT1 ("\n");						\
1215 									\
1216     if (1)								\
1217       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1218 	   this_reg++)							\
1219 	{								\
1220 	  DEBUG_PRINT2 ("  Pushing reg: %lu\n", this_reg);		\
1221 	  DEBUG_STATEMENT (num_regs_pushed++);				\
1222 									\
1223 	  DEBUG_PRINT2 ("    start: %p\n", regstart[this_reg]);		\
1224 	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1225 									\
1226 	  DEBUG_PRINT2 ("    end: %p\n", regend[this_reg]);		\
1227 	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1228 									\
1229 	  DEBUG_PRINT2 ("    info: %p\n      ",				\
1230 			reg_info[this_reg].word.pointer);		\
1231 	  DEBUG_PRINT2 (" match_null=%d",				\
1232 			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1233 	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1234 	  DEBUG_PRINT2 (" matched_something=%d",			\
1235 			MATCHED_SOMETHING (reg_info[this_reg]));	\
1236 	  DEBUG_PRINT2 (" ever_matched=%d",				\
1237 			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1238 	  DEBUG_PRINT1 ("\n");						\
1239 	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1240 	}								\
1241 									\
1242     DEBUG_PRINT2 ("  Pushing  low active reg: %ld\n", lowest_active_reg);\
1243     PUSH_FAILURE_INT (lowest_active_reg);				\
1244 									\
1245     DEBUG_PRINT2 ("  Pushing high active reg: %ld\n", highest_active_reg);\
1246     PUSH_FAILURE_INT (highest_active_reg);				\
1247 									\
1248     DEBUG_PRINT2 ("  Pushing pattern %p:\n", pattern_place);		\
1249     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1250     PUSH_FAILURE_POINTER (pattern_place);				\
1251 									\
1252     DEBUG_PRINT2 ("  Pushing string %p: `", string_place);		\
1253     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1254 				 size2);				\
1255     DEBUG_PRINT1 ("'\n");						\
1256     PUSH_FAILURE_POINTER (string_place);				\
1257 									\
1258     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1259     DEBUG_PUSH (failure_id);						\
1260   } while (0)
1261 
1262 /* This is the number of items that are pushed and popped on the stack
1263    for each register.  */
1264 #define NUM_REG_ITEMS  3
1265 
1266 /* Individual items aside from the registers.  */
1267 #ifdef DEBUG
1268 # define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1269 #else
1270 # define NUM_NONREG_ITEMS 4
1271 #endif
1272 
1273 /* We push at most this many items on the stack.  */
1274 /* We used to use (num_regs - 1), which is the number of registers
1275    this regexp will save; but that was changed to 5
1276    to avoid stack overflow for a regexp with lots of parens.  */
1277 #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1278 
1279 /* We actually push this many items.  */
1280 #define NUM_FAILURE_ITEMS				\
1281   (((0							\
1282      ? 0 : highest_active_reg - lowest_active_reg + 1)	\
1283     * NUM_REG_ITEMS)					\
1284    + NUM_NONREG_ITEMS)
1285 
1286 /* How many items can still be added to the stack without overflowing it.  */
1287 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1288 
1289 
1290 /* Pops what PUSH_FAIL_STACK pushes.
1291 
1292    We restore into the parameters, all of which should be lvalues:
1293      STR -- the saved data position.
1294      PAT -- the saved pattern position.
1295      LOW_REG, HIGH_REG -- the highest and lowest active registers.
1296      REGSTART, REGEND -- arrays of string positions.
1297      REG_INFO -- array of information about each subexpression.
1298 
1299    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1300    `pend', `string1', `size1', `string2', and `size2'.  */
1301 
1302 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1303 {									\
1304   DEBUG_STATEMENT (unsigned failure_id;)				\
1305   active_reg_t this_reg;						\
1306   const unsigned char *string_temp;					\
1307 									\
1308   assert (!FAIL_STACK_EMPTY ());					\
1309 									\
1310   /* Remove failure points and point to how many regs pushed.  */	\
1311   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1312   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1313   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
1314 									\
1315   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1316 									\
1317   DEBUG_POP (&failure_id);						\
1318   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1319 									\
1320   /* If the saved string location is NULL, it came from an		\
1321      on_failure_keep_string_jump opcode, and we want to throw away the	\
1322      saved NULL, thus retaining our current position in the string.  */	\
1323   string_temp = POP_FAILURE_POINTER ();					\
1324   if (string_temp != NULL)						\
1325     str = (const char *) string_temp;					\
1326 									\
1327   DEBUG_PRINT2 ("  Popping string %p: `", str);				\
1328   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1329   DEBUG_PRINT1 ("'\n");							\
1330 									\
1331   pat = (unsigned char *) POP_FAILURE_POINTER ();			\
1332   DEBUG_PRINT2 ("  Popping pattern %p:\n", pat);			\
1333   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1334 									\
1335   /* Restore register info.  */						\
1336   high_reg = (active_reg_t) POP_FAILURE_INT ();				\
1337   DEBUG_PRINT2 ("  Popping high active reg: %ld\n", high_reg);		\
1338 									\
1339   low_reg = (active_reg_t) POP_FAILURE_INT ();				\
1340   DEBUG_PRINT2 ("  Popping  low active reg: %ld\n", low_reg);		\
1341 									\
1342   if (1)								\
1343     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
1344       {									\
1345 	DEBUG_PRINT2 ("    Popping reg: %ld\n", this_reg);		\
1346 									\
1347 	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1348 	DEBUG_PRINT2 ("      info: %p\n",				\
1349 		      reg_info[this_reg].word.pointer);			\
1350 									\
1351 	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1352 	DEBUG_PRINT2 ("      end: %p\n", regend[this_reg]);		\
1353 									\
1354 	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1355 	DEBUG_PRINT2 ("      start: %p\n", regstart[this_reg]);		\
1356       }									\
1357   else									\
1358     {									\
1359       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1360 	{								\
1361 	  reg_info[this_reg].word.integer = 0;				\
1362 	  regend[this_reg] = 0;						\
1363 	  regstart[this_reg] = 0;					\
1364 	}								\
1365       highest_active_reg = high_reg;					\
1366     }									\
1367 									\
1368   set_regs_matched_done = 0;						\
1369   DEBUG_STATEMENT (nfailure_points_popped++);				\
1370 } /* POP_FAILURE_POINT */
1371 
1372 
1373 
1374 /* Structure for per-register (a.k.a. per-group) information.
1375    Other register information, such as the
1376    starting and ending positions (which are addresses), and the list of
1377    inner groups (which is a bits list) are maintained in separate
1378    variables.
1379 
1380    We are making a (strictly speaking) nonportable assumption here: that
1381    the compiler will pack our bit fields into something that fits into
1382    the type of `word', i.e., is something that fits into one item on the
1383    failure stack.  */
1384 
1385 
1386 /* Declarations and macros for re_match_2.  */
1387 
1388 typedef union
1389 {
1390   fail_stack_elt_t word;
1391   struct
1392   {
1393       /* This field is one if this group can match the empty string,
1394          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1395 #define MATCH_NULL_UNSET_VALUE 3
1396     unsigned match_null_string_p : 2;
1397     unsigned is_active : 1;
1398     unsigned matched_something : 1;
1399     unsigned ever_matched_something : 1;
1400   } bits;
1401 } register_info_type;
1402 
1403 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1404 #define IS_ACTIVE(R)  ((R).bits.is_active)
1405 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1406 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1407 
1408 
1409 /* Call this when have matched a real character; it sets `matched' flags
1410    for the subexpressions which we are currently inside.  Also records
1411    that those subexprs have matched.  */
1412 #define SET_REGS_MATCHED()						\
1413   do									\
1414     {									\
1415       if (!set_regs_matched_done)					\
1416 	{								\
1417 	  active_reg_t r;						\
1418 	  set_regs_matched_done = 1;					\
1419 	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1420 	    {								\
1421 	      MATCHED_SOMETHING (reg_info[r])				\
1422 		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1423 		= 1;							\
1424 	    }								\
1425 	}								\
1426     }									\
1427   while (0)
1428 
1429 /* Registers are set to a sentinel when they haven't yet matched.  */
1430 static char reg_unset_dummy;
1431 #define REG_UNSET_VALUE (&reg_unset_dummy)
1432 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1433 
1434 /* Subroutine declarations and macros for regex_compile.  */
1435 
1436 static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1437 					      reg_syntax_t syntax,
1438 					      struct re_pattern_buffer *bufp));
1439 static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1440 static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1441 				 int arg1, int arg2));
1442 static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1443 				  int arg, unsigned char *end));
1444 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1445 				  int arg1, int arg2, unsigned char *end));
1446 static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1447 					   reg_syntax_t syntax));
1448 static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1449 					   reg_syntax_t syntax));
1450 static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
1451 					      const char *pend,
1452 					      char *translate,
1453 					      reg_syntax_t syntax,
1454 					      unsigned char *b));
1455 
1456 /* Fetch the next character in the uncompiled pattern---translating it
1457    if necessary.  Also cast from a signed character in the constant
1458    string passed to us by the user to an unsigned char that we can use
1459    as an array index (in, e.g., `translate').  */
1460 #ifndef PATFETCH
1461 # define PATFETCH(c)							\
1462   do {if (p == pend) return REG_EEND;					\
1463     c = (unsigned char) *p++;						\
1464     if (translate) c = (unsigned char) translate[c];			\
1465   } while (0)
1466 #endif
1467 
1468 /* Fetch the next character in the uncompiled pattern, with no
1469    translation.  */
1470 #define PATFETCH_RAW(c)							\
1471   do {if (p == pend) return REG_EEND;					\
1472     c = (unsigned char) *p++; 						\
1473   } while (0)
1474 
1475 /* Go backwards one character in the pattern.  */
1476 #define PATUNFETCH p--
1477 
1478 
1479 /* If `translate' is non-null, return translate[D], else just D.  We
1480    cast the subscript to translate because some data is declared as
1481    `char *', to avoid warnings when a string constant is passed.  But
1482    when we use a character as a subscript we must make it unsigned.  */
1483 #ifndef TRANSLATE
1484 # define TRANSLATE(d) \
1485   (translate ? (char) translate[(unsigned char) (d)] : (d))
1486 #endif
1487 
1488 
1489 /* Macros for outputting the compiled pattern into `buffer'.  */
1490 
1491 /* If the buffer isn't allocated when it comes in, use this.  */
1492 #define INIT_BUF_SIZE  32
1493 
1494 /* Make sure we have at least N more bytes of space in buffer.  */
1495 #define GET_BUFFER_SPACE(n)						\
1496     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
1497       EXTEND_BUFFER ()
1498 
1499 /* Make sure we have one more byte of buffer space and then add C to it.  */
1500 #define BUF_PUSH(c)							\
1501   do {									\
1502     GET_BUFFER_SPACE (1);						\
1503     *b++ = (unsigned char) (c);						\
1504   } while (0)
1505 
1506 
1507 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1508 #define BUF_PUSH_2(c1, c2)						\
1509   do {									\
1510     GET_BUFFER_SPACE (2);						\
1511     *b++ = (unsigned char) (c1);					\
1512     *b++ = (unsigned char) (c2);					\
1513   } while (0)
1514 
1515 
1516 /* As with BUF_PUSH_2, except for three bytes.  */
1517 #define BUF_PUSH_3(c1, c2, c3)						\
1518   do {									\
1519     GET_BUFFER_SPACE (3);						\
1520     *b++ = (unsigned char) (c1);					\
1521     *b++ = (unsigned char) (c2);					\
1522     *b++ = (unsigned char) (c3);					\
1523   } while (0)
1524 
1525 
1526 /* Store a jump with opcode OP at LOC to location TO.  We store a
1527    relative address offset by the three bytes the jump itself occupies.  */
1528 #define STORE_JUMP(op, loc, to) \
1529   store_op1 (op, loc, (int) ((to) - (loc) - 3))
1530 
1531 /* Likewise, for a two-argument jump.  */
1532 #define STORE_JUMP2(op, loc, to, arg) \
1533   store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
1534 
1535 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1536 #define INSERT_JUMP(op, loc, to) \
1537   insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
1538 
1539 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1540 #define INSERT_JUMP2(op, loc, to, arg) \
1541   insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
1542 
1543 
1544 /* This is not an arbitrary limit: the arguments which represent offsets
1545    into the pattern are two bytes long.  So if 2^16 bytes turns out to
1546    be too small, many things would have to change.  */
1547 /* Any other compiler which, like MSC, has allocation limit below 2^16
1548    bytes will have to use approach similar to what was done below for
1549    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1550    reallocating to 0 bytes.  Such thing is not going to work too well.
1551    You have been warned!!  */
1552 #if defined _MSC_VER  && !defined WIN32
1553 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1554    The REALLOC define eliminates a flurry of conversion warnings,
1555    but is not required. */
1556 # define MAX_BUF_SIZE  65500L
1557 # define REALLOC(p,s) realloc ((p), (size_t) (s))
1558 #else
1559 # define MAX_BUF_SIZE (1L << 16)
1560 # define REALLOC(p,s) realloc ((p), (s))
1561 #endif
1562 
1563 /* Extend the buffer by twice its current size via realloc and
1564    reset the pointers that pointed into the old block to point to the
1565    correct places in the new one.  If extending the buffer results in it
1566    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1567 #define EXTEND_BUFFER()							\
1568   do { 									\
1569     unsigned char *old_buffer = bufp->buffer;				\
1570     if (bufp->allocated == MAX_BUF_SIZE) 				\
1571       return REG_ESIZE;							\
1572     bufp->allocated <<= 1;						\
1573     if (bufp->allocated > MAX_BUF_SIZE)					\
1574       bufp->allocated = MAX_BUF_SIZE; 					\
1575     bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
1576     if (bufp->buffer == NULL)						\
1577       return REG_ESPACE;						\
1578     /* If the buffer moved, move all the pointers into it.  */		\
1579     if (old_buffer != bufp->buffer)					\
1580       {									\
1581         b = (b - old_buffer) + bufp->buffer;				\
1582         begalt = (begalt - old_buffer) + bufp->buffer;			\
1583         if (fixup_alt_jump)						\
1584           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1585         if (laststart)							\
1586           laststart = (laststart - old_buffer) + bufp->buffer;		\
1587         if (pending_exact)						\
1588           pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
1589       }									\
1590   } while (0)
1591 
1592 
1593 /* Since we have one byte reserved for the register number argument to
1594    {start,stop}_memory, the maximum number of groups we can report
1595    things about is what fits in that byte.  */
1596 #define MAX_REGNUM 255
1597 
1598 /* But patterns can have more than `MAX_REGNUM' registers.  We just
1599    ignore the excess.  */
1600 typedef unsigned regnum_t;
1601 
1602 
1603 /* Macros for the compile stack.  */
1604 
1605 /* Since offsets can go either forwards or backwards, this type needs to
1606    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1607 /* int may be not enough when sizeof(int) == 2.  */
1608 typedef long pattern_offset_t;
1609 
1610 typedef struct
1611 {
1612   pattern_offset_t begalt_offset;
1613   pattern_offset_t fixup_alt_jump;
1614   pattern_offset_t inner_group_offset;
1615   pattern_offset_t laststart_offset;
1616   regnum_t regnum;
1617 } compile_stack_elt_t;
1618 
1619 
1620 typedef struct
1621 {
1622   compile_stack_elt_t *stack;
1623   unsigned size;
1624   unsigned avail;			/* Offset of next open position.  */
1625 } compile_stack_type;
1626 
1627 
1628 #define INIT_COMPILE_STACK_SIZE 32
1629 
1630 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1631 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1632 
1633 /* The next available element.  */
1634 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1635 
1636 
1637 /* Set the bit for character C in a list.  */
1638 #define SET_LIST_BIT(c)                               \
1639   (b[((unsigned char) (c)) / BYTEWIDTH]               \
1640    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1641 
1642 
1643 /* Get the next unsigned number in the uncompiled pattern.  */
1644 #define GET_UNSIGNED_NUMBER(num) 					\
1645   { if (p != pend)							\
1646      {									\
1647        PATFETCH (c); 							\
1648        while (ISDIGIT (c)) 						\
1649          { 								\
1650            if (num < 0)							\
1651               num = 0;							\
1652            num = num * 10 + c - '0'; 					\
1653            if (p == pend) 						\
1654               break; 							\
1655            PATFETCH (c);						\
1656          } 								\
1657        } 								\
1658     }
1659 
1660 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
1661 /* The GNU C library provides support for user-defined character classes
1662    and the functions from ISO C amendement 1.  */
1663 # ifdef CHARCLASS_NAME_MAX
1664 #  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1665 # else
1666 /* This shouldn't happen but some implementation might still have this
1667    problem.  Use a reasonable default value.  */
1668 #  define CHAR_CLASS_MAX_LENGTH 256
1669 # endif
1670 
1671 # ifdef _LIBC
1672 #  define IS_CHAR_CLASS(string) __wctype (string)
1673 # else
1674 #  define IS_CHAR_CLASS(string) wctype (string)
1675 # endif
1676 #else
1677 # define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1678 
1679 # define IS_CHAR_CLASS(string)						\
1680    (STREQ (string, "alpha") || STREQ (string, "upper")			\
1681     || STREQ (string, "lower") || STREQ (string, "digit")		\
1682     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1683     || STREQ (string, "space") || STREQ (string, "print")		\
1684     || STREQ (string, "punct") || STREQ (string, "graph")		\
1685     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1686 #endif
1687 
1688 #ifndef MATCH_MAY_ALLOCATE
1689 
1690 /* If we cannot allocate large objects within re_match_2_internal,
1691    we make the fail stack and register vectors global.
1692    The fail stack, we grow to the maximum size when a regexp
1693    is compiled.
1694    The register vectors, we adjust in size each time we
1695    compile a regexp, according to the number of registers it needs.  */
1696 
1697 static fail_stack_type fail_stack;
1698 
1699 /* Size with which the following vectors are currently allocated.
1700    That is so we can make them bigger as needed,
1701    but never make them smaller.  */
1702 static int regs_allocated_size;
1703 
1704 static const char **     regstart, **     regend;
1705 static const char ** old_regstart, ** old_regend;
1706 static const char **best_regstart, **best_regend;
1707 static register_info_type *reg_info;
1708 static const char **reg_dummy;
1709 static register_info_type *reg_info_dummy;
1710 
1711 /* Make the register vectors big enough for NUM_REGS registers,
1712    but don't make them smaller.  */
1713 
1714 static
regex_grow_registers(num_regs)1715 regex_grow_registers (num_regs)
1716      int num_regs;
1717 {
1718   if (num_regs > regs_allocated_size)
1719     {
1720       RETALLOC_IF (regstart,	 num_regs, const char *);
1721       RETALLOC_IF (regend,	 num_regs, const char *);
1722       RETALLOC_IF (old_regstart, num_regs, const char *);
1723       RETALLOC_IF (old_regend,	 num_regs, const char *);
1724       RETALLOC_IF (best_regstart, num_regs, const char *);
1725       RETALLOC_IF (best_regend,	 num_regs, const char *);
1726       RETALLOC_IF (reg_info,	 num_regs, register_info_type);
1727       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
1728       RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1729 
1730       regs_allocated_size = num_regs;
1731     }
1732 }
1733 
1734 #endif /* not MATCH_MAY_ALLOCATE */
1735 
1736 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1737 						 compile_stack,
1738 						 regnum_t regnum));
1739 
1740 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1741    Returns one of error codes defined in `gnu-regex.h', or zero for success.
1742 
1743    Assumes the `allocated' (and perhaps `buffer') and `translate'
1744    fields are set in BUFP on entry.
1745 
1746    If it succeeds, results are put in BUFP (if it returns an error, the
1747    contents of BUFP are undefined):
1748      `buffer' is the compiled pattern;
1749      `syntax' is set to SYNTAX;
1750      `used' is set to the length of the compiled pattern;
1751      `fastmap_accurate' is zero;
1752      `re_nsub' is the number of subexpressions in PATTERN;
1753      `not_bol' and `not_eol' are zero;
1754 
1755    The `fastmap' and `newline_anchor' fields are neither
1756    examined nor set.  */
1757 
1758 /* Return, freeing storage we allocated.  */
1759 #define FREE_STACK_RETURN(value)		\
1760   return (free (compile_stack.stack), value)
1761 
1762 static reg_errcode_t
regex_compile(pattern,size,syntax,bufp)1763 regex_compile (pattern, size, syntax, bufp)
1764      const char *pattern;
1765      size_t size;
1766      reg_syntax_t syntax;
1767      struct re_pattern_buffer *bufp;
1768 {
1769   /* We fetch characters from PATTERN here.  Even though PATTERN is
1770      `char *' (i.e., signed), we declare these variables as unsigned, so
1771      they can be reliably used as array indices.  */
1772   register unsigned char c, c1;
1773 
1774   /* A random temporary spot in PATTERN.  */
1775   const char *p1;
1776 
1777   /* Points to the end of the buffer, where we should append.  */
1778   register unsigned char *b;
1779 
1780   /* Keeps track of unclosed groups.  */
1781   compile_stack_type compile_stack;
1782 
1783   /* Points to the current (ending) position in the pattern.  */
1784   const char *p = pattern;
1785   const char *pend = pattern + size;
1786 
1787   /* How to translate the characters in the pattern.  */
1788   RE_TRANSLATE_TYPE translate = bufp->translate;
1789 
1790   /* Address of the count-byte of the most recently inserted `exactn'
1791      command.  This makes it possible to tell if a new exact-match
1792      character can be added to that command or if the character requires
1793      a new `exactn' command.  */
1794   unsigned char *pending_exact = 0;
1795 
1796   /* Address of start of the most recently finished expression.
1797      This tells, e.g., postfix * where to find the start of its
1798      operand.  Reset at the beginning of groups and alternatives.  */
1799   unsigned char *laststart = 0;
1800 
1801   /* Address of beginning of regexp, or inside of last group.  */
1802   unsigned char *begalt;
1803 
1804   /* Place in the uncompiled pattern (i.e., the {) to
1805      which to go back if the interval is invalid.  */
1806   const char *beg_interval;
1807 
1808   /* Address of the place where a forward jump should go to the end of
1809      the containing expression.  Each alternative of an `or' -- except the
1810      last -- ends with a forward jump of this sort.  */
1811   unsigned char *fixup_alt_jump = 0;
1812 
1813   /* Counts open-groups as they are encountered.  Remembered for the
1814      matching close-group on the compile stack, so the same register
1815      number is put in the stop_memory as the start_memory.  */
1816   regnum_t regnum = 0;
1817 
1818 #ifdef DEBUG
1819   DEBUG_PRINT1 ("\nCompiling pattern: ");
1820   if (debug)
1821     {
1822       unsigned debug_count;
1823 
1824       for (debug_count = 0; debug_count < size; debug_count++)
1825         putchar (pattern[debug_count]);
1826       putchar ('\n');
1827     }
1828 #endif /* DEBUG */
1829 
1830   /* Initialize the compile stack.  */
1831   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1832   if (compile_stack.stack == NULL)
1833     return REG_ESPACE;
1834 
1835   compile_stack.size = INIT_COMPILE_STACK_SIZE;
1836   compile_stack.avail = 0;
1837 
1838   /* Initialize the pattern buffer.  */
1839   bufp->syntax = syntax;
1840   bufp->fastmap_accurate = 0;
1841   bufp->not_bol = bufp->not_eol = 0;
1842 
1843   /* Set `used' to zero, so that if we return an error, the pattern
1844      printer (for debugging) will think there's no pattern.  We reset it
1845      at the end.  */
1846   bufp->used = 0;
1847 
1848   /* Always count groups, whether or not bufp->no_sub is set.  */
1849   bufp->re_nsub = 0;
1850 
1851 #if !defined emacs && !defined SYNTAX_TABLE
1852   /* Initialize the syntax table.  */
1853    init_syntax_once ();
1854 #endif
1855 
1856   if (bufp->allocated == 0)
1857     {
1858       if (bufp->buffer)
1859 	{ /* If zero allocated, but buffer is non-null, try to realloc
1860              enough space.  This loses if buffer's address is bogus, but
1861              that is the user's responsibility.  */
1862           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1863         }
1864       else
1865         { /* Caller did not allocate a buffer.  Do it for them.  */
1866           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1867         }
1868       if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1869 
1870       bufp->allocated = INIT_BUF_SIZE;
1871     }
1872 
1873   begalt = b = bufp->buffer;
1874 
1875   /* Loop through the uncompiled pattern until we're at the end.  */
1876   while (p != pend)
1877     {
1878       PATFETCH (c);
1879 
1880       switch (c)
1881         {
1882         case '^':
1883           {
1884             if (   /* If at start of pattern, it's an operator.  */
1885                    p == pattern + 1
1886                    /* If context independent, it's an operator.  */
1887                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1888                    /* Otherwise, depends on what's come before.  */
1889                 || at_begline_loc_p (pattern, p, syntax))
1890               BUF_PUSH (begline);
1891             else
1892               goto normal_char;
1893           }
1894           break;
1895 
1896 
1897         case '$':
1898           {
1899             if (   /* If at end of pattern, it's an operator.  */
1900                    p == pend
1901                    /* If context independent, it's an operator.  */
1902                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1903                    /* Otherwise, depends on what's next.  */
1904                 || at_endline_loc_p (p, pend, syntax))
1905                BUF_PUSH (endline);
1906              else
1907                goto normal_char;
1908            }
1909            break;
1910 
1911 
1912 	case '+':
1913         case '?':
1914           if ((syntax & RE_BK_PLUS_QM)
1915               || (syntax & RE_LIMITED_OPS))
1916             goto normal_char;
1917         handle_plus:
1918         case '*':
1919           /* If there is no previous pattern... */
1920           if (!laststart)
1921             {
1922               if (syntax & RE_CONTEXT_INVALID_OPS)
1923                 FREE_STACK_RETURN (REG_BADRPT);
1924               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1925                 goto normal_char;
1926             }
1927 
1928           {
1929             /* Are we optimizing this jump?  */
1930             boolean keep_string_p = false;
1931 
1932             /* 1 means zero (many) matches is allowed.  */
1933             char zero_times_ok = 0, many_times_ok = 0;
1934 
1935             /* If there is a sequence of repetition chars, collapse it
1936                down to just one (the right one).  We can't combine
1937                interval operators with these because of, e.g., `a{2}*',
1938                which should only match an even number of `a's.  */
1939 
1940             for (;;)
1941               {
1942                 zero_times_ok |= c != '+';
1943                 many_times_ok |= c != '?';
1944 
1945                 if (p == pend)
1946                   break;
1947 
1948                 PATFETCH (c);
1949 
1950                 if (c == '*'
1951                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1952                   ;
1953 
1954                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1955                   {
1956                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1957 
1958                     PATFETCH (c1);
1959                     if (!(c1 == '+' || c1 == '?'))
1960                       {
1961                         PATUNFETCH;
1962                         PATUNFETCH;
1963                         break;
1964                       }
1965 
1966                     c = c1;
1967                   }
1968                 else
1969                   {
1970                     PATUNFETCH;
1971                     break;
1972                   }
1973 
1974                 /* If we get here, we found another repeat character.  */
1975                }
1976 
1977             /* Star, etc. applied to an empty pattern is equivalent
1978                to an empty pattern.  */
1979             if (!laststart)
1980               break;
1981 
1982             /* Now we know whether or not zero matches is allowed
1983                and also whether or not two or more matches is allowed.  */
1984             if (many_times_ok)
1985               { /* More than one repetition is allowed, so put in at the
1986                    end a backward relative jump from `b' to before the next
1987                    jump we're going to put in below (which jumps from
1988                    laststart to after this jump).
1989 
1990                    But if we are at the `*' in the exact sequence `.*\n',
1991                    insert an unconditional jump backwards to the .,
1992                    instead of the beginning of the loop.  This way we only
1993                    push a failure point once, instead of every time
1994                    through the loop.  */
1995                 assert (p - 1 > pattern);
1996 
1997                 /* Allocate the space for the jump.  */
1998                 GET_BUFFER_SPACE (3);
1999 
2000                 /* We know we are not at the first character of the pattern,
2001                    because laststart was nonzero.  And we've already
2002                    incremented `p', by the way, to be the character after
2003                    the `*'.  Do we have to do something analogous here
2004                    for null bytes, because of RE_DOT_NOT_NULL?  */
2005                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2006 		    && zero_times_ok
2007                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2008                     && !(syntax & RE_DOT_NEWLINE))
2009                   { /* We have .*\n.  */
2010                     STORE_JUMP (jump, b, laststart);
2011                     keep_string_p = true;
2012                   }
2013                 else
2014                   /* Anything else.  */
2015                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2016 
2017                 /* We've added more stuff to the buffer.  */
2018                 b += 3;
2019               }
2020 
2021             /* On failure, jump from laststart to b + 3, which will be the
2022                end of the buffer after this jump is inserted.  */
2023             GET_BUFFER_SPACE (3);
2024             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2025                                        : on_failure_jump,
2026                          laststart, b + 3);
2027             pending_exact = 0;
2028             b += 3;
2029 
2030             if (!zero_times_ok)
2031               {
2032                 /* At least one repetition is required, so insert a
2033                    `dummy_failure_jump' before the initial
2034                    `on_failure_jump' instruction of the loop. This
2035                    effects a skip over that instruction the first time
2036                    we hit that loop.  */
2037                 GET_BUFFER_SPACE (3);
2038                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2039                 b += 3;
2040               }
2041             }
2042 	  break;
2043 
2044 
2045 	case '.':
2046           laststart = b;
2047           BUF_PUSH (anychar);
2048           break;
2049 
2050 
2051         case '[':
2052           {
2053             boolean had_char_class = false;
2054 
2055             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2056 
2057             /* Ensure that we have enough space to push a charset: the
2058                opcode, the length count, and the bitset; 34 bytes in all.  */
2059 	    GET_BUFFER_SPACE (34);
2060 
2061             laststart = b;
2062 
2063             /* We test `*p == '^' twice, instead of using an if
2064                statement, so we only need one BUF_PUSH.  */
2065             BUF_PUSH (*p == '^' ? charset_not : charset);
2066             if (*p == '^')
2067               p++;
2068 
2069             /* Remember the first position in the bracket expression.  */
2070             p1 = p;
2071 
2072             /* Push the number of bytes in the bitmap.  */
2073             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2074 
2075             /* Clear the whole map.  */
2076             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2077 
2078             /* charset_not matches newline according to a syntax bit.  */
2079             if ((re_opcode_t) b[-2] == charset_not
2080                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2081               SET_LIST_BIT ('\n');
2082 
2083             /* Read in characters and ranges, setting map bits.  */
2084             for (;;)
2085               {
2086                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2087 
2088                 PATFETCH (c);
2089 
2090                 /* \ might escape characters inside [...] and [^...].  */
2091                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2092                   {
2093                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2094 
2095                     PATFETCH (c1);
2096                     SET_LIST_BIT (c1);
2097                     continue;
2098                   }
2099 
2100                 /* Could be the end of the bracket expression.  If it's
2101                    not (i.e., when the bracket expression is `[]' so
2102                    far), the ']' character bit gets set way below.  */
2103                 if (c == ']' && p != p1 + 1)
2104                   break;
2105 
2106                 /* Look ahead to see if it's a range when the last thing
2107                    was a character class.  */
2108                 if (had_char_class && c == '-' && *p != ']')
2109                   FREE_STACK_RETURN (REG_ERANGE);
2110 
2111                 /* Look ahead to see if it's a range when the last thing
2112                    was a character: if this is a hyphen not at the
2113                    beginning or the end of a list, then it's the range
2114                    operator.  */
2115                 if (c == '-'
2116                     && !(p - 2 >= pattern && p[-2] == '[')
2117                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2118                     && *p != ']')
2119                   {
2120                     reg_errcode_t ret
2121                       = compile_range (&p, pend, translate, syntax, b);
2122                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2123                   }
2124 
2125                 else if (p[0] == '-' && p[1] != ']')
2126                   { /* This handles ranges made up of characters only.  */
2127                     reg_errcode_t ret;
2128 
2129 		    /* Move past the `-'.  */
2130                     PATFETCH (c1);
2131 
2132                     ret = compile_range (&p, pend, translate, syntax, b);
2133                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2134                   }
2135 
2136                 /* See if we're at the beginning of a possible character
2137                    class.  */
2138 
2139                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2140                   { /* Leave room for the null.  */
2141                     char str[CHAR_CLASS_MAX_LENGTH + 1];
2142 
2143                     PATFETCH (c);
2144                     c1 = 0;
2145 
2146                     /* If pattern is `[[:'.  */
2147                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2148 
2149                     for (;;)
2150                       {
2151                         PATFETCH (c);
2152                         if ((c == ':' && *p == ']') || p == pend
2153                             || c1 == CHAR_CLASS_MAX_LENGTH)
2154                           break;
2155                         str[c1++] = c;
2156                       }
2157                     str[c1] = '\0';
2158 
2159                     /* If isn't a word bracketed by `[:' and `:]':
2160                        undo the ending character, the letters, and leave
2161                        the leading `:' and `[' (but set bits for them).  */
2162                     if (c == ':' && *p == ']')
2163                       {
2164 /* GCC LOCAL: Skip this code if we don't have btowc().  btowc() is */
2165 /* defined in the 1994 Amendment 1 to ISO C and may not be present on */
2166 /* systems where we have wchar.h and wctype.h.   */
2167 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_BTOWC)
2168                         boolean is_lower = STREQ (str, "lower");
2169                         boolean is_upper = STREQ (str, "upper");
2170 			wctype_t wt;
2171                         int ch;
2172 
2173 			wt = IS_CHAR_CLASS (str);
2174 			if (wt == 0)
2175 			  FREE_STACK_RETURN (REG_ECTYPE);
2176 
2177                         /* Throw away the ] at the end of the character
2178                            class.  */
2179                         PATFETCH (c);
2180 
2181                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2182 
2183                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2184 			  {
2185 # ifdef _LIBC
2186 			    if (__iswctype (__btowc (ch), wt))
2187 			      SET_LIST_BIT (ch);
2188 #else
2189 			    if (iswctype (btowc (ch), wt))
2190 			      SET_LIST_BIT (ch);
2191 #endif
2192 
2193 			    if (translate && (is_upper || is_lower)
2194 				&& (ISUPPER (ch) || ISLOWER (ch)))
2195 			      SET_LIST_BIT (ch);
2196 			  }
2197 
2198                         had_char_class = true;
2199 #else
2200                         int ch;
2201                         boolean is_alnum = STREQ (str, "alnum");
2202                         boolean is_alpha = STREQ (str, "alpha");
2203                         boolean is_blank = STREQ (str, "blank");
2204                         boolean is_cntrl = STREQ (str, "cntrl");
2205                         boolean is_digit = STREQ (str, "digit");
2206                         boolean is_graph = STREQ (str, "graph");
2207                         boolean is_lower = STREQ (str, "lower");
2208                         boolean is_print = STREQ (str, "print");
2209                         boolean is_punct = STREQ (str, "punct");
2210                         boolean is_space = STREQ (str, "space");
2211                         boolean is_upper = STREQ (str, "upper");
2212                         boolean is_xdigit = STREQ (str, "xdigit");
2213 
2214                         if (!IS_CHAR_CLASS (str))
2215 			  FREE_STACK_RETURN (REG_ECTYPE);
2216 
2217                         /* Throw away the ] at the end of the character
2218                            class.  */
2219                         PATFETCH (c);
2220 
2221                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2222 
2223                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2224                           {
2225 			    /* This was split into 3 if's to
2226 			       avoid an arbitrary limit in some compiler.  */
2227                             if (   (is_alnum  && ISALNUM (ch))
2228                                 || (is_alpha  && ISALPHA (ch))
2229                                 || (is_blank  && ISBLANK (ch))
2230                                 || (is_cntrl  && ISCNTRL (ch)))
2231 			      SET_LIST_BIT (ch);
2232 			    if (   (is_digit  && ISDIGIT (ch))
2233                                 || (is_graph  && ISGRAPH (ch))
2234                                 || (is_lower  && ISLOWER (ch))
2235                                 || (is_print  && ISPRINT (ch)))
2236 			      SET_LIST_BIT (ch);
2237 			    if (   (is_punct  && ISPUNCT (ch))
2238                                 || (is_space  && ISSPACE (ch))
2239                                 || (is_upper  && ISUPPER (ch))
2240                                 || (is_xdigit && ISXDIGIT (ch)))
2241 			      SET_LIST_BIT (ch);
2242 			    if (   translate && (is_upper || is_lower)
2243 				&& (ISUPPER (ch) || ISLOWER (ch)))
2244 			      SET_LIST_BIT (ch);
2245                           }
2246                         had_char_class = true;
2247 #endif	/* libc || wctype.h */
2248                       }
2249                     else
2250                       {
2251                         c1++;
2252                         while (c1--)
2253                           PATUNFETCH;
2254                         SET_LIST_BIT ('[');
2255                         SET_LIST_BIT (':');
2256                         had_char_class = false;
2257                       }
2258                   }
2259                 else
2260                   {
2261                     had_char_class = false;
2262                     SET_LIST_BIT (c);
2263                   }
2264               }
2265 
2266             /* Discard any (non)matching list bytes that are all 0 at the
2267                end of the map.  Decrease the map-length byte too.  */
2268             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2269               b[-1]--;
2270             b += b[-1];
2271           }
2272           break;
2273 
2274 
2275 	case '(':
2276           if (syntax & RE_NO_BK_PARENS)
2277             goto handle_open;
2278           else
2279             goto normal_char;
2280 
2281 
2282         case ')':
2283           if (syntax & RE_NO_BK_PARENS)
2284             goto handle_close;
2285           else
2286             goto normal_char;
2287 
2288 
2289         case '\n':
2290           if (syntax & RE_NEWLINE_ALT)
2291             goto handle_alt;
2292           else
2293             goto normal_char;
2294 
2295 
2296 	case '|':
2297           if (syntax & RE_NO_BK_VBAR)
2298             goto handle_alt;
2299           else
2300             goto normal_char;
2301 
2302 
2303         case '{':
2304            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2305              goto handle_interval;
2306            else
2307              goto normal_char;
2308 
2309 
2310         case '\\':
2311           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2312 
2313           /* Do not translate the character after the \, so that we can
2314              distinguish, e.g., \B from \b, even if we normally would
2315              translate, e.g., B to b.  */
2316           PATFETCH_RAW (c);
2317 
2318           switch (c)
2319             {
2320             case '(':
2321               if (syntax & RE_NO_BK_PARENS)
2322                 goto normal_backslash;
2323 
2324             handle_open:
2325               bufp->re_nsub++;
2326               regnum++;
2327 
2328               if (COMPILE_STACK_FULL)
2329                 {
2330                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
2331                             compile_stack_elt_t);
2332                   if (compile_stack.stack == NULL) return REG_ESPACE;
2333 
2334                   compile_stack.size <<= 1;
2335                 }
2336 
2337               /* These are the values to restore when we hit end of this
2338                  group.  They are all relative offsets, so that if the
2339                  whole pattern moves because of realloc, they will still
2340                  be valid.  */
2341               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2342               COMPILE_STACK_TOP.fixup_alt_jump
2343                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2344               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2345               COMPILE_STACK_TOP.regnum = regnum;
2346 
2347               /* We will eventually replace the 0 with the number of
2348                  groups inner to this one.  But do not push a
2349                  start_memory for groups beyond the last one we can
2350                  represent in the compiled pattern.  */
2351               if (regnum <= MAX_REGNUM)
2352                 {
2353                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2354                   BUF_PUSH_3 (start_memory, regnum, 0);
2355                 }
2356 
2357               compile_stack.avail++;
2358 
2359               fixup_alt_jump = 0;
2360               laststart = 0;
2361               begalt = b;
2362 	      /* If we've reached MAX_REGNUM groups, then this open
2363 		 won't actually generate any code, so we'll have to
2364 		 clear pending_exact explicitly.  */
2365 	      pending_exact = 0;
2366               break;
2367 
2368 
2369             case ')':
2370               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2371 
2372               if (COMPILE_STACK_EMPTY)
2373 		{
2374 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2375 		    goto normal_backslash;
2376 		  else
2377 		    FREE_STACK_RETURN (REG_ERPAREN);
2378 		}
2379 
2380             handle_close:
2381               if (fixup_alt_jump)
2382                 { /* Push a dummy failure point at the end of the
2383                      alternative for a possible future
2384                      `pop_failure_jump' to pop.  See comments at
2385                      `push_dummy_failure' in `re_match_2'.  */
2386                   BUF_PUSH (push_dummy_failure);
2387 
2388                   /* We allocated space for this jump when we assigned
2389                      to `fixup_alt_jump', in the `handle_alt' case below.  */
2390                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2391                 }
2392 
2393               /* See similar code for backslashed left paren above.  */
2394               if (COMPILE_STACK_EMPTY)
2395 		{
2396 		  if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2397 		    goto normal_char;
2398 		  else
2399 		    FREE_STACK_RETURN (REG_ERPAREN);
2400 		}
2401 
2402               /* Since we just checked for an empty stack above, this
2403                  ``can't happen''.  */
2404               assert (compile_stack.avail != 0);
2405               {
2406                 /* We don't just want to restore into `regnum', because
2407                    later groups should continue to be numbered higher,
2408                    as in `(ab)c(de)' -- the second group is #2.  */
2409                 regnum_t this_group_regnum;
2410 
2411                 compile_stack.avail--;
2412                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2413                 fixup_alt_jump
2414                   = COMPILE_STACK_TOP.fixup_alt_jump
2415                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2416                     : 0;
2417                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2418                 this_group_regnum = COMPILE_STACK_TOP.regnum;
2419 		/* If we've reached MAX_REGNUM groups, then this open
2420 		   won't actually generate any code, so we'll have to
2421 		   clear pending_exact explicitly.  */
2422 		pending_exact = 0;
2423 
2424                 /* We're at the end of the group, so now we know how many
2425                    groups were inside this one.  */
2426                 if (this_group_regnum <= MAX_REGNUM)
2427                   {
2428                     unsigned char *inner_group_loc
2429                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2430 
2431                     *inner_group_loc = regnum - this_group_regnum;
2432                     BUF_PUSH_3 (stop_memory, this_group_regnum,
2433                                 regnum - this_group_regnum);
2434                   }
2435               }
2436               break;
2437 
2438 
2439             case '|':					/* `\|'.  */
2440               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2441                 goto normal_backslash;
2442             handle_alt:
2443               if (syntax & RE_LIMITED_OPS)
2444                 goto normal_char;
2445 
2446               /* Insert before the previous alternative a jump which
2447                  jumps to this alternative if the former fails.  */
2448               GET_BUFFER_SPACE (3);
2449               INSERT_JUMP (on_failure_jump, begalt, b + 6);
2450               pending_exact = 0;
2451               b += 3;
2452 
2453               /* The alternative before this one has a jump after it
2454                  which gets executed if it gets matched.  Adjust that
2455                  jump so it will jump to this alternative's analogous
2456                  jump (put in below, which in turn will jump to the next
2457                  (if any) alternative's such jump, etc.).  The last such
2458                  jump jumps to the correct final destination.  A picture:
2459                           _____ _____
2460                           |   | |   |
2461                           |   v |   v
2462                          a | b   | c
2463 
2464                  If we are at `b', then fixup_alt_jump right now points to a
2465                  three-byte space after `a'.  We'll put in the jump, set
2466                  fixup_alt_jump to right after `b', and leave behind three
2467                  bytes which we'll fill in when we get to after `c'.  */
2468 
2469               if (fixup_alt_jump)
2470                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2471 
2472               /* Mark and leave space for a jump after this alternative,
2473                  to be filled in later either by next alternative or
2474                  when know we're at the end of a series of alternatives.  */
2475               fixup_alt_jump = b;
2476               GET_BUFFER_SPACE (3);
2477               b += 3;
2478 
2479               laststart = 0;
2480               begalt = b;
2481               break;
2482 
2483 
2484             case '{':
2485               /* If \{ is a literal.  */
2486               if (!(syntax & RE_INTERVALS)
2487                      /* If we're at `\{' and it's not the open-interval
2488                         operator.  */
2489                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2490                   || (p - 2 == pattern  &&  p == pend))
2491                 goto normal_backslash;
2492 
2493             handle_interval:
2494               {
2495                 /* If got here, then the syntax allows intervals.  */
2496 
2497                 /* At least (most) this many matches must be made.  */
2498                 int lower_bound = -1, upper_bound = -1;
2499 
2500                 beg_interval = p - 1;
2501 
2502                 if (p == pend)
2503                   {
2504                     if (syntax & RE_NO_BK_BRACES)
2505                       goto unfetch_interval;
2506                     else
2507                       FREE_STACK_RETURN (REG_EBRACE);
2508                   }
2509 
2510                 GET_UNSIGNED_NUMBER (lower_bound);
2511 
2512                 if (c == ',')
2513                   {
2514                     GET_UNSIGNED_NUMBER (upper_bound);
2515                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2516                   }
2517                 else
2518                   /* Interval such as `{1}' => match exactly once. */
2519                   upper_bound = lower_bound;
2520 
2521                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2522                     || lower_bound > upper_bound)
2523                   {
2524                     if (syntax & RE_NO_BK_BRACES)
2525                       goto unfetch_interval;
2526                     else
2527                       FREE_STACK_RETURN (REG_BADBR);
2528                   }
2529 
2530                 if (!(syntax & RE_NO_BK_BRACES))
2531                   {
2532                     if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2533 
2534                     PATFETCH (c);
2535                   }
2536 
2537                 if (c != '}')
2538                   {
2539                     if (syntax & RE_NO_BK_BRACES)
2540                       goto unfetch_interval;
2541                     else
2542                       FREE_STACK_RETURN (REG_BADBR);
2543                   }
2544 
2545                 /* We just parsed a valid interval.  */
2546 
2547                 /* If it's invalid to have no preceding re.  */
2548                 if (!laststart)
2549                   {
2550                     if (syntax & RE_CONTEXT_INVALID_OPS)
2551                       FREE_STACK_RETURN (REG_BADRPT);
2552                     else if (syntax & RE_CONTEXT_INDEP_OPS)
2553                       laststart = b;
2554                     else
2555                       goto unfetch_interval;
2556                   }
2557 
2558                 /* If the upper bound is zero, don't want to succeed at
2559                    all; jump from `laststart' to `b + 3', which will be
2560                    the end of the buffer after we insert the jump.  */
2561                  if (upper_bound == 0)
2562                    {
2563                      GET_BUFFER_SPACE (3);
2564                      INSERT_JUMP (jump, laststart, b + 3);
2565                      b += 3;
2566                    }
2567 
2568                  /* Otherwise, we have a nontrivial interval.  When
2569                     we're all done, the pattern will look like:
2570                       set_number_at <jump count> <upper bound>
2571                       set_number_at <succeed_n count> <lower bound>
2572                       succeed_n <after jump addr> <succeed_n count>
2573                       <body of loop>
2574                       jump_n <succeed_n addr> <jump count>
2575                     (The upper bound and `jump_n' are omitted if
2576                     `upper_bound' is 1, though.)  */
2577                  else
2578                    { /* If the upper bound is > 1, we need to insert
2579                         more at the end of the loop.  */
2580                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
2581 
2582                      GET_BUFFER_SPACE (nbytes);
2583 
2584                      /* Initialize lower bound of the `succeed_n', even
2585                         though it will be set during matching by its
2586                         attendant `set_number_at' (inserted next),
2587                         because `re_compile_fastmap' needs to know.
2588                         Jump to the `jump_n' we might insert below.  */
2589                      INSERT_JUMP2 (succeed_n, laststart,
2590                                    b + 5 + (upper_bound > 1) * 5,
2591                                    lower_bound);
2592                      b += 5;
2593 
2594                      /* Code to initialize the lower bound.  Insert
2595                         before the `succeed_n'.  The `5' is the last two
2596                         bytes of this `set_number_at', plus 3 bytes of
2597                         the following `succeed_n'.  */
2598                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2599                      b += 5;
2600 
2601                      if (upper_bound > 1)
2602                        { /* More than one repetition is allowed, so
2603                             append a backward jump to the `succeed_n'
2604                             that starts this interval.
2605 
2606                             When we've reached this during matching,
2607                             we'll have matched the interval once, so
2608                             jump back only `upper_bound - 1' times.  */
2609                          STORE_JUMP2 (jump_n, b, laststart + 5,
2610                                       upper_bound - 1);
2611                          b += 5;
2612 
2613                          /* The location we want to set is the second
2614                             parameter of the `jump_n'; that is `b-2' as
2615                             an absolute address.  `laststart' will be
2616                             the `set_number_at' we're about to insert;
2617                             `laststart+3' the number to set, the source
2618                             for the relative address.  But we are
2619                             inserting into the middle of the pattern --
2620                             so everything is getting moved up by 5.
2621                             Conclusion: (b - 2) - (laststart + 3) + 5,
2622                             i.e., b - laststart.
2623 
2624                             We insert this at the beginning of the loop
2625                             so that if we fail during matching, we'll
2626                             reinitialize the bounds.  */
2627                          insert_op2 (set_number_at, laststart, b - laststart,
2628                                      upper_bound - 1, b);
2629                          b += 5;
2630                        }
2631                    }
2632                 pending_exact = 0;
2633                 beg_interval = NULL;
2634               }
2635               break;
2636 
2637             unfetch_interval:
2638               /* If an invalid interval, match the characters as literals.  */
2639                assert (beg_interval);
2640                p = beg_interval;
2641                beg_interval = NULL;
2642 
2643                /* normal_char and normal_backslash need `c'.  */
2644                PATFETCH (c);
2645 
2646                if (!(syntax & RE_NO_BK_BRACES))
2647                  {
2648                    if (p > pattern  &&  p[-1] == '\\')
2649                      goto normal_backslash;
2650                  }
2651                goto normal_char;
2652 
2653 #ifdef emacs
2654             /* There is no way to specify the before_dot and after_dot
2655                operators.  rms says this is ok.  --karl  */
2656             case '=':
2657               BUF_PUSH (at_dot);
2658               break;
2659 
2660             case 's':
2661               laststart = b;
2662               PATFETCH (c);
2663               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2664               break;
2665 
2666             case 'S':
2667               laststart = b;
2668               PATFETCH (c);
2669               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2670               break;
2671 #endif /* emacs */
2672 
2673 
2674             case 'w':
2675 	      if (syntax & RE_NO_GNU_OPS)
2676 		goto normal_char;
2677               laststart = b;
2678               BUF_PUSH (wordchar);
2679               break;
2680 
2681 
2682             case 'W':
2683 	      if (syntax & RE_NO_GNU_OPS)
2684 		goto normal_char;
2685               laststart = b;
2686               BUF_PUSH (notwordchar);
2687               break;
2688 
2689 
2690             case '<':
2691 	      if (syntax & RE_NO_GNU_OPS)
2692 		goto normal_char;
2693               BUF_PUSH (wordbeg);
2694               break;
2695 
2696             case '>':
2697 	      if (syntax & RE_NO_GNU_OPS)
2698 		goto normal_char;
2699               BUF_PUSH (wordend);
2700               break;
2701 
2702             case 'b':
2703 	      if (syntax & RE_NO_GNU_OPS)
2704 		goto normal_char;
2705               BUF_PUSH (wordbound);
2706               break;
2707 
2708             case 'B':
2709 	      if (syntax & RE_NO_GNU_OPS)
2710 		goto normal_char;
2711               BUF_PUSH (notwordbound);
2712               break;
2713 
2714             case '`':
2715 	      if (syntax & RE_NO_GNU_OPS)
2716 		goto normal_char;
2717               BUF_PUSH (begbuf);
2718               break;
2719 
2720             case '\'':
2721 	      if (syntax & RE_NO_GNU_OPS)
2722 		goto normal_char;
2723               BUF_PUSH (endbuf);
2724               break;
2725 
2726             case '1': case '2': case '3': case '4': case '5':
2727             case '6': case '7': case '8': case '9':
2728               if (syntax & RE_NO_BK_REFS)
2729                 goto normal_char;
2730 
2731               c1 = c - '0';
2732 
2733               if (c1 > regnum)
2734                 FREE_STACK_RETURN (REG_ESUBREG);
2735 
2736               /* Can't back reference to a subexpression if inside of it.  */
2737               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
2738                 goto normal_char;
2739 
2740               laststart = b;
2741               BUF_PUSH_2 (duplicate, c1);
2742               break;
2743 
2744 
2745             case '+':
2746             case '?':
2747               if (syntax & RE_BK_PLUS_QM)
2748                 goto handle_plus;
2749               else
2750                 goto normal_backslash;
2751 
2752             default:
2753             normal_backslash:
2754               /* You might think it would be useful for \ to mean
2755                  not to translate; but if we don't translate it
2756                  it will never match anything.  */
2757               c = TRANSLATE (c);
2758               goto normal_char;
2759             }
2760           break;
2761 
2762 
2763 	default:
2764         /* Expects the character in `c'.  */
2765 	normal_char:
2766 	      /* If no exactn currently being built.  */
2767           if (!pending_exact
2768 
2769               /* If last exactn not at current position.  */
2770               || pending_exact + *pending_exact + 1 != b
2771 
2772               /* We have only one byte following the exactn for the count.  */
2773 	      || *pending_exact == (1 << BYTEWIDTH) - 1
2774 
2775               /* If followed by a repetition operator.  */
2776               || *p == '*' || *p == '^'
2777 	      || ((syntax & RE_BK_PLUS_QM)
2778 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2779 		  : (*p == '+' || *p == '?'))
2780 	      || ((syntax & RE_INTERVALS)
2781                   && ((syntax & RE_NO_BK_BRACES)
2782 		      ? *p == '{'
2783                       : (p[0] == '\\' && p[1] == '{'))))
2784 	    {
2785 	      /* Start building a new exactn.  */
2786 
2787               laststart = b;
2788 
2789 	      BUF_PUSH_2 (exactn, 0);
2790 	      pending_exact = b - 1;
2791             }
2792 
2793 	  BUF_PUSH (c);
2794           (*pending_exact)++;
2795 	  break;
2796         } /* switch (c) */
2797     } /* while p != pend */
2798 
2799 
2800   /* Through the pattern now.  */
2801 
2802   if (fixup_alt_jump)
2803     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2804 
2805   if (!COMPILE_STACK_EMPTY)
2806     FREE_STACK_RETURN (REG_EPAREN);
2807 
2808   /* If we don't want backtracking, force success
2809      the first time we reach the end of the compiled pattern.  */
2810   if (syntax & RE_NO_POSIX_BACKTRACKING)
2811     BUF_PUSH (succeed);
2812 
2813   free (compile_stack.stack);
2814 
2815   /* We have succeeded; set the length of the buffer.  */
2816   bufp->used = b - bufp->buffer;
2817 
2818 #ifdef DEBUG
2819   if (debug)
2820     {
2821       DEBUG_PRINT1 ("\nCompiled pattern: \n");
2822       print_compiled_pattern (bufp);
2823     }
2824 #endif /* DEBUG */
2825 
2826 #ifndef MATCH_MAY_ALLOCATE
2827   /* Initialize the failure stack to the largest possible stack.  This
2828      isn't necessary unless we're trying to avoid calling alloca in
2829      the search and match routines.  */
2830   {
2831     int num_regs = bufp->re_nsub + 1;
2832 
2833     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2834        is strictly greater than re_max_failures, the largest possible stack
2835        is 2 * re_max_failures failure points.  */
2836     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2837       {
2838 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2839 
2840 # ifdef emacs
2841 	if (! fail_stack.stack)
2842 	  fail_stack.stack
2843 	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
2844 					    * sizeof (fail_stack_elt_t));
2845 	else
2846 	  fail_stack.stack
2847 	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2848 					     (fail_stack.size
2849 					      * sizeof (fail_stack_elt_t)));
2850 # else /* not emacs */
2851 	if (! fail_stack.stack)
2852 	  fail_stack.stack
2853 	    = (fail_stack_elt_t *) malloc (fail_stack.size
2854 					   * sizeof (fail_stack_elt_t));
2855 	else
2856 	  fail_stack.stack
2857 	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
2858 					    (fail_stack.size
2859 					     * sizeof (fail_stack_elt_t)));
2860 # endif /* not emacs */
2861       }
2862 
2863     regex_grow_registers (num_regs);
2864   }
2865 #endif /* not MATCH_MAY_ALLOCATE */
2866 
2867   return REG_NOERROR;
2868 } /* regex_compile */
2869 
2870 /* Subroutines for `regex_compile'.  */
2871 
2872 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
2873 
2874 static void
store_op1(op,loc,arg)2875 store_op1 (op, loc, arg)
2876     re_opcode_t op;
2877     unsigned char *loc;
2878     int arg;
2879 {
2880   *loc = (unsigned char) op;
2881   STORE_NUMBER (loc + 1, arg);
2882 }
2883 
2884 
2885 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2886 
2887 static void
store_op2(op,loc,arg1,arg2)2888 store_op2 (op, loc, arg1, arg2)
2889     re_opcode_t op;
2890     unsigned char *loc;
2891     int arg1, arg2;
2892 {
2893   *loc = (unsigned char) op;
2894   STORE_NUMBER (loc + 1, arg1);
2895   STORE_NUMBER (loc + 3, arg2);
2896 }
2897 
2898 
2899 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2900    for OP followed by two-byte integer parameter ARG.  */
2901 
2902 static void
insert_op1(op,loc,arg,end)2903 insert_op1 (op, loc, arg, end)
2904     re_opcode_t op;
2905     unsigned char *loc;
2906     int arg;
2907     unsigned char *end;
2908 {
2909   register unsigned char *pfrom = end;
2910   register unsigned char *pto = end + 3;
2911 
2912   while (pfrom != loc)
2913     *--pto = *--pfrom;
2914 
2915   store_op1 (op, loc, arg);
2916 }
2917 
2918 
2919 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2920 
2921 static void
insert_op2(op,loc,arg1,arg2,end)2922 insert_op2 (op, loc, arg1, arg2, end)
2923     re_opcode_t op;
2924     unsigned char *loc;
2925     int arg1, arg2;
2926     unsigned char *end;
2927 {
2928   register unsigned char *pfrom = end;
2929   register unsigned char *pto = end + 5;
2930 
2931   while (pfrom != loc)
2932     *--pto = *--pfrom;
2933 
2934   store_op2 (op, loc, arg1, arg2);
2935 }
2936 
2937 
2938 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2939    after an alternative or a begin-subexpression.  We assume there is at
2940    least one character before the ^.  */
2941 
2942 static boolean
at_begline_loc_p(pattern,p,syntax)2943 at_begline_loc_p (pattern, p, syntax)
2944     const char *pattern, *p;
2945     reg_syntax_t syntax;
2946 {
2947   const char *prev = p - 2;
2948   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2949 
2950   return
2951        /* After a subexpression?  */
2952        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2953        /* After an alternative?  */
2954     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2955 }
2956 
2957 
2958 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2959    at least one character after the $, i.e., `P < PEND'.  */
2960 
2961 static boolean
at_endline_loc_p(p,pend,syntax)2962 at_endline_loc_p (p, pend, syntax)
2963     const char *p, *pend;
2964     reg_syntax_t syntax;
2965 {
2966   const char *next = p;
2967   boolean next_backslash = *next == '\\';
2968   const char *next_next = p + 1 < pend ? p + 1 : 0;
2969 
2970   return
2971        /* Before a subexpression?  */
2972        (syntax & RE_NO_BK_PARENS ? *next == ')'
2973         : next_backslash && next_next && *next_next == ')')
2974        /* Before an alternative?  */
2975     || (syntax & RE_NO_BK_VBAR ? *next == '|'
2976         : next_backslash && next_next && *next_next == '|');
2977 }
2978 
2979 
2980 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2981    false if it's not.  */
2982 
2983 static boolean
group_in_compile_stack(compile_stack,regnum)2984 group_in_compile_stack (compile_stack, regnum)
2985     compile_stack_type compile_stack;
2986     regnum_t regnum;
2987 {
2988   int this_element;
2989 
2990   for (this_element = compile_stack.avail - 1;
2991        this_element >= 0;
2992        this_element--)
2993     if (compile_stack.stack[this_element].regnum == regnum)
2994       return true;
2995 
2996   return false;
2997 }
2998 
2999 
3000 /* Read the ending character of a range (in a bracket expression) from the
3001    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3002    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3003    Then we set the translation of all bits between the starting and
3004    ending characters (inclusive) in the compiled pattern B.
3005 
3006    Return an error code.
3007 
3008    We use these short variable names so we can use the same macros as
3009    `regex_compile' itself.  */
3010 
3011 static reg_errcode_t
compile_range(p_ptr,pend,translate,syntax,b)3012 compile_range (p_ptr, pend, translate, syntax, b)
3013     const char **p_ptr, *pend;
3014     RE_TRANSLATE_TYPE translate;
3015     reg_syntax_t syntax;
3016     unsigned char *b;
3017 {
3018   unsigned this_char;
3019 
3020   const char *p = *p_ptr;
3021   unsigned int range_start, range_end;
3022 
3023   if (p == pend)
3024     return REG_ERANGE;
3025 
3026   /* Even though the pattern is a signed `char *', we need to fetch
3027      with unsigned char *'s; if the high bit of the pattern character
3028      is set, the range endpoints will be negative if we fetch using a
3029      signed char *.
3030 
3031      We also want to fetch the endpoints without translating them; the
3032      appropriate translation is done in the bit-setting loop below.  */
3033   /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3034   range_start = ((const unsigned char *) p)[-2];
3035   range_end   = ((const unsigned char *) p)[0];
3036 
3037   /* Have to increment the pointer into the pattern string, so the
3038      caller isn't still at the ending character.  */
3039   (*p_ptr)++;
3040 
3041   /* If the start is after the end, the range is empty.  */
3042   if (range_start > range_end)
3043     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3044 
3045   /* Here we see why `this_char' has to be larger than an `unsigned
3046      char' -- the range is inclusive, so if `range_end' == 0xff
3047      (assuming 8-bit characters), we would otherwise go into an infinite
3048      loop, since all characters <= 0xff.  */
3049   for (this_char = range_start; this_char <= range_end; this_char++)
3050     {
3051       SET_LIST_BIT (TRANSLATE (this_char));
3052     }
3053 
3054   return REG_NOERROR;
3055 }
3056 
3057 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3058    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3059    characters can start a string that matches the pattern.  This fastmap
3060    is used by re_search to skip quickly over impossible starting points.
3061 
3062    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3063    area as BUFP->fastmap.
3064 
3065    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3066    the pattern buffer.
3067 
3068    Returns 0 if we succeed, -2 if an internal error.   */
3069 
3070 int
re_compile_fastmap(bufp)3071 re_compile_fastmap (bufp)
3072      struct re_pattern_buffer *bufp;
3073 {
3074   int j, k;
3075 #ifdef MATCH_MAY_ALLOCATE
3076   fail_stack_type fail_stack;
3077 #endif
3078 #ifndef REGEX_MALLOC
3079   char *destination;
3080 #endif
3081 
3082   register char *fastmap = bufp->fastmap;
3083   unsigned char *pattern = bufp->buffer;
3084   unsigned char *p = pattern;
3085   register unsigned char *pend = pattern + bufp->used;
3086 
3087 #ifdef REL_ALLOC
3088   /* This holds the pointer to the failure stack, when
3089      it is allocated relocatably.  */
3090   fail_stack_elt_t *failure_stack_ptr;
3091 #endif
3092 
3093   /* Assume that each path through the pattern can be null until
3094      proven otherwise.  We set this false at the bottom of switch
3095      statement, to which we get only if a particular path doesn't
3096      match the empty string.  */
3097   boolean path_can_be_null = true;
3098 
3099   /* We aren't doing a `succeed_n' to begin with.  */
3100   boolean succeed_n_p = false;
3101 
3102   assert (fastmap != NULL && p != NULL);
3103 
3104   INIT_FAIL_STACK ();
3105   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
3106   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
3107   bufp->can_be_null = 0;
3108 
3109   while (1)
3110     {
3111       if (p == pend || *p == succeed)
3112 	{
3113 	  /* We have reached the (effective) end of pattern.  */
3114 	  if (!FAIL_STACK_EMPTY ())
3115 	    {
3116 	      bufp->can_be_null |= path_can_be_null;
3117 
3118 	      /* Reset for next path.  */
3119 	      path_can_be_null = true;
3120 
3121 	      p = fail_stack.stack[--fail_stack.avail].pointer;
3122 
3123 	      continue;
3124 	    }
3125 	  else
3126 	    break;
3127 	}
3128 
3129       /* We should never be about to go beyond the end of the pattern.  */
3130       assert (p < pend);
3131 
3132       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3133 	{
3134 
3135         /* I guess the idea here is to simply not bother with a fastmap
3136            if a backreference is used, since it's too hard to figure out
3137            the fastmap for the corresponding group.  Setting
3138            `can_be_null' stops `re_search_2' from using the fastmap, so
3139            that is all we do.  */
3140 	case duplicate:
3141 	  bufp->can_be_null = 1;
3142           goto done;
3143 
3144 
3145       /* Following are the cases which match a character.  These end
3146          with `break'.  */
3147 
3148 	case exactn:
3149           fastmap[p[1]] = 1;
3150 	  break;
3151 
3152 
3153         case charset:
3154           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3155 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3156               fastmap[j] = 1;
3157 	  break;
3158 
3159 
3160 	case charset_not:
3161 	  /* Chars beyond end of map must be allowed.  */
3162 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3163             fastmap[j] = 1;
3164 
3165 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3166 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3167               fastmap[j] = 1;
3168           break;
3169 
3170 
3171 	case wordchar:
3172 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3173 	    if (SYNTAX (j) == Sword)
3174 	      fastmap[j] = 1;
3175 	  break;
3176 
3177 
3178 	case notwordchar:
3179 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3180 	    if (SYNTAX (j) != Sword)
3181 	      fastmap[j] = 1;
3182 	  break;
3183 
3184 
3185         case anychar:
3186 	  {
3187 	    int fastmap_newline = fastmap['\n'];
3188 
3189 	    /* `.' matches anything ...  */
3190 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
3191 	      fastmap[j] = 1;
3192 
3193 	    /* ... except perhaps newline.  */
3194 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
3195 	      fastmap['\n'] = fastmap_newline;
3196 
3197 	    /* Return if we have already set `can_be_null'; if we have,
3198 	       then the fastmap is irrelevant.  Something's wrong here.  */
3199 	    else if (bufp->can_be_null)
3200 	      goto done;
3201 
3202 	    /* Otherwise, have to check alternative paths.  */
3203 	    break;
3204 	  }
3205 
3206 #ifdef emacs
3207         case syntaxspec:
3208 	  k = *p++;
3209 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3210 	    if (SYNTAX (j) == (enum syntaxcode) k)
3211 	      fastmap[j] = 1;
3212 	  break;
3213 
3214 
3215 	case notsyntaxspec:
3216 	  k = *p++;
3217 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3218 	    if (SYNTAX (j) != (enum syntaxcode) k)
3219 	      fastmap[j] = 1;
3220 	  break;
3221 
3222 
3223       /* All cases after this match the empty string.  These end with
3224          `continue'.  */
3225 
3226 
3227 	case before_dot:
3228 	case at_dot:
3229 	case after_dot:
3230           continue;
3231 #endif /* emacs */
3232 
3233 
3234         case no_op:
3235         case begline:
3236         case endline:
3237 	case begbuf:
3238 	case endbuf:
3239 	case wordbound:
3240 	case notwordbound:
3241 	case wordbeg:
3242 	case wordend:
3243         case push_dummy_failure:
3244           continue;
3245 
3246 
3247 	case jump_n:
3248         case pop_failure_jump:
3249 	case maybe_pop_jump:
3250 	case jump:
3251         case jump_past_alt:
3252 	case dummy_failure_jump:
3253           EXTRACT_NUMBER_AND_INCR (j, p);
3254 	  p += j;
3255 	  if (j > 0)
3256 	    continue;
3257 
3258           /* Jump backward implies we just went through the body of a
3259              loop and matched nothing.  Opcode jumped to should be
3260              `on_failure_jump' or `succeed_n'.  Just treat it like an
3261              ordinary jump.  For a * loop, it has pushed its failure
3262              point already; if so, discard that as redundant.  */
3263           if ((re_opcode_t) *p != on_failure_jump
3264 	      && (re_opcode_t) *p != succeed_n)
3265 	    continue;
3266 
3267           p++;
3268           EXTRACT_NUMBER_AND_INCR (j, p);
3269           p += j;
3270 
3271           /* If what's on the stack is where we are now, pop it.  */
3272           if (!FAIL_STACK_EMPTY ()
3273 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3274             fail_stack.avail--;
3275 
3276           continue;
3277 
3278 
3279         case on_failure_jump:
3280         case on_failure_keep_string_jump:
3281 	handle_on_failure_jump:
3282           EXTRACT_NUMBER_AND_INCR (j, p);
3283 
3284           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3285              end of the pattern.  We don't want to push such a point,
3286              since when we restore it above, entering the switch will
3287              increment `p' past the end of the pattern.  We don't need
3288              to push such a point since we obviously won't find any more
3289              fastmap entries beyond `pend'.  Such a pattern can match
3290              the null string, though.  */
3291           if (p + j < pend)
3292             {
3293               if (!PUSH_PATTERN_OP (p + j, fail_stack))
3294 		{
3295 		  RESET_FAIL_STACK ();
3296 		  return -2;
3297 		}
3298             }
3299           else
3300             bufp->can_be_null = 1;
3301 
3302           if (succeed_n_p)
3303             {
3304               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
3305               succeed_n_p = false;
3306 	    }
3307 
3308           continue;
3309 
3310 
3311 	case succeed_n:
3312           /* Get to the number of times to succeed.  */
3313           p += 2;
3314 
3315           /* Increment p past the n for when k != 0.  */
3316           EXTRACT_NUMBER_AND_INCR (k, p);
3317           if (k == 0)
3318 	    {
3319               p -= 4;
3320   	      succeed_n_p = true;  /* Spaghetti code alert.  */
3321               goto handle_on_failure_jump;
3322             }
3323           continue;
3324 
3325 
3326 	case set_number_at:
3327           p += 4;
3328           continue;
3329 
3330 
3331 	case start_memory:
3332         case stop_memory:
3333 	  p += 2;
3334 	  continue;
3335 
3336 
3337 	default:
3338           abort (); /* We have listed all the cases.  */
3339         } /* switch *p++ */
3340 
3341       /* Getting here means we have found the possible starting
3342          characters for one path of the pattern -- and that the empty
3343          string does not match.  We need not follow this path further.
3344          Instead, look at the next alternative (remembered on the
3345          stack), or quit if no more.  The test at the top of the loop
3346          does these things.  */
3347       path_can_be_null = false;
3348       p = pend;
3349     } /* while p */
3350 
3351   /* Set `can_be_null' for the last path (also the first path, if the
3352      pattern is empty).  */
3353   bufp->can_be_null |= path_can_be_null;
3354 
3355  done:
3356   RESET_FAIL_STACK ();
3357   return 0;
3358 } /* re_compile_fastmap */
3359 #ifdef _LIBC
3360 weak_alias (__re_compile_fastmap, re_compile_fastmap)
3361 #endif
3362 
3363 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3364    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3365    this memory for recording register information.  STARTS and ENDS
3366    must be allocated using the malloc library routine, and must each
3367    be at least NUM_REGS * sizeof (regoff_t) bytes long.
3368 
3369    If NUM_REGS == 0, then subsequent matches should allocate their own
3370    register data.
3371 
3372    Unless this function is called, the first search or match using
3373    PATTERN_BUFFER will allocate its own register data, without
3374    freeing the old data.  */
3375 
3376 void
3377 re_set_registers (bufp, regs, num_regs, starts, ends)
3378     struct re_pattern_buffer *bufp;
3379     struct re_registers *regs;
3380     unsigned num_regs;
3381     regoff_t *starts, *ends;
3382 {
3383   if (num_regs)
3384     {
3385       bufp->regs_allocated = REGS_REALLOCATE;
3386       regs->num_regs = num_regs;
3387       regs->start = starts;
3388       regs->end = ends;
3389     }
3390   else
3391     {
3392       bufp->regs_allocated = REGS_UNALLOCATED;
3393       regs->num_regs = 0;
3394       regs->start = regs->end = (regoff_t *) 0;
3395     }
3396 }
3397 #ifdef _LIBC
3398 weak_alias (__re_set_registers, re_set_registers)
3399 #endif
3400 
3401 /* Searching routines.  */
3402 
3403 /* Like re_search_2, below, but only one string is specified, and
3404    doesn't let you say where to stop matching. */
3405 
3406 int
3407 re_search (bufp, string, size, startpos, range, regs)
3408      struct re_pattern_buffer *bufp;
3409      const char *string;
3410      int size, startpos, range;
3411      struct re_registers *regs;
3412 {
3413   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3414 		      regs, size);
3415 }
3416 #ifdef _LIBC
3417 weak_alias (__re_search, re_search)
3418 #endif
3419 
3420 
3421 /* Using the compiled pattern in BUFP->buffer, first tries to match the
3422    virtual concatenation of STRING1 and STRING2, starting first at index
3423    STARTPOS, then at STARTPOS + 1, and so on.
3424 
3425    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3426 
3427    RANGE is how far to scan while trying to match.  RANGE = 0 means try
3428    only at STARTPOS; in general, the last start tried is STARTPOS +
3429    RANGE.
3430 
3431    In REGS, return the indices of the virtual concatenation of STRING1
3432    and STRING2 that matched the entire BUFP->buffer and its contained
3433    subexpressions.
3434 
3435    Do not consider matching one past the index STOP in the virtual
3436    concatenation of STRING1 and STRING2.
3437 
3438    We return either the position in the strings at which the match was
3439    found, -1 if no match, or -2 if error (such as failure
3440    stack overflow).  */
3441 
3442 int
3443 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3444      struct re_pattern_buffer *bufp;
3445      const char *string1, *string2;
3446      int size1, size2;
3447      int startpos;
3448      int range;
3449      struct re_registers *regs;
3450      int stop;
3451 {
3452   int val;
3453   register char *fastmap = bufp->fastmap;
3454   register RE_TRANSLATE_TYPE translate = bufp->translate;
3455   int total_size = size1 + size2;
3456   int endpos = startpos + range;
3457 
3458   /* Check for out-of-range STARTPOS.  */
3459   if (startpos < 0 || startpos > total_size)
3460     return -1;
3461 
3462   /* Fix up RANGE if it might eventually take us outside
3463      the virtual concatenation of STRING1 and STRING2.
3464      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3465   if (endpos < 0)
3466     range = 0 - startpos;
3467   else if (endpos > total_size)
3468     range = total_size - startpos;
3469 
3470   /* If the search isn't to be a backwards one, don't waste time in a
3471      search for a pattern that must be anchored.  */
3472   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3473     {
3474       if (startpos > 0)
3475 	return -1;
3476       else
3477 	range = 1;
3478     }
3479 
3480 #ifdef emacs
3481   /* In a forward search for something that starts with \=.
3482      don't keep searching past point.  */
3483   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3484     {
3485       range = PT - startpos;
3486       if (range <= 0)
3487 	return -1;
3488     }
3489 #endif /* emacs */
3490 
3491   /* Update the fastmap now if not correct already.  */
3492   if (fastmap && !bufp->fastmap_accurate)
3493     if (re_compile_fastmap (bufp) == -2)
3494       return -2;
3495 
3496   /* Loop through the string, looking for a place to start matching.  */
3497   for (;;)
3498     {
3499       /* If a fastmap is supplied, skip quickly over characters that
3500          cannot be the start of a match.  If the pattern can match the
3501          null string, however, we don't need to skip characters; we want
3502          the first null string.  */
3503       if (fastmap && startpos < total_size && !bufp->can_be_null)
3504 	{
3505 	  if (range > 0)	/* Searching forwards.  */
3506 	    {
3507 	      register const char *d;
3508 	      register int lim = 0;
3509 	      int irange = range;
3510 
3511               if (startpos < size1 && startpos + range >= size1)
3512                 lim = range - (size1 - startpos);
3513 
3514 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3515 
3516               /* Written out as an if-else to avoid testing `translate'
3517                  inside the loop.  */
3518 	      if (translate)
3519                 while (range > lim
3520                        && !fastmap[(unsigned char)
3521 				   translate[(unsigned char) *d++]])
3522                   range--;
3523 	      else
3524                 while (range > lim && !fastmap[(unsigned char) *d++])
3525                   range--;
3526 
3527 	      startpos += irange - range;
3528 	    }
3529 	  else				/* Searching backwards.  */
3530 	    {
3531 	      register char c = (size1 == 0 || startpos >= size1
3532                                  ? string2[startpos - size1]
3533                                  : string1[startpos]);
3534 
3535 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
3536 		goto advance;
3537 	    }
3538 	}
3539 
3540       /* If can't match the null string, and that's all we have left, fail.  */
3541       if (range >= 0 && startpos == total_size && fastmap
3542           && !bufp->can_be_null)
3543 	return -1;
3544 
3545       val = re_match_2_internal (bufp, string1, size1, string2, size2,
3546 				 startpos, regs, stop);
3547 #ifndef REGEX_MALLOC
3548 # ifdef C_ALLOCA
3549       alloca (0);
3550 # endif
3551 #endif
3552 
3553       if (val >= 0)
3554 	return startpos;
3555 
3556       if (val == -2)
3557 	return -2;
3558 
3559     advance:
3560       if (!range)
3561         break;
3562       else if (range > 0)
3563         {
3564           range--;
3565           startpos++;
3566         }
3567       else
3568         {
3569           range++;
3570           startpos--;
3571         }
3572     }
3573   return -1;
3574 } /* re_search_2 */
3575 #ifdef _LIBC
3576 weak_alias (__re_search_2, re_search_2)
3577 #endif
3578 
3579 /* This converts PTR, a pointer into one of the search strings `string1'
3580    and `string2' into an offset from the beginning of that string.  */
3581 #define POINTER_TO_OFFSET(ptr)			\
3582   (FIRST_STRING_P (ptr)				\
3583    ? ((regoff_t) ((ptr) - string1))		\
3584    : ((regoff_t) ((ptr) - string2 + size1)))
3585 
3586 /* Macros for dealing with the split strings in re_match_2.  */
3587 
3588 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3589 
3590 /* Call before fetching a character with *d.  This switches over to
3591    string2 if necessary.  */
3592 #define PREFETCH()							\
3593   while (d == dend)						    	\
3594     {									\
3595       /* End of string2 => fail.  */					\
3596       if (dend == end_match_2) 						\
3597         goto fail;							\
3598       /* End of string1 => advance to string2.  */ 			\
3599       d = string2;						        \
3600       dend = end_match_2;						\
3601     }
3602 
3603 
3604 /* Test if at very beginning or at very end of the virtual concatenation
3605    of `string1' and `string2'.  If only one string, it's `string2'.  */
3606 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3607 #define AT_STRINGS_END(d) ((d) == end2)
3608 
3609 
3610 /* Test if D points to a character which is word-constituent.  We have
3611    two special cases to check for: if past the end of string1, look at
3612    the first character in string2; and if before the beginning of
3613    string2, look at the last character in string1.  */
3614 #define WORDCHAR_P(d)							\
3615   (SYNTAX ((d) == end1 ? *string2					\
3616            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3617    == Sword)
3618 
3619 /* Disabled due to a compiler bug -- see comment at case wordbound */
3620 #if 0
3621 /* Test if the character before D and the one at D differ with respect
3622    to being word-constituent.  */
3623 #define AT_WORD_BOUNDARY(d)						\
3624   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3625    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3626 #endif
3627 
3628 /* Free everything we malloc.  */
3629 #ifdef MATCH_MAY_ALLOCATE
3630 # define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
3631 # define FREE_VARIABLES()						\
3632   do {									\
3633     REGEX_FREE_STACK (fail_stack.stack);				\
3634     FREE_VAR (regstart);						\
3635     FREE_VAR (regend);							\
3636     FREE_VAR (old_regstart);						\
3637     FREE_VAR (old_regend);						\
3638     FREE_VAR (best_regstart);						\
3639     FREE_VAR (best_regend);						\
3640     FREE_VAR (reg_info);						\
3641     FREE_VAR (reg_dummy);						\
3642     FREE_VAR (reg_info_dummy);						\
3643   } while (0)
3644 #else
3645 # define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning. */
3646 #endif /* not MATCH_MAY_ALLOCATE */
3647 
3648 /* These values must meet several constraints.  They must not be valid
3649    register values; since we have a limit of 255 registers (because
3650    we use only one byte in the pattern for the register number), we can
3651    use numbers larger than 255.  They must differ by 1, because of
3652    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3653    be larger than the value for the highest register, so we do not try
3654    to actually save any registers when none are active.  */
3655 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3656 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3657 
3658 /* Matching routines.  */
3659 
3660 #ifndef emacs   /* Emacs never uses this.  */
3661 /* re_match is like re_match_2 except it takes only a single string.  */
3662 
3663 int
3664 re_match (bufp, string, size, pos, regs)
3665      struct re_pattern_buffer *bufp;
3666      const char *string;
3667      int size, pos;
3668      struct re_registers *regs;
3669 {
3670   int result = re_match_2_internal (bufp, NULL, 0, string, size,
3671 				    pos, regs, size);
3672 # ifndef REGEX_MALLOC
3673 #  ifdef C_ALLOCA
3674   alloca (0);
3675 #  endif
3676 # endif
3677   return result;
3678 }
3679 # ifdef _LIBC
3680 weak_alias (__re_match, re_match)
3681 # endif
3682 #endif /* not emacs */
3683 
3684 static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
3685 						    unsigned char *end,
3686 						register_info_type *reg_info));
3687 static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
3688 						  unsigned char *end,
3689 						register_info_type *reg_info));
3690 static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
3691 							unsigned char *end,
3692 						register_info_type *reg_info));
3693 static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
3694 				     int len, char *translate));
3695 
3696 /* re_match_2 matches the compiled pattern in BUFP against the
3697    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3698    and SIZE2, respectively).  We start matching at POS, and stop
3699    matching at STOP.
3700 
3701    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3702    store offsets for the substring each group matched in REGS.  See the
3703    documentation for exactly how many groups we fill.
3704 
3705    We return -1 if no match, -2 if an internal error (such as the
3706    failure stack overflowing).  Otherwise, we return the length of the
3707    matched substring.  */
3708 
3709 int
re_match_2(bufp,string1,size1,string2,size2,pos,regs,stop)3710 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3711      struct re_pattern_buffer *bufp;
3712      const char *string1, *string2;
3713      int size1, size2;
3714      int pos;
3715      struct re_registers *regs;
3716      int stop;
3717 {
3718   int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3719 				    pos, regs, stop);
3720 #ifndef REGEX_MALLOC
3721 # ifdef C_ALLOCA
3722   alloca (0);
3723 # endif
3724 #endif
3725   return result;
3726 }
3727 #ifdef _LIBC
3728 weak_alias (__re_match_2, re_match_2)
3729 #endif
3730 
3731 /* This is a separate function so that we can force an alloca cleanup
3732    afterwards.  */
3733 static int
3734 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3735      struct re_pattern_buffer *bufp;
3736      const char *string1, *string2;
3737      int size1, size2;
3738      int pos;
3739      struct re_registers *regs;
3740      int stop;
3741 {
3742   /* General temporaries.  */
3743   int mcnt;
3744   unsigned char *p1;
3745 
3746   /* Just past the end of the corresponding string.  */
3747   const char *end1, *end2;
3748 
3749   /* Pointers into string1 and string2, just past the last characters in
3750      each to consider matching.  */
3751   const char *end_match_1, *end_match_2;
3752 
3753   /* Where we are in the data, and the end of the current string.  */
3754   const char *d, *dend;
3755 
3756   /* Where we are in the pattern, and the end of the pattern.  */
3757   unsigned char *p = bufp->buffer;
3758   register unsigned char *pend = p + bufp->used;
3759 
3760   /* Mark the opcode just after a start_memory, so we can test for an
3761      empty subpattern when we get to the stop_memory.  */
3762   unsigned char *just_past_start_mem = 0;
3763 
3764   /* We use this to map every character in the string.  */
3765   RE_TRANSLATE_TYPE translate = bufp->translate;
3766 
3767   /* Failure point stack.  Each place that can handle a failure further
3768      down the line pushes a failure point on this stack.  It consists of
3769      restart, regend, and reg_info for all registers corresponding to
3770      the subexpressions we're currently inside, plus the number of such
3771      registers, and, finally, two char *'s.  The first char * is where
3772      to resume scanning the pattern; the second one is where to resume
3773      scanning the strings.  If the latter is zero, the failure point is
3774      a ``dummy''; if a failure happens and the failure point is a dummy,
3775      it gets discarded and the next next one is tried.  */
3776 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3777   fail_stack_type fail_stack;
3778 #endif
3779 #ifdef DEBUG
3780   static unsigned failure_id = 0;
3781   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3782 #endif
3783 
3784 #ifdef REL_ALLOC
3785   /* This holds the pointer to the failure stack, when
3786      it is allocated relocatably.  */
3787   fail_stack_elt_t *failure_stack_ptr;
3788 #endif
3789 
3790   /* We fill all the registers internally, independent of what we
3791      return, for use in backreferences.  The number here includes
3792      an element for register zero.  */
3793   size_t num_regs = bufp->re_nsub + 1;
3794 
3795   /* The currently active registers.  */
3796   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3797   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3798 
3799   /* Information on the contents of registers. These are pointers into
3800      the input strings; they record just what was matched (on this
3801      attempt) by a subexpression part of the pattern, that is, the
3802      regnum-th regstart pointer points to where in the pattern we began
3803      matching and the regnum-th regend points to right after where we
3804      stopped matching the regnum-th subexpression.  (The zeroth register
3805      keeps track of what the whole pattern matches.)  */
3806 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3807   const char **regstart, **regend;
3808 #endif
3809 
3810   /* If a group that's operated upon by a repetition operator fails to
3811      match anything, then the register for its start will need to be
3812      restored because it will have been set to wherever in the string we
3813      are when we last see its open-group operator.  Similarly for a
3814      register's end.  */
3815 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3816   const char **old_regstart, **old_regend;
3817 #endif
3818 
3819   /* The is_active field of reg_info helps us keep track of which (possibly
3820      nested) subexpressions we are currently in. The matched_something
3821      field of reg_info[reg_num] helps us tell whether or not we have
3822      matched any of the pattern so far this time through the reg_num-th
3823      subexpression.  These two fields get reset each time through any
3824      loop their register is in.  */
3825 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3826   register_info_type *reg_info;
3827 #endif
3828 
3829   /* The following record the register info as found in the above
3830      variables when we find a match better than any we've seen before.
3831      This happens as we backtrack through the failure points, which in
3832      turn happens only if we have not yet matched the entire string. */
3833   unsigned best_regs_set = false;
3834 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3835   const char **best_regstart, **best_regend;
3836 #endif
3837 
3838   /* Logically, this is `best_regend[0]'.  But we don't want to have to
3839      allocate space for that if we're not allocating space for anything
3840      else (see below).  Also, we never need info about register 0 for
3841      any of the other register vectors, and it seems rather a kludge to
3842      treat `best_regend' differently than the rest.  So we keep track of
3843      the end of the best match so far in a separate variable.  We
3844      initialize this to NULL so that when we backtrack the first time
3845      and need to test it, it's not garbage.  */
3846   const char *match_end = NULL;
3847 
3848   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
3849   int set_regs_matched_done = 0;
3850 
3851   /* Used when we pop values we don't care about.  */
3852 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3853   const char **reg_dummy;
3854   register_info_type *reg_info_dummy;
3855 #endif
3856 
3857 #ifdef DEBUG
3858   /* Counts the total number of registers pushed.  */
3859   unsigned num_regs_pushed = 0;
3860 #endif
3861 
3862   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3863 
3864   INIT_FAIL_STACK ();
3865 
3866 #ifdef MATCH_MAY_ALLOCATE
3867   /* Do not bother to initialize all the register variables if there are
3868      no groups in the pattern, as it takes a fair amount of time.  If
3869      there are groups, we include space for register 0 (the whole
3870      pattern), even though we never use it, since it simplifies the
3871      array indexing.  We should fix this.  */
3872   if (bufp->re_nsub)
3873     {
3874       regstart = REGEX_TALLOC (num_regs, const char *);
3875       regend = REGEX_TALLOC (num_regs, const char *);
3876       old_regstart = REGEX_TALLOC (num_regs, const char *);
3877       old_regend = REGEX_TALLOC (num_regs, const char *);
3878       best_regstart = REGEX_TALLOC (num_regs, const char *);
3879       best_regend = REGEX_TALLOC (num_regs, const char *);
3880       reg_info = REGEX_TALLOC (num_regs, register_info_type);
3881       reg_dummy = REGEX_TALLOC (num_regs, const char *);
3882       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3883 
3884       if (!(regstart && regend && old_regstart && old_regend && reg_info
3885             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3886         {
3887           FREE_VARIABLES ();
3888           return -2;
3889         }
3890     }
3891   else
3892     {
3893       /* We must initialize all our variables to NULL, so that
3894          `FREE_VARIABLES' doesn't try to free them.  */
3895       regstart = regend = old_regstart = old_regend = best_regstart
3896         = best_regend = reg_dummy = NULL;
3897       reg_info = reg_info_dummy = (register_info_type *) NULL;
3898     }
3899 #endif /* MATCH_MAY_ALLOCATE */
3900 
3901   /* The starting position is bogus.  */
3902   if (pos < 0 || pos > size1 + size2)
3903     {
3904       FREE_VARIABLES ();
3905       return -1;
3906     }
3907 
3908   /* Initialize subexpression text positions to -1 to mark ones that no
3909      start_memory/stop_memory has been seen for. Also initialize the
3910      register information struct.  */
3911   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
3912     {
3913       regstart[mcnt] = regend[mcnt]
3914         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3915 
3916       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3917       IS_ACTIVE (reg_info[mcnt]) = 0;
3918       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3919       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3920     }
3921 
3922   /* We move `string1' into `string2' if the latter's empty -- but not if
3923      `string1' is null.  */
3924   if (size2 == 0 && string1 != NULL)
3925     {
3926       string2 = string1;
3927       size2 = size1;
3928       string1 = 0;
3929       size1 = 0;
3930     }
3931   end1 = string1 + size1;
3932   end2 = string2 + size2;
3933 
3934   /* Compute where to stop matching, within the two strings.  */
3935   if (stop <= size1)
3936     {
3937       end_match_1 = string1 + stop;
3938       end_match_2 = string2;
3939     }
3940   else
3941     {
3942       end_match_1 = end1;
3943       end_match_2 = string2 + stop - size1;
3944     }
3945 
3946   /* `p' scans through the pattern as `d' scans through the data.
3947      `dend' is the end of the input string that `d' points within.  `d'
3948      is advanced into the following input string whenever necessary, but
3949      this happens before fetching; therefore, at the beginning of the
3950      loop, `d' can be pointing at the end of a string, but it cannot
3951      equal `string2'.  */
3952   if (size1 > 0 && pos <= size1)
3953     {
3954       d = string1 + pos;
3955       dend = end_match_1;
3956     }
3957   else
3958     {
3959       d = string2 + pos - size1;
3960       dend = end_match_2;
3961     }
3962 
3963   DEBUG_PRINT1 ("The compiled pattern is:\n");
3964   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3965   DEBUG_PRINT1 ("The string to match is: `");
3966   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3967   DEBUG_PRINT1 ("'\n");
3968 
3969   /* This loops over pattern commands.  It exits by returning from the
3970      function if the match is complete, or it drops through if the match
3971      fails at this starting point in the input data.  */
3972   for (;;)
3973     {
3974 #ifdef _LIBC
3975       DEBUG_PRINT2 ("\n%p: ", p);
3976 #else
3977       DEBUG_PRINT2 ("\n0x%x: ", p);
3978 #endif
3979 
3980       if (p == pend)
3981 	{ /* End of pattern means we might have succeeded.  */
3982           DEBUG_PRINT1 ("end of pattern ... ");
3983 
3984 	  /* If we haven't matched the entire string, and we want the
3985              longest match, try backtracking.  */
3986           if (d != end_match_2)
3987 	    {
3988 	      /* 1 if this match ends in the same string (string1 or string2)
3989 		 as the best previous match.  */
3990 	      boolean same_str_p = (FIRST_STRING_P (match_end)
3991 				    == MATCHING_IN_FIRST_STRING);
3992 	      /* 1 if this match is the best seen so far.  */
3993 	      boolean best_match_p;
3994 
3995 	      /* AIX compiler got confused when this was combined
3996 		 with the previous declaration.  */
3997 	      if (same_str_p)
3998 		best_match_p = d > match_end;
3999 	      else
4000 		best_match_p = !MATCHING_IN_FIRST_STRING;
4001 
4002               DEBUG_PRINT1 ("backtracking.\n");
4003 
4004               if (!FAIL_STACK_EMPTY ())
4005                 { /* More failure points to try.  */
4006 
4007                   /* If exceeds best match so far, save it.  */
4008                   if (!best_regs_set || best_match_p)
4009                     {
4010                       best_regs_set = true;
4011                       match_end = d;
4012 
4013                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4014 
4015                       for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4016                         {
4017                           best_regstart[mcnt] = regstart[mcnt];
4018                           best_regend[mcnt] = regend[mcnt];
4019                         }
4020                     }
4021                   goto fail;
4022                 }
4023 
4024               /* If no failure points, don't restore garbage.  And if
4025                  last match is real best match, don't restore second
4026                  best one. */
4027               else if (best_regs_set && !best_match_p)
4028                 {
4029   	        restore_best_regs:
4030                   /* Restore best match.  It may happen that `dend ==
4031                      end_match_1' while the restored d is in string2.
4032                      For example, the pattern `x.*y.*z' against the
4033                      strings `x-' and `y-z-', if the two strings are
4034                      not consecutive in memory.  */
4035                   DEBUG_PRINT1 ("Restoring best registers.\n");
4036 
4037                   d = match_end;
4038                   dend = ((d >= string1 && d <= end1)
4039 		           ? end_match_1 : end_match_2);
4040 
4041 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4042 		    {
4043 		      regstart[mcnt] = best_regstart[mcnt];
4044 		      regend[mcnt] = best_regend[mcnt];
4045 		    }
4046                 }
4047             } /* d != end_match_2 */
4048 
4049 	succeed_label:
4050           DEBUG_PRINT1 ("Accepting match.\n");
4051 
4052           /* If caller wants register contents data back, do it.  */
4053           if (regs && !bufp->no_sub)
4054 	    {
4055               /* Have the register data arrays been allocated?  */
4056               if (bufp->regs_allocated == REGS_UNALLOCATED)
4057                 { /* No.  So allocate them with malloc.  We need one
4058                      extra element beyond `num_regs' for the `-1' marker
4059                      GNU code uses.  */
4060                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4061                   regs->start = TALLOC (regs->num_regs, regoff_t);
4062                   regs->end = TALLOC (regs->num_regs, regoff_t);
4063                   if (regs->start == NULL || regs->end == NULL)
4064 		    {
4065 		      FREE_VARIABLES ();
4066 		      return -2;
4067 		    }
4068                   bufp->regs_allocated = REGS_REALLOCATE;
4069                 }
4070               else if (bufp->regs_allocated == REGS_REALLOCATE)
4071                 { /* Yes.  If we need more elements than were already
4072                      allocated, reallocate them.  If we need fewer, just
4073                      leave it alone.  */
4074                   if (regs->num_regs < num_regs + 1)
4075                     {
4076                       regs->num_regs = num_regs + 1;
4077                       RETALLOC (regs->start, regs->num_regs, regoff_t);
4078                       RETALLOC (regs->end, regs->num_regs, regoff_t);
4079                       if (regs->start == NULL || regs->end == NULL)
4080 			{
4081 			  FREE_VARIABLES ();
4082 			  return -2;
4083 			}
4084                     }
4085                 }
4086               else
4087 		{
4088 		  /* These braces fend off a "empty body in an else-statement"
4089 		     warning under GCC when assert expands to nothing.  */
4090 		  assert (bufp->regs_allocated == REGS_FIXED);
4091 		}
4092 
4093               /* Convert the pointer data in `regstart' and `regend' to
4094                  indices.  Register zero has to be set differently,
4095                  since we haven't kept track of any info for it.  */
4096               if (regs->num_regs > 0)
4097                 {
4098                   regs->start[0] = pos;
4099                   regs->end[0] = (MATCHING_IN_FIRST_STRING
4100 				  ? ((regoff_t) (d - string1))
4101 			          : ((regoff_t) (d - string2 + size1)));
4102                 }
4103 
4104               /* Go through the first `min (num_regs, regs->num_regs)'
4105                  registers, since that is all we initialized.  */
4106 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4107 		   mcnt++)
4108 		{
4109                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4110                     regs->start[mcnt] = regs->end[mcnt] = -1;
4111                   else
4112                     {
4113 		      regs->start[mcnt]
4114 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4115                       regs->end[mcnt]
4116 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4117                     }
4118 		}
4119 
4120               /* If the regs structure we return has more elements than
4121                  were in the pattern, set the extra elements to -1.  If
4122                  we (re)allocated the registers, this is the case,
4123                  because we always allocate enough to have at least one
4124                  -1 at the end.  */
4125               for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
4126                 regs->start[mcnt] = regs->end[mcnt] = -1;
4127 	    } /* regs && !bufp->no_sub */
4128 
4129           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4130                         nfailure_points_pushed, nfailure_points_popped,
4131                         nfailure_points_pushed - nfailure_points_popped);
4132           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4133 
4134           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4135 			    ? string1
4136 			    : string2 - size1);
4137 
4138           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4139 
4140           FREE_VARIABLES ();
4141           return mcnt;
4142         }
4143 
4144       /* Otherwise match next pattern command.  */
4145       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4146 	{
4147         /* Ignore these.  Used to ignore the n of succeed_n's which
4148            currently have n == 0.  */
4149         case no_op:
4150           DEBUG_PRINT1 ("EXECUTING no_op.\n");
4151           break;
4152 
4153 	case succeed:
4154           DEBUG_PRINT1 ("EXECUTING succeed.\n");
4155 	  goto succeed_label;
4156 
4157         /* Match the next n pattern characters exactly.  The following
4158            byte in the pattern defines n, and the n bytes after that
4159            are the characters to match.  */
4160 	case exactn:
4161 	  mcnt = *p++;
4162           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4163 
4164           /* This is written out as an if-else so we don't waste time
4165              testing `translate' inside the loop.  */
4166           if (translate)
4167 	    {
4168 	      do
4169 		{
4170 		  PREFETCH ();
4171 		  if ((unsigned char) translate[(unsigned char) *d++]
4172 		      != (unsigned char) *p++)
4173                     goto fail;
4174 		}
4175 	      while (--mcnt);
4176 	    }
4177 	  else
4178 	    {
4179 	      do
4180 		{
4181 		  PREFETCH ();
4182 		  if (*d++ != (char) *p++) goto fail;
4183 		}
4184 	      while (--mcnt);
4185 	    }
4186 	  SET_REGS_MATCHED ();
4187           break;
4188 
4189 
4190         /* Match any character except possibly a newline or a null.  */
4191 	case anychar:
4192           DEBUG_PRINT1 ("EXECUTING anychar.\n");
4193 
4194           PREFETCH ();
4195 
4196           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4197               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4198 	    goto fail;
4199 
4200           SET_REGS_MATCHED ();
4201           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4202           d++;
4203 	  break;
4204 
4205 
4206 	case charset:
4207 	case charset_not:
4208 	  {
4209 	    register unsigned char c;
4210 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
4211 
4212             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4213 
4214 	    PREFETCH ();
4215 	    c = TRANSLATE (*d); /* The character to match.  */
4216 
4217             /* Cast to `unsigned' instead of `unsigned char' in case the
4218                bit list is a full 32 bytes long.  */
4219 	    if (c < (unsigned) (*p * BYTEWIDTH)
4220 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4221 	      not = !not;
4222 
4223 	    p += 1 + *p;
4224 
4225 	    if (!not) goto fail;
4226 
4227 	    SET_REGS_MATCHED ();
4228             d++;
4229 	    break;
4230 	  }
4231 
4232 
4233         /* The beginning of a group is represented by start_memory.
4234            The arguments are the register number in the next byte, and the
4235            number of groups inner to this one in the next.  The text
4236            matched within the group is recorded (in the internal
4237            registers data structure) under the register number.  */
4238         case start_memory:
4239 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4240 
4241           /* Find out if this group can match the empty string.  */
4242 	  p1 = p;		/* To send to group_match_null_string_p.  */
4243 
4244           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4245             REG_MATCH_NULL_STRING_P (reg_info[*p])
4246               = group_match_null_string_p (&p1, pend, reg_info);
4247 
4248           /* Save the position in the string where we were the last time
4249              we were at this open-group operator in case the group is
4250              operated upon by a repetition operator, e.g., with `(a*)*b'
4251              against `ab'; then we want to ignore where we are now in
4252              the string in case this attempt to match fails.  */
4253           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4254                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4255                              : regstart[*p];
4256 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
4257 			 POINTER_TO_OFFSET (old_regstart[*p]));
4258 
4259           regstart[*p] = d;
4260 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4261 
4262           IS_ACTIVE (reg_info[*p]) = 1;
4263           MATCHED_SOMETHING (reg_info[*p]) = 0;
4264 
4265 	  /* Clear this whenever we change the register activity status.  */
4266 	  set_regs_matched_done = 0;
4267 
4268           /* This is the new highest active register.  */
4269           highest_active_reg = *p;
4270 
4271           /* If nothing was active before, this is the new lowest active
4272              register.  */
4273           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4274             lowest_active_reg = *p;
4275 
4276           /* Move past the register number and inner group count.  */
4277           p += 2;
4278 	  just_past_start_mem = p;
4279 
4280           break;
4281 
4282 
4283         /* The stop_memory opcode represents the end of a group.  Its
4284            arguments are the same as start_memory's: the register
4285            number, and the number of inner groups.  */
4286 	case stop_memory:
4287 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4288 
4289           /* We need to save the string position the last time we were at
4290              this close-group operator in case the group is operated
4291              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4292              against `aba'; then we want to ignore where we are now in
4293              the string in case this attempt to match fails.  */
4294           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4295                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
4296 			   : regend[*p];
4297 	  DEBUG_PRINT2 ("      old_regend: %d\n",
4298 			 POINTER_TO_OFFSET (old_regend[*p]));
4299 
4300           regend[*p] = d;
4301 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4302 
4303           /* This register isn't active anymore.  */
4304           IS_ACTIVE (reg_info[*p]) = 0;
4305 
4306 	  /* Clear this whenever we change the register activity status.  */
4307 	  set_regs_matched_done = 0;
4308 
4309           /* If this was the only register active, nothing is active
4310              anymore.  */
4311           if (lowest_active_reg == highest_active_reg)
4312             {
4313               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4314               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4315             }
4316           else
4317             { /* We must scan for the new highest active register, since
4318                  it isn't necessarily one less than now: consider
4319                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4320                  new highest active register is 1.  */
4321               unsigned char r = *p - 1;
4322               while (r > 0 && !IS_ACTIVE (reg_info[r]))
4323                 r--;
4324 
4325               /* If we end up at register zero, that means that we saved
4326                  the registers as the result of an `on_failure_jump', not
4327                  a `start_memory', and we jumped to past the innermost
4328                  `stop_memory'.  For example, in ((.)*) we save
4329                  registers 1 and 2 as a result of the *, but when we pop
4330                  back to the second ), we are at the stop_memory 1.
4331                  Thus, nothing is active.  */
4332 	      if (r == 0)
4333                 {
4334                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4335                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4336                 }
4337               else
4338                 highest_active_reg = r;
4339             }
4340 
4341           /* If just failed to match something this time around with a
4342              group that's operated on by a repetition operator, try to
4343              force exit from the ``loop'', and restore the register
4344              information for this group that we had before trying this
4345              last match.  */
4346           if ((!MATCHED_SOMETHING (reg_info[*p])
4347                || just_past_start_mem == p - 1)
4348 	      && (p + 2) < pend)
4349             {
4350               boolean is_a_jump_n = false;
4351 
4352               p1 = p + 2;
4353               mcnt = 0;
4354               switch ((re_opcode_t) *p1++)
4355                 {
4356                   case jump_n:
4357 		    is_a_jump_n = true;
4358                   case pop_failure_jump:
4359 		  case maybe_pop_jump:
4360 		  case jump:
4361 		  case dummy_failure_jump:
4362                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4363 		    if (is_a_jump_n)
4364 		      p1 += 2;
4365                     break;
4366 
4367                   default:
4368                     /* do nothing */ ;
4369                 }
4370 	      p1 += mcnt;
4371 
4372               /* If the next operation is a jump backwards in the pattern
4373 	         to an on_failure_jump right before the start_memory
4374                  corresponding to this stop_memory, exit from the loop
4375                  by forcing a failure after pushing on the stack the
4376                  on_failure_jump's jump in the pattern, and d.  */
4377               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4378                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4379 		{
4380                   /* If this group ever matched anything, then restore
4381                      what its registers were before trying this last
4382                      failed match, e.g., with `(a*)*b' against `ab' for
4383                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
4384                      against `aba' for regend[3].
4385 
4386                      Also restore the registers for inner groups for,
4387                      e.g., `((a*)(b*))*' against `aba' (register 3 would
4388                      otherwise get trashed).  */
4389 
4390                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4391 		    {
4392 		      unsigned r;
4393 
4394                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4395 
4396 		      /* Restore this and inner groups' (if any) registers.  */
4397                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4398 			   r++)
4399                         {
4400                           regstart[r] = old_regstart[r];
4401 
4402                           /* xx why this test?  */
4403                           if (old_regend[r] >= regstart[r])
4404                             regend[r] = old_regend[r];
4405                         }
4406                     }
4407 		  p1++;
4408                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4409                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4410 
4411                   goto fail;
4412                 }
4413             }
4414 
4415           /* Move past the register number and the inner group count.  */
4416           p += 2;
4417           break;
4418 
4419 
4420 	/* \<digit> has been turned into a `duplicate' command which is
4421            followed by the numeric value of <digit> as the register number.  */
4422         case duplicate:
4423 	  {
4424 	    register const char *d2, *dend2;
4425 	    int regno = *p++;   /* Get which register to match against.  */
4426 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4427 
4428 	    /* Can't back reference a group which we've never matched.  */
4429             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4430               goto fail;
4431 
4432             /* Where in input to try to start matching.  */
4433             d2 = regstart[regno];
4434 
4435             /* Where to stop matching; if both the place to start and
4436                the place to stop matching are in the same string, then
4437                set to the place to stop, otherwise, for now have to use
4438                the end of the first string.  */
4439 
4440             dend2 = ((FIRST_STRING_P (regstart[regno])
4441 		      == FIRST_STRING_P (regend[regno]))
4442 		     ? regend[regno] : end_match_1);
4443 	    for (;;)
4444 	      {
4445 		/* If necessary, advance to next segment in register
4446                    contents.  */
4447 		while (d2 == dend2)
4448 		  {
4449 		    if (dend2 == end_match_2) break;
4450 		    if (dend2 == regend[regno]) break;
4451 
4452                     /* End of string1 => advance to string2. */
4453                     d2 = string2;
4454                     dend2 = regend[regno];
4455 		  }
4456 		/* At end of register contents => success */
4457 		if (d2 == dend2) break;
4458 
4459 		/* If necessary, advance to next segment in data.  */
4460 		PREFETCH ();
4461 
4462 		/* How many characters left in this segment to match.  */
4463 		mcnt = dend - d;
4464 
4465 		/* Want how many consecutive characters we can match in
4466                    one shot, so, if necessary, adjust the count.  */
4467                 if (mcnt > dend2 - d2)
4468 		  mcnt = dend2 - d2;
4469 
4470 		/* Compare that many; failure if mismatch, else move
4471                    past them.  */
4472 		if (translate
4473                     ? bcmp_translate (d, d2, mcnt, translate)
4474                     : memcmp (d, d2, mcnt))
4475 		  goto fail;
4476 		d += mcnt, d2 += mcnt;
4477 
4478 		/* Do this because we've match some characters.  */
4479 		SET_REGS_MATCHED ();
4480 	      }
4481 	  }
4482 	  break;
4483 
4484 
4485         /* begline matches the empty string at the beginning of the string
4486            (unless `not_bol' is set in `bufp'), and, if
4487            `newline_anchor' is set, after newlines.  */
4488 	case begline:
4489           DEBUG_PRINT1 ("EXECUTING begline.\n");
4490 
4491           if (AT_STRINGS_BEG (d))
4492             {
4493               if (!bufp->not_bol) break;
4494             }
4495           else if (d[-1] == '\n' && bufp->newline_anchor)
4496             {
4497               break;
4498             }
4499           /* In all other cases, we fail.  */
4500           goto fail;
4501 
4502 
4503         /* endline is the dual of begline.  */
4504 	case endline:
4505           DEBUG_PRINT1 ("EXECUTING endline.\n");
4506 
4507           if (AT_STRINGS_END (d))
4508             {
4509               if (!bufp->not_eol) break;
4510             }
4511 
4512           /* We have to ``prefetch'' the next character.  */
4513           else if ((d == end1 ? *string2 : *d) == '\n'
4514                    && bufp->newline_anchor)
4515             {
4516               break;
4517             }
4518           goto fail;
4519 
4520 
4521 	/* Match at the very beginning of the data.  */
4522         case begbuf:
4523           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4524           if (AT_STRINGS_BEG (d))
4525             break;
4526           goto fail;
4527 
4528 
4529 	/* Match at the very end of the data.  */
4530         case endbuf:
4531           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4532 	  if (AT_STRINGS_END (d))
4533 	    break;
4534           goto fail;
4535 
4536 
4537         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4538            pushes NULL as the value for the string on the stack.  Then
4539            `pop_failure_point' will keep the current value for the
4540            string, instead of restoring it.  To see why, consider
4541            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
4542            then the . fails against the \n.  But the next thing we want
4543            to do is match the \n against the \n; if we restored the
4544            string value, we would be back at the foo.
4545 
4546            Because this is used only in specific cases, we don't need to
4547            check all the things that `on_failure_jump' does, to make
4548            sure the right things get saved on the stack.  Hence we don't
4549            share its code.  The only reason to push anything on the
4550            stack at all is that otherwise we would have to change
4551            `anychar's code to do something besides goto fail in this
4552            case; that seems worse than this.  */
4553         case on_failure_keep_string_jump:
4554           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4555 
4556           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4557 #ifdef _LIBC
4558           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
4559 #else
4560           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4561 #endif
4562 
4563           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4564           break;
4565 
4566 
4567 	/* Uses of on_failure_jump:
4568 
4569            Each alternative starts with an on_failure_jump that points
4570            to the beginning of the next alternative.  Each alternative
4571            except the last ends with a jump that in effect jumps past
4572            the rest of the alternatives.  (They really jump to the
4573            ending jump of the following alternative, because tensioning
4574            these jumps is a hassle.)
4575 
4576            Repeats start with an on_failure_jump that points past both
4577            the repetition text and either the following jump or
4578            pop_failure_jump back to this on_failure_jump.  */
4579 	case on_failure_jump:
4580         on_failure:
4581           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4582 
4583           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4584 #ifdef _LIBC
4585           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
4586 #else
4587           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4588 #endif
4589 
4590           /* If this on_failure_jump comes right before a group (i.e.,
4591              the original * applied to a group), save the information
4592              for that group and all inner ones, so that if we fail back
4593              to this point, the group's information will be correct.
4594              For example, in \(a*\)*\1, we need the preceding group,
4595              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
4596 
4597           /* We can't use `p' to check ahead because we push
4598              a failure point to `p + mcnt' after we do this.  */
4599           p1 = p;
4600 
4601           /* We need to skip no_op's before we look for the
4602              start_memory in case this on_failure_jump is happening as
4603              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4604              against aba.  */
4605           while (p1 < pend && (re_opcode_t) *p1 == no_op)
4606             p1++;
4607 
4608           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4609             {
4610               /* We have a new highest active register now.  This will
4611                  get reset at the start_memory we are about to get to,
4612                  but we will have saved all the registers relevant to
4613                  this repetition op, as described above.  */
4614               highest_active_reg = *(p1 + 1) + *(p1 + 2);
4615               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4616                 lowest_active_reg = *(p1 + 1);
4617             }
4618 
4619           DEBUG_PRINT1 (":\n");
4620           PUSH_FAILURE_POINT (p + mcnt, d, -2);
4621           break;
4622 
4623 
4624         /* A smart repeat ends with `maybe_pop_jump'.
4625 	   We change it to either `pop_failure_jump' or `jump'.  */
4626         case maybe_pop_jump:
4627           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4628           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4629           {
4630 	    register unsigned char *p2 = p;
4631 
4632             /* Compare the beginning of the repeat with what in the
4633                pattern follows its end. If we can establish that there
4634                is nothing that they would both match, i.e., that we
4635                would have to backtrack because of (as in, e.g., `a*a')
4636                then we can change to pop_failure_jump, because we'll
4637                never have to backtrack.
4638 
4639                This is not true in the case of alternatives: in
4640                `(a|ab)*' we do need to backtrack to the `ab' alternative
4641                (e.g., if the string was `ab').  But instead of trying to
4642                detect that here, the alternative has put on a dummy
4643                failure point which is what we will end up popping.  */
4644 
4645 	    /* Skip over open/close-group commands.
4646 	       If what follows this loop is a ...+ construct,
4647 	       look at what begins its body, since we will have to
4648 	       match at least one of that.  */
4649 	    while (1)
4650 	      {
4651 		if (p2 + 2 < pend
4652 		    && ((re_opcode_t) *p2 == stop_memory
4653 			|| (re_opcode_t) *p2 == start_memory))
4654 		  p2 += 3;
4655 		else if (p2 + 6 < pend
4656 			 && (re_opcode_t) *p2 == dummy_failure_jump)
4657 		  p2 += 6;
4658 		else
4659 		  break;
4660 	      }
4661 
4662 	    p1 = p + mcnt;
4663 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4664 	       to the `maybe_finalize_jump' of this case.  Examine what
4665 	       follows.  */
4666 
4667             /* If we're at the end of the pattern, we can change.  */
4668             if (p2 == pend)
4669 	      {
4670 		/* Consider what happens when matching ":\(.*\)"
4671 		   against ":/".  I don't really understand this code
4672 		   yet.  */
4673   	        p[-3] = (unsigned char) pop_failure_jump;
4674                 DEBUG_PRINT1
4675                   ("  End of pattern: change to `pop_failure_jump'.\n");
4676               }
4677 
4678             else if ((re_opcode_t) *p2 == exactn
4679 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4680 	      {
4681 		register unsigned char c
4682                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4683 
4684                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4685                   {
4686   		    p[-3] = (unsigned char) pop_failure_jump;
4687                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4688                                   c, p1[5]);
4689                   }
4690 
4691 		else if ((re_opcode_t) p1[3] == charset
4692 			 || (re_opcode_t) p1[3] == charset_not)
4693 		  {
4694 		    int not = (re_opcode_t) p1[3] == charset_not;
4695 
4696 		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4697 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4698 		      not = !not;
4699 
4700                     /* `not' is equal to 1 if c would match, which means
4701                         that we can't change to pop_failure_jump.  */
4702 		    if (!not)
4703                       {
4704   		        p[-3] = (unsigned char) pop_failure_jump;
4705                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4706                       }
4707 		  }
4708 	      }
4709             else if ((re_opcode_t) *p2 == charset)
4710 	      {
4711 #ifdef DEBUG
4712 		register unsigned char c
4713                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4714 #endif
4715 
4716 #if 0
4717                 if ((re_opcode_t) p1[3] == exactn
4718 		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4719 			  && (p2[2 + p1[5] / BYTEWIDTH]
4720 			      & (1 << (p1[5] % BYTEWIDTH)))))
4721 #else
4722                 if ((re_opcode_t) p1[3] == exactn
4723 		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
4724 			  && (p2[2 + p1[4] / BYTEWIDTH]
4725 			      & (1 << (p1[4] % BYTEWIDTH)))))
4726 #endif
4727                   {
4728   		    p[-3] = (unsigned char) pop_failure_jump;
4729                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4730                                   c, p1[5]);
4731                   }
4732 
4733 		else if ((re_opcode_t) p1[3] == charset_not)
4734 		  {
4735 		    int idx;
4736 		    /* We win if the charset_not inside the loop
4737 		       lists every character listed in the charset after.  */
4738 		    for (idx = 0; idx < (int) p2[1]; idx++)
4739 		      if (! (p2[2 + idx] == 0
4740 			     || (idx < (int) p1[4]
4741 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4742 			break;
4743 
4744 		    if (idx == p2[1])
4745                       {
4746   		        p[-3] = (unsigned char) pop_failure_jump;
4747                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4748                       }
4749 		  }
4750 		else if ((re_opcode_t) p1[3] == charset)
4751 		  {
4752 		    int idx;
4753 		    /* We win if the charset inside the loop
4754 		       has no overlap with the one after the loop.  */
4755 		    for (idx = 0;
4756 			 idx < (int) p2[1] && idx < (int) p1[4];
4757 			 idx++)
4758 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
4759 			break;
4760 
4761 		    if (idx == p2[1] || idx == p1[4])
4762                       {
4763   		        p[-3] = (unsigned char) pop_failure_jump;
4764                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4765                       }
4766 		  }
4767 	      }
4768 	  }
4769 	  p -= 2;		/* Point at relative address again.  */
4770 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4771 	    {
4772 	      p[-1] = (unsigned char) jump;
4773               DEBUG_PRINT1 ("  Match => jump.\n");
4774 	      goto unconditional_jump;
4775 	    }
4776         /* Note fall through.  */
4777 
4778 
4779 	/* The end of a simple repeat has a pop_failure_jump back to
4780            its matching on_failure_jump, where the latter will push a
4781            failure point.  The pop_failure_jump takes off failure
4782            points put on by this pop_failure_jump's matching
4783            on_failure_jump; we got through the pattern to here from the
4784            matching on_failure_jump, so didn't fail.  */
4785         case pop_failure_jump:
4786           {
4787             /* We need to pass separate storage for the lowest and
4788                highest registers, even though we don't care about the
4789                actual values.  Otherwise, we will restore only one
4790                register from the stack, since lowest will == highest in
4791                `pop_failure_point'.  */
4792             active_reg_t dummy_low_reg, dummy_high_reg;
4793             unsigned char *pdummy;
4794             const char *sdummy;
4795 
4796             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4797             POP_FAILURE_POINT (sdummy, pdummy,
4798                                dummy_low_reg, dummy_high_reg,
4799                                reg_dummy, reg_dummy, reg_info_dummy);
4800           }
4801 	  /* Note fall through.  */
4802 
4803 	unconditional_jump:
4804 #ifdef _LIBC
4805 	  DEBUG_PRINT2 ("\n%p: ", p);
4806 #else
4807 	  DEBUG_PRINT2 ("\n0x%x: ", p);
4808 #endif
4809           /* Note fall through.  */
4810 
4811         /* Unconditionally jump (without popping any failure points).  */
4812         case jump:
4813 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4814           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4815 	  p += mcnt;				/* Do the jump.  */
4816 #ifdef _LIBC
4817           DEBUG_PRINT2 ("(to %p).\n", p);
4818 #else
4819           DEBUG_PRINT2 ("(to 0x%x).\n", p);
4820 #endif
4821 	  break;
4822 
4823 
4824         /* We need this opcode so we can detect where alternatives end
4825            in `group_match_null_string_p' et al.  */
4826         case jump_past_alt:
4827           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4828           goto unconditional_jump;
4829 
4830 
4831         /* Normally, the on_failure_jump pushes a failure point, which
4832            then gets popped at pop_failure_jump.  We will end up at
4833            pop_failure_jump, also, and with a pattern of, say, `a+', we
4834            are skipping over the on_failure_jump, so we have to push
4835            something meaningless for pop_failure_jump to pop.  */
4836         case dummy_failure_jump:
4837           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4838           /* It doesn't matter what we push for the string here.  What
4839              the code at `fail' tests is the value for the pattern.  */
4840           PUSH_FAILURE_POINT (NULL, NULL, -2);
4841           goto unconditional_jump;
4842 
4843 
4844         /* At the end of an alternative, we need to push a dummy failure
4845            point in case we are followed by a `pop_failure_jump', because
4846            we don't want the failure point for the alternative to be
4847            popped.  For example, matching `(a|ab)*' against `aab'
4848            requires that we match the `ab' alternative.  */
4849         case push_dummy_failure:
4850           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4851           /* See comments just above at `dummy_failure_jump' about the
4852              two zeroes.  */
4853           PUSH_FAILURE_POINT (NULL, NULL, -2);
4854           break;
4855 
4856         /* Have to succeed matching what follows at least n times.
4857            After that, handle like `on_failure_jump'.  */
4858         case succeed_n:
4859           EXTRACT_NUMBER (mcnt, p + 2);
4860           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4861 
4862           assert (mcnt >= 0);
4863           /* Originally, this is how many times we HAVE to succeed.  */
4864           if (mcnt > 0)
4865             {
4866                mcnt--;
4867 	       p += 2;
4868                STORE_NUMBER_AND_INCR (p, mcnt);
4869 #ifdef _LIBC
4870                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
4871 #else
4872                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
4873 #endif
4874             }
4875 	  else if (mcnt == 0)
4876             {
4877 #ifdef _LIBC
4878               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
4879 #else
4880               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4881 #endif
4882 	      p[2] = (unsigned char) no_op;
4883               p[3] = (unsigned char) no_op;
4884               goto on_failure;
4885             }
4886           break;
4887 
4888         case jump_n:
4889           EXTRACT_NUMBER (mcnt, p + 2);
4890           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4891 
4892           /* Originally, this is how many times we CAN jump.  */
4893           if (mcnt)
4894             {
4895                mcnt--;
4896                STORE_NUMBER (p + 2, mcnt);
4897 #ifdef _LIBC
4898                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
4899 #else
4900                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
4901 #endif
4902 	       goto unconditional_jump;
4903             }
4904           /* If don't have to jump any more, skip over the rest of command.  */
4905 	  else
4906 	    p += 4;
4907           break;
4908 
4909 	case set_number_at:
4910 	  {
4911             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4912 
4913             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4914             p1 = p + mcnt;
4915             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4916 #ifdef _LIBC
4917             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
4918 #else
4919             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4920 #endif
4921 	    STORE_NUMBER (p1, mcnt);
4922             break;
4923           }
4924 
4925 #if 0
4926 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
4927 	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
4928 	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
4929 	   macro and introducing temporary variables works around the bug.  */
4930 
4931 	case wordbound:
4932 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4933 	  if (AT_WORD_BOUNDARY (d))
4934 	    break;
4935 	  goto fail;
4936 
4937 	case notwordbound:
4938 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4939 	  if (AT_WORD_BOUNDARY (d))
4940 	    goto fail;
4941 	  break;
4942 #else
4943 	case wordbound:
4944 	{
4945 	  boolean prevchar, thischar;
4946 
4947 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4948 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4949 	    break;
4950 
4951 	  prevchar = WORDCHAR_P (d - 1);
4952 	  thischar = WORDCHAR_P (d);
4953 	  if (prevchar != thischar)
4954 	    break;
4955 	  goto fail;
4956 	}
4957 
4958       case notwordbound:
4959 	{
4960 	  boolean prevchar, thischar;
4961 
4962 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4963 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4964 	    goto fail;
4965 
4966 	  prevchar = WORDCHAR_P (d - 1);
4967 	  thischar = WORDCHAR_P (d);
4968 	  if (prevchar != thischar)
4969 	    goto fail;
4970 	  break;
4971 	}
4972 #endif
4973 
4974 	case wordbeg:
4975           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4976 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4977 	    break;
4978           goto fail;
4979 
4980 	case wordend:
4981           DEBUG_PRINT1 ("EXECUTING wordend.\n");
4982 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4983               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4984 	    break;
4985           goto fail;
4986 
4987 #ifdef emacs
4988   	case before_dot:
4989           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4990  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4991   	    goto fail;
4992   	  break;
4993 
4994   	case at_dot:
4995           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4996  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
4997   	    goto fail;
4998   	  break;
4999 
5000   	case after_dot:
5001           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5002           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5003   	    goto fail;
5004   	  break;
5005 
5006 	case syntaxspec:
5007           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5008 	  mcnt = *p++;
5009 	  goto matchsyntax;
5010 
5011         case wordchar:
5012           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5013 	  mcnt = (int) Sword;
5014         matchsyntax:
5015 	  PREFETCH ();
5016 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5017 	  d++;
5018 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5019 	    goto fail;
5020           SET_REGS_MATCHED ();
5021 	  break;
5022 
5023 	case notsyntaxspec:
5024           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5025 	  mcnt = *p++;
5026 	  goto matchnotsyntax;
5027 
5028         case notwordchar:
5029           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5030 	  mcnt = (int) Sword;
5031         matchnotsyntax:
5032 	  PREFETCH ();
5033 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5034 	  d++;
5035 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5036 	    goto fail;
5037 	  SET_REGS_MATCHED ();
5038           break;
5039 
5040 #else /* not emacs */
5041 	case wordchar:
5042           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5043 	  PREFETCH ();
5044           if (!WORDCHAR_P (d))
5045             goto fail;
5046 	  SET_REGS_MATCHED ();
5047           d++;
5048 	  break;
5049 
5050 	case notwordchar:
5051           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5052 	  PREFETCH ();
5053 	  if (WORDCHAR_P (d))
5054             goto fail;
5055           SET_REGS_MATCHED ();
5056           d++;
5057 	  break;
5058 #endif /* not emacs */
5059 
5060         default:
5061           abort ();
5062 	}
5063       continue;  /* Successfully executed one pattern command; keep going.  */
5064 
5065 
5066     /* We goto here if a matching operation fails. */
5067     fail:
5068       if (!FAIL_STACK_EMPTY ())
5069 	{ /* A restart point is known.  Restore to that state.  */
5070           DEBUG_PRINT1 ("\nFAIL:\n");
5071           POP_FAILURE_POINT (d, p,
5072                              lowest_active_reg, highest_active_reg,
5073                              regstart, regend, reg_info);
5074 
5075           /* If this failure point is a dummy, try the next one.  */
5076           if (!p)
5077 	    goto fail;
5078 
5079           /* If we failed to the end of the pattern, don't examine *p.  */
5080 	  assert (p <= pend);
5081           if (p < pend)
5082             {
5083               boolean is_a_jump_n = false;
5084 
5085               /* If failed to a backwards jump that's part of a repetition
5086                  loop, need to pop this failure point and use the next one.  */
5087               switch ((re_opcode_t) *p)
5088                 {
5089                 case jump_n:
5090                   is_a_jump_n = true;
5091                 case maybe_pop_jump:
5092                 case pop_failure_jump:
5093                 case jump:
5094                   p1 = p + 1;
5095                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5096                   p1 += mcnt;
5097 
5098                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5099                       || (!is_a_jump_n
5100                           && (re_opcode_t) *p1 == on_failure_jump))
5101                     goto fail;
5102                   break;
5103                 default:
5104                   /* do nothing */ ;
5105                 }
5106             }
5107 
5108           if (d >= string1 && d <= end1)
5109 	    dend = end_match_1;
5110         }
5111       else
5112         break;   /* Matching at this starting point really fails.  */
5113     } /* for (;;) */
5114 
5115   if (best_regs_set)
5116     goto restore_best_regs;
5117 
5118   FREE_VARIABLES ();
5119 
5120   return -1;         			/* Failure to match.  */
5121 } /* re_match_2 */
5122 
5123 /* Subroutine definitions for re_match_2.  */
5124 
5125 
5126 /* We are passed P pointing to a register number after a start_memory.
5127 
5128    Return true if the pattern up to the corresponding stop_memory can
5129    match the empty string, and false otherwise.
5130 
5131    If we find the matching stop_memory, sets P to point to one past its number.
5132    Otherwise, sets P to an undefined byte less than or equal to END.
5133 
5134    We don't handle duplicates properly (yet).  */
5135 
5136 static boolean
group_match_null_string_p(p,end,reg_info)5137 group_match_null_string_p (p, end, reg_info)
5138     unsigned char **p, *end;
5139     register_info_type *reg_info;
5140 {
5141   int mcnt;
5142   /* Point to after the args to the start_memory.  */
5143   unsigned char *p1 = *p + 2;
5144 
5145   while (p1 < end)
5146     {
5147       /* Skip over opcodes that can match nothing, and return true or
5148 	 false, as appropriate, when we get to one that can't, or to the
5149          matching stop_memory.  */
5150 
5151       switch ((re_opcode_t) *p1)
5152         {
5153         /* Could be either a loop or a series of alternatives.  */
5154         case on_failure_jump:
5155           p1++;
5156           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5157 
5158           /* If the next operation is not a jump backwards in the
5159 	     pattern.  */
5160 
5161 	  if (mcnt >= 0)
5162 	    {
5163               /* Go through the on_failure_jumps of the alternatives,
5164                  seeing if any of the alternatives cannot match nothing.
5165                  The last alternative starts with only a jump,
5166                  whereas the rest start with on_failure_jump and end
5167                  with a jump, e.g., here is the pattern for `a|b|c':
5168 
5169                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5170                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5171                  /exactn/1/c
5172 
5173                  So, we have to first go through the first (n-1)
5174                  alternatives and then deal with the last one separately.  */
5175 
5176 
5177               /* Deal with the first (n-1) alternatives, which start
5178                  with an on_failure_jump (see above) that jumps to right
5179                  past a jump_past_alt.  */
5180 
5181               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5182                 {
5183                   /* `mcnt' holds how many bytes long the alternative
5184                      is, including the ending `jump_past_alt' and
5185                      its number.  */
5186 
5187                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5188 				                      reg_info))
5189                     return false;
5190 
5191                   /* Move to right after this alternative, including the
5192 		     jump_past_alt.  */
5193                   p1 += mcnt;
5194 
5195                   /* Break if it's the beginning of an n-th alternative
5196                      that doesn't begin with an on_failure_jump.  */
5197                   if ((re_opcode_t) *p1 != on_failure_jump)
5198                     break;
5199 
5200 		  /* Still have to check that it's not an n-th
5201 		     alternative that starts with an on_failure_jump.  */
5202 		  p1++;
5203                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5204                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5205                     {
5206 		      /* Get to the beginning of the n-th alternative.  */
5207                       p1 -= 3;
5208                       break;
5209                     }
5210                 }
5211 
5212               /* Deal with the last alternative: go back and get number
5213                  of the `jump_past_alt' just before it.  `mcnt' contains
5214                  the length of the alternative.  */
5215               EXTRACT_NUMBER (mcnt, p1 - 2);
5216 
5217               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5218                 return false;
5219 
5220               p1 += mcnt;	/* Get past the n-th alternative.  */
5221             } /* if mcnt > 0 */
5222           break;
5223 
5224 
5225         case stop_memory:
5226 	  assert (p1[1] == **p);
5227           *p = p1 + 2;
5228           return true;
5229 
5230 
5231         default:
5232           if (!common_op_match_null_string_p (&p1, end, reg_info))
5233             return false;
5234         }
5235     } /* while p1 < end */
5236 
5237   return false;
5238 } /* group_match_null_string_p */
5239 
5240 
5241 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5242    It expects P to be the first byte of a single alternative and END one
5243    byte past the last. The alternative can contain groups.  */
5244 
5245 static boolean
alt_match_null_string_p(p,end,reg_info)5246 alt_match_null_string_p (p, end, reg_info)
5247     unsigned char *p, *end;
5248     register_info_type *reg_info;
5249 {
5250   int mcnt;
5251   unsigned char *p1 = p;
5252 
5253   while (p1 < end)
5254     {
5255       /* Skip over opcodes that can match nothing, and break when we get
5256          to one that can't.  */
5257 
5258       switch ((re_opcode_t) *p1)
5259         {
5260 	/* It's a loop.  */
5261         case on_failure_jump:
5262           p1++;
5263           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5264           p1 += mcnt;
5265           break;
5266 
5267 	default:
5268           if (!common_op_match_null_string_p (&p1, end, reg_info))
5269             return false;
5270         }
5271     }  /* while p1 < end */
5272 
5273   return true;
5274 } /* alt_match_null_string_p */
5275 
5276 
5277 /* Deals with the ops common to group_match_null_string_p and
5278    alt_match_null_string_p.
5279 
5280    Sets P to one after the op and its arguments, if any.  */
5281 
5282 static boolean
common_op_match_null_string_p(p,end,reg_info)5283 common_op_match_null_string_p (p, end, reg_info)
5284     unsigned char **p, *end;
5285     register_info_type *reg_info;
5286 {
5287   int mcnt;
5288   boolean ret;
5289   int reg_no;
5290   unsigned char *p1 = *p;
5291 
5292   switch ((re_opcode_t) *p1++)
5293     {
5294     case no_op:
5295     case begline:
5296     case endline:
5297     case begbuf:
5298     case endbuf:
5299     case wordbeg:
5300     case wordend:
5301     case wordbound:
5302     case notwordbound:
5303 #ifdef emacs
5304     case before_dot:
5305     case at_dot:
5306     case after_dot:
5307 #endif
5308       break;
5309 
5310     case start_memory:
5311       reg_no = *p1;
5312       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5313       ret = group_match_null_string_p (&p1, end, reg_info);
5314 
5315       /* Have to set this here in case we're checking a group which
5316          contains a group and a back reference to it.  */
5317 
5318       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5319         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5320 
5321       if (!ret)
5322         return false;
5323       break;
5324 
5325     /* If this is an optimized succeed_n for zero times, make the jump.  */
5326     case jump:
5327       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5328       if (mcnt >= 0)
5329         p1 += mcnt;
5330       else
5331         return false;
5332       break;
5333 
5334     case succeed_n:
5335       /* Get to the number of times to succeed.  */
5336       p1 += 2;
5337       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5338 
5339       if (mcnt == 0)
5340         {
5341           p1 -= 4;
5342           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5343           p1 += mcnt;
5344         }
5345       else
5346         return false;
5347       break;
5348 
5349     case duplicate:
5350       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5351         return false;
5352       break;
5353 
5354     case set_number_at:
5355       p1 += 4;
5356 
5357     default:
5358       /* All other opcodes mean we cannot match the empty string.  */
5359       return false;
5360   }
5361 
5362   *p = p1;
5363   return true;
5364 } /* common_op_match_null_string_p */
5365 
5366 
5367 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5368    bytes; nonzero otherwise.  */
5369 
5370 static int
bcmp_translate(s1,s2,len,translate)5371 bcmp_translate (s1, s2, len, translate)
5372      const char *s1, *s2;
5373      register int len;
5374      RE_TRANSLATE_TYPE translate;
5375 {
5376   register const unsigned char *p1 = (const unsigned char *) s1;
5377   register const unsigned char *p2 = (const unsigned char *) s2;
5378   while (len)
5379     {
5380       if (translate[*p1++] != translate[*p2++]) return 1;
5381       len--;
5382     }
5383   return 0;
5384 }
5385 
5386 /* Entry points for GNU code.  */
5387 
5388 /* re_compile_pattern is the GNU regular expression compiler: it
5389    compiles PATTERN (of length SIZE) and puts the result in BUFP.
5390    Returns 0 if the pattern was valid, otherwise an error string.
5391 
5392    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5393    are set in BUFP on entry.
5394 
5395    We call regex_compile to do the actual compilation.  */
5396 
5397 const char *
re_compile_pattern(pattern,length,bufp)5398 re_compile_pattern (pattern, length, bufp)
5399      const char *pattern;
5400      size_t length;
5401      struct re_pattern_buffer *bufp;
5402 {
5403   reg_errcode_t ret;
5404 
5405   /* GNU code is written to assume at least RE_NREGS registers will be set
5406      (and at least one extra will be -1).  */
5407   bufp->regs_allocated = REGS_UNALLOCATED;
5408 
5409   /* And GNU code determines whether or not to get register information
5410      by passing null for the REGS argument to re_match, etc., not by
5411      setting no_sub.  */
5412   bufp->no_sub = 0;
5413 
5414   /* Match anchors at newline.  */
5415   bufp->newline_anchor = 1;
5416 
5417   ret = regex_compile (pattern, length, re_syntax_options, bufp);
5418 
5419   if (!ret)
5420     return NULL;
5421   return gettext (re_error_msgid[(int) ret]);
5422 }
5423 #ifdef _LIBC
5424 weak_alias (__re_compile_pattern, re_compile_pattern)
5425 #endif
5426 
5427 /* Entry points compatible with 4.2 BSD regex library.  We don't define
5428    them unless specifically requested.  */
5429 
5430 #if defined _REGEX_RE_COMP || defined _LIBC
5431 
5432 /* BSD has one and only one pattern buffer.  */
5433 static struct re_pattern_buffer re_comp_buf;
5434 
5435 char *
5436 #ifdef _LIBC
5437 /* Make these definitions weak in libc, so POSIX programs can redefine
5438    these names if they don't use our functions, and still use
5439    regcomp/regexec below without link errors.  */
5440 weak_function
5441 #endif
re_comp(s)5442 re_comp (s)
5443     const char *s;
5444 {
5445   reg_errcode_t ret;
5446 
5447   if (!s)
5448     {
5449       if (!re_comp_buf.buffer)
5450 	return gettext ("No previous regular expression");
5451       return 0;
5452     }
5453 
5454   if (!re_comp_buf.buffer)
5455     {
5456       re_comp_buf.buffer = (unsigned char *) malloc (200);
5457       if (re_comp_buf.buffer == NULL)
5458         return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5459       re_comp_buf.allocated = 200;
5460 
5461       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5462       if (re_comp_buf.fastmap == NULL)
5463 	return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
5464     }
5465 
5466   /* Since `re_exec' always passes NULL for the `regs' argument, we
5467      don't need to initialize the pattern buffer fields which affect it.  */
5468 
5469   /* Match anchors at newlines.  */
5470   re_comp_buf.newline_anchor = 1;
5471 
5472   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5473 
5474   if (!ret)
5475     return NULL;
5476 
5477   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5478   return (char *) gettext (re_error_msgid[(int) ret]);
5479 }
5480 
5481 
5482 int
5483 #ifdef _LIBC
5484 weak_function
5485 #endif
re_exec(s)5486 re_exec (s)
5487     const char *s;
5488 {
5489   const int len = strlen (s);
5490   return
5491     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5492 }
5493 
5494 #endif /* _REGEX_RE_COMP */
5495 
5496 /* POSIX.2 functions.  Don't define these for Emacs.  */
5497 
5498 #ifndef emacs
5499 
5500 /* regcomp takes a regular expression as a string and compiles it.
5501 
5502    PREG is a regex_t *.  We do not expect any fields to be initialized,
5503    since POSIX says we shouldn't.  Thus, we set
5504 
5505      `buffer' to the compiled pattern;
5506      `used' to the length of the compiled pattern;
5507      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5508        REG_EXTENDED bit in CFLAGS is set; otherwise, to
5509        RE_SYNTAX_POSIX_BASIC;
5510      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5511      `fastmap' to an allocated space for the fastmap;
5512      `fastmap_accurate' to 1;
5513      `re_nsub' to the number of subexpressions in PATTERN.
5514 
5515    PATTERN is the address of the pattern string.
5516 
5517    CFLAGS is a series of bits which affect compilation.
5518 
5519      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5520      use POSIX basic syntax.
5521 
5522      If REG_NEWLINE is set, then . and [^...] don't match newline.
5523      Also, regexec will try a match beginning after every newline.
5524 
5525      If REG_ICASE is set, then we considers upper- and lowercase
5526      versions of letters to be equivalent when matching.
5527 
5528      If REG_NOSUB is set, then when PREG is passed to regexec, that
5529      routine will report only success or failure, and nothing about the
5530      registers.
5531 
5532    It returns 0 if it succeeds, nonzero if it doesn't.  (See gnu-regex.h for
5533    the return codes and their meanings.)  */
5534 
5535 int
regcomp(preg,pattern,cflags)5536 regcomp (preg, pattern, cflags)
5537     regex_t *preg;
5538     const char *pattern;
5539     int cflags;
5540 {
5541   reg_errcode_t ret;
5542   reg_syntax_t syntax
5543     = (cflags & REG_EXTENDED) ?
5544       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5545 
5546   /* regex_compile will allocate the space for the compiled pattern.  */
5547   preg->buffer = 0;
5548   preg->allocated = 0;
5549   preg->used = 0;
5550 
5551   /* Try to allocate space for the fastmap.  */
5552   preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
5553 
5554   if (cflags & REG_ICASE)
5555     {
5556       unsigned i;
5557 
5558       preg->translate
5559 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5560 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
5561       if (preg->translate == NULL)
5562         return (int) REG_ESPACE;
5563 
5564       /* Map uppercase characters to corresponding lowercase ones.  */
5565       for (i = 0; i < CHAR_SET_SIZE; i++)
5566         preg->translate[i] = TOLOWER (i);
5567     }
5568   else
5569     preg->translate = NULL;
5570 
5571   /* If REG_NEWLINE is set, newlines are treated differently.  */
5572   if (cflags & REG_NEWLINE)
5573     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5574       syntax &= ~RE_DOT_NEWLINE;
5575       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5576       /* It also changes the matching behavior.  */
5577       preg->newline_anchor = 1;
5578     }
5579   else
5580     preg->newline_anchor = 0;
5581 
5582   preg->no_sub = !!(cflags & REG_NOSUB);
5583 
5584   /* POSIX says a null character in the pattern terminates it, so we
5585      can use strlen here in compiling the pattern.  */
5586   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5587 
5588   /* POSIX doesn't distinguish between an unmatched open-group and an
5589      unmatched close-group: both are REG_EPAREN.  */
5590   if (ret == REG_ERPAREN) ret = REG_EPAREN;
5591 
5592   if (ret == REG_NOERROR && preg->fastmap)
5593     {
5594       /* Compute the fastmap now, since regexec cannot modify the pattern
5595         buffer.  */
5596       if (re_compile_fastmap (preg) == -2)
5597        {
5598          /* Some error occured while computing the fastmap, just forget
5599             about it.  */
5600          free (preg->fastmap);
5601          preg->fastmap = NULL;
5602        }
5603     }
5604 
5605   return (int) ret;
5606 }
5607 #ifdef _LIBC
5608 weak_alias (__regcomp, regcomp)
5609 #endif
5610 
5611 
5612 /* regexec searches for a given pattern, specified by PREG, in the
5613    string STRING.
5614 
5615    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5616    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5617    least NMATCH elements, and we set them to the offsets of the
5618    corresponding matched substrings.
5619 
5620    EFLAGS specifies `execution flags' which affect matching: if
5621    REG_NOTBOL is set, then ^ does not match at the beginning of the
5622    string; if REG_NOTEOL is set, then $ does not match at the end.
5623 
5624    We return 0 if we find a match and REG_NOMATCH if not.  */
5625 
5626 int
5627 regexec (preg, string, nmatch, pmatch, eflags)
5628     const regex_t *preg;
5629     const char *string;
5630     size_t nmatch;
5631     regmatch_t pmatch[];
5632     int eflags;
5633 {
5634   int ret;
5635   struct re_registers regs;
5636   regex_t private_preg;
5637   int len = strlen (string);
5638   boolean want_reg_info = !preg->no_sub && nmatch > 0;
5639 
5640   private_preg = *preg;
5641 
5642   private_preg.not_bol = !!(eflags & REG_NOTBOL);
5643   private_preg.not_eol = !!(eflags & REG_NOTEOL);
5644 
5645   /* The user has told us exactly how many registers to return
5646      information about, via `nmatch'.  We have to pass that on to the
5647      matching routines.  */
5648   private_preg.regs_allocated = REGS_FIXED;
5649 
5650   if (want_reg_info)
5651     {
5652       regs.num_regs = nmatch;
5653       regs.start = TALLOC (nmatch, regoff_t);
5654       regs.end = TALLOC (nmatch, regoff_t);
5655       if (regs.start == NULL || regs.end == NULL)
5656         return (int) REG_NOMATCH;
5657     }
5658 
5659   /* Perform the searching operation.  */
5660   ret = re_search (&private_preg, string, len,
5661                    /* start: */ 0, /* range: */ len,
5662                    want_reg_info ? &regs : (struct re_registers *) 0);
5663 
5664   /* Copy the register information to the POSIX structure.  */
5665   if (want_reg_info)
5666     {
5667       if (ret >= 0)
5668         {
5669           unsigned r;
5670 
5671           for (r = 0; r < nmatch; r++)
5672             {
5673               pmatch[r].rm_so = regs.start[r];
5674               pmatch[r].rm_eo = regs.end[r];
5675             }
5676         }
5677 
5678       /* If we needed the temporary register info, free the space now.  */
5679       free (regs.start);
5680       free (regs.end);
5681     }
5682 
5683   /* We want zero return to mean success, unlike `re_search'.  */
5684   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5685 }
5686 #ifdef _LIBC
5687 weak_alias (__regexec, regexec)
5688 #endif
5689 
5690 
5691 /* Returns a message corresponding to an error code, ERRCODE, returned
5692    from either regcomp or regexec.   We don't use PREG here.  */
5693 
5694 size_t
5695 regerror (errcode, preg, errbuf, errbuf_size)
5696     int errcode;
5697     const regex_t *preg;
5698     char *errbuf;
5699     size_t errbuf_size;
5700 {
5701   const char *msg;
5702   size_t msg_size;
5703   (void)preg;
5704 
5705   if (errcode < 0
5706       || errcode >= (int) (sizeof (re_error_msgid)
5707 			   / sizeof (re_error_msgid[0])))
5708     /* Only error codes returned by the rest of the code should be passed
5709        to this routine.  If we are given anything else, or if other regex
5710        code generates an invalid error code, then the program has a bug.
5711        Dump core so we can fix it.  */
5712     abort ();
5713 
5714   msg = gettext (re_error_msgid[errcode]);
5715 
5716   msg_size = strlen (msg) + 1; /* Includes the null.  */
5717 
5718   if (errbuf_size != 0)
5719     {
5720       if (msg_size > errbuf_size)
5721         {
5722 #if defined HAVE_MEMPCPY || defined _LIBC
5723 	  *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
5724 #else
5725           memcpy (errbuf, msg, errbuf_size - 1);
5726           errbuf[errbuf_size - 1] = 0;
5727 #endif
5728         }
5729       else
5730         memcpy (errbuf, msg, msg_size);
5731     }
5732 
5733   return msg_size;
5734 }
5735 #ifdef _LIBC
5736 weak_alias (__regerror, regerror)
5737 #endif
5738 
5739 
5740 /* Free dynamically allocated space used by PREG.  */
5741 
5742 void
5743 regfree (preg)
5744     regex_t *preg;
5745 {
5746   if (preg->buffer != NULL)
5747     free (preg->buffer);
5748   preg->buffer = NULL;
5749 
5750   preg->allocated = 0;
5751   preg->used = 0;
5752 
5753   if (preg->fastmap != NULL)
5754     free (preg->fastmap);
5755   preg->fastmap = NULL;
5756   preg->fastmap_accurate = 0;
5757 
5758   if (preg->translate != NULL)
5759     free (preg->translate);
5760   preg->translate = NULL;
5761 }
5762 #ifdef _LIBC
5763 weak_alias (__regfree, regfree)
5764 #endif
5765 
5766 #endif /* not emacs  */
5767