1 /* Extended regular expression matching and search library,
2    version 0.12.
3    (Implements POSIX draft P10003.2/D11.2, except for
4    internationalization features.)
5 
6    Copyright (C) 1993 Free Software Foundation, Inc.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2, or (at your option)
11    any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
21 
22 /* AIX requires this to be the first thing in the file. */
23 #if defined (_AIX) && !defined (REGEX_MALLOC)
24   #pragma alloca
25 #endif
26 
27 #define _GNU_SOURCE
28 
29 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
30 #include <sys/types.h>
31 #include <stdint.h>
32 
33 #ifdef HAVE_CONFIG_H
34 #include "config.h"
35 #endif
36 
37 /* The `emacs' switch turns on certain matching commands
38    that make sense only in Emacs. */
39 #ifdef emacs
40 
41 #include "lisp.h"
42 #include "buffer.h"
43 #include "syntax.h"
44 
45 /* Emacs uses `NULL' as a predicate.  */
46 #undef NULL
47 
48 #else  /* not emacs */
49 
50 /* We used to test for `BSTRING' here, but only GCC and Emacs define
51    `BSTRING', as far as I know, and neither of them use this code.  */
52 #if HAVE_STRING_H || STDC_HEADERS
53 #include <string.h>
54 #ifdef HAVE_STRINGS_H
55 #include <strings.h>
56 #endif
57 #ifndef bcmp
58 #define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
59 #endif
60 #ifndef bcopy
61 #define bcopy(s, d, n)	memcpy ((d), (s), (n))
62 #endif
63 #ifndef bzero
64 #define bzero(s, n)	memset ((s), 0, (n))
65 #endif
66 #else
67 #include <strings.h>
68 #endif
69 
70 #ifdef STDC_HEADERS
71 #include <stdlib.h>
72 
73 #if defined(_WIN32) && defined(HAVE_MALLOC_H)
74 #include <malloc.h>
75 #endif
76 
77 #else
78 char *malloc ();
79 char *realloc ();
80 #endif
81 
82 
83 /* Define the syntax stuff for \<, \>, etc.  */
84 
85 /* This must be nonzero for the wordchar and notwordchar pattern
86    commands in re_match_2.  */
87 #ifndef Sword
88 #define Sword 1
89 #endif
90 
91 #ifdef SYNTAX_TABLE
92 
93 extern char *re_syntax_table;
94 
95 #else /* not SYNTAX_TABLE */
96 
97 /* How many characters in the character set.  */
98 #define CHAR_SET_SIZE 256
99 
100 static char re_syntax_table[CHAR_SET_SIZE];
101 
102 static void
init_syntax_once(void)103 init_syntax_once (void)
104 {
105    register int c;
106    static int done = 0;
107 
108    if (done)
109      return;
110 
111    bzero (re_syntax_table, sizeof re_syntax_table);
112 
113    for (c = 'a'; c <= 'z'; c++)
114      re_syntax_table[c] = Sword;
115 
116    for (c = 'A'; c <= 'Z'; c++)
117      re_syntax_table[c] = Sword;
118 
119    for (c = '0'; c <= '9'; c++)
120      re_syntax_table[c] = Sword;
121 
122    re_syntax_table['_'] = Sword;
123 
124    done = 1;
125 }
126 
127 #endif /* not SYNTAX_TABLE */
128 
129 #define SYNTAX(c) re_syntax_table[c]
130 
131 #endif /* not emacs */
132 
133 /* Get the interface, including the syntax bits.  */
134 #include "regex.h"
135 
136 /* isalpha etc. are used for the character classes.  */
137 #include <ctype.h>
138 
139 #ifndef isascii
140 #define isascii(c) 1
141 #endif
142 
143 #ifdef isblank
144 #define ISBLANK(c) (isascii (c) && isblank (c))
145 #else
146 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
147 #endif
148 #ifdef isgraph
149 #define ISGRAPH(c) (isascii (c) && isgraph (c))
150 #else
151 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
152 #endif
153 
154 #define ISPRINT(c) (isascii (c) && isprint (c))
155 #define ISDIGIT(c) (isascii (c) && isdigit (c))
156 #define ISALNUM(c) (isascii (c) && isalnum (c))
157 #define ISALPHA(c) (isascii (c) && isalpha (c))
158 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
159 #define ISLOWER(c) (isascii (c) && islower (c))
160 #define ISPUNCT(c) (isascii (c) && ispunct (c))
161 #define ISSPACE(c) (isascii (c) && isspace (c))
162 #define ISUPPER(c) (isascii (c) && isupper (c))
163 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
164 
165 #ifndef NULL
166 #define NULL 0
167 #endif
168 
169 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
170    since ours (we hope) works properly with all combinations of
171    machines, compilers, `char' and `unsigned char' argument types.
172    (Per Bothner suggested the basic approach.)  */
173 #undef SIGN_EXTEND_CHAR
174 #if __STDC__
175 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
176 #else  /* not __STDC__ */
177 /* As in Harbison and Steele.  */
178 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
179 #endif
180 
181 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
182    use `alloca' instead of `malloc'.  This is because using malloc in
183    re_search* or re_match* could cause memory leaks when C-g is used in
184    Emacs; also, malloc is slower and causes storage fragmentation.  On
185    the other hand, malloc is more portable, and easier to debug.
186 
187    Because we sometimes use alloca, some routines have to be macros,
188    not functions -- `alloca'-allocated space disappears at the end of the
189    function it is called in.  */
190 
191 #ifdef REGEX_MALLOC
192 
193 #define REGEX_ALLOCATE malloc
194 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
195 
196 #else /* not REGEX_MALLOC  */
197 
198 /* Emacs already defines alloca, sometimes.  */
199 #ifndef alloca
200 
201 /* Make alloca work the best possible way.  */
202 #ifdef __GNUC__
203 #define alloca __builtin_alloca
204 #else /* not __GNUC__ */
205 #if HAVE_ALLOCA_H
206 #include <alloca.h>
207 #else /* not __GNUC__ or HAVE_ALLOCA_H */
208 #ifndef _AIX /* Already did AIX, up at the top.  */
209 #ifndef _WIN32
210 char *alloca ();
211 #endif
212 #endif /* not _AIX */
213 #endif /* not HAVE_ALLOCA_H */
214 #endif /* not __GNUC__ */
215 
216 #endif /* not alloca */
217 
218 #define REGEX_ALLOCATE alloca
219 
220 /* Assumes a `char *destination' variable.  */
221 #define REGEX_REALLOCATE(source, osize, nsize)				\
222   (destination = (char *) alloca (nsize),				\
223    bcopy (source, destination, osize),					\
224    destination)
225 
226 #endif /* not REGEX_MALLOC */
227 
228 
229 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
230    `string1' or just past its end.  This works if PTR is NULL, which is
231    a good thing.  */
232 #define FIRST_STRING_P(ptr) 					\
233   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
234 
235 /* (Re)Allocate N items of type T using malloc, or fail.  */
236 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
237 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
238 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
239 
240 #define BYTEWIDTH 8 /* In bits.  */
241 
242 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
243 
244 #define MAX(a, b) ((a) > (b) ? (a) : (b))
245 #define MIN(a, b) ((a) < (b) ? (a) : (b))
246 
247 typedef char boolean;
248 #define false 0
249 #define true 1
250 
251 /* These are the command codes that appear in compiled regular
252    expressions.  Some opcodes are followed by argument bytes.  A
253    command code can specify any interpretation whatsoever for its
254    arguments.  Zero bytes may appear in the compiled regular expression.
255 
256    The value of `exactn' is needed in search.c (search_buffer) in Emacs.
257    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
258    `exactn' we use here must also be 1.  */
259 
260 typedef enum
261 {
262   no_op = 0,
263 
264         /* Followed by one byte giving n, then by n literal bytes.  */
265   exactn = 1,
266 
267         /* Matches any (more or less) character.  */
268   anychar,
269 
270         /* Matches any one char belonging to specified set.  First
271            following byte is number of bitmap bytes.  Then come bytes
272            for a bitmap saying which chars are in.  Bits in each byte
273            are ordered low-bit-first.  A character is in the set if its
274            bit is 1.  A character too large to have a bit in the map is
275            automatically not in the set.  */
276   charset,
277 
278         /* Same parameters as charset, but match any character that is
279            not one of those specified.  */
280   charset_not,
281 
282         /* Start remembering the text that is matched, for storing in a
283            register.  Followed by one byte with the register number, in
284            the range 0 to one less than the pattern buffer's re_nsub
285            field.  Then followed by one byte with the number of groups
286            inner to this one.  (This last has to be part of the
287            start_memory only because we need it in the on_failure_jump
288            of re_match_2.)  */
289   start_memory,
290 
291         /* Stop remembering the text that is matched and store it in a
292            memory register.  Followed by one byte with the register
293            number, in the range 0 to one less than `re_nsub' in the
294            pattern buffer, and one byte with the number of inner groups,
295            just like `start_memory'.  (We need the number of inner
296            groups here because we don't have any easy way of finding the
297            corresponding start_memory when we're at a stop_memory.)  */
298   stop_memory,
299 
300         /* Match a duplicate of something remembered. Followed by one
301            byte containing the register number.  */
302   duplicate,
303 
304         /* Fail unless at beginning of line.  */
305   begline,
306 
307         /* Fail unless at end of line.  */
308   endline,
309 
310         /* Succeeds if at beginning of buffer (if emacs) or at beginning
311            of string to be matched (if not).  */
312   begbuf,
313 
314         /* Analogously, for end of buffer/string.  */
315   endbuf,
316 
317         /* Followed by two byte relative address to which to jump.  */
318   jump,
319 
320 	/* Same as jump, but marks the end of an alternative.  */
321   jump_past_alt,
322 
323         /* Followed by two-byte relative address of place to resume at
324            in case of failure.  */
325   on_failure_jump,
326 
327         /* Like on_failure_jump, but pushes a placeholder instead of the
328            current string position when executed.  */
329   on_failure_keep_string_jump,
330 
331         /* Throw away latest failure point and then jump to following
332            two-byte relative address.  */
333   pop_failure_jump,
334 
335         /* Change to pop_failure_jump if know won't have to backtrack to
336            match; otherwise change to jump.  This is used to jump
337            back to the beginning of a repeat.  If what follows this jump
338            clearly won't match what the repeat does, such that we can be
339            sure that there is no use backtracking out of repetitions
340            already matched, then we change it to a pop_failure_jump.
341            Followed by two-byte address.  */
342   maybe_pop_jump,
343 
344         /* Jump to following two-byte address, and push a dummy failure
345            point. This failure point will be thrown away if an attempt
346            is made to use it for a failure.  A `+' construct makes this
347            before the first repeat.  Also used as an intermediary kind
348            of jump when compiling an alternative.  */
349   dummy_failure_jump,
350 
351 	/* Push a dummy failure point and continue.  Used at the end of
352 	   alternatives.  */
353   push_dummy_failure,
354 
355         /* Followed by two-byte relative address and two-byte number n.
356            After matching N times, jump to the address upon failure.  */
357   succeed_n,
358 
359         /* Followed by two-byte relative address, and two-byte number n.
360            Jump to the address N times, then fail.  */
361   jump_n,
362 
363         /* Set the following two-byte relative address to the
364            subsequent two-byte number.  The address *includes* the two
365            bytes of number.  */
366   set_number_at,
367 
368   wordchar,	/* Matches any word-constituent character.  */
369   notwordchar,	/* Matches any char that is not a word-constituent.  */
370 
371   wordbeg,	/* Succeeds if at word beginning.  */
372   wordend,	/* Succeeds if at word end.  */
373 
374   wordbound,	/* Succeeds if at a word boundary.  */
375   notwordbound	/* Succeeds if not at a word boundary.  */
376 
377 #ifdef emacs
378   ,before_dot,	/* Succeeds if before point.  */
379   at_dot,	/* Succeeds if at point.  */
380   after_dot,	/* Succeeds if after point.  */
381 
382 	/* Matches any character whose syntax is specified.  Followed by
383            a byte which contains a syntax code, e.g., Sword.  */
384   syntaxspec,
385 
386 	/* Matches any character whose syntax is not that specified.  */
387   notsyntaxspec
388 #endif /* emacs */
389 } re_opcode_t;
390 
391 /* Common operations on the compiled pattern.  */
392 
393 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
394 
395 #define STORE_NUMBER(destination, number)				\
396   do {									\
397     (destination)[0] = (number) & 0377;					\
398     (destination)[1] = (number) >> 8;					\
399   } while (0)
400 
401 /* Same as STORE_NUMBER, except increment DESTINATION to
402    the byte after where the number is stored.  Therefore, DESTINATION
403    must be an lvalue.  */
404 
405 #define STORE_NUMBER_AND_INCR(destination, number)			\
406   do {									\
407     STORE_NUMBER (destination, number);					\
408     (destination) += 2;							\
409   } while (0)
410 
411 /* Put into DESTINATION a number stored in two contiguous bytes starting
412    at SOURCE.  */
413 
414 #define EXTRACT_NUMBER(destination, source)				\
415   do {									\
416     (destination) = *(source) & 0377;					\
417     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
418   } while (0)
419 
420 #ifdef DEBUG
421 static void
extract_number(dest,source)422 extract_number (dest, source)
423     int *dest;
424     unsigned char *source;
425 {
426   int temp = SIGN_EXTEND_CHAR (*(source + 1));
427   *dest = *source & 0377;
428   *dest += temp << 8;
429 }
430 
431 #ifndef EXTRACT_MACROS /* To debug the macros.  */
432 #undef EXTRACT_NUMBER
433 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
434 #endif /* not EXTRACT_MACROS */
435 
436 #endif /* DEBUG */
437 
438 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
439    SOURCE must be an lvalue.  */
440 
441 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
442   do {									\
443     EXTRACT_NUMBER (destination, source);				\
444     (source) += 2; 							\
445   } while (0)
446 
447 #ifdef DEBUG
448 static void
extract_number_and_incr(destination,source)449 extract_number_and_incr (destination, source)
450     int *destination;
451     unsigned char **source;
452 {
453   extract_number (destination, *source);
454   *source += 2;
455 }
456 
457 #ifndef EXTRACT_MACROS
458 #undef EXTRACT_NUMBER_AND_INCR
459 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
460   extract_number_and_incr (&dest, &src)
461 #endif /* not EXTRACT_MACROS */
462 
463 #endif /* DEBUG */
464 
465 /* If DEBUG is defined, Regex prints many voluminous messages about what
466    it is doing (if the variable `debug' is nonzero).  If linked with the
467    main program in `iregex.c', you can enter patterns and strings
468    interactively.  And if linked with the main program in `main.c' and
469    the other test files, you can run the already-written tests.  */
470 
471 #ifdef DEBUG
472 
473 /* We use standard I/O for debugging.  */
474 #include <stdio.h>
475 
476 /* It is useful to test things that ``must'' be true when debugging.  */
477 #include <assert.h>
478 
479 static int debug = 0;
480 
481 #define DEBUG_STATEMENT(e) e
482 #define DEBUG_PRINT1(x) if (debug) printf (x)
483 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
484 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
485 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
486 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
487   if (debug) print_partial_compiled_pattern (s, e)
488 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
489   if (debug) print_double_string (w, s1, sz1, s2, sz2)
490 
491 
492 extern void printchar ();
493 
494 /* Print the fastmap in human-readable form.  */
495 
496 void
print_fastmap(fastmap)497 print_fastmap (fastmap)
498     char *fastmap;
499 {
500   unsigned was_a_range = 0;
501   unsigned i = 0;
502 
503   while (i < (1 << BYTEWIDTH))
504     {
505       if (fastmap[i++])
506 	{
507 	  was_a_range = 0;
508           printchar (i - 1);
509           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
510             {
511               was_a_range = 1;
512               i++;
513             }
514 	  if (was_a_range)
515             {
516               printf ("-");
517               printchar (i - 1);
518             }
519         }
520     }
521   putchar ('\n');
522 }
523 
524 
525 /* Print a compiled pattern string in human-readable form, starting at
526    the START pointer into it and ending just before the pointer END.  */
527 
528 void
print_partial_compiled_pattern(start,end)529 print_partial_compiled_pattern (start, end)
530     unsigned char *start;
531     unsigned char *end;
532 {
533   int mcnt, mcnt2;
534   unsigned char *p = start;
535   unsigned char *pend = end;
536 
537   if (start == NULL)
538     {
539       printf ("(null)\n");
540       return;
541     }
542 
543   /* Loop over pattern commands.  */
544   while (p < pend)
545     {
546       switch ((re_opcode_t) *p++)
547 	{
548         case no_op:
549           printf ("/no_op");
550           break;
551 
552 	case exactn:
553 	  mcnt = *p++;
554           printf ("/exactn/%d", mcnt);
555           do
556 	    {
557               putchar ('/');
558 	      printchar (*p++);
559             }
560           while (--mcnt);
561           break;
562 
563 	case start_memory:
564           mcnt = *p++;
565           printf ("/start_memory/%d/%d", mcnt, *p++);
566           break;
567 
568 	case stop_memory:
569           mcnt = *p++;
570 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
571           break;
572 
573 	case duplicate:
574 	  printf ("/duplicate/%d", *p++);
575 	  break;
576 
577 	case anychar:
578 	  printf ("/anychar");
579 	  break;
580 
581 	case charset:
582         case charset_not:
583           {
584             register int c;
585 
586             printf ("/charset%s",
587 	            (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
588 
589             assert (p + *p < pend);
590 
591             for (c = 0; c < *p; c++)
592               {
593                 unsigned bit;
594                 unsigned char map_byte = p[1 + c];
595 
596                 putchar ('/');
597 
598 		for (bit = 0; bit < BYTEWIDTH; bit++)
599                   if (map_byte & (1 << bit))
600                     printchar (c * BYTEWIDTH + bit);
601               }
602 	    p += 1 + *p;
603 	    break;
604 	  }
605 
606 	case begline:
607 	  printf ("/begline");
608           break;
609 
610 	case endline:
611           printf ("/endline");
612           break;
613 
614 	case on_failure_jump:
615           extract_number_and_incr (&mcnt, &p);
616   	  printf ("/on_failure_jump/0/%d", mcnt);
617           break;
618 
619 	case on_failure_keep_string_jump:
620           extract_number_and_incr (&mcnt, &p);
621   	  printf ("/on_failure_keep_string_jump/0/%d", mcnt);
622           break;
623 
624 	case dummy_failure_jump:
625           extract_number_and_incr (&mcnt, &p);
626   	  printf ("/dummy_failure_jump/0/%d", mcnt);
627           break;
628 
629 	case push_dummy_failure:
630           printf ("/push_dummy_failure");
631           break;
632 
633         case maybe_pop_jump:
634           extract_number_and_incr (&mcnt, &p);
635   	  printf ("/maybe_pop_jump/0/%d", mcnt);
636 	  break;
637 
638         case pop_failure_jump:
639 	  extract_number_and_incr (&mcnt, &p);
640   	  printf ("/pop_failure_jump/0/%d", mcnt);
641 	  break;
642 
643         case jump_past_alt:
644 	  extract_number_and_incr (&mcnt, &p);
645   	  printf ("/jump_past_alt/0/%d", mcnt);
646 	  break;
647 
648         case jump:
649 	  extract_number_and_incr (&mcnt, &p);
650   	  printf ("/jump/0/%d", mcnt);
651 	  break;
652 
653         case succeed_n:
654           extract_number_and_incr (&mcnt, &p);
655           extract_number_and_incr (&mcnt2, &p);
656  	  printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
657           break;
658 
659         case jump_n:
660           extract_number_and_incr (&mcnt, &p);
661           extract_number_and_incr (&mcnt2, &p);
662  	  printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
663           break;
664 
665         case set_number_at:
666           extract_number_and_incr (&mcnt, &p);
667           extract_number_and_incr (&mcnt2, &p);
668  	  printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
669           break;
670 
671         case wordbound:
672 	  printf ("/wordbound");
673 	  break;
674 
675 	case notwordbound:
676 	  printf ("/notwordbound");
677           break;
678 
679 	case wordbeg:
680 	  printf ("/wordbeg");
681 	  break;
682 
683 	case wordend:
684 	  printf ("/wordend");
685 
686 #ifdef emacs
687 	case before_dot:
688 	  printf ("/before_dot");
689           break;
690 
691 	case at_dot:
692 	  printf ("/at_dot");
693           break;
694 
695 	case after_dot:
696 	  printf ("/after_dot");
697           break;
698 
699 	case syntaxspec:
700           printf ("/syntaxspec");
701 	  mcnt = *p++;
702 	  printf ("/%d", mcnt);
703           break;
704 
705 	case notsyntaxspec:
706           printf ("/notsyntaxspec");
707 	  mcnt = *p++;
708 	  printf ("/%d", mcnt);
709 	  break;
710 #endif /* emacs */
711 
712 	case wordchar:
713 	  printf ("/wordchar");
714           break;
715 
716 	case notwordchar:
717 	  printf ("/notwordchar");
718           break;
719 
720 	case begbuf:
721 	  printf ("/begbuf");
722           break;
723 
724 	case endbuf:
725 	  printf ("/endbuf");
726           break;
727 
728         default:
729           printf ("?%d", *(p-1));
730 	}
731     }
732   printf ("/\n");
733 }
734 
735 
736 void
print_compiled_pattern(bufp)737 print_compiled_pattern (bufp)
738     struct re_pattern_buffer *bufp;
739 {
740   unsigned char *buffer = bufp->buffer;
741 
742   print_partial_compiled_pattern (buffer, buffer + bufp->used);
743   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
744 
745   if (bufp->fastmap_accurate && bufp->fastmap)
746     {
747       printf ("fastmap: ");
748       print_fastmap (bufp->fastmap);
749     }
750 
751   printf ("re_nsub: %d\t", bufp->re_nsub);
752   printf ("regs_alloc: %d\t", bufp->regs_allocated);
753   printf ("can_be_null: %d\t", bufp->can_be_null);
754   printf ("newline_anchor: %d\n", bufp->newline_anchor);
755   printf ("no_sub: %d\t", bufp->no_sub);
756   printf ("not_bol: %d\t", bufp->not_bol);
757   printf ("not_eol: %d\t", bufp->not_eol);
758   printf ("syntax: %d\n", bufp->syntax);
759   /* Perhaps we should print the translate table?  */
760 }
761 
762 
763 void
print_double_string(where,string1,size1,string2,size2)764 print_double_string (where, string1, size1, string2, size2)
765     const char *where;
766     const char *string1;
767     const char *string2;
768     int size1;
769     int size2;
770 {
771   unsigned this_char;
772 
773   if (where == NULL)
774     printf ("(null)");
775   else
776     {
777       if (FIRST_STRING_P (where))
778         {
779           for (this_char = where - string1; this_char < size1; this_char++)
780             printchar (string1[this_char]);
781 
782           where = string2;
783         }
784 
785       for (this_char = where - string2; this_char < size2; this_char++)
786         printchar (string2[this_char]);
787     }
788 }
789 
790 #else /* not DEBUG */
791 
792 #undef assert
793 #define assert(e)
794 
795 #define DEBUG_STATEMENT(e)
796 #define DEBUG_PRINT1(x)
797 #define DEBUG_PRINT2(x1, x2)
798 #define DEBUG_PRINT3(x1, x2, x3)
799 #define DEBUG_PRINT4(x1, x2, x3, x4)
800 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
801 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
802 
803 #endif /* not DEBUG */
804 
805 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
806    also be assigned to arbitrarily: each pattern buffer stores its own
807    syntax, so it can be changed between regex compilations.  */
808 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
809 
810 
811 /* Specify the precise syntax of regexps for compilation.  This provides
812    for compatibility for various utilities which historically have
813    different, incompatible syntaxes.
814 
815    The argument SYNTAX is a bit mask comprised of the various bits
816    defined in regex.h.  We return the old syntax.  */
817 
818 reg_syntax_t
re_set_syntax(syntax)819 re_set_syntax (syntax)
820     reg_syntax_t syntax;
821 {
822   reg_syntax_t ret = re_syntax_options;
823 
824   re_syntax_options = syntax;
825   return ret;
826 }
827 
828 /* This table gives an error message for each of the error codes listed
829    in regex.h.  Obviously the order here has to be same as there.  */
830 
831 static const char *re_error_msg[] =
832   { NULL,					/* REG_NOERROR */
833     "No match",					/* REG_NOMATCH */
834     "Invalid regular expression",		/* REG_BADPAT */
835     "Invalid collation character",		/* REG_ECOLLATE */
836     "Invalid character class name",		/* REG_ECTYPE */
837     "Trailing backslash",			/* REG_EESCAPE */
838     "Invalid back reference",			/* REG_ESUBREG */
839     "Unmatched [ or [^",			/* REG_EBRACK */
840     "Unmatched ( or \\(",			/* REG_EPAREN */
841     "Unmatched \\{",				/* REG_EBRACE */
842     "Invalid content of \\{\\}",		/* REG_BADBR */
843     "Invalid range end",			/* REG_ERANGE */
844     "Memory exhausted",				/* REG_ESPACE */
845     "Invalid preceding regular expression",	/* REG_BADRPT */
846     "Premature end of regular expression",	/* REG_EEND */
847     "Regular expression too big",		/* REG_ESIZE */
848     "Unmatched ) or \\)",			/* REG_ERPAREN */
849   };
850 
851 /* Subroutine declarations and macros for regex_compile.  */
852 
853 
854 typedef struct
855 {
856   struct compile_stack_elt_t *stack;
857   unsigned size;
858   unsigned avail;			/* Offset of next open position.  */
859 } compile_stack_type;
860 
861 /* But patterns can have more than `MAX_REGNUM' registers.  We just
862    ignore the excess.  */
863 typedef unsigned regnum_t;
864 
865 static void store_op1 (re_opcode_t op, unsigned char *loc, int arg), store_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2);
866 static void insert_op1 (re_opcode_t op, unsigned char *loc, int arg, unsigned char *end), insert_op2 (re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end);
867 static boolean at_begline_loc_p (const char *pattern, const char *p, reg_syntax_t syntax), at_endline_loc_p (const char *pattern, const char *p, int syntax);
868 static boolean group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum);
869 static reg_errcode_t compile_range (const char **p_ptr, const char *pend, char *translate, reg_syntax_t syntax, unsigned char *b);
870 
871 /* Fetch the next character in the uncompiled pattern---translating it
872    if necessary.  Also cast from a signed character in the constant
873    string passed to us by the user to an unsigned char that we can use
874    as an array index (in, e.g., `translate').  */
875 #define PATFETCH(c)							\
876   do {if (p == pend) return REG_EEND;					\
877     c = (unsigned char) *p++;						\
878     if (translate) c = translate[c]; 					\
879   } while (0)
880 
881 /* Fetch the next character in the uncompiled pattern, with no
882    translation.  */
883 #define PATFETCH_RAW(c)							\
884   do {if (p == pend) return REG_EEND;					\
885     c = (unsigned char) *p++; 						\
886   } while (0)
887 
888 /* Go backwards one character in the pattern.  */
889 #define PATUNFETCH p--
890 
891 
892 /* If `translate' is non-null, return translate[D], else just D.  We
893    cast the subscript to translate because some data is declared as
894    `char *', to avoid warnings when a string constant is passed.  But
895    when we use a character as a subscript we must make it unsigned.  */
896 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
897 
898 
899 /* Macros for outputting the compiled pattern into `buffer'.  */
900 
901 /* If the buffer isn't allocated when it comes in, use this.  */
902 #define INIT_BUF_SIZE  32
903 
904 /* Make sure we have at least N more bytes of space in buffer.  */
905 #define GET_BUFFER_SPACE(n)						\
906     while ((unsigned long)(b - bufp->buffer + (n)) > bufp->allocated) \
907       EXTEND_BUFFER ()
908 
909 /* Make sure we have one more byte of buffer space and then add C to it.  */
910 #define BUF_PUSH(c)							\
911   do {									\
912     GET_BUFFER_SPACE (1);						\
913     *b++ = (unsigned char) (c);						\
914   } while (0)
915 
916 
917 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
918 #define BUF_PUSH_2(c1, c2)						\
919   do {									\
920     GET_BUFFER_SPACE (2);						\
921     *b++ = (unsigned char) (c1);					\
922     *b++ = (unsigned char) (c2);					\
923   } while (0)
924 
925 
926 /* As with BUF_PUSH_2, except for three bytes.  */
927 #define BUF_PUSH_3(c1, c2, c3)						\
928   do {									\
929     GET_BUFFER_SPACE (3);						\
930     *b++ = (unsigned char) (c1);					\
931     *b++ = (unsigned char) (c2);					\
932     *b++ = (unsigned char) (c3);					\
933   } while (0)
934 
935 
936 /* Store a jump with opcode OP at LOC to location TO.  We store a
937    relative address offset by the three bytes the jump itself occupies.  */
938 #define STORE_JUMP(op, loc, to) \
939   store_op1 (op, loc, (to) - (loc) - 3)
940 
941 /* Likewise, for a two-argument jump.  */
942 #define STORE_JUMP2(op, loc, to, arg) \
943   store_op2 (op, loc, (to) - (loc) - 3, arg)
944 
945 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
946 #define INSERT_JUMP(op, loc, to) \
947   insert_op1 (op, loc, (to) - (loc) - 3, b)
948 
949 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
950 #define INSERT_JUMP2(op, loc, to, arg) \
951   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
952 
953 
954 /* This is not an arbitrary limit: the arguments which represent offsets
955    into the pattern are two bytes long.  So if 2^16 bytes turns out to
956    be too small, many things would have to change.  */
957 #define MAX_BUF_SIZE (1L << 16)
958 
959 
960 /* Extend the buffer by twice its current size via realloc and
961    reset the pointers that pointed into the old block to point to the
962    correct places in the new one.  If extending the buffer results in it
963    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
964 #define EXTEND_BUFFER()							\
965   do { 									\
966     unsigned char *old_buffer = bufp->buffer;				\
967     if (bufp->allocated == MAX_BUF_SIZE) 				\
968       return REG_ESIZE;							\
969     bufp->allocated <<= 1;						\
970     if (bufp->allocated > MAX_BUF_SIZE)					\
971       bufp->allocated = MAX_BUF_SIZE; 					\
972     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
973     if (bufp->buffer == NULL)						\
974       return REG_ESPACE;						\
975     /* If the buffer moved, move all the pointers into it.  */		\
976     if (old_buffer != bufp->buffer)					\
977       {									\
978         b = (b - old_buffer) + bufp->buffer;				\
979         begalt = (begalt - old_buffer) + bufp->buffer;			\
980         if (fixup_alt_jump)						\
981           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
982         if (laststart)							\
983           laststart = (laststart - old_buffer) + bufp->buffer;		\
984         if (pending_exact)						\
985           pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
986       }									\
987   } while (0)
988 
989 
990 /* Since we have one byte reserved for the register number argument to
991    {start,stop}_memory, the maximum number of groups we can report
992    things about is what fits in that byte.  */
993 #define MAX_REGNUM 255
994 
995 
996 /* Macros for the compile stack.  */
997 
998 /* Since offsets can go either forwards or backwards, this type needs to
999    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1000 typedef int pattern_offset_t;
1001 
1002 typedef struct compile_stack_elt_t
1003 {
1004   pattern_offset_t begalt_offset;
1005   pattern_offset_t fixup_alt_jump;
1006   pattern_offset_t inner_group_offset;
1007   pattern_offset_t laststart_offset;
1008   regnum_t regnum;
1009 } compile_stack_elt_t;
1010 
1011 
1012 #define INIT_COMPILE_STACK_SIZE 32
1013 
1014 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1015 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1016 
1017 /* The next available element.  */
1018 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1019 
1020 
1021 /* Set the bit for character C in a list.  */
1022 #define SET_LIST_BIT(c)                               \
1023   (b[((unsigned char) (c)) / BYTEWIDTH]               \
1024    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1025 
1026 
1027 /* Get the next unsigned number in the uncompiled pattern.  */
1028 #define GET_UNSIGNED_NUMBER(num) 					\
1029   { if (p != pend)							\
1030      {									\
1031        PATFETCH (c); 							\
1032        while (ISDIGIT (c)) 						\
1033          { 								\
1034            if (num < 0)							\
1035               num = 0;							\
1036            num = num * 10 + c - '0'; 					\
1037            if (p == pend) 						\
1038               break; 							\
1039            PATFETCH (c);						\
1040          } 								\
1041        } 								\
1042     }
1043 
1044 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1045 
1046 #define IS_CHAR_CLASS(string)						\
1047    (STREQ (string, "alpha") || STREQ (string, "upper")			\
1048     || STREQ (string, "lower") || STREQ (string, "digit")		\
1049     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1050     || STREQ (string, "space") || STREQ (string, "print")		\
1051     || STREQ (string, "punct") || STREQ (string, "graph")		\
1052     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1053 
1054 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1055    Returns one of error codes defined in `regex.h', or zero for success.
1056 
1057    Assumes the `allocated' (and perhaps `buffer') and `translate'
1058    fields are set in BUFP on entry.
1059 
1060    If it succeeds, results are put in BUFP (if it returns an error, the
1061    contents of BUFP are undefined):
1062      `buffer' is the compiled pattern;
1063      `syntax' is set to SYNTAX;
1064      `used' is set to the length of the compiled pattern;
1065      `fastmap_accurate' is zero;
1066      `re_nsub' is the number of subexpressions in PATTERN;
1067      `not_bol' and `not_eol' are zero;
1068 
1069    The `fastmap' and `newline_anchor' fields are neither
1070    examined nor set.  */
1071 
1072 static reg_errcode_t regex_compile (const char *pattern, int size, reg_syntax_t syntax, struct re_pattern_buffer *bufp);
1073 static reg_errcode_t
regex_compile(pattern,size,syntax,bufp)1074 regex_compile (pattern, size, syntax, bufp)
1075      const char *pattern;
1076      int size;
1077      reg_syntax_t syntax;
1078      struct re_pattern_buffer *bufp;
1079 {
1080   /* We fetch characters from PATTERN here.  Even though PATTERN is
1081      `char *' (i.e., signed), we declare these variables as unsigned, so
1082      they can be reliably used as array indices.  */
1083   register unsigned char c, c1;
1084 
1085   /* A random tempory spot in PATTERN.  */
1086   const char *p1;
1087 
1088   /* Points to the end of the buffer, where we should append.  */
1089   register unsigned char *b;
1090 
1091   /* Keeps track of unclosed groups.  */
1092   compile_stack_type compile_stack;
1093 
1094   /* Points to the current (ending) position in the pattern.  */
1095   const char *p = pattern;
1096   const char *pend = pattern + size;
1097 
1098   /* How to translate the characters in the pattern.  */
1099   char *translate = bufp->translate;
1100 
1101   /* Address of the count-byte of the most recently inserted `exactn'
1102      command.  This makes it possible to tell if a new exact-match
1103      character can be added to that command or if the character requires
1104      a new `exactn' command.  */
1105   unsigned char *pending_exact = 0;
1106 
1107   /* Address of start of the most recently finished expression.
1108      This tells, e.g., postfix * where to find the start of its
1109      operand.  Reset at the beginning of groups and alternatives.  */
1110   unsigned char *laststart = 0;
1111 
1112   /* Address of beginning of regexp, or inside of last group.  */
1113   unsigned char *begalt;
1114 
1115   /* Place in the uncompiled pattern (i.e., the {) to
1116      which to go back if the interval is invalid.  */
1117   const char *beg_interval;
1118 
1119   /* Address of the place where a forward jump should go to the end of
1120      the containing expression.  Each alternative of an `or' -- except the
1121      last -- ends with a forward jump of this sort.  */
1122   unsigned char *fixup_alt_jump = 0;
1123 
1124   /* Counts open-groups as they are encountered.  Remembered for the
1125      matching close-group on the compile stack, so the same register
1126      number is put in the stop_memory as the start_memory.  */
1127   regnum_t regnum = 0;
1128 
1129 #ifdef DEBUG
1130   DEBUG_PRINT1 ("\nCompiling pattern: ");
1131   if (debug)
1132     {
1133       unsigned debug_count;
1134 
1135       for (debug_count = 0; debug_count < size; debug_count++)
1136         printchar (pattern[debug_count]);
1137       putchar ('\n');
1138     }
1139 #endif /* DEBUG */
1140 
1141   /* Initialize the compile stack.  */
1142   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1143   if (compile_stack.stack == NULL)
1144     return REG_ESPACE;
1145 
1146   compile_stack.size = INIT_COMPILE_STACK_SIZE;
1147   compile_stack.avail = 0;
1148 
1149   /* Initialize the pattern buffer.  */
1150   bufp->syntax = syntax;
1151   bufp->fastmap_accurate = 0;
1152   bufp->not_bol = bufp->not_eol = 0;
1153 
1154   /* Set `used' to zero, so that if we return an error, the pattern
1155      printer (for debugging) will think there's no pattern.  We reset it
1156      at the end.  */
1157   bufp->used = 0;
1158 
1159   /* Always count groups, whether or not bufp->no_sub is set.  */
1160   bufp->re_nsub = 0;
1161 
1162 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1163   /* Initialize the syntax table.  */
1164    init_syntax_once ();
1165 #endif
1166 
1167   if (bufp->allocated == 0)
1168     {
1169       if (bufp->buffer)
1170 	{ /* If zero allocated, but buffer is non-null, try to realloc
1171              enough space.  This loses if buffer's address is bogus, but
1172              that is the user's responsibility.  */
1173           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1174         }
1175       else
1176         { /* Caller did not allocate a buffer.  Do it for them.  */
1177           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1178         }
1179       if (!bufp->buffer) return REG_ESPACE;
1180 
1181       bufp->allocated = INIT_BUF_SIZE;
1182     }
1183 
1184   begalt = b = bufp->buffer;
1185 
1186   /* Loop through the uncompiled pattern until we're at the end.  */
1187   while (p != pend)
1188     {
1189       PATFETCH (c);
1190 
1191       switch (c)
1192         {
1193         case '^':
1194           {
1195             if (   /* If at start of pattern, it's an operator.  */
1196                    p == pattern + 1
1197                    /* If context independent, it's an operator.  */
1198                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1199                    /* Otherwise, depends on what's come before.  */
1200                 || at_begline_loc_p (pattern, p, syntax))
1201               BUF_PUSH (begline);
1202             else
1203               goto normal_char;
1204           }
1205           break;
1206 
1207 
1208         case '$':
1209           {
1210             if (   /* If at end of pattern, it's an operator.  */
1211                    p == pend
1212                    /* If context independent, it's an operator.  */
1213                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1214                    /* Otherwise, depends on what's next.  */
1215                 || at_endline_loc_p (p, pend, syntax))
1216                BUF_PUSH (endline);
1217              else
1218                goto normal_char;
1219            }
1220            break;
1221 
1222 
1223 	case '+':
1224         case '?':
1225           if ((syntax & RE_BK_PLUS_QM)
1226               || (syntax & RE_LIMITED_OPS))
1227             goto normal_char;
1228         handle_plus:
1229         case '*':
1230           /* If there is no previous pattern... */
1231           if (!laststart)
1232             {
1233               if (syntax & RE_CONTEXT_INVALID_OPS)
1234                 return REG_BADRPT;
1235               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1236                 goto normal_char;
1237             }
1238 
1239           {
1240             /* Are we optimizing this jump?  */
1241             boolean keep_string_p = false;
1242 
1243             /* 1 means zero (many) matches is allowed.  */
1244             char zero_times_ok = 0, many_times_ok = 0;
1245 
1246             /* If there is a sequence of repetition chars, collapse it
1247                down to just one (the right one).  We can't combine
1248                interval operators with these because of, e.g., `a{2}*',
1249                which should only match an even number of `a's.  */
1250 
1251             for (;;)
1252               {
1253                 zero_times_ok |= c != '+';
1254                 many_times_ok |= c != '?';
1255 
1256                 if (p == pend)
1257                   break;
1258 
1259                 PATFETCH (c);
1260 
1261                 if (c == '*'
1262                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1263                   ;
1264 
1265                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1266                   {
1267                     if (p == pend) return REG_EESCAPE;
1268 
1269                     PATFETCH (c1);
1270                     if (!(c1 == '+' || c1 == '?'))
1271                       {
1272                         PATUNFETCH;
1273                         PATUNFETCH;
1274                         break;
1275                       }
1276 
1277                     c = c1;
1278                   }
1279                 else
1280                   {
1281                     PATUNFETCH;
1282                     break;
1283                   }
1284 
1285                 /* If we get here, we found another repeat character.  */
1286                }
1287 
1288             /* Star, etc. applied to an empty pattern is equivalent
1289                to an empty pattern.  */
1290             if (!laststart)
1291               break;
1292 
1293             /* Now we know whether or not zero matches is allowed
1294                and also whether or not two or more matches is allowed.  */
1295             if (many_times_ok)
1296               { /* More than one repetition is allowed, so put in at the
1297                    end a backward relative jump from `b' to before the next
1298                    jump we're going to put in below (which jumps from
1299                    laststart to after this jump).
1300 
1301                    But if we are at the `*' in the exact sequence `.*\n',
1302                    insert an unconditional jump backwards to the .,
1303                    instead of the beginning of the loop.  This way we only
1304                    push a failure point once, instead of every time
1305                    through the loop.  */
1306                 assert (p - 1 > pattern);
1307 
1308                 /* Allocate the space for the jump.  */
1309                 GET_BUFFER_SPACE (3);
1310 
1311                 /* We know we are not at the first character of the pattern,
1312                    because laststart was nonzero.  And we've already
1313                    incremented `p', by the way, to be the character after
1314                    the `*'.  Do we have to do something analogous here
1315                    for null bytes, because of RE_DOT_NOT_NULL?  */
1316                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1317 		    && zero_times_ok
1318                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1319                     && !(syntax & RE_DOT_NEWLINE))
1320                   { /* We have .*\n.  */
1321                     STORE_JUMP (jump, b, laststart);
1322                     keep_string_p = true;
1323                   }
1324                 else
1325                   /* Anything else.  */
1326                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1327 
1328                 /* We've added more stuff to the buffer.  */
1329                 b += 3;
1330               }
1331 
1332             /* On failure, jump from laststart to b + 3, which will be the
1333                end of the buffer after this jump is inserted.  */
1334             GET_BUFFER_SPACE (3);
1335             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1336                                        : on_failure_jump,
1337                          laststart, b + 3);
1338             pending_exact = 0;
1339             b += 3;
1340 
1341             if (!zero_times_ok)
1342               {
1343                 /* At least one repetition is required, so insert a
1344                    `dummy_failure_jump' before the initial
1345                    `on_failure_jump' instruction of the loop. This
1346                    effects a skip over that instruction the first time
1347                    we hit that loop.  */
1348                 GET_BUFFER_SPACE (3);
1349                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1350                 b += 3;
1351               }
1352             }
1353 	  break;
1354 
1355 
1356 	case '.':
1357           laststart = b;
1358           BUF_PUSH (anychar);
1359           break;
1360 
1361 
1362         case '[':
1363           {
1364             boolean had_char_class = false;
1365 
1366             if (p == pend) return REG_EBRACK;
1367 
1368             /* Ensure that we have enough space to push a charset: the
1369                opcode, the length count, and the bitset; 34 bytes in all.  */
1370 	    GET_BUFFER_SPACE (34);
1371 
1372             laststart = b;
1373 
1374             /* We test `*p == '^' twice, instead of using an if
1375                statement, so we only need one BUF_PUSH.  */
1376             BUF_PUSH (*p == '^' ? charset_not : charset);
1377             if (*p == '^')
1378               p++;
1379 
1380             /* Remember the first position in the bracket expression.  */
1381             p1 = p;
1382 
1383             /* Push the number of bytes in the bitmap.  */
1384             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1385 
1386             /* Clear the whole map.  */
1387             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1388 
1389             /* charset_not matches newline according to a syntax bit.  */
1390             if ((re_opcode_t) b[-2] == charset_not
1391                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1392               SET_LIST_BIT ('\n');
1393 
1394             /* Read in characters and ranges, setting map bits.  */
1395             for (;;)
1396               {
1397                 if (p == pend) return REG_EBRACK;
1398 
1399                 PATFETCH (c);
1400 
1401                 /* \ might escape characters inside [...] and [^...].  */
1402                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1403                   {
1404                     if (p == pend) return REG_EESCAPE;
1405 
1406                     PATFETCH (c1);
1407                     SET_LIST_BIT (c1);
1408                     continue;
1409                   }
1410 
1411                 /* Could be the end of the bracket expression.  If it's
1412                    not (i.e., when the bracket expression is `[]' so
1413                    far), the ']' character bit gets set way below.  */
1414                 if (c == ']' && p != p1 + 1)
1415                   break;
1416 
1417                 /* Look ahead to see if it's a range when the last thing
1418                    was a character class.  */
1419                 if (had_char_class && c == '-' && *p != ']')
1420                   return REG_ERANGE;
1421 
1422                 /* Look ahead to see if it's a range when the last thing
1423                    was a character: if this is a hyphen not at the
1424                    beginning or the end of a list, then it's the range
1425                    operator.  */
1426                 if (c == '-'
1427                     && !(p - 2 >= pattern && p[-2] == '[')
1428                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1429                     && *p != ']')
1430                   {
1431                     reg_errcode_t ret
1432                       = compile_range (&p, pend, translate, syntax, b);
1433                     if (ret != REG_NOERROR) return ret;
1434                   }
1435 
1436                 else if (p[0] == '-' && p[1] != ']')
1437                   { /* This handles ranges made up of characters only.  */
1438                     reg_errcode_t ret;
1439 
1440 		    /* Move past the `-'.  */
1441                     PATFETCH (c1);
1442 
1443                     ret = compile_range (&p, pend, translate, syntax, b);
1444                     if (ret != REG_NOERROR) return ret;
1445                   }
1446 
1447                 /* See if we're at the beginning of a possible character
1448                    class.  */
1449 
1450                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1451                   { /* Leave room for the null.  */
1452                     char str[CHAR_CLASS_MAX_LENGTH + 1];
1453 
1454                     PATFETCH (c);
1455                     c1 = 0;
1456 
1457                     /* If pattern is `[[:'.  */
1458                     if (p == pend) return REG_EBRACK;
1459 
1460                     for (;;)
1461                       {
1462                         PATFETCH (c);
1463                         if (c == ':' || c == ']' || p == pend
1464                             || c1 == CHAR_CLASS_MAX_LENGTH)
1465                           break;
1466                         str[c1++] = c;
1467                       }
1468                     str[c1] = '\0';
1469 
1470                     /* If isn't a word bracketed by `[:' and:`]':
1471                        undo the ending character, the letters, and leave
1472                        the leading `:' and `[' (but set bits for them).  */
1473                     if (c == ':' && *p == ']')
1474                       {
1475                         int ch;
1476                         boolean is_alnum = STREQ (str, "alnum");
1477                         boolean is_alpha = STREQ (str, "alpha");
1478                         boolean is_blank = STREQ (str, "blank");
1479                         boolean is_cntrl = STREQ (str, "cntrl");
1480                         boolean is_digit = STREQ (str, "digit");
1481                         boolean is_graph = STREQ (str, "graph");
1482                         boolean is_lower = STREQ (str, "lower");
1483                         boolean is_print = STREQ (str, "print");
1484                         boolean is_punct = STREQ (str, "punct");
1485                         boolean is_space = STREQ (str, "space");
1486                         boolean is_upper = STREQ (str, "upper");
1487                         boolean is_xdigit = STREQ (str, "xdigit");
1488 
1489                         if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1490 
1491                         /* Throw away the ] at the end of the character
1492                            class.  */
1493                         PATFETCH (c);
1494 
1495                         if (p == pend) return REG_EBRACK;
1496 
1497                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1498                           {
1499                             if (   (is_alnum  && ISALNUM (ch))
1500                                 || (is_alpha  && ISALPHA (ch))
1501                                 || (is_blank  && ISBLANK (ch))
1502                                 || (is_cntrl  && ISCNTRL (ch))
1503                                 || (is_digit  && ISDIGIT (ch))
1504                                 || (is_graph  && ISGRAPH (ch))
1505                                 || (is_lower  && ISLOWER (ch))
1506                                 || (is_print  && ISPRINT (ch))
1507                                 || (is_punct  && ISPUNCT (ch))
1508                                 || (is_space  && ISSPACE (ch))
1509                                 || (is_upper  && ISUPPER (ch))
1510                                 || (is_xdigit && ISXDIGIT (ch)))
1511                             SET_LIST_BIT (ch);
1512                           }
1513                         had_char_class = true;
1514                       }
1515                     else
1516                       {
1517                         c1++;
1518                         while (c1--)
1519                           PATUNFETCH;
1520                         SET_LIST_BIT ('[');
1521                         SET_LIST_BIT (':');
1522                         had_char_class = false;
1523                       }
1524                   }
1525                 else
1526                   {
1527                     had_char_class = false;
1528                     SET_LIST_BIT (c);
1529                   }
1530               }
1531 
1532             /* Discard any (non)matching list bytes that are all 0 at the
1533                end of the map.  Decrease the map-length byte too.  */
1534             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1535               b[-1]--;
1536             b += b[-1];
1537           }
1538           break;
1539 
1540 
1541 	case '(':
1542           if (syntax & RE_NO_BK_PARENS)
1543             goto handle_open;
1544           else
1545             goto normal_char;
1546 
1547 
1548         case ')':
1549           if (syntax & RE_NO_BK_PARENS)
1550             goto handle_close;
1551           else
1552             goto normal_char;
1553 
1554 
1555         case '\n':
1556           if (syntax & RE_NEWLINE_ALT)
1557             goto handle_alt;
1558           else
1559             goto normal_char;
1560 
1561 
1562 	case '|':
1563           if (syntax & RE_NO_BK_VBAR)
1564             goto handle_alt;
1565           else
1566             goto normal_char;
1567 
1568 
1569         case '{':
1570            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1571              goto handle_interval;
1572            else
1573              goto normal_char;
1574 
1575 
1576         case '\\':
1577           if (p == pend) return REG_EESCAPE;
1578 
1579           /* Do not translate the character after the \, so that we can
1580              distinguish, e.g., \B from \b, even if we normally would
1581              translate, e.g., B to b.  */
1582           PATFETCH_RAW (c);
1583 
1584           switch (c)
1585             {
1586             case '(':
1587               if (syntax & RE_NO_BK_PARENS)
1588                 goto normal_backslash;
1589 
1590             handle_open:
1591               bufp->re_nsub++;
1592               regnum++;
1593 
1594               if (COMPILE_STACK_FULL)
1595                 {
1596                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
1597                             compile_stack_elt_t);
1598                   if (compile_stack.stack == NULL) return REG_ESPACE;
1599 
1600                   compile_stack.size <<= 1;
1601                 }
1602 
1603               /* These are the values to restore when we hit end of this
1604                  group.  They are all relative offsets, so that if the
1605                  whole pattern moves because of realloc, they will still
1606                  be valid.  */
1607               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1608               COMPILE_STACK_TOP.fixup_alt_jump
1609                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1610               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1611               COMPILE_STACK_TOP.regnum = regnum;
1612 
1613               /* We will eventually replace the 0 with the number of
1614                  groups inner to this one.  But do not push a
1615                  start_memory for groups beyond the last one we can
1616                  represent in the compiled pattern.  */
1617               if (regnum <= MAX_REGNUM)
1618                 {
1619                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1620                   BUF_PUSH_3 (start_memory, regnum, 0);
1621                 }
1622 
1623               compile_stack.avail++;
1624 
1625               fixup_alt_jump = 0;
1626               laststart = 0;
1627               begalt = b;
1628 	      /* If we've reached MAX_REGNUM groups, then this open
1629 		 won't actually generate any code, so we'll have to
1630 		 clear pending_exact explicitly.  */
1631 	      pending_exact = 0;
1632               break;
1633 
1634 
1635             case ')':
1636               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1637 
1638 /* snax: added braces here to elimnate gcc warning */
1639 
1640               if (COMPILE_STACK_EMPTY)
1641 		{
1642                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1643                   goto normal_backslash;
1644                 else
1645                   return REG_ERPAREN;
1646 		}
1647             handle_close:
1648               if (fixup_alt_jump)
1649                 { /* Push a dummy failure point at the end of the
1650                      alternative for a possible future
1651                      `pop_failure_jump' to pop.  See comments at
1652                      `push_dummy_failure' in `re_match_2'.  */
1653                   BUF_PUSH (push_dummy_failure);
1654 
1655                   /* We allocated space for this jump when we assigned
1656                      to `fixup_alt_jump', in the `handle_alt' case below.  */
1657                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1658                 }
1659 
1660 /* snax: added braces here too */
1661               /* See similar code for backslashed left paren above.  */
1662               if (COMPILE_STACK_EMPTY)
1663 		{
1664                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1665                   goto normal_char;
1666                 else
1667                   return REG_ERPAREN;
1668 		}
1669 
1670               /* Since we just checked for an empty stack above, this
1671                  ``can't happen''.  */
1672               assert (compile_stack.avail != 0);
1673               {
1674                 /* We don't just want to restore into `regnum', because
1675                    later groups should continue to be numbered higher,
1676                    as in `(ab)c(de)' -- the second group is #2.  */
1677                 regnum_t this_group_regnum;
1678 
1679                 compile_stack.avail--;
1680                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1681                 fixup_alt_jump
1682                   = COMPILE_STACK_TOP.fixup_alt_jump
1683                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1684                     : 0;
1685                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1686                 this_group_regnum = COMPILE_STACK_TOP.regnum;
1687 		/* If we've reached MAX_REGNUM groups, then this open
1688 		   won't actually generate any code, so we'll have to
1689 		   clear pending_exact explicitly.  */
1690 		pending_exact = 0;
1691 
1692                 /* We're at the end of the group, so now we know how many
1693                    groups were inside this one.  */
1694                 if (this_group_regnum <= MAX_REGNUM)
1695                   {
1696                     unsigned char *inner_group_loc
1697                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1698 
1699                     *inner_group_loc = regnum - this_group_regnum;
1700                     BUF_PUSH_3 (stop_memory, this_group_regnum,
1701                                 regnum - this_group_regnum);
1702                   }
1703               }
1704               break;
1705 
1706 
1707             case '|':					/* `\|'.  */
1708               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1709                 goto normal_backslash;
1710             handle_alt:
1711               if (syntax & RE_LIMITED_OPS)
1712                 goto normal_char;
1713 
1714               /* Insert before the previous alternative a jump which
1715                  jumps to this alternative if the former fails.  */
1716               GET_BUFFER_SPACE (3);
1717               INSERT_JUMP (on_failure_jump, begalt, b + 6);
1718               pending_exact = 0;
1719               b += 3;
1720 
1721               /* The alternative before this one has a jump after it
1722                  which gets executed if it gets matched.  Adjust that
1723                  jump so it will jump to this alternative's analogous
1724                  jump (put in below, which in turn will jump to the next
1725                  (if any) alternative's such jump, etc.).  The last such
1726                  jump jumps to the correct final destination.  A picture:
1727                           _____ _____
1728                           |   | |   |
1729                           |   v |   v
1730                          a | b   | c
1731 
1732                  If we are at `b', then fixup_alt_jump right now points to a
1733                  three-byte space after `a'.  We'll put in the jump, set
1734                  fixup_alt_jump to right after `b', and leave behind three
1735                  bytes which we'll fill in when we get to after `c'.  */
1736 
1737               if (fixup_alt_jump)
1738                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1739 
1740               /* Mark and leave space for a jump after this alternative,
1741                  to be filled in later either by next alternative or
1742                  when know we're at the end of a series of alternatives.  */
1743               fixup_alt_jump = b;
1744               GET_BUFFER_SPACE (3);
1745               b += 3;
1746 
1747               laststart = 0;
1748               begalt = b;
1749               break;
1750 
1751 
1752             case '{':
1753               /* If \{ is a literal.  */
1754               if (!(syntax & RE_INTERVALS)
1755                      /* If we're at `\{' and it's not the open-interval
1756                         operator.  */
1757                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1758                   || (p - 2 == pattern  &&  p == pend))
1759                 goto normal_backslash;
1760 
1761             handle_interval:
1762               {
1763                 /* If got here, then the syntax allows intervals.  */
1764 
1765                 /* At least (most) this many matches must be made.  */
1766                 int lower_bound = -1, upper_bound = -1;
1767 
1768                 beg_interval = p - 1;
1769 
1770                 if (p == pend)
1771                   {
1772                     if (syntax & RE_NO_BK_BRACES)
1773                       goto unfetch_interval;
1774                     else
1775                       return REG_EBRACE;
1776                   }
1777 
1778                 GET_UNSIGNED_NUMBER (lower_bound);
1779 
1780                 if (c == ',')
1781                   {
1782                     GET_UNSIGNED_NUMBER (upper_bound);
1783                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1784                   }
1785                 else
1786                   /* Interval such as `{1}' => match exactly once. */
1787                   upper_bound = lower_bound;
1788 
1789                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1790                     || lower_bound > upper_bound)
1791                   {
1792                     if (syntax & RE_NO_BK_BRACES)
1793                       goto unfetch_interval;
1794                     else
1795                       return REG_BADBR;
1796                   }
1797 
1798                 if (!(syntax & RE_NO_BK_BRACES))
1799                   {
1800                     if (c != '\\') return REG_EBRACE;
1801 
1802                     PATFETCH (c);
1803                   }
1804 
1805                 if (c != '}')
1806                   {
1807                     if (syntax & RE_NO_BK_BRACES)
1808                       goto unfetch_interval;
1809                     else
1810                       return REG_BADBR;
1811                   }
1812 
1813                 /* We just parsed a valid interval.  */
1814 
1815                 /* If it's invalid to have no preceding re.  */
1816                 if (!laststart)
1817                   {
1818                     if (syntax & RE_CONTEXT_INVALID_OPS)
1819                       return REG_BADRPT;
1820                     else if (syntax & RE_CONTEXT_INDEP_OPS)
1821                       laststart = b;
1822                     else
1823                       goto unfetch_interval;
1824                   }
1825 
1826                 /* If the upper bound is zero, don't want to succeed at
1827                    all; jump from `laststart' to `b + 3', which will be
1828                    the end of the buffer after we insert the jump.  */
1829                  if (upper_bound == 0)
1830                    {
1831                      GET_BUFFER_SPACE (3);
1832                      INSERT_JUMP (jump, laststart, b + 3);
1833                      b += 3;
1834                    }
1835 
1836                  /* Otherwise, we have a nontrivial interval.  When
1837                     we're all done, the pattern will look like:
1838                       set_number_at <jump count> <upper bound>
1839                       set_number_at <succeed_n count> <lower bound>
1840                       succeed_n <after jump addr> <succed_n count>
1841                       <body of loop>
1842                       jump_n <succeed_n addr> <jump count>
1843                     (The upper bound and `jump_n' are omitted if
1844                     `upper_bound' is 1, though.)  */
1845                  else
1846                    { /* If the upper bound is > 1, we need to insert
1847                         more at the end of the loop.  */
1848                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
1849 
1850                      GET_BUFFER_SPACE (nbytes);
1851 
1852                      /* Initialize lower bound of the `succeed_n', even
1853                         though it will be set during matching by its
1854                         attendant `set_number_at' (inserted next),
1855                         because `re_compile_fastmap' needs to know.
1856                         Jump to the `jump_n' we might insert below.  */
1857                      INSERT_JUMP2 (succeed_n, laststart,
1858                                    b + 5 + (upper_bound > 1) * 5,
1859                                    lower_bound);
1860                      b += 5;
1861 
1862                      /* Code to initialize the lower bound.  Insert
1863                         before the `succeed_n'.  The `5' is the last two
1864                         bytes of this `set_number_at', plus 3 bytes of
1865                         the following `succeed_n'.  */
1866                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1867                      b += 5;
1868 
1869                      if (upper_bound > 1)
1870                        { /* More than one repetition is allowed, so
1871                             append a backward jump to the `succeed_n'
1872                             that starts this interval.
1873 
1874                             When we've reached this during matching,
1875                             we'll have matched the interval once, so
1876                             jump back only `upper_bound - 1' times.  */
1877                          STORE_JUMP2 (jump_n, b, laststart + 5,
1878                                       upper_bound - 1);
1879                          b += 5;
1880 
1881                          /* The location we want to set is the second
1882                             parameter of the `jump_n'; that is `b-2' as
1883                             an absolute address.  `laststart' will be
1884                             the `set_number_at' we're about to insert;
1885                             `laststart+3' the number to set, the source
1886                             for the relative address.  But we are
1887                             inserting into the middle of the pattern --
1888                             so everything is getting moved up by 5.
1889                             Conclusion: (b - 2) - (laststart + 3) + 5,
1890                             i.e., b - laststart.
1891 
1892                             We insert this at the beginning of the loop
1893                             so that if we fail during matching, we'll
1894                             reinitialize the bounds.  */
1895                          insert_op2 (set_number_at, laststart, b - laststart,
1896                                      upper_bound - 1, b);
1897                          b += 5;
1898                        }
1899                    }
1900                 pending_exact = 0;
1901                 beg_interval = NULL;
1902               }
1903               break;
1904 
1905             unfetch_interval:
1906               /* If an invalid interval, match the characters as literals.  */
1907                assert (beg_interval);
1908                p = beg_interval;
1909                beg_interval = NULL;
1910 
1911                /* normal_char and normal_backslash need `c'.  */
1912                PATFETCH (c);
1913 
1914                if (!(syntax & RE_NO_BK_BRACES))
1915                  {
1916                    if (p > pattern  &&  p[-1] == '\\')
1917                      goto normal_backslash;
1918                  }
1919                goto normal_char;
1920 
1921 #ifdef emacs
1922             /* There is no way to specify the before_dot and after_dot
1923                operators.  rms says this is ok.  --karl  */
1924             case '=':
1925               BUF_PUSH (at_dot);
1926               break;
1927 
1928             case 's':
1929               laststart = b;
1930               PATFETCH (c);
1931               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1932               break;
1933 
1934             case 'S':
1935               laststart = b;
1936               PATFETCH (c);
1937               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1938               break;
1939 #endif /* emacs */
1940 
1941 
1942             case 'w':
1943               laststart = b;
1944               BUF_PUSH (wordchar);
1945               break;
1946 
1947 
1948             case 'W':
1949               laststart = b;
1950               BUF_PUSH (notwordchar);
1951               break;
1952 
1953 
1954             case '<':
1955               BUF_PUSH (wordbeg);
1956               break;
1957 
1958             case '>':
1959               BUF_PUSH (wordend);
1960               break;
1961 
1962             case 'b':
1963               BUF_PUSH (wordbound);
1964               break;
1965 
1966             case 'B':
1967               BUF_PUSH (notwordbound);
1968               break;
1969 
1970             case '`':
1971               BUF_PUSH (begbuf);
1972               break;
1973 
1974             case '\'':
1975               BUF_PUSH (endbuf);
1976               break;
1977 
1978             case '1': case '2': case '3': case '4': case '5':
1979             case '6': case '7': case '8': case '9':
1980               if (syntax & RE_NO_BK_REFS)
1981                 goto normal_char;
1982 
1983               c1 = c - '0';
1984 
1985               if (c1 > regnum)
1986                 return REG_ESUBREG;
1987 
1988               /* Can't back reference to a subexpression if inside of it.  */
1989               if (group_in_compile_stack (compile_stack, c1))
1990                 goto normal_char;
1991 
1992               laststart = b;
1993               BUF_PUSH_2 (duplicate, c1);
1994               break;
1995 
1996 
1997             case '+':
1998             case '?':
1999               if (syntax & RE_BK_PLUS_QM)
2000                 goto handle_plus;
2001               else
2002                 goto normal_backslash;
2003 
2004             default:
2005             normal_backslash:
2006               /* You might think it would be useful for \ to mean
2007                  not to translate; but if we don't translate it
2008                  it will never match anything.  */
2009               c = TRANSLATE (c);
2010               goto normal_char;
2011             }
2012           break;
2013 
2014 
2015 	default:
2016         /* Expects the character in `c'.  */
2017 	normal_char:
2018 	      /* If no exactn currently being built.  */
2019           if (!pending_exact
2020 
2021               /* If last exactn not at current position.  */
2022               || pending_exact + *pending_exact + 1 != b
2023 
2024               /* We have only one byte following the exactn for the count.  */
2025 	      || *pending_exact == (1 << BYTEWIDTH) - 1
2026 
2027               /* If followed by a repetition operator.  */
2028               || *p == '*' || *p == '^'
2029 	      || ((syntax & RE_BK_PLUS_QM)
2030 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2031 		  : (*p == '+' || *p == '?'))
2032 	      || ((syntax & RE_INTERVALS)
2033                   && ((syntax & RE_NO_BK_BRACES)
2034 		      ? *p == '{'
2035                       : (p[0] == '\\' && p[1] == '{'))))
2036 	    {
2037 	      /* Start building a new exactn.  */
2038 
2039               laststart = b;
2040 
2041 	      BUF_PUSH_2 (exactn, 0);
2042 	      pending_exact = b - 1;
2043             }
2044 
2045 	  BUF_PUSH (c);
2046           (*pending_exact)++;
2047 	  break;
2048         } /* switch (c) */
2049     } /* while p != pend */
2050 
2051 
2052   /* Through the pattern now.  */
2053 
2054   if (fixup_alt_jump)
2055     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2056 
2057   if (!COMPILE_STACK_EMPTY)
2058     return REG_EPAREN;
2059 
2060   free (compile_stack.stack);
2061 
2062   /* We have succeeded; set the length of the buffer.  */
2063   bufp->used = b - bufp->buffer;
2064 
2065 #ifdef DEBUG
2066   if (debug)
2067     {
2068       DEBUG_PRINT1 ("\nCompiled pattern: ");
2069       print_compiled_pattern (bufp);
2070     }
2071 #endif /* DEBUG */
2072 
2073   return REG_NOERROR;
2074 } /* regex_compile */
2075 
2076 /* Subroutines for `regex_compile'.  */
2077 
2078 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
2079 
2080 static void
store_op1(op,loc,arg)2081 store_op1 (op, loc, arg)
2082     re_opcode_t op;
2083     unsigned char *loc;
2084     int arg;
2085 {
2086   *loc = (unsigned char) op;
2087   STORE_NUMBER (loc + 1, arg);
2088 }
2089 
2090 
2091 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2092 
2093 static void
store_op2(op,loc,arg1,arg2)2094 store_op2 (op, loc, arg1, arg2)
2095     re_opcode_t op;
2096     unsigned char *loc;
2097     int arg1, arg2;
2098 {
2099   *loc = (unsigned char) op;
2100   STORE_NUMBER (loc + 1, arg1);
2101   STORE_NUMBER (loc + 3, arg2);
2102 }
2103 
2104 
2105 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2106    for OP followed by two-byte integer parameter ARG.  */
2107 
2108 static void
insert_op1(op,loc,arg,end)2109 insert_op1 (op, loc, arg, end)
2110     re_opcode_t op;
2111     unsigned char *loc;
2112     int arg;
2113     unsigned char *end;
2114 {
2115   register unsigned char *pfrom = end;
2116   register unsigned char *pto = end + 3;
2117 
2118   while (pfrom != loc)
2119     *--pto = *--pfrom;
2120 
2121   store_op1 (op, loc, arg);
2122 }
2123 
2124 
2125 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2126 
2127 static void
insert_op2(op,loc,arg1,arg2,end)2128 insert_op2 (op, loc, arg1, arg2, end)
2129     re_opcode_t op;
2130     unsigned char *loc;
2131     int arg1, arg2;
2132     unsigned char *end;
2133 {
2134   register unsigned char *pfrom = end;
2135   register unsigned char *pto = end + 5;
2136 
2137   while (pfrom != loc)
2138     *--pto = *--pfrom;
2139 
2140   store_op2 (op, loc, arg1, arg2);
2141 }
2142 
2143 
2144 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2145    after an alternative or a begin-subexpression.  We assume there is at
2146    least one character before the ^.  */
2147 
2148 static boolean
at_begline_loc_p(pattern,p,syntax)2149 at_begline_loc_p (pattern, p, syntax)
2150     const char *pattern, *p;
2151     reg_syntax_t syntax;
2152 {
2153   const char *prev = p - 2;
2154   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2155 
2156   return
2157        /* After a subexpression?  */
2158        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2159        /* After an alternative?  */
2160     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2161 }
2162 
2163 
2164 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2165    at least one character after the $, i.e., `P < PEND'.  */
2166 
2167 static boolean
at_endline_loc_p(p,pend,syntax)2168 at_endline_loc_p (p, pend, syntax)
2169     const char *p, *pend;
2170     int syntax;
2171 {
2172   const char *next = p;
2173   boolean next_backslash = *next == '\\';
2174   const char *next_next = p + 1 < pend ? p + 1 : NULL;
2175 
2176   return
2177        /* Before a subexpression?  */
2178        (syntax & RE_NO_BK_PARENS ? *next == ')'
2179         : next_backslash && next_next && *next_next == ')')
2180        /* Before an alternative?  */
2181     || (syntax & RE_NO_BK_VBAR ? *next == '|'
2182         : next_backslash && next_next && *next_next == '|');
2183 }
2184 
2185 
2186 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2187    false if it's not.  */
2188 
2189 static boolean
group_in_compile_stack(compile_stack,regnum)2190 group_in_compile_stack (compile_stack, regnum)
2191     compile_stack_type compile_stack;
2192     regnum_t regnum;
2193 {
2194   int this_element;
2195 
2196   for (this_element = compile_stack.avail - 1;
2197        this_element >= 0;
2198        this_element--)
2199     if (compile_stack.stack[this_element].regnum == regnum)
2200       return true;
2201 
2202   return false;
2203 }
2204 
2205 
2206 /* Read the ending character of a range (in a bracket expression) from the
2207    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2208    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2209    Then we set the translation of all bits between the starting and
2210    ending characters (inclusive) in the compiled pattern B.
2211 
2212    Return an error code.
2213 
2214    We use these short variable names so we can use the same macros as
2215    `regex_compile' itself.  */
2216 
2217 static reg_errcode_t
compile_range(p_ptr,pend,translate,syntax,b)2218 compile_range (p_ptr, pend, translate, syntax, b)
2219     const char **p_ptr, *pend;
2220     char *translate;
2221     reg_syntax_t syntax;
2222     unsigned char *b;
2223 {
2224   int this_char;
2225 
2226   const char *p = *p_ptr;
2227   int range_start, range_end;
2228 
2229   if (p == pend)
2230     return REG_ERANGE;
2231 
2232   /* Even though the pattern is a signed `char *', we need to fetch
2233      with unsigned char *'s; if the high bit of the pattern character
2234      is set, the range endpoints will be negative if we fetch using a
2235      signed char *.
2236 
2237      We also want to fetch the endpoints without translating them; the
2238      appropriate translation is done in the bit-setting loop below.  */
2239   range_start = ((unsigned char *) p)[-2];
2240   range_end   = ((unsigned char *) p)[0];
2241 
2242   /* Have to increment the pointer into the pattern string, so the
2243      caller isn't still at the ending character.  */
2244   (*p_ptr)++;
2245 
2246   /* If the start is after the end, the range is empty.  */
2247   if (range_start > range_end)
2248     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2249 
2250   /* Here we see why `this_char' has to be larger than an `unsigned
2251      char' -- the range is inclusive, so if `range_end' == 0xff
2252      (assuming 8-bit characters), we would otherwise go into an infinite
2253      loop, since all characters <= 0xff.  */
2254   for (this_char = range_start; this_char <= range_end; this_char++)
2255     {
2256       SET_LIST_BIT (TRANSLATE (this_char));
2257     }
2258 
2259   return REG_NOERROR;
2260 }
2261 
2262 /* Failure stack declarations and macros; both re_compile_fastmap and
2263    re_match_2 use a failure stack.  These have to be macros because of
2264    REGEX_ALLOCATE.  */
2265 
2266 
2267 /* Number of failure points for which to initially allocate space
2268    when matching.  If this number is exceeded, we allocate more
2269    space, so it is not a hard limit.  */
2270 #ifndef INIT_FAILURE_ALLOC
2271 #define INIT_FAILURE_ALLOC 5
2272 #endif
2273 
2274 /* Roughly the maximum number of failure points on the stack.  Would be
2275    exactly that if always used MAX_FAILURE_SPACE each time we failed.
2276    This is a variable only so users of regex can assign to it; we never
2277    change it ourselves.  */
2278 int re_max_failures = 2000;
2279 
2280 typedef const unsigned char *fail_stack_elt_t;
2281 
2282 typedef struct
2283 {
2284   fail_stack_elt_t *stack;
2285   unsigned size;
2286   unsigned avail;			/* Offset of next open position.  */
2287 } fail_stack_type;
2288 
2289 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
2290 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2291 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
2292 #define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
2293 
2294 
2295 /* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
2296 
2297 #define INIT_FAIL_STACK()						\
2298   do {									\
2299     fail_stack.stack = (fail_stack_elt_t *)				\
2300       REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
2301 									\
2302     if (fail_stack.stack == NULL)					\
2303       return -2;							\
2304 									\
2305     fail_stack.size = INIT_FAILURE_ALLOC;				\
2306     fail_stack.avail = 0;						\
2307   } while (0)
2308 
2309 
2310 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2311 
2312    Return 1 if succeeds, and 0 if either ran out of memory
2313    allocating space for it or it was already too large.
2314 
2315    REGEX_REALLOCATE requires `destination' be declared.   */
2316 
2317 #define DOUBLE_FAIL_STACK(fail_stack)					\
2318   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
2319    ? 0									\
2320    : ((fail_stack).stack = (fail_stack_elt_t *)				\
2321         REGEX_REALLOCATE ((fail_stack).stack, 				\
2322           (fail_stack).size * sizeof (fail_stack_elt_t),		\
2323           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
2324 									\
2325       (fail_stack).stack == NULL					\
2326       ? 0								\
2327       : ((fail_stack).size <<= 1, 					\
2328          1)))
2329 
2330 
2331 /* Push PATTERN_OP on FAIL_STACK.
2332 
2333    Return 1 if was able to do so and 0 if ran out of memory allocating
2334    space to do so.  */
2335 #define PUSH_PATTERN_OP(pattern_op, fail_stack)				\
2336   ((FAIL_STACK_FULL ()							\
2337     && !DOUBLE_FAIL_STACK (fail_stack))					\
2338     ? 0									\
2339     : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,		\
2340        1))
2341 
2342 /* This pushes an item onto the failure stack.  Must be a four-byte
2343    value.  Assumes the variable `fail_stack'.  Probably should only
2344    be called from within `PUSH_FAILURE_POINT'.  */
2345 #define PUSH_FAILURE_ITEM(item)						\
2346   fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2347 
2348 /* The complement operation.  Assumes `fail_stack' is nonempty.  */
2349 #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2350 
2351 /* Used to omit pushing failure point id's when we're not debugging.  */
2352 #ifdef DEBUG
2353 #define DEBUG_PUSH PUSH_FAILURE_ITEM
2354 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2355 #else
2356 #define DEBUG_PUSH(item)
2357 #define DEBUG_POP(item_addr)
2358 #endif
2359 
2360 
2361 /* Push the information about the state we will need
2362    if we ever fail back to it.
2363 
2364    Requires variables fail_stack, regstart, regend, reg_info, and
2365    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
2366    declared.
2367 
2368    Does `return FAILURE_CODE' if runs out of memory.  */
2369 
2370 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
2371   do {									\
2372     char *destination;							\
2373     /* Must be int, so when we don't save any registers, the arithmetic	\
2374        of 0 + -1 isn't done as unsigned.  */				\
2375     unsigned int this_reg;							\
2376     									\
2377     DEBUG_STATEMENT (failure_id++);					\
2378     DEBUG_STATEMENT (nfailure_points_pushed++);				\
2379     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
2380     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
2381     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
2382 									\
2383     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
2384     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
2385 									\
2386     /* Ensure we have enough space allocated for what we will push.  */	\
2387     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
2388       {									\
2389         if (!DOUBLE_FAIL_STACK (fail_stack))			\
2390           return failure_code;						\
2391 									\
2392         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
2393 		       (fail_stack).size);				\
2394         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2395       }									\
2396 									\
2397     /* Push the info, starting with the registers.  */			\
2398     DEBUG_PRINT1 ("\n");						\
2399 									\
2400     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;	\
2401          this_reg++)							\
2402       {									\
2403 	DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);			\
2404         DEBUG_STATEMENT (num_regs_pushed++);				\
2405 									\
2406 	DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);		\
2407         PUSH_FAILURE_ITEM (regstart[this_reg]);				\
2408                                                                         \
2409 	DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
2410         PUSH_FAILURE_ITEM (regend[this_reg]);				\
2411 									\
2412 	DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
2413         DEBUG_PRINT2 (" match_null=%d",					\
2414                       REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
2415         DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
2416         DEBUG_PRINT2 (" matched_something=%d",				\
2417                       MATCHED_SOMETHING (reg_info[this_reg]));		\
2418         DEBUG_PRINT2 (" ever_matched=%d",				\
2419                       EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
2420 	DEBUG_PRINT1 ("\n");						\
2421         PUSH_FAILURE_ITEM (reg_info[this_reg].word);			\
2422       }									\
2423 									\
2424     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
2425     PUSH_FAILURE_ITEM (lowest_active_reg);				\
2426 									\
2427     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
2428     PUSH_FAILURE_ITEM (highest_active_reg);				\
2429 									\
2430     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
2431     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
2432     PUSH_FAILURE_ITEM (pattern_place);					\
2433 									\
2434     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
2435     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
2436 				 size2);				\
2437     DEBUG_PRINT1 ("'\n");						\
2438     PUSH_FAILURE_ITEM (string_place);					\
2439 									\
2440     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
2441     DEBUG_PUSH (failure_id);						\
2442   } while (0)
2443 
2444 /* This is the number of items that are pushed and popped on the stack
2445    for each register.  */
2446 #define NUM_REG_ITEMS  3
2447 
2448 /* Individual items aside from the registers.  */
2449 #ifdef DEBUG
2450 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
2451 #else
2452 #define NUM_NONREG_ITEMS 4
2453 #endif
2454 
2455 /* We push at most this many items on the stack.  */
2456 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2457 
2458 /* We actually push this many items.  */
2459 #define NUM_FAILURE_ITEMS						\
2460   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
2461     + NUM_NONREG_ITEMS)
2462 
2463 /* How many items can still be added to the stack without overflowing it.  */
2464 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2465 
2466 
2467 /* Pops what PUSH_FAIL_STACK pushes.
2468 
2469    We restore into the parameters, all of which should be lvalues:
2470      STR -- the saved data position.
2471      PAT -- the saved pattern position.
2472      LOW_REG, HIGH_REG -- the highest and lowest active registers.
2473      REGSTART, REGEND -- arrays of string positions.
2474      REG_INFO -- array of information about each subexpression.
2475 
2476    Also assumes the variables `fail_stack' and (if debugging), `bufp',
2477    `pend', `string1', `size1', `string2', and `size2'.  */
2478 
2479 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2480 {									\
2481   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
2482   int this_reg;								\
2483   const unsigned char *string_temp;					\
2484 									\
2485   assert (!FAIL_STACK_EMPTY ());					\
2486 									\
2487   /* Remove failure points and point to how many regs pushed.  */	\
2488   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
2489   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
2490   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
2491 									\
2492   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
2493 									\
2494   DEBUG_POP (&failure_id);						\
2495   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
2496 									\
2497   /* If the saved string location is NULL, it came from an		\
2498      on_failure_keep_string_jump opcode, and we want to throw away the	\
2499      saved NULL, thus retaining our current position in the string.  */	\
2500   string_temp = POP_FAILURE_ITEM ();					\
2501   if (string_temp != NULL)						\
2502     str = (const char *) string_temp;					\
2503 									\
2504   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
2505   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
2506   DEBUG_PRINT1 ("'\n");							\
2507 									\
2508   pat = (unsigned char *) POP_FAILURE_ITEM ();				\
2509   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
2510   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
2511 									\
2512   /* Restore register info.  */						\
2513   high_reg = (uintptr_t) POP_FAILURE_ITEM ();				\
2514   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
2515 									\
2516   low_reg = (uintptr_t) POP_FAILURE_ITEM ();				\
2517   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
2518 									\
2519   for (this_reg = high_reg; (unsigned) this_reg >= low_reg; this_reg--)	\
2520     {									\
2521       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);			\
2522 									\
2523       reg_info[this_reg].word = POP_FAILURE_ITEM ();			\
2524       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);		\
2525 									\
2526       regend[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2527       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
2528 									\
2529       regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2530       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);		\
2531     }									\
2532 									\
2533   DEBUG_STATEMENT (nfailure_points_popped++);				\
2534 } /* POP_FAILURE_POINT */
2535 
2536 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2537    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2538    characters can start a string that matches the pattern.  This fastmap
2539    is used by re_search to skip quickly over impossible starting points.
2540 
2541    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2542    area as BUFP->fastmap.
2543 
2544    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2545    the pattern buffer.
2546 
2547    Returns 0 if we succeed, -2 if an internal error.   */
2548 
2549 int
re_compile_fastmap(bufp)2550 re_compile_fastmap (bufp)
2551      struct re_pattern_buffer *bufp;
2552 {
2553   int j, k;
2554   fail_stack_type fail_stack;
2555 #ifndef REGEX_MALLOC
2556   char *destination;
2557 #endif
2558   /* We don't push any register information onto the failure stack.  */
2559   unsigned num_regs = 0;
2560 
2561   register char *fastmap = bufp->fastmap;
2562   unsigned char *pattern = bufp->buffer;
2563   unsigned long size = bufp->used;
2564   const unsigned char *p = pattern;
2565   register unsigned char *pend = pattern + size;
2566 
2567   /* Assume that each path through the pattern can be null until
2568      proven otherwise.  We set this false at the bottom of switch
2569      statement, to which we get only if a particular path doesn't
2570      match the empty string.  */
2571   boolean path_can_be_null = true;
2572 
2573   /* We aren't doing a `succeed_n' to begin with.  */
2574   boolean succeed_n_p = false;
2575 
2576   assert (fastmap != NULL && p != NULL);
2577 
2578   INIT_FAIL_STACK ();
2579   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2580   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
2581   bufp->can_be_null = 0;
2582 
2583   while (p != pend || !FAIL_STACK_EMPTY ())
2584     {
2585       if (p == pend)
2586         {
2587           bufp->can_be_null |= path_can_be_null;
2588 
2589           /* Reset for next path.  */
2590           path_can_be_null = true;
2591 
2592           p = fail_stack.stack[--fail_stack.avail];
2593 	}
2594 
2595       /* We should never be about to go beyond the end of the pattern.  */
2596       assert (p < pend);
2597 
2598 #ifdef SWITCH_ENUM_BUG
2599       switch ((int) ((re_opcode_t) *p++))
2600 #else
2601       switch ((re_opcode_t) *p++)
2602 #endif
2603 	{
2604 
2605         /* I guess the idea here is to simply not bother with a fastmap
2606            if a backreference is used, since it's too hard to figure out
2607            the fastmap for the corresponding group.  Setting
2608            `can_be_null' stops `re_search_2' from using the fastmap, so
2609            that is all we do.  */
2610 	case duplicate:
2611 	  bufp->can_be_null = 1;
2612           return 0;
2613 
2614 
2615       /* Following are the cases which match a character.  These end
2616          with `break'.  */
2617 
2618 	case exactn:
2619           fastmap[p[1]] = 1;
2620 	  break;
2621 
2622 
2623         case charset:
2624           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2625 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2626               fastmap[j] = 1;
2627 	  break;
2628 
2629 
2630 	case charset_not:
2631 	  /* Chars beyond end of map must be allowed.  */
2632 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2633             fastmap[j] = 1;
2634 
2635 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2636 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2637               fastmap[j] = 1;
2638           break;
2639 
2640 
2641 	case wordchar:
2642 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2643 	    if (SYNTAX (j) == Sword)
2644 	      fastmap[j] = 1;
2645 	  break;
2646 
2647 
2648 	case notwordchar:
2649 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2650 	    if (SYNTAX (j) != Sword)
2651 	      fastmap[j] = 1;
2652 	  break;
2653 
2654 
2655         case anychar:
2656           /* `.' matches anything ...  */
2657 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2658             fastmap[j] = 1;
2659 
2660           /* ... except perhaps newline.  */
2661           if (!(bufp->syntax & RE_DOT_NEWLINE))
2662             fastmap['\n'] = 0;
2663 
2664           /* Return if we have already set `can_be_null'; if we have,
2665              then the fastmap is irrelevant.  Something's wrong here.  */
2666 	  else if (bufp->can_be_null)
2667 	    return 0;
2668 
2669           /* Otherwise, have to check alternative paths.  */
2670 	  break;
2671 
2672 
2673 #ifdef emacs
2674         case syntaxspec:
2675 	  k = *p++;
2676 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2677 	    if (SYNTAX (j) == (enum syntaxcode) k)
2678 	      fastmap[j] = 1;
2679 	  break;
2680 
2681 
2682 	case notsyntaxspec:
2683 	  k = *p++;
2684 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2685 	    if (SYNTAX (j) != (enum syntaxcode) k)
2686 	      fastmap[j] = 1;
2687 	  break;
2688 
2689 
2690       /* All cases after this match the empty string.  These end with
2691          `continue'.  */
2692 
2693 
2694 	case before_dot:
2695 	case at_dot:
2696 	case after_dot:
2697           continue;
2698 #endif /* not emacs */
2699 
2700 
2701         case no_op:
2702         case begline:
2703         case endline:
2704 	case begbuf:
2705 	case endbuf:
2706 	case wordbound:
2707 	case notwordbound:
2708 	case wordbeg:
2709 	case wordend:
2710         case push_dummy_failure:
2711           continue;
2712 
2713 
2714 	case jump_n:
2715         case pop_failure_jump:
2716 	case maybe_pop_jump:
2717 	case jump:
2718         case jump_past_alt:
2719 	case dummy_failure_jump:
2720           EXTRACT_NUMBER_AND_INCR (j, p);
2721 	  p += j;
2722 	  if (j > 0)
2723 	    continue;
2724 
2725           /* Jump backward implies we just went through the body of a
2726              loop and matched nothing.  Opcode jumped to should be
2727              `on_failure_jump' or `succeed_n'.  Just treat it like an
2728              ordinary jump.  For a * loop, it has pushed its failure
2729              point already; if so, discard that as redundant.  */
2730           if ((re_opcode_t) *p != on_failure_jump
2731 	      && (re_opcode_t) *p != succeed_n)
2732 	    continue;
2733 
2734           p++;
2735           EXTRACT_NUMBER_AND_INCR (j, p);
2736           p += j;
2737 
2738           /* If what's on the stack is where we are now, pop it.  */
2739           if (!FAIL_STACK_EMPTY ()
2740 	      && fail_stack.stack[fail_stack.avail - 1] == p)
2741             fail_stack.avail--;
2742 
2743           continue;
2744 
2745 
2746         case on_failure_jump:
2747         case on_failure_keep_string_jump:
2748 	handle_on_failure_jump:
2749           EXTRACT_NUMBER_AND_INCR (j, p);
2750 
2751           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2752              end of the pattern.  We don't want to push such a point,
2753              since when we restore it above, entering the switch will
2754              increment `p' past the end of the pattern.  We don't need
2755              to push such a point since we obviously won't find any more
2756              fastmap entries beyond `pend'.  Such a pattern can match
2757              the null string, though.  */
2758           if (p + j < pend)
2759             {
2760               if (!PUSH_PATTERN_OP (p + j, fail_stack))
2761                 return -2;
2762             }
2763           else
2764             bufp->can_be_null = 1;
2765 
2766           if (succeed_n_p)
2767             {
2768               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
2769               succeed_n_p = false;
2770 	    }
2771 
2772           continue;
2773 
2774 
2775 	case succeed_n:
2776           /* Get to the number of times to succeed.  */
2777           p += 2;
2778 
2779           /* Increment p past the n for when k != 0.  */
2780           EXTRACT_NUMBER_AND_INCR (k, p);
2781           if (k == 0)
2782 	    {
2783               p -= 4;
2784   	      succeed_n_p = true;  /* Spaghetti code alert.  */
2785               goto handle_on_failure_jump;
2786             }
2787           continue;
2788 
2789 
2790 	case set_number_at:
2791           p += 4;
2792           continue;
2793 
2794 
2795 	case start_memory:
2796         case stop_memory:
2797 	  p += 2;
2798 	  continue;
2799 
2800 
2801 	default:
2802           abort (); /* We have listed all the cases.  */
2803         } /* switch *p++ */
2804 
2805       /* Getting here means we have found the possible starting
2806          characters for one path of the pattern -- and that the empty
2807          string does not match.  We need not follow this path further.
2808          Instead, look at the next alternative (remembered on the
2809          stack), or quit if no more.  The test at the top of the loop
2810          does these things.  */
2811       path_can_be_null = false;
2812       p = pend;
2813     } /* while p */
2814 
2815   /* Set `can_be_null' for the last path (also the first path, if the
2816      pattern is empty).  */
2817   bufp->can_be_null |= path_can_be_null;
2818   return 0;
2819 } /* re_compile_fastmap */
2820 
2821 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2822    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
2823    this memory for recording register information.  STARTS and ENDS
2824    must be allocated using the malloc library routine, and must each
2825    be at least NUM_REGS * sizeof (regoff_t) bytes long.
2826 
2827    If NUM_REGS == 0, then subsequent matches should allocate their own
2828    register data.
2829 
2830    Unless this function is called, the first search or match using
2831    PATTERN_BUFFER will allocate its own register data, without
2832    freeing the old data.  */
2833 
2834 void
re_set_registers(bufp,regs,num_regs,starts,ends)2835 re_set_registers (bufp, regs, num_regs, starts, ends)
2836     struct re_pattern_buffer *bufp;
2837     struct re_registers *regs;
2838     unsigned num_regs;
2839     regoff_t *starts, *ends;
2840 {
2841   if (num_regs)
2842     {
2843       bufp->regs_allocated = REGS_REALLOCATE;
2844       regs->num_regs = num_regs;
2845       regs->start = starts;
2846       regs->end = ends;
2847     }
2848   else
2849     {
2850       bufp->regs_allocated = REGS_UNALLOCATED;
2851       regs->num_regs = 0;
2852       regs->start = regs->end = (regoff_t) 0;
2853     }
2854 }
2855 
2856 /* Searching routines.  */
2857 
2858 /* Like re_search_2, below, but only one string is specified, and
2859    doesn't let you say where to stop matching. */
2860 
2861 int
re_search(bufp,string,size,startpos,range,regs)2862 re_search (bufp, string, size, startpos, range, regs)
2863      struct re_pattern_buffer *bufp;
2864      const char *string;
2865      int size, startpos, range;
2866      struct re_registers *regs;
2867 {
2868   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2869 		      regs, size);
2870 }
2871 
2872 
2873 /* Using the compiled pattern in BUFP->buffer, first tries to match the
2874    virtual concatenation of STRING1 and STRING2, starting first at index
2875    STARTPOS, then at STARTPOS + 1, and so on.
2876 
2877    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2878 
2879    RANGE is how far to scan while trying to match.  RANGE = 0 means try
2880    only at STARTPOS; in general, the last start tried is STARTPOS +
2881    RANGE.
2882 
2883    In REGS, return the indices of the virtual concatenation of STRING1
2884    and STRING2 that matched the entire BUFP->buffer and its contained
2885    subexpressions.
2886 
2887    Do not consider matching one past the index STOP in the virtual
2888    concatenation of STRING1 and STRING2.
2889 
2890    We return either the position in the strings at which the match was
2891    found, -1 if no match, or -2 if error (such as failure
2892    stack overflow).  */
2893 
2894 int
re_search_2(bufp,string1,size1,string2,size2,startpos,range,regs,stop)2895 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2896      struct re_pattern_buffer *bufp;
2897      const char *string1, *string2;
2898      int size1, size2;
2899      int startpos;
2900      int range;
2901      struct re_registers *regs;
2902      int stop;
2903 {
2904   int val;
2905   register char *fastmap = bufp->fastmap;
2906   register char *translate = bufp->translate;
2907   int total_size = size1 + size2;
2908   int endpos = startpos + range;
2909 
2910   /* Check for out-of-range STARTPOS.  */
2911   if (startpos < 0 || startpos > total_size)
2912     return -1;
2913 
2914   /* Fix up RANGE if it might eventually take us outside
2915      the virtual concatenation of STRING1 and STRING2.  */
2916   if (endpos < -1)
2917     range = -1 - startpos;
2918   else if (endpos > total_size)
2919     range = total_size - startpos;
2920 
2921   /* If the search isn't to be a backwards one, don't waste time in a
2922      search for a pattern that must be anchored.  */
2923   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2924     {
2925       if (startpos > 0)
2926 	return -1;
2927       else
2928 	range = 1;
2929     }
2930 
2931   /* Update the fastmap now if not correct already.  */
2932   if (fastmap && !bufp->fastmap_accurate)
2933     if (re_compile_fastmap (bufp) == -2)
2934       return -2;
2935 
2936   /* Loop through the string, looking for a place to start matching.  */
2937   for (;;)
2938     {
2939       /* If a fastmap is supplied, skip quickly over characters that
2940          cannot be the start of a match.  If the pattern can match the
2941          null string, however, we don't need to skip characters; we want
2942          the first null string.  */
2943       if (fastmap && startpos < total_size && !bufp->can_be_null)
2944 	{
2945 	  if (range > 0)	/* Searching forwards.  */
2946 	    {
2947 	      register const char *d;
2948 	      register int lim = 0;
2949 	      int irange = range;
2950 
2951               if (startpos < size1 && startpos + range >= size1)
2952                 lim = range - (size1 - startpos);
2953 
2954 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2955 
2956               /* Written out as an if-else to avoid testing `translate'
2957                  inside the loop.  */
2958 	      if (translate)
2959                 while (range > lim
2960                        && !fastmap[(unsigned char)
2961 				   translate[(unsigned char) *d++]])
2962                   range--;
2963 	      else
2964                 while (range > lim && !fastmap[(unsigned char) *d++])
2965                   range--;
2966 
2967 	      startpos += irange - range;
2968 	    }
2969 	  else				/* Searching backwards.  */
2970 	    {
2971 	      register char c = (size1 == 0 || startpos >= size1
2972                                  ? string2[startpos - size1]
2973                                  : string1[startpos]);
2974 
2975 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
2976 		goto advance;
2977 	    }
2978 	}
2979 
2980       /* If can't match the null string, and that's all we have left, fail.  */
2981       if (range >= 0 && startpos == total_size && fastmap
2982           && !bufp->can_be_null)
2983 	return -1;
2984 
2985       val = re_match_2 (bufp, string1, size1, string2, size2,
2986 	                startpos, regs, stop);
2987       if (val >= 0)
2988 	return startpos;
2989 
2990       if (val == -2)
2991 	return -2;
2992 
2993     advance:
2994       if (!range)
2995         break;
2996       else if (range > 0)
2997         {
2998           range--;
2999           startpos++;
3000         }
3001       else
3002         {
3003           range++;
3004           startpos--;
3005         }
3006     }
3007   return -1;
3008 } /* re_search_2 */
3009 
3010 /* Declarations and macros for re_match_2.  */
3011 union register_info_type;
3012 static int bcmp_translate (const unsigned char *s1, const unsigned char *s2, int len, char *translate);
3013 static boolean alt_match_null_string_p (unsigned char *p, unsigned char *end, union register_info_type *reg_info),
3014                common_op_match_null_string_p (unsigned char **p, unsigned char *end, union register_info_type *reg_info),
3015                group_match_null_string_p (unsigned char **p, unsigned char *end, union register_info_type *reg_info);
3016 
3017 /* Structure for per-register (a.k.a. per-group) information.
3018    This must not be longer than one word, because we push this value
3019    onto the failure stack.  Other register information, such as the
3020    starting and ending positions (which are addresses), and the list of
3021    inner groups (which is a bits list) are maintained in separate
3022    variables.
3023 
3024    We are making a (strictly speaking) nonportable assumption here: that
3025    the compiler will pack our bit fields into something that fits into
3026    the type of `word', i.e., is something that fits into one item on the
3027    failure stack.  */
3028 typedef union register_info_type
3029 {
3030   fail_stack_elt_t word;
3031   struct
3032   {
3033       /* This field is one if this group can match the empty string,
3034          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
3035 #define MATCH_NULL_UNSET_VALUE 3
3036     unsigned match_null_string_p : 2;
3037     unsigned is_active : 1;
3038     unsigned matched_something : 1;
3039     unsigned ever_matched_something : 1;
3040   } bits;
3041 } register_info_type;
3042 
3043 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
3044 #define IS_ACTIVE(R)  ((R).bits.is_active)
3045 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
3046 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
3047 
3048 
3049 /* Call this when have matched a real character; it sets `matched' flags
3050    for the subexpressions which we are currently inside.  Also records
3051    that those subexprs have matched.  */
3052 #define SET_REGS_MATCHED()						\
3053   do									\
3054     {									\
3055       unsigned r;							\
3056       for (r = lowest_active_reg; r <= highest_active_reg; r++)		\
3057         {								\
3058           MATCHED_SOMETHING (reg_info[r])				\
3059             = EVER_MATCHED_SOMETHING (reg_info[r])			\
3060             = 1;							\
3061         }								\
3062     }									\
3063   while (0)
3064 
3065 
3066 /* This converts PTR, a pointer into one of the search strings `string1'
3067    and `string2' into an offset from the beginning of that string.  */
3068 #define POINTER_TO_OFFSET(ptr)						\
3069   (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3070 
3071 /* Registers are set to a sentinel when they haven't yet matched.  */
3072 #define REG_UNSET_VALUE ((char *) -1)
3073 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3074 
3075 
3076 /* Macros for dealing with the split strings in re_match_2.  */
3077 
3078 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3079 
3080 /* Call before fetching a character with *d.  This switches over to
3081    string2 if necessary.  */
3082 #define PREFETCH()							\
3083   while (d == dend)						    	\
3084     {									\
3085       /* End of string2 => fail.  */					\
3086       if (dend == end_match_2) 						\
3087         goto fail;							\
3088       /* End of string1 => advance to string2.  */ 			\
3089       d = string2;						        \
3090       dend = end_match_2;						\
3091     }
3092 
3093 
3094 /* Test if at very beginning or at very end of the virtual concatenation
3095    of `string1' and `string2'.  If only one string, it's `string2'.  */
3096 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3097 #define AT_STRINGS_END(d) ((d) == end2)
3098 
3099 
3100 /* Test if D points to a character which is word-constituent.  We have
3101    two special cases to check for: if past the end of string1, look at
3102    the first character in string2; and if before the beginning of
3103    string2, look at the last character in string1.  */
3104 #define WORDCHAR_P(d)							\
3105   (SYNTAX ((d) == end1 ? *string2					\
3106            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3107    == Sword)
3108 
3109 /* Test if the character before D and the one at D differ with respect
3110    to being word-constituent.  */
3111 #define AT_WORD_BOUNDARY(d)						\
3112   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3113    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3114 
3115 
3116 /* Free everything we malloc.  */
3117 #ifdef REGEX_MALLOC
3118 #define FREE_VAR(var) if (var) free (var); var = NULL
3119 #define FREE_VARIABLES()						\
3120   do {									\
3121     FREE_VAR (fail_stack.stack);					\
3122     FREE_VAR (regstart);						\
3123     FREE_VAR (regend);							\
3124     FREE_VAR (old_regstart);						\
3125     FREE_VAR (old_regend);						\
3126     FREE_VAR (best_regstart);						\
3127     FREE_VAR (best_regend);						\
3128     FREE_VAR (reg_info);						\
3129     FREE_VAR (reg_dummy);						\
3130     FREE_VAR (reg_info_dummy);						\
3131   } while (0)
3132 #else /* not REGEX_MALLOC */
3133 /* Some MIPS systems (at least) want this to free alloca'd storage.  */
3134 #define FREE_VARIABLES() alloca (0)
3135 #endif /* not REGEX_MALLOC */
3136 
3137 
3138 /* These values must meet several constraints.  They must not be valid
3139    register values; since we have a limit of 255 registers (because
3140    we use only one byte in the pattern for the register number), we can
3141    use numbers larger than 255.  They must differ by 1, because of
3142    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3143    be larger than the value for the highest register, so we do not try
3144    to actually save any registers when none are active.  */
3145 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3146 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3147 
3148 /* Matching routines.  */
3149 
3150 #ifndef emacs   /* Emacs never uses this.  */
3151 /* re_match is like re_match_2 except it takes only a single string.  */
3152 
3153 int
re_match(bufp,string,size,pos,regs)3154 re_match (bufp, string, size, pos, regs)
3155      struct re_pattern_buffer *bufp;
3156      const char *string;
3157      int size, pos;
3158      struct re_registers *regs;
3159  {
3160   return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
3161 }
3162 #endif /* not emacs */
3163 
3164 
3165 /* re_match_2 matches the compiled pattern in BUFP against the
3166    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3167    and SIZE2, respectively).  We start matching at POS, and stop
3168    matching at STOP.
3169 
3170    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3171    store offsets for the substring each group matched in REGS.  See the
3172    documentation for exactly how many groups we fill.
3173 
3174    We return -1 if no match, -2 if an internal error (such as the
3175    failure stack overflowing).  Otherwise, we return the length of the
3176    matched substring.  */
3177 
3178 int
re_match_2(bufp,string1,size1,string2,size2,pos,regs,stop)3179 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3180      struct re_pattern_buffer *bufp;
3181      const char *string1, *string2;
3182      int size1, size2;
3183      int pos;
3184      struct re_registers *regs;
3185      int stop;
3186 {
3187   /* General temporaries.  */
3188   int mcnt;
3189   unsigned char *p1;
3190 
3191   /* Just past the end of the corresponding string.  */
3192   const char *end1, *end2;
3193 
3194   /* Pointers into string1 and string2, just past the last characters in
3195      each to consider matching.  */
3196   const char *end_match_1, *end_match_2;
3197 
3198   /* Where we are in the data, and the end of the current string.  */
3199   const char *d, *dend;
3200 
3201   /* Where we are in the pattern, and the end of the pattern.  */
3202   unsigned char *p = bufp->buffer;
3203   register unsigned char *pend = p + bufp->used;
3204 
3205   /* We use this to map every character in the string.  */
3206   char *translate = bufp->translate;
3207 
3208   /* Failure point stack.  Each place that can handle a failure further
3209      down the line pushes a failure point on this stack.  It consists of
3210      restart, regend, and reg_info for all registers corresponding to
3211      the subexpressions we're currently inside, plus the number of such
3212      registers, and, finally, two char *'s.  The first char * is where
3213      to resume scanning the pattern; the second one is where to resume
3214      scanning the strings.  If the latter is zero, the failure point is
3215      a ``dummy''; if a failure happens and the failure point is a dummy,
3216      it gets discarded and the next next one is tried.  */
3217   fail_stack_type fail_stack;
3218 #ifdef DEBUG
3219   static unsigned failure_id = 0;
3220   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3221 #endif
3222 
3223   /* We fill all the registers internally, independent of what we
3224      return, for use in backreferences.  The number here includes
3225      an element for register zero.  */
3226   unsigned num_regs = bufp->re_nsub + 1;
3227 
3228   /* The currently active registers.  */
3229   uintptr_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3230   uintptr_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3231 
3232   /* Information on the contents of registers. These are pointers into
3233      the input strings; they record just what was matched (on this
3234      attempt) by a subexpression part of the pattern, that is, the
3235      regnum-th regstart pointer points to where in the pattern we began
3236      matching and the regnum-th regend points to right after where we
3237      stopped matching the regnum-th subexpression.  (The zeroth register
3238      keeps track of what the whole pattern matches.)  */
3239   const char **regstart = 0, **regend = 0;
3240 
3241   /* If a group that's operated upon by a repetition operator fails to
3242      match anything, then the register for its start will need to be
3243      restored because it will have been set to wherever in the string we
3244      are when we last see its open-group operator.  Similarly for a
3245      register's end.  */
3246   const char **old_regstart = 0, **old_regend = 0;
3247 
3248   /* The is_active field of reg_info helps us keep track of which (possibly
3249      nested) subexpressions we are currently in. The matched_something
3250      field of reg_info[reg_num] helps us tell whether or not we have
3251      matched any of the pattern so far this time through the reg_num-th
3252      subexpression.  These two fields get reset each time through any
3253      loop their register is in.  */
3254   register_info_type *reg_info = 0;
3255 
3256   /* The following record the register info as found in the above
3257      variables when we find a match better than any we've seen before.
3258      This happens as we backtrack through the failure points, which in
3259      turn happens only if we have not yet matched the entire string. */
3260   unsigned best_regs_set = false;
3261   const char **best_regstart = 0, **best_regend = 0;
3262 
3263   /* Logically, this is `best_regend[0]'.  But we don't want to have to
3264      allocate space for that if we're not allocating space for anything
3265      else (see below).  Also, we never need info about register 0 for
3266      any of the other register vectors, and it seems rather a kludge to
3267      treat `best_regend' differently than the rest.  So we keep track of
3268      the end of the best match so far in a separate variable.  We
3269      initialize this to NULL so that when we backtrack the first time
3270      and need to test it, it's not garbage.  */
3271   const char *match_end = NULL;
3272 
3273   /* Used when we pop values we don't care about.  */
3274   const char **reg_dummy = 0;
3275   register_info_type *reg_info_dummy = 0;
3276 
3277 #ifdef DEBUG
3278   /* Counts the total number of registers pushed.  */
3279   unsigned num_regs_pushed = 0;
3280 #endif
3281 
3282   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3283 
3284   INIT_FAIL_STACK ();
3285 
3286   /* Do not bother to initialize all the register variables if there are
3287      no groups in the pattern, as it takes a fair amount of time.  If
3288      there are groups, we include space for register 0 (the whole
3289      pattern), even though we never use it, since it simplifies the
3290      array indexing.  We should fix this.  */
3291   if (bufp->re_nsub)
3292     {
3293       regstart = REGEX_TALLOC (num_regs, const char *);
3294       regend = REGEX_TALLOC (num_regs, const char *);
3295       old_regstart = REGEX_TALLOC (num_regs, const char *);
3296       old_regend = REGEX_TALLOC (num_regs, const char *);
3297       best_regstart = REGEX_TALLOC (num_regs, const char *);
3298       best_regend = REGEX_TALLOC (num_regs, const char *);
3299       reg_info = REGEX_TALLOC (num_regs, register_info_type);
3300       reg_dummy = REGEX_TALLOC (num_regs, const char *);
3301       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3302 
3303       if (!(regstart && regend && old_regstart && old_regend && reg_info
3304             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3305         {
3306           FREE_VARIABLES ();
3307           return -2;
3308         }
3309     }
3310 #ifdef REGEX_MALLOC
3311   else
3312     {
3313       /* We must initialize all our variables to NULL, so that
3314          `FREE_VARIABLES' doesn't try to free them.  */
3315       regstart = regend = old_regstart = old_regend = best_regstart
3316         = best_regend = reg_dummy = NULL;
3317       reg_info = reg_info_dummy = (register_info_type *) NULL;
3318     }
3319 #endif /* REGEX_MALLOC */
3320 
3321   /* The starting position is bogus.  */
3322   if (pos < 0 || pos > size1 + size2)
3323     {
3324       FREE_VARIABLES ();
3325       return -1;
3326     }
3327 
3328   /* Initialize subexpression text positions to -1 to mark ones that no
3329      start_memory/stop_memory has been seen for. Also initialize the
3330      register information struct.  */
3331   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
3332     {
3333       regstart[mcnt] = regend[mcnt]
3334         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3335 
3336       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3337       IS_ACTIVE (reg_info[mcnt]) = 0;
3338       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3339       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3340     }
3341 
3342   /* We move `string1' into `string2' if the latter's empty -- but not if
3343      `string1' is null.  */
3344   if (size2 == 0 && string1 != NULL)
3345     {
3346       string2 = string1;
3347       size2 = size1;
3348       string1 = 0;
3349       size1 = 0;
3350     }
3351   end1 = string1 + size1;
3352   end2 = string2 + size2;
3353 
3354   /* Compute where to stop matching, within the two strings.  */
3355   if (stop <= size1)
3356     {
3357       end_match_1 = string1 + stop;
3358       end_match_2 = string2;
3359     }
3360   else
3361     {
3362       end_match_1 = end1;
3363       end_match_2 = string2 + stop - size1;
3364     }
3365 
3366   /* `p' scans through the pattern as `d' scans through the data.
3367      `dend' is the end of the input string that `d' points within.  `d'
3368      is advanced into the following input string whenever necessary, but
3369      this happens before fetching; therefore, at the beginning of the
3370      loop, `d' can be pointing at the end of a string, but it cannot
3371      equal `string2'.  */
3372   if (size1 > 0 && pos <= size1)
3373     {
3374       d = string1 + pos;
3375       dend = end_match_1;
3376     }
3377   else
3378     {
3379       d = string2 + pos - size1;
3380       dend = end_match_2;
3381     }
3382 
3383   DEBUG_PRINT1 ("The compiled pattern is: ");
3384   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3385   DEBUG_PRINT1 ("The string to match is: `");
3386   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3387   DEBUG_PRINT1 ("'\n");
3388 
3389   /* This loops over pattern commands.  It exits by returning from the
3390      function if the match is complete, or it drops through if the match
3391      fails at this starting point in the input data.  */
3392   for (;;)
3393     {
3394       DEBUG_PRINT2 ("\n0x%x: ", p);
3395 
3396       if (p == pend)
3397 	{ /* End of pattern means we might have succeeded.  */
3398           DEBUG_PRINT1 ("end of pattern ... ");
3399 
3400 	  /* If we haven't matched the entire string, and we want the
3401              longest match, try backtracking.  */
3402           if (d != end_match_2)
3403 	    {
3404               DEBUG_PRINT1 ("backtracking.\n");
3405 
3406               if (!FAIL_STACK_EMPTY ())
3407                 { /* More failure points to try.  */
3408                   boolean same_str_p = (FIRST_STRING_P (match_end)
3409 	        	                == MATCHING_IN_FIRST_STRING);
3410 
3411                   /* If exceeds best match so far, save it.  */
3412                   if (!best_regs_set
3413                       || (same_str_p && d > match_end)
3414                       || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3415                     {
3416                       best_regs_set = true;
3417                       match_end = d;
3418 
3419                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3420 
3421                       for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
3422                         {
3423                           best_regstart[mcnt] = regstart[mcnt];
3424                           best_regend[mcnt] = regend[mcnt];
3425                         }
3426                     }
3427                   goto fail;
3428                 }
3429 
3430               /* If no failure points, don't restore garbage.  */
3431               else if (best_regs_set)
3432                 {
3433   	        restore_best_regs:
3434                   /* Restore best match.  It may happen that `dend ==
3435                      end_match_1' while the restored d is in string2.
3436                      For example, the pattern `x.*y.*z' against the
3437                      strings `x-' and `y-z-', if the two strings are
3438                      not consecutive in memory.  */
3439                   DEBUG_PRINT1 ("Restoring best registers.\n");
3440 
3441                   d = match_end;
3442                   dend = ((d >= string1 && d <= end1)
3443 		           ? end_match_1 : end_match_2);
3444 
3445 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
3446 		    {
3447 		      regstart[mcnt] = best_regstart[mcnt];
3448 		      regend[mcnt] = best_regend[mcnt];
3449 		    }
3450                 }
3451             } /* d != end_match_2 */
3452 
3453           DEBUG_PRINT1 ("Accepting match.\n");
3454 
3455           /* If caller wants register contents data back, do it.  */
3456           if (regs && !bufp->no_sub)
3457 	    {
3458               /* Have the register data arrays been allocated?  */
3459               if (bufp->regs_allocated == REGS_UNALLOCATED)
3460                 { /* No.  So allocate them with malloc.  We need one
3461                      extra element beyond `num_regs' for the `-1' marker
3462                      GNU code uses.  */
3463                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3464                   regs->start = TALLOC (regs->num_regs, regoff_t);
3465                   regs->end = TALLOC (regs->num_regs, regoff_t);
3466                   if (regs->start == NULL || regs->end == NULL)
3467                     return -2;
3468                   bufp->regs_allocated = REGS_REALLOCATE;
3469                 }
3470               else if (bufp->regs_allocated == REGS_REALLOCATE)
3471                 { /* Yes.  If we need more elements than were already
3472                      allocated, reallocate them.  If we need fewer, just
3473                      leave it alone.  */
3474                   if (regs->num_regs < num_regs + 1)
3475                     {
3476                       regs->num_regs = num_regs + 1;
3477                       RETALLOC (regs->start, regs->num_regs, regoff_t);
3478                       RETALLOC (regs->end, regs->num_regs, regoff_t);
3479                       if (regs->start == NULL || regs->end == NULL)
3480                         return -2;
3481                     }
3482                 }
3483               else
3484                 assert (bufp->regs_allocated == REGS_FIXED);
3485 
3486               /* Convert the pointer data in `regstart' and `regend' to
3487                  indices.  Register zero has to be set differently,
3488                  since we haven't kept track of any info for it.  */
3489               if (regs->num_regs > 0)
3490                 {
3491                   regs->start[0] = pos;
3492                   regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3493 			          : d - string2 + size1);
3494                 }
3495 
3496               /* Go through the first `min (num_regs, regs->num_regs)'
3497                  registers, since that is all we initialized.  */
3498 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
3499 		   mcnt++)
3500 		{
3501                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3502                     regs->start[mcnt] = regs->end[mcnt] = -1;
3503                   else
3504                     {
3505 		      regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3506                       regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3507                     }
3508 		}
3509 
3510               /* If the regs structure we return has more elements than
3511                  were in the pattern, set the extra elements to -1.  If
3512                  we (re)allocated the registers, this is the case,
3513                  because we always allocate enough to have at least one
3514                  -1 at the end.  */
3515               for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
3516                 regs->start[mcnt] = regs->end[mcnt] = -1;
3517 	    } /* regs && !bufp->no_sub */
3518 
3519           FREE_VARIABLES ();
3520           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3521                         nfailure_points_pushed, nfailure_points_popped,
3522                         nfailure_points_pushed - nfailure_points_popped);
3523           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3524 
3525           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3526 			    ? string1
3527 			    : string2 - size1);
3528 
3529           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3530 
3531           return mcnt;
3532         }
3533 
3534       /* Otherwise match next pattern command.  */
3535 #ifdef SWITCH_ENUM_BUG
3536       switch ((int) ((re_opcode_t) *p++))
3537 #else
3538       switch ((re_opcode_t) *p++)
3539 #endif
3540 	{
3541         /* Ignore these.  Used to ignore the n of succeed_n's which
3542            currently have n == 0.  */
3543         case no_op:
3544           DEBUG_PRINT1 ("EXECUTING no_op.\n");
3545           break;
3546 
3547 
3548         /* Match the next n pattern characters exactly.  The following
3549            byte in the pattern defines n, and the n bytes after that
3550            are the characters to match.  */
3551 	case exactn:
3552 	  mcnt = *p++;
3553           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3554 
3555           /* This is written out as an if-else so we don't waste time
3556              testing `translate' inside the loop.  */
3557           if (translate)
3558 	    {
3559 	      do
3560 		{
3561 		  PREFETCH ();
3562 		  if (translate[(unsigned char) *d++] != (char) *p++)
3563                     goto fail;
3564 		}
3565 	      while (--mcnt);
3566 	    }
3567 	  else
3568 	    {
3569 	      do
3570 		{
3571 		  PREFETCH ();
3572 		  if (*d++ != (char) *p++) goto fail;
3573 		}
3574 	      while (--mcnt);
3575 	    }
3576 	  SET_REGS_MATCHED ();
3577           break;
3578 
3579 
3580         /* Match any character except possibly a newline or a null.  */
3581 	case anychar:
3582           DEBUG_PRINT1 ("EXECUTING anychar.\n");
3583 
3584           PREFETCH ();
3585 
3586           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3587               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3588 	    goto fail;
3589 
3590           SET_REGS_MATCHED ();
3591           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3592           d++;
3593 	  break;
3594 
3595 
3596 	case charset:
3597 	case charset_not:
3598 	  {
3599 	    register unsigned char c;
3600 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
3601 
3602             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3603 
3604 	    PREFETCH ();
3605 	    c = TRANSLATE (*d); /* The character to match.  */
3606 
3607             /* Cast to `unsigned' instead of `unsigned char' in case the
3608                bit list is a full 32 bytes long.  */
3609 	    if (c < (unsigned) (*p * BYTEWIDTH)
3610 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3611 	      not = !not;
3612 
3613 	    p += 1 + *p;
3614 
3615 	    if (!not) goto fail;
3616 
3617 	    SET_REGS_MATCHED ();
3618             d++;
3619 	    break;
3620 	  }
3621 
3622 
3623         /* The beginning of a group is represented by start_memory.
3624            The arguments are the register number in the next byte, and the
3625            number of groups inner to this one in the next.  The text
3626            matched within the group is recorded (in the internal
3627            registers data structure) under the register number.  */
3628         case start_memory:
3629 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3630 
3631           /* Find out if this group can match the empty string.  */
3632 	  p1 = p;		/* To send to group_match_null_string_p.  */
3633 
3634           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3635             REG_MATCH_NULL_STRING_P (reg_info[*p])
3636               = group_match_null_string_p (&p1, pend, reg_info);
3637 
3638           /* Save the position in the string where we were the last time
3639              we were at this open-group operator in case the group is
3640              operated upon by a repetition operator, e.g., with `(a*)*b'
3641              against `ab'; then we want to ignore where we are now in
3642              the string in case this attempt to match fails.  */
3643           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3644                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3645                              : regstart[*p];
3646 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
3647 			 POINTER_TO_OFFSET (old_regstart[*p]));
3648 
3649           regstart[*p] = d;
3650 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3651 
3652           IS_ACTIVE (reg_info[*p]) = 1;
3653           MATCHED_SOMETHING (reg_info[*p]) = 0;
3654 
3655           /* This is the new highest active register.  */
3656           highest_active_reg = *p;
3657 
3658           /* If nothing was active before, this is the new lowest active
3659              register.  */
3660           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3661             lowest_active_reg = *p;
3662 
3663           /* Move past the register number and inner group count.  */
3664           p += 2;
3665           break;
3666 
3667 
3668         /* The stop_memory opcode represents the end of a group.  Its
3669            arguments are the same as start_memory's: the register
3670            number, and the number of inner groups.  */
3671 	case stop_memory:
3672 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3673 
3674           /* We need to save the string position the last time we were at
3675              this close-group operator in case the group is operated
3676              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3677              against `aba'; then we want to ignore where we are now in
3678              the string in case this attempt to match fails.  */
3679           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3680                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
3681 			   : regend[*p];
3682 	  DEBUG_PRINT2 ("      old_regend: %d\n",
3683 			 POINTER_TO_OFFSET (old_regend[*p]));
3684 
3685           regend[*p] = d;
3686 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3687 
3688           /* This register isn't active anymore.  */
3689           IS_ACTIVE (reg_info[*p]) = 0;
3690 
3691           /* If this was the only register active, nothing is active
3692              anymore.  */
3693           if (lowest_active_reg == highest_active_reg)
3694             {
3695               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3696               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3697             }
3698           else
3699             { /* We must scan for the new highest active register, since
3700                  it isn't necessarily one less than now: consider
3701                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3702                  new highest active register is 1.  */
3703               unsigned char r = *p - 1;
3704               while (r > 0 && !IS_ACTIVE (reg_info[r]))
3705                 r--;
3706 
3707               /* If we end up at register zero, that means that we saved
3708                  the registers as the result of an `on_failure_jump', not
3709                  a `start_memory', and we jumped to past the innermost
3710                  `stop_memory'.  For example, in ((.)*) we save
3711                  registers 1 and 2 as a result of the *, but when we pop
3712                  back to the second ), we are at the stop_memory 1.
3713                  Thus, nothing is active.  */
3714 	      if (r == 0)
3715                 {
3716                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3717                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3718                 }
3719               else
3720                 highest_active_reg = r;
3721             }
3722 
3723           /* If just failed to match something this time around with a
3724              group that's operated on by a repetition operator, try to
3725              force exit from the ``loop'', and restore the register
3726              information for this group that we had before trying this
3727              last match.  */
3728           if ((!MATCHED_SOMETHING (reg_info[*p])
3729                || (re_opcode_t) p[-3] == start_memory)
3730 	      && (p + 2) < pend)
3731             {
3732               boolean is_a_jump_n = false;
3733 
3734               p1 = p + 2;
3735               mcnt = 0;
3736               switch ((re_opcode_t) *p1++)
3737                 {
3738                   case jump_n:
3739 		    is_a_jump_n = true;
3740                   case pop_failure_jump:
3741 		  case maybe_pop_jump:
3742 		  case jump:
3743 		  case dummy_failure_jump:
3744                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3745 		    if (is_a_jump_n)
3746 		      p1 += 2;
3747                     break;
3748 
3749                   default:
3750                     /* do nothing */ ;
3751                 }
3752 	      p1 += mcnt;
3753 
3754               /* If the next operation is a jump backwards in the pattern
3755 	         to an on_failure_jump right before the start_memory
3756                  corresponding to this stop_memory, exit from the loop
3757                  by forcing a failure after pushing on the stack the
3758                  on_failure_jump's jump in the pattern, and d.  */
3759               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3760                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3761 		{
3762                   /* If this group ever matched anything, then restore
3763                      what its registers were before trying this last
3764                      failed match, e.g., with `(a*)*b' against `ab' for
3765                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
3766                      against `aba' for regend[3].
3767 
3768                      Also restore the registers for inner groups for,
3769                      e.g., `((a*)(b*))*' against `aba' (register 3 would
3770                      otherwise get trashed).  */
3771 
3772                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3773 		    {
3774 		      unsigned r;
3775 
3776                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3777 
3778 		      /* Restore this and inner groups' (if any) registers.  */
3779                       for (r = *p; r < (unsigned) (*p + *(p + 1)); r++)
3780                         {
3781                           regstart[r] = old_regstart[r];
3782 
3783                           /* xx why this test?  */
3784                           if (old_regend[r] >= regstart[r])
3785                             regend[r] = old_regend[r];
3786                         }
3787                     }
3788 		  p1++;
3789                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3790                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3791 
3792                   goto fail;
3793                 }
3794             }
3795 
3796           /* Move past the register number and the inner group count.  */
3797           p += 2;
3798           break;
3799 
3800 
3801 	/* \<digit> has been turned into a `duplicate' command which is
3802            followed by the numeric value of <digit> as the register number.  */
3803         case duplicate:
3804 	  {
3805 	    register const char *d2, *dend2;
3806 	    int regno = *p++;   /* Get which register to match against.  */
3807 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3808 
3809 	    /* Can't back reference a group which we've never matched.  */
3810             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3811               goto fail;
3812 
3813             /* Where in input to try to start matching.  */
3814             d2 = regstart[regno];
3815 
3816             /* Where to stop matching; if both the place to start and
3817                the place to stop matching are in the same string, then
3818                set to the place to stop, otherwise, for now have to use
3819                the end of the first string.  */
3820 
3821             dend2 = ((FIRST_STRING_P (regstart[regno])
3822 		      == FIRST_STRING_P (regend[regno]))
3823 		     ? regend[regno] : end_match_1);
3824 	    for (;;)
3825 	      {
3826 		/* If necessary, advance to next segment in register
3827                    contents.  */
3828 		while (d2 == dend2)
3829 		  {
3830 		    if (dend2 == end_match_2) break;
3831 		    if (dend2 == regend[regno]) break;
3832 
3833                     /* End of string1 => advance to string2. */
3834                     d2 = string2;
3835                     dend2 = regend[regno];
3836 		  }
3837 		/* At end of register contents => success */
3838 		if (d2 == dend2) break;
3839 
3840 		/* If necessary, advance to next segment in data.  */
3841 		PREFETCH ();
3842 
3843 		/* How many characters left in this segment to match.  */
3844 		mcnt = dend - d;
3845 
3846 		/* Want how many consecutive characters we can match in
3847                    one shot, so, if necessary, adjust the count.  */
3848                 if ((unsigned) mcnt > (unsigned) (dend2 - d2))
3849 		  mcnt = dend2 - d2;
3850 
3851 		/* Compare that many; failure if mismatch, else move
3852                    past them.  */
3853 		if (translate
3854                     ? bcmp_translate ((unsigned char *) d,
3855 									  (unsigned char *) d2, mcnt, translate)
3856                     : bcmp (d, d2, mcnt))
3857 		  goto fail;
3858 		d += mcnt, d2 += mcnt;
3859 	      }
3860 	  }
3861 	  break;
3862 
3863 
3864         /* begline matches the empty string at the beginning of the string
3865            (unless `not_bol' is set in `bufp'), and, if
3866            `newline_anchor' is set, after newlines.  */
3867 	case begline:
3868           DEBUG_PRINT1 ("EXECUTING begline.\n");
3869 
3870           if (AT_STRINGS_BEG (d))
3871             {
3872               if (!bufp->not_bol) break;
3873             }
3874           else if (d[-1] == '\n' && bufp->newline_anchor)
3875             {
3876               break;
3877             }
3878           /* In all other cases, we fail.  */
3879           goto fail;
3880 
3881 
3882         /* endline is the dual of begline.  */
3883 	case endline:
3884           DEBUG_PRINT1 ("EXECUTING endline.\n");
3885 
3886           if (AT_STRINGS_END (d))
3887             {
3888               if (!bufp->not_eol) break;
3889             }
3890 
3891           /* We have to ``prefetch'' the next character.  */
3892           else if ((d == end1 ? *string2 : *d) == '\n'
3893                    && bufp->newline_anchor)
3894             {
3895               break;
3896             }
3897           goto fail;
3898 
3899 
3900 	/* Match at the very beginning of the data.  */
3901         case begbuf:
3902           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3903           if (AT_STRINGS_BEG (d))
3904             break;
3905           goto fail;
3906 
3907 
3908 	/* Match at the very end of the data.  */
3909         case endbuf:
3910           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3911 	  if (AT_STRINGS_END (d))
3912 	    break;
3913           goto fail;
3914 
3915 
3916         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
3917            pushes NULL as the value for the string on the stack.  Then
3918            `pop_failure_point' will keep the current value for the
3919            string, instead of restoring it.  To see why, consider
3920            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
3921            then the . fails against the \n.  But the next thing we want
3922            to do is match the \n against the \n; if we restored the
3923            string value, we would be back at the foo.
3924 
3925            Because this is used only in specific cases, we don't need to
3926            check all the things that `on_failure_jump' does, to make
3927            sure the right things get saved on the stack.  Hence we don't
3928            share its code.  The only reason to push anything on the
3929            stack at all is that otherwise we would have to change
3930            `anychar's code to do something besides goto fail in this
3931            case; that seems worse than this.  */
3932         case on_failure_keep_string_jump:
3933           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3934 
3935           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3936           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3937 
3938           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3939           break;
3940 
3941 
3942 	/* Uses of on_failure_jump:
3943 
3944            Each alternative starts with an on_failure_jump that points
3945            to the beginning of the next alternative.  Each alternative
3946            except the last ends with a jump that in effect jumps past
3947            the rest of the alternatives.  (They really jump to the
3948            ending jump of the following alternative, because tensioning
3949            these jumps is a hassle.)
3950 
3951            Repeats start with an on_failure_jump that points past both
3952            the repetition text and either the following jump or
3953            pop_failure_jump back to this on_failure_jump.  */
3954 	case on_failure_jump:
3955         on_failure:
3956           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3957 
3958           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3959           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3960 
3961           /* If this on_failure_jump comes right before a group (i.e.,
3962              the original * applied to a group), save the information
3963              for that group and all inner ones, so that if we fail back
3964              to this point, the group's information will be correct.
3965              For example, in \(a*\)*\1, we need the preceding group,
3966              and in \(\(a*\)b*\)\2, we need the inner group.  */
3967 
3968           /* We can't use `p' to check ahead because we push
3969              a failure point to `p + mcnt' after we do this.  */
3970           p1 = p;
3971 
3972           /* We need to skip no_op's before we look for the
3973              start_memory in case this on_failure_jump is happening as
3974              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3975              against aba.  */
3976           while (p1 < pend && (re_opcode_t) *p1 == no_op)
3977             p1++;
3978 
3979           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3980             {
3981               /* We have a new highest active register now.  This will
3982                  get reset at the start_memory we are about to get to,
3983                  but we will have saved all the registers relevant to
3984                  this repetition op, as described above.  */
3985               highest_active_reg = *(p1 + 1) + *(p1 + 2);
3986               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3987                 lowest_active_reg = *(p1 + 1);
3988             }
3989 
3990           DEBUG_PRINT1 (":\n");
3991           PUSH_FAILURE_POINT (p + mcnt, d, -2);
3992           break;
3993 
3994 
3995         /* A smart repeat ends with `maybe_pop_jump'.
3996 	   We change it to either `pop_failure_jump' or `jump'.  */
3997         case maybe_pop_jump:
3998           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3999           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4000           {
4001 	    register unsigned char *p2 = p;
4002 
4003             /* Compare the beginning of the repeat with what in the
4004                pattern follows its end. If we can establish that there
4005                is nothing that they would both match, i.e., that we
4006                would have to backtrack because of (as in, e.g., `a*a')
4007                then we can change to pop_failure_jump, because we'll
4008                never have to backtrack.
4009 
4010                This is not true in the case of alternatives: in
4011                `(a|ab)*' we do need to backtrack to the `ab' alternative
4012                (e.g., if the string was `ab').  But instead of trying to
4013                detect that here, the alternative has put on a dummy
4014                failure point which is what we will end up popping.  */
4015 
4016 	    /* Skip over open/close-group commands.  */
4017 	    while (p2 + 2 < pend
4018 		   && ((re_opcode_t) *p2 == stop_memory
4019 		       || (re_opcode_t) *p2 == start_memory))
4020 	      p2 += 3;			/* Skip over args, too.  */
4021 
4022             /* If we're at the end of the pattern, we can change.  */
4023             if (p2 == pend)
4024 	      {
4025 		/* Consider what happens when matching ":\(.*\)"
4026 		   against ":/".  I don't really understand this code
4027 		   yet.  */
4028   	        p[-3] = (unsigned char) pop_failure_jump;
4029                 DEBUG_PRINT1
4030                   ("  End of pattern: change to `pop_failure_jump'.\n");
4031               }
4032 
4033             else if ((re_opcode_t) *p2 == exactn
4034 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4035 	      {
4036 		register unsigned char c
4037                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4038 		p1 = p + mcnt;
4039 
4040                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4041                    to the `maybe_finalize_jump' of this case.  Examine what
4042                    follows.  */
4043                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4044                   {
4045   		    p[-3] = (unsigned char) pop_failure_jump;
4046                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4047                                   c, p1[5]);
4048                   }
4049 
4050 		else if ((re_opcode_t) p1[3] == charset
4051 			 || (re_opcode_t) p1[3] == charset_not)
4052 		  {
4053 		    int not = (re_opcode_t) p1[3] == charset_not;
4054 
4055 		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4056 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4057 		      not = !not;
4058 
4059                     /* `not' is equal to 1 if c would match, which means
4060                         that we can't change to pop_failure_jump.  */
4061 		    if (!not)
4062                       {
4063   		        p[-3] = (unsigned char) pop_failure_jump;
4064                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4065                       }
4066 		  }
4067 	      }
4068 	  }
4069 	  p -= 2;		/* Point at relative address again.  */
4070 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4071 	    {
4072 	      p[-1] = (unsigned char) jump;
4073               DEBUG_PRINT1 ("  Match => jump.\n");
4074 	      goto unconditional_jump;
4075 	    }
4076         /* Note fall through.  */
4077 
4078 
4079 	/* The end of a simple repeat has a pop_failure_jump back to
4080            its matching on_failure_jump, where the latter will push a
4081            failure point.  The pop_failure_jump takes off failure
4082            points put on by this pop_failure_jump's matching
4083            on_failure_jump; we got through the pattern to here from the
4084            matching on_failure_jump, so didn't fail.  */
4085         case pop_failure_jump:
4086           {
4087             /* We need to pass separate storage for the lowest and
4088                highest registers, even though we don't care about the
4089                actual values.  Otherwise, we will restore only one
4090                register from the stack, since lowest will == highest in
4091                `pop_failure_point'.  */
4092             unsigned long dummy_low_reg, dummy_high_reg;
4093             unsigned char *pdummy;
4094             const char *sdummy;
4095 
4096             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4097             POP_FAILURE_POINT (sdummy, pdummy,
4098                                dummy_low_reg, dummy_high_reg,
4099                                reg_dummy, reg_dummy, reg_info_dummy);
4100 			(void)pdummy;
4101 			(void)sdummy;
4102           }
4103           /* Note fall through.  */
4104 
4105 
4106         /* Unconditionally jump (without popping any failure points).  */
4107         case jump:
4108 	unconditional_jump:
4109 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4110           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4111 	  p += mcnt;				/* Do the jump.  */
4112           DEBUG_PRINT2 ("(to 0x%x).\n", p);
4113 	  break;
4114 
4115 
4116         /* We need this opcode so we can detect where alternatives end
4117            in `group_match_null_string_p' et al.  */
4118         case jump_past_alt:
4119           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4120           goto unconditional_jump;
4121 
4122 
4123         /* Normally, the on_failure_jump pushes a failure point, which
4124            then gets popped at pop_failure_jump.  We will end up at
4125            pop_failure_jump, also, and with a pattern of, say, `a+', we
4126            are skipping over the on_failure_jump, so we have to push
4127            something meaningless for pop_failure_jump to pop.  */
4128         case dummy_failure_jump:
4129           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4130           /* It doesn't matter what we push for the string here.  What
4131              the code at `fail' tests is the value for the pattern.  */
4132           PUSH_FAILURE_POINT (0, 0, -2);
4133           goto unconditional_jump;
4134 
4135 
4136         /* At the end of an alternative, we need to push a dummy failure
4137            point in case we are followed by a `pop_failure_jump', because
4138            we don't want the failure point for the alternative to be
4139            popped.  For example, matching `(a|ab)*' against `aab'
4140            requires that we match the `ab' alternative.  */
4141         case push_dummy_failure:
4142           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4143           /* See comments just above at `dummy_failure_jump' about the
4144              two zeroes.  */
4145           PUSH_FAILURE_POINT (0, 0, -2);
4146           break;
4147 
4148         /* Have to succeed matching what follows at least n times.
4149            After that, handle like `on_failure_jump'.  */
4150         case succeed_n:
4151           EXTRACT_NUMBER (mcnt, p + 2);
4152           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4153 
4154           assert (mcnt >= 0);
4155           /* Originally, this is how many times we HAVE to succeed.  */
4156           if (mcnt > 0)
4157             {
4158                mcnt--;
4159 	       p += 2;
4160                STORE_NUMBER_AND_INCR (p, mcnt);
4161                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4162             }
4163 	  else if (mcnt == 0)
4164             {
4165               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4166 	      p[2] = (unsigned char) no_op;
4167               p[3] = (unsigned char) no_op;
4168               goto on_failure;
4169             }
4170           break;
4171 
4172         case jump_n:
4173           EXTRACT_NUMBER (mcnt, p + 2);
4174           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4175 
4176           /* Originally, this is how many times we CAN jump.  */
4177           if (mcnt)
4178             {
4179                mcnt--;
4180                STORE_NUMBER (p + 2, mcnt);
4181 	       goto unconditional_jump;
4182             }
4183           /* If don't have to jump any more, skip over the rest of command.  */
4184 	  else
4185 	    p += 4;
4186           break;
4187 
4188 	case set_number_at:
4189 	  {
4190             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4191 
4192             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4193             p1 = p + mcnt;
4194             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4195             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4196 	    STORE_NUMBER (p1, mcnt);
4197             break;
4198           }
4199 
4200         case wordbound:
4201           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4202           if (AT_WORD_BOUNDARY (d))
4203 	    break;
4204           goto fail;
4205 
4206 	case notwordbound:
4207           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4208 	  if (AT_WORD_BOUNDARY (d))
4209 	    goto fail;
4210           break;
4211 
4212 	case wordbeg:
4213           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4214 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4215 	    break;
4216           goto fail;
4217 
4218 	case wordend:
4219           DEBUG_PRINT1 ("EXECUTING wordend.\n");
4220 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4221               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4222 	    break;
4223           goto fail;
4224 
4225 #ifdef emacs
4226 #ifdef emacs19
4227   	case before_dot:
4228           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4229  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4230   	    goto fail;
4231   	  break;
4232 
4233   	case at_dot:
4234           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4235  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
4236   	    goto fail;
4237   	  break;
4238 
4239   	case after_dot:
4240           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4241           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4242   	    goto fail;
4243   	  break;
4244 #else /* not emacs19 */
4245 	case at_dot:
4246           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4247 	  if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4248 	    goto fail;
4249 	  break;
4250 #endif /* not emacs19 */
4251 
4252 	case syntaxspec:
4253           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4254 	  mcnt = *p++;
4255 	  goto matchsyntax;
4256 
4257         case wordchar:
4258           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4259 	  mcnt = (int) Sword;
4260         matchsyntax:
4261 	  PREFETCH ();
4262 	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4263             goto fail;
4264           SET_REGS_MATCHED ();
4265 	  break;
4266 
4267 	case notsyntaxspec:
4268           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4269 	  mcnt = *p++;
4270 	  goto matchnotsyntax;
4271 
4272         case notwordchar:
4273           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4274 	  mcnt = (int) Sword;
4275         matchnotsyntax:
4276 	  PREFETCH ();
4277 	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4278             goto fail;
4279 	  SET_REGS_MATCHED ();
4280           break;
4281 
4282 #else /* not emacs */
4283 	case wordchar:
4284           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4285 	  PREFETCH ();
4286           if (!WORDCHAR_P (d))
4287             goto fail;
4288 	  SET_REGS_MATCHED ();
4289           d++;
4290 	  break;
4291 
4292 	case notwordchar:
4293           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4294 	  PREFETCH ();
4295 	  if (WORDCHAR_P (d))
4296             goto fail;
4297           SET_REGS_MATCHED ();
4298           d++;
4299 	  break;
4300 #endif /* not emacs */
4301 
4302         default:
4303           abort ();
4304 	}
4305       continue;  /* Successfully executed one pattern command; keep going.  */
4306 
4307 
4308     /* We goto here if a matching operation fails. */
4309     fail:
4310       if (!FAIL_STACK_EMPTY ())
4311 	{ /* A restart point is known.  Restore to that state.  */
4312           DEBUG_PRINT1 ("\nFAIL:\n");
4313           POP_FAILURE_POINT (d, p,
4314                              lowest_active_reg, highest_active_reg,
4315                              regstart, regend, reg_info);
4316 
4317           /* If this failure point is a dummy, try the next one.  */
4318           if (!p)
4319 	    goto fail;
4320 
4321           /* If we failed to the end of the pattern, don't examine *p.  */
4322 	  assert (p <= pend);
4323           if (p < pend)
4324             {
4325               boolean is_a_jump_n = false;
4326 
4327               /* If failed to a backwards jump that's part of a repetition
4328                  loop, need to pop this failure point and use the next one.  */
4329               switch ((re_opcode_t) *p)
4330                 {
4331                 case jump_n:
4332                   is_a_jump_n = true;
4333                 case maybe_pop_jump:
4334                 case pop_failure_jump:
4335                 case jump:
4336                   p1 = p + 1;
4337                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4338                   p1 += mcnt;
4339 
4340                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4341                       || (!is_a_jump_n
4342                           && (re_opcode_t) *p1 == on_failure_jump))
4343                     goto fail;
4344                   break;
4345                 default:
4346                   /* do nothing */ ;
4347                 }
4348             }
4349 
4350           if (d >= string1 && d <= end1)
4351 	    dend = end_match_1;
4352         }
4353       else
4354         break;   /* Matching at this starting point really fails.  */
4355     } /* for (;;) */
4356 
4357   if (best_regs_set)
4358     goto restore_best_regs;
4359 
4360   FREE_VARIABLES ();
4361 
4362   return -1;         			/* Failure to match.  */
4363 } /* re_match_2 */
4364 
4365 /* Subroutine definitions for re_match_2.  */
4366 
4367 
4368 /* We are passed P pointing to a register number after a start_memory.
4369 
4370    Return true if the pattern up to the corresponding stop_memory can
4371    match the empty string, and false otherwise.
4372 
4373    If we find the matching stop_memory, sets P to point to one past its number.
4374    Otherwise, sets P to an undefined byte less than or equal to END.
4375 
4376    We don't handle duplicates properly (yet).  */
4377 
4378 static boolean
group_match_null_string_p(p,end,reg_info)4379 group_match_null_string_p (p, end, reg_info)
4380     unsigned char **p, *end;
4381     register_info_type *reg_info;
4382 {
4383   int mcnt;
4384   /* Point to after the args to the start_memory.  */
4385   unsigned char *p1 = *p + 2;
4386 
4387   while (p1 < end)
4388     {
4389       /* Skip over opcodes that can match nothing, and return true or
4390 	 false, as appropriate, when we get to one that can't, or to the
4391          matching stop_memory.  */
4392 
4393       switch ((re_opcode_t) *p1)
4394         {
4395         /* Could be either a loop or a series of alternatives.  */
4396         case on_failure_jump:
4397           p1++;
4398           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4399 
4400           /* If the next operation is not a jump backwards in the
4401 	     pattern.  */
4402 
4403 	  if (mcnt >= 0)
4404 	    {
4405               /* Go through the on_failure_jumps of the alternatives,
4406                  seeing if any of the alternatives cannot match nothing.
4407                  The last alternative starts with only a jump,
4408                  whereas the rest start with on_failure_jump and end
4409                  with a jump, e.g., here is the pattern for `a|b|c':
4410 
4411                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4412                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4413                  /exactn/1/c
4414 
4415                  So, we have to first go through the first (n-1)
4416                  alternatives and then deal with the last one separately.  */
4417 
4418 
4419               /* Deal with the first (n-1) alternatives, which start
4420                  with an on_failure_jump (see above) that jumps to right
4421                  past a jump_past_alt.  */
4422 
4423               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4424                 {
4425                   /* `mcnt' holds how many bytes long the alternative
4426                      is, including the ending `jump_past_alt' and
4427                      its number.  */
4428 
4429                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4430 				                      reg_info))
4431                     return false;
4432 
4433                   /* Move to right after this alternative, including the
4434 		     jump_past_alt.  */
4435                   p1 += mcnt;
4436 
4437                   /* Break if it's the beginning of an n-th alternative
4438                      that doesn't begin with an on_failure_jump.  */
4439                   if ((re_opcode_t) *p1 != on_failure_jump)
4440                     break;
4441 
4442 		  /* Still have to check that it's not an n-th
4443 		     alternative that starts with an on_failure_jump.  */
4444 		  p1++;
4445                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4446                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4447                     {
4448 		      /* Get to the beginning of the n-th alternative.  */
4449                       p1 -= 3;
4450                       break;
4451                     }
4452                 }
4453 
4454               /* Deal with the last alternative: go back and get number
4455                  of the `jump_past_alt' just before it.  `mcnt' contains
4456                  the length of the alternative.  */
4457               EXTRACT_NUMBER (mcnt, p1 - 2);
4458 
4459               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4460                 return false;
4461 
4462               p1 += mcnt;	/* Get past the n-th alternative.  */
4463             } /* if mcnt > 0 */
4464           break;
4465 
4466 
4467         case stop_memory:
4468 	  assert (p1[1] == **p);
4469           *p = p1 + 2;
4470           return true;
4471 
4472 
4473         default:
4474           if (!common_op_match_null_string_p (&p1, end, reg_info))
4475             return false;
4476         }
4477     } /* while p1 < end */
4478 
4479   return false;
4480 } /* group_match_null_string_p */
4481 
4482 
4483 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4484    It expects P to be the first byte of a single alternative and END one
4485    byte past the last. The alternative can contain groups.  */
4486 
4487 static boolean
alt_match_null_string_p(p,end,reg_info)4488 alt_match_null_string_p (p, end, reg_info)
4489     unsigned char *p, *end;
4490     register_info_type *reg_info;
4491 {
4492   int mcnt;
4493   unsigned char *p1 = p;
4494 
4495   while (p1 < end)
4496     {
4497       /* Skip over opcodes that can match nothing, and break when we get
4498          to one that can't.  */
4499 
4500       switch ((re_opcode_t) *p1)
4501         {
4502 	/* It's a loop.  */
4503         case on_failure_jump:
4504           p1++;
4505           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4506           p1 += mcnt;
4507           break;
4508 
4509 	default:
4510           if (!common_op_match_null_string_p (&p1, end, reg_info))
4511             return false;
4512         }
4513     }  /* while p1 < end */
4514 
4515   return true;
4516 } /* alt_match_null_string_p */
4517 
4518 
4519 /* Deals with the ops common to group_match_null_string_p and
4520    alt_match_null_string_p.
4521 
4522    Sets P to one after the op and its arguments, if any.  */
4523 
4524 static boolean
common_op_match_null_string_p(p,end,reg_info)4525 common_op_match_null_string_p (p, end, reg_info)
4526     unsigned char **p, *end;
4527     register_info_type *reg_info;
4528 {
4529   int mcnt;
4530   boolean ret;
4531   int reg_no;
4532   unsigned char *p1 = *p;
4533 
4534   switch ((re_opcode_t) *p1++)
4535     {
4536     case no_op:
4537     case begline:
4538     case endline:
4539     case begbuf:
4540     case endbuf:
4541     case wordbeg:
4542     case wordend:
4543     case wordbound:
4544     case notwordbound:
4545 #ifdef emacs
4546     case before_dot:
4547     case at_dot:
4548     case after_dot:
4549 #endif
4550       break;
4551 
4552     case start_memory:
4553       reg_no = *p1;
4554       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4555       ret = group_match_null_string_p (&p1, end, reg_info);
4556 
4557       /* Have to set this here in case we're checking a group which
4558          contains a group and a back reference to it.  */
4559 
4560       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4561         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4562 
4563       if (!ret)
4564         return false;
4565       break;
4566 
4567     /* If this is an optimized succeed_n for zero times, make the jump.  */
4568     case jump:
4569       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4570       if (mcnt >= 0)
4571         p1 += mcnt;
4572       else
4573         return false;
4574       break;
4575 
4576     case succeed_n:
4577       /* Get to the number of times to succeed.  */
4578       p1 += 2;
4579       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4580 
4581       if (mcnt == 0)
4582         {
4583           p1 -= 4;
4584           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4585           p1 += mcnt;
4586         }
4587       else
4588         return false;
4589       break;
4590 
4591     case duplicate:
4592       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4593         return false;
4594       break;
4595 
4596     case set_number_at:
4597       p1 += 4;
4598 
4599     default:
4600       /* All other opcodes mean we cannot match the empty string.  */
4601       return false;
4602   }
4603 
4604   *p = p1;
4605   return true;
4606 } /* common_op_match_null_string_p */
4607 
4608 
4609 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4610    bytes; nonzero otherwise.  */
4611 
4612 static int
bcmp_translate(s1,s2,len,translate)4613 bcmp_translate (s1, s2, len, translate)
4614      const unsigned char *s1, *s2;
4615      register int len;
4616      char *translate;
4617 {
4618   register const unsigned char *p1 = s1, *p2 = s2;
4619   while (len)
4620     {
4621       if (translate[*p1++] != translate[*p2++]) return 1;
4622       len--;
4623     }
4624   return 0;
4625 }
4626 
4627 /* Entry points for GNU code.  */
4628 
4629 /* re_compile_pattern is the GNU regular expression compiler: it
4630    compiles PATTERN (of length SIZE) and puts the result in BUFP.
4631    Returns 0 if the pattern was valid, otherwise an error string.
4632 
4633    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4634    are set in BUFP on entry.
4635 
4636    We call regex_compile to do the actual compilation.  */
4637 
4638 const char *
re_compile_pattern(pattern,length,bufp)4639 re_compile_pattern (pattern, length, bufp)
4640      const char *pattern;
4641      int length;
4642      struct re_pattern_buffer *bufp;
4643 {
4644   reg_errcode_t ret;
4645 
4646   /* GNU code is written to assume at least RE_NREGS registers will be set
4647      (and at least one extra will be -1).  */
4648   bufp->regs_allocated = REGS_UNALLOCATED;
4649 
4650   /* And GNU code determines whether or not to get register information
4651      by passing null for the REGS argument to re_match, etc., not by
4652      setting no_sub.  */
4653   bufp->no_sub = 0;
4654 
4655   /* Match anchors at newline.  */
4656   bufp->newline_anchor = 1;
4657 
4658   ret = regex_compile (pattern, length, re_syntax_options, bufp);
4659 
4660   return re_error_msg[(int) ret];
4661 }
4662 
4663 /* Entry points compatible with 4.2 BSD regex library.  We don't define
4664    them if this is an Emacs or POSIX compilation.  */
4665 
4666 #if !defined (emacs) && !defined (_POSIX_SOURCE)
4667 
4668 /* BSD has one and only one pattern buffer.  */
4669 static struct re_pattern_buffer re_comp_buf;
4670 
4671 const char *
re_comp(s)4672 re_comp (s)
4673     const char *s;
4674 {
4675   reg_errcode_t ret;
4676 
4677   if (!s)
4678     {
4679       if (!re_comp_buf.buffer)
4680 	return "No previous regular expression";
4681       return 0;
4682     }
4683 
4684   if (!re_comp_buf.buffer)
4685     {
4686       re_comp_buf.buffer = (unsigned char *) malloc (200);
4687       if (re_comp_buf.buffer == NULL)
4688         return "Memory exhausted";
4689       re_comp_buf.allocated = 200;
4690 
4691       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4692       if (re_comp_buf.fastmap == NULL)
4693 	return "Memory exhausted";
4694     }
4695 
4696   /* Since `re_exec' always passes NULL for the `regs' argument, we
4697      don't need to initialize the pattern buffer fields which affect it.  */
4698 
4699   /* Match anchors at newlines.  */
4700   re_comp_buf.newline_anchor = 1;
4701 
4702   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4703 
4704   /* Yes, we're discarding `const' here.  */
4705   return (char *) re_error_msg[(int) ret];
4706 }
4707 
4708 
4709 int
re_exec(s)4710 re_exec (s)
4711     const char *s;
4712 {
4713   const int len = strlen (s);
4714   return
4715     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4716 }
4717 #endif /* not emacs and not _POSIX_SOURCE */
4718 
4719 /* POSIX.2 functions.  Don't define these for Emacs.  */
4720 
4721 #ifndef emacs
4722 
4723 /* regcomp takes a regular expression as a string and compiles it.
4724 
4725    PREG is a regex_t *.  We do not expect any fields to be initialized,
4726    since POSIX says we shouldn't.  Thus, we set
4727 
4728      `buffer' to the compiled pattern;
4729      `used' to the length of the compiled pattern;
4730      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4731        REG_EXTENDED bit in CFLAGS is set; otherwise, to
4732        RE_SYNTAX_POSIX_BASIC;
4733      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4734      `fastmap' and `fastmap_accurate' to zero;
4735      `re_nsub' to the number of subexpressions in PATTERN.
4736 
4737    PATTERN is the address of the pattern string.
4738 
4739    CFLAGS is a series of bits which affect compilation.
4740 
4741      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4742      use POSIX basic syntax.
4743 
4744      If REG_NEWLINE is set, then . and [^...] don't match newline.
4745      Also, regexec will try a match beginning after every newline.
4746 
4747      If REG_ICASE is set, then we considers upper- and lowercase
4748      versions of letters to be equivalent when matching.
4749 
4750      If REG_NOSUB is set, then when PREG is passed to regexec, that
4751      routine will report only success or failure, and nothing about the
4752      registers.
4753 
4754    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
4755    the return codes and their meanings.)  */
4756 
4757 int
regcomp(preg,pattern,cflags)4758 regcomp (preg, pattern, cflags)
4759     regex_t *preg;
4760     const char *pattern;
4761     int cflags;
4762 {
4763   reg_errcode_t ret;
4764   unsigned syntax
4765     = (cflags & REG_EXTENDED) ?
4766       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4767 
4768   /* regex_compile will allocate the space for the compiled pattern.  */
4769   preg->buffer = 0;
4770   preg->allocated = 0;
4771 
4772   /* Don't bother to use a fastmap when searching.  This simplifies the
4773      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4774      characters after newlines into the fastmap.  This way, we just try
4775      every character.  */
4776   preg->fastmap = 0;
4777 
4778   if (cflags & REG_ICASE)
4779     {
4780       unsigned i;
4781 
4782       preg->translate = (char *) malloc (CHAR_SET_SIZE);
4783       if (preg->translate == NULL)
4784         return (int) REG_ESPACE;
4785 
4786       /* Map uppercase characters to corresponding lowercase ones.  */
4787       for (i = 0; i < CHAR_SET_SIZE; i++)
4788         preg->translate[i] = ISUPPER (i) ? (unsigned) tolower (i) : i;
4789     }
4790   else
4791     preg->translate = NULL;
4792 
4793   /* If REG_NEWLINE is set, newlines are treated differently.  */
4794   if (cflags & REG_NEWLINE)
4795     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
4796       syntax &= ~RE_DOT_NEWLINE;
4797       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4798       /* It also changes the matching behavior.  */
4799       preg->newline_anchor = 1;
4800     }
4801   else
4802     preg->newline_anchor = 0;
4803 
4804   preg->no_sub = !!(cflags & REG_NOSUB);
4805 
4806   /* POSIX says a null character in the pattern terminates it, so we
4807      can use strlen here in compiling the pattern.  */
4808   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4809 
4810   /* POSIX doesn't distinguish between an unmatched open-group and an
4811      unmatched close-group: both are REG_EPAREN.  */
4812   if (ret == REG_ERPAREN) ret = REG_EPAREN;
4813 
4814   return (int) ret;
4815 }
4816 
4817 
4818 /* regexec searches for a given pattern, specified by PREG, in the
4819    string STRING.
4820 
4821    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4822    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
4823    least NMATCH elements, and we set them to the offsets of the
4824    corresponding matched substrings.
4825 
4826    EFLAGS specifies `execution flags' which affect matching: if
4827    REG_NOTBOL is set, then ^ does not match at the beginning of the
4828    string; if REG_NOTEOL is set, then $ does not match at the end.
4829 
4830    We return 0 if we find a match and REG_NOMATCH if not.  */
4831 
4832 int
regexec(preg,string,nmatch,pmatch,eflags)4833 regexec (preg, string, nmatch, pmatch, eflags)
4834     const regex_t *preg;
4835     const char *string;
4836     size_t nmatch;
4837     regmatch_t pmatch[];
4838     int eflags;
4839 {
4840   int ret;
4841   struct re_registers regs;
4842   regex_t private_preg;
4843   int len = strlen (string);
4844   boolean want_reg_info = !preg->no_sub && nmatch > 0;
4845 
4846   private_preg = *preg;
4847 
4848   private_preg.not_bol = !!(eflags & REG_NOTBOL);
4849   private_preg.not_eol = !!(eflags & REG_NOTEOL);
4850 
4851   /* The user has told us exactly how many registers to return
4852      information about, via `nmatch'.  We have to pass that on to the
4853      matching routines.  */
4854   private_preg.regs_allocated = REGS_FIXED;
4855 
4856   if (want_reg_info)
4857     {
4858       regs.num_regs = nmatch;
4859       regs.start = TALLOC (nmatch, regoff_t);
4860       regs.end = TALLOC (nmatch, regoff_t);
4861       if (regs.start == NULL || regs.end == NULL)
4862         return (int) REG_NOMATCH;
4863     }
4864 
4865   /* Perform the searching operation.  */
4866   ret = re_search (&private_preg, string, len,
4867                    /* start: */ 0, /* range: */ len,
4868                    want_reg_info ? &regs : (struct re_registers *) 0);
4869 
4870   /* Copy the register information to the POSIX structure.  */
4871   if (want_reg_info)
4872     {
4873       if (ret >= 0)
4874         {
4875           unsigned r;
4876 
4877           for (r = 0; r < nmatch; r++)
4878             {
4879               pmatch[r].rm_so = regs.start[r];
4880               pmatch[r].rm_eo = regs.end[r];
4881             }
4882         }
4883 
4884       /* If we needed the temporary register info, free the space now.  */
4885       free (regs.start);
4886       free (regs.end);
4887     }
4888 
4889   /* We want zero return to mean success, unlike `re_search'.  */
4890   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4891 }
4892 
4893 
4894 /* Returns a message corresponding to an error code, ERRCODE, returned
4895    from either regcomp or regexec.   We don't use PREG here.  */
4896 
4897 size_t
regerror(int errcode,const regex_t * preg,char * errbuf,size_t errbuf_size)4898 regerror (int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size) {
4899   const char *msg;
4900   size_t msg_size;
4901 
4902   if (errcode < 0
4903       || (size_t) errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4904     /* Only error codes returned by the rest of the code should be passed
4905        to this routine.  If we are given anything else, or if other regex
4906        code generates an invalid error code, then the program has a bug.
4907        Dump core so we can fix it.  */
4908     abort ();
4909 
4910   msg = re_error_msg[errcode];
4911 
4912   /* POSIX doesn't require that we do anything in this case, but why
4913      not be nice.  */
4914   if (! msg)
4915     msg = "Success";
4916 
4917   msg_size = strlen (msg) + 1; /* Includes the null.  */
4918 
4919   if (errbuf_size != 0)
4920     {
4921       if (msg_size > errbuf_size)
4922         {
4923           strncpy (errbuf, msg, errbuf_size - 1);
4924           errbuf[errbuf_size - 1] = 0;
4925         }
4926       else
4927         strcpy (errbuf, msg);
4928     }
4929 
4930   return msg_size;
4931 }
4932 
4933 
4934 /* Free dynamically allocated space used by PREG.  */
4935 
4936 void
regfree(preg)4937 regfree (preg)
4938     regex_t *preg;
4939 {
4940   if (preg->buffer != NULL)
4941     free (preg->buffer);
4942   preg->buffer = NULL;
4943 
4944   preg->allocated = 0;
4945   preg->used = 0;
4946 
4947   if (preg->fastmap != NULL)
4948     free (preg->fastmap);
4949   preg->fastmap = NULL;
4950   preg->fastmap_accurate = 0;
4951 
4952   if (preg->translate != NULL)
4953     free (preg->translate);
4954   preg->translate = NULL;
4955 }
4956 
4957 #endif /* not emacs  */
4958 
4959 /*
4960 Local variables:
4961 make-backup-files: t
4962 version-control: t
4963 trim-versions-without-asking: nil
4964 End:
4965 */
4966