1 /* Extended regular expression matching and search library,
2    version 0.12.
3    (Implements POSIX draft P10003.2/D11.2, except for
4    internationalization features.)
5 
6    Copyright (C) 1993 Free Software Foundation, Inc.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2, or (at your option)
11    any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.  */
21 
22 #ifdef DEBUG
23 #undef DEBUG
24 #endif
25 
26 #if defined(_WIN64) && defined(_MSC_VER)
27 /* NOTE:
28 
29    to compile this file for Windows AMD64 platform please
30    disable any optimisation of this file.
31 
32    otherwise, on any calling of _alloca function, you can get fatal error
33    C1001: INTERNAL COMPILER ERROR */
34 #	pragma optimize( "", off )
35 #endif
36 
37 /* AIX requires this to be the first thing in the file. */
38 #if defined (_AIX) && !defined (REGEX_MALLOC)
39   #pragma alloca
40 #endif
41 
42 #define _GNU_SOURCE
43 
44 #include "common.h"
45 
46 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
47 #include <sys/types.h>
48 
49 /* The `emacs' switch turns on certain matching commands
50    that make sense only in Emacs. */
51 #ifdef emacs
52 
53 #include "lisp.h"
54 #include "buffer.h"
55 #include "syntax.h"
56 
57 /* Emacs uses `NULL' as a predicate.  */
58 #undef NULL
59 
60 #else  /* not emacs */
61 
62 /* We used to test for `BSTRING' here, but only GCC and Emacs define
63    `BSTRING', as far as I know, and neither of them use this code.  */
64 #if HAVE_STRING_H || STDC_HEADERS
65 #	include <string.h>
66 #	ifndef bcmp
67 #		define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
68 #	endif
69 #	ifndef bcopy
70 #		define bcopy(s, d, n)	memcpy ((d), (s), (n))
71 #	endif
72 #	ifndef bzero
73 #		define bzero(s, n)	memset ((s), 0, (n))
74 #	endif
75 #else
76 #	include <strings.h>
77 #endif
78 
79 /* Define the syntax stuff for \<, \>, etc.  */
80 
81 /* This must be non-zero for the wordchar and notwordchar pattern
82    commands in re_match_2.  */
83 #ifndef Sword
84 #define Sword 1
85 #endif
86 
87 #ifdef SYNTAX_TABLE
88 
89 extern char *re_syntax_table;
90 
91 #else /* not SYNTAX_TABLE */
92 
93 /* How many characters in the character set.  */
94 #define CHAR_SET_SIZE 256
95 
96 static char re_syntax_table[CHAR_SET_SIZE];
97 
98 static void
init_syntax_once()99 init_syntax_once ()
100 {
101    register int c;
102    static int done = 0;
103 
104    if (done)
105      return;
106 
107    bzero (re_syntax_table, sizeof re_syntax_table);
108 
109    for (c = 'a'; c <= 'z'; c++)
110      re_syntax_table[c] = Sword;
111 
112    for (c = 'A'; c <= 'Z'; c++)
113      re_syntax_table[c] = Sword;
114 
115    for (c = '0'; c <= '9'; c++)
116      re_syntax_table[c] = Sword;
117 
118    re_syntax_table['_'] = Sword;
119 
120    done = 1;
121 }
122 
123 #endif /* not SYNTAX_TABLE */
124 
125 #define SYNTAX(c) re_syntax_table[c]
126 
127 #endif /* not emacs */
128 
129 /* Get the interface, including the syntax bits.  */
130 #include "gnuregex.h"
131 
132 /* isalpha etc. are used for the character classes.  */
133 #include <ctype.h>
134 
135 #ifndef isascii
136 #define isascii(c) 1
137 #endif
138 
139 #ifdef isblank
140 #define ISBLANK(c) (isascii (c) && isblank (c))
141 #else
142 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
143 #endif
144 #ifdef isgraph
145 #define ISGRAPH(c) (isascii (c) && isgraph (c))
146 #else
147 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
148 #endif
149 
150 #define ISPRINT(c) (isascii (c) && isprint (c))
151 #define ISDIGIT(c) (isascii (c) && isdigit (c))
152 #define ISALNUM(c) (isascii (c) && isalnum (c))
153 #define ISALPHA(c) (isascii (c) && isalpha (c))
154 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
155 #define ISLOWER(c) (isascii (c) && islower (c))
156 #define ISPUNCT(c) (isascii (c) && ispunct (c))
157 #define ISSPACE(c) (isascii (c) && isspace (c))
158 #define ISUPPER(c) (isascii (c) && isupper (c))
159 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
160 
161 #ifndef NULL
162 #define NULL 0
163 #endif
164 
165 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
166    since ours (we hope) works properly with all combinations of
167    machines, compilers, `char' and `unsigned char' argument types.
168    (Per Bothner suggested the basic approach.)  */
169 #undef SIGN_EXTEND_CHAR
170 #if __STDC__
171 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
172 #else  /* not __STDC__ */
173 /* As in Harbison and Steele.  */
174 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
175 #endif
176 
177 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
178    use `alloca' instead of `malloc'.  This is because using malloc in
179    re_search* or re_match* could cause memory leaks when C-g is used in
180    Emacs; also, malloc is slower and causes storage fragmentation.  On
181    the other hand, malloc is more portable, and easier to debug.
182 
183    Because we sometimes use alloca, some routines have to be macros,
184    not functions -- `alloca'-allocated space disappears at the end of the
185    function it is called in.  */
186 
187 #ifdef REGEX_MALLOC
188 
189 	#define REGEX_ALLOCATE malloc
190 	#define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
191 
192 #else /* not REGEX_MALLOC  */
193 
194 /* Emacs already defines alloca, sometimes.  */
195 	#ifndef alloca
196 
197 /* Make alloca work the best possible way.  */
198 		#ifdef __GNUC__
199 			#define alloca __builtin_alloca
200 		#elif HAVE_ALLOCA_H
201 			#include <alloca.h>
202 		#elif _WINDOWS
203 			#define alloca _alloca
204 		#elif !defined(_AIX) /* Already did AIX, up at the top.  */
205 			char *alloca ();
206 		#endif /* __GNUC__ */
207 	#endif /* not alloca */
208 
209 	#define REGEX_ALLOCATE alloca
210 
211 	/* Assumes a `char *destination' variable.  */
212 	#define REGEX_REALLOCATE(source, osize, nsize)				\
213 		(destination = (char *) alloca (nsize),				\
214 		bcopy (source, destination, osize),				\
215 		destination)
216 
217 #endif /* not REGEX_MALLOC */
218 
219 
220 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
221    `string1' or just past its end.  This works if PTR is NULL, which is
222    a good thing.  */
223 #define FIRST_STRING_P(ptr)					\
224   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
225 
226 /* (Re)Allocate N items of type T using malloc, or fail.  */
227 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
228 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
229 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
230 
231 #define BYTEWIDTH 8 /* In bits.  */
232 
233 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
234 
235 #ifndef MAX
236 #define MAX(a, b) ((a) > (b) ? (a) : (b))
237 #endif
238 #ifndef MIN
239 #define MIN(a, b) ((a) < (b) ? (a) : (b))
240 #endif
241 
242 #if defined(boolean)
243 typedef char boolean;
244 #endif
245 
246 #define false 0
247 #define true 1
248 
249 /* These are the command codes that appear in compiled regular
250    expressions.  Some opcodes are followed by argument bytes.  A
251    command code can specify any interpretation whatsoever for its
252    arguments.  Zero bytes may appear in the compiled regular expression.
253 
254    The value of `exactn' is needed in search.c (search_buffer) in Emacs.
255    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
256    `exactn' we use here must also be 1.  */
257 
258 typedef enum
259 {
260   no_op = 0,
261 
262         /* Followed by one byte giving n, then by n literal bytes.  */
263   exactn = 1,
264 
265         /* Matches any (more or less) character.  */
266   anychar,
267 
268         /* Matches any one char belonging to specified set.  First
269            following byte is number of bitmap bytes.  Then come bytes
270            for a bitmap saying which chars are in.  Bits in each byte
271            are ordered low-bit-first.  A character is in the set if its
272            bit is 1.  A character too large to have a bit in the map is
273            automatically not in the set.  */
274   charset,
275 
276         /* Same parameters as charset, but match any character that is
277            not one of those specified.  */
278   charset_not,
279 
280         /* Start remembering the text that is matched, for storing in a
281            register.  Followed by one byte with the register number, in
282            the range 0 to one less than the pattern buffer's re_nsub
283            field.  Then followed by one byte with the number of groups
284            inner to this one.  (This last has to be part of the
285            start_memory only because we need it in the on_failure_jump
286            of re_match_2.)  */
287   start_memory,
288 
289         /* Stop remembering the text that is matched and store it in a
290            memory register.  Followed by one byte with the register
291            number, in the range 0 to one less than `re_nsub' in the
292            pattern buffer, and one byte with the number of inner groups,
293            just like `start_memory'.  (We need the number of inner
294            groups here because we don't have any easy way of finding the
295            corresponding start_memory when we're at a stop_memory.)  */
296   stop_memory,
297 
298         /* Match a duplicate of something remembered. Followed by one
299            byte containing the register number.  */
300   duplicate,
301 
302         /* Fail unless at beginning of line.  */
303   begline,
304 
305         /* Fail unless at end of line.  */
306   endline,
307 
308         /* Succeeds if at beginning of buffer (if emacs) or at beginning
309            of string to be matched (if not).  */
310   begbuf,
311 
312         /* Analogously, for end of buffer/string.  */
313   endbuf,
314 
315         /* Followed by two byte relative address to which to jump.  */
316   jump,
317 
318 	/* Same as jump, but marks the end of an alternative.  */
319   jump_past_alt,
320 
321         /* Followed by two-byte relative address of place to resume at
322            in case of failure.  */
323   on_failure_jump,
324 
325         /* Like on_failure_jump, but pushes a placeholder instead of the
326            current string position when executed.  */
327   on_failure_keep_string_jump,
328 
329         /* Throw away latest failure point and then jump to following
330            two-byte relative address.  */
331   pop_failure_jump,
332 
333         /* Change to pop_failure_jump if know won't have to backtrack to
334            match; otherwise change to jump.  This is used to jump
335            back to the beginning of a repeat.  If what follows this jump
336            clearly won't match what the repeat does, such that we can be
337            sure that there is no use backtracking out of repetitions
338            already matched, then we change it to a pop_failure_jump.
339            Followed by two-byte address.  */
340   maybe_pop_jump,
341 
342         /* Jump to following two-byte address, and push a dummy failure
343            point. This failure point will be thrown away if an attempt
344            is made to use it for a failure.  A `+' construct makes this
345            before the first repeat.  Also used as an intermediary kind
346            of jump when compiling an alternative.  */
347   dummy_failure_jump,
348 
349 	/* Push a dummy failure point and continue.  Used at the end of
350 	   alternatives.  */
351   push_dummy_failure,
352 
353         /* Followed by two-byte relative address and two-byte number n.
354            After matching N times, jump to the address upon failure.  */
355   succeed_n,
356 
357         /* Followed by two-byte relative address, and two-byte number n.
358            Jump to the address N times, then fail.  */
359   jump_n,
360 
361         /* Set the following two-byte relative address to the
362            subsequent two-byte number.  The address *includes* the two
363            bytes of number.  */
364   set_number_at,
365 
366   wordchar,	/* Matches any word-constituent character.  */
367   notwordchar,	/* Matches any char that is not a word-constituent.  */
368 
369   wordbeg,	/* Succeeds if at word beginning.  */
370   wordend,	/* Succeeds if at word end.  */
371 
372   wordbound,	/* Succeeds if at a word boundary.  */
373   notwordbound	/* Succeeds if not at a word boundary.  */
374 
375 #ifdef emacs
376   ,before_dot,	/* Succeeds if before point.  */
377   at_dot,	/* Succeeds if at point.  */
378   after_dot,	/* Succeeds if after point.  */
379 
380 	/* Matches any character whose syntax is specified.  Followed by
381            a byte which contains a syntax code, e.g., Sword.  */
382   syntaxspec,
383 
384 	/* Matches any character whose syntax is not that specified.  */
385   notsyntaxspec
386 #endif /* emacs */
387 } re_opcode_t;
388 
389 /* Common operations on the compiled pattern.  */
390 
391 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
392 
393 #define STORE_NUMBER(destination, number)				\
394   do {									\
395     (destination)[0] = (number) & 0377;					\
396     (destination)[1] = (number) >> 8;					\
397   } while (0)
398 
399 /* Same as STORE_NUMBER, except increment DESTINATION to
400    the byte after where the number is stored.  Therefore, DESTINATION
401    must be an lvalue.  */
402 
403 #define STORE_NUMBER_AND_INCR(destination, number)			\
404   do {									\
405     STORE_NUMBER (destination, number);					\
406     (destination) += 2;							\
407   } while (0)
408 
409 /* Put into DESTINATION a number stored in two contiguous bytes starting
410    at SOURCE.  */
411 
412 #define EXTRACT_NUMBER(destination, source)				\
413   do {									\
414     (destination) = *(source) & 0377;					\
415     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
416   } while (0)
417 
418 #ifdef DEBUG
419 static void
extract_number(dest,source)420 extract_number (dest, source)
421     int *dest;
422     unsigned char *source;
423 {
424   int temp = SIGN_EXTEND_CHAR (*(source + 1));
425   *dest = *source & 0377;
426   *dest += temp << 8;
427 }
428 
429 #ifndef EXTRACT_MACROS /* To debug the macros.  */
430 #undef EXTRACT_NUMBER
431 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
432 #endif /* not EXTRACT_MACROS */
433 
434 #endif /* DEBUG */
435 
436 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
437    SOURCE must be an lvalue.  */
438 
439 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
440   do {									\
441     EXTRACT_NUMBER (destination, source);				\
442     (source) += 2; 							\
443   } while (0)
444 
445 #ifdef DEBUG
446 static void
extract_number_and_incr(destination,source)447 extract_number_and_incr (destination, source)
448     int *destination;
449     unsigned char **source;
450 {
451   extract_number (destination, *source);
452   *source += 2;
453 }
454 
455 #ifndef EXTRACT_MACROS
456 #undef EXTRACT_NUMBER_AND_INCR
457 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
458   extract_number_and_incr (&dest, &src)
459 #endif /* not EXTRACT_MACROS */
460 
461 #endif /* DEBUG */
462 
463 /* If DEBUG is defined, Regex prints many voluminous messages about what
464    it is doing (if the variable `debug' is non-zero).  If linked with the
465    main program in `iregex.c', you can enter patterns and strings
466    interactively.  And if linked with the main program in `main.c' and
467    the other test files, you can run the already-written tests.  */
468 
469 #ifdef DEBUG
470 
471 /* We use standard I/O for debugging.  */
472 #include <stdio.h>
473 
474 /* It is useful to test things that ``must'' be true when debugging.  */
475 #include <assert.h>
476 
477 static int debug = 0;
478 
479 #define DEBUG_STATEMENT(e) e
480 #define DEBUG_PRINT1(x) if (debug) printf (x)
481 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
482 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
483 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
484 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
485   if (debug) print_partial_compiled_pattern (s, e)
486 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
487   if (debug) print_double_string (w, s1, sz1, s2, sz2)
488 
489 
490 extern void printchar ();
491 
492 /* Print the fastmap in human-readable form.  */
493 
494 void
print_fastmap(fastmap)495 print_fastmap (fastmap)
496     char *fastmap;
497 {
498   unsigned was_a_range = 0;
499   unsigned i = 0;
500 
501   while (i < (1 << BYTEWIDTH))
502     {
503       if (fastmap[i++])
504 	{
505 	  was_a_range = 0;
506           printchar (i - 1);
507           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
508             {
509               was_a_range = 1;
510               i++;
511             }
512 	  if (was_a_range)
513             {
514               printf ("-");
515               printchar (i - 1);
516             }
517         }
518     }
519   putchar ('\n');
520 }
521 
522 
523 /* Print a compiled pattern string in human-readable form, starting at
524    the START pointer into it and ending just before the pointer END.  */
525 
526 void
print_partial_compiled_pattern(start,end)527 print_partial_compiled_pattern (start, end)
528     unsigned char *start;
529     unsigned char *end;
530 {
531   int mcnt, mcnt2;
532   unsigned char *p = start;
533   unsigned char *pend = end;
534 
535   if (start == NULL)
536     {
537       printf ("(null)\n");
538       return;
539     }
540 
541   /* Loop over pattern commands.  */
542   while (p < pend)
543     {
544       switch ((re_opcode_t) *p++)
545 	{
546         case no_op:
547           printf ("/no_op");
548           break;
549 
550 	case exactn:
551 	  mcnt = *p++;
552           printf ("/exactn/%d", mcnt);
553           do
554 	    {
555               putchar ('/');
556 	      printchar (*p++);
557             }
558           while (--mcnt);
559           break;
560 
561 	case start_memory:
562           mcnt = *p++;
563           printf ("/start_memory/%d/%d", mcnt, *p++);
564           break;
565 
566 	case stop_memory:
567           mcnt = *p++;
568 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
569           break;
570 
571 	case duplicate:
572 	  printf ("/duplicate/%d", *p++);
573 	  break;
574 
575 	case anychar:
576 	  printf ("/anychar");
577 	  break;
578 
579 	case charset:
580         case charset_not:
581           {
582             register int c;
583 
584             printf ("/charset%s",
585 	            (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
586 
587             assert (p + *p < pend);
588 
589             for (c = 0; c < *p; c++)
590               {
591                 unsigned bit;
592                 unsigned char map_byte = p[1 + c];
593 
594                 putchar ('/');
595 
596 		for (bit = 0; bit < BYTEWIDTH; bit++)
597                   if (map_byte & (1 << bit))
598                     printchar (c * BYTEWIDTH + bit);
599               }
600 	    p += 1 + *p;
601 	    break;
602 	  }
603 
604 	case begline:
605 	  printf ("/begline");
606           break;
607 
608 	case endline:
609           printf ("/endline");
610           break;
611 
612 	case on_failure_jump:
613           extract_number_and_incr (&mcnt, &p);
614 	  printf ("/on_failure_jump/0/%d", mcnt);
615           break;
616 
617 	case on_failure_keep_string_jump:
618           extract_number_and_incr (&mcnt, &p);
619 	  printf ("/on_failure_keep_string_jump/0/%d", mcnt);
620           break;
621 
622 	case dummy_failure_jump:
623           extract_number_and_incr (&mcnt, &p);
624 	  printf ("/dummy_failure_jump/0/%d", mcnt);
625           break;
626 
627 	case push_dummy_failure:
628           printf ("/push_dummy_failure");
629           break;
630 
631         case maybe_pop_jump:
632           extract_number_and_incr (&mcnt, &p);
633 	  printf ("/maybe_pop_jump/0/%d", mcnt);
634 	  break;
635 
636         case pop_failure_jump:
637 	  extract_number_and_incr (&mcnt, &p);
638 	  printf ("/pop_failure_jump/0/%d", mcnt);
639 	  break;
640 
641         case jump_past_alt:
642 	  extract_number_and_incr (&mcnt, &p);
643 	  printf ("/jump_past_alt/0/%d", mcnt);
644 	  break;
645 
646         case jump:
647 	  extract_number_and_incr (&mcnt, &p);
648 	  printf ("/jump/0/%d", mcnt);
649 	  break;
650 
651         case succeed_n:
652           extract_number_and_incr (&mcnt, &p);
653           extract_number_and_incr (&mcnt2, &p);
654 	  printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
655           break;
656 
657         case jump_n:
658           extract_number_and_incr (&mcnt, &p);
659           extract_number_and_incr (&mcnt2, &p);
660 	  printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
661           break;
662 
663         case set_number_at:
664           extract_number_and_incr (&mcnt, &p);
665           extract_number_and_incr (&mcnt2, &p);
666 	  printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
667           break;
668 
669         case wordbound:
670 	  printf ("/wordbound");
671 	  break;
672 
673 	case notwordbound:
674 	  printf ("/notwordbound");
675           break;
676 
677 	case wordbeg:
678 	  printf ("/wordbeg");
679 	  break;
680 
681 	case wordend:
682 	  printf ("/wordend");
683 
684 #ifdef emacs
685 	case before_dot:
686 	  printf ("/before_dot");
687           break;
688 
689 	case at_dot:
690 	  printf ("/at_dot");
691           break;
692 
693 	case after_dot:
694 	  printf ("/after_dot");
695           break;
696 
697 	case syntaxspec:
698           printf ("/syntaxspec");
699 	  mcnt = *p++;
700 	  printf ("/%d", mcnt);
701           break;
702 
703 	case notsyntaxspec:
704           printf ("/notsyntaxspec");
705 	  mcnt = *p++;
706 	  printf ("/%d", mcnt);
707 	  break;
708 #endif /* emacs */
709 
710 	case wordchar:
711 	  printf ("/wordchar");
712           break;
713 
714 	case notwordchar:
715 	  printf ("/notwordchar");
716           break;
717 
718 	case begbuf:
719 	  printf ("/begbuf");
720           break;
721 
722 	case endbuf:
723 	  printf ("/endbuf");
724           break;
725 
726         default:
727           printf ("?%d", *(p-1));
728 	}
729     }
730   printf ("/\n");
731 }
732 
733 
734 void
print_compiled_pattern(bufp)735 print_compiled_pattern (bufp)
736     struct re_pattern_buffer *bufp;
737 {
738   unsigned char *buffer = bufp->buffer;
739 
740   print_partial_compiled_pattern (buffer, buffer + bufp->used);
741   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
742 
743   if (bufp->fastmap_accurate && bufp->fastmap)
744     {
745       printf ("fastmap: ");
746       print_fastmap (bufp->fastmap);
747     }
748 
749   printf ("re_nsub: %d\t", bufp->re_nsub);
750   printf ("regs_alloc: %d\t", bufp->regs_allocated);
751   printf ("can_be_null: %d\t", bufp->can_be_null);
752   printf ("newline_anchor: %d\n", bufp->newline_anchor);
753   printf ("no_sub: %d\t", bufp->no_sub);
754   printf ("not_bol: %d\t", bufp->not_bol);
755   printf ("not_eol: %d\t", bufp->not_eol);
756   printf ("syntax: %d\n", bufp->syntax);
757   /* Perhaps we should print the translate table?  */
758 }
759 
760 
761 void
print_double_string(where,string1,size1,string2,size2)762 print_double_string (where, string1, size1, string2, size2)
763     const char *where;
764     const char *string1;
765     const char *string2;
766     int size1;
767     int size2;
768 {
769   unsigned this_char;
770 
771   if (where == NULL)
772     printf ("(null)");
773   else
774     {
775       if (FIRST_STRING_P (where))
776         {
777           for (this_char = where - string1; this_char < size1; this_char++)
778             printchar (string1[this_char]);
779 
780           where = string2;
781         }
782 
783       for (this_char = where - string2; this_char < size2; this_char++)
784         printchar (string2[this_char]);
785     }
786 }
787 
788 #else /* not DEBUG */
789 
790 #undef assert
791 #define assert(e)
792 
793 #define DEBUG_STATEMENT(e)
794 #define DEBUG_PRINT1(x)
795 #define DEBUG_PRINT2(x1, x2)
796 #define DEBUG_PRINT3(x1, x2, x3)
797 #define DEBUG_PRINT4(x1, x2, x3, x4)
798 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
799 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
800 
801 #endif /* not DEBUG */
802 
803 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
804    also be assigned to arbitrarily: each pattern buffer stores its own
805    syntax, so it can be changed between regex compilations.  */
806 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
807 
808 
809 /* Specify the precise syntax of regexps for compilation.  This provides
810    for compatibility for various utilities which historically have
811    different, incompatible syntaxes.
812 
813    The argument SYNTAX is a bit mask comprised of the various bits
814    defined in regex.h.  We return the old syntax.  */
815 
816 reg_syntax_t
re_set_syntax(syntax)817 re_set_syntax (syntax)
818     reg_syntax_t syntax;
819 {
820   reg_syntax_t ret = re_syntax_options;
821 
822   re_syntax_options = syntax;
823   return ret;
824 }
825 
826 /* This table gives an error message for each of the error codes listed
827    in regex.h.  Obviously the order here has to be same as there.  */
828 
829 static const char *re_error_msg[] =
830   { NULL,					/* REG_NOERROR */
831     "No match",					/* REG_NOMATCH */
832     "Invalid regular expression",		/* REG_BADPAT */
833     "Invalid collation character",		/* REG_ECOLLATE */
834     "Invalid character class name",		/* REG_ECTYPE */
835     "Trailing backslash",			/* REG_EESCAPE */
836     "Invalid back reference",			/* REG_ESUBREG */
837     "Unmatched [ or [^",			/* REG_EBRACK */
838     "Unmatched ( or \\(",			/* REG_EPAREN */
839     "Unmatched \\{",				/* REG_EBRACE */
840     "Invalid content of \\{\\}",		/* REG_BADBR */
841     "Invalid range end",			/* REG_ERANGE */
842     "Memory exhausted",				/* REG_ESPACE */
843     "Invalid preceding regular expression",	/* REG_BADRPT */
844     "Premature end of regular expression",	/* REG_EEND */
845     "Regular expression too big",		/* REG_ESIZE */
846     "Unmatched ) or \\)",			/* REG_ERPAREN */
847   };
848 
849 /* Subroutine declarations and macros for regex_compile.  */
850 
851 static void store_op1 (), store_op2 ();
852 static void insert_op1 (), insert_op2 ();
853 static boolean at_begline_loc_p (), at_endline_loc_p ();
854 static boolean group_in_compile_stack ();
855 static reg_errcode_t compile_range ();
856 
857 #define FREE_MEM(var) if (var) free (var); var = NULL
858 
859 /* Fetch the next character in the uncompiled pattern---translating it
860    if necessary.  Also cast from a signed character in the constant
861    string passed to us by the user to an unsigned char that we can use
862    as an array index (in, e.g., `translate').  */
863 #define PATFETCH(c)							\
864 do {if (p == pend) {FREE_MEM (compile_stack.stack); return REG_EEND;}	\
865     c = (unsigned char) *p++;						\
866     if (translate) c = translate[c]; 					\
867   } while (0)
868 
869 /* Fetch the next character in the uncompiled pattern, with no
870    translation.  */
871 #define PATFETCH_RAW(c)							\
872 do {if (p == pend) {FREE_MEM (compile_stack.stack); return REG_EEND;}	\
873     c = (unsigned char) *p++; 						\
874   } while (0)
875 
876 /* Go backwards one character in the pattern.  */
877 #define PATUNFETCH p--
878 
879 
880 /* If `translate' is non-null, return translate[D], else just D.  We
881    cast the subscript to translate because some data is declared as
882    `char *', to avoid warnings when a string constant is passed.  But
883    when we use a character as a subscript we must make it unsigned.  */
884 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
885 
886 
887 /* Macros for outputting the compiled pattern into `buffer'.  */
888 
889 /* If the buffer isn't allocated when it comes in, use this.  */
890 #define INIT_BUF_SIZE  32
891 
892 /* Make sure we have at least N more bytes of space in buffer.  */
893 #define GET_BUFFER_SPACE(n)						\
894     while ((unsigned long)(b - bufp->buffer + (n)) > bufp->allocated)	\
895       EXTEND_BUFFER ()
896 
897 /* Make sure we have one more byte of buffer space and then add C to it.  */
898 #define BUF_PUSH(c)							\
899   do {									\
900     GET_BUFFER_SPACE (1);						\
901     *b++ = (unsigned char) (c);						\
902   } while (0)
903 
904 
905 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
906 #define BUF_PUSH_2(c1, c2)						\
907   do {									\
908     GET_BUFFER_SPACE (2);						\
909     *b++ = (unsigned char) (c1);					\
910     *b++ = (unsigned char) (c2);					\
911   } while (0)
912 
913 
914 /* As with BUF_PUSH_2, except for three bytes.  */
915 #define BUF_PUSH_3(c1, c2, c3)						\
916   do {									\
917     GET_BUFFER_SPACE (3);						\
918     *b++ = (unsigned char) (c1);					\
919     *b++ = (unsigned char) (c2);					\
920     *b++ = (unsigned char) (c3);					\
921   } while (0)
922 
923 
924 /* Store a jump with opcode OP at LOC to location TO.  We store a
925    relative address offset by the three bytes the jump itself occupies.  */
926 #define STORE_JUMP(op, loc, to) \
927   store_op1 (op, loc, (to) - (loc) - 3)
928 
929 /* Likewise, for a two-argument jump.  */
930 #define STORE_JUMP2(op, loc, to, arg) \
931   store_op2 (op, loc, (to) - (loc) - 3, arg)
932 
933 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
934 #define INSERT_JUMP(op, loc, to) \
935   insert_op1 (op, loc, (to) - (loc) - 3, b)
936 
937 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
938 #define INSERT_JUMP2(op, loc, to, arg) \
939   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
940 
941 
942 /* This is not an arbitrary limit: the arguments which represent offsets
943    into the pattern are two bytes long.  So if 2^16 bytes turns out to
944    be too small, many things would have to change.  */
945 #define MAX_BUF_SIZE (1L << 16)
946 
947 
948 /* Extend the buffer by twice its current size via realloc and
949    reset the pointers that pointed into the old block to point to the
950    correct places in the new one.  If extending the buffer results in it
951    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
952 #define EXTEND_BUFFER()							\
953   do { 									\
954     unsigned char *old_buffer = bufp->buffer;				\
955     if (bufp->allocated == MAX_BUF_SIZE) 				\
956       {									\
957         FREE_MEM (compile_stack.stack);					\
958         return REG_ESIZE;						\
959       }									\
960     bufp->allocated <<= 1;						\
961     if (bufp->allocated > MAX_BUF_SIZE)					\
962       bufp->allocated = MAX_BUF_SIZE; 					\
963     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
964     if (bufp->buffer == NULL)						\
965       {									\
966         FREE_MEM (compile_stack.stack);					\
967         return REG_ESPACE;						\
968       }									\
969     /* If the buffer moved, move all the pointers into it.  */		\
970     if (old_buffer != bufp->buffer)					\
971       {									\
972         b = (b - old_buffer) + bufp->buffer;				\
973         begalt = (begalt - old_buffer) + bufp->buffer;			\
974         if (fixup_alt_jump)						\
975           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
976         if (laststart)							\
977           laststart = (laststart - old_buffer) + bufp->buffer;		\
978         if (pending_exact)						\
979           pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
980       }									\
981   } while (0)
982 
983 
984 /* Since we have one byte reserved for the register number argument to
985    {start,stop}_memory, the maximum number of groups we can report
986    things about is what fits in that byte.  */
987 #define MAX_REGNUM 255
988 
989 /* But patterns can have more than `MAX_REGNUM' registers.  We just
990    ignore the excess.  */
991 typedef unsigned regnum_t;
992 
993 
994 /* Macros for the compile stack.  */
995 
996 /* Since offsets can go either forwards or backwards, this type needs to
997    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
998 typedef int pattern_offset_t;
999 
1000 typedef struct
1001 {
1002   pattern_offset_t begalt_offset;
1003   pattern_offset_t fixup_alt_jump;
1004   pattern_offset_t inner_group_offset;
1005   pattern_offset_t laststart_offset;
1006   regnum_t regnum;
1007 } compile_stack_elt_t;
1008 
1009 
1010 typedef struct
1011 {
1012   compile_stack_elt_t *stack;
1013   unsigned size;
1014   unsigned avail;			/* Offset of next open position.  */
1015 } compile_stack_type;
1016 
1017 
1018 #define INIT_COMPILE_STACK_SIZE 32
1019 
1020 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1021 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1022 
1023 /* The next available element.  */
1024 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1025 
1026 
1027 /* Set the bit for character C in a list.  */
1028 #define SET_LIST_BIT(c)                               \
1029   (b[((unsigned char) (c)) / BYTEWIDTH]               \
1030    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1031 
1032 
1033 /* Get the next unsigned number in the uncompiled pattern.  */
1034 #define GET_UNSIGNED_NUMBER(num) 					\
1035   { if (p != pend)							\
1036      {									\
1037        PATFETCH (c); 							\
1038        while (ISDIGIT (c)) 						\
1039          { 								\
1040            if (num < 0)							\
1041               num = 0;							\
1042            num = num * 10 + c - '0'; 					\
1043            if (p == pend) 						\
1044               break; 							\
1045            PATFETCH (c);						\
1046          } 								\
1047        } 								\
1048     }
1049 
1050 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1051 
1052 #define IS_CHAR_CLASS(string)						\
1053    (STREQ (string, "alpha") || STREQ (string, "upper")			\
1054     || STREQ (string, "lower") || STREQ (string, "digit")		\
1055     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1056     || STREQ (string, "space") || STREQ (string, "print")		\
1057     || STREQ (string, "punct") || STREQ (string, "graph")		\
1058     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1059 
1060 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1061    Returns one of error codes defined in `regex.h', or zero for success.
1062 
1063    Assumes the `allocated' (and perhaps `buffer') and `translate'
1064    fields are set in BUFP on entry.
1065 
1066    If it succeeds, results are put in BUFP (if it returns an error, the
1067    contents of BUFP are undefined):
1068      `buffer' is the compiled pattern;
1069      `syntax' is set to SYNTAX;
1070      `used' is set to the length of the compiled pattern;
1071      `fastmap_accurate' is zero;
1072      `re_nsub' is the number of subexpressions in PATTERN;
1073      `not_bol' and `not_eol' are zero;
1074 
1075    The `fastmap' and `newline_anchor' fields are neither
1076    examined nor set.  */
1077 
1078 static reg_errcode_t
regex_compile(pattern,size,syntax,bufp)1079 regex_compile (pattern, size, syntax, bufp)
1080      const char *pattern;
1081      int size;
1082      reg_syntax_t syntax;
1083      struct re_pattern_buffer *bufp;
1084 {
1085   /* We fetch characters from PATTERN here.  Even though PATTERN is
1086      `char *' (i.e., signed), we declare these variables as unsigned, so
1087      they can be reliably used as array indices.  */
1088   register unsigned char c, c1;
1089 
1090   /* A random tempory spot in PATTERN.  */
1091   const char *p1;
1092 
1093   /* Points to the end of the buffer, where we should append.  */
1094   register unsigned char *b;
1095 
1096   /* Keeps track of unclosed groups.  */
1097   compile_stack_type compile_stack;
1098 
1099   /* Points to the current (ending) position in the pattern.  */
1100   const char *p = pattern;
1101   const char *pend = pattern + size;
1102 
1103   /* How to translate the characters in the pattern.  */
1104   char *translate = bufp->translate;
1105 
1106   /* Address of the count-byte of the most recently inserted `exactn'
1107      command.  This makes it possible to tell if a new exact-match
1108      character can be added to that command or if the character requires
1109      a new `exactn' command.  */
1110   unsigned char *pending_exact = 0;
1111 
1112   /* Address of start of the most recently finished expression.
1113      This tells, e.g., postfix * where to find the start of its
1114      operand.  Reset at the beginning of groups and alternatives.  */
1115   unsigned char *laststart = 0;
1116 
1117   /* Address of beginning of regexp, or inside of last group.  */
1118   unsigned char *begalt;
1119 
1120   /* Place in the uncompiled pattern (i.e., the {) to
1121      which to go back if the interval is invalid.  */
1122   const char *beg_interval;
1123 
1124   /* Address of the place where a forward jump should go to the end of
1125      the containing expression.  Each alternative of an `or' -- except the
1126      last -- ends with a forward jump of this sort.  */
1127   unsigned char *fixup_alt_jump = 0;
1128 
1129   /* Counts open-groups as they are encountered.  Remembered for the
1130      matching close-group on the compile stack, so the same register
1131      number is put in the stop_memory as the start_memory.  */
1132   regnum_t regnum = 0;
1133 
1134 #ifdef DEBUG
1135   DEBUG_PRINT1 ("\nCompiling pattern: ");
1136   if (debug)
1137     {
1138       unsigned debug_count;
1139 
1140       for (debug_count = 0; debug_count < size; debug_count++)
1141         printchar (pattern[debug_count]);
1142       putchar ('\n');
1143     }
1144 #endif /* DEBUG */
1145 
1146   /* Initialize the compile stack.  */
1147   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1148   if (compile_stack.stack == NULL)
1149     return REG_ESPACE;
1150 
1151   compile_stack.size = INIT_COMPILE_STACK_SIZE;
1152   compile_stack.avail = 0;
1153 
1154   /* Initialize the pattern buffer.  */
1155   bufp->syntax = syntax;
1156   bufp->fastmap_accurate = 0;
1157   bufp->not_bol = bufp->not_eol = 0;
1158 
1159   /* Set `used' to zero, so that if we return an error, the pattern
1160      printer (for debugging) will think there's no pattern.  We reset it
1161      at the end.  */
1162   bufp->used = 0;
1163 
1164   /* Always count groups, whether or not bufp->no_sub is set.  */
1165   bufp->re_nsub = 0;
1166 
1167 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1168   /* Initialize the syntax table.  */
1169    init_syntax_once ();
1170 #endif
1171 
1172   if (bufp->allocated == 0)
1173     {
1174       if (bufp->buffer)
1175 	{ /* If zero allocated, but buffer is non-null, try to realloc
1176              enough space.  This loses if buffer's address is bogus, but
1177              that is the user's responsibility.  */
1178           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1179         }
1180       else
1181         { /* Caller did not allocate a buffer.  Do it for them.  */
1182           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1183         }
1184       if (!bufp->buffer)
1185         {
1186           FREE_MEM (compile_stack.stack);
1187           return REG_ESPACE;
1188         }
1189       bufp->allocated = INIT_BUF_SIZE;
1190     }
1191 
1192   begalt = b = bufp->buffer;
1193 
1194   /* Loop through the uncompiled pattern until we're at the end.  */
1195   while (p != pend)
1196     {
1197       PATFETCH (c);
1198 
1199       switch (c)
1200         {
1201         case '^':
1202           {
1203             if (   /* If at start of pattern, it's an operator.  */
1204                    p == pattern + 1
1205                    /* If context independent, it's an operator.  */
1206                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1207                    /* Otherwise, depends on what's come before.  */
1208                 || at_begline_loc_p (pattern, p, syntax))
1209               BUF_PUSH (begline);
1210             else
1211               goto normal_char;
1212           }
1213           break;
1214 
1215 
1216         case '$':
1217           {
1218             if (   /* If at end of pattern, it's an operator.  */
1219                    p == pend
1220                    /* If context independent, it's an operator.  */
1221                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1222                    /* Otherwise, depends on what's next.  */
1223                 || at_endline_loc_p (p, pend, syntax))
1224                BUF_PUSH (endline);
1225              else
1226                goto normal_char;
1227            }
1228            break;
1229 
1230 
1231 	case '+':
1232         case '?':
1233           if ((syntax & RE_BK_PLUS_QM)
1234               || (syntax & RE_LIMITED_OPS))
1235             goto normal_char;
1236         handle_plus:
1237         case '*':
1238           /* If there is no previous pattern... */
1239           if (!laststart)
1240             {
1241               if (syntax & RE_CONTEXT_INVALID_OPS)
1242                 {
1243                   FREE_MEM (compile_stack.stack);
1244                   return REG_BADRPT;
1245                 }
1246               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1247                 goto normal_char;
1248             }
1249 
1250           {
1251             /* Are we optimizing this jump?  */
1252             boolean keep_string_p = false;
1253 
1254             /* 1 means zero (many) matches is allowed.  */
1255             char zero_times_ok = 0, many_times_ok = 0;
1256 
1257             /* If there is a sequence of repetition chars, collapse it
1258                down to just one (the right one).  We can't combine
1259                interval operators with these because of, e.g., `a{2}*',
1260                which should only match an even number of `a's.  */
1261 
1262             for (;;)
1263               {
1264                 zero_times_ok |= c != '+';
1265                 many_times_ok |= c != '?';
1266 
1267                 if (p == pend)
1268                   break;
1269 
1270                 PATFETCH (c);
1271 
1272                 if (c == '*'
1273                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1274                   ;
1275 
1276                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1277                   {
1278                     if (p == pend)
1279                       {
1280                         FREE_MEM (compile_stack.stack);
1281                         return REG_EESCAPE;
1282                       }
1283 
1284                     PATFETCH (c1);
1285                     if (!(c1 == '+' || c1 == '?'))
1286                       {
1287                         PATUNFETCH;
1288                         PATUNFETCH;
1289                         break;
1290                       }
1291 
1292                     c = c1;
1293                   }
1294                 else
1295                   {
1296                     PATUNFETCH;
1297                     break;
1298                   }
1299 
1300                 /* If we get here, we found another repeat character.  */
1301                }
1302 
1303             /* Star, etc. applied to an empty pattern is equivalent
1304                to an empty pattern.  */
1305             if (!laststart)
1306               break;
1307 
1308             /* Now we know whether or not zero matches is allowed
1309                and also whether or not two or more matches is allowed.  */
1310             if (many_times_ok)
1311               { /* More than one repetition is allowed, so put in at the
1312                    end a backward relative jump from `b' to before the next
1313                    jump we're going to put in below (which jumps from
1314                    laststart to after this jump).
1315 
1316                    But if we are at the `*' in the exact sequence `.*\n',
1317                    insert an unconditional jump backwards to the .,
1318                    instead of the beginning of the loop.  This way we only
1319                    push a failure point once, instead of every time
1320                    through the loop.  */
1321                 assert (p - 1 > pattern);
1322 
1323                 /* Allocate the space for the jump.  */
1324                 GET_BUFFER_SPACE (3);
1325 
1326                 /* We know we are not at the first character of the pattern,
1327                    because laststart was non-zero.  And we've already
1328                    incremented `p', by the way, to be the character after
1329                    the `*'.  Do we have to do something analogous here
1330                    for null bytes, because of RE_DOT_NOT_NULL?  */
1331                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1332 		    && zero_times_ok
1333                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1334                     && !(syntax & RE_DOT_NEWLINE))
1335                   { /* We have .*\n.  */
1336                     STORE_JUMP (jump, b, laststart);
1337                     keep_string_p = true;
1338                   }
1339                 else
1340                   /* Anything else.  */
1341                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1342 
1343                 /* We've added more stuff to the buffer.  */
1344                 b += 3;
1345               }
1346 
1347             /* On failure, jump from laststart to b + 3, which will be the
1348                end of the buffer after this jump is inserted.  */
1349             GET_BUFFER_SPACE (3);
1350             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1351                                        : on_failure_jump,
1352                          laststart, b + 3);
1353             pending_exact = 0;
1354             b += 3;
1355 
1356             if (!zero_times_ok)
1357               {
1358                 /* At least one repetition is required, so insert a
1359                    `dummy_failure_jump' before the initial
1360                    `on_failure_jump' instruction of the loop. This
1361                    effects a skip over that instruction the first time
1362                    we hit that loop.  */
1363                 GET_BUFFER_SPACE (3);
1364                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1365                 b += 3;
1366               }
1367             }
1368 	  break;
1369 
1370 
1371 	case '.':
1372           laststart = b;
1373           BUF_PUSH (anychar);
1374           break;
1375 
1376 
1377         case '[':
1378           {
1379             boolean had_char_class = false;
1380 
1381             if (p == pend)
1382               {
1383                 FREE_MEM (compile_stack.stack);
1384                 return REG_EBRACK;
1385               }
1386             /* Ensure that we have enough space to push a charset: the
1387                opcode, the length count, and the bitset; 34 bytes in all.  */
1388 	    GET_BUFFER_SPACE (34);
1389 
1390             laststart = b;
1391 
1392             /* We test `*p == '^' twice, instead of using an if
1393                statement, so we only need one BUF_PUSH.  */
1394             BUF_PUSH (*p == '^' ? charset_not : charset);
1395             if (*p == '^')
1396               p++;
1397 
1398             /* Remember the first position in the bracket expression.  */
1399             p1 = p;
1400 
1401             /* Push the number of bytes in the bitmap.  */
1402             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1403 
1404             /* Clear the whole map.  */
1405             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1406 
1407             /* charset_not matches newline according to a syntax bit.  */
1408             if ((re_opcode_t) b[-2] == charset_not
1409                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1410               SET_LIST_BIT ('\n');
1411 
1412             /* Read in characters and ranges, setting map bits.  */
1413             for (;;)
1414               {
1415                 if (p == pend)
1416                   {
1417                     FREE_MEM (compile_stack.stack);
1418                     return REG_EBRACK;
1419                   }
1420 
1421                 PATFETCH (c);
1422 
1423                 /* \ might escape characters inside [...] and [^...].  */
1424                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1425                   {
1426                     if (p == pend)
1427                       {
1428                         FREE_MEM (compile_stack.stack);
1429                         return REG_EESCAPE;
1430                       }
1431 
1432                     PATFETCH (c1);
1433                     SET_LIST_BIT (c1);
1434                     continue;
1435                   }
1436 
1437                 /* Could be the end of the bracket expression.  If it's
1438                    not (i.e., when the bracket expression is `[]' so
1439                    far), the ']' character bit gets set way below.  */
1440                 if (c == ']' && p != p1 + 1)
1441                   break;
1442 
1443                 /* Look ahead to see if it's a range when the last thing
1444                    was a character class.  */
1445                 if (had_char_class && c == '-' && *p != ']')
1446                   {
1447                     FREE_MEM (compile_stack.stack);
1448                     return REG_ERANGE;
1449                   }
1450 
1451                 /* Look ahead to see if it's a range when the last thing
1452                    was a character: if this is a hyphen not at the
1453                    beginning or the end of a list, then it's the range
1454                    operator.  */
1455                 if (c == '-'
1456                     && !(p - 2 >= pattern && p[-2] == '[')
1457                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1458                     && *p != ']')
1459                   {
1460                     reg_errcode_t ret
1461                       = compile_range (&p, pend, translate, syntax, b);
1462                     if (ret != REG_NOERROR)
1463                       {
1464                         FREE_MEM (compile_stack.stack);
1465                         return ret;
1466                       }
1467                   }
1468 
1469                 else if (p[0] == '-' && p[1] != ']')
1470                   { /* This handles ranges made up of characters only.  */
1471                     reg_errcode_t ret;
1472 
1473 		    /* Move past the `-'.  */
1474                     PATFETCH (c1);
1475 
1476                     ret = compile_range (&p, pend, translate, syntax, b);
1477                     if (ret != REG_NOERROR)
1478                       {
1479                         FREE_MEM (compile_stack.stack);
1480                         return ret;
1481                       }
1482                   }
1483 
1484                 /* See if we're at the beginning of a possible character
1485                    class.  */
1486 
1487                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1488                   { /* Leave room for the null.  */
1489                     char str[CHAR_CLASS_MAX_LENGTH + 1];
1490 
1491                     PATFETCH (c);
1492                     c1 = 0;
1493 
1494                     /* If pattern is `[[:'.  */
1495                     if (p == pend)
1496                       {
1497                         FREE_MEM (compile_stack.stack);
1498                         return REG_EBRACK;
1499                       }
1500 
1501                     for (;;)
1502                       {
1503                         PATFETCH (c);
1504                         if (c == ':' || c == ']' || p == pend
1505                             || c1 == CHAR_CLASS_MAX_LENGTH)
1506                           break;
1507                         str[c1++] = c;
1508                       }
1509                     str[c1] = '\0';
1510 
1511                     /* If isn't a word bracketed by `[:' and:`]':
1512                        undo the ending character, the letters, and leave
1513                        the leading `:' and `[' (but set bits for them).  */
1514                     if (c == ':' && *p == ']')
1515                       {
1516                         int ch;
1517                         boolean is_alnum = STREQ (str, "alnum");
1518                         boolean is_alpha = STREQ (str, "alpha");
1519                         boolean is_blank = STREQ (str, "blank");
1520                         boolean is_cntrl = STREQ (str, "cntrl");
1521                         boolean is_digit = STREQ (str, "digit");
1522                         boolean is_graph = STREQ (str, "graph");
1523                         boolean is_lower = STREQ (str, "lower");
1524                         boolean is_print = STREQ (str, "print");
1525                         boolean is_punct = STREQ (str, "punct");
1526                         boolean is_space = STREQ (str, "space");
1527                         boolean is_upper = STREQ (str, "upper");
1528                         boolean is_xdigit = STREQ (str, "xdigit");
1529 
1530                         if (!IS_CHAR_CLASS (str))
1531                           {
1532                             FREE_MEM (compile_stack.stack);
1533                             return REG_ECTYPE;
1534                           }
1535 
1536                         /* Throw away the ] at the end of the character
1537                            class.  */
1538                         PATFETCH (c);
1539 
1540                         if (p == pend)
1541                           {
1542                             FREE_MEM (compile_stack.stack);
1543                             return REG_EBRACK;
1544                           }
1545 
1546                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1547                           {
1548                             if (   (is_alnum  && ISALNUM (ch))
1549                                 || (is_alpha  && ISALPHA (ch))
1550                                 || (is_blank  && ISBLANK (ch))
1551                                 || (is_cntrl  && ISCNTRL (ch))
1552                                 || (is_digit  && ISDIGIT (ch))
1553                                 || (is_graph  && ISGRAPH (ch))
1554                                 || (is_lower  && ISLOWER (ch))
1555                                 || (is_print  && ISPRINT (ch))
1556                                 || (is_punct  && ISPUNCT (ch))
1557                                 || (is_space  && ISSPACE (ch))
1558                                 || (is_upper  && ISUPPER (ch))
1559                                 || (is_xdigit && ISXDIGIT (ch)))
1560                             SET_LIST_BIT (ch);
1561                           }
1562                         had_char_class = true;
1563                       }
1564                     else
1565                       {
1566                         c1++;
1567                         while (c1--)
1568                           PATUNFETCH;
1569                         SET_LIST_BIT ('[');
1570                         SET_LIST_BIT (':');
1571                         had_char_class = false;
1572                       }
1573                   }
1574                 else
1575                   {
1576                     had_char_class = false;
1577                     SET_LIST_BIT (c);
1578                   }
1579               }
1580 
1581             /* Discard any (non)matching list bytes that are all 0 at the
1582                end of the map.  Decrease the map-length byte too.  */
1583             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1584               b[-1]--;
1585             b += b[-1];
1586           }
1587           break;
1588 
1589 
1590 	case '(':
1591           if (syntax & RE_NO_BK_PARENS)
1592             goto handle_open;
1593           else
1594             goto normal_char;
1595 
1596 
1597         case ')':
1598           if (syntax & RE_NO_BK_PARENS)
1599             goto handle_close;
1600           else
1601             goto normal_char;
1602 
1603 
1604         case '\n':
1605           if (syntax & RE_NEWLINE_ALT)
1606             goto handle_alt;
1607           else
1608             goto normal_char;
1609 
1610 
1611 	case '|':
1612           if (syntax & RE_NO_BK_VBAR)
1613             goto handle_alt;
1614           else
1615             goto normal_char;
1616 
1617 
1618         case '{':
1619            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1620              goto handle_interval;
1621            else
1622              goto normal_char;
1623 
1624 
1625         case '\\':
1626           if (p == pend)
1627             {
1628               FREE_MEM (compile_stack.stack);
1629               return REG_EESCAPE;
1630             }
1631 
1632           /* Do not translate the character after the \, so that we can
1633              distinguish, e.g., \B from \b, even if we normally would
1634              translate, e.g., B to b.  */
1635           PATFETCH_RAW (c);
1636 
1637           switch (c)
1638             {
1639             case '(':
1640               if (syntax & RE_NO_BK_PARENS)
1641                 goto normal_backslash;
1642 
1643             handle_open:
1644               bufp->re_nsub++;
1645               regnum++;
1646 
1647               if (COMPILE_STACK_FULL)
1648                 {
1649                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
1650                             compile_stack_elt_t);
1651                   if (compile_stack.stack == NULL) return REG_ESPACE;
1652 
1653                   compile_stack.size <<= 1;
1654                 }
1655 
1656               /* These are the values to restore when we hit end of this
1657                  group.  They are all relative offsets, so that if the
1658                  whole pattern moves because of realloc, they will still
1659                  be valid.  */
1660               COMPILE_STACK_TOP.begalt_offset = (pattern_offset_t)(begalt - bufp->buffer);
1661               COMPILE_STACK_TOP.fixup_alt_jump
1662                 = (pattern_offset_t)(fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0);
1663               COMPILE_STACK_TOP.laststart_offset = (pattern_offset_t)(b - bufp->buffer);
1664               COMPILE_STACK_TOP.regnum = regnum;
1665 
1666               /* We will eventually replace the 0 with the number of
1667                  groups inner to this one.  But do not push a
1668                  start_memory for groups beyond the last one we can
1669                  represent in the compiled pattern.  */
1670               if (regnum <= MAX_REGNUM)
1671                 {
1672                   COMPILE_STACK_TOP.inner_group_offset = (pattern_offset_t)(b - bufp->buffer + 2);
1673                   BUF_PUSH_3 (start_memory, regnum, 0);
1674                 }
1675 
1676               compile_stack.avail++;
1677 
1678               fixup_alt_jump = 0;
1679               laststart = 0;
1680               begalt = b;
1681 	      /* If we've reached MAX_REGNUM groups, then this open
1682 		 won't actually generate any code, so we'll have to
1683 		 clear pending_exact explicitly.  */
1684 	      pending_exact = 0;
1685               break;
1686 
1687 
1688             case ')':
1689               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1690 
1691               if (COMPILE_STACK_EMPTY)
1692                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1693                   goto normal_backslash;
1694                 else
1695                   {
1696                     FREE_MEM (compile_stack.stack);
1697                     return REG_ERPAREN;
1698                   }
1699 
1700             handle_close:
1701               if (fixup_alt_jump)
1702                 { /* Push a dummy failure point at the end of the
1703                      alternative for a possible future
1704                      `pop_failure_jump' to pop.  See comments at
1705                      `push_dummy_failure' in `re_match_2'.  */
1706                   BUF_PUSH (push_dummy_failure);
1707 
1708                   /* We allocated space for this jump when we assigned
1709                      to `fixup_alt_jump', in the `handle_alt' case below.  */
1710                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1711                 }
1712 
1713               /* See similar code for backslashed left paren above.  */
1714               if (COMPILE_STACK_EMPTY)
1715                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1716                   goto normal_char;
1717                 else
1718                   {
1719                     FREE_MEM (compile_stack.stack);
1720                     return REG_ERPAREN;
1721                   }
1722 
1723               /* Since we just checked for an empty stack above, this
1724                  ``can't happen''.  */
1725               assert (compile_stack.avail != 0);
1726               {
1727                 /* We don't just want to restore into `regnum', because
1728                    later groups should continue to be numbered higher,
1729                    as in `(ab)c(de)' -- the second group is #2.  */
1730                 regnum_t this_group_regnum;
1731 
1732                 compile_stack.avail--;
1733                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1734                 fixup_alt_jump
1735                   = COMPILE_STACK_TOP.fixup_alt_jump
1736                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1737                     : 0;
1738                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1739                 this_group_regnum = COMPILE_STACK_TOP.regnum;
1740 		/* If we've reached MAX_REGNUM groups, then this open
1741 		   won't actually generate any code, so we'll have to
1742 		   clear pending_exact explicitly.  */
1743 		pending_exact = 0;
1744 
1745                 /* We're at the end of the group, so now we know how many
1746                    groups were inside this one.  */
1747                 if (this_group_regnum <= MAX_REGNUM)
1748                   {
1749                     unsigned char *inner_group_loc
1750                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1751 
1752                     *inner_group_loc = regnum - this_group_regnum;
1753                     BUF_PUSH_3 (stop_memory, this_group_regnum,
1754                                 regnum - this_group_regnum);
1755                   }
1756               }
1757               break;
1758 
1759 
1760             case '|':					/* `\|'.  */
1761               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1762                 goto normal_backslash;
1763             handle_alt:
1764               if (syntax & RE_LIMITED_OPS)
1765                 goto normal_char;
1766 
1767               /* Insert before the previous alternative a jump which
1768                  jumps to this alternative if the former fails.  */
1769               GET_BUFFER_SPACE (3);
1770               INSERT_JUMP (on_failure_jump, begalt, b + 6);
1771               pending_exact = 0;
1772               b += 3;
1773 
1774               /* The alternative before this one has a jump after it
1775                  which gets executed if it gets matched.  Adjust that
1776                  jump so it will jump to this alternative's analogous
1777                  jump (put in below, which in turn will jump to the next
1778                  (if any) alternative's such jump, etc.).  The last such
1779                  jump jumps to the correct final destination.  A picture:
1780                           _____ _____
1781                           |   | |   |
1782                           |   v |   v
1783                          a | b   | c
1784 
1785                  If we are at `b', then fixup_alt_jump right now points to a
1786                  three-byte space after `a'.  We'll put in the jump, set
1787                  fixup_alt_jump to right after `b', and leave behind three
1788                  bytes which we'll fill in when we get to after `c'.  */
1789 
1790               if (fixup_alt_jump)
1791                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1792 
1793               /* Mark and leave space for a jump after this alternative,
1794                  to be filled in later either by next alternative or
1795                  when know we're at the end of a series of alternatives.  */
1796               fixup_alt_jump = b;
1797               GET_BUFFER_SPACE (3);
1798               b += 3;
1799 
1800               laststart = 0;
1801               begalt = b;
1802               break;
1803 
1804 
1805             case '{':
1806               /* If \{ is a literal.  */
1807               if (!(syntax & RE_INTERVALS)
1808                      /* If we're at `\{' and it's not the open-interval
1809                         operator.  */
1810                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1811                   || (p - 2 == pattern  &&  p == pend))
1812                 goto normal_backslash;
1813 
1814             handle_interval:
1815               {
1816                 /* If got here, then the syntax allows intervals.  */
1817 
1818                 /* At least (most) this many matches must be made.  */
1819                 int lower_bound = -1, upper_bound = -1;
1820 
1821                 beg_interval = p - 1;
1822 
1823                 if (p == pend)
1824                   {
1825                     if (syntax & RE_NO_BK_BRACES)
1826                       goto unfetch_interval;
1827                     else
1828                       {
1829                         FREE_MEM (compile_stack.stack);
1830                         return REG_EBRACE;
1831                       }
1832                   }
1833 
1834                 GET_UNSIGNED_NUMBER (lower_bound);
1835 
1836                 if (c == ',')
1837                   {
1838                     GET_UNSIGNED_NUMBER (upper_bound);
1839                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1840                   }
1841                 else
1842                   /* Interval such as `{1}' => match exactly once. */
1843                   upper_bound = lower_bound;
1844 
1845                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1846                     || lower_bound > upper_bound)
1847                   {
1848                     if (syntax & RE_NO_BK_BRACES)
1849                       goto unfetch_interval;
1850                     else
1851                       {
1852                         FREE_MEM (compile_stack.stack);
1853                         return REG_BADBR;
1854                       }
1855                   }
1856 
1857                 if (!(syntax & RE_NO_BK_BRACES))
1858                   {
1859                     if (c != '\\')
1860                       {
1861                         FREE_MEM (compile_stack.stack);
1862 			return REG_EBRACE;
1863                       }
1864                     PATFETCH (c);
1865                   }
1866 
1867                 if (c != '}')
1868                   {
1869                     if (syntax & RE_NO_BK_BRACES)
1870                       goto unfetch_interval;
1871                     else
1872                       {
1873                         FREE_MEM (compile_stack.stack);
1874                         return REG_BADBR;
1875                       }
1876                   }
1877 
1878                 /* We just parsed a valid interval.  */
1879 
1880                 /* If it's invalid to have no preceding re.  */
1881                 if (!laststart)
1882                   {
1883                     if (syntax & RE_CONTEXT_INVALID_OPS)
1884                       {
1885                         FREE_MEM (compile_stack.stack);
1886                         return REG_BADRPT;
1887                       }
1888                     else if (syntax & RE_CONTEXT_INDEP_OPS)
1889                       laststart = b;
1890                     else
1891                       goto unfetch_interval;
1892                   }
1893 
1894                 /* If the upper bound is zero, don't want to succeed at
1895                    all; jump from `laststart' to `b + 3', which will be
1896                    the end of the buffer after we insert the jump.  */
1897                  if (upper_bound == 0)
1898                    {
1899                      GET_BUFFER_SPACE (3);
1900                      INSERT_JUMP (jump, laststart, b + 3);
1901                      b += 3;
1902                    }
1903 
1904                  /* Otherwise, we have a nontrivial interval.  When
1905                     we're all done, the pattern will look like:
1906                       set_number_at <jump count> <upper bound>
1907                       set_number_at <succeed_n count> <lower bound>
1908                       succeed_n <after jump addr> <succed_n count>
1909                       <body of loop>
1910                       jump_n <succeed_n addr> <jump count>
1911                     (The upper bound and `jump_n' are omitted if
1912                     `upper_bound' is 1, though.)  */
1913                  else
1914                    { /* If the upper bound is > 1, we need to insert
1915                         more at the end of the loop.  */
1916                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
1917 
1918                      GET_BUFFER_SPACE (nbytes);
1919 
1920                      /* Initialize lower bound of the `succeed_n', even
1921                         though it will be set during matching by its
1922                         attendant `set_number_at' (inserted next),
1923                         because `re_compile_fastmap' needs to know.
1924                         Jump to the `jump_n' we might insert below.  */
1925                      INSERT_JUMP2 (succeed_n, laststart,
1926                                    b + 5 + (upper_bound > 1) * 5,
1927                                    lower_bound);
1928                      b += 5;
1929 
1930                      /* Code to initialize the lower bound.  Insert
1931                         before the `succeed_n'.  The `5' is the last two
1932                         bytes of this `set_number_at', plus 3 bytes of
1933                         the following `succeed_n'.  */
1934                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1935                      b += 5;
1936 
1937                      if (upper_bound > 1)
1938                        { /* More than one repetition is allowed, so
1939                             append a backward jump to the `succeed_n'
1940                             that starts this interval.
1941 
1942                             When we've reached this during matching,
1943                             we'll have matched the interval once, so
1944                             jump back only `upper_bound - 1' times.  */
1945                          STORE_JUMP2 (jump_n, b, laststart + 5,
1946                                       upper_bound - 1);
1947                          b += 5;
1948 
1949                          /* The location we want to set is the second
1950                             parameter of the `jump_n'; that is `b-2' as
1951                             an absolute address.  `laststart' will be
1952                             the `set_number_at' we're about to insert;
1953                             `laststart+3' the number to set, the source
1954                             for the relative address.  But we are
1955                             inserting into the middle of the pattern --
1956                             so everything is getting moved up by 5.
1957                             Conclusion: (b - 2) - (laststart + 3) + 5,
1958                             i.e., b - laststart.
1959 
1960                             We insert this at the beginning of the loop
1961                             so that if we fail during matching, we'll
1962                             reinitialize the bounds.  */
1963                          insert_op2 (set_number_at, laststart, b - laststart,
1964                                      upper_bound - 1, b);
1965                          b += 5;
1966                        }
1967                    }
1968                 pending_exact = 0;
1969                 beg_interval = NULL;
1970               }
1971               break;
1972 
1973             unfetch_interval:
1974               /* If an invalid interval, match the characters as literals.  */
1975                assert (beg_interval);
1976                p = beg_interval;
1977                beg_interval = NULL;
1978 
1979                /* normal_char and normal_backslash need `c'.  */
1980                PATFETCH (c);
1981 
1982                if (!(syntax & RE_NO_BK_BRACES))
1983                  {
1984                    if (p > pattern  &&  p[-1] == '\\')
1985                      goto normal_backslash;
1986                  }
1987                goto normal_char;
1988 
1989 #ifdef emacs
1990             /* There is no way to specify the before_dot and after_dot
1991                operators.  rms says this is ok.  --karl  */
1992             case '=':
1993               BUF_PUSH (at_dot);
1994               break;
1995 
1996             case 's':
1997               laststart = b;
1998               PATFETCH (c);
1999               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2000               break;
2001 
2002             case 'S':
2003               laststart = b;
2004               PATFETCH (c);
2005               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2006               break;
2007 #endif /* emacs */
2008 
2009 
2010             case 'w':
2011               laststart = b;
2012               BUF_PUSH (wordchar);
2013               break;
2014 
2015 
2016             case 'W':
2017               laststart = b;
2018               BUF_PUSH (notwordchar);
2019               break;
2020 
2021 
2022             case '<':
2023               BUF_PUSH (wordbeg);
2024               break;
2025 
2026             case '>':
2027               BUF_PUSH (wordend);
2028               break;
2029 
2030             case 'b':
2031               BUF_PUSH (wordbound);
2032               break;
2033 
2034             case 'B':
2035               BUF_PUSH (notwordbound);
2036               break;
2037 
2038             case '`':
2039               BUF_PUSH (begbuf);
2040               break;
2041 
2042             case '\'':
2043               BUF_PUSH (endbuf);
2044               break;
2045 
2046             case '1': case '2': case '3': case '4': case '5':
2047             case '6': case '7': case '8': case '9':
2048               if (syntax & RE_NO_BK_REFS)
2049                 goto normal_char;
2050 
2051               c1 = c - '0';
2052 
2053               if (c1 > regnum)
2054                 {
2055                   FREE_MEM (compile_stack.stack);
2056                   return REG_ESUBREG;
2057                 }
2058 
2059               /* Can't back reference to a subexpression if inside of it.  */
2060               if (group_in_compile_stack (compile_stack, c1))
2061                 goto normal_char;
2062 
2063               laststart = b;
2064               BUF_PUSH_2 (duplicate, c1);
2065               break;
2066 
2067 
2068             case '+':
2069             case '?':
2070               if (syntax & RE_BK_PLUS_QM)
2071                 goto handle_plus;
2072               else
2073                 goto normal_backslash;
2074 
2075             default:
2076             normal_backslash:
2077               /* You might think it would be useful for \ to mean
2078                  not to translate; but if we don't translate it
2079                  it will never match anything.  */
2080               c = TRANSLATE (c);
2081               goto normal_char;
2082             }
2083           break;
2084 
2085 
2086 	default:
2087         /* Expects the character in `c'.  */
2088 	normal_char:
2089 	      /* If no exactn currently being built.  */
2090           if (!pending_exact
2091 
2092               /* If last exactn not at current position.  */
2093               || pending_exact + *pending_exact + 1 != b
2094 
2095               /* We have only one byte following the exactn for the count.  */
2096 	      || *pending_exact == (1 << BYTEWIDTH) - 1
2097 
2098               /* If followed by a repetition operator.  */
2099               || *p == '*' || *p == '^'
2100 	      || ((syntax & RE_BK_PLUS_QM)
2101 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2102 		  : (*p == '+' || *p == '?'))
2103 	      || ((syntax & RE_INTERVALS)
2104                   && ((syntax & RE_NO_BK_BRACES)
2105 		      ? *p == '{'
2106                       : (p[0] == '\\' && p[1] == '{'))))
2107 	    {
2108 	      /* Start building a new exactn.  */
2109 
2110               laststart = b;
2111 
2112 	      BUF_PUSH_2 (exactn, 0);
2113 	      pending_exact = b - 1;
2114             }
2115 
2116 	  BUF_PUSH (c);
2117           (*pending_exact)++;
2118 	  break;
2119         } /* switch (c) */
2120     } /* while p != pend */
2121 
2122 
2123   /* Through the pattern now.  */
2124 
2125   if (fixup_alt_jump)
2126     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2127 
2128   if (!COMPILE_STACK_EMPTY)
2129     {
2130       FREE_MEM (compile_stack.stack);
2131       return REG_EPAREN;
2132     }
2133 
2134   free (compile_stack.stack);
2135 
2136   /* We have succeeded; set the length of the buffer.  */
2137   bufp->used = (unsigned long)(b - bufp->buffer);
2138 
2139 #ifdef DEBUG
2140   if (debug)
2141     {
2142       DEBUG_PRINT1 ("\nCompiled pattern: ");
2143       print_compiled_pattern (bufp);
2144     }
2145 #endif /* DEBUG */
2146 
2147   return REG_NOERROR;
2148 } /* regex_compile */
2149 
2150 /* Subroutines for `regex_compile'.  */
2151 
2152 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
2153 
2154 static void
store_op1(op,loc,arg)2155 store_op1 (op, loc, arg)
2156     re_opcode_t op;
2157     unsigned char *loc;
2158     int arg;
2159 {
2160   *loc = (unsigned char) op;
2161   STORE_NUMBER (loc + 1, arg);
2162 }
2163 
2164 
2165 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2166 
2167 static void
store_op2(op,loc,arg1,arg2)2168 store_op2 (op, loc, arg1, arg2)
2169     re_opcode_t op;
2170     unsigned char *loc;
2171     int arg1, arg2;
2172 {
2173   *loc = (unsigned char) op;
2174   STORE_NUMBER (loc + 1, arg1);
2175   STORE_NUMBER (loc + 3, arg2);
2176 }
2177 
2178 
2179 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2180    for OP followed by two-byte integer parameter ARG.  */
2181 
2182 static void
insert_op1(op,loc,arg,end)2183 insert_op1 (op, loc, arg, end)
2184     re_opcode_t op;
2185     unsigned char *loc;
2186     int arg;
2187     unsigned char *end;
2188 {
2189   register unsigned char *pfrom = end;
2190   register unsigned char *pto = end + 3;
2191 
2192   while (pfrom != loc)
2193     *--pto = *--pfrom;
2194 
2195   store_op1 (op, loc, arg);
2196 }
2197 
2198 
2199 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2200 
2201 static void
insert_op2(op,loc,arg1,arg2,end)2202 insert_op2 (op, loc, arg1, arg2, end)
2203     re_opcode_t op;
2204     unsigned char *loc;
2205     int arg1, arg2;
2206     unsigned char *end;
2207 {
2208   register unsigned char *pfrom = end;
2209   register unsigned char *pto = end + 5;
2210 
2211   while (pfrom != loc)
2212     *--pto = *--pfrom;
2213 
2214   store_op2 (op, loc, arg1, arg2);
2215 }
2216 
2217 
2218 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2219    after an alternative or a begin-subexpression.  We assume there is at
2220    least one character before the ^.  */
2221 
2222 static boolean
at_begline_loc_p(pattern,p,syntax)2223 at_begline_loc_p (pattern, p, syntax)
2224     const char *pattern, *p;
2225     reg_syntax_t syntax;
2226 {
2227   const char *prev = p - 2;
2228   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2229 
2230   return
2231        /* After a subexpression?  */
2232        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2233        /* After an alternative?  */
2234     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2235 }
2236 
2237 
2238 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2239    at least one character after the $, i.e., `P < PEND'.  */
2240 
2241 static boolean
at_endline_loc_p(p,pend,syntax)2242 at_endline_loc_p (p, pend, syntax)
2243     const char *p, *pend;
2244     int syntax;
2245 {
2246   const char *next = p;
2247   boolean next_backslash = *next == '\\';
2248   const char *next_next = p + 1 < pend ? p + 1 : NULL;
2249 
2250   return
2251        /* Before a subexpression?  */
2252        (syntax & RE_NO_BK_PARENS ? *next == ')'
2253         : next_backslash && next_next && *next_next == ')')
2254        /* Before an alternative?  */
2255     || (syntax & RE_NO_BK_VBAR ? *next == '|'
2256         : next_backslash && next_next && *next_next == '|');
2257 }
2258 
2259 
2260 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2261    false if it's not.  */
2262 
2263 static boolean
group_in_compile_stack(compile_stack,regnum)2264 group_in_compile_stack (compile_stack, regnum)
2265     compile_stack_type compile_stack;
2266     regnum_t regnum;
2267 {
2268   int this_element;
2269 
2270   for (this_element = compile_stack.avail - 1;
2271        this_element >= 0;
2272        this_element--)
2273     if (compile_stack.stack[this_element].regnum == regnum)
2274       return true;
2275 
2276   return false;
2277 }
2278 
2279 
2280 /* Read the ending character of a range (in a bracket expression) from the
2281    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2282    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2283    Then we set the translation of all bits between the starting and
2284    ending characters (inclusive) in the compiled pattern B.
2285 
2286    Return an error code.
2287 
2288    We use these short variable names so we can use the same macros as
2289    `regex_compile' itself.  */
2290 
2291 static reg_errcode_t
compile_range(p_ptr,pend,translate,syntax,b)2292 compile_range (p_ptr, pend, translate, syntax, b)
2293     const char **p_ptr, *pend;
2294     char *translate;
2295     reg_syntax_t syntax;
2296     unsigned char *b;
2297 {
2298   unsigned this_char;
2299 
2300   const char *p = *p_ptr;
2301   int range_start, range_end;
2302 
2303   if (p == pend)
2304     return REG_ERANGE;
2305 
2306   /* Even though the pattern is a signed `char *', we need to fetch
2307      with unsigned char *'s; if the high bit of the pattern character
2308      is set, the range endpoints will be negative if we fetch using a
2309      signed char *.
2310 
2311      We also want to fetch the endpoints without translating them; the
2312      appropriate translation is done in the bit-setting loop below.  */
2313   range_start = ((unsigned char *) p)[-2];
2314   range_end   = ((unsigned char *) p)[0];
2315 
2316   /* Have to increment the pointer into the pattern string, so the
2317      caller isn't still at the ending character.  */
2318   (*p_ptr)++;
2319 
2320   /* If the start is after the end, the range is empty.  */
2321   if (range_start > range_end)
2322     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2323 
2324   /* Here we see why `this_char' has to be larger than an `unsigned
2325      char' -- the range is inclusive, so if `range_end' == 0xff
2326      (assuming 8-bit characters), we would otherwise go into an infinite
2327      loop, since all characters <= 0xff.  */
2328   for (this_char = range_start; this_char <= (unsigned)range_end; this_char++)
2329     {
2330       SET_LIST_BIT (TRANSLATE (this_char));
2331     }
2332 
2333   return REG_NOERROR;
2334 }
2335 
2336 /* Failure stack declarations and macros; both re_compile_fastmap and
2337    re_match_2 use a failure stack.  These have to be macros because of
2338    REGEX_ALLOCATE.  */
2339 
2340 
2341 /* Number of failure points for which to initially allocate space
2342    when matching.  If this number is exceeded, we allocate more
2343    space, so it is not a hard limit.  */
2344 #ifndef INIT_FAILURE_ALLOC
2345 #define INIT_FAILURE_ALLOC 5
2346 #endif
2347 
2348 /* Roughly the maximum number of failure points on the stack.  Would be
2349    exactly that if always used MAX_FAILURE_SPACE each time we failed.
2350    This is a variable only so users of regex can assign to it; we never
2351    change it ourselves.  */
2352 int re_max_failures = 2000;
2353 
2354 typedef const unsigned char *fail_stack_elt_t;
2355 
2356 typedef struct
2357 {
2358   fail_stack_elt_t *stack;
2359   unsigned size;
2360   unsigned avail;			/* Offset of next open position.  */
2361 } fail_stack_type;
2362 
2363 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
2364 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2365 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
2366 #define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
2367 
2368 
2369 /* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
2370 
2371 #define INIT_FAIL_STACK()						\
2372   do {									\
2373     fail_stack.stack = (fail_stack_elt_t *)				\
2374       REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
2375 									\
2376     if (fail_stack.stack == NULL)					\
2377       return -2;							\
2378 									\
2379     fail_stack.size = INIT_FAILURE_ALLOC;				\
2380     fail_stack.avail = 0;						\
2381   } while (0)
2382 
2383 
2384 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2385 
2386    Return 1 if succeeds, and 0 if either ran out of memory
2387    allocating space for it or it was already too large.
2388 
2389    REGEX_REALLOCATE requires `destination' be declared.   */
2390 
2391 #define DOUBLE_FAIL_STACK(fail_stack)					\
2392   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
2393    ? 0									\
2394    : ((fail_stack).stack = (fail_stack_elt_t *)				\
2395         REGEX_REALLOCATE ((fail_stack).stack, 				\
2396           (fail_stack).size * sizeof (fail_stack_elt_t),		\
2397           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
2398 									\
2399       (fail_stack).stack == NULL					\
2400       ? 0								\
2401       : ((fail_stack).size <<= 1, 					\
2402          1)))
2403 
2404 
2405 /* Push PATTERN_OP on FAIL_STACK.
2406 
2407    Return 1 if was able to do so and 0 if ran out of memory allocating
2408    space to do so.  */
2409 #define PUSH_PATTERN_OP(pattern_op, fail_stack)				\
2410   ((FAIL_STACK_FULL ()							\
2411     && !DOUBLE_FAIL_STACK (fail_stack))					\
2412     ? 0									\
2413     : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,		\
2414        1))
2415 
2416 /* This pushes an item onto the failure stack.  Must be a four-byte
2417    value.  Assumes the variable `fail_stack'.  Probably should only
2418    be called from within `PUSH_FAILURE_POINT'.  */
2419 #define PUSH_FAILURE_ITEM(item)						\
2420   fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2421 
2422 /* The complement operation.  Assumes `fail_stack' is nonempty.  */
2423 #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2424 
2425 /* Used to omit pushing failure point id's when we're not debugging.  */
2426 #ifdef DEBUG
2427 #define DEBUG_PUSH PUSH_FAILURE_ITEM
2428 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2429 #else
2430 #define DEBUG_PUSH(item)
2431 #define DEBUG_POP(item_addr)
2432 #endif
2433 
2434 
2435 /* Push the information about the state we will need
2436    if we ever fail back to it.
2437 
2438    Requires variables fail_stack, regstart, regend, reg_info, and
2439    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
2440    declared.
2441 
2442    Does `return FAILURE_CODE' if runs out of memory.  */
2443 
2444 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
2445   do {									\
2446     char *destination;							\
2447     /* Must be int, so when we don't save any registers, the arithmetic	\
2448        of 0 + -1 isn't done as unsigned.  */				\
2449     int this_reg;							\
2450     									\
2451     DEBUG_STATEMENT (failure_id++);					\
2452     DEBUG_STATEMENT (nfailure_points_pushed++);				\
2453     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
2454     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
2455     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
2456 									\
2457     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
2458     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
2459 									\
2460     /* Ensure we have enough space allocated for what we will push.  */	\
2461     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
2462       {									\
2463         if (!DOUBLE_FAIL_STACK (fail_stack))			\
2464           return failure_code;						\
2465 									\
2466         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
2467 		       (fail_stack).size);				\
2468         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2469       }									\
2470 									\
2471     /* Push the info, starting with the registers.  */			\
2472     DEBUG_PRINT1 ("\n");						\
2473 									\
2474     for (this_reg = lowest_active_reg; (unsigned)this_reg <= highest_active_reg;	\
2475          this_reg++)							\
2476       {									\
2477 	DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);			\
2478         DEBUG_STATEMENT (num_regs_pushed++);				\
2479 									\
2480 	DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);		\
2481         PUSH_FAILURE_ITEM (regstart[this_reg]);				\
2482                                                                         \
2483 	DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
2484         PUSH_FAILURE_ITEM (regend[this_reg]);				\
2485 									\
2486 	DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
2487         DEBUG_PRINT2 (" match_null=%d",					\
2488                       REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
2489         DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
2490         DEBUG_PRINT2 (" matched_something=%d",				\
2491                       MATCHED_SOMETHING (reg_info[this_reg]));		\
2492         DEBUG_PRINT2 (" ever_matched=%d",				\
2493                       EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
2494 	DEBUG_PRINT1 ("\n");						\
2495         PUSH_FAILURE_ITEM (reg_info[this_reg].word);			\
2496       }									\
2497 									\
2498     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
2499     PUSH_FAILURE_ITEM (lowest_active_reg);				\
2500 									\
2501     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
2502     PUSH_FAILURE_ITEM (highest_active_reg);				\
2503 									\
2504     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
2505     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
2506     PUSH_FAILURE_ITEM (pattern_place);					\
2507 									\
2508     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
2509     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
2510 				 size2);				\
2511     DEBUG_PRINT1 ("'\n");						\
2512     PUSH_FAILURE_ITEM (string_place);					\
2513 									\
2514     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
2515     DEBUG_PUSH (failure_id);						\
2516   } while (0)
2517 
2518 /* This is the number of items that are pushed and popped on the stack
2519    for each register.  */
2520 #define NUM_REG_ITEMS  3
2521 
2522 /* Individual items aside from the registers.  */
2523 #ifdef DEBUG
2524 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
2525 #else
2526 #define NUM_NONREG_ITEMS 4
2527 #endif
2528 
2529 /* We push at most this many items on the stack.  */
2530 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2531 
2532 /* We actually push this many items.  */
2533 #define NUM_FAILURE_ITEMS						\
2534   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
2535     + NUM_NONREG_ITEMS)
2536 
2537 /* How many items can still be added to the stack without overflowing it.  */
2538 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2539 
2540 
2541 /* Pops what PUSH_FAIL_STACK pushes.
2542 
2543    We restore into the parameters, all of which should be lvalues:
2544      STR -- the saved data position.
2545      PAT -- the saved pattern position.
2546      LOW_REG, HIGH_REG -- the highest and lowest active registers.
2547      REGSTART, REGEND -- arrays of string positions.
2548      REG_INFO -- array of information about each subexpression.
2549 
2550    Also assumes the variables `fail_stack' and (if debugging), `bufp',
2551    `pend', `string1', `size1', `string2', and `size2'.  */
2552 
2553 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2554 {									\
2555   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
2556   int this_reg;								\
2557   const unsigned char *string_temp;					\
2558 									\
2559   assert (!FAIL_STACK_EMPTY ());					\
2560 									\
2561   /* Remove failure points and point to how many regs pushed.  */	\
2562   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
2563   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
2564   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
2565 									\
2566   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
2567 									\
2568   DEBUG_POP (&failure_id);						\
2569   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
2570 									\
2571   /* If the saved string location is NULL, it came from an		\
2572      on_failure_keep_string_jump opcode, and we want to throw away the	\
2573      saved NULL, thus retaining our current position in the string.  */	\
2574   string_temp = POP_FAILURE_ITEM ();					\
2575   if (string_temp != NULL)						\
2576     str = (const char *) string_temp;					\
2577 									\
2578   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
2579   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
2580   DEBUG_PRINT1 ("'\n");							\
2581 									\
2582   pat = (unsigned char *) POP_FAILURE_ITEM ();				\
2583   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
2584   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
2585 									\
2586   /* Restore register info.  */						\
2587   high_reg = (unsigned) POP_FAILURE_ITEM ();				\
2588   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
2589 									\
2590   low_reg = (unsigned) POP_FAILURE_ITEM ();				\
2591   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
2592 									\
2593   for (this_reg = high_reg; (unsigned)this_reg >= low_reg; this_reg--)		\
2594     {									\
2595       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);			\
2596 									\
2597       reg_info[this_reg].word = POP_FAILURE_ITEM ();			\
2598       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);		\
2599 									\
2600       regend[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2601       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
2602 									\
2603       regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2604       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);		\
2605     }									\
2606 									\
2607   DEBUG_STATEMENT (nfailure_points_popped++);				\
2608 } /* POP_FAILURE_POINT */
2609 
2610 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2611    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2612    characters can start a string that matches the pattern.  This fastmap
2613    is used by re_search to skip quickly over impossible starting points.
2614 
2615    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2616    area as BUFP->fastmap.
2617 
2618    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2619    the pattern buffer.
2620 
2621    Returns 0 if we succeed, -2 if an internal error.   */
2622 
2623 int
re_compile_fastmap(bufp)2624 re_compile_fastmap (bufp)
2625      struct re_pattern_buffer *bufp;
2626 {
2627   int j, k;
2628   fail_stack_type fail_stack;
2629 #ifndef REGEX_MALLOC
2630   char *destination;
2631 #endif
2632   /* We don't push any register information onto the failure stack.  */
2633   unsigned num_regs = 0;
2634 
2635   register char *fastmap = bufp->fastmap;
2636   unsigned char *pattern = bufp->buffer;
2637   unsigned long size = bufp->used;
2638   const unsigned char *p = pattern;
2639   register unsigned char *pend = pattern + size;
2640 
2641   /* Assume that each path through the pattern can be null until
2642      proven otherwise.  We set this false at the bottom of switch
2643      statement, to which we get only if a particular path doesn't
2644      match the empty string.  */
2645   boolean path_can_be_null = true;
2646 
2647   /* We aren't doing a `succeed_n' to begin with.  */
2648   boolean succeed_n_p = false;
2649 
2650   assert (fastmap != NULL && p != NULL);
2651 
2652   INIT_FAIL_STACK ();
2653   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2654   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
2655   bufp->can_be_null = 0;
2656 
2657   while (p != pend || !FAIL_STACK_EMPTY ())
2658     {
2659       if (p == pend)
2660         {
2661           bufp->can_be_null |= path_can_be_null;
2662 
2663           /* Reset for next path.  */
2664           path_can_be_null = true;
2665 
2666           p = fail_stack.stack[--fail_stack.avail];
2667 	}
2668 
2669       /* We should never be about to go beyond the end of the pattern.  */
2670       assert (p < pend);
2671 
2672 #ifdef SWITCH_ENUM_BUG
2673       switch ((int) ((re_opcode_t) *p++))
2674 #else
2675       switch ((re_opcode_t) *p++)
2676 #endif
2677 	{
2678 
2679         /* I guess the idea here is to simply not bother with a fastmap
2680            if a backreference is used, since it's too hard to figure out
2681            the fastmap for the corresponding group.  Setting
2682            `can_be_null' stops `re_search_2' from using the fastmap, so
2683            that is all we do.  */
2684 	case duplicate:
2685 	  bufp->can_be_null = 1;
2686 	  return 0;
2687 
2688 
2689       /* Following are the cases which match a character.  These end
2690          with `break'.  */
2691 
2692 	case exactn:
2693           fastmap[p[1]] = 1;
2694 	  break;
2695 
2696 
2697         case charset:
2698           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2699 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2700               fastmap[j] = 1;
2701 	  break;
2702 
2703 
2704 	case charset_not:
2705 	  /* Chars beyond end of map must be allowed.  */
2706 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2707             fastmap[j] = 1;
2708 
2709 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2710 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2711               fastmap[j] = 1;
2712           break;
2713 
2714 
2715 	case wordchar:
2716 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2717 	    if (SYNTAX (j) == Sword)
2718 	      fastmap[j] = 1;
2719 	  break;
2720 
2721 
2722 	case notwordchar:
2723 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2724 	    if (SYNTAX (j) != Sword)
2725 	      fastmap[j] = 1;
2726 	  break;
2727 
2728 
2729         case anychar:
2730           /* `.' matches anything ...  */
2731 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2732             fastmap[j] = 1;
2733 
2734           /* ... except perhaps newline.  */
2735           if (!(bufp->syntax & RE_DOT_NEWLINE))
2736             fastmap['\n'] = 0;
2737 
2738           /* Return if we have already set `can_be_null'; if we have,
2739              then the fastmap is irrelevant.  Something's wrong here.  */
2740 	  else if (bufp->can_be_null)
2741 	    return 0;
2742 
2743           /* Otherwise, have to check alternative paths.  */
2744 	  break;
2745 
2746 
2747 #ifdef emacs
2748         case syntaxspec:
2749 	  k = *p++;
2750 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2751 	    if (SYNTAX (j) == (enum syntaxcode) k)
2752 	      fastmap[j] = 1;
2753 	  break;
2754 
2755 
2756 	case notsyntaxspec:
2757 	  k = *p++;
2758 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2759 	    if (SYNTAX (j) != (enum syntaxcode) k)
2760 	      fastmap[j] = 1;
2761 	  break;
2762 
2763 
2764       /* All cases after this match the empty string.  These end with
2765          `continue'.  */
2766 
2767 
2768 	case before_dot:
2769 	case at_dot:
2770 	case after_dot:
2771           continue;
2772 #endif /* not emacs */
2773 
2774 
2775         case no_op:
2776         case begline:
2777         case endline:
2778 	case begbuf:
2779 	case endbuf:
2780 	case wordbound:
2781 	case notwordbound:
2782 	case wordbeg:
2783 	case wordend:
2784         case push_dummy_failure:
2785           continue;
2786 
2787 
2788 	case jump_n:
2789         case pop_failure_jump:
2790 	case maybe_pop_jump:
2791 	case jump:
2792         case jump_past_alt:
2793 	case dummy_failure_jump:
2794           EXTRACT_NUMBER_AND_INCR (j, p);
2795 	  p += j;
2796 	  if (j > 0)
2797 	    continue;
2798 
2799           /* Jump backward implies we just went through the body of a
2800              loop and matched nothing.  Opcode jumped to should be
2801              `on_failure_jump' or `succeed_n'.  Just treat it like an
2802              ordinary jump.  For a * loop, it has pushed its failure
2803              point already; if so, discard that as redundant.  */
2804           if ((re_opcode_t) *p != on_failure_jump
2805 	      && (re_opcode_t) *p != succeed_n)
2806 	    continue;
2807 
2808           p++;
2809           EXTRACT_NUMBER_AND_INCR (j, p);
2810           p += j;
2811 
2812           /* If what's on the stack is where we are now, pop it.  */
2813           if (!FAIL_STACK_EMPTY ()
2814 	      && fail_stack.stack[fail_stack.avail - 1] == p)
2815             fail_stack.avail--;
2816 
2817           continue;
2818 
2819 
2820         case on_failure_jump:
2821         case on_failure_keep_string_jump:
2822 	handle_on_failure_jump:
2823           EXTRACT_NUMBER_AND_INCR (j, p);
2824 
2825           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2826              end of the pattern.  We don't want to push such a point,
2827              since when we restore it above, entering the switch will
2828              increment `p' past the end of the pattern.  We don't need
2829              to push such a point since we obviously won't find any more
2830              fastmap entries beyond `pend'.  Such a pattern can match
2831              the null string, though.  */
2832           if (p + j < pend)
2833             {
2834               if (!PUSH_PATTERN_OP (p + j, fail_stack))
2835                 return -2;
2836             }
2837           else
2838             bufp->can_be_null = 1;
2839 
2840           if (succeed_n_p)
2841             {
2842               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
2843               succeed_n_p = false;
2844 	    }
2845 
2846           continue;
2847 
2848 
2849 	case succeed_n:
2850           /* Get to the number of times to succeed.  */
2851           p += 2;
2852 
2853           /* Increment p past the n for when k != 0.  */
2854           EXTRACT_NUMBER_AND_INCR (k, p);
2855           if (k == 0)
2856 	    {
2857               p -= 4;
2858   	      succeed_n_p = true;  /* Spaghetti code alert.  */
2859               goto handle_on_failure_jump;
2860             }
2861           continue;
2862 
2863 
2864 	case set_number_at:
2865           p += 4;
2866           continue;
2867 
2868 
2869 	case start_memory:
2870         case stop_memory:
2871 	  p += 2;
2872 	  continue;
2873 
2874 
2875 	default:
2876           abort (); /* We have listed all the cases.  */
2877         } /* switch *p++ */
2878 
2879       /* Getting here means we have found the possible starting
2880          characters for one path of the pattern -- and that the empty
2881          string does not match.  We need not follow this path further.
2882          Instead, look at the next alternative (remembered on the
2883          stack), or quit if no more.  The test at the top of the loop
2884          does these things.  */
2885       path_can_be_null = false;
2886       p = pend;
2887     } /* while p */
2888 
2889   /* Set `can_be_null' for the last path (also the first path, if the
2890      pattern is empty).  */
2891   bufp->can_be_null |= path_can_be_null;
2892   return 0;
2893 } /* re_compile_fastmap */
2894 
2895 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2896    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
2897    this memory for recording register information.  STARTS and ENDS
2898    must be allocated using the malloc library routine, and must each
2899    be at least NUM_REGS * sizeof (regoff_t) bytes long.
2900 
2901    If NUM_REGS == 0, then subsequent matches should allocate their own
2902    register data.
2903 
2904    Unless this function is called, the first search or match using
2905    PATTERN_BUFFER will allocate its own register data, without
2906    freeing the old data.  */
2907 
2908 void
re_set_registers(bufp,regs,num_regs,starts,ends)2909 re_set_registers (bufp, regs, num_regs, starts, ends)
2910     struct re_pattern_buffer *bufp;
2911     struct re_registers *regs;
2912     unsigned num_regs;
2913     regoff_t *starts, *ends;
2914 {
2915   if (num_regs)
2916     {
2917       bufp->regs_allocated = REGS_REALLOCATE;
2918       regs->num_regs = num_regs;
2919       regs->start = starts;
2920       regs->end = ends;
2921     }
2922   else
2923     {
2924       bufp->regs_allocated = REGS_UNALLOCATED;
2925       regs->num_regs = 0;
2926       regs->start = regs->end = (regoff_t*) 0;
2927     }
2928 }
2929 
2930 /* Searching routines.  */
2931 
2932 /* Like re_search_2, below, but only one string is specified, and
2933    doesn't let you say where to stop matching. */
2934 
2935 int
re_search(bufp,string,size,startpos,range,regs)2936 re_search (bufp, string, size, startpos, range, regs)
2937      struct re_pattern_buffer *bufp;
2938      const char *string;
2939      int size, startpos, range;
2940      struct re_registers *regs;
2941 {
2942   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2943 		      regs, size);
2944 }
2945 
2946 
2947 /* Using the compiled pattern in BUFP->buffer, first tries to match the
2948    virtual concatenation of STRING1 and STRING2, starting first at index
2949    STARTPOS, then at STARTPOS + 1, and so on.
2950 
2951    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2952 
2953    RANGE is how far to scan while trying to match.  RANGE = 0 means try
2954    only at STARTPOS; in general, the last start tried is STARTPOS +
2955    RANGE.
2956 
2957    In REGS, return the indices of the virtual concatenation of STRING1
2958    and STRING2 that matched the entire BUFP->buffer and its contained
2959    subexpressions.
2960 
2961    Do not consider matching one past the index STOP in the virtual
2962    concatenation of STRING1 and STRING2.
2963 
2964    We return either the position in the strings at which the match was
2965    found, -1 if no match, or -2 if error (such as failure
2966    stack overflow).  */
2967 
2968 int
re_search_2(bufp,string1,size1,string2,size2,startpos,range,regs,stop)2969 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2970      struct re_pattern_buffer *bufp;
2971      const char *string1, *string2;
2972      int size1, size2;
2973      int startpos;
2974      int range;
2975      struct re_registers *regs;
2976      int stop;
2977 {
2978   int val;
2979   register char *fastmap = bufp->fastmap;
2980   register char *translate = bufp->translate;
2981   int total_size = size1 + size2;
2982   int endpos = startpos + range;
2983 
2984   /* Check for out-of-range STARTPOS.  */
2985   if (startpos < 0 || startpos > total_size)
2986     return -1;
2987 
2988   /* Fix up RANGE if it might eventually take us outside
2989      the virtual concatenation of STRING1 and STRING2.  */
2990   if (endpos < -1)
2991     range = -1 - startpos;
2992   else if (endpos > total_size)
2993     range = total_size - startpos;
2994 
2995   /* If the search isn't to be a backwards one, don't waste time in a
2996      search for a pattern that must be anchored.  */
2997   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2998     {
2999       if (startpos > 0)
3000 	return -1;
3001       else
3002 	range = 1;
3003     }
3004 
3005   /* Update the fastmap now if not correct already.  */
3006   if (fastmap && !bufp->fastmap_accurate)
3007     if (re_compile_fastmap (bufp) == -2)
3008       return -2;
3009 
3010   /* Loop through the string, looking for a place to start matching.  */
3011   for (;;)
3012     {
3013       /* If a fastmap is supplied, skip quickly over characters that
3014          cannot be the start of a match.  If the pattern can match the
3015          null string, however, we don't need to skip characters; we want
3016          the first null string.  */
3017       if (fastmap && startpos < total_size && !bufp->can_be_null)
3018 	{
3019 	  if (range > 0)	/* Searching forwards.  */
3020 	    {
3021 	      register const char *d;
3022 	      register int lim = 0;
3023 	      int irange = range;
3024 
3025               if (startpos < size1 && startpos + range >= size1)
3026                 lim = range - (size1 - startpos);
3027 
3028 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3029 
3030               /* Written out as an if-else to avoid testing `translate'
3031                  inside the loop.  */
3032 	      if (translate)
3033                 while (range > lim
3034                        && !fastmap[(unsigned char)
3035 				   translate[(unsigned char) *d++]])
3036                   range--;
3037 	      else
3038                 while (range > lim && !fastmap[(unsigned char) *d++])
3039                   range--;
3040 
3041 	      startpos += irange - range;
3042 	    }
3043 	  else				/* Searching backwards.  */
3044 	    {
3045 	      register char c = (size1 == 0 || startpos >= size1
3046                                  ? string2[startpos - size1]
3047                                  : string1[startpos]);
3048 
3049 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
3050 		goto advance;
3051 	    }
3052 	}
3053 
3054       /* If can't match the null string, and that's all we have left, fail.  */
3055       if (range >= 0 && startpos == total_size && fastmap
3056           && !bufp->can_be_null)
3057 	return -1;
3058 
3059       val = re_match_2 (bufp, string1, size1, string2, size2,
3060 	                startpos, regs, stop);
3061       if (val >= 0)
3062 	return startpos;
3063 
3064       if (val == -2)
3065 	return -2;
3066 
3067     advance:
3068       if (!range)
3069         break;
3070       else if (range > 0)
3071         {
3072           range--;
3073           startpos++;
3074         }
3075       else
3076         {
3077           range++;
3078           startpos--;
3079         }
3080     }
3081   return -1;
3082 } /* re_search_2 */
3083 
3084 /* Declarations and macros for re_match_2.  */
3085 
3086 static int bcmp_translate ();
3087 static boolean alt_match_null_string_p (),
3088                common_op_match_null_string_p (),
3089                group_match_null_string_p ();
3090 
3091 /* Structure for per-register (a.k.a. per-group) information.
3092    This must not be longer than one word, because we push this value
3093    onto the failure stack.  Other register information, such as the
3094    starting and ending positions (which are addresses), and the list of
3095    inner groups (which is a bits list) are maintained in separate
3096    variables.
3097 
3098    We are making a (strictly speaking) nonportable assumption here: that
3099    the compiler will pack our bit fields into something that fits into
3100    the type of `word', i.e., is something that fits into one item on the
3101    failure stack.  */
3102 typedef union
3103 {
3104   fail_stack_elt_t word;
3105   struct
3106   {
3107       /* This field is one if this group can match the empty string,
3108          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
3109 #define MATCH_NULL_UNSET_VALUE 3
3110     unsigned match_null_string_p : 2;
3111     unsigned is_active : 1;
3112     unsigned matched_something : 1;
3113     unsigned ever_matched_something : 1;
3114   } bits;
3115 } register_info_type;
3116 
3117 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
3118 #define IS_ACTIVE(R)  ((R).bits.is_active)
3119 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
3120 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
3121 
3122 
3123 /* Call this when have matched a real character; it sets `matched' flags
3124    for the subexpressions which we are currently inside.  Also records
3125    that those subexprs have matched.  */
3126 #define SET_REGS_MATCHED()						\
3127   do									\
3128     {									\
3129       unsigned r;							\
3130       for (r = lowest_active_reg; r <= highest_active_reg; r++)		\
3131         {								\
3132           MATCHED_SOMETHING (reg_info[r])				\
3133             = EVER_MATCHED_SOMETHING (reg_info[r])			\
3134             = 1;							\
3135         }								\
3136     }									\
3137   while (0)
3138 
3139 
3140 /* This converts PTR, a pointer into one of the search strings `string1'
3141    and `string2' into an offset from the beginning of that string.  */
3142 #define POINTER_TO_OFFSET(ptr)						\
3143   (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3144 
3145 /* Registers are set to a sentinel when they haven't yet matched.  */
3146 #define REG_UNSET_VALUE ((char *) -1)
3147 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3148 
3149 
3150 /* Macros for dealing with the split strings in re_match_2.  */
3151 
3152 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3153 
3154 /* Call before fetching a character with *d.  This switches over to
3155    string2 if necessary.  */
3156 #define PREFETCH()							\
3157   while (d == dend)						    	\
3158     {									\
3159       /* End of string2 => fail.  */					\
3160       if (dend == end_match_2) 						\
3161         goto fail;							\
3162       /* End of string1 => advance to string2.  */ 			\
3163       d = string2;						        \
3164       dend = end_match_2;						\
3165     }
3166 
3167 
3168 /* Test if at very beginning or at very end of the virtual concatenation
3169    of `string1' and `string2'.  If only one string, it's `string2'.  */
3170 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3171 #define AT_STRINGS_END(d) ((d) == end2)
3172 
3173 
3174 /* Test if D points to a character which is word-constituent.  We have
3175    two special cases to check for: if past the end of string1, look at
3176    the first character in string2; and if before the beginning of
3177    string2, look at the last character in string1.  */
3178 #define WORDCHAR_P(d)							\
3179   (SYNTAX ((d) == end1 ? *string2					\
3180            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3181    == Sword)
3182 
3183 /* Test if the character before D and the one at D differ with respect
3184    to being word-constituent.  */
3185 #define AT_WORD_BOUNDARY(d)						\
3186   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3187    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3188 
3189 
3190 /* Free everything we malloc.  */
3191 #ifdef REGEX_MALLOC
3192 #define FREE_VAR(var) if (var) free (var); var = NULL
3193 #define FREE_VARIABLES()						\
3194   do {									\
3195     FREE_VAR (fail_stack.stack);					\
3196     FREE_VAR (regstart);						\
3197     FREE_VAR (regend);							\
3198     FREE_VAR (old_regstart);						\
3199     FREE_VAR (old_regend);						\
3200     FREE_VAR (best_regstart);						\
3201     FREE_VAR (best_regend);						\
3202     FREE_VAR (reg_info);						\
3203     FREE_VAR (reg_dummy);						\
3204     FREE_VAR (reg_info_dummy);						\
3205   } while (0)
3206 #else /* not REGEX_MALLOC */
3207 /* Some MIPS systems (at least) want this to free alloca'd storage.  */
3208 #define FREE_VARIABLES() alloca (0)
3209 #endif /* not REGEX_MALLOC */
3210 
3211 
3212 /* These values must meet several constraints.  They must not be valid
3213    register values; since we have a limit of 255 registers (because
3214    we use only one byte in the pattern for the register number), we can
3215    use numbers larger than 255.  They must differ by 1, because of
3216    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3217    be larger than the value for the highest register, so we do not try
3218    to actually save any registers when none are active.  */
3219 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3220 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3221 
3222 /* Matching routines.  */
3223 
3224 #ifndef emacs   /* Emacs never uses this.  */
3225 /* re_match is like re_match_2 except it takes only a single string.  */
3226 
3227 int
re_match(bufp,string,size,pos,regs)3228 re_match (bufp, string, size, pos, regs)
3229      struct re_pattern_buffer *bufp;
3230      const char *string;
3231      int size, pos;
3232      struct re_registers *regs;
3233  {
3234   return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
3235 }
3236 #endif /* not emacs */
3237 
3238 
3239 /* re_match_2 matches the compiled pattern in BUFP against the
3240    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3241    and SIZE2, respectively).  We start matching at POS, and stop
3242    matching at STOP.
3243 
3244    If REGS is non-null and the `no_sub' field of BUFP is non-zero, we
3245    store offsets for the substring each group matched in REGS.  See the
3246    documentation for exactly how many groups we fill.
3247 
3248    We return -1 if no match, -2 if an internal error (such as the
3249    failure stack overflowing).  Otherwise, we return the length of the
3250    matched substring.  */
3251 
3252 int
re_match_2(bufp,string1,size1,string2,size2,pos,regs,stop)3253 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3254      struct re_pattern_buffer *bufp;
3255      const char *string1, *string2;
3256      int size1, size2;
3257      int pos;
3258      struct re_registers *regs;
3259      int stop;
3260 {
3261   /* General temporaries.  */
3262   int mcnt;
3263   unsigned char *p1;
3264 
3265   /* Just past the end of the corresponding string.  */
3266   const char *end1, *end2;
3267 
3268   /* Pointers into string1 and string2, just past the last characters in
3269      each to consider matching.  */
3270   const char *end_match_1, *end_match_2;
3271 
3272   /* Where we are in the data, and the end of the current string.  */
3273   const char *d, *dend;
3274 
3275   /* Where we are in the pattern, and the end of the pattern.  */
3276   unsigned char *p = bufp->buffer;
3277   register unsigned char *pend = p + bufp->used;
3278 
3279   /* We use this to map every character in the string.  */
3280   char *translate = bufp->translate;
3281 
3282   /* Failure point stack.  Each place that can handle a failure further
3283      down the line pushes a failure point on this stack.  It consists of
3284      restart, regend, and reg_info for all registers corresponding to
3285      the subexpressions we're currently inside, plus the number of such
3286      registers, and, finally, two char *'s.  The first char * is where
3287      to resume scanning the pattern; the second one is where to resume
3288      scanning the strings.  If the latter is zero, the failure point is
3289      a ``dummy''; if a failure happens and the failure point is a dummy,
3290      it gets discarded and the next next one is tried.  */
3291   fail_stack_type fail_stack;
3292 #ifdef DEBUG
3293   static unsigned failure_id = 0;
3294   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3295 #endif
3296 
3297   /* We fill all the registers internally, independent of what we
3298      return, for use in backreferences.  The number here includes
3299      an element for register zero.  */
3300   unsigned num_regs = (unsigned)bufp->re_nsub + 1;
3301 
3302   /* The currently active registers.  */
3303   unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3304   unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3305 
3306   /* Information on the contents of registers. These are pointers into
3307      the input strings; they record just what was matched (on this
3308      attempt) by a subexpression part of the pattern, that is, the
3309      regnum-th regstart pointer points to where in the pattern we began
3310      matching and the regnum-th regend points to right after where we
3311      stopped matching the regnum-th subexpression.  (The zeroth register
3312      keeps track of what the whole pattern matches.)  */
3313   const char **regstart, **regend;
3314 
3315   /* If a group that's operated upon by a repetition operator fails to
3316      match anything, then the register for its start will need to be
3317      restored because it will have been set to wherever in the string we
3318      are when we last see its open-group operator.  Similarly for a
3319      register's end.  */
3320   const char **old_regstart, **old_regend;
3321 
3322   /* The is_active field of reg_info helps us keep track of which (possibly
3323      nested) subexpressions we are currently in. The matched_something
3324      field of reg_info[reg_num] helps us tell whether or not we have
3325      matched any of the pattern so far this time through the reg_num-th
3326      subexpression.  These two fields get reset each time through any
3327      loop their register is in.  */
3328   register_info_type *reg_info;
3329 
3330   /* The following record the register info as found in the above
3331      variables when we find a match better than any we've seen before.
3332      This happens as we backtrack through the failure points, which in
3333      turn happens only if we have not yet matched the entire string. */
3334   unsigned best_regs_set = false;
3335   const char **best_regstart, **best_regend;
3336 
3337   /* Logically, this is `best_regend[0]'.  But we don't want to have to
3338      allocate space for that if we're not allocating space for anything
3339      else (see below).  Also, we never need info about register 0 for
3340      any of the other register vectors, and it seems rather a kludge to
3341      treat `best_regend' differently than the rest.  So we keep track of
3342      the end of the best match so far in a separate variable.  We
3343      initialize this to NULL so that when we backtrack the first time
3344      and need to test it, it's not garbage.  */
3345   const char *match_end = NULL;
3346 
3347   /* Used when we pop values we don't care about.  */
3348   const char **reg_dummy;
3349   register_info_type *reg_info_dummy;
3350 
3351 #ifdef DEBUG
3352   /* Counts the total number of registers pushed.  */
3353   unsigned num_regs_pushed = 0;
3354 #endif
3355 
3356   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3357 
3358   INIT_FAIL_STACK ();
3359 
3360   /* Do not bother to initialize all the register variables if there are
3361      no groups in the pattern, as it takes a fair amount of time.  If
3362      there are groups, we include space for register 0 (the whole
3363      pattern), even though we never use it, since it simplifies the
3364      array indexing.  We should fix this.  */
3365   if (bufp->re_nsub)
3366     {
3367       regstart = REGEX_TALLOC (num_regs, const char *);
3368       regend = REGEX_TALLOC (num_regs, const char *);
3369       old_regstart = REGEX_TALLOC (num_regs, const char *);
3370       old_regend = REGEX_TALLOC (num_regs, const char *);
3371       best_regstart = REGEX_TALLOC (num_regs, const char *);
3372       best_regend = REGEX_TALLOC (num_regs, const char *);
3373       reg_info = REGEX_TALLOC (num_regs, register_info_type);
3374       reg_dummy = REGEX_TALLOC (num_regs, const char *);
3375       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3376 
3377       if (!(regstart && regend && old_regstart && old_regend && reg_info
3378             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3379         {
3380           FREE_VARIABLES ();
3381           return -2;
3382         }
3383     }
3384 #ifdef REGEX_MALLOC
3385   else
3386     {
3387       /* We must initialize all our variables to NULL, so that
3388          `FREE_VARIABLES' doesn't try to free them.  */
3389       regstart = regend = old_regstart = old_regend = best_regstart
3390         = best_regend = reg_dummy = NULL;
3391       reg_info = reg_info_dummy = (register_info_type *) NULL;
3392     }
3393 #endif /* REGEX_MALLOC */
3394 
3395   /* The starting position is bogus.  */
3396   if (pos < 0 || pos > size1 + size2)
3397     {
3398       FREE_VARIABLES ();
3399       return -1;
3400     }
3401 
3402   /* Initialize subexpression text positions to -1 to mark ones that no
3403      start_memory/stop_memory has been seen for. Also initialize the
3404      register information struct.  */
3405   for (mcnt = 1; (unsigned)mcnt < num_regs; mcnt++)
3406     {
3407       regstart[mcnt] = regend[mcnt]
3408         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3409 
3410       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3411       IS_ACTIVE (reg_info[mcnt]) = 0;
3412       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3413       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3414     }
3415 
3416   /* We move `string1' into `string2' if the latter's empty -- but not if
3417      `string1' is null.  */
3418   if (size2 == 0 && string1 != NULL)
3419     {
3420       string2 = string1;
3421       size2 = size1;
3422       string1 = 0;
3423       size1 = 0;
3424     }
3425   end1 = string1 + size1;
3426   end2 = string2 + size2;
3427 
3428   /* Compute where to stop matching, within the two strings.  */
3429   if (stop <= size1)
3430     {
3431       end_match_1 = string1 + stop;
3432       end_match_2 = string2;
3433     }
3434   else
3435     {
3436       end_match_1 = end1;
3437       end_match_2 = string2 + stop - size1;
3438     }
3439 
3440   /* `p' scans through the pattern as `d' scans through the data.
3441      `dend' is the end of the input string that `d' points within.  `d'
3442      is advanced into the following input string whenever necessary, but
3443      this happens before fetching; therefore, at the beginning of the
3444      loop, `d' can be pointing at the end of a string, but it cannot
3445      equal `string2'.  */
3446   if (size1 > 0 && pos <= size1)
3447     {
3448       d = string1 + pos;
3449       dend = end_match_1;
3450     }
3451   else
3452     {
3453       d = string2 + pos - size1;
3454       dend = end_match_2;
3455     }
3456 
3457   DEBUG_PRINT1 ("The compiled pattern is: ");
3458   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3459   DEBUG_PRINT1 ("The string to match is: `");
3460   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3461   DEBUG_PRINT1 ("'\n");
3462 
3463   /* This loops over pattern commands.  It exits by returning from the
3464      function if the match is complete, or it drops through if the match
3465      fails at this starting point in the input data.  */
3466   for (;;)
3467     {
3468       DEBUG_PRINT2 ("\n0x%x: ", p);
3469 
3470       if (p == pend)
3471 	{ /* End of pattern means we might have succeeded.  */
3472           DEBUG_PRINT1 ("end of pattern ... ");
3473 
3474 	  /* If we haven't matched the entire string, and we want the
3475              longest match, try backtracking.  */
3476           if (d != end_match_2)
3477 	    {
3478               DEBUG_PRINT1 ("backtracking.\n");
3479 
3480               if (!FAIL_STACK_EMPTY ())
3481                 { /* More failure points to try.  */
3482                   boolean same_str_p = (FIRST_STRING_P (match_end)
3483 			                == MATCHING_IN_FIRST_STRING);
3484 
3485                   /* If exceeds best match so far, save it.  */
3486                   if (!best_regs_set
3487                       || (same_str_p && d > match_end)
3488                       || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3489                     {
3490                       best_regs_set = true;
3491                       match_end = d;
3492 
3493                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3494 
3495                       for (mcnt = 1; (unsigned)mcnt < num_regs; mcnt++)
3496                         {
3497                           best_regstart[mcnt] = regstart[mcnt];
3498                           best_regend[mcnt] = regend[mcnt];
3499                         }
3500                     }
3501                   goto fail;
3502                 }
3503 
3504               /* If no failure points, don't restore garbage.  */
3505               else if (best_regs_set)
3506                 {
3507 	        restore_best_regs:
3508                   /* Restore best match.  It may happen that `dend ==
3509                      end_match_1' while the restored d is in string2.
3510                      For example, the pattern `x.*y.*z' against the
3511                      strings `x-' and `y-z-', if the two strings are
3512                      not consecutive in memory.  */
3513                   DEBUG_PRINT1 ("Restoring best registers.\n");
3514 
3515                   d = match_end;
3516                   dend = ((d >= string1 && d <= end1)
3517 		           ? end_match_1 : end_match_2);
3518 
3519 		  for (mcnt = 1; (unsigned)mcnt < num_regs; mcnt++)
3520 		    {
3521 		      regstart[mcnt] = best_regstart[mcnt];
3522 		      regend[mcnt] = best_regend[mcnt];
3523 		    }
3524                 }
3525             } /* d != end_match_2 */
3526 
3527           DEBUG_PRINT1 ("Accepting match.\n");
3528 
3529           /* If caller wants register contents data back, do it.  */
3530           if (regs && !bufp->no_sub)
3531 	    {
3532               /* Have the register data arrays been allocated?  */
3533               if (bufp->regs_allocated == REGS_UNALLOCATED)
3534                 { /* No.  So allocate them with malloc.  We need one
3535                      extra element beyond `num_regs' for the `-1' marker
3536                      GNU code uses.  */
3537                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3538                   regs->start = TALLOC (regs->num_regs, regoff_t);
3539                   regs->end = TALLOC (regs->num_regs, regoff_t);
3540                   if (regs->start == NULL || regs->end == NULL)
3541                     return -2;
3542                   bufp->regs_allocated = REGS_REALLOCATE;
3543                 }
3544               else if (bufp->regs_allocated == REGS_REALLOCATE)
3545                 { /* Yes.  If we need more elements than were already
3546                      allocated, reallocate them.  If we need fewer, just
3547                      leave it alone.  */
3548                   if (regs->num_regs < num_regs + 1)
3549                     {
3550                       regs->num_regs = num_regs + 1;
3551                       RETALLOC (regs->start, regs->num_regs, regoff_t);
3552                       RETALLOC (regs->end, regs->num_regs, regoff_t);
3553                       if (regs->start == NULL || regs->end == NULL)
3554                         return -2;
3555                     }
3556                 }
3557               else
3558                 assert (bufp->regs_allocated == REGS_FIXED);
3559 
3560               /* Convert the pointer data in `regstart' and `regend' to
3561                  indices.  Register zero has to be set differently,
3562                  since we haven't kept track of any info for it.  */
3563               if (regs->num_regs > 0)
3564                 {
3565                   regs->start[0] = pos;
3566                   regs->end[0] = (regoff_t)(MATCHING_IN_FIRST_STRING ? d - string1
3567 			          : d - string2 + size1);
3568                 }
3569 
3570               /* Go through the first `min (num_regs, regs->num_regs)'
3571                  registers, since that is all we initialized.  */
3572 	      for (mcnt = 1; (unsigned)mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3573 		{
3574                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3575                     regs->start[mcnt] = regs->end[mcnt] = -1;
3576                   else
3577                     {
3578 		      regs->start[mcnt] = (regoff_t)POINTER_TO_OFFSET (regstart[mcnt]);
3579                       regs->end[mcnt] = (regoff_t)POINTER_TO_OFFSET (regend[mcnt]);
3580                     }
3581 		}
3582 
3583               /* If the regs structure we return has more elements than
3584                  were in the pattern, set the extra elements to -1.  If
3585                  we (re)allocated the registers, this is the case,
3586                  because we always allocate enough to have at least one
3587                  -1 at the end.  */
3588               for (mcnt = num_regs; (unsigned)mcnt < regs->num_regs; mcnt++)
3589                 regs->start[mcnt] = regs->end[mcnt] = -1;
3590 	    } /* regs && !bufp->no_sub */
3591 
3592           FREE_VARIABLES ();
3593           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3594                         nfailure_points_pushed, nfailure_points_popped,
3595                         nfailure_points_pushed - nfailure_points_popped);
3596           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3597 
3598           mcnt = (int)(d - pos - (MATCHING_IN_FIRST_STRING
3599 			    ? string1
3600 			    : string2 - size1));
3601 
3602           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3603 
3604           return mcnt;
3605         }
3606 
3607       /* Otherwise match next pattern command.  */
3608 #ifdef SWITCH_ENUM_BUG
3609       switch ((int) ((re_opcode_t) *p++))
3610 #else
3611       switch ((re_opcode_t) *p++)
3612 #endif
3613 	{
3614         /* Ignore these.  Used to ignore the n of succeed_n's which
3615            currently have n == 0.  */
3616         case no_op:
3617           DEBUG_PRINT1 ("EXECUTING no_op.\n");
3618           break;
3619 
3620 
3621         /* Match the next n pattern characters exactly.  The following
3622            byte in the pattern defines n, and the n bytes after that
3623            are the characters to match.  */
3624 	case exactn:
3625 	  mcnt = *p++;
3626           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3627 
3628           /* This is written out as an if-else so we don't waste time
3629              testing `translate' inside the loop.  */
3630           if (translate)
3631 	    {
3632 	      do
3633 		{
3634 		  PREFETCH ();
3635 		  if (translate[(unsigned char) *d++] != (char) *p++)
3636                     goto fail;
3637 		}
3638 	      while (--mcnt);
3639 	    }
3640 	  else
3641 	    {
3642 	      do
3643 		{
3644 		  PREFETCH ();
3645 		  if (*d++ != (char) *p++) goto fail;
3646 		}
3647 	      while (--mcnt);
3648 	    }
3649 	  SET_REGS_MATCHED ();
3650           break;
3651 
3652 
3653         /* Match any character except possibly a newline or a null.  */
3654 	case anychar:
3655           DEBUG_PRINT1 ("EXECUTING anychar.\n");
3656 
3657           PREFETCH ();
3658 
3659           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3660               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3661 	    goto fail;
3662 
3663           SET_REGS_MATCHED ();
3664           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3665           d++;
3666 	  break;
3667 
3668 
3669 	case charset:
3670 	case charset_not:
3671 	  {
3672 	    register unsigned char c;
3673 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
3674 
3675             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3676 
3677 	    PREFETCH ();
3678 	    c = TRANSLATE (*d); /* The character to match.  */
3679 
3680             /* Cast to `unsigned' instead of `unsigned char' in case the
3681                bit list is a full 32 bytes long.  */
3682 	    if (c < (unsigned) (*p * BYTEWIDTH)
3683 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3684 	      not = !not;
3685 
3686 	    p += 1 + *p;
3687 
3688 	    if (!not) goto fail;
3689 
3690 	    SET_REGS_MATCHED ();
3691             d++;
3692 	    break;
3693 	  }
3694 
3695 
3696         /* The beginning of a group is represented by start_memory.
3697            The arguments are the register number in the next byte, and the
3698            number of groups inner to this one in the next.  The text
3699            matched within the group is recorded (in the internal
3700            registers data structure) under the register number.  */
3701         case start_memory:
3702 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3703 
3704           /* Find out if this group can match the empty string.  */
3705 	  p1 = p;		/* To send to group_match_null_string_p.  */
3706 
3707           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3708             REG_MATCH_NULL_STRING_P (reg_info[*p])
3709               = group_match_null_string_p (&p1, pend, reg_info);
3710 
3711           /* Save the position in the string where we were the last time
3712              we were at this open-group operator in case the group is
3713              operated upon by a repetition operator, e.g., with `(a*)*b'
3714              against `ab'; then we want to ignore where we are now in
3715              the string in case this attempt to match fails.  */
3716           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3717                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3718                              : regstart[*p];
3719 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
3720 			 POINTER_TO_OFFSET (old_regstart[*p]));
3721 
3722           regstart[*p] = d;
3723 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3724 
3725           IS_ACTIVE (reg_info[*p]) = 1;
3726           MATCHED_SOMETHING (reg_info[*p]) = 0;
3727 
3728           /* This is the new highest active register.  */
3729           highest_active_reg = *p;
3730 
3731           /* If nothing was active before, this is the new lowest active
3732              register.  */
3733           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3734             lowest_active_reg = *p;
3735 
3736           /* Move past the register number and inner group count.  */
3737           p += 2;
3738           break;
3739 
3740 
3741         /* The stop_memory opcode represents the end of a group.  Its
3742            arguments are the same as start_memory's: the register
3743            number, and the number of inner groups.  */
3744 	case stop_memory:
3745 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3746 
3747           /* We need to save the string position the last time we were at
3748              this close-group operator in case the group is operated
3749              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3750              against `aba'; then we want to ignore where we are now in
3751              the string in case this attempt to match fails.  */
3752           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3753                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
3754 			   : regend[*p];
3755 	  DEBUG_PRINT2 ("      old_regend: %d\n",
3756 			 POINTER_TO_OFFSET (old_regend[*p]));
3757 
3758           regend[*p] = d;
3759 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3760 
3761           /* This register isn't active anymore.  */
3762           IS_ACTIVE (reg_info[*p]) = 0;
3763 
3764           /* If this was the only register active, nothing is active
3765              anymore.  */
3766           if (lowest_active_reg == highest_active_reg)
3767             {
3768               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3769               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3770             }
3771           else
3772             { /* We must scan for the new highest active register, since
3773                  it isn't necessarily one less than now: consider
3774                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3775                  new highest active register is 1.  */
3776               unsigned char r = *p - 1;
3777               while (r > 0 && !IS_ACTIVE (reg_info[r]))
3778                 r--;
3779 
3780               /* If we end up at register zero, that means that we saved
3781                  the registers as the result of an `on_failure_jump', not
3782                  a `start_memory', and we jumped to past the innermost
3783                  `stop_memory'.  For example, in ((.)*) we save
3784                  registers 1 and 2 as a result of the *, but when we pop
3785                  back to the second ), we are at the stop_memory 1.
3786                  Thus, nothing is active.  */
3787 	      if (r == 0)
3788                 {
3789                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3790                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3791                 }
3792               else
3793                 highest_active_reg = r;
3794             }
3795 
3796           /* If just failed to match something this time around with a
3797              group that's operated on by a repetition operator, try to
3798              force exit from the ``loop'', and restore the register
3799              information for this group that we had before trying this
3800              last match.  */
3801           if ((!MATCHED_SOMETHING (reg_info[*p])
3802                || (re_opcode_t) p[-3] == start_memory)
3803 	      && (p + 2) < pend)
3804             {
3805               boolean is_a_jump_n = false;
3806 
3807               p1 = p + 2;
3808               mcnt = 0;
3809               switch ((re_opcode_t) *p1++)
3810                 {
3811                   case jump_n:
3812 		    is_a_jump_n = true;
3813                   case pop_failure_jump:
3814 		  case maybe_pop_jump:
3815 		  case jump:
3816 		  case dummy_failure_jump:
3817                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3818 		    if (is_a_jump_n)
3819 		      p1 += 2;
3820                     break;
3821 
3822                   default:
3823                     /* do nothing */ ;
3824                 }
3825 	      p1 += mcnt;
3826 
3827               /* If the next operation is a jump backwards in the pattern
3828 	         to an on_failure_jump right before the start_memory
3829                  corresponding to this stop_memory, exit from the loop
3830                  by forcing a failure after pushing on the stack the
3831                  on_failure_jump's jump in the pattern, and d.  */
3832               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3833                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3834 		{
3835                   /* If this group ever matched anything, then restore
3836                      what its registers were before trying this last
3837                      failed match, e.g., with `(a*)*b' against `ab' for
3838                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
3839                      against `aba' for regend[3].
3840 
3841                      Also restore the registers for inner groups for,
3842                      e.g., `((a*)(b*))*' against `aba' (register 3 would
3843                      otherwise get trashed).  */
3844 
3845                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3846 		    {
3847 		      unsigned r;
3848 
3849                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3850 
3851 		      /* Restore this and inner groups' (if any) registers.  */
3852                       for (r = *p; r < (unsigned)(*p + *(p + 1)); r++)
3853                         {
3854                           regstart[r] = old_regstart[r];
3855 
3856                           /* xx why this test?  */
3857                           if (old_regend[r] >= regstart[r])
3858                             regend[r] = old_regend[r];
3859                         }
3860                     }
3861 		  p1++;
3862                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3863                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3864 
3865                   goto fail;
3866                 }
3867             }
3868 
3869           /* Move past the register number and the inner group count.  */
3870           p += 2;
3871           break;
3872 
3873 
3874 	/* \<digit> has been turned into a `duplicate' command which is
3875            followed by the numeric value of <digit> as the register number.  */
3876         case duplicate:
3877 	  {
3878 	    register const char *d2, *dend2;
3879 	    int regno = *p++;   /* Get which register to match against.  */
3880 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3881 
3882 	    /* Can't back reference a group which we've never matched.  */
3883             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3884               goto fail;
3885 
3886             /* Where in input to try to start matching.  */
3887             d2 = regstart[regno];
3888 
3889             /* Where to stop matching; if both the place to start and
3890                the place to stop matching are in the same string, then
3891                set to the place to stop, otherwise, for now have to use
3892                the end of the first string.  */
3893 
3894             dend2 = ((FIRST_STRING_P (regstart[regno])
3895 		      == FIRST_STRING_P (regend[regno]))
3896 		     ? regend[regno] : end_match_1);
3897 	    for (;;)
3898 	      {
3899 		/* If necessary, advance to next segment in register
3900                    contents.  */
3901 		while (d2 == dend2)
3902 		  {
3903 		    if (dend2 == end_match_2) break;
3904 		    if (dend2 == regend[regno]) break;
3905 
3906                     /* End of string1 => advance to string2. */
3907                     d2 = string2;
3908                     dend2 = regend[regno];
3909 		  }
3910 		/* At end of register contents => success */
3911 		if (d2 == dend2) break;
3912 
3913 		/* If necessary, advance to next segment in data.  */
3914 		PREFETCH ();
3915 
3916 		/* How many characters left in this segment to match.  */
3917 		mcnt = (int)(dend - d);
3918 
3919 		/* Want how many consecutive characters we can match in
3920                    one shot, so, if necessary, adjust the count.  */
3921                 if (mcnt > dend2 - d2)
3922 		  mcnt = (int)(dend2 - d2);
3923 
3924 		/* Compare that many; failure if mismatch, else move
3925                    past them.  */
3926 		if (translate
3927                     ? bcmp_translate (d, d2, mcnt, translate)
3928                     : bcmp (d, d2, mcnt))
3929 		  goto fail;
3930 		d += mcnt, d2 += mcnt;
3931 	      }
3932 	  }
3933 	  break;
3934 
3935 
3936         /* begline matches the empty string at the beginning of the string
3937            (unless `not_bol' is set in `bufp'), and, if
3938            `newline_anchor' is set, after newlines.  */
3939 	case begline:
3940           DEBUG_PRINT1 ("EXECUTING begline.\n");
3941 
3942           if (AT_STRINGS_BEG (d))
3943             {
3944               if (!bufp->not_bol) break;
3945             }
3946           else if (d[-1] == '\n' && bufp->newline_anchor)
3947             {
3948               break;
3949             }
3950           /* In all other cases, we fail.  */
3951           goto fail;
3952 
3953 
3954         /* endline is the dual of begline.  */
3955 	case endline:
3956           DEBUG_PRINT1 ("EXECUTING endline.\n");
3957 
3958           if (AT_STRINGS_END (d))
3959             {
3960               if (!bufp->not_eol) break;
3961             }
3962 
3963           /* We have to ``prefetch'' the next character.  */
3964           else if ((d == end1 ? *string2 : *d) == '\n'
3965                    && bufp->newline_anchor)
3966             {
3967               break;
3968             }
3969           goto fail;
3970 
3971 
3972 	/* Match at the very beginning of the data.  */
3973         case begbuf:
3974           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3975           if (AT_STRINGS_BEG (d))
3976             break;
3977           goto fail;
3978 
3979 
3980 	/* Match at the very end of the data.  */
3981         case endbuf:
3982           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3983 	  if (AT_STRINGS_END (d))
3984 	    break;
3985           goto fail;
3986 
3987 
3988         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
3989            pushes NULL as the value for the string on the stack.  Then
3990            `pop_failure_point' will keep the current value for the
3991            string, instead of restoring it.  To see why, consider
3992            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
3993            then the . fails against the \n.  But the next thing we want
3994            to do is match the \n against the \n; if we restored the
3995            string value, we would be back at the foo.
3996 
3997            Because this is used only in specific cases, we don't need to
3998            check all the things that `on_failure_jump' does, to make
3999            sure the right things get saved on the stack.  Hence we don't
4000            share its code.  The only reason to push anything on the
4001            stack at all is that otherwise we would have to change
4002            `anychar's code to do something besides goto fail in this
4003            case; that seems worse than this.  */
4004         case on_failure_keep_string_jump:
4005           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4006 
4007           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4008           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4009 
4010           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4011           break;
4012 
4013 
4014 	/* Uses of on_failure_jump:
4015 
4016            Each alternative starts with an on_failure_jump that points
4017            to the beginning of the next alternative.  Each alternative
4018            except the last ends with a jump that in effect jumps past
4019            the rest of the alternatives.  (They really jump to the
4020            ending jump of the following alternative, because tensioning
4021            these jumps is a hassle.)
4022 
4023            Repeats start with an on_failure_jump that points past both
4024            the repetition text and either the following jump or
4025            pop_failure_jump back to this on_failure_jump.  */
4026 	case on_failure_jump:
4027         on_failure:
4028           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4029 
4030           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4031           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4032 
4033           /* If this on_failure_jump comes right before a group (i.e.,
4034              the original * applied to a group), save the information
4035              for that group and all inner ones, so that if we fail back
4036              to this point, the group's information will be correct.
4037              For example, in \(a*\)*\1, we need the preceding group,
4038              and in \(\(a*\)b*\)\2, we need the inner group.  */
4039 
4040           /* We can't use `p' to check ahead because we push
4041              a failure point to `p + mcnt' after we do this.  */
4042           p1 = p;
4043 
4044           /* We need to skip no_op's before we look for the
4045              start_memory in case this on_failure_jump is happening as
4046              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4047              against aba.  */
4048           while (p1 < pend && (re_opcode_t) *p1 == no_op)
4049             p1++;
4050 
4051           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4052             {
4053               /* We have a new highest active register now.  This will
4054                  get reset at the start_memory we are about to get to,
4055                  but we will have saved all the registers relevant to
4056                  this repetition op, as described above.  */
4057               highest_active_reg = *(p1 + 1) + *(p1 + 2);
4058               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4059                 lowest_active_reg = *(p1 + 1);
4060             }
4061 
4062           DEBUG_PRINT1 (":\n");
4063           PUSH_FAILURE_POINT (p + mcnt, d, -2);
4064           break;
4065 
4066 
4067         /* A smart repeat ends with `maybe_pop_jump'.
4068 	   We change it to either `pop_failure_jump' or `jump'.  */
4069         case maybe_pop_jump:
4070           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4071           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4072           {
4073 	    register unsigned char *p2 = p;
4074 
4075             /* Compare the beginning of the repeat with what in the
4076                pattern follows its end. If we can establish that there
4077                is nothing that they would both match, i.e., that we
4078                would have to backtrack because of (as in, e.g., `a*a')
4079                then we can change to pop_failure_jump, because we'll
4080                never have to backtrack.
4081 
4082                This is not true in the case of alternatives: in
4083                `(a|ab)*' we do need to backtrack to the `ab' alternative
4084                (e.g., if the string was `ab').  But instead of trying to
4085                detect that here, the alternative has put on a dummy
4086                failure point which is what we will end up popping.  */
4087 
4088 	    /* Skip over open/close-group commands.  */
4089 	    while (p2 + 2 < pend
4090 		   && ((re_opcode_t) *p2 == stop_memory
4091 		       || (re_opcode_t) *p2 == start_memory))
4092 	      p2 += 3;			/* Skip over args, too.  */
4093 
4094             /* If we're at the end of the pattern, we can change.  */
4095             if (p2 == pend)
4096 	      {
4097 		/* Consider what happens when matching ":\(.*\)"
4098 		   against ":/".  I don't really understand this code
4099 		   yet.  */
4100 	        p[-3] = (unsigned char) pop_failure_jump;
4101                 DEBUG_PRINT1
4102                   ("  End of pattern: change to `pop_failure_jump'.\n");
4103               }
4104 
4105             else if ((re_opcode_t) *p2 == exactn
4106 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4107 	      {
4108 		register unsigned char c
4109                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4110 		p1 = p + mcnt;
4111 
4112                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4113                    to the `maybe_finalize_jump' of this case.  Examine what
4114                    follows.  */
4115                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4116                   {
4117 		    p[-3] = (unsigned char) pop_failure_jump;
4118                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4119                                   c, p1[5]);
4120                   }
4121 
4122 		else if ((re_opcode_t) p1[3] == charset
4123 			 || (re_opcode_t) p1[3] == charset_not)
4124 		  {
4125 		    int not = (re_opcode_t) p1[3] == charset_not;
4126 
4127 		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4128 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4129 		      not = !not;
4130 
4131                     /* `not' is equal to 1 if c would match, which means
4132                         that we can't change to pop_failure_jump.  */
4133 		    if (!not)
4134                       {
4135 		        p[-3] = (unsigned char) pop_failure_jump;
4136                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4137                       }
4138 		  }
4139 	      }
4140 	  }
4141 	  p -= 2;		/* Point at relative address again.  */
4142 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4143 	    {
4144 	      p[-1] = (unsigned char) jump;
4145               DEBUG_PRINT1 ("  Match => jump.\n");
4146 	      goto unconditional_jump;
4147 	    }
4148         /* Note fall through.  */
4149 
4150 
4151 	/* The end of a simple repeat has a pop_failure_jump back to
4152            its matching on_failure_jump, where the latter will push a
4153            failure point.  The pop_failure_jump takes off failure
4154            points put on by this pop_failure_jump's matching
4155            on_failure_jump; we got through the pattern to here from the
4156            matching on_failure_jump, so didn't fail.  */
4157         case pop_failure_jump:
4158           {
4159             /* We need to pass separate storage for the lowest and
4160                highest registers, even though we don't care about the
4161                actual values.  Otherwise, we will restore only one
4162                register from the stack, since lowest will == highest in
4163                `pop_failure_point'.  */
4164             unsigned dummy_low_reg, dummy_high_reg;
4165             unsigned char *pdummy;
4166             const char *sdummy;
4167 
4168             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4169             POP_FAILURE_POINT (sdummy, pdummy,
4170                                dummy_low_reg, dummy_high_reg,
4171                                reg_dummy, reg_dummy, reg_info_dummy);
4172           }
4173           /* Note fall through.  */
4174 
4175 
4176         /* Unconditionally jump (without popping any failure points).  */
4177         case jump:
4178 	unconditional_jump:
4179 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4180           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4181 	  p += mcnt;				/* Do the jump.  */
4182           DEBUG_PRINT2 ("(to 0x%x).\n", p);
4183 	  break;
4184 
4185 
4186         /* We need this opcode so we can detect where alternatives end
4187            in `group_match_null_string_p' et al.  */
4188         case jump_past_alt:
4189           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4190           goto unconditional_jump;
4191 
4192 
4193         /* Normally, the on_failure_jump pushes a failure point, which
4194            then gets popped at pop_failure_jump.  We will end up at
4195            pop_failure_jump, also, and with a pattern of, say, `a+', we
4196            are skipping over the on_failure_jump, so we have to push
4197            something meaningless for pop_failure_jump to pop.  */
4198         case dummy_failure_jump:
4199           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4200           /* It doesn't matter what we push for the string here.  What
4201              the code at `fail' tests is the value for the pattern.  */
4202           PUSH_FAILURE_POINT (0, 0, -2);
4203           goto unconditional_jump;
4204 
4205 
4206         /* At the end of an alternative, we need to push a dummy failure
4207            point in case we are followed by a `pop_failure_jump', because
4208            we don't want the failure point for the alternative to be
4209            popped.  For example, matching `(a|ab)*' against `aab'
4210            requires that we match the `ab' alternative.  */
4211         case push_dummy_failure:
4212           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4213           /* See comments just above at `dummy_failure_jump' about the
4214              two zeroes.  */
4215           PUSH_FAILURE_POINT (0, 0, -2);
4216           break;
4217 
4218         /* Have to succeed matching what follows at least n times.
4219            After that, handle like `on_failure_jump'.  */
4220         case succeed_n:
4221           EXTRACT_NUMBER (mcnt, p + 2);
4222           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4223 
4224           assert (mcnt >= 0);
4225           /* Originally, this is how many times we HAVE to succeed.  */
4226           if (mcnt > 0)
4227             {
4228                mcnt--;
4229 	       p += 2;
4230                STORE_NUMBER_AND_INCR (p, mcnt);
4231                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4232             }
4233 	  else if (mcnt == 0)
4234             {
4235               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4236 	      p[2] = (unsigned char) no_op;
4237               p[3] = (unsigned char) no_op;
4238               goto on_failure;
4239             }
4240           break;
4241 
4242         case jump_n:
4243           EXTRACT_NUMBER (mcnt, p + 2);
4244           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4245 
4246           /* Originally, this is how many times we CAN jump.  */
4247           if (mcnt)
4248             {
4249                mcnt--;
4250                STORE_NUMBER (p + 2, mcnt);
4251 	       goto unconditional_jump;
4252             }
4253           /* If don't have to jump any more, skip over the rest of command.  */
4254 	  else
4255 	    p += 4;
4256           break;
4257 
4258 	case set_number_at:
4259 	  {
4260             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4261 
4262             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4263             p1 = p + mcnt;
4264             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4265             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4266 	    STORE_NUMBER (p1, mcnt);
4267             break;
4268           }
4269 
4270         case wordbound:
4271           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4272           if (AT_WORD_BOUNDARY (d))
4273 	    break;
4274           goto fail;
4275 
4276 	case notwordbound:
4277           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4278 	  if (AT_WORD_BOUNDARY (d))
4279 	    goto fail;
4280           break;
4281 
4282 	case wordbeg:
4283           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4284 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4285 	    break;
4286           goto fail;
4287 
4288 	case wordend:
4289           DEBUG_PRINT1 ("EXECUTING wordend.\n");
4290 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4291               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4292 	    break;
4293           goto fail;
4294 
4295 #ifdef emacs
4296 #ifdef emacs19
4297 	case before_dot:
4298           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4299 	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4300 	    goto fail;
4301 	  break;
4302 
4303 	case at_dot:
4304           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4305 	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
4306 	    goto fail;
4307 	  break;
4308 
4309 	case after_dot:
4310           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4311           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4312 	    goto fail;
4313 	  break;
4314 #else /* not emacs19 */
4315 	case at_dot:
4316           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4317 	  if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4318 	    goto fail;
4319 	  break;
4320 #endif /* not emacs19 */
4321 
4322 	case syntaxspec:
4323           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4324 	  mcnt = *p++;
4325 	  goto matchsyntax;
4326 
4327         case wordchar:
4328           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4329 	  mcnt = (int) Sword;
4330         matchsyntax:
4331 	  PREFETCH ();
4332 	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4333             goto fail;
4334           SET_REGS_MATCHED ();
4335 	  break;
4336 
4337 	case notsyntaxspec:
4338           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4339 	  mcnt = *p++;
4340 	  goto matchnotsyntax;
4341 
4342         case notwordchar:
4343           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4344 	  mcnt = (int) Sword;
4345         matchnotsyntax:
4346 	  PREFETCH ();
4347 	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4348             goto fail;
4349 	  SET_REGS_MATCHED ();
4350           break;
4351 
4352 #else /* not emacs */
4353 	case wordchar:
4354           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4355 	  PREFETCH ();
4356           if (!WORDCHAR_P (d))
4357             goto fail;
4358 	  SET_REGS_MATCHED ();
4359           d++;
4360 	  break;
4361 
4362 	case notwordchar:
4363           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4364 	  PREFETCH ();
4365 	  if (WORDCHAR_P (d))
4366             goto fail;
4367           SET_REGS_MATCHED ();
4368           d++;
4369 	  break;
4370 #endif /* not emacs */
4371 
4372         default:
4373           abort ();
4374 	}
4375       continue;  /* Successfully executed one pattern command; keep going.  */
4376 
4377 
4378     /* We goto here if a matching operation fails. */
4379     fail:
4380       if (!FAIL_STACK_EMPTY ())
4381 	{ /* A restart point is known.  Restore to that state.  */
4382           DEBUG_PRINT1 ("\nFAIL:\n");
4383           POP_FAILURE_POINT (d, p,
4384                              lowest_active_reg, highest_active_reg,
4385                              regstart, regend, reg_info);
4386 
4387           /* If this failure point is a dummy, try the next one.  */
4388           if (!p)
4389 	    goto fail;
4390 
4391           /* If we failed to the end of the pattern, don't examine *p.  */
4392 	  assert (p <= pend);
4393           if (p < pend)
4394             {
4395               boolean is_a_jump_n = false;
4396 
4397               /* If failed to a backwards jump that's part of a repetition
4398                  loop, need to pop this failure point and use the next one.  */
4399               switch ((re_opcode_t) *p)
4400                 {
4401                 case jump_n:
4402                   is_a_jump_n = true;
4403                 case maybe_pop_jump:
4404                 case pop_failure_jump:
4405                 case jump:
4406                   p1 = p + 1;
4407                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4408                   p1 += mcnt;
4409 
4410                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4411                       || (!is_a_jump_n
4412                           && (re_opcode_t) *p1 == on_failure_jump))
4413                     goto fail;
4414                   break;
4415                 default:
4416                   /* do nothing */ ;
4417                 }
4418             }
4419 
4420           if (d >= string1 && d <= end1)
4421 	    dend = end_match_1;
4422         }
4423       else
4424         break;   /* Matching at this starting point really fails.  */
4425     } /* for (;;) */
4426 
4427   if (best_regs_set)
4428     goto restore_best_regs;
4429 
4430   FREE_VARIABLES ();
4431 
4432   return -1;			/* Failure to match.  */
4433 } /* re_match_2 */
4434 
4435 /* Subroutine definitions for re_match_2.  */
4436 
4437 
4438 /* We are passed P pointing to a register number after a start_memory.
4439 
4440    Return true if the pattern up to the corresponding stop_memory can
4441    match the empty string, and false otherwise.
4442 
4443    If we find the matching stop_memory, sets P to point to one past its number.
4444    Otherwise, sets P to an undefined byte less than or equal to END.
4445 
4446    We don't handle duplicates properly (yet).  */
4447 
4448 static boolean
group_match_null_string_p(p,end,reg_info)4449 group_match_null_string_p (p, end, reg_info)
4450     unsigned char **p, *end;
4451     register_info_type *reg_info;
4452 {
4453   int mcnt;
4454   /* Point to after the args to the start_memory.  */
4455   unsigned char *p1 = *p + 2;
4456 
4457   while (p1 < end)
4458     {
4459       /* Skip over opcodes that can match nothing, and return true or
4460 	 false, as appropriate, when we get to one that can't, or to the
4461          matching stop_memory.  */
4462 
4463       switch ((re_opcode_t) *p1)
4464         {
4465         /* Could be either a loop or a series of alternatives.  */
4466         case on_failure_jump:
4467           p1++;
4468           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4469 
4470           /* If the next operation is not a jump backwards in the
4471 	     pattern.  */
4472 
4473 	  if (mcnt >= 0)
4474 	    {
4475               /* Go through the on_failure_jumps of the alternatives,
4476                  seeing if any of the alternatives cannot match nothing.
4477                  The last alternative starts with only a jump,
4478                  whereas the rest start with on_failure_jump and end
4479                  with a jump, e.g., here is the pattern for `a|b|c':
4480 
4481                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4482                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4483                  /exactn/1/c
4484 
4485                  So, we have to first go through the first (n-1)
4486                  alternatives and then deal with the last one separately.  */
4487 
4488 
4489               /* Deal with the first (n-1) alternatives, which start
4490                  with an on_failure_jump (see above) that jumps to right
4491                  past a jump_past_alt.  */
4492 
4493               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4494                 {
4495                   /* `mcnt' holds how many bytes long the alternative
4496                      is, including the ending `jump_past_alt' and
4497                      its number.  */
4498 
4499                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4500 				                      reg_info))
4501                     return false;
4502 
4503                   /* Move to right after this alternative, including the
4504 		     jump_past_alt.  */
4505                   p1 += mcnt;
4506 
4507                   /* Break if it's the beginning of an n-th alternative
4508                      that doesn't begin with an on_failure_jump.  */
4509                   if ((re_opcode_t) *p1 != on_failure_jump)
4510                     break;
4511 
4512 		  /* Still have to check that it's not an n-th
4513 		     alternative that starts with an on_failure_jump.  */
4514 		  p1++;
4515                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4516                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4517                     {
4518 		      /* Get to the beginning of the n-th alternative.  */
4519                       p1 -= 3;
4520                       break;
4521                     }
4522                 }
4523 
4524               /* Deal with the last alternative: go back and get number
4525                  of the `jump_past_alt' just before it.  `mcnt' contains
4526                  the length of the alternative.  */
4527               EXTRACT_NUMBER (mcnt, p1 - 2);
4528 
4529               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4530                 return false;
4531 
4532               p1 += mcnt;	/* Get past the n-th alternative.  */
4533             } /* if mcnt > 0 */
4534           break;
4535 
4536 
4537         case stop_memory:
4538 	  assert (p1[1] == **p);
4539           *p = p1 + 2;
4540           return true;
4541 
4542 
4543         default:
4544           if (!common_op_match_null_string_p (&p1, end, reg_info))
4545             return false;
4546         }
4547     } /* while p1 < end */
4548 
4549   return false;
4550 } /* group_match_null_string_p */
4551 
4552 
4553 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4554    It expects P to be the first byte of a single alternative and END one
4555    byte past the last. The alternative can contain groups.  */
4556 
4557 static boolean
alt_match_null_string_p(p,end,reg_info)4558 alt_match_null_string_p (p, end, reg_info)
4559     unsigned char *p, *end;
4560     register_info_type *reg_info;
4561 {
4562   int mcnt;
4563   unsigned char *p1 = p;
4564 
4565   while (p1 < end)
4566     {
4567       /* Skip over opcodes that can match nothing, and break when we get
4568          to one that can't.  */
4569 
4570       switch ((re_opcode_t) *p1)
4571         {
4572 	/* It's a loop.  */
4573         case on_failure_jump:
4574           p1++;
4575           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4576           p1 += mcnt;
4577           break;
4578 
4579 	default:
4580           if (!common_op_match_null_string_p (&p1, end, reg_info))
4581             return false;
4582         }
4583     }  /* while p1 < end */
4584 
4585   return true;
4586 } /* alt_match_null_string_p */
4587 
4588 
4589 /* Deals with the ops common to group_match_null_string_p and
4590    alt_match_null_string_p.
4591 
4592    Sets P to one after the op and its arguments, if any.  */
4593 
4594 static boolean
common_op_match_null_string_p(p,end,reg_info)4595 common_op_match_null_string_p (p, end, reg_info)
4596     unsigned char **p, *end;
4597     register_info_type *reg_info;
4598 {
4599   int mcnt;
4600   boolean ret;
4601   int reg_no;
4602   unsigned char *p1 = *p;
4603 
4604   switch ((re_opcode_t) *p1++)
4605     {
4606     case no_op:
4607     case begline:
4608     case endline:
4609     case begbuf:
4610     case endbuf:
4611     case wordbeg:
4612     case wordend:
4613     case wordbound:
4614     case notwordbound:
4615 #ifdef emacs
4616     case before_dot:
4617     case at_dot:
4618     case after_dot:
4619 #endif
4620       break;
4621 
4622     case start_memory:
4623       reg_no = *p1;
4624       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4625       ret = group_match_null_string_p (&p1, end, reg_info);
4626 
4627       /* Have to set this here in case we're checking a group which
4628          contains a group and a back reference to it.  */
4629 
4630       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4631         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4632 
4633       if (!ret)
4634         return false;
4635       break;
4636 
4637     /* If this is an optimized succeed_n for zero times, make the jump.  */
4638     case jump:
4639       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4640       if (mcnt >= 0)
4641         p1 += mcnt;
4642       else
4643         return false;
4644       break;
4645 
4646     case succeed_n:
4647       /* Get to the number of times to succeed.  */
4648       p1 += 2;
4649       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4650 
4651       if (mcnt == 0)
4652         {
4653           p1 -= 4;
4654           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4655           p1 += mcnt;
4656         }
4657       else
4658         return false;
4659       break;
4660 
4661     case duplicate:
4662       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4663         return false;
4664       break;
4665 
4666     case set_number_at:
4667       p1 += 4;
4668 
4669     default:
4670       /* All other opcodes mean we cannot match the empty string.  */
4671       return false;
4672   }
4673 
4674   *p = p1;
4675   return true;
4676 } /* common_op_match_null_string_p */
4677 
4678 
4679 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4680    bytes; non-zero otherwise.  */
4681 
4682 static int
bcmp_translate(s1,s2,len,translate)4683 bcmp_translate (s1, s2, len, translate)
4684      unsigned char *s1, *s2;
4685      register int len;
4686      char *translate;
4687 {
4688   register unsigned char *p1 = s1, *p2 = s2;
4689   while (len)
4690     {
4691       if (translate[*p1++] != translate[*p2++]) return 1;
4692       len--;
4693     }
4694   return 0;
4695 }
4696 
4697 /* Entry points for GNU code.  */
4698 
4699 /* re_compile_pattern is the GNU regular expression compiler: it
4700    compiles PATTERN (of length SIZE) and puts the result in BUFP.
4701    Returns 0 if the pattern was valid, otherwise an error string.
4702 
4703    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4704    are set in BUFP on entry.
4705 
4706    We call regex_compile to do the actual compilation.  */
4707 
4708 const char *
re_compile_pattern(pattern,length,bufp)4709 re_compile_pattern (pattern, length, bufp)
4710      const char *pattern;
4711      int length;
4712      struct re_pattern_buffer *bufp;
4713 {
4714   reg_errcode_t ret;
4715 
4716   /* GNU code is written to assume at least RE_NREGS registers will be set
4717      (and at least one extra will be -1).  */
4718   bufp->regs_allocated = REGS_UNALLOCATED;
4719 
4720   /* And GNU code determines whether or not to get register information
4721      by passing null for the REGS argument to re_match, etc., not by
4722      setting no_sub.  */
4723   bufp->no_sub = 0;
4724 
4725   /* Match anchors at newline.  */
4726   bufp->newline_anchor = 1;
4727 
4728   ret = regex_compile (pattern, length, re_syntax_options, bufp);
4729 
4730   return re_error_msg[(int) ret];
4731 }
4732 
4733 /* Entry points compatible with 4.2 BSD regex library.  We don't define
4734    them if this is an Emacs or POSIX compilation.  */
4735 
4736 #if !defined (emacs) && !defined (_POSIX_SOURCE)
4737 
4738 /* BSD has one and only one pattern buffer.  */
4739 static struct re_pattern_buffer re_comp_buf;
4740 
4741 char *
re_comp(s)4742 re_comp (s)
4743     const char *s;
4744 {
4745   reg_errcode_t ret;
4746 
4747   if (!s)
4748     {
4749       if (!re_comp_buf.buffer)
4750 	return "No previous regular expression";
4751       return 0;
4752     }
4753 
4754   if (!re_comp_buf.buffer)
4755     {
4756       re_comp_buf.buffer = (unsigned char *) malloc (200);
4757       if (re_comp_buf.buffer == NULL)
4758         return "Memory exhausted";
4759       re_comp_buf.allocated = 200;
4760 
4761       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4762       if (re_comp_buf.fastmap == NULL)
4763 	return "Memory exhausted";
4764     }
4765 
4766   /* Since `re_exec' always passes NULL for the `regs' argument, we
4767      don't need to initialize the pattern buffer fields which affect it.  */
4768 
4769   /* Match anchors at newlines.  */
4770   re_comp_buf.newline_anchor = 1;
4771 
4772   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4773 
4774   /* Yes, we're discarding `const' here.  */
4775   return (char *) re_error_msg[(int) ret];
4776 }
4777 
4778 
4779 int
re_exec(s)4780 re_exec (s)
4781     const char *s;
4782 {
4783   const int len = (int)strlen (s);
4784   return
4785     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4786 }
4787 #endif /* not emacs and not _POSIX_SOURCE */
4788 
4789 /* POSIX.2 functions.  Don't define these for Emacs.  */
4790 
4791 #ifndef emacs
4792 
4793 /* regcomp takes a regular expression as a string and compiles it.
4794 
4795    PREG is a regex_t *.  We do not expect any fields to be initialized,
4796    since POSIX says we shouldn't.  Thus, we set
4797 
4798      `buffer' to the compiled pattern;
4799      `used' to the length of the compiled pattern;
4800      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4801        REG_EXTENDED bit in CFLAGS is set; otherwise, to
4802        RE_SYNTAX_POSIX_BASIC;
4803      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4804      `fastmap' and `fastmap_accurate' to zero;
4805      `re_nsub' to the number of subexpressions in PATTERN.
4806 
4807    PATTERN is the address of the pattern string.
4808 
4809    CFLAGS is a series of bits which affect compilation.
4810 
4811      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4812      use POSIX basic syntax.
4813 
4814      If REG_NEWLINE is set, then . and [^...] don't match newline.
4815      Also, regexec will try a match beginning after every newline.
4816 
4817      If REG_ICASE is set, then we considers upper- and lowercase
4818      versions of letters to be equivalent when matching.
4819 
4820      If REG_NOSUB is set, then when PREG is passed to regexec, that
4821      routine will report only success or failure, and nothing about the
4822      registers.
4823 
4824    It returns 0 if it succeeds, non-zero if it doesn't.  (See regex.h for
4825    the return codes and their meanings.)  */
4826 
4827 int
regcomp(preg,pattern,cflags)4828 regcomp (preg, pattern, cflags)
4829     regex_t *preg;
4830     const char *pattern;
4831     int cflags;
4832 {
4833   reg_errcode_t ret;
4834   unsigned syntax
4835     = (cflags & REG_EXTENDED) ?
4836       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4837 
4838   /* regex_compile will allocate the space for the compiled pattern.  */
4839   preg->buffer = 0;
4840   preg->allocated = 0;
4841 
4842   /* Don't bother to use a fastmap when searching.  This simplifies the
4843      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4844      characters after newlines into the fastmap.  This way, we just try
4845      every character.  */
4846   preg->fastmap = 0;
4847 
4848   if (cflags & REG_ICASE)
4849     {
4850       unsigned i;
4851 
4852       preg->translate = (char *) malloc (CHAR_SET_SIZE);
4853       if (preg->translate == NULL)
4854         return (int) REG_ESPACE;
4855 
4856       /* Map uppercase characters to corresponding lowercase ones.  */
4857       for (i = 0; i < CHAR_SET_SIZE; i++)
4858         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4859     }
4860   else
4861     preg->translate = NULL;
4862 
4863   /* If REG_NEWLINE is set, newlines are treated differently.  */
4864   if (cflags & REG_NEWLINE)
4865     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
4866       syntax &= ~RE_DOT_NEWLINE;
4867       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4868       /* It also changes the matching behavior.  */
4869       preg->newline_anchor = 1;
4870     }
4871   else
4872     preg->newline_anchor = 0;
4873 
4874   preg->no_sub = !!(cflags & REG_NOSUB);
4875 
4876   /* POSIX says a null character in the pattern terminates it, so we
4877      can use strlen here in compiling the pattern.  */
4878   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4879 
4880   /* POSIX doesn't distinguish between an unmatched open-group and an
4881      unmatched close-group: both are REG_EPAREN.  */
4882   if (ret == REG_ERPAREN) ret = REG_EPAREN;
4883 
4884   return (int) ret;
4885 }
4886 
4887 
4888 /* regexec searches for a given pattern, specified by PREG, in the
4889    string STRING.
4890 
4891    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4892    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
4893    least NMATCH elements, and we set them to the offsets of the
4894    corresponding matched substrings.
4895 
4896    EFLAGS specifies `execution flags' which affect matching: if
4897    REG_NOTBOL is set, then ^ does not match at the beginning of the
4898    string; if REG_NOTEOL is set, then $ does not match at the end.
4899 
4900    We return 0 if we find a match and REG_NOMATCH if not.  */
4901 
4902 int
regexec(preg,string,nmatch,pmatch,eflags)4903 regexec (preg, string, nmatch, pmatch, eflags)
4904     const regex_t *preg;
4905     const char *string;
4906     size_t nmatch;
4907     regmatch_t pmatch[];
4908     int eflags;
4909 {
4910   int ret;
4911   struct re_registers regs;
4912   regex_t private_preg;
4913   int len = (int)strlen (string);
4914   boolean want_reg_info = !preg->no_sub && nmatch > 0;
4915 
4916   private_preg = *preg;
4917 
4918   private_preg.not_bol = !!(eflags & REG_NOTBOL);
4919   private_preg.not_eol = !!(eflags & REG_NOTEOL);
4920 
4921   /* The user has told us exactly how many registers to return
4922      information about, via `nmatch'.  We have to pass that on to the
4923      matching routines.  */
4924   private_preg.regs_allocated = REGS_FIXED;
4925 
4926   if (want_reg_info)
4927     {
4928       regs.num_regs = (unsigned)nmatch;
4929       regs.start = TALLOC (nmatch, regoff_t);
4930       regs.end = TALLOC (nmatch, regoff_t);
4931 
4932       if (regs.start == NULL)
4933         {
4934           if (NULL != regs.end)
4935             free (regs.end);
4936           return (int) REG_NOMATCH;
4937         }
4938       if (regs.end == NULL)
4939         {
4940           free (regs.start);
4941           return (int) REG_NOMATCH;
4942         }
4943     }
4944 
4945   /* Perform the searching operation.  */
4946   ret = re_search (&private_preg, string, len,
4947                    /* start: */ 0, /* range: */ len,
4948                    want_reg_info ? &regs : (struct re_registers *) 0);
4949 
4950   /* Copy the register information to the POSIX structure.  */
4951   if (want_reg_info)
4952     {
4953       if (ret >= 0)
4954         {
4955           unsigned r;
4956 
4957           for (r = 0; r < nmatch; r++)
4958             {
4959               pmatch[r].rm_so = regs.start[r];
4960               pmatch[r].rm_eo = regs.end[r];
4961             }
4962         }
4963 
4964       /* If we needed the temporary register info, free the space now.  */
4965       free (regs.start);
4966       free (regs.end);
4967     }
4968 
4969   /* We want zero return to mean success, unlike `re_search'.  */
4970   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4971 }
4972 
4973 
4974 /* Returns a message corresponding to an error code, ERRCODE, returned
4975    from either regcomp or regexec.   We don't use PREG here.  */
4976 
4977 size_t
regerror(int errcode,const regex_t * preg,char * errbuf,size_t errbuf_size)4978 regerror (
4979     int errcode,
4980     const regex_t *preg,
4981     char *errbuf,
4982     size_t errbuf_size)
4983 {
4984   const char *msg;
4985 
4986   if (errcode < 0
4987       || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4988     /* Only error codes returned by the rest of the code should be passed
4989        to this routine.  If we are given anything else, or if other regex
4990        code generates an invalid error code, then the program has a bug.
4991        Dump core so we can fix it.  */
4992     abort ();
4993 
4994   msg = re_error_msg[errcode];
4995 
4996   /* POSIX doesn't require that we do anything in this case, but why
4997      not be nice.  */
4998 	if (NULL ==  msg)
4999 		msg = "Success";
5000 
5001 	return zbx_strlcpy(errbuf, msg, errbuf_size) + 1;	/* include the null */
5002 }
5003 
5004 
5005 /* Free dynamically allocated space used by PREG.  */
5006 
5007 void
regfree(preg)5008 regfree (preg)
5009     regex_t *preg;
5010 {
5011   if (preg->buffer != NULL)
5012     free (preg->buffer);
5013   preg->buffer = NULL;
5014 
5015   preg->allocated = 0;
5016   preg->used = 0;
5017 
5018   if (preg->fastmap != NULL)
5019     free (preg->fastmap);
5020   preg->fastmap = NULL;
5021   preg->fastmap_accurate = 0;
5022 
5023   if (preg->translate != NULL)
5024     free (preg->translate);
5025   preg->translate = NULL;
5026 }
5027 
5028 #endif /* not emacs  */
5029 
5030 #if defined(_WIN64) && defined(_MSC_VER)
5031 /*	restore optimization options */
5032 #	pragma optimize( "", on )
5033 #endif
5034 
5035 
5036 /*
5037 Local variables:
5038 make-backup-files: t
5039 version-control: t
5040 trim-versions-without-asking: nil
5041 End:
5042 */
5043