1 /* Extended regular expression matching and search library,
2    version 0.12.
3    (Implements POSIX draft P10003.2/D11.2, except for
4    internationalization features.)
5 
6    Copyright (C) 1993 Free Software Foundation, Inc.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2, or (at your option)
11    any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
21 
22 /*
23 
24    This file has been modified to replace int re_max_failures with a
25    #define.  This avoids compile problems when other various libc's
26    already have that symbol.   It has also been modified to be 64-bit
27    clean.  --jordan
28 
29 */
30 
31 
32 /* AIX requires this to be the first thing in the file. */
33 #if defined (_AIX) && !defined (REGEX_MALLOC)
34   #pragma alloca
35 #endif
36 
37 #define _GNU_SOURCE
38 
39 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
40 #include <sys/types.h>
41 
42 #ifdef HAVE_CONFIG_H
43 #include "config.h"
44 #endif
45 
46 /* The `emacs' switch turns on certain matching commands
47    that make sense only in Emacs. */
48 #ifdef emacs
49 
50 #include "lisp.h"
51 #include "buffer.h"
52 #include "syntax.h"
53 
54 /* Emacs uses `NULL' as a predicate.  */
55 #undef NULL
56 
57 #else  /* not emacs */
58 
59 /* We used to test for `BSTRING' here, but only GCC and Emacs define
60    `BSTRING', as far as I know, and neither of them use this code.  */
61 #if HAVE_STRING_H || STDC_HEADERS
62 #include <string.h>
63 #ifndef bcmp
64 #define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
65 #endif
66 #ifndef bcopy
67 #define bcopy(s, d, n)	memcpy ((d), (s), (n))
68 #endif
69 #ifndef bzero
70 #define bzero(s, n)	memset ((s), 0, (n))
71 #endif
72 #else
73 #include <strings.h>
74 #endif
75 
76 #ifdef STDC_HEADERS
77 #include <stdlib.h>
78 #else
79 char *malloc ();
80 char *realloc ();
81 #endif
82 
83 
84 /* Define the syntax stuff for \<, \>, etc.  */
85 
86 /* This must be nonzero for the wordchar and notwordchar pattern
87    commands in re_match_2.  */
88 #ifndef Sword
89 #define Sword 1
90 #endif
91 
92 #ifdef SYNTAX_TABLE
93 
94 extern char *re_syntax_table;
95 
96 #else /* not SYNTAX_TABLE */
97 
98 /* How many characters in the character set.  */
99 #define CHAR_SET_SIZE 256
100 
101 static char re_syntax_table[CHAR_SET_SIZE];
102 
103 static void
init_syntax_once()104 init_syntax_once ()
105 {
106    register int c;
107    static int done = 0;
108 
109    if (done)
110      return;
111 
112    bzero (re_syntax_table, sizeof re_syntax_table);
113 
114    for (c = 'a'; c <= 'z'; c++)
115      re_syntax_table[c] = Sword;
116 
117    for (c = 'A'; c <= 'Z'; c++)
118      re_syntax_table[c] = Sword;
119 
120    for (c = '0'; c <= '9'; c++)
121      re_syntax_table[c] = Sword;
122 
123    re_syntax_table['_'] = Sword;
124 
125    done = 1;
126 }
127 
128 #endif /* not SYNTAX_TABLE */
129 
130 #define SYNTAX(c) re_syntax_table[c]
131 
132 #endif /* not emacs */
133 
134 /* Get the interface, including the syntax bits.  */
135 #include "regex.h"
136 
137 /* isalpha etc. are used for the character classes.  */
138 #include <ctype.h>
139 
140 #ifndef isascii
141 #define isascii(c) 1
142 #endif
143 
144 #ifdef isblank
145 #define ISBLANK(c) (isascii (c) && isblank (c))
146 #else
147 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
148 #endif
149 #ifdef isgraph
150 #define ISGRAPH(c) (isascii (c) && isgraph (c))
151 #else
152 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
153 #endif
154 
155 #define ISPRINT(c) (isascii (c) && isprint (c))
156 #define ISDIGIT(c) (isascii (c) && isdigit (c))
157 #define ISALNUM(c) (isascii (c) && isalnum (c))
158 #define ISALPHA(c) (isascii (c) && isalpha (c))
159 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
160 #define ISLOWER(c) (isascii (c) && islower (c))
161 #define ISPUNCT(c) (isascii (c) && ispunct (c))
162 #define ISSPACE(c) (isascii (c) && isspace (c))
163 #define ISUPPER(c) (isascii (c) && isupper (c))
164 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
165 
166 #ifndef NULL
167 #define NULL 0
168 #endif
169 
170 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
171    since ours (we hope) works properly with all combinations of
172    machines, compilers, `char' and `unsigned char' argument types.
173    (Per Bothner suggested the basic approach.)  */
174 #undef SIGN_EXTEND_CHAR
175 #if __STDC__
176 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
177 #else  /* not __STDC__ */
178 /* As in Harbison and Steele.  */
179 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
180 #endif
181 
182 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
183    use `alloca' instead of `malloc'.  This is because using malloc in
184    re_search* or re_match* could cause memory leaks when C-g is used in
185    Emacs; also, malloc is slower and causes storage fragmentation.  On
186    the other hand, malloc is more portable, and easier to debug.
187 
188    Because we sometimes use alloca, some routines have to be macros,
189    not functions -- `alloca'-allocated space disappears at the end of the
190    function it is called in.  */
191 
192 #ifdef REGEX_MALLOC
193 
194 #define REGEX_ALLOCATE malloc
195 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
196 
197 #else /* not REGEX_MALLOC  */
198 
199 /* Emacs already defines alloca, sometimes.  */
200 #ifndef alloca
201 
202 /* Make alloca work the best possible way.  */
203 #ifdef __GNUC__
204 #define alloca __builtin_alloca
205 #else /* not __GNUC__ */
206 #if HAVE_ALLOCA_H
207 #include <alloca.h>
208 #else /* not __GNUC__ or HAVE_ALLOCA_H */
209 #ifndef _AIX /* Already did AIX, up at the top.  */
210 char *alloca ();
211 #endif /* not _AIX */
212 #endif /* not HAVE_ALLOCA_H */
213 #endif /* not __GNUC__ */
214 
215 #endif /* not alloca */
216 
217 #define REGEX_ALLOCATE alloca
218 
219 /* Assumes a `char *destination' variable.  */
220 #define REGEX_REALLOCATE(source, osize, nsize)				\
221   (destination = (char *) alloca (nsize),				\
222    bcopy (source, destination, osize),					\
223    destination)
224 
225 #endif /* not REGEX_MALLOC */
226 
227 
228 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
229    `string1' or just past its end.  This works if PTR is NULL, which is
230    a good thing.  */
231 #define FIRST_STRING_P(ptr) 					\
232   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
233 
234 /* (Re)Allocate N items of type T using malloc, or fail.  */
235 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
236 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
237 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
238 
239 #define BYTEWIDTH 8 /* In bits.  */
240 
241 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
242 
243 #define MAX(a, b) ((a) > (b) ? (a) : (b))
244 #define MIN(a, b) ((a) < (b) ? (a) : (b))
245 
246 typedef char boolean;
247 #define false 0
248 #define true 1
249 
250 /* These are the command codes that appear in compiled regular
251    expressions.  Some opcodes are followed by argument bytes.  A
252    command code can specify any interpretation whatsoever for its
253    arguments.  Zero bytes may appear in the compiled regular expression.
254 
255    The value of `exactn' is needed in search.c (search_buffer) in Emacs.
256    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
257    `exactn' we use here must also be 1.  */
258 
259 typedef enum
260 {
261   no_op = 0,
262 
263         /* Followed by one byte giving n, then by n literal bytes.  */
264   exactn = 1,
265 
266         /* Matches any (more or less) character.  */
267   anychar,
268 
269         /* Matches any one char belonging to specified set.  First
270            following byte is number of bitmap bytes.  Then come bytes
271            for a bitmap saying which chars are in.  Bits in each byte
272            are ordered low-bit-first.  A character is in the set if its
273            bit is 1.  A character too large to have a bit in the map is
274            automatically not in the set.  */
275   charset,
276 
277         /* Same parameters as charset, but match any character that is
278            not one of those specified.  */
279   charset_not,
280 
281         /* Start remembering the text that is matched, for storing in a
282            register.  Followed by one byte with the register number, in
283            the range 0 to one less than the pattern buffer's re_nsub
284            field.  Then followed by one byte with the number of groups
285            inner to this one.  (This last has to be part of the
286            start_memory only because we need it in the on_failure_jump
287            of re_match_2.)  */
288   start_memory,
289 
290         /* Stop remembering the text that is matched and store it in a
291            memory register.  Followed by one byte with the register
292            number, in the range 0 to one less than `re_nsub' in the
293            pattern buffer, and one byte with the number of inner groups,
294            just like `start_memory'.  (We need the number of inner
295            groups here because we don't have any easy way of finding the
296            corresponding start_memory when we're at a stop_memory.)  */
297   stop_memory,
298 
299         /* Match a duplicate of something remembered. Followed by one
300            byte containing the register number.  */
301   duplicate,
302 
303         /* Fail unless at beginning of line.  */
304   begline,
305 
306         /* Fail unless at end of line.  */
307   endline,
308 
309         /* Succeeds if at beginning of buffer (if emacs) or at beginning
310            of string to be matched (if not).  */
311   begbuf,
312 
313         /* Analogously, for end of buffer/string.  */
314   endbuf,
315 
316         /* Followed by two byte relative address to which to jump.  */
317   jump,
318 
319 	/* Same as jump, but marks the end of an alternative.  */
320   jump_past_alt,
321 
322         /* Followed by two-byte relative address of place to resume at
323            in case of failure.  */
324   on_failure_jump,
325 
326         /* Like on_failure_jump, but pushes a placeholder instead of the
327            current string position when executed.  */
328   on_failure_keep_string_jump,
329 
330         /* Throw away latest failure point and then jump to following
331            two-byte relative address.  */
332   pop_failure_jump,
333 
334         /* Change to pop_failure_jump if know won't have to backtrack to
335            match; otherwise change to jump.  This is used to jump
336            back to the beginning of a repeat.  If what follows this jump
337            clearly won't match what the repeat does, such that we can be
338            sure that there is no use backtracking out of repetitions
339            already matched, then we change it to a pop_failure_jump.
340            Followed by two-byte address.  */
341   maybe_pop_jump,
342 
343         /* Jump to following two-byte address, and push a dummy failure
344            point. This failure point will be thrown away if an attempt
345            is made to use it for a failure.  A `+' construct makes this
346            before the first repeat.  Also used as an intermediary kind
347            of jump when compiling an alternative.  */
348   dummy_failure_jump,
349 
350 	/* Push a dummy failure point and continue.  Used at the end of
351 	   alternatives.  */
352   push_dummy_failure,
353 
354         /* Followed by two-byte relative address and two-byte number n.
355            After matching N times, jump to the address upon failure.  */
356   succeed_n,
357 
358         /* Followed by two-byte relative address, and two-byte number n.
359            Jump to the address N times, then fail.  */
360   jump_n,
361 
362         /* Set the following two-byte relative address to the
363            subsequent two-byte number.  The address *includes* the two
364            bytes of number.  */
365   set_number_at,
366 
367   wordchar,	/* Matches any word-constituent character.  */
368   notwordchar,	/* Matches any char that is not a word-constituent.  */
369 
370   wordbeg,	/* Succeeds if at word beginning.  */
371   wordend,	/* Succeeds if at word end.  */
372 
373   wordbound,	/* Succeeds if at a word boundary.  */
374   notwordbound	/* Succeeds if not at a word boundary.  */
375 
376 #ifdef emacs
377   ,before_dot,	/* Succeeds if before point.  */
378   at_dot,	/* Succeeds if at point.  */
379   after_dot,	/* Succeeds if after point.  */
380 
381 	/* Matches any character whose syntax is specified.  Followed by
382            a byte which contains a syntax code, e.g., Sword.  */
383   syntaxspec,
384 
385 	/* Matches any character whose syntax is not that specified.  */
386   notsyntaxspec
387 #endif /* emacs */
388 } re_opcode_t;
389 
390 /* Common operations on the compiled pattern.  */
391 
392 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
393 
394 #define STORE_NUMBER(destination, number)				\
395   do {									\
396     (destination)[0] = (number) & 0377;					\
397     (destination)[1] = (number) >> 8;					\
398   } while (0)
399 
400 /* Same as STORE_NUMBER, except increment DESTINATION to
401    the byte after where the number is stored.  Therefore, DESTINATION
402    must be an lvalue.  */
403 
404 #define STORE_NUMBER_AND_INCR(destination, number)			\
405   do {									\
406     STORE_NUMBER (destination, number);					\
407     (destination) += 2;							\
408   } while (0)
409 
410 /* Put into DESTINATION a number stored in two contiguous bytes starting
411    at SOURCE.  */
412 
413 #define EXTRACT_NUMBER(destination, source)				\
414   do {									\
415     (destination) = *(source) & 0377;					\
416     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
417   } while (0)
418 
419 #ifdef DEBUG
420 static void
extract_number(dest,source)421 extract_number (dest, source)
422     int *dest;
423     unsigned char *source;
424 {
425   int temp = SIGN_EXTEND_CHAR (*(source + 1));
426   *dest = *source & 0377;
427   *dest += temp << 8;
428 }
429 
430 #ifndef EXTRACT_MACROS /* To debug the macros.  */
431 #undef EXTRACT_NUMBER
432 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
433 #endif /* not EXTRACT_MACROS */
434 
435 #endif /* DEBUG */
436 
437 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
438    SOURCE must be an lvalue.  */
439 
440 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
441   do {									\
442     EXTRACT_NUMBER (destination, source);				\
443     (source) += 2; 							\
444   } while (0)
445 
446 #ifdef DEBUG
447 static void
extract_number_and_incr(destination,source)448 extract_number_and_incr (destination, source)
449     int *destination;
450     unsigned char **source;
451 {
452   extract_number (destination, *source);
453   *source += 2;
454 }
455 
456 #ifndef EXTRACT_MACROS
457 #undef EXTRACT_NUMBER_AND_INCR
458 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
459   extract_number_and_incr (&dest, &src)
460 #endif /* not EXTRACT_MACROS */
461 
462 #endif /* DEBUG */
463 
464 /* If DEBUG is defined, Regex prints many voluminous messages about what
465    it is doing (if the variable `debug' is nonzero).  If linked with the
466    main program in `iregex.c', you can enter patterns and strings
467    interactively.  And if linked with the main program in `main.c' and
468    the other test files, you can run the already-written tests.  */
469 
470 #ifdef DEBUG
471 
472 /* We use standard I/O for debugging.  */
473 #include <stdio.h>
474 
475 /* It is useful to test things that ``must'' be true when debugging.  */
476 #include <assert.h>
477 
478 static int debug = 0;
479 
480 #define DEBUG_STATEMENT(e) e
481 #define DEBUG_PRINT1(x) if (debug) printf (x)
482 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
483 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
484 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
485 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
486   if (debug) print_partial_compiled_pattern (s, e)
487 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
488   if (debug) print_double_string (w, s1, sz1, s2, sz2)
489 
490 
491 extern void printchar ();
492 
493 /* Print the fastmap in human-readable form.  */
494 
495 void
print_fastmap(fastmap)496 print_fastmap (fastmap)
497     char *fastmap;
498 {
499   unsigned was_a_range = 0;
500   unsigned i = 0;
501 
502   while (i < (1 << BYTEWIDTH))
503     {
504       if (fastmap[i++])
505 	{
506 	  was_a_range = 0;
507           printchar (i - 1);
508           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
509             {
510               was_a_range = 1;
511               i++;
512             }
513 	  if (was_a_range)
514             {
515               printf ("-");
516               printchar (i - 1);
517             }
518         }
519     }
520   putchar ('\n');
521 }
522 
523 
524 /* Print a compiled pattern string in human-readable form, starting at
525    the START pointer into it and ending just before the pointer END.  */
526 
527 void
print_partial_compiled_pattern(start,end)528 print_partial_compiled_pattern (start, end)
529     unsigned char *start;
530     unsigned char *end;
531 {
532   int mcnt, mcnt2;
533   unsigned char *p = start;
534   unsigned char *pend = end;
535 
536   if (start == NULL)
537     {
538       printf ("(null)\n");
539       return;
540     }
541 
542   /* Loop over pattern commands.  */
543   while (p < pend)
544     {
545       switch ((re_opcode_t) *p++)
546 	{
547         case no_op:
548           printf ("/no_op");
549           break;
550 
551 	case exactn:
552 	  mcnt = *p++;
553           printf ("/exactn/%d", mcnt);
554           do
555 	    {
556               putchar ('/');
557 	      printchar (*p++);
558             }
559           while (--mcnt);
560           break;
561 
562 	case start_memory:
563           mcnt = *p++;
564           printf ("/start_memory/%d/%d", mcnt, *p++);
565           break;
566 
567 	case stop_memory:
568           mcnt = *p++;
569 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
570           break;
571 
572 	case duplicate:
573 	  printf ("/duplicate/%d", *p++);
574 	  break;
575 
576 	case anychar:
577 	  printf ("/anychar");
578 	  break;
579 
580 	case charset:
581         case charset_not:
582           {
583             register int c;
584 
585             printf ("/charset%s",
586 	            (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
587 
588             assert (p + *p < pend);
589 
590             for (c = 0; c < *p; c++)
591               {
592                 unsigned bit;
593                 unsigned char map_byte = p[1 + c];
594 
595                 putchar ('/');
596 
597 		for (bit = 0; bit < BYTEWIDTH; bit++)
598                   if (map_byte & (1 << bit))
599                     printchar (c * BYTEWIDTH + bit);
600               }
601 	    p += 1 + *p;
602 	    break;
603 	  }
604 
605 	case begline:
606 	  printf ("/begline");
607           break;
608 
609 	case endline:
610           printf ("/endline");
611           break;
612 
613 	case on_failure_jump:
614           extract_number_and_incr (&mcnt, &p);
615   	  printf ("/on_failure_jump/0/%d", mcnt);
616           break;
617 
618 	case on_failure_keep_string_jump:
619           extract_number_and_incr (&mcnt, &p);
620   	  printf ("/on_failure_keep_string_jump/0/%d", mcnt);
621           break;
622 
623 	case dummy_failure_jump:
624           extract_number_and_incr (&mcnt, &p);
625   	  printf ("/dummy_failure_jump/0/%d", mcnt);
626           break;
627 
628 	case push_dummy_failure:
629           printf ("/push_dummy_failure");
630           break;
631 
632         case maybe_pop_jump:
633           extract_number_and_incr (&mcnt, &p);
634   	  printf ("/maybe_pop_jump/0/%d", mcnt);
635 	  break;
636 
637         case pop_failure_jump:
638 	  extract_number_and_incr (&mcnt, &p);
639   	  printf ("/pop_failure_jump/0/%d", mcnt);
640 	  break;
641 
642         case jump_past_alt:
643 	  extract_number_and_incr (&mcnt, &p);
644   	  printf ("/jump_past_alt/0/%d", mcnt);
645 	  break;
646 
647         case jump:
648 	  extract_number_and_incr (&mcnt, &p);
649   	  printf ("/jump/0/%d", mcnt);
650 	  break;
651 
652         case succeed_n:
653           extract_number_and_incr (&mcnt, &p);
654           extract_number_and_incr (&mcnt2, &p);
655  	  printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
656           break;
657 
658         case jump_n:
659           extract_number_and_incr (&mcnt, &p);
660           extract_number_and_incr (&mcnt2, &p);
661  	  printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
662           break;
663 
664         case set_number_at:
665           extract_number_and_incr (&mcnt, &p);
666           extract_number_and_incr (&mcnt2, &p);
667  	  printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
668           break;
669 
670         case wordbound:
671 	  printf ("/wordbound");
672 	  break;
673 
674 	case notwordbound:
675 	  printf ("/notwordbound");
676           break;
677 
678 	case wordbeg:
679 	  printf ("/wordbeg");
680 	  break;
681 
682 	case wordend:
683 	  printf ("/wordend");
684 
685 #ifdef emacs
686 	case before_dot:
687 	  printf ("/before_dot");
688           break;
689 
690 	case at_dot:
691 	  printf ("/at_dot");
692           break;
693 
694 	case after_dot:
695 	  printf ("/after_dot");
696           break;
697 
698 	case syntaxspec:
699           printf ("/syntaxspec");
700 	  mcnt = *p++;
701 	  printf ("/%d", mcnt);
702           break;
703 
704 	case notsyntaxspec:
705           printf ("/notsyntaxspec");
706 	  mcnt = *p++;
707 	  printf ("/%d", mcnt);
708 	  break;
709 #endif /* emacs */
710 
711 	case wordchar:
712 	  printf ("/wordchar");
713           break;
714 
715 	case notwordchar:
716 	  printf ("/notwordchar");
717           break;
718 
719 	case begbuf:
720 	  printf ("/begbuf");
721           break;
722 
723 	case endbuf:
724 	  printf ("/endbuf");
725           break;
726 
727         default:
728           printf ("?%d", *(p-1));
729 	}
730     }
731   printf ("/\n");
732 }
733 
734 
735 void
print_compiled_pattern(bufp)736 print_compiled_pattern (bufp)
737     struct re_pattern_buffer *bufp;
738 {
739   unsigned char *buffer = bufp->buffer;
740 
741   print_partial_compiled_pattern (buffer, buffer + bufp->used);
742   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
743 
744   if (bufp->fastmap_accurate && bufp->fastmap)
745     {
746       printf ("fastmap: ");
747       print_fastmap (bufp->fastmap);
748     }
749 
750   printf ("re_nsub: %d\t", bufp->re_nsub);
751   printf ("regs_alloc: %d\t", bufp->regs_allocated);
752   printf ("can_be_null: %d\t", bufp->can_be_null);
753   printf ("newline_anchor: %d\n", bufp->newline_anchor);
754   printf ("no_sub: %d\t", bufp->no_sub);
755   printf ("not_bol: %d\t", bufp->not_bol);
756   printf ("not_eol: %d\t", bufp->not_eol);
757   printf ("syntax: %d\n", bufp->syntax);
758   /* Perhaps we should print the translate table?  */
759 }
760 
761 
762 void
print_double_string(where,string1,size1,string2,size2)763 print_double_string (where, string1, size1, string2, size2)
764     const char *where;
765     const char *string1;
766     const char *string2;
767     int size1;
768     int size2;
769 {
770   unsigned this_char;
771 
772   if (where == NULL)
773     printf ("(null)");
774   else
775     {
776       if (FIRST_STRING_P (where))
777         {
778           for (this_char = where - string1; this_char < size1; this_char++)
779             printchar (string1[this_char]);
780 
781           where = string2;
782         }
783 
784       for (this_char = where - string2; this_char < size2; this_char++)
785         printchar (string2[this_char]);
786     }
787 }
788 
789 #else /* not DEBUG */
790 
791 #undef assert
792 #define assert(e)
793 
794 #define DEBUG_STATEMENT(e)
795 #define DEBUG_PRINT1(x)
796 #define DEBUG_PRINT2(x1, x2)
797 #define DEBUG_PRINT3(x1, x2, x3)
798 #define DEBUG_PRINT4(x1, x2, x3, x4)
799 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
800 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
801 
802 #endif /* not DEBUG */
803 
804 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
805    also be assigned to arbitrarily: each pattern buffer stores its own
806    syntax, so it can be changed between regex compilations.  */
807 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
808 
809 
810 /* Specify the precise syntax of regexps for compilation.  This provides
811    for compatibility for various utilities which historically have
812    different, incompatible syntaxes.
813 
814    The argument SYNTAX is a bit mask comprised of the various bits
815    defined in regex.h.  We return the old syntax.  */
816 
817 reg_syntax_t
re_set_syntax(syntax)818 re_set_syntax (syntax)
819     reg_syntax_t syntax;
820 {
821   reg_syntax_t ret = re_syntax_options;
822 
823   re_syntax_options = syntax;
824   return ret;
825 }
826 
827 /* This table gives an error message for each of the error codes listed
828    in regex.h.  Obviously the order here has to be same as there.  */
829 
830 static const char *re_error_msg[] =
831   { NULL,					/* REG_NOERROR */
832     "No match",					/* REG_NOMATCH */
833     "Invalid regular expression",		/* REG_BADPAT */
834     "Invalid collation character",		/* REG_ECOLLATE */
835     "Invalid character class name",		/* REG_ECTYPE */
836     "Trailing backslash",			/* REG_EESCAPE */
837     "Invalid back reference",			/* REG_ESUBREG */
838     "Unmatched [ or [^",			/* REG_EBRACK */
839     "Unmatched ( or \\(",			/* REG_EPAREN */
840     "Unmatched \\{",				/* REG_EBRACE */
841     "Invalid content of \\{\\}",		/* REG_BADBR */
842     "Invalid range end",			/* REG_ERANGE */
843     "Memory exhausted",				/* REG_ESPACE */
844     "Invalid preceding regular expression",	/* REG_BADRPT */
845     "Premature end of regular expression",	/* REG_EEND */
846     "Regular expression too big",		/* REG_ESIZE */
847     "Unmatched ) or \\)",			/* REG_ERPAREN */
848   };
849 
850 /* Subroutine declarations and macros for regex_compile.  */
851 
852 static void store_op1 (), store_op2 ();
853 static void insert_op1 (), insert_op2 ();
854 static boolean at_begline_loc_p (), at_endline_loc_p ();
855 static boolean group_in_compile_stack ();
856 static reg_errcode_t compile_range ();
857 
858 /* Fetch the next character in the uncompiled pattern---translating it
859    if necessary.  Also cast from a signed character in the constant
860    string passed to us by the user to an unsigned char that we can use
861    as an array index (in, e.g., `translate').  */
862 #define PATFETCH(c)							\
863   do {if (p == pend) return REG_EEND;					\
864     c = (unsigned char) *p++;						\
865     if (translate) c = translate[c]; 					\
866   } while (0)
867 
868 /* Fetch the next character in the uncompiled pattern, with no
869    translation.  */
870 #define PATFETCH_RAW(c)							\
871   do {if (p == pend) return REG_EEND;					\
872     c = (unsigned char) *p++; 						\
873   } while (0)
874 
875 /* Go backwards one character in the pattern.  */
876 #define PATUNFETCH p--
877 
878 
879 /* If `translate' is non-null, return translate[D], else just D.  We
880    cast the subscript to translate because some data is declared as
881    `char *', to avoid warnings when a string constant is passed.  But
882    when we use a character as a subscript we must make it unsigned.  */
883 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
884 
885 
886 /* Macros for outputting the compiled pattern into `buffer'.  */
887 
888 /* If the buffer isn't allocated when it comes in, use this.  */
889 #define INIT_BUF_SIZE  32
890 
891 /* Make sure we have at least N more bytes of space in buffer.  */
892 #define GET_BUFFER_SPACE(n)						\
893     while (b - bufp->buffer + (n) > bufp->allocated)			\
894       EXTEND_BUFFER ()
895 
896 /* Make sure we have one more byte of buffer space and then add C to it.  */
897 #define BUF_PUSH(c)							\
898   do {									\
899     GET_BUFFER_SPACE (1);						\
900     *b++ = (unsigned char) (c);						\
901   } while (0)
902 
903 
904 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
905 #define BUF_PUSH_2(c1, c2)						\
906   do {									\
907     GET_BUFFER_SPACE (2);						\
908     *b++ = (unsigned char) (c1);					\
909     *b++ = (unsigned char) (c2);					\
910   } while (0)
911 
912 
913 /* As with BUF_PUSH_2, except for three bytes.  */
914 #define BUF_PUSH_3(c1, c2, c3)						\
915   do {									\
916     GET_BUFFER_SPACE (3);						\
917     *b++ = (unsigned char) (c1);					\
918     *b++ = (unsigned char) (c2);					\
919     *b++ = (unsigned char) (c3);					\
920   } while (0)
921 
922 
923 /* Store a jump with opcode OP at LOC to location TO.  We store a
924    relative address offset by the three bytes the jump itself occupies.  */
925 #define STORE_JUMP(op, loc, to) \
926   store_op1 (op, loc, (to) - (loc) - 3)
927 
928 /* Likewise, for a two-argument jump.  */
929 #define STORE_JUMP2(op, loc, to, arg) \
930   store_op2 (op, loc, (to) - (loc) - 3, arg)
931 
932 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
933 #define INSERT_JUMP(op, loc, to) \
934   insert_op1 (op, loc, (to) - (loc) - 3, b)
935 
936 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
937 #define INSERT_JUMP2(op, loc, to, arg) \
938   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
939 
940 
941 /* This is not an arbitrary limit: the arguments which represent offsets
942    into the pattern are two bytes long.  So if 2^16 bytes turns out to
943    be too small, many things would have to change.  */
944 #define MAX_BUF_SIZE (1L << 16)
945 
946 
947 /* Extend the buffer by twice its current size via realloc and
948    reset the pointers that pointed into the old block to point to the
949    correct places in the new one.  If extending the buffer results in it
950    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
951 #define EXTEND_BUFFER()							\
952   do { 									\
953     unsigned char *old_buffer = bufp->buffer;				\
954     if (bufp->allocated == MAX_BUF_SIZE) 				\
955       return REG_ESIZE;							\
956     bufp->allocated <<= 1;						\
957     if (bufp->allocated > MAX_BUF_SIZE)					\
958       bufp->allocated = MAX_BUF_SIZE; 					\
959     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
960     if (bufp->buffer == NULL)						\
961       return REG_ESPACE;						\
962     /* If the buffer moved, move all the pointers into it.  */		\
963     if (old_buffer != bufp->buffer)					\
964       {									\
965         b = (b - old_buffer) + bufp->buffer;				\
966         begalt = (begalt - old_buffer) + bufp->buffer;			\
967         if (fixup_alt_jump)						\
968           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
969         if (laststart)							\
970           laststart = (laststart - old_buffer) + bufp->buffer;		\
971         if (pending_exact)						\
972           pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
973       }									\
974   } while (0)
975 
976 
977 /* Since we have one byte reserved for the register number argument to
978    {start,stop}_memory, the maximum number of groups we can report
979    things about is what fits in that byte.  */
980 #define MAX_REGNUM 255
981 
982 /* But patterns can have more than `MAX_REGNUM' registers.  We just
983    ignore the excess.  */
984 typedef unsigned regnum_t;
985 
986 
987 /* Macros for the compile stack.  */
988 
989 /* Since offsets can go either forwards or backwards, this type needs to
990    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
991 typedef int pattern_offset_t;
992 
993 typedef struct
994 {
995   pattern_offset_t begalt_offset;
996   pattern_offset_t fixup_alt_jump;
997   pattern_offset_t inner_group_offset;
998   pattern_offset_t laststart_offset;
999   regnum_t regnum;
1000 } compile_stack_elt_t;
1001 
1002 
1003 typedef struct
1004 {
1005   compile_stack_elt_t *stack;
1006   unsigned size;
1007   unsigned avail;			/* Offset of next open position.  */
1008 } compile_stack_type;
1009 
1010 
1011 #define INIT_COMPILE_STACK_SIZE 32
1012 
1013 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1014 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1015 
1016 /* The next available element.  */
1017 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1018 
1019 
1020 /* Set the bit for character C in a list.  */
1021 #define SET_LIST_BIT(c)                               \
1022   (b[((unsigned char) (c)) / BYTEWIDTH]               \
1023    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1024 
1025 
1026 /* Get the next unsigned number in the uncompiled pattern.  */
1027 #define GET_UNSIGNED_NUMBER(num) 					\
1028   { if (p != pend)							\
1029      {									\
1030        PATFETCH (c); 							\
1031        while (ISDIGIT (c)) 						\
1032          { 								\
1033            if (num < 0)							\
1034               num = 0;							\
1035            num = num * 10 + c - '0'; 					\
1036            if (p == pend) 						\
1037               break; 							\
1038            PATFETCH (c);						\
1039          } 								\
1040        } 								\
1041     }
1042 
1043 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1044 
1045 #define IS_CHAR_CLASS(string)						\
1046    (STREQ (string, "alpha") || STREQ (string, "upper")			\
1047     || STREQ (string, "lower") || STREQ (string, "digit")		\
1048     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1049     || STREQ (string, "space") || STREQ (string, "print")		\
1050     || STREQ (string, "punct") || STREQ (string, "graph")		\
1051     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1052 
1053 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1054    Returns one of error codes defined in `regex.h', or zero for success.
1055 
1056    Assumes the `allocated' (and perhaps `buffer') and `translate'
1057    fields are set in BUFP on entry.
1058 
1059    If it succeeds, results are put in BUFP (if it returns an error, the
1060    contents of BUFP are undefined):
1061      `buffer' is the compiled pattern;
1062      `syntax' is set to SYNTAX;
1063      `used' is set to the length of the compiled pattern;
1064      `fastmap_accurate' is zero;
1065      `re_nsub' is the number of subexpressions in PATTERN;
1066      `not_bol' and `not_eol' are zero;
1067 
1068    The `fastmap' and `newline_anchor' fields are neither
1069    examined nor set.  */
1070 
1071 static reg_errcode_t
regex_compile(pattern,size,syntax,bufp)1072 regex_compile (pattern, size, syntax, bufp)
1073      const char *pattern;
1074      int size;
1075      reg_syntax_t syntax;
1076      struct re_pattern_buffer *bufp;
1077 {
1078   /* We fetch characters from PATTERN here.  Even though PATTERN is
1079      `char *' (i.e., signed), we declare these variables as unsigned, so
1080      they can be reliably used as array indices.  */
1081   register unsigned char c, c1;
1082 
1083   /* A random tempory spot in PATTERN.  */
1084   const char *p1;
1085 
1086   /* Points to the end of the buffer, where we should append.  */
1087   register unsigned char *b;
1088 
1089   /* Keeps track of unclosed groups.  */
1090   compile_stack_type compile_stack;
1091 
1092   /* Points to the current (ending) position in the pattern.  */
1093   const char *p = pattern;
1094   const char *pend = pattern + size;
1095 
1096   /* How to translate the characters in the pattern.  */
1097   char *translate = bufp->translate;
1098 
1099   /* Address of the count-byte of the most recently inserted `exactn'
1100      command.  This makes it possible to tell if a new exact-match
1101      character can be added to that command or if the character requires
1102      a new `exactn' command.  */
1103   unsigned char *pending_exact = 0;
1104 
1105   /* Address of start of the most recently finished expression.
1106      This tells, e.g., postfix * where to find the start of its
1107      operand.  Reset at the beginning of groups and alternatives.  */
1108   unsigned char *laststart = 0;
1109 
1110   /* Address of beginning of regexp, or inside of last group.  */
1111   unsigned char *begalt;
1112 
1113   /* Place in the uncompiled pattern (i.e., the {) to
1114      which to go back if the interval is invalid.  */
1115   const char *beg_interval;
1116 
1117   /* Address of the place where a forward jump should go to the end of
1118      the containing expression.  Each alternative of an `or' -- except the
1119      last -- ends with a forward jump of this sort.  */
1120   unsigned char *fixup_alt_jump = 0;
1121 
1122   /* Counts open-groups as they are encountered.  Remembered for the
1123      matching close-group on the compile stack, so the same register
1124      number is put in the stop_memory as the start_memory.  */
1125   regnum_t regnum = 0;
1126 
1127 #ifdef DEBUG
1128   DEBUG_PRINT1 ("\nCompiling pattern: ");
1129   if (debug)
1130     {
1131       unsigned debug_count;
1132 
1133       for (debug_count = 0; debug_count < size; debug_count++)
1134         printchar (pattern[debug_count]);
1135       putchar ('\n');
1136     }
1137 #endif /* DEBUG */
1138 
1139   /* Initialize the compile stack.  */
1140   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1141   if (compile_stack.stack == NULL)
1142     return REG_ESPACE;
1143 
1144   compile_stack.size = INIT_COMPILE_STACK_SIZE;
1145   compile_stack.avail = 0;
1146 
1147   /* Initialize the pattern buffer.  */
1148   bufp->syntax = syntax;
1149   bufp->fastmap_accurate = 0;
1150   bufp->not_bol = bufp->not_eol = 0;
1151 
1152   /* Set `used' to zero, so that if we return an error, the pattern
1153      printer (for debugging) will think there's no pattern.  We reset it
1154      at the end.  */
1155   bufp->used = 0;
1156 
1157   /* Always count groups, whether or not bufp->no_sub is set.  */
1158   bufp->re_nsub = 0;
1159 
1160 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1161   /* Initialize the syntax table.  */
1162    init_syntax_once ();
1163 #endif
1164 
1165   if (bufp->allocated == 0)
1166     {
1167       if (bufp->buffer)
1168 	{ /* If zero allocated, but buffer is non-null, try to realloc
1169              enough space.  This loses if buffer's address is bogus, but
1170              that is the user's responsibility.  */
1171           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1172         }
1173       else
1174         { /* Caller did not allocate a buffer.  Do it for them.  */
1175           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1176         }
1177       if (!bufp->buffer) return REG_ESPACE;
1178 
1179       bufp->allocated = INIT_BUF_SIZE;
1180     }
1181 
1182   begalt = b = bufp->buffer;
1183 
1184   /* Loop through the uncompiled pattern until we're at the end.  */
1185   while (p != pend)
1186     {
1187       PATFETCH (c);
1188 
1189       switch (c)
1190         {
1191         case '^':
1192           {
1193             if (   /* If at start of pattern, it's an operator.  */
1194                    p == pattern + 1
1195                    /* If context independent, it's an operator.  */
1196                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1197                    /* Otherwise, depends on what's come before.  */
1198                 || at_begline_loc_p (pattern, p, syntax))
1199               BUF_PUSH (begline);
1200             else
1201               goto normal_char;
1202           }
1203           break;
1204 
1205 
1206         case '$':
1207           {
1208             if (   /* If at end of pattern, it's an operator.  */
1209                    p == pend
1210                    /* If context independent, it's an operator.  */
1211                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1212                    /* Otherwise, depends on what's next.  */
1213                 || at_endline_loc_p (p, pend, syntax))
1214                BUF_PUSH (endline);
1215              else
1216                goto normal_char;
1217            }
1218            break;
1219 
1220 
1221 	case '+':
1222         case '?':
1223           if ((syntax & RE_BK_PLUS_QM)
1224               || (syntax & RE_LIMITED_OPS))
1225             goto normal_char;
1226         handle_plus:
1227         case '*':
1228           /* If there is no previous pattern... */
1229           if (!laststart)
1230             {
1231               if (syntax & RE_CONTEXT_INVALID_OPS)
1232                 return REG_BADRPT;
1233               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1234                 goto normal_char;
1235             }
1236 
1237           {
1238             /* Are we optimizing this jump?  */
1239             boolean keep_string_p = false;
1240 
1241             /* 1 means zero (many) matches is allowed.  */
1242             char zero_times_ok = 0, many_times_ok = 0;
1243 
1244             /* If there is a sequence of repetition chars, collapse it
1245                down to just one (the right one).  We can't combine
1246                interval operators with these because of, e.g., `a{2}*',
1247                which should only match an even number of `a's.  */
1248 
1249             for (;;)
1250               {
1251                 zero_times_ok |= c != '+';
1252                 many_times_ok |= c != '?';
1253 
1254                 if (p == pend)
1255                   break;
1256 
1257                 PATFETCH (c);
1258 
1259                 if (c == '*'
1260                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1261                   ;
1262 
1263                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1264                   {
1265                     if (p == pend) return REG_EESCAPE;
1266 
1267                     PATFETCH (c1);
1268                     if (!(c1 == '+' || c1 == '?'))
1269                       {
1270                         PATUNFETCH;
1271                         PATUNFETCH;
1272                         break;
1273                       }
1274 
1275                     c = c1;
1276                   }
1277                 else
1278                   {
1279                     PATUNFETCH;
1280                     break;
1281                   }
1282 
1283                 /* If we get here, we found another repeat character.  */
1284                }
1285 
1286             /* Star, etc. applied to an empty pattern is equivalent
1287                to an empty pattern.  */
1288             if (!laststart)
1289               break;
1290 
1291             /* Now we know whether or not zero matches is allowed
1292                and also whether or not two or more matches is allowed.  */
1293             if (many_times_ok)
1294               { /* More than one repetition is allowed, so put in at the
1295                    end a backward relative jump from `b' to before the next
1296                    jump we're going to put in below (which jumps from
1297                    laststart to after this jump).
1298 
1299                    But if we are at the `*' in the exact sequence `.*\n',
1300                    insert an unconditional jump backwards to the .,
1301                    instead of the beginning of the loop.  This way we only
1302                    push a failure point once, instead of every time
1303                    through the loop.  */
1304                 assert (p - 1 > pattern);
1305 
1306                 /* Allocate the space for the jump.  */
1307                 GET_BUFFER_SPACE (3);
1308 
1309                 /* We know we are not at the first character of the pattern,
1310                    because laststart was nonzero.  And we've already
1311                    incremented `p', by the way, to be the character after
1312                    the `*'.  Do we have to do something analogous here
1313                    for null bytes, because of RE_DOT_NOT_NULL?  */
1314                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1315 		    && zero_times_ok
1316                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1317                     && !(syntax & RE_DOT_NEWLINE))
1318                   { /* We have .*\n.  */
1319                     STORE_JUMP (jump, b, laststart);
1320                     keep_string_p = true;
1321                   }
1322                 else
1323                   /* Anything else.  */
1324                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1325 
1326                 /* We've added more stuff to the buffer.  */
1327                 b += 3;
1328               }
1329 
1330             /* On failure, jump from laststart to b + 3, which will be the
1331                end of the buffer after this jump is inserted.  */
1332             GET_BUFFER_SPACE (3);
1333             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1334                                        : on_failure_jump,
1335                          laststart, b + 3);
1336             pending_exact = 0;
1337             b += 3;
1338 
1339             if (!zero_times_ok)
1340               {
1341                 /* At least one repetition is required, so insert a
1342                    `dummy_failure_jump' before the initial
1343                    `on_failure_jump' instruction of the loop. This
1344                    effects a skip over that instruction the first time
1345                    we hit that loop.  */
1346                 GET_BUFFER_SPACE (3);
1347                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1348                 b += 3;
1349               }
1350             }
1351 	  break;
1352 
1353 
1354 	case '.':
1355           laststart = b;
1356           BUF_PUSH (anychar);
1357           break;
1358 
1359 
1360         case '[':
1361           {
1362             boolean had_char_class = false;
1363 
1364             if (p == pend) return REG_EBRACK;
1365 
1366             /* Ensure that we have enough space to push a charset: the
1367                opcode, the length count, and the bitset; 34 bytes in all.  */
1368 	    GET_BUFFER_SPACE (34);
1369 
1370             laststart = b;
1371 
1372             /* We test `*p == '^' twice, instead of using an if
1373                statement, so we only need one BUF_PUSH.  */
1374             BUF_PUSH (*p == '^' ? charset_not : charset);
1375             if (*p == '^')
1376               p++;
1377 
1378             /* Remember the first position in the bracket expression.  */
1379             p1 = p;
1380 
1381             /* Push the number of bytes in the bitmap.  */
1382             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1383 
1384             /* Clear the whole map.  */
1385             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1386 
1387             /* charset_not matches newline according to a syntax bit.  */
1388             if ((re_opcode_t) b[-2] == charset_not
1389                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1390               SET_LIST_BIT ('\n');
1391 
1392             /* Read in characters and ranges, setting map bits.  */
1393             for (;;)
1394               {
1395                 if (p == pend) return REG_EBRACK;
1396 
1397                 PATFETCH (c);
1398 
1399                 /* \ might escape characters inside [...] and [^...].  */
1400                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1401                   {
1402                     if (p == pend) return REG_EESCAPE;
1403 
1404                     PATFETCH (c1);
1405                     SET_LIST_BIT (c1);
1406                     continue;
1407                   }
1408 
1409                 /* Could be the end of the bracket expression.  If it's
1410                    not (i.e., when the bracket expression is `[]' so
1411                    far), the ']' character bit gets set way below.  */
1412                 if (c == ']' && p != p1 + 1)
1413                   break;
1414 
1415                 /* Look ahead to see if it's a range when the last thing
1416                    was a character class.  */
1417                 if (had_char_class && c == '-' && *p != ']')
1418                   return REG_ERANGE;
1419 
1420                 /* Look ahead to see if it's a range when the last thing
1421                    was a character: if this is a hyphen not at the
1422                    beginning or the end of a list, then it's the range
1423                    operator.  */
1424                 if (c == '-'
1425                     && !(p - 2 >= pattern && p[-2] == '[')
1426                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1427                     && *p != ']')
1428                   {
1429                     reg_errcode_t ret
1430                       = compile_range (&p, pend, translate, syntax, b);
1431                     if (ret != REG_NOERROR) return ret;
1432                   }
1433 
1434                 else if (p[0] == '-' && p[1] != ']')
1435                   { /* This handles ranges made up of characters only.  */
1436                     reg_errcode_t ret;
1437 
1438 		    /* Move past the `-'.  */
1439                     PATFETCH (c1);
1440 
1441                     ret = compile_range (&p, pend, translate, syntax, b);
1442                     if (ret != REG_NOERROR) return ret;
1443                   }
1444 
1445                 /* See if we're at the beginning of a possible character
1446                    class.  */
1447 
1448                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1449                   { /* Leave room for the null.  */
1450                     char str[CHAR_CLASS_MAX_LENGTH + 1];
1451 
1452                     PATFETCH (c);
1453                     c1 = 0;
1454 
1455                     /* If pattern is `[[:'.  */
1456                     if (p == pend) return REG_EBRACK;
1457 
1458                     for (;;)
1459                       {
1460                         PATFETCH (c);
1461                         if (c == ':' || c == ']' || p == pend
1462                             || c1 == CHAR_CLASS_MAX_LENGTH)
1463                           break;
1464                         str[c1++] = c;
1465                       }
1466                     str[c1] = '\0';
1467 
1468                     /* If isn't a word bracketed by `[:' and:`]':
1469                        undo the ending character, the letters, and leave
1470                        the leading `:' and `[' (but set bits for them).  */
1471                     if (c == ':' && *p == ']')
1472                       {
1473                         int ch;
1474                         boolean is_alnum = STREQ (str, "alnum");
1475                         boolean is_alpha = STREQ (str, "alpha");
1476                         boolean is_blank = STREQ (str, "blank");
1477                         boolean is_cntrl = STREQ (str, "cntrl");
1478                         boolean is_digit = STREQ (str, "digit");
1479                         boolean is_graph = STREQ (str, "graph");
1480                         boolean is_lower = STREQ (str, "lower");
1481                         boolean is_print = STREQ (str, "print");
1482                         boolean is_punct = STREQ (str, "punct");
1483                         boolean is_space = STREQ (str, "space");
1484                         boolean is_upper = STREQ (str, "upper");
1485                         boolean is_xdigit = STREQ (str, "xdigit");
1486 
1487                         if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1488 
1489                         /* Throw away the ] at the end of the character
1490                            class.  */
1491                         PATFETCH (c);
1492 
1493                         if (p == pend) return REG_EBRACK;
1494 
1495                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1496                           {
1497                             if (   (is_alnum  && ISALNUM (ch))
1498                                 || (is_alpha  && ISALPHA (ch))
1499                                 || (is_blank  && ISBLANK (ch))
1500                                 || (is_cntrl  && ISCNTRL (ch))
1501                                 || (is_digit  && ISDIGIT (ch))
1502                                 || (is_graph  && ISGRAPH (ch))
1503                                 || (is_lower  && ISLOWER (ch))
1504                                 || (is_print  && ISPRINT (ch))
1505                                 || (is_punct  && ISPUNCT (ch))
1506                                 || (is_space  && ISSPACE (ch))
1507                                 || (is_upper  && ISUPPER (ch))
1508                                 || (is_xdigit && ISXDIGIT (ch)))
1509                             SET_LIST_BIT (ch);
1510                           }
1511                         had_char_class = true;
1512                       }
1513                     else
1514                       {
1515                         c1++;
1516                         while (c1--)
1517                           PATUNFETCH;
1518                         SET_LIST_BIT ('[');
1519                         SET_LIST_BIT (':');
1520                         had_char_class = false;
1521                       }
1522                   }
1523                 else
1524                   {
1525                     had_char_class = false;
1526                     SET_LIST_BIT (c);
1527                   }
1528               }
1529 
1530             /* Discard any (non)matching list bytes that are all 0 at the
1531                end of the map.  Decrease the map-length byte too.  */
1532             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1533               b[-1]--;
1534             b += b[-1];
1535           }
1536           break;
1537 
1538 
1539 	case '(':
1540           if (syntax & RE_NO_BK_PARENS)
1541             goto handle_open;
1542           else
1543             goto normal_char;
1544 
1545 
1546         case ')':
1547           if (syntax & RE_NO_BK_PARENS)
1548             goto handle_close;
1549           else
1550             goto normal_char;
1551 
1552 
1553         case '\n':
1554           if (syntax & RE_NEWLINE_ALT)
1555             goto handle_alt;
1556           else
1557             goto normal_char;
1558 
1559 
1560 	case '|':
1561           if (syntax & RE_NO_BK_VBAR)
1562             goto handle_alt;
1563           else
1564             goto normal_char;
1565 
1566 
1567         case '{':
1568            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1569              goto handle_interval;
1570            else
1571              goto normal_char;
1572 
1573 
1574         case '\\':
1575           if (p == pend) return REG_EESCAPE;
1576 
1577           /* Do not translate the character after the \, so that we can
1578              distinguish, e.g., \B from \b, even if we normally would
1579              translate, e.g., B to b.  */
1580           PATFETCH_RAW (c);
1581 
1582           switch (c)
1583             {
1584             case '(':
1585               if (syntax & RE_NO_BK_PARENS)
1586                 goto normal_backslash;
1587 
1588             handle_open:
1589               bufp->re_nsub++;
1590               regnum++;
1591 
1592               if (COMPILE_STACK_FULL)
1593                 {
1594                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
1595                             compile_stack_elt_t);
1596                   if (compile_stack.stack == NULL) return REG_ESPACE;
1597 
1598                   compile_stack.size <<= 1;
1599                 }
1600 
1601               /* These are the values to restore when we hit end of this
1602                  group.  They are all relative offsets, so that if the
1603                  whole pattern moves because of realloc, they will still
1604                  be valid.  */
1605               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1606               COMPILE_STACK_TOP.fixup_alt_jump
1607                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1608               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1609               COMPILE_STACK_TOP.regnum = regnum;
1610 
1611               /* We will eventually replace the 0 with the number of
1612                  groups inner to this one.  But do not push a
1613                  start_memory for groups beyond the last one we can
1614                  represent in the compiled pattern.  */
1615               if (regnum <= MAX_REGNUM)
1616                 {
1617                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1618                   BUF_PUSH_3 (start_memory, regnum, 0);
1619                 }
1620 
1621               compile_stack.avail++;
1622 
1623               fixup_alt_jump = 0;
1624               laststart = 0;
1625               begalt = b;
1626 	      /* If we've reached MAX_REGNUM groups, then this open
1627 		 won't actually generate any code, so we'll have to
1628 		 clear pending_exact explicitly.  */
1629 	      pending_exact = 0;
1630               break;
1631 
1632 
1633             case ')':
1634               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1635 
1636               if (COMPILE_STACK_EMPTY)
1637                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1638                   goto normal_backslash;
1639                 else
1640                   return REG_ERPAREN;
1641 
1642             handle_close:
1643               if (fixup_alt_jump)
1644                 { /* Push a dummy failure point at the end of the
1645                      alternative for a possible future
1646                      `pop_failure_jump' to pop.  See comments at
1647                      `push_dummy_failure' in `re_match_2'.  */
1648                   BUF_PUSH (push_dummy_failure);
1649 
1650                   /* We allocated space for this jump when we assigned
1651                      to `fixup_alt_jump', in the `handle_alt' case below.  */
1652                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1653                 }
1654 
1655               /* See similar code for backslashed left paren above.  */
1656               if (COMPILE_STACK_EMPTY)
1657                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1658                   goto normal_char;
1659                 else
1660                   return REG_ERPAREN;
1661 
1662               /* Since we just checked for an empty stack above, this
1663                  ``can't happen''.  */
1664               assert (compile_stack.avail != 0);
1665               {
1666                 /* We don't just want to restore into `regnum', because
1667                    later groups should continue to be numbered higher,
1668                    as in `(ab)c(de)' -- the second group is #2.  */
1669                 regnum_t this_group_regnum;
1670 
1671                 compile_stack.avail--;
1672                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1673                 fixup_alt_jump
1674                   = COMPILE_STACK_TOP.fixup_alt_jump
1675                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1676                     : 0;
1677                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1678                 this_group_regnum = COMPILE_STACK_TOP.regnum;
1679 		/* If we've reached MAX_REGNUM groups, then this open
1680 		   won't actually generate any code, so we'll have to
1681 		   clear pending_exact explicitly.  */
1682 		pending_exact = 0;
1683 
1684                 /* We're at the end of the group, so now we know how many
1685                    groups were inside this one.  */
1686                 if (this_group_regnum <= MAX_REGNUM)
1687                   {
1688                     unsigned char *inner_group_loc
1689                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1690 
1691                     *inner_group_loc = regnum - this_group_regnum;
1692                     BUF_PUSH_3 (stop_memory, this_group_regnum,
1693                                 regnum - this_group_regnum);
1694                   }
1695               }
1696               break;
1697 
1698 
1699             case '|':					/* `\|'.  */
1700               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1701                 goto normal_backslash;
1702             handle_alt:
1703               if (syntax & RE_LIMITED_OPS)
1704                 goto normal_char;
1705 
1706               /* Insert before the previous alternative a jump which
1707                  jumps to this alternative if the former fails.  */
1708               GET_BUFFER_SPACE (3);
1709               INSERT_JUMP (on_failure_jump, begalt, b + 6);
1710               pending_exact = 0;
1711               b += 3;
1712 
1713               /* The alternative before this one has a jump after it
1714                  which gets executed if it gets matched.  Adjust that
1715                  jump so it will jump to this alternative's analogous
1716                  jump (put in below, which in turn will jump to the next
1717                  (if any) alternative's such jump, etc.).  The last such
1718                  jump jumps to the correct final destination.  A picture:
1719                           _____ _____
1720                           |   | |   |
1721                           |   v |   v
1722                          a | b   | c
1723 
1724                  If we are at `b', then fixup_alt_jump right now points to a
1725                  three-byte space after `a'.  We'll put in the jump, set
1726                  fixup_alt_jump to right after `b', and leave behind three
1727                  bytes which we'll fill in when we get to after `c'.  */
1728 
1729               if (fixup_alt_jump)
1730                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1731 
1732               /* Mark and leave space for a jump after this alternative,
1733                  to be filled in later either by next alternative or
1734                  when know we're at the end of a series of alternatives.  */
1735               fixup_alt_jump = b;
1736               GET_BUFFER_SPACE (3);
1737               b += 3;
1738 
1739               laststart = 0;
1740               begalt = b;
1741               break;
1742 
1743 
1744             case '{':
1745               /* If \{ is a literal.  */
1746               if (!(syntax & RE_INTERVALS)
1747                      /* If we're at `\{' and it's not the open-interval
1748                         operator.  */
1749                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1750                   || (p - 2 == pattern  &&  p == pend))
1751                 goto normal_backslash;
1752 
1753             handle_interval:
1754               {
1755                 /* If got here, then the syntax allows intervals.  */
1756 
1757                 /* At least (most) this many matches must be made.  */
1758                 int lower_bound = -1, upper_bound = -1;
1759 
1760                 beg_interval = p - 1;
1761 
1762                 if (p == pend)
1763                   {
1764                     if (syntax & RE_NO_BK_BRACES)
1765                       goto unfetch_interval;
1766                     else
1767                       return REG_EBRACE;
1768                   }
1769 
1770                 GET_UNSIGNED_NUMBER (lower_bound);
1771 
1772                 if (c == ',')
1773                   {
1774                     GET_UNSIGNED_NUMBER (upper_bound);
1775                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1776                   }
1777                 else
1778                   /* Interval such as `{1}' => match exactly once. */
1779                   upper_bound = lower_bound;
1780 
1781                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1782                     || lower_bound > upper_bound)
1783                   {
1784                     if (syntax & RE_NO_BK_BRACES)
1785                       goto unfetch_interval;
1786                     else
1787                       return REG_BADBR;
1788                   }
1789 
1790                 if (!(syntax & RE_NO_BK_BRACES))
1791                   {
1792                     if (c != '\\') return REG_EBRACE;
1793 
1794                     PATFETCH (c);
1795                   }
1796 
1797                 if (c != '}')
1798                   {
1799                     if (syntax & RE_NO_BK_BRACES)
1800                       goto unfetch_interval;
1801                     else
1802                       return REG_BADBR;
1803                   }
1804 
1805                 /* We just parsed a valid interval.  */
1806 
1807                 /* If it's invalid to have no preceding re.  */
1808                 if (!laststart)
1809                   {
1810                     if (syntax & RE_CONTEXT_INVALID_OPS)
1811                       return REG_BADRPT;
1812                     else if (syntax & RE_CONTEXT_INDEP_OPS)
1813                       laststart = b;
1814                     else
1815                       goto unfetch_interval;
1816                   }
1817 
1818                 /* If the upper bound is zero, don't want to succeed at
1819                    all; jump from `laststart' to `b + 3', which will be
1820                    the end of the buffer after we insert the jump.  */
1821                  if (upper_bound == 0)
1822                    {
1823                      GET_BUFFER_SPACE (3);
1824                      INSERT_JUMP (jump, laststart, b + 3);
1825                      b += 3;
1826                    }
1827 
1828                  /* Otherwise, we have a nontrivial interval.  When
1829                     we're all done, the pattern will look like:
1830                       set_number_at <jump count> <upper bound>
1831                       set_number_at <succeed_n count> <lower bound>
1832                       succeed_n <after jump addr> <succed_n count>
1833                       <body of loop>
1834                       jump_n <succeed_n addr> <jump count>
1835                     (The upper bound and `jump_n' are omitted if
1836                     `upper_bound' is 1, though.)  */
1837                  else
1838                    { /* If the upper bound is > 1, we need to insert
1839                         more at the end of the loop.  */
1840                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
1841 
1842                      GET_BUFFER_SPACE (nbytes);
1843 
1844                      /* Initialize lower bound of the `succeed_n', even
1845                         though it will be set during matching by its
1846                         attendant `set_number_at' (inserted next),
1847                         because `re_compile_fastmap' needs to know.
1848                         Jump to the `jump_n' we might insert below.  */
1849                      INSERT_JUMP2 (succeed_n, laststart,
1850                                    b + 5 + (upper_bound > 1) * 5,
1851                                    lower_bound);
1852                      b += 5;
1853 
1854                      /* Code to initialize the lower bound.  Insert
1855                         before the `succeed_n'.  The `5' is the last two
1856                         bytes of this `set_number_at', plus 3 bytes of
1857                         the following `succeed_n'.  */
1858                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1859                      b += 5;
1860 
1861                      if (upper_bound > 1)
1862                        { /* More than one repetition is allowed, so
1863                             append a backward jump to the `succeed_n'
1864                             that starts this interval.
1865 
1866                             When we've reached this during matching,
1867                             we'll have matched the interval once, so
1868                             jump back only `upper_bound - 1' times.  */
1869                          STORE_JUMP2 (jump_n, b, laststart + 5,
1870                                       upper_bound - 1);
1871                          b += 5;
1872 
1873                          /* The location we want to set is the second
1874                             parameter of the `jump_n'; that is `b-2' as
1875                             an absolute address.  `laststart' will be
1876                             the `set_number_at' we're about to insert;
1877                             `laststart+3' the number to set, the source
1878                             for the relative address.  But we are
1879                             inserting into the middle of the pattern --
1880                             so everything is getting moved up by 5.
1881                             Conclusion: (b - 2) - (laststart + 3) + 5,
1882                             i.e., b - laststart.
1883 
1884                             We insert this at the beginning of the loop
1885                             so that if we fail during matching, we'll
1886                             reinitialize the bounds.  */
1887                          insert_op2 (set_number_at, laststart, b - laststart,
1888                                      upper_bound - 1, b);
1889                          b += 5;
1890                        }
1891                    }
1892                 pending_exact = 0;
1893                 beg_interval = NULL;
1894               }
1895               break;
1896 
1897             unfetch_interval:
1898               /* If an invalid interval, match the characters as literals.  */
1899                assert (beg_interval);
1900                p = beg_interval;
1901                beg_interval = NULL;
1902 
1903                /* normal_char and normal_backslash need `c'.  */
1904                PATFETCH (c);
1905 
1906                if (!(syntax & RE_NO_BK_BRACES))
1907                  {
1908                    if (p > pattern  &&  p[-1] == '\\')
1909                      goto normal_backslash;
1910                  }
1911                goto normal_char;
1912 
1913 #ifdef emacs
1914             /* There is no way to specify the before_dot and after_dot
1915                operators.  rms says this is ok.  --karl  */
1916             case '=':
1917               BUF_PUSH (at_dot);
1918               break;
1919 
1920             case 's':
1921               laststart = b;
1922               PATFETCH (c);
1923               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1924               break;
1925 
1926             case 'S':
1927               laststart = b;
1928               PATFETCH (c);
1929               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1930               break;
1931 #endif /* emacs */
1932 
1933 
1934             case 'w':
1935               laststart = b;
1936               BUF_PUSH (wordchar);
1937               break;
1938 
1939 
1940             case 'W':
1941               laststart = b;
1942               BUF_PUSH (notwordchar);
1943               break;
1944 
1945 
1946             case '<':
1947               BUF_PUSH (wordbeg);
1948               break;
1949 
1950             case '>':
1951               BUF_PUSH (wordend);
1952               break;
1953 
1954             case 'b':
1955               BUF_PUSH (wordbound);
1956               break;
1957 
1958             case 'B':
1959               BUF_PUSH (notwordbound);
1960               break;
1961 
1962             case '`':
1963               BUF_PUSH (begbuf);
1964               break;
1965 
1966             case '\'':
1967               BUF_PUSH (endbuf);
1968               break;
1969 
1970             case '1': case '2': case '3': case '4': case '5':
1971             case '6': case '7': case '8': case '9':
1972               if (syntax & RE_NO_BK_REFS)
1973                 goto normal_char;
1974 
1975               c1 = c - '0';
1976 
1977               if (c1 > regnum)
1978                 return REG_ESUBREG;
1979 
1980               /* Can't back reference to a subexpression if inside of it.  */
1981               if (group_in_compile_stack (compile_stack, c1))
1982                 goto normal_char;
1983 
1984               laststart = b;
1985               BUF_PUSH_2 (duplicate, c1);
1986               break;
1987 
1988 
1989             case '+':
1990             case '?':
1991               if (syntax & RE_BK_PLUS_QM)
1992                 goto handle_plus;
1993               else
1994                 goto normal_backslash;
1995 
1996             default:
1997             normal_backslash:
1998               /* You might think it would be useful for \ to mean
1999                  not to translate; but if we don't translate it
2000                  it will never match anything.  */
2001               c = TRANSLATE (c);
2002               goto normal_char;
2003             }
2004           break;
2005 
2006 
2007 	default:
2008         /* Expects the character in `c'.  */
2009 	normal_char:
2010 	      /* If no exactn currently being built.  */
2011           if (!pending_exact
2012 
2013               /* If last exactn not at current position.  */
2014               || pending_exact + *pending_exact + 1 != b
2015 
2016               /* We have only one byte following the exactn for the count.  */
2017 	      || *pending_exact == (1 << BYTEWIDTH) - 1
2018 
2019               /* If followed by a repetition operator.  */
2020               || *p == '*' || *p == '^'
2021 	      || ((syntax & RE_BK_PLUS_QM)
2022 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2023 		  : (*p == '+' || *p == '?'))
2024 	      || ((syntax & RE_INTERVALS)
2025                   && ((syntax & RE_NO_BK_BRACES)
2026 		      ? *p == '{'
2027                       : (p[0] == '\\' && p[1] == '{'))))
2028 	    {
2029 	      /* Start building a new exactn.  */
2030 
2031               laststart = b;
2032 
2033 	      BUF_PUSH_2 (exactn, 0);
2034 	      pending_exact = b - 1;
2035             }
2036 
2037 	  BUF_PUSH (c);
2038           (*pending_exact)++;
2039 	  break;
2040         } /* switch (c) */
2041     } /* while p != pend */
2042 
2043 
2044   /* Through the pattern now.  */
2045 
2046   if (fixup_alt_jump)
2047     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2048 
2049   if (!COMPILE_STACK_EMPTY)
2050     return REG_EPAREN;
2051 
2052   free (compile_stack.stack);
2053 
2054   /* We have succeeded; set the length of the buffer.  */
2055   bufp->used = b - bufp->buffer;
2056 
2057 #ifdef DEBUG
2058   if (debug)
2059     {
2060       DEBUG_PRINT1 ("\nCompiled pattern: ");
2061       print_compiled_pattern (bufp);
2062     }
2063 #endif /* DEBUG */
2064 
2065   return REG_NOERROR;
2066 } /* regex_compile */
2067 
2068 /* Subroutines for `regex_compile'.  */
2069 
2070 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
2071 
2072 static void
store_op1(op,loc,arg)2073 store_op1 (op, loc, arg)
2074     re_opcode_t op;
2075     unsigned char *loc;
2076     int arg;
2077 {
2078   *loc = (unsigned char) op;
2079   STORE_NUMBER (loc + 1, arg);
2080 }
2081 
2082 
2083 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2084 
2085 static void
store_op2(op,loc,arg1,arg2)2086 store_op2 (op, loc, arg1, arg2)
2087     re_opcode_t op;
2088     unsigned char *loc;
2089     int arg1, arg2;
2090 {
2091   *loc = (unsigned char) op;
2092   STORE_NUMBER (loc + 1, arg1);
2093   STORE_NUMBER (loc + 3, arg2);
2094 }
2095 
2096 
2097 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2098    for OP followed by two-byte integer parameter ARG.  */
2099 
2100 static void
insert_op1(op,loc,arg,end)2101 insert_op1 (op, loc, arg, end)
2102     re_opcode_t op;
2103     unsigned char *loc;
2104     int arg;
2105     unsigned char *end;
2106 {
2107   register unsigned char *pfrom = end;
2108   register unsigned char *pto = end + 3;
2109 
2110   while (pfrom != loc)
2111     *--pto = *--pfrom;
2112 
2113   store_op1 (op, loc, arg);
2114 }
2115 
2116 
2117 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2118 
2119 static void
insert_op2(op,loc,arg1,arg2,end)2120 insert_op2 (op, loc, arg1, arg2, end)
2121     re_opcode_t op;
2122     unsigned char *loc;
2123     int arg1, arg2;
2124     unsigned char *end;
2125 {
2126   register unsigned char *pfrom = end;
2127   register unsigned char *pto = end + 5;
2128 
2129   while (pfrom != loc)
2130     *--pto = *--pfrom;
2131 
2132   store_op2 (op, loc, arg1, arg2);
2133 }
2134 
2135 
2136 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2137    after an alternative or a begin-subexpression.  We assume there is at
2138    least one character before the ^.  */
2139 
2140 static boolean
at_begline_loc_p(pattern,p,syntax)2141 at_begline_loc_p (pattern, p, syntax)
2142     const char *pattern, *p;
2143     reg_syntax_t syntax;
2144 {
2145   const char *prev = p - 2;
2146   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2147 
2148   return
2149        /* After a subexpression?  */
2150        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2151        /* After an alternative?  */
2152     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2153 }
2154 
2155 
2156 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2157    at least one character after the $, i.e., `P < PEND'.  */
2158 
2159 static boolean
at_endline_loc_p(p,pend,syntax)2160 at_endline_loc_p (p, pend, syntax)
2161     const char *p, *pend;
2162     int syntax;
2163 {
2164   const char *next = p;
2165   boolean next_backslash = *next == '\\';
2166   const char *next_next = p + 1 < pend ? p + 1 : NULL;
2167 
2168   return
2169        /* Before a subexpression?  */
2170        (syntax & RE_NO_BK_PARENS ? *next == ')'
2171         : next_backslash && next_next && *next_next == ')')
2172        /* Before an alternative?  */
2173     || (syntax & RE_NO_BK_VBAR ? *next == '|'
2174         : next_backslash && next_next && *next_next == '|');
2175 }
2176 
2177 
2178 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2179    false if it's not.  */
2180 
2181 static boolean
group_in_compile_stack(compile_stack,regnum)2182 group_in_compile_stack (compile_stack, regnum)
2183     compile_stack_type compile_stack;
2184     regnum_t regnum;
2185 {
2186   int this_element;
2187 
2188   for (this_element = compile_stack.avail - 1;
2189        this_element >= 0;
2190        this_element--)
2191     if (compile_stack.stack[this_element].regnum == regnum)
2192       return true;
2193 
2194   return false;
2195 }
2196 
2197 
2198 /* Read the ending character of a range (in a bracket expression) from the
2199    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2200    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2201    Then we set the translation of all bits between the starting and
2202    ending characters (inclusive) in the compiled pattern B.
2203 
2204    Return an error code.
2205 
2206    We use these short variable names so we can use the same macros as
2207    `regex_compile' itself.  */
2208 
2209 static reg_errcode_t
compile_range(p_ptr,pend,translate,syntax,b)2210 compile_range (p_ptr, pend, translate, syntax, b)
2211     const char **p_ptr, *pend;
2212     char *translate;
2213     reg_syntax_t syntax;
2214     unsigned char *b;
2215 {
2216   unsigned this_char;
2217 
2218   const char *p = *p_ptr;
2219   int range_start, range_end;
2220 
2221   if (p == pend)
2222     return REG_ERANGE;
2223 
2224   /* Even though the pattern is a signed `char *', we need to fetch
2225      with unsigned char *'s; if the high bit of the pattern character
2226      is set, the range endpoints will be negative if we fetch using a
2227      signed char *.
2228 
2229      We also want to fetch the endpoints without translating them; the
2230      appropriate translation is done in the bit-setting loop below.  */
2231   range_start = ((unsigned char *) p)[-2];
2232   range_end   = ((unsigned char *) p)[0];
2233 
2234   /* Have to increment the pointer into the pattern string, so the
2235      caller isn't still at the ending character.  */
2236   (*p_ptr)++;
2237 
2238   /* If the start is after the end, the range is empty.  */
2239   if (range_start > range_end)
2240     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2241 
2242   /* Here we see why `this_char' has to be larger than an `unsigned
2243      char' -- the range is inclusive, so if `range_end' == 0xff
2244      (assuming 8-bit characters), we would otherwise go into an infinite
2245      loop, since all characters <= 0xff.  */
2246   for (this_char = range_start; this_char <= range_end; this_char++)
2247     {
2248       SET_LIST_BIT (TRANSLATE (this_char));
2249     }
2250 
2251   return REG_NOERROR;
2252 }
2253 
2254 /* Failure stack declarations and macros; both re_compile_fastmap and
2255    re_match_2 use a failure stack.  These have to be macros because of
2256    REGEX_ALLOCATE.  */
2257 
2258 
2259 /* Number of failure points for which to initially allocate space
2260    when matching.  If this number is exceeded, we allocate more
2261    space, so it is not a hard limit.  */
2262 #ifndef INIT_FAILURE_ALLOC
2263 #define INIT_FAILURE_ALLOC 5
2264 #endif
2265 
2266 /* Roughly the maximum number of failure points on the stack.  Would be
2267    exactly that if always used MAX_FAILURE_SPACE each time we failed.
2268    This is a variable only so users of regex can assign to it; we never
2269    change it ourselves.  */
2270 
2271 /* change for rh glibc re_max_failures symbol collision - jpr5 */
2272 #define RE_MAX_FAILURES 2000
2273 
2274 typedef const unsigned char *fail_stack_elt_t;
2275 
2276 typedef struct
2277 {
2278   fail_stack_elt_t *stack;
2279   unsigned long size;
2280   unsigned long avail;			/* Offset of next open position.  */
2281 } fail_stack_type;
2282 
2283 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
2284 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2285 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
2286 #define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
2287 
2288 
2289 /* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
2290 
2291 #define INIT_FAIL_STACK()						\
2292   do {									\
2293     fail_stack.stack = (fail_stack_elt_t *)				\
2294       REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
2295 									\
2296     if (fail_stack.stack == NULL)					\
2297       return -2;							\
2298 									\
2299     fail_stack.size = INIT_FAILURE_ALLOC;				\
2300     fail_stack.avail = 0;						\
2301   } while (0)
2302 
2303 
2304 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2305 
2306    Return 1 if succeeds, and 0 if either ran out of memory
2307    allocating space for it or it was already too large.
2308 
2309    REGEX_REALLOCATE requires `destination' be declared.   */
2310 
2311 /* re_max_failures -> #define RE_MAX_FAILURES */
2312 #define DOUBLE_FAIL_STACK(fail_stack)					\
2313   ((fail_stack).size > RE_MAX_FAILURES * MAX_FAILURE_ITEMS		\
2314    ? 0									\
2315    : ((fail_stack).stack = (fail_stack_elt_t *)				\
2316         REGEX_REALLOCATE ((fail_stack).stack, 				\
2317           (fail_stack).size * sizeof (fail_stack_elt_t),		\
2318           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
2319 									\
2320       (fail_stack).stack == NULL					\
2321       ? 0								\
2322       : ((fail_stack).size <<= 1, 					\
2323          1)))
2324 
2325 
2326 /* Push PATTERN_OP on FAIL_STACK.
2327 
2328    Return 1 if was able to do so and 0 if ran out of memory allocating
2329    space to do so.  */
2330 #define PUSH_PATTERN_OP(pattern_op, fail_stack)				\
2331   ((FAIL_STACK_FULL ()							\
2332     && !DOUBLE_FAIL_STACK (fail_stack))					\
2333     ? 0									\
2334     : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,		\
2335        1))
2336 
2337 /* This pushes an item onto the failure stack.  Must be a four-byte
2338    value.  Assumes the variable `fail_stack'.  Probably should only
2339    be called from within `PUSH_FAILURE_POINT'.  */
2340 #define PUSH_FAILURE_ITEM(item)						\
2341   fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2342 
2343 /* The complement operation.  Assumes `fail_stack' is nonempty.  */
2344 #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2345 
2346 /* Used to omit pushing failure point id's when we're not debugging.  */
2347 #ifdef DEBUG
2348 #define DEBUG_PUSH PUSH_FAILURE_ITEM
2349 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2350 #else
2351 #define DEBUG_PUSH(item)
2352 #define DEBUG_POP(item_addr)
2353 #endif
2354 
2355 
2356 /* Push the information about the state we will need
2357    if we ever fail back to it.
2358 
2359    Requires variables fail_stack, regstart, regend, reg_info, and
2360    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
2361    declared.
2362 
2363    Does `return FAILURE_CODE' if runs out of memory.  */
2364 
2365 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
2366   do {									\
2367     char *destination;							\
2368     /* Must be int, so when we don't save any registers, the arithmetic	\
2369        of 0 + -1 isn't done as unsigned.  */				\
2370     int this_reg;							\
2371     									\
2372     DEBUG_STATEMENT (failure_id++);					\
2373     DEBUG_STATEMENT (nfailure_points_pushed++);				\
2374     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
2375     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
2376     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
2377 									\
2378     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
2379     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
2380 									\
2381     /* Ensure we have enough space allocated for what we will push.  */	\
2382     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
2383       {									\
2384         if (!DOUBLE_FAIL_STACK (fail_stack))			\
2385           return failure_code;						\
2386 									\
2387         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
2388 		       (fail_stack).size);				\
2389         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2390       }									\
2391 									\
2392     /* Push the info, starting with the registers.  */			\
2393     DEBUG_PRINT1 ("\n");						\
2394 									\
2395     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;	\
2396          this_reg++)							\
2397       {									\
2398 	DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);			\
2399         DEBUG_STATEMENT (num_regs_pushed++);				\
2400 									\
2401 	DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);		\
2402         PUSH_FAILURE_ITEM (regstart[this_reg]);				\
2403                                                                         \
2404 	DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
2405         PUSH_FAILURE_ITEM (regend[this_reg]);				\
2406 									\
2407 	DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
2408         DEBUG_PRINT2 (" match_null=%d",					\
2409                       REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
2410         DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
2411         DEBUG_PRINT2 (" matched_something=%d",				\
2412                       MATCHED_SOMETHING (reg_info[this_reg]));		\
2413         DEBUG_PRINT2 (" ever_matched=%d",				\
2414                       EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
2415 	DEBUG_PRINT1 ("\n");						\
2416         PUSH_FAILURE_ITEM (reg_info[this_reg].word);			\
2417       }									\
2418 									\
2419     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
2420     PUSH_FAILURE_ITEM (lowest_active_reg);				\
2421 									\
2422     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
2423     PUSH_FAILURE_ITEM (highest_active_reg);				\
2424 									\
2425     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
2426     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
2427     PUSH_FAILURE_ITEM (pattern_place);					\
2428 									\
2429     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
2430     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
2431 				 size2);				\
2432     DEBUG_PRINT1 ("'\n");						\
2433     PUSH_FAILURE_ITEM (string_place);					\
2434 									\
2435     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
2436     DEBUG_PUSH (failure_id);						\
2437   } while (0)
2438 
2439 /* This is the number of items that are pushed and popped on the stack
2440    for each register.  */
2441 #define NUM_REG_ITEMS  3
2442 
2443 /* Individual items aside from the registers.  */
2444 #ifdef DEBUG
2445 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
2446 #else
2447 #define NUM_NONREG_ITEMS 4
2448 #endif
2449 
2450 /* We push at most this many items on the stack.  */
2451 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2452 
2453 /* We actually push this many items.  */
2454 #define NUM_FAILURE_ITEMS						\
2455   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
2456     + NUM_NONREG_ITEMS)
2457 
2458 /* How many items can still be added to the stack without overflowing it.  */
2459 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2460 
2461 
2462 /* Pops what PUSH_FAIL_STACK pushes.
2463 
2464    We restore into the parameters, all of which should be lvalues:
2465      STR -- the saved data position.
2466      PAT -- the saved pattern position.
2467      LOW_REG, HIGH_REG -- the highest and lowest active registers.
2468      REGSTART, REGEND -- arrays of string positions.
2469      REG_INFO -- array of information about each subexpression.
2470 
2471    Also assumes the variables `fail_stack' and (if debugging), `bufp',
2472    `pend', `string1', `size1', `string2', and `size2'.  */
2473 
2474 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2475 {									\
2476   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
2477   int this_reg;								\
2478   const unsigned char *string_temp;					\
2479 									\
2480   assert (!FAIL_STACK_EMPTY ());					\
2481 									\
2482   /* Remove failure points and point to how many regs pushed.  */	\
2483   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
2484   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
2485   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
2486 									\
2487   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
2488 									\
2489   DEBUG_POP (&failure_id);						\
2490   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
2491 									\
2492   /* If the saved string location is NULL, it came from an		\
2493      on_failure_keep_string_jump opcode, and we want to throw away the	\
2494      saved NULL, thus retaining our current position in the string.  */	\
2495   string_temp = POP_FAILURE_ITEM ();					\
2496   if (string_temp != NULL)						\
2497     str = (const char *) string_temp;					\
2498 									\
2499   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
2500   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
2501   DEBUG_PRINT1 ("'\n");							\
2502 									\
2503   pat = (unsigned char *) POP_FAILURE_ITEM ();				\
2504   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
2505   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
2506 									\
2507   /* Restore register info.  */						\
2508   high_reg = (unsigned long) POP_FAILURE_ITEM ();			\
2509   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
2510 									\
2511   low_reg = (unsigned long) POP_FAILURE_ITEM ();			\
2512   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
2513 									\
2514   for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
2515     {									\
2516       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);			\
2517 									\
2518       reg_info[this_reg].word = POP_FAILURE_ITEM ();			\
2519       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);		\
2520 									\
2521       regend[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2522       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
2523 									\
2524       regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2525       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);		\
2526     }									\
2527 									\
2528   DEBUG_STATEMENT (nfailure_points_popped++);				\
2529 } /* POP_FAILURE_POINT */
2530 
2531 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2532    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2533    characters can start a string that matches the pattern.  This fastmap
2534    is used by re_search to skip quickly over impossible starting points.
2535 
2536    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2537    area as BUFP->fastmap.
2538 
2539    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2540    the pattern buffer.
2541 
2542    Returns 0 if we succeed, -2 if an internal error.   */
2543 
2544 int
re_compile_fastmap(bufp)2545 re_compile_fastmap (bufp)
2546      struct re_pattern_buffer *bufp;
2547 {
2548   int j, k;
2549   fail_stack_type fail_stack;
2550 #ifndef REGEX_MALLOC
2551   char *destination;
2552 #endif
2553   /* We don't push any register information onto the failure stack.  */
2554   unsigned num_regs = 0;
2555 
2556   register char *fastmap = bufp->fastmap;
2557   unsigned char *pattern = bufp->buffer;
2558   unsigned long size = bufp->used;
2559   const unsigned char *p = pattern;
2560   register unsigned char *pend = pattern + size;
2561 
2562   /* Assume that each path through the pattern can be null until
2563      proven otherwise.  We set this false at the bottom of switch
2564      statement, to which we get only if a particular path doesn't
2565      match the empty string.  */
2566   boolean path_can_be_null = true;
2567 
2568   /* We aren't doing a `succeed_n' to begin with.  */
2569   boolean succeed_n_p = false;
2570 
2571   assert (fastmap != NULL && p != NULL);
2572 
2573   INIT_FAIL_STACK ();
2574   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2575   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
2576   bufp->can_be_null = 0;
2577 
2578   while (p != pend || !FAIL_STACK_EMPTY ())
2579     {
2580       if (p == pend)
2581         {
2582           bufp->can_be_null |= path_can_be_null;
2583 
2584           /* Reset for next path.  */
2585           path_can_be_null = true;
2586 
2587           p = fail_stack.stack[--fail_stack.avail];
2588 	}
2589 
2590       /* We should never be about to go beyond the end of the pattern.  */
2591       assert (p < pend);
2592 
2593 #ifdef SWITCH_ENUM_BUG
2594       switch ((int) ((re_opcode_t) *p++))
2595 #else
2596       switch ((re_opcode_t) *p++)
2597 #endif
2598 	{
2599 
2600         /* I guess the idea here is to simply not bother with a fastmap
2601            if a backreference is used, since it's too hard to figure out
2602            the fastmap for the corresponding group.  Setting
2603            `can_be_null' stops `re_search_2' from using the fastmap, so
2604            that is all we do.  */
2605 	case duplicate:
2606 	  bufp->can_be_null = 1;
2607           return 0;
2608 
2609 
2610       /* Following are the cases which match a character.  These end
2611          with `break'.  */
2612 
2613 	case exactn:
2614           fastmap[p[1]] = 1;
2615 	  break;
2616 
2617 
2618         case charset:
2619           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2620 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2621               fastmap[j] = 1;
2622 	  break;
2623 
2624 
2625 	case charset_not:
2626 	  /* Chars beyond end of map must be allowed.  */
2627 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2628             fastmap[j] = 1;
2629 
2630 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2631 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2632               fastmap[j] = 1;
2633           break;
2634 
2635 
2636 	case wordchar:
2637 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2638 	    if (SYNTAX (j) == Sword)
2639 	      fastmap[j] = 1;
2640 	  break;
2641 
2642 
2643 	case notwordchar:
2644 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2645 	    if (SYNTAX (j) != Sword)
2646 	      fastmap[j] = 1;
2647 	  break;
2648 
2649 
2650         case anychar:
2651           /* `.' matches anything ...  */
2652 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2653             fastmap[j] = 1;
2654 
2655           /* ... except perhaps newline.  */
2656           if (!(bufp->syntax & RE_DOT_NEWLINE))
2657             fastmap['\n'] = 0;
2658 
2659           /* Return if we have already set `can_be_null'; if we have,
2660              then the fastmap is irrelevant.  Something's wrong here.  */
2661 	  else if (bufp->can_be_null)
2662 	    return 0;
2663 
2664           /* Otherwise, have to check alternative paths.  */
2665 	  break;
2666 
2667 
2668 #ifdef emacs
2669         case syntaxspec:
2670 	  k = *p++;
2671 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2672 	    if (SYNTAX (j) == (enum syntaxcode) k)
2673 	      fastmap[j] = 1;
2674 	  break;
2675 
2676 
2677 	case notsyntaxspec:
2678 	  k = *p++;
2679 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2680 	    if (SYNTAX (j) != (enum syntaxcode) k)
2681 	      fastmap[j] = 1;
2682 	  break;
2683 
2684 
2685       /* All cases after this match the empty string.  These end with
2686          `continue'.  */
2687 
2688 
2689 	case before_dot:
2690 	case at_dot:
2691 	case after_dot:
2692           continue;
2693 #endif /* not emacs */
2694 
2695 
2696         case no_op:
2697         case begline:
2698         case endline:
2699 	case begbuf:
2700 	case endbuf:
2701 	case wordbound:
2702 	case notwordbound:
2703 	case wordbeg:
2704 	case wordend:
2705         case push_dummy_failure:
2706           continue;
2707 
2708 
2709 	case jump_n:
2710         case pop_failure_jump:
2711 	case maybe_pop_jump:
2712 	case jump:
2713         case jump_past_alt:
2714 	case dummy_failure_jump:
2715           EXTRACT_NUMBER_AND_INCR (j, p);
2716 	  p += j;
2717 	  if (j > 0)
2718 	    continue;
2719 
2720           /* Jump backward implies we just went through the body of a
2721              loop and matched nothing.  Opcode jumped to should be
2722              `on_failure_jump' or `succeed_n'.  Just treat it like an
2723              ordinary jump.  For a * loop, it has pushed its failure
2724              point already; if so, discard that as redundant.  */
2725           if ((re_opcode_t) *p != on_failure_jump
2726 	      && (re_opcode_t) *p != succeed_n)
2727 	    continue;
2728 
2729           p++;
2730           EXTRACT_NUMBER_AND_INCR (j, p);
2731           p += j;
2732 
2733           /* If what's on the stack is where we are now, pop it.  */
2734           if (!FAIL_STACK_EMPTY ()
2735 	      && fail_stack.stack[fail_stack.avail - 1] == p)
2736             fail_stack.avail--;
2737 
2738           continue;
2739 
2740 
2741         case on_failure_jump:
2742         case on_failure_keep_string_jump:
2743 	handle_on_failure_jump:
2744           EXTRACT_NUMBER_AND_INCR (j, p);
2745 
2746           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2747              end of the pattern.  We don't want to push such a point,
2748              since when we restore it above, entering the switch will
2749              increment `p' past the end of the pattern.  We don't need
2750              to push such a point since we obviously won't find any more
2751              fastmap entries beyond `pend'.  Such a pattern can match
2752              the null string, though.  */
2753           if (p + j < pend)
2754             {
2755               if (!PUSH_PATTERN_OP (p + j, fail_stack))
2756                 return -2;
2757             }
2758           else
2759             bufp->can_be_null = 1;
2760 
2761           if (succeed_n_p)
2762             {
2763               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
2764               succeed_n_p = false;
2765 	    }
2766 
2767           continue;
2768 
2769 
2770 	case succeed_n:
2771           /* Get to the number of times to succeed.  */
2772           p += 2;
2773 
2774           /* Increment p past the n for when k != 0.  */
2775           EXTRACT_NUMBER_AND_INCR (k, p);
2776           if (k == 0)
2777 	    {
2778               p -= 4;
2779   	      succeed_n_p = true;  /* Spaghetti code alert.  */
2780               goto handle_on_failure_jump;
2781             }
2782           continue;
2783 
2784 
2785 	case set_number_at:
2786           p += 4;
2787           continue;
2788 
2789 
2790 	case start_memory:
2791         case stop_memory:
2792 	  p += 2;
2793 	  continue;
2794 
2795 
2796 	default:
2797           abort (); /* We have listed all the cases.  */
2798         } /* switch *p++ */
2799 
2800       /* Getting here means we have found the possible starting
2801          characters for one path of the pattern -- and that the empty
2802          string does not match.  We need not follow this path further.
2803          Instead, look at the next alternative (remembered on the
2804          stack), or quit if no more.  The test at the top of the loop
2805          does these things.  */
2806       path_can_be_null = false;
2807       p = pend;
2808     } /* while p */
2809 
2810   /* Set `can_be_null' for the last path (also the first path, if the
2811      pattern is empty).  */
2812   bufp->can_be_null |= path_can_be_null;
2813   return 0;
2814 } /* re_compile_fastmap */
2815 
2816 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2817    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
2818    this memory for recording register information.  STARTS and ENDS
2819    must be allocated using the malloc library routine, and must each
2820    be at least NUM_REGS * sizeof (regoff_t) bytes long.
2821 
2822    If NUM_REGS == 0, then subsequent matches should allocate their own
2823    register data.
2824 
2825    Unless this function is called, the first search or match using
2826    PATTERN_BUFFER will allocate its own register data, without
2827    freeing the old data.  */
2828 
2829 void
re_set_registers(bufp,regs,num_regs,starts,ends)2830 re_set_registers (bufp, regs, num_regs, starts, ends)
2831     struct re_pattern_buffer *bufp;
2832     struct re_registers *regs;
2833     unsigned num_regs;
2834     regoff_t *starts, *ends;
2835 {
2836   if (num_regs)
2837     {
2838       bufp->regs_allocated = REGS_REALLOCATE;
2839       regs->num_regs = num_regs;
2840       regs->start = starts;
2841       regs->end = ends;
2842     }
2843   else
2844     {
2845       bufp->regs_allocated = REGS_UNALLOCATED;
2846       regs->num_regs = 0;
2847       regs->start = regs->end = (regoff_t) 0;
2848     }
2849 }
2850 
2851 /* Searching routines.  */
2852 
2853 /* Like re_search_2, below, but only one string is specified, and
2854    doesn't let you say where to stop matching. */
2855 
2856 int
re_search(bufp,string,size,startpos,range,regs)2857 re_search (bufp, string, size, startpos, range, regs)
2858      struct re_pattern_buffer *bufp;
2859      const char *string;
2860      int size, startpos, range;
2861      struct re_registers *regs;
2862 {
2863   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2864 		      regs, size);
2865 }
2866 
2867 
2868 /* Using the compiled pattern in BUFP->buffer, first tries to match the
2869    virtual concatenation of STRING1 and STRING2, starting first at index
2870    STARTPOS, then at STARTPOS + 1, and so on.
2871 
2872    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2873 
2874    RANGE is how far to scan while trying to match.  RANGE = 0 means try
2875    only at STARTPOS; in general, the last start tried is STARTPOS +
2876    RANGE.
2877 
2878    In REGS, return the indices of the virtual concatenation of STRING1
2879    and STRING2 that matched the entire BUFP->buffer and its contained
2880    subexpressions.
2881 
2882    Do not consider matching one past the index STOP in the virtual
2883    concatenation of STRING1 and STRING2.
2884 
2885    We return either the position in the strings at which the match was
2886    found, -1 if no match, or -2 if error (such as failure
2887    stack overflow).  */
2888 
2889 int
re_search_2(bufp,string1,size1,string2,size2,startpos,range,regs,stop)2890 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2891      struct re_pattern_buffer *bufp;
2892      const char *string1, *string2;
2893      int size1, size2;
2894      int startpos;
2895      int range;
2896      struct re_registers *regs;
2897      int stop;
2898 {
2899   int val;
2900   register char *fastmap = bufp->fastmap;
2901   register char *translate = bufp->translate;
2902   int total_size = size1 + size2;
2903   int endpos = startpos + range;
2904 
2905   /* Check for out-of-range STARTPOS.  */
2906   if (startpos < 0 || startpos > total_size)
2907     return -1;
2908 
2909   /* Fix up RANGE if it might eventually take us outside
2910      the virtual concatenation of STRING1 and STRING2.  */
2911   if (endpos < -1)
2912     range = -1 - startpos;
2913   else if (endpos > total_size)
2914     range = total_size - startpos;
2915 
2916   /* If the search isn't to be a backwards one, don't waste time in a
2917      search for a pattern that must be anchored.  */
2918   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2919     {
2920       if (startpos > 0)
2921 	return -1;
2922       else
2923 	range = 1;
2924     }
2925 
2926   /* Update the fastmap now if not correct already.  */
2927   if (fastmap && !bufp->fastmap_accurate)
2928     if (re_compile_fastmap (bufp) == -2)
2929       return -2;
2930 
2931   /* Loop through the string, looking for a place to start matching.  */
2932   for (;;)
2933     {
2934       /* If a fastmap is supplied, skip quickly over characters that
2935          cannot be the start of a match.  If the pattern can match the
2936          null string, however, we don't need to skip characters; we want
2937          the first null string.  */
2938       if (fastmap && startpos < total_size && !bufp->can_be_null)
2939 	{
2940 	  if (range > 0)	/* Searching forwards.  */
2941 	    {
2942 	      register const char *d;
2943 	      register int lim = 0;
2944 	      int irange = range;
2945 
2946               if (startpos < size1 && startpos + range >= size1)
2947                 lim = range - (size1 - startpos);
2948 
2949 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2950 
2951               /* Written out as an if-else to avoid testing `translate'
2952                  inside the loop.  */
2953 	      if (translate)
2954                 while (range > lim
2955                        && !fastmap[(unsigned char)
2956 				   translate[(unsigned char) *d++]])
2957                   range--;
2958 	      else
2959                 while (range > lim && !fastmap[(unsigned char) *d++])
2960                   range--;
2961 
2962 	      startpos += irange - range;
2963 	    }
2964 	  else				/* Searching backwards.  */
2965 	    {
2966 	      register char c = (size1 == 0 || startpos >= size1
2967                                  ? string2[startpos - size1]
2968                                  : string1[startpos]);
2969 
2970 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
2971 		goto advance;
2972 	    }
2973 	}
2974 
2975       /* If can't match the null string, and that's all we have left, fail.  */
2976       if (range >= 0 && startpos == total_size && fastmap
2977           && !bufp->can_be_null)
2978 	return -1;
2979 
2980       val = re_match_2 (bufp, string1, size1, string2, size2,
2981 	                startpos, regs, stop);
2982       if (val >= 0)
2983 	return startpos;
2984 
2985       if (val == -2)
2986 	return -2;
2987 
2988     advance:
2989       if (!range)
2990         break;
2991       else if (range > 0)
2992         {
2993           range--;
2994           startpos++;
2995         }
2996       else
2997         {
2998           range++;
2999           startpos--;
3000         }
3001     }
3002   return -1;
3003 } /* re_search_2 */
3004 
3005 /* Declarations and macros for re_match_2.  */
3006 
3007 static int bcmp_translate ();
3008 static boolean alt_match_null_string_p (),
3009                common_op_match_null_string_p (),
3010                group_match_null_string_p ();
3011 
3012 /* Structure for per-register (a.k.a. per-group) information.
3013    This must not be longer than one word, because we push this value
3014    onto the failure stack.  Other register information, such as the
3015    starting and ending positions (which are addresses), and the list of
3016    inner groups (which is a bits list) are maintained in separate
3017    variables.
3018 
3019    We are making a (strictly speaking) nonportable assumption here: that
3020    the compiler will pack our bit fields into something that fits into
3021    the type of `word', i.e., is something that fits into one item on the
3022    failure stack.  */
3023 typedef union
3024 {
3025   fail_stack_elt_t word;
3026   struct
3027   {
3028       /* This field is one if this group can match the empty string,
3029          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
3030 #define MATCH_NULL_UNSET_VALUE 3
3031     unsigned match_null_string_p : 2;
3032     unsigned is_active : 1;
3033     unsigned matched_something : 1;
3034     unsigned ever_matched_something : 1;
3035   } bits;
3036 } register_info_type;
3037 
3038 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
3039 #define IS_ACTIVE(R)  ((R).bits.is_active)
3040 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
3041 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
3042 
3043 
3044 /* Call this when have matched a real character; it sets `matched' flags
3045    for the subexpressions which we are currently inside.  Also records
3046    that those subexprs have matched.  */
3047 #define SET_REGS_MATCHED()						\
3048   do									\
3049     {									\
3050       unsigned r;							\
3051       for (r = lowest_active_reg; r <= highest_active_reg; r++)		\
3052         {								\
3053           MATCHED_SOMETHING (reg_info[r])				\
3054             = EVER_MATCHED_SOMETHING (reg_info[r])			\
3055             = 1;							\
3056         }								\
3057     }									\
3058   while (0)
3059 
3060 
3061 /* This converts PTR, a pointer into one of the search strings `string1'
3062    and `string2' into an offset from the beginning of that string.  */
3063 #define POINTER_TO_OFFSET(ptr)						\
3064   (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3065 
3066 /* Registers are set to a sentinel when they haven't yet matched.  */
3067 #define REG_UNSET_VALUE ((char *) -1)
3068 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3069 
3070 
3071 /* Macros for dealing with the split strings in re_match_2.  */
3072 
3073 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3074 
3075 /* Call before fetching a character with *d.  This switches over to
3076    string2 if necessary.  */
3077 #define PREFETCH()							\
3078   while (d == dend)						    	\
3079     {									\
3080       /* End of string2 => fail.  */					\
3081       if (dend == end_match_2) 						\
3082         goto fail;							\
3083       /* End of string1 => advance to string2.  */ 			\
3084       d = string2;						        \
3085       dend = end_match_2;						\
3086     }
3087 
3088 
3089 /* Test if at very beginning or at very end of the virtual concatenation
3090    of `string1' and `string2'.  If only one string, it's `string2'.  */
3091 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3092 #define AT_STRINGS_END(d) ((d) == end2)
3093 
3094 
3095 /* Test if D points to a character which is word-constituent.  We have
3096    two special cases to check for: if past the end of string1, look at
3097    the first character in string2; and if before the beginning of
3098    string2, look at the last character in string1.  */
3099 #define WORDCHAR_P(d)							\
3100   (SYNTAX ((d) == end1 ? *string2					\
3101            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3102    == Sword)
3103 
3104 /* Test if the character before D and the one at D differ with respect
3105    to being word-constituent.  */
3106 #define AT_WORD_BOUNDARY(d)						\
3107   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3108    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3109 
3110 
3111 /* Free everything we malloc.  */
3112 #ifdef REGEX_MALLOC
3113 #define FREE_VAR(var) if (var) free (var); var = NULL
3114 #define FREE_VARIABLES()						\
3115   do {									\
3116     FREE_VAR (fail_stack.stack);					\
3117     FREE_VAR (regstart);						\
3118     FREE_VAR (regend);							\
3119     FREE_VAR (old_regstart);						\
3120     FREE_VAR (old_regend);						\
3121     FREE_VAR (best_regstart);						\
3122     FREE_VAR (best_regend);						\
3123     FREE_VAR (reg_info);						\
3124     FREE_VAR (reg_dummy);						\
3125     FREE_VAR (reg_info_dummy);						\
3126   } while (0)
3127 #else /* not REGEX_MALLOC */
3128 /* Some MIPS systems (at least) want this to free alloca'd storage.  */
3129 #define FREE_VARIABLES() alloca (0)
3130 #endif /* not REGEX_MALLOC */
3131 
3132 
3133 /* These values must meet several constraints.  They must not be valid
3134    register values; since we have a limit of 255 registers (because
3135    we use only one byte in the pattern for the register number), we can
3136    use numbers larger than 255.  They must differ by 1, because of
3137    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3138    be larger than the value for the highest register, so we do not try
3139    to actually save any registers when none are active.  */
3140 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3141 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3142 
3143 /* Matching routines.  */
3144 
3145 #ifndef emacs   /* Emacs never uses this.  */
3146 /* re_match is like re_match_2 except it takes only a single string.  */
3147 
3148 int
re_match(bufp,string,size,pos,regs)3149 re_match (bufp, string, size, pos, regs)
3150      struct re_pattern_buffer *bufp;
3151      const char *string;
3152      int size, pos;
3153      struct re_registers *regs;
3154  {
3155   return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
3156 }
3157 #endif /* not emacs */
3158 
3159 
3160 /* re_match_2 matches the compiled pattern in BUFP against the
3161    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3162    and SIZE2, respectively).  We start matching at POS, and stop
3163    matching at STOP.
3164 
3165    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3166    store offsets for the substring each group matched in REGS.  See the
3167    documentation for exactly how many groups we fill.
3168 
3169    We return -1 if no match, -2 if an internal error (such as the
3170    failure stack overflowing).  Otherwise, we return the length of the
3171    matched substring.  */
3172 
3173 int
re_match_2(bufp,string1,size1,string2,size2,pos,regs,stop)3174 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3175      struct re_pattern_buffer *bufp;
3176      const char *string1, *string2;
3177      int size1, size2;
3178      int pos;
3179      struct re_registers *regs;
3180      int stop;
3181 {
3182   /* General temporaries.  */
3183   int mcnt;
3184   unsigned char *p1;
3185 
3186   /* Just past the end of the corresponding string.  */
3187   const char *end1, *end2;
3188 
3189   /* Pointers into string1 and string2, just past the last characters in
3190      each to consider matching.  */
3191   const char *end_match_1, *end_match_2;
3192 
3193   /* Where we are in the data, and the end of the current string.  */
3194   const char *d, *dend;
3195 
3196   /* Where we are in the pattern, and the end of the pattern.  */
3197   unsigned char *p = bufp->buffer;
3198   register unsigned char *pend = p + bufp->used;
3199 
3200   /* We use this to map every character in the string.  */
3201   char *translate = bufp->translate;
3202 
3203   /* Failure point stack.  Each place that can handle a failure further
3204      down the line pushes a failure point on this stack.  It consists of
3205      restart, regend, and reg_info for all registers corresponding to
3206      the subexpressions we're currently inside, plus the number of such
3207      registers, and, finally, two char *'s.  The first char * is where
3208      to resume scanning the pattern; the second one is where to resume
3209      scanning the strings.  If the latter is zero, the failure point is
3210      a ``dummy''; if a failure happens and the failure point is a dummy,
3211      it gets discarded and the next next one is tried.  */
3212   fail_stack_type fail_stack;
3213 #ifdef DEBUG
3214   static unsigned failure_id = 0;
3215   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3216 #endif
3217 
3218   /* We fill all the registers internally, independent of what we
3219      return, for use in backreferences.  The number here includes
3220      an element for register zero.  */
3221   unsigned num_regs = bufp->re_nsub + 1;
3222 
3223   /* The currently active registers.  */
3224   unsigned long lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3225   unsigned long highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3226 
3227   /* Information on the contents of registers. These are pointers into
3228      the input strings; they record just what was matched (on this
3229      attempt) by a subexpression part of the pattern, that is, the
3230      regnum-th regstart pointer points to where in the pattern we began
3231      matching and the regnum-th regend points to right after where we
3232      stopped matching the regnum-th subexpression.  (The zeroth register
3233      keeps track of what the whole pattern matches.)  */
3234   const char **regstart, **regend;
3235 
3236   /* If a group that's operated upon by a repetition operator fails to
3237      match anything, then the register for its start will need to be
3238      restored because it will have been set to wherever in the string we
3239      are when we last see its open-group operator.  Similarly for a
3240      register's end.  */
3241   const char **old_regstart, **old_regend;
3242 
3243   /* The is_active field of reg_info helps us keep track of which (possibly
3244      nested) subexpressions we are currently in. The matched_something
3245      field of reg_info[reg_num] helps us tell whether or not we have
3246      matched any of the pattern so far this time through the reg_num-th
3247      subexpression.  These two fields get reset each time through any
3248      loop their register is in.  */
3249   register_info_type *reg_info;
3250 
3251   /* The following record the register info as found in the above
3252      variables when we find a match better than any we've seen before.
3253      This happens as we backtrack through the failure points, which in
3254      turn happens only if we have not yet matched the entire string. */
3255   unsigned best_regs_set = false;
3256   const char **best_regstart, **best_regend;
3257 
3258   /* Logically, this is `best_regend[0]'.  But we don't want to have to
3259      allocate space for that if we're not allocating space for anything
3260      else (see below).  Also, we never need info about register 0 for
3261      any of the other register vectors, and it seems rather a kludge to
3262      treat `best_regend' differently than the rest.  So we keep track of
3263      the end of the best match so far in a separate variable.  We
3264      initialize this to NULL so that when we backtrack the first time
3265      and need to test it, it's not garbage.  */
3266   const char *match_end = NULL;
3267 
3268   /* Used when we pop values we don't care about.  */
3269   const char **reg_dummy;
3270   register_info_type *reg_info_dummy;
3271 
3272 #ifdef DEBUG
3273   /* Counts the total number of registers pushed.  */
3274   unsigned num_regs_pushed = 0;
3275 #endif
3276 
3277   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3278 
3279   INIT_FAIL_STACK ();
3280 
3281   /* Do not bother to initialize all the register variables if there are
3282      no groups in the pattern, as it takes a fair amount of time.  If
3283      there are groups, we include space for register 0 (the whole
3284      pattern), even though we never use it, since it simplifies the
3285      array indexing.  We should fix this.  */
3286   if (bufp->re_nsub)
3287     {
3288       regstart = REGEX_TALLOC (num_regs, const char *);
3289       regend = REGEX_TALLOC (num_regs, const char *);
3290       old_regstart = REGEX_TALLOC (num_regs, const char *);
3291       old_regend = REGEX_TALLOC (num_regs, const char *);
3292       best_regstart = REGEX_TALLOC (num_regs, const char *);
3293       best_regend = REGEX_TALLOC (num_regs, const char *);
3294       reg_info = REGEX_TALLOC (num_regs, register_info_type);
3295       reg_dummy = REGEX_TALLOC (num_regs, const char *);
3296       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3297 
3298       if (!(regstart && regend && old_regstart && old_regend && reg_info
3299             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3300         {
3301           FREE_VARIABLES ();
3302           return -2;
3303         }
3304     }
3305 #ifdef REGEX_MALLOC
3306   else
3307     {
3308       /* We must initialize all our variables to NULL, so that
3309          `FREE_VARIABLES' doesn't try to free them.  */
3310       regstart = regend = old_regstart = old_regend = best_regstart
3311         = best_regend = reg_dummy = NULL;
3312       reg_info = reg_info_dummy = (register_info_type *) NULL;
3313     }
3314 #endif /* REGEX_MALLOC */
3315 
3316   /* The starting position is bogus.  */
3317   if (pos < 0 || pos > size1 + size2)
3318     {
3319       FREE_VARIABLES ();
3320       return -1;
3321     }
3322 
3323   /* Initialize subexpression text positions to -1 to mark ones that no
3324      start_memory/stop_memory has been seen for. Also initialize the
3325      register information struct.  */
3326   for (mcnt = 1; mcnt < num_regs; mcnt++)
3327     {
3328       regstart[mcnt] = regend[mcnt]
3329         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3330 
3331       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3332       IS_ACTIVE (reg_info[mcnt]) = 0;
3333       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3334       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3335     }
3336 
3337   /* We move `string1' into `string2' if the latter's empty -- but not if
3338      `string1' is null.  */
3339   if (size2 == 0 && string1 != NULL)
3340     {
3341       string2 = string1;
3342       size2 = size1;
3343       string1 = 0;
3344       size1 = 0;
3345     }
3346   end1 = string1 + size1;
3347   end2 = string2 + size2;
3348 
3349   /* Compute where to stop matching, within the two strings.  */
3350   if (stop <= size1)
3351     {
3352       end_match_1 = string1 + stop;
3353       end_match_2 = string2;
3354     }
3355   else
3356     {
3357       end_match_1 = end1;
3358       end_match_2 = string2 + stop - size1;
3359     }
3360 
3361   /* `p' scans through the pattern as `d' scans through the data.
3362      `dend' is the end of the input string that `d' points within.  `d'
3363      is advanced into the following input string whenever necessary, but
3364      this happens before fetching; therefore, at the beginning of the
3365      loop, `d' can be pointing at the end of a string, but it cannot
3366      equal `string2'.  */
3367   if (size1 > 0 && pos <= size1)
3368     {
3369       d = string1 + pos;
3370       dend = end_match_1;
3371     }
3372   else
3373     {
3374       d = string2 + pos - size1;
3375       dend = end_match_2;
3376     }
3377 
3378   DEBUG_PRINT1 ("The compiled pattern is: ");
3379   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3380   DEBUG_PRINT1 ("The string to match is: `");
3381   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3382   DEBUG_PRINT1 ("'\n");
3383 
3384   /* This loops over pattern commands.  It exits by returning from the
3385      function if the match is complete, or it drops through if the match
3386      fails at this starting point in the input data.  */
3387   for (;;)
3388     {
3389       DEBUG_PRINT2 ("\n0x%x: ", p);
3390 
3391       if (p == pend)
3392 	{ /* End of pattern means we might have succeeded.  */
3393           DEBUG_PRINT1 ("end of pattern ... ");
3394 
3395 	  /* If we haven't matched the entire string, and we want the
3396              longest match, try backtracking.  */
3397           if (d != end_match_2)
3398 	    {
3399               DEBUG_PRINT1 ("backtracking.\n");
3400 
3401               if (!FAIL_STACK_EMPTY ())
3402                 { /* More failure points to try.  */
3403                   boolean same_str_p = (FIRST_STRING_P (match_end)
3404 	        	                == MATCHING_IN_FIRST_STRING);
3405 
3406                   /* If exceeds best match so far, save it.  */
3407                   if (!best_regs_set
3408                       || (same_str_p && d > match_end)
3409                       || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3410                     {
3411                       best_regs_set = true;
3412                       match_end = d;
3413 
3414                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3415 
3416                       for (mcnt = 1; mcnt < num_regs; mcnt++)
3417                         {
3418                           best_regstart[mcnt] = regstart[mcnt];
3419                           best_regend[mcnt] = regend[mcnt];
3420                         }
3421                     }
3422                   goto fail;
3423                 }
3424 
3425               /* If no failure points, don't restore garbage.  */
3426               else if (best_regs_set)
3427                 {
3428   	        restore_best_regs:
3429                   /* Restore best match.  It may happen that `dend ==
3430                      end_match_1' while the restored d is in string2.
3431                      For example, the pattern `x.*y.*z' against the
3432                      strings `x-' and `y-z-', if the two strings are
3433                      not consecutive in memory.  */
3434                   DEBUG_PRINT1 ("Restoring best registers.\n");
3435 
3436                   d = match_end;
3437                   dend = ((d >= string1 && d <= end1)
3438 		           ? end_match_1 : end_match_2);
3439 
3440 		  for (mcnt = 1; mcnt < num_regs; mcnt++)
3441 		    {
3442 		      regstart[mcnt] = best_regstart[mcnt];
3443 		      regend[mcnt] = best_regend[mcnt];
3444 		    }
3445                 }
3446             } /* d != end_match_2 */
3447 
3448           DEBUG_PRINT1 ("Accepting match.\n");
3449 
3450           /* If caller wants register contents data back, do it.  */
3451           if (regs && !bufp->no_sub)
3452 	    {
3453               /* Have the register data arrays been allocated?  */
3454               if (bufp->regs_allocated == REGS_UNALLOCATED)
3455                 { /* No.  So allocate them with malloc.  We need one
3456                      extra element beyond `num_regs' for the `-1' marker
3457                      GNU code uses.  */
3458                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3459                   regs->start = TALLOC (regs->num_regs, regoff_t);
3460                   regs->end = TALLOC (regs->num_regs, regoff_t);
3461                   if (regs->start == NULL || regs->end == NULL)
3462                     return -2;
3463                   bufp->regs_allocated = REGS_REALLOCATE;
3464                 }
3465               else if (bufp->regs_allocated == REGS_REALLOCATE)
3466                 { /* Yes.  If we need more elements than were already
3467                      allocated, reallocate them.  If we need fewer, just
3468                      leave it alone.  */
3469                   if (regs->num_regs < num_regs + 1)
3470                     {
3471                       regs->num_regs = num_regs + 1;
3472                       RETALLOC (regs->start, regs->num_regs, regoff_t);
3473                       RETALLOC (regs->end, regs->num_regs, regoff_t);
3474                       if (regs->start == NULL || regs->end == NULL)
3475                         return -2;
3476                     }
3477                 }
3478               else
3479                 assert (bufp->regs_allocated == REGS_FIXED);
3480 
3481               /* Convert the pointer data in `regstart' and `regend' to
3482                  indices.  Register zero has to be set differently,
3483                  since we haven't kept track of any info for it.  */
3484               if (regs->num_regs > 0)
3485                 {
3486                   regs->start[0] = pos;
3487                   regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3488 			          : d - string2 + size1);
3489                 }
3490 
3491               /* Go through the first `min (num_regs, regs->num_regs)'
3492                  registers, since that is all we initialized.  */
3493 	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3494 		{
3495                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3496                     regs->start[mcnt] = regs->end[mcnt] = -1;
3497                   else
3498                     {
3499 		      regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3500                       regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3501                     }
3502 		}
3503 
3504               /* If the regs structure we return has more elements than
3505                  were in the pattern, set the extra elements to -1.  If
3506                  we (re)allocated the registers, this is the case,
3507                  because we always allocate enough to have at least one
3508                  -1 at the end.  */
3509               for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3510                 regs->start[mcnt] = regs->end[mcnt] = -1;
3511 	    } /* regs && !bufp->no_sub */
3512 
3513           FREE_VARIABLES ();
3514           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3515                         nfailure_points_pushed, nfailure_points_popped,
3516                         nfailure_points_pushed - nfailure_points_popped);
3517           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3518 
3519           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3520 			    ? string1
3521 			    : string2 - size1);
3522 
3523           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3524 
3525           return mcnt;
3526         }
3527 
3528       /* Otherwise match next pattern command.  */
3529 #ifdef SWITCH_ENUM_BUG
3530       switch ((int) ((re_opcode_t) *p++))
3531 #else
3532       switch ((re_opcode_t) *p++)
3533 #endif
3534 	{
3535         /* Ignore these.  Used to ignore the n of succeed_n's which
3536            currently have n == 0.  */
3537         case no_op:
3538           DEBUG_PRINT1 ("EXECUTING no_op.\n");
3539           break;
3540 
3541 
3542         /* Match the next n pattern characters exactly.  The following
3543            byte in the pattern defines n, and the n bytes after that
3544            are the characters to match.  */
3545 	case exactn:
3546 	  mcnt = *p++;
3547           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3548 
3549           /* This is written out as an if-else so we don't waste time
3550              testing `translate' inside the loop.  */
3551           if (translate)
3552 	    {
3553 	      do
3554 		{
3555 		  PREFETCH ();
3556 		  if (translate[(unsigned char) *d++] != (char) *p++)
3557                     goto fail;
3558 		}
3559 	      while (--mcnt);
3560 	    }
3561 	  else
3562 	    {
3563 	      do
3564 		{
3565 		  PREFETCH ();
3566 		  if (*d++ != (char) *p++) goto fail;
3567 		}
3568 	      while (--mcnt);
3569 	    }
3570 	  SET_REGS_MATCHED ();
3571           break;
3572 
3573 
3574         /* Match any character except possibly a newline or a null.  */
3575 	case anychar:
3576           DEBUG_PRINT1 ("EXECUTING anychar.\n");
3577 
3578           PREFETCH ();
3579 
3580           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3581               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3582 	    goto fail;
3583 
3584           SET_REGS_MATCHED ();
3585           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3586           d++;
3587 	  break;
3588 
3589 
3590 	case charset:
3591 	case charset_not:
3592 	  {
3593 	    register unsigned char c;
3594 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
3595 
3596             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3597 
3598 	    PREFETCH ();
3599 	    c = TRANSLATE (*d); /* The character to match.  */
3600 
3601             /* Cast to `unsigned' instead of `unsigned char' in case the
3602                bit list is a full 32 bytes long.  */
3603 	    if (c < (unsigned) (*p * BYTEWIDTH)
3604 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3605 	      not = !not;
3606 
3607 	    p += 1 + *p;
3608 
3609 	    if (!not) goto fail;
3610 
3611 	    SET_REGS_MATCHED ();
3612             d++;
3613 	    break;
3614 	  }
3615 
3616 
3617         /* The beginning of a group is represented by start_memory.
3618            The arguments are the register number in the next byte, and the
3619            number of groups inner to this one in the next.  The text
3620            matched within the group is recorded (in the internal
3621            registers data structure) under the register number.  */
3622         case start_memory:
3623 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3624 
3625           /* Find out if this group can match the empty string.  */
3626 	  p1 = p;		/* To send to group_match_null_string_p.  */
3627 
3628           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3629             REG_MATCH_NULL_STRING_P (reg_info[*p])
3630               = group_match_null_string_p (&p1, pend, reg_info);
3631 
3632           /* Save the position in the string where we were the last time
3633              we were at this open-group operator in case the group is
3634              operated upon by a repetition operator, e.g., with `(a*)*b'
3635              against `ab'; then we want to ignore where we are now in
3636              the string in case this attempt to match fails.  */
3637           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3638                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3639                              : regstart[*p];
3640 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
3641 			 POINTER_TO_OFFSET (old_regstart[*p]));
3642 
3643           regstart[*p] = d;
3644 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3645 
3646           IS_ACTIVE (reg_info[*p]) = 1;
3647           MATCHED_SOMETHING (reg_info[*p]) = 0;
3648 
3649           /* This is the new highest active register.  */
3650           highest_active_reg = *p;
3651 
3652           /* If nothing was active before, this is the new lowest active
3653              register.  */
3654           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3655             lowest_active_reg = *p;
3656 
3657           /* Move past the register number and inner group count.  */
3658           p += 2;
3659           break;
3660 
3661 
3662         /* The stop_memory opcode represents the end of a group.  Its
3663            arguments are the same as start_memory's: the register
3664            number, and the number of inner groups.  */
3665 	case stop_memory:
3666 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3667 
3668           /* We need to save the string position the last time we were at
3669              this close-group operator in case the group is operated
3670              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3671              against `aba'; then we want to ignore where we are now in
3672              the string in case this attempt to match fails.  */
3673           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3674                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
3675 			   : regend[*p];
3676 	  DEBUG_PRINT2 ("      old_regend: %d\n",
3677 			 POINTER_TO_OFFSET (old_regend[*p]));
3678 
3679           regend[*p] = d;
3680 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3681 
3682           /* This register isn't active anymore.  */
3683           IS_ACTIVE (reg_info[*p]) = 0;
3684 
3685           /* If this was the only register active, nothing is active
3686              anymore.  */
3687           if (lowest_active_reg == highest_active_reg)
3688             {
3689               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3690               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3691             }
3692           else
3693             { /* We must scan for the new highest active register, since
3694                  it isn't necessarily one less than now: consider
3695                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3696                  new highest active register is 1.  */
3697               unsigned char r = *p - 1;
3698               while (r > 0 && !IS_ACTIVE (reg_info[r]))
3699                 r--;
3700 
3701               /* If we end up at register zero, that means that we saved
3702                  the registers as the result of an `on_failure_jump', not
3703                  a `start_memory', and we jumped to past the innermost
3704                  `stop_memory'.  For example, in ((.)*) we save
3705                  registers 1 and 2 as a result of the *, but when we pop
3706                  back to the second ), we are at the stop_memory 1.
3707                  Thus, nothing is active.  */
3708 	      if (r == 0)
3709                 {
3710                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3711                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3712                 }
3713               else
3714                 highest_active_reg = r;
3715             }
3716 
3717           /* If just failed to match something this time around with a
3718              group that's operated on by a repetition operator, try to
3719              force exit from the ``loop'', and restore the register
3720              information for this group that we had before trying this
3721              last match.  */
3722           if ((!MATCHED_SOMETHING (reg_info[*p])
3723                || (re_opcode_t) p[-3] == start_memory)
3724 	      && (p + 2) < pend)
3725             {
3726               boolean is_a_jump_n = false;
3727 
3728               p1 = p + 2;
3729               mcnt = 0;
3730               switch ((re_opcode_t) *p1++)
3731                 {
3732                   case jump_n:
3733 		    is_a_jump_n = true;
3734                   case pop_failure_jump:
3735 		  case maybe_pop_jump:
3736 		  case jump:
3737 		  case dummy_failure_jump:
3738                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3739 		    if (is_a_jump_n)
3740 		      p1 += 2;
3741                     break;
3742 
3743                   default:
3744                     /* do nothing */ ;
3745                 }
3746 	      p1 += mcnt;
3747 
3748               /* If the next operation is a jump backwards in the pattern
3749 	         to an on_failure_jump right before the start_memory
3750                  corresponding to this stop_memory, exit from the loop
3751                  by forcing a failure after pushing on the stack the
3752                  on_failure_jump's jump in the pattern, and d.  */
3753               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3754                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3755 		{
3756                   /* If this group ever matched anything, then restore
3757                      what its registers were before trying this last
3758                      failed match, e.g., with `(a*)*b' against `ab' for
3759                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
3760                      against `aba' for regend[3].
3761 
3762                      Also restore the registers for inner groups for,
3763                      e.g., `((a*)(b*))*' against `aba' (register 3 would
3764                      otherwise get trashed).  */
3765 
3766                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3767 		    {
3768 		      unsigned r;
3769 
3770                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3771 
3772 		      /* Restore this and inner groups' (if any) registers.  */
3773                       for (r = *p; r < *p + *(p + 1); r++)
3774                         {
3775                           regstart[r] = old_regstart[r];
3776 
3777                           /* xx why this test?  */
3778                           if ((long) old_regend[r] >= (long) regstart[r])
3779                             regend[r] = old_regend[r];
3780                         }
3781                     }
3782 		  p1++;
3783                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3784                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3785 
3786                   goto fail;
3787                 }
3788             }
3789 
3790           /* Move past the register number and the inner group count.  */
3791           p += 2;
3792           break;
3793 
3794 
3795 	/* \<digit> has been turned into a `duplicate' command which is
3796            followed by the numeric value of <digit> as the register number.  */
3797         case duplicate:
3798 	  {
3799 	    register const char *d2, *dend2;
3800 	    int regno = *p++;   /* Get which register to match against.  */
3801 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3802 
3803 	    /* Can't back reference a group which we've never matched.  */
3804             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3805               goto fail;
3806 
3807             /* Where in input to try to start matching.  */
3808             d2 = regstart[regno];
3809 
3810             /* Where to stop matching; if both the place to start and
3811                the place to stop matching are in the same string, then
3812                set to the place to stop, otherwise, for now have to use
3813                the end of the first string.  */
3814 
3815             dend2 = ((FIRST_STRING_P (regstart[regno])
3816 		      == FIRST_STRING_P (regend[regno]))
3817 		     ? regend[regno] : end_match_1);
3818 	    for (;;)
3819 	      {
3820 		/* If necessary, advance to next segment in register
3821                    contents.  */
3822 		while (d2 == dend2)
3823 		  {
3824 		    if (dend2 == end_match_2) break;
3825 		    if (dend2 == regend[regno]) break;
3826 
3827                     /* End of string1 => advance to string2. */
3828                     d2 = string2;
3829                     dend2 = regend[regno];
3830 		  }
3831 		/* At end of register contents => success */
3832 		if (d2 == dend2) break;
3833 
3834 		/* If necessary, advance to next segment in data.  */
3835 		PREFETCH ();
3836 
3837 		/* How many characters left in this segment to match.  */
3838 		mcnt = dend - d;
3839 
3840 		/* Want how many consecutive characters we can match in
3841                    one shot, so, if necessary, adjust the count.  */
3842                 if (mcnt > dend2 - d2)
3843 		  mcnt = dend2 - d2;
3844 
3845 		/* Compare that many; failure if mismatch, else move
3846                    past them.  */
3847 		if (translate
3848                     ? bcmp_translate (d, d2, mcnt, translate)
3849                     : bcmp (d, d2, mcnt))
3850 		  goto fail;
3851 		d += mcnt, d2 += mcnt;
3852 	      }
3853 	  }
3854 	  break;
3855 
3856 
3857         /* begline matches the empty string at the beginning of the string
3858            (unless `not_bol' is set in `bufp'), and, if
3859            `newline_anchor' is set, after newlines.  */
3860 	case begline:
3861           DEBUG_PRINT1 ("EXECUTING begline.\n");
3862 
3863           if (AT_STRINGS_BEG (d))
3864             {
3865               if (!bufp->not_bol) break;
3866             }
3867           else if (d[-1] == '\n' && bufp->newline_anchor)
3868             {
3869               break;
3870             }
3871           /* In all other cases, we fail.  */
3872           goto fail;
3873 
3874 
3875         /* endline is the dual of begline.  */
3876 	case endline:
3877           DEBUG_PRINT1 ("EXECUTING endline.\n");
3878 
3879           if (AT_STRINGS_END (d))
3880             {
3881               if (!bufp->not_eol) break;
3882             }
3883 
3884           /* We have to ``prefetch'' the next character.  */
3885           else if ((d == end1 ? *string2 : *d) == '\n'
3886                    && bufp->newline_anchor)
3887             {
3888               break;
3889             }
3890           goto fail;
3891 
3892 
3893 	/* Match at the very beginning of the data.  */
3894         case begbuf:
3895           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3896           if (AT_STRINGS_BEG (d))
3897             break;
3898           goto fail;
3899 
3900 
3901 	/* Match at the very end of the data.  */
3902         case endbuf:
3903           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3904 	  if (AT_STRINGS_END (d))
3905 	    break;
3906           goto fail;
3907 
3908 
3909         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
3910            pushes NULL as the value for the string on the stack.  Then
3911            `pop_failure_point' will keep the current value for the
3912            string, instead of restoring it.  To see why, consider
3913            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
3914            then the . fails against the \n.  But the next thing we want
3915            to do is match the \n against the \n; if we restored the
3916            string value, we would be back at the foo.
3917 
3918            Because this is used only in specific cases, we don't need to
3919            check all the things that `on_failure_jump' does, to make
3920            sure the right things get saved on the stack.  Hence we don't
3921            share its code.  The only reason to push anything on the
3922            stack at all is that otherwise we would have to change
3923            `anychar's code to do something besides goto fail in this
3924            case; that seems worse than this.  */
3925         case on_failure_keep_string_jump:
3926           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3927 
3928           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3929           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3930 
3931           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3932           break;
3933 
3934 
3935 	/* Uses of on_failure_jump:
3936 
3937            Each alternative starts with an on_failure_jump that points
3938            to the beginning of the next alternative.  Each alternative
3939            except the last ends with a jump that in effect jumps past
3940            the rest of the alternatives.  (They really jump to the
3941            ending jump of the following alternative, because tensioning
3942            these jumps is a hassle.)
3943 
3944            Repeats start with an on_failure_jump that points past both
3945            the repetition text and either the following jump or
3946            pop_failure_jump back to this on_failure_jump.  */
3947 	case on_failure_jump:
3948         on_failure:
3949           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3950 
3951           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3952           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3953 
3954           /* If this on_failure_jump comes right before a group (i.e.,
3955              the original * applied to a group), save the information
3956              for that group and all inner ones, so that if we fail back
3957              to this point, the group's information will be correct.
3958              For example, in \(a*\)*\1, we need the preceding group,
3959              and in \(\(a*\)b*\)\2, we need the inner group.  */
3960 
3961           /* We can't use `p' to check ahead because we push
3962              a failure point to `p + mcnt' after we do this.  */
3963           p1 = p;
3964 
3965           /* We need to skip no_op's before we look for the
3966              start_memory in case this on_failure_jump is happening as
3967              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3968              against aba.  */
3969           while (p1 < pend && (re_opcode_t) *p1 == no_op)
3970             p1++;
3971 
3972           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3973             {
3974               /* We have a new highest active register now.  This will
3975                  get reset at the start_memory we are about to get to,
3976                  but we will have saved all the registers relevant to
3977                  this repetition op, as described above.  */
3978               highest_active_reg = *(p1 + 1) + *(p1 + 2);
3979               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3980                 lowest_active_reg = *(p1 + 1);
3981             }
3982 
3983           DEBUG_PRINT1 (":\n");
3984           PUSH_FAILURE_POINT (p + mcnt, d, -2);
3985           break;
3986 
3987 
3988         /* A smart repeat ends with `maybe_pop_jump'.
3989 	   We change it to either `pop_failure_jump' or `jump'.  */
3990         case maybe_pop_jump:
3991           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3992           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
3993           {
3994 	    register unsigned char *p2 = p;
3995 
3996             /* Compare the beginning of the repeat with what in the
3997                pattern follows its end. If we can establish that there
3998                is nothing that they would both match, i.e., that we
3999                would have to backtrack because of (as in, e.g., `a*a')
4000                then we can change to pop_failure_jump, because we'll
4001                never have to backtrack.
4002 
4003                This is not true in the case of alternatives: in
4004                `(a|ab)*' we do need to backtrack to the `ab' alternative
4005                (e.g., if the string was `ab').  But instead of trying to
4006                detect that here, the alternative has put on a dummy
4007                failure point which is what we will end up popping.  */
4008 
4009 	    /* Skip over open/close-group commands.  */
4010 	    while (p2 + 2 < pend
4011 		   && ((re_opcode_t) *p2 == stop_memory
4012 		       || (re_opcode_t) *p2 == start_memory))
4013 	      p2 += 3;			/* Skip over args, too.  */
4014 
4015             /* If we're at the end of the pattern, we can change.  */
4016             if (p2 == pend)
4017 	      {
4018 		/* Consider what happens when matching ":\(.*\)"
4019 		   against ":/".  I don't really understand this code
4020 		   yet.  */
4021   	        p[-3] = (unsigned char) pop_failure_jump;
4022                 DEBUG_PRINT1
4023                   ("  End of pattern: change to `pop_failure_jump'.\n");
4024               }
4025 
4026             else if ((re_opcode_t) *p2 == exactn
4027 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4028 	      {
4029 		register unsigned char c
4030                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4031 		p1 = p + mcnt;
4032 
4033                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4034                    to the `maybe_finalize_jump' of this case.  Examine what
4035                    follows.  */
4036                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4037                   {
4038   		    p[-3] = (unsigned char) pop_failure_jump;
4039                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4040                                   c, p1[5]);
4041                   }
4042 
4043 		else if ((re_opcode_t) p1[3] == charset
4044 			 || (re_opcode_t) p1[3] == charset_not)
4045 		  {
4046 		    int not = (re_opcode_t) p1[3] == charset_not;
4047 
4048 		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4049 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4050 		      not = !not;
4051 
4052                     /* `not' is equal to 1 if c would match, which means
4053                         that we can't change to pop_failure_jump.  */
4054 		    if (!not)
4055                       {
4056   		        p[-3] = (unsigned char) pop_failure_jump;
4057                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4058                       }
4059 		  }
4060 	      }
4061 	  }
4062 	  p -= 2;		/* Point at relative address again.  */
4063 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4064 	    {
4065 	      p[-1] = (unsigned char) jump;
4066               DEBUG_PRINT1 ("  Match => jump.\n");
4067 	      goto unconditional_jump;
4068 	    }
4069         /* Note fall through.  */
4070 
4071 
4072 	/* The end of a simple repeat has a pop_failure_jump back to
4073            its matching on_failure_jump, where the latter will push a
4074            failure point.  The pop_failure_jump takes off failure
4075            points put on by this pop_failure_jump's matching
4076            on_failure_jump; we got through the pattern to here from the
4077            matching on_failure_jump, so didn't fail.  */
4078         case pop_failure_jump:
4079           {
4080             /* We need to pass separate storage for the lowest and
4081                highest registers, even though we don't care about the
4082                actual values.  Otherwise, we will restore only one
4083                register from the stack, since lowest will == highest in
4084                `pop_failure_point'.  */
4085             unsigned dummy_low_reg, dummy_high_reg;
4086             unsigned char *pdummy;
4087             const char *sdummy;
4088 
4089             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4090             POP_FAILURE_POINT (sdummy, pdummy,
4091                                dummy_low_reg, dummy_high_reg,
4092                                reg_dummy, reg_dummy, reg_info_dummy);
4093           }
4094           /* Note fall through.  */
4095 
4096 
4097         /* Unconditionally jump (without popping any failure points).  */
4098         case jump:
4099 	unconditional_jump:
4100 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4101           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4102 	  p += mcnt;				/* Do the jump.  */
4103           DEBUG_PRINT2 ("(to 0x%x).\n", p);
4104 	  break;
4105 
4106 
4107         /* We need this opcode so we can detect where alternatives end
4108            in `group_match_null_string_p' et al.  */
4109         case jump_past_alt:
4110           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4111           goto unconditional_jump;
4112 
4113 
4114         /* Normally, the on_failure_jump pushes a failure point, which
4115            then gets popped at pop_failure_jump.  We will end up at
4116            pop_failure_jump, also, and with a pattern of, say, `a+', we
4117            are skipping over the on_failure_jump, so we have to push
4118            something meaningless for pop_failure_jump to pop.  */
4119         case dummy_failure_jump:
4120           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4121           /* It doesn't matter what we push for the string here.  What
4122              the code at `fail' tests is the value for the pattern.  */
4123           PUSH_FAILURE_POINT (0, 0, -2);
4124           goto unconditional_jump;
4125 
4126 
4127         /* At the end of an alternative, we need to push a dummy failure
4128            point in case we are followed by a `pop_failure_jump', because
4129            we don't want the failure point for the alternative to be
4130            popped.  For example, matching `(a|ab)*' against `aab'
4131            requires that we match the `ab' alternative.  */
4132         case push_dummy_failure:
4133           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4134           /* See comments just above at `dummy_failure_jump' about the
4135              two zeroes.  */
4136           PUSH_FAILURE_POINT (0, 0, -2);
4137           break;
4138 
4139         /* Have to succeed matching what follows at least n times.
4140            After that, handle like `on_failure_jump'.  */
4141         case succeed_n:
4142           EXTRACT_NUMBER (mcnt, p + 2);
4143           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4144 
4145           assert (mcnt >= 0);
4146           /* Originally, this is how many times we HAVE to succeed.  */
4147           if (mcnt > 0)
4148             {
4149                mcnt--;
4150 	       p += 2;
4151                STORE_NUMBER_AND_INCR (p, mcnt);
4152                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4153             }
4154 	  else if (mcnt == 0)
4155             {
4156               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4157 	      p[2] = (unsigned char) no_op;
4158               p[3] = (unsigned char) no_op;
4159               goto on_failure;
4160             }
4161           break;
4162 
4163         case jump_n:
4164           EXTRACT_NUMBER (mcnt, p + 2);
4165           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4166 
4167           /* Originally, this is how many times we CAN jump.  */
4168           if (mcnt)
4169             {
4170                mcnt--;
4171                STORE_NUMBER (p + 2, mcnt);
4172 	       goto unconditional_jump;
4173             }
4174           /* If don't have to jump any more, skip over the rest of command.  */
4175 	  else
4176 	    p += 4;
4177           break;
4178 
4179 	case set_number_at:
4180 	  {
4181             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4182 
4183             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4184             p1 = p + mcnt;
4185             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4186             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4187 	    STORE_NUMBER (p1, mcnt);
4188             break;
4189           }
4190 
4191         case wordbound:
4192           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4193           if (AT_WORD_BOUNDARY (d))
4194 	    break;
4195           goto fail;
4196 
4197 	case notwordbound:
4198           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4199 	  if (AT_WORD_BOUNDARY (d))
4200 	    goto fail;
4201           break;
4202 
4203 	case wordbeg:
4204           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4205 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4206 	    break;
4207           goto fail;
4208 
4209 	case wordend:
4210           DEBUG_PRINT1 ("EXECUTING wordend.\n");
4211 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4212               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4213 	    break;
4214           goto fail;
4215 
4216 #ifdef emacs
4217 #ifdef emacs19
4218   	case before_dot:
4219           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4220  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4221   	    goto fail;
4222   	  break;
4223 
4224   	case at_dot:
4225           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4226  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
4227   	    goto fail;
4228   	  break;
4229 
4230   	case after_dot:
4231           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4232           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4233   	    goto fail;
4234   	  break;
4235 #else /* not emacs19 */
4236 	case at_dot:
4237           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4238 	  if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4239 	    goto fail;
4240 	  break;
4241 #endif /* not emacs19 */
4242 
4243 	case syntaxspec:
4244           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4245 	  mcnt = *p++;
4246 	  goto matchsyntax;
4247 
4248         case wordchar:
4249           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4250 	  mcnt = (int) Sword;
4251         matchsyntax:
4252 	  PREFETCH ();
4253 	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4254             goto fail;
4255           SET_REGS_MATCHED ();
4256 	  break;
4257 
4258 	case notsyntaxspec:
4259           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4260 	  mcnt = *p++;
4261 	  goto matchnotsyntax;
4262 
4263         case notwordchar:
4264           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4265 	  mcnt = (int) Sword;
4266         matchnotsyntax:
4267 	  PREFETCH ();
4268 	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4269             goto fail;
4270 	  SET_REGS_MATCHED ();
4271           break;
4272 
4273 #else /* not emacs */
4274 	case wordchar:
4275           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4276 	  PREFETCH ();
4277           if (!WORDCHAR_P (d))
4278             goto fail;
4279 	  SET_REGS_MATCHED ();
4280           d++;
4281 	  break;
4282 
4283 	case notwordchar:
4284           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4285 	  PREFETCH ();
4286 	  if (WORDCHAR_P (d))
4287             goto fail;
4288           SET_REGS_MATCHED ();
4289           d++;
4290 	  break;
4291 #endif /* not emacs */
4292 
4293         default:
4294           abort ();
4295 	}
4296       continue;  /* Successfully executed one pattern command; keep going.  */
4297 
4298 
4299     /* We goto here if a matching operation fails. */
4300     fail:
4301       if (!FAIL_STACK_EMPTY ())
4302 	{ /* A restart point is known.  Restore to that state.  */
4303           DEBUG_PRINT1 ("\nFAIL:\n");
4304           POP_FAILURE_POINT (d, p,
4305                              lowest_active_reg, highest_active_reg,
4306                              regstart, regend, reg_info);
4307 
4308           /* If this failure point is a dummy, try the next one.  */
4309           if (!p)
4310 	    goto fail;
4311 
4312           /* If we failed to the end of the pattern, don't examine *p.  */
4313 	  assert (p <= pend);
4314           if (p < pend)
4315             {
4316               boolean is_a_jump_n = false;
4317 
4318               /* If failed to a backwards jump that's part of a repetition
4319                  loop, need to pop this failure point and use the next one.  */
4320               switch ((re_opcode_t) *p)
4321                 {
4322                 case jump_n:
4323                   is_a_jump_n = true;
4324                 case maybe_pop_jump:
4325                 case pop_failure_jump:
4326                 case jump:
4327                   p1 = p + 1;
4328                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4329                   p1 += mcnt;
4330 
4331                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4332                       || (!is_a_jump_n
4333                           && (re_opcode_t) *p1 == on_failure_jump))
4334                     goto fail;
4335                   break;
4336                 default:
4337                   /* do nothing */ ;
4338                 }
4339             }
4340 
4341           if (d >= string1 && d <= end1)
4342 	    dend = end_match_1;
4343         }
4344       else
4345         break;   /* Matching at this starting point really fails.  */
4346     } /* for (;;) */
4347 
4348   if (best_regs_set)
4349     goto restore_best_regs;
4350 
4351   FREE_VARIABLES ();
4352 
4353   return -1;         			/* Failure to match.  */
4354 } /* re_match_2 */
4355 
4356 /* Subroutine definitions for re_match_2.  */
4357 
4358 
4359 /* We are passed P pointing to a register number after a start_memory.
4360 
4361    Return true if the pattern up to the corresponding stop_memory can
4362    match the empty string, and false otherwise.
4363 
4364    If we find the matching stop_memory, sets P to point to one past its number.
4365    Otherwise, sets P to an undefined byte less than or equal to END.
4366 
4367    We don't handle duplicates properly (yet).  */
4368 
4369 static boolean
group_match_null_string_p(p,end,reg_info)4370 group_match_null_string_p (p, end, reg_info)
4371     unsigned char **p, *end;
4372     register_info_type *reg_info;
4373 {
4374   int mcnt;
4375   /* Point to after the args to the start_memory.  */
4376   unsigned char *p1 = *p + 2;
4377 
4378   while (p1 < end)
4379     {
4380       /* Skip over opcodes that can match nothing, and return true or
4381 	 false, as appropriate, when we get to one that can't, or to the
4382          matching stop_memory.  */
4383 
4384       switch ((re_opcode_t) *p1)
4385         {
4386         /* Could be either a loop or a series of alternatives.  */
4387         case on_failure_jump:
4388           p1++;
4389           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4390 
4391           /* If the next operation is not a jump backwards in the
4392 	     pattern.  */
4393 
4394 	  if (mcnt >= 0)
4395 	    {
4396               /* Go through the on_failure_jumps of the alternatives,
4397                  seeing if any of the alternatives cannot match nothing.
4398                  The last alternative starts with only a jump,
4399                  whereas the rest start with on_failure_jump and end
4400                  with a jump, e.g., here is the pattern for `a|b|c':
4401 
4402                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4403                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4404                  /exactn/1/c
4405 
4406                  So, we have to first go through the first (n-1)
4407                  alternatives and then deal with the last one separately.  */
4408 
4409 
4410               /* Deal with the first (n-1) alternatives, which start
4411                  with an on_failure_jump (see above) that jumps to right
4412                  past a jump_past_alt.  */
4413 
4414               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4415                 {
4416                   /* `mcnt' holds how many bytes long the alternative
4417                      is, including the ending `jump_past_alt' and
4418                      its number.  */
4419 
4420                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4421 				                      reg_info))
4422                     return false;
4423 
4424                   /* Move to right after this alternative, including the
4425 		     jump_past_alt.  */
4426                   p1 += mcnt;
4427 
4428                   /* Break if it's the beginning of an n-th alternative
4429                      that doesn't begin with an on_failure_jump.  */
4430                   if ((re_opcode_t) *p1 != on_failure_jump)
4431                     break;
4432 
4433 		  /* Still have to check that it's not an n-th
4434 		     alternative that starts with an on_failure_jump.  */
4435 		  p1++;
4436                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4437                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4438                     {
4439 		      /* Get to the beginning of the n-th alternative.  */
4440                       p1 -= 3;
4441                       break;
4442                     }
4443                 }
4444 
4445               /* Deal with the last alternative: go back and get number
4446                  of the `jump_past_alt' just before it.  `mcnt' contains
4447                  the length of the alternative.  */
4448               EXTRACT_NUMBER (mcnt, p1 - 2);
4449 
4450               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4451                 return false;
4452 
4453               p1 += mcnt;	/* Get past the n-th alternative.  */
4454             } /* if mcnt > 0 */
4455           break;
4456 
4457 
4458         case stop_memory:
4459 	  assert (p1[1] == **p);
4460           *p = p1 + 2;
4461           return true;
4462 
4463 
4464         default:
4465           if (!common_op_match_null_string_p (&p1, end, reg_info))
4466             return false;
4467         }
4468     } /* while p1 < end */
4469 
4470   return false;
4471 } /* group_match_null_string_p */
4472 
4473 
4474 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4475    It expects P to be the first byte of a single alternative and END one
4476    byte past the last. The alternative can contain groups.  */
4477 
4478 static boolean
alt_match_null_string_p(p,end,reg_info)4479 alt_match_null_string_p (p, end, reg_info)
4480     unsigned char *p, *end;
4481     register_info_type *reg_info;
4482 {
4483   int mcnt;
4484   unsigned char *p1 = p;
4485 
4486   while (p1 < end)
4487     {
4488       /* Skip over opcodes that can match nothing, and break when we get
4489          to one that can't.  */
4490 
4491       switch ((re_opcode_t) *p1)
4492         {
4493 	/* It's a loop.  */
4494         case on_failure_jump:
4495           p1++;
4496           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4497           p1 += mcnt;
4498           break;
4499 
4500 	default:
4501           if (!common_op_match_null_string_p (&p1, end, reg_info))
4502             return false;
4503         }
4504     }  /* while p1 < end */
4505 
4506   return true;
4507 } /* alt_match_null_string_p */
4508 
4509 
4510 /* Deals with the ops common to group_match_null_string_p and
4511    alt_match_null_string_p.
4512 
4513    Sets P to one after the op and its arguments, if any.  */
4514 
4515 static boolean
common_op_match_null_string_p(p,end,reg_info)4516 common_op_match_null_string_p (p, end, reg_info)
4517     unsigned char **p, *end;
4518     register_info_type *reg_info;
4519 {
4520   int mcnt;
4521   boolean ret;
4522   int reg_no;
4523   unsigned char *p1 = *p;
4524 
4525   switch ((re_opcode_t) *p1++)
4526     {
4527     case no_op:
4528     case begline:
4529     case endline:
4530     case begbuf:
4531     case endbuf:
4532     case wordbeg:
4533     case wordend:
4534     case wordbound:
4535     case notwordbound:
4536 #ifdef emacs
4537     case before_dot:
4538     case at_dot:
4539     case after_dot:
4540 #endif
4541       break;
4542 
4543     case start_memory:
4544       reg_no = *p1;
4545       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4546       ret = group_match_null_string_p (&p1, end, reg_info);
4547 
4548       /* Have to set this here in case we're checking a group which
4549          contains a group and a back reference to it.  */
4550 
4551       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4552         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4553 
4554       if (!ret)
4555         return false;
4556       break;
4557 
4558     /* If this is an optimized succeed_n for zero times, make the jump.  */
4559     case jump:
4560       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4561       if (mcnt >= 0)
4562         p1 += mcnt;
4563       else
4564         return false;
4565       break;
4566 
4567     case succeed_n:
4568       /* Get to the number of times to succeed.  */
4569       p1 += 2;
4570       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4571 
4572       if (mcnt == 0)
4573         {
4574           p1 -= 4;
4575           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4576           p1 += mcnt;
4577         }
4578       else
4579         return false;
4580       break;
4581 
4582     case duplicate:
4583       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4584         return false;
4585       break;
4586 
4587     case set_number_at:
4588       p1 += 4;
4589 
4590     default:
4591       /* All other opcodes mean we cannot match the empty string.  */
4592       return false;
4593   }
4594 
4595   *p = p1;
4596   return true;
4597 } /* common_op_match_null_string_p */
4598 
4599 
4600 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4601    bytes; nonzero otherwise.  */
4602 
4603 static int
bcmp_translate(s1,s2,len,translate)4604 bcmp_translate (s1, s2, len, translate)
4605      unsigned char *s1, *s2;
4606      register int len;
4607      char *translate;
4608 {
4609   register unsigned char *p1 = s1, *p2 = s2;
4610   while (len)
4611     {
4612       if (translate[*p1++] != translate[*p2++]) return 1;
4613       len--;
4614     }
4615   return 0;
4616 }
4617 
4618 /* Entry points for GNU code.  */
4619 
4620 /* re_compile_pattern is the GNU regular expression compiler: it
4621    compiles PATTERN (of length SIZE) and puts the result in BUFP.
4622    Returns 0 if the pattern was valid, otherwise an error string.
4623 
4624    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4625    are set in BUFP on entry.
4626 
4627    We call regex_compile to do the actual compilation.  */
4628 
4629 const char *
re_compile_pattern(pattern,length,bufp)4630 re_compile_pattern (pattern, length, bufp)
4631      const char *pattern;
4632      int length;
4633      struct re_pattern_buffer *bufp;
4634 {
4635   reg_errcode_t ret;
4636 
4637   /* GNU code is written to assume at least RE_NREGS registers will be set
4638      (and at least one extra will be -1).  */
4639   bufp->regs_allocated = REGS_UNALLOCATED;
4640 
4641   /* And GNU code determines whether or not to get register information
4642      by passing null for the REGS argument to re_match, etc., not by
4643      setting no_sub.  */
4644   bufp->no_sub = 0;
4645 
4646   /* Match anchors at newline.  */
4647   bufp->newline_anchor = 1;
4648 
4649   ret = regex_compile (pattern, length, re_syntax_options, bufp);
4650 
4651   return re_error_msg[(int) ret];
4652 }
4653 
4654 /* Entry points compatible with 4.2 BSD regex library.  We don't define
4655    them if this is an Emacs or POSIX compilation.  */
4656 
4657 #if !defined (emacs) && !defined (_POSIX_SOURCE)
4658 
4659 /* BSD has one and only one pattern buffer.  */
4660 static struct re_pattern_buffer re_comp_buf;
4661 
4662 char *
re_comp(s)4663 re_comp (s)
4664     const char *s;
4665 {
4666   reg_errcode_t ret;
4667 
4668   if (!s)
4669     {
4670       if (!re_comp_buf.buffer)
4671 	return "No previous regular expression";
4672       return 0;
4673     }
4674 
4675   if (!re_comp_buf.buffer)
4676     {
4677       re_comp_buf.buffer = (unsigned char *) malloc (200);
4678       if (re_comp_buf.buffer == NULL)
4679         return "Memory exhausted";
4680       re_comp_buf.allocated = 200;
4681 
4682       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4683       if (re_comp_buf.fastmap == NULL)
4684 	return "Memory exhausted";
4685     }
4686 
4687   /* Since `re_exec' always passes NULL for the `regs' argument, we
4688      don't need to initialize the pattern buffer fields which affect it.  */
4689 
4690   /* Match anchors at newlines.  */
4691   re_comp_buf.newline_anchor = 1;
4692 
4693   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4694 
4695   /* Yes, we're discarding `const' here.  */
4696   return (char *) re_error_msg[(int) ret];
4697 }
4698 
4699 
4700 int
re_exec(s)4701 re_exec (s)
4702     const char *s;
4703 {
4704   const int len = strlen (s);
4705   return
4706     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4707 }
4708 #endif /* not emacs and not _POSIX_SOURCE */
4709 
4710 /* POSIX.2 functions.  Don't define these for Emacs.  */
4711 
4712 #ifndef emacs
4713 
4714 /* regcomp takes a regular expression as a string and compiles it.
4715 
4716    PREG is a regex_t *.  We do not expect any fields to be initialized,
4717    since POSIX says we shouldn't.  Thus, we set
4718 
4719      `buffer' to the compiled pattern;
4720      `used' to the length of the compiled pattern;
4721      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4722        REG_EXTENDED bit in CFLAGS is set; otherwise, to
4723        RE_SYNTAX_POSIX_BASIC;
4724      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4725      `fastmap' and `fastmap_accurate' to zero;
4726      `re_nsub' to the number of subexpressions in PATTERN.
4727 
4728    PATTERN is the address of the pattern string.
4729 
4730    CFLAGS is a series of bits which affect compilation.
4731 
4732      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4733      use POSIX basic syntax.
4734 
4735      If REG_NEWLINE is set, then . and [^...] don't match newline.
4736      Also, regexec will try a match beginning after every newline.
4737 
4738      If REG_ICASE is set, then we considers upper- and lowercase
4739      versions of letters to be equivalent when matching.
4740 
4741      If REG_NOSUB is set, then when PREG is passed to regexec, that
4742      routine will report only success or failure, and nothing about the
4743      registers.
4744 
4745    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
4746    the return codes and their meanings.)  */
4747 
4748 int
regcomp(preg,pattern,cflags)4749 regcomp (preg, pattern, cflags)
4750     regex_t *preg;
4751     const char *pattern;
4752     int cflags;
4753 {
4754   reg_errcode_t ret;
4755   unsigned syntax
4756     = (cflags & REG_EXTENDED) ?
4757       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4758 
4759   /* regex_compile will allocate the space for the compiled pattern.  */
4760   preg->buffer = 0;
4761   preg->allocated = 0;
4762 
4763   /* Don't bother to use a fastmap when searching.  This simplifies the
4764      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4765      characters after newlines into the fastmap.  This way, we just try
4766      every character.  */
4767   preg->fastmap = 0;
4768 
4769   if (cflags & REG_ICASE)
4770     {
4771       unsigned i;
4772 
4773       preg->translate = (char *) malloc (CHAR_SET_SIZE);
4774       if (preg->translate == NULL)
4775         return (int) REG_ESPACE;
4776 
4777       /* Map uppercase characters to corresponding lowercase ones.  */
4778       for (i = 0; i < CHAR_SET_SIZE; i++)
4779         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4780     }
4781   else
4782     preg->translate = NULL;
4783 
4784   /* If REG_NEWLINE is set, newlines are treated differently.  */
4785   if (cflags & REG_NEWLINE)
4786     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
4787       syntax &= ~RE_DOT_NEWLINE;
4788       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4789       /* It also changes the matching behavior.  */
4790       preg->newline_anchor = 1;
4791     }
4792   else
4793     preg->newline_anchor = 0;
4794 
4795   preg->no_sub = !!(cflags & REG_NOSUB);
4796 
4797   /* POSIX says a null character in the pattern terminates it, so we
4798      can use strlen here in compiling the pattern.  */
4799   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4800 
4801   /* POSIX doesn't distinguish between an unmatched open-group and an
4802      unmatched close-group: both are REG_EPAREN.  */
4803   if (ret == REG_ERPAREN) ret = REG_EPAREN;
4804 
4805   return (int) ret;
4806 }
4807 
4808 
4809 /* regexec searches for a given pattern, specified by PREG, in the
4810    string STRING.
4811 
4812    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4813    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
4814    least NMATCH elements, and we set them to the offsets of the
4815    corresponding matched substrings.
4816 
4817    EFLAGS specifies `execution flags' which affect matching: if
4818    REG_NOTBOL is set, then ^ does not match at the beginning of the
4819    string; if REG_NOTEOL is set, then $ does not match at the end.
4820 
4821    We return 0 if we find a match and REG_NOMATCH if not.  */
4822 
4823 int
regexec(preg,string,nmatch,pmatch,eflags)4824 regexec (preg, string, nmatch, pmatch, eflags)
4825     const regex_t *preg;
4826     const char *string;
4827     size_t nmatch;
4828     regmatch_t pmatch[];
4829     int eflags;
4830 {
4831   int ret;
4832   struct re_registers regs;
4833   regex_t private_preg;
4834   int len = strlen (string);
4835   boolean want_reg_info = !preg->no_sub && nmatch > 0;
4836 
4837   private_preg = *preg;
4838 
4839   private_preg.not_bol = !!(eflags & REG_NOTBOL);
4840   private_preg.not_eol = !!(eflags & REG_NOTEOL);
4841 
4842   /* The user has told us exactly how many registers to return
4843      information about, via `nmatch'.  We have to pass that on to the
4844      matching routines.  */
4845   private_preg.regs_allocated = REGS_FIXED;
4846 
4847   if (want_reg_info)
4848     {
4849       regs.num_regs = nmatch;
4850       regs.start = TALLOC (nmatch, regoff_t);
4851       regs.end = TALLOC (nmatch, regoff_t);
4852       if (regs.start == NULL || regs.end == NULL)
4853         return (int) REG_NOMATCH;
4854     }
4855 
4856   /* Perform the searching operation.  */
4857   ret = re_search (&private_preg, string, len,
4858                    /* start: */ 0, /* range: */ len,
4859                    want_reg_info ? &regs : (struct re_registers *) 0);
4860 
4861   /* Copy the register information to the POSIX structure.  */
4862   if (want_reg_info)
4863     {
4864       if (ret >= 0)
4865         {
4866           unsigned r;
4867 
4868           for (r = 0; r < nmatch; r++)
4869             {
4870               pmatch[r].rm_so = regs.start[r];
4871               pmatch[r].rm_eo = regs.end[r];
4872             }
4873         }
4874 
4875       /* If we needed the temporary register info, free the space now.  */
4876       free (regs.start);
4877       free (regs.end);
4878     }
4879 
4880   /* We want zero return to mean success, unlike `re_search'.  */
4881   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4882 }
4883 
4884 
4885 /* Returns a message corresponding to an error code, ERRCODE, returned
4886    from either regcomp or regexec.   We don't use PREG here.  */
4887 
4888 size_t
regerror(errcode,preg,errbuf,errbuf_size)4889 regerror (errcode, preg, errbuf, errbuf_size)
4890     int errcode;
4891     const regex_t *preg;
4892     char *errbuf;
4893     size_t errbuf_size;
4894 {
4895   const char *msg;
4896   size_t msg_size;
4897 
4898   if (errcode < 0
4899       || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4900     /* Only error codes returned by the rest of the code should be passed
4901        to this routine.  If we are given anything else, or if other regex
4902        code generates an invalid error code, then the program has a bug.
4903        Dump core so we can fix it.  */
4904     abort ();
4905 
4906   msg = re_error_msg[errcode];
4907 
4908   /* POSIX doesn't require that we do anything in this case, but why
4909      not be nice.  */
4910   if (! msg)
4911     msg = "Success";
4912 
4913   msg_size = strlen (msg) + 1; /* Includes the null.  */
4914 
4915   if (errbuf_size != 0)
4916     {
4917       if (msg_size > errbuf_size)
4918         {
4919           strncpy (errbuf, msg, errbuf_size - 1);
4920           errbuf[errbuf_size - 1] = 0;
4921         }
4922       else
4923         strcpy (errbuf, msg);
4924     }
4925 
4926   return msg_size;
4927 }
4928 
4929 
4930 /* Free dynamically allocated space used by PREG.  */
4931 
4932 void
regfree(preg)4933 regfree (preg)
4934     regex_t *preg;
4935 {
4936   if (preg->buffer != NULL)
4937     free (preg->buffer);
4938   preg->buffer = NULL;
4939 
4940   preg->allocated = 0;
4941   preg->used = 0;
4942 
4943   if (preg->fastmap != NULL)
4944     free (preg->fastmap);
4945   preg->fastmap = NULL;
4946   preg->fastmap_accurate = 0;
4947 
4948   if (preg->translate != NULL)
4949     free (preg->translate);
4950   preg->translate = NULL;
4951 }
4952 
4953 #endif /* not emacs  */
4954 
4955 /*
4956 Local variables:
4957 make-backup-files: t
4958 version-control: t
4959 trim-versions-without-asking: nil
4960 End:
4961 */
4962