1 /* Extended regular expression matching and search library,
2    version 0.12.
3    (Implements POSIX draft P10003.2/D11.2, except for
4    internationalization features.)
5 
6    Copyright (C) 1993 Free Software Foundation, Inc.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2, or (at your option)
11    any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
21 
22 /* AIX requires this to be the first thing in the file. */
23 #if defined (_AIX) && !defined (REGEX_MALLOC)
24   #pragma alloca
25 #endif
26 
27 #define _GNU_SOURCE
28 
29 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
30 #include <sys/types.h>
31 
32 #ifdef HAVE_CONFIG_H
33 #include "config.h"
34 #endif
35 
36 /* The `emacs' switch turns on certain matching commands
37    that make sense only in Emacs. */
38 #ifdef emacs
39 
40 #include "lisp.h"
41 #include "buffer.h"
42 #include "syntax.h"
43 
44 /* Emacs uses `NULL' as a predicate.  */
45 #undef NULL
46 
47 #else  /* not emacs */
48 
49 /* We used to test for `BSTRING' here, but only GCC and Emacs define
50    `BSTRING', as far as I know, and neither of them use this code.  */
51 #if HAVE_STRING_H || STDC_HEADERS
52 #include <string.h>
53 #ifndef bcmp
54 #define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
55 #endif
56 #ifndef bcopy
57 #define bcopy(s, d, n)	memcpy ((d), (s), (n))
58 #endif
59 #ifndef bzero
60 #define bzero(s, n)	memset ((s), 0, (n))
61 #endif
62 #else
63 #include <strings.h>
64 #endif
65 
66 #ifdef STDC_HEADERS
67 #include <stdlib.h>
68 #else
69 char *malloc ();
70 char *realloc ();
71 #endif
72 
73 
74 /* Define the syntax stuff for \<, \>, etc.  */
75 
76 /* This must be nonzero for the wordchar and notwordchar pattern
77    commands in re_match_2.  */
78 #ifndef Sword
79 #define Sword 1
80 #endif
81 
82 #ifdef SYNTAX_TABLE
83 
84 extern char *re_syntax_table;
85 
86 #else /* not SYNTAX_TABLE */
87 
88 /* How many characters in the character set.  */
89 #define CHAR_SET_SIZE 256
90 
91 static char re_syntax_table[CHAR_SET_SIZE];
92 
93 static void
init_syntax_once()94 init_syntax_once ()
95 {
96    register int c;
97    static int done = 0;
98 
99    if (done)
100      return;
101 
102    bzero (re_syntax_table, sizeof re_syntax_table);
103 
104    for (c = 'a'; c <= 'z'; c++)
105      re_syntax_table[c] = Sword;
106 
107    for (c = 'A'; c <= 'Z'; c++)
108      re_syntax_table[c] = Sword;
109 
110    for (c = '0'; c <= '9'; c++)
111      re_syntax_table[c] = Sword;
112 
113    re_syntax_table['_'] = Sword;
114 
115    done = 1;
116 }
117 
118 #endif /* not SYNTAX_TABLE */
119 
120 #define SYNTAX(c) re_syntax_table[c]
121 
122 #endif /* not emacs */
123 
124 /* Get the interface, including the syntax bits.  */
125 #include "regex.h"
126 
127 /* isalpha etc. are used for the character classes.  */
128 #include <ctype.h>
129 
130 #ifndef isascii
131 #define isascii(c) 1
132 #endif
133 
134 #ifdef isblank
135 #define ISBLANK(c) (isascii (c) && isblank (c))
136 #else
137 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
138 #endif
139 #ifdef isgraph
140 #define ISGRAPH(c) (isascii (c) && isgraph (c))
141 #else
142 #define ISGRAPH(c) (isascii (c) && isprint (c) && !isspace (c))
143 #endif
144 
145 #define ISPRINT(c) (isascii (c) && isprint (c))
146 #define ISDIGIT(c) (isascii (c) && isdigit (c))
147 #define ISALNUM(c) (isascii (c) && isalnum (c))
148 #define ISALPHA(c) (isascii (c) && isalpha (c))
149 #define ISCNTRL(c) (isascii (c) && iscntrl (c))
150 #define ISLOWER(c) (isascii (c) && islower (c))
151 #define ISPUNCT(c) (isascii (c) && ispunct (c))
152 #define ISSPACE(c) (isascii (c) && isspace (c))
153 #define ISUPPER(c) (isascii (c) && isupper (c))
154 #define ISXDIGIT(c) (isascii (c) && isxdigit (c))
155 
156 #ifndef NULL
157 #define NULL 0
158 #endif
159 
160 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
161    since ours (we hope) works properly with all combinations of
162    machines, compilers, `char' and `unsigned char' argument types.
163    (Per Bothner suggested the basic approach.)  */
164 #undef SIGN_EXTEND_CHAR
165 #if __STDC__
166 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
167 #else  /* not __STDC__ */
168 /* As in Harbison and Steele.  */
169 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
170 #endif
171 
172 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
173    use `alloca' instead of `malloc'.  This is because using malloc in
174    re_search* or re_match* could cause memory leaks when C-g is used in
175    Emacs; also, malloc is slower and causes storage fragmentation.  On
176    the other hand, malloc is more portable, and easier to debug.
177 
178    Because we sometimes use alloca, some routines have to be macros,
179    not functions -- `alloca'-allocated space disappears at the end of the
180    function it is called in.  */
181 
182 #ifdef REGEX_MALLOC
183 
184 #define REGEX_ALLOCATE malloc
185 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
186 
187 #else /* not REGEX_MALLOC  */
188 
189 /* Emacs already defines alloca, sometimes.  */
190 #ifndef alloca
191 
192 /* Make alloca work the best possible way.  */
193 #ifdef __GNUC__
194 #define alloca __builtin_alloca
195 #else /* not __GNUC__ */
196 #if HAVE_ALLOCA_H
197 #include <alloca.h>
198 #else /* not __GNUC__ or HAVE_ALLOCA_H */
199 #ifndef _AIX /* Already did AIX, up at the top.  */
200 char *alloca ();
201 #endif /* not _AIX */
202 #endif /* not HAVE_ALLOCA_H */
203 #endif /* not __GNUC__ */
204 
205 #endif /* not alloca */
206 
207 #define REGEX_ALLOCATE alloca
208 
209 /* Assumes a `char *destination' variable.  */
210 #define REGEX_REALLOCATE(source, osize, nsize)				\
211   (destination = (char *) alloca (nsize),				\
212    bcopy (source, destination, osize),					\
213    destination)
214 
215 #endif /* not REGEX_MALLOC */
216 
217 
218 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
219    `string1' or just past its end.  This works if PTR is NULL, which is
220    a good thing.  */
221 #define FIRST_STRING_P(ptr) 					\
222   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
223 
224 /* (Re)Allocate N items of type T using malloc, or fail.  */
225 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
226 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
227 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
228 
229 #define BYTEWIDTH 8 /* In bits.  */
230 
231 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
232 
233 #define MAX(a, b) ((a) > (b) ? (a) : (b))
234 #define MIN(a, b) ((a) < (b) ? (a) : (b))
235 
236 typedef char boolean;
237 #define false 0
238 #define true 1
239 
240 /* These are the command codes that appear in compiled regular
241    expressions.  Some opcodes are followed by argument bytes.  A
242    command code can specify any interpretation whatsoever for its
243    arguments.  Zero bytes may appear in the compiled regular expression.
244 
245    The value of `exactn' is needed in search.c (search_buffer) in Emacs.
246    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
247    `exactn' we use here must also be 1.  */
248 
249 typedef enum
250 {
251   no_op = 0,
252 
253         /* Followed by one byte giving n, then by n literal bytes.  */
254   exactn = 1,
255 
256         /* Matches any (more or less) character.  */
257   anychar,
258 
259         /* Matches any one char belonging to specified set.  First
260            following byte is number of bitmap bytes.  Then come bytes
261            for a bitmap saying which chars are in.  Bits in each byte
262            are ordered low-bit-first.  A character is in the set if its
263            bit is 1.  A character too large to have a bit in the map is
264            automatically not in the set.  */
265   charset,
266 
267         /* Same parameters as charset, but match any character that is
268            not one of those specified.  */
269   charset_not,
270 
271         /* Start remembering the text that is matched, for storing in a
272            register.  Followed by one byte with the register number, in
273            the range 0 to one less than the pattern buffer's re_nsub
274            field.  Then followed by one byte with the number of groups
275            inner to this one.  (This last has to be part of the
276            start_memory only because we need it in the on_failure_jump
277            of re_match_2.)  */
278   start_memory,
279 
280         /* Stop remembering the text that is matched and store it in a
281            memory register.  Followed by one byte with the register
282            number, in the range 0 to one less than `re_nsub' in the
283            pattern buffer, and one byte with the number of inner groups,
284            just like `start_memory'.  (We need the number of inner
285            groups here because we don't have any easy way of finding the
286            corresponding start_memory when we're at a stop_memory.)  */
287   stop_memory,
288 
289         /* Match a duplicate of something remembered. Followed by one
290            byte containing the register number.  */
291   duplicate,
292 
293         /* Fail unless at beginning of line.  */
294   begline,
295 
296         /* Fail unless at end of line.  */
297   endline,
298 
299         /* Succeeds if at beginning of buffer (if emacs) or at beginning
300            of string to be matched (if not).  */
301   begbuf,
302 
303         /* Analogously, for end of buffer/string.  */
304   endbuf,
305 
306         /* Followed by two byte relative address to which to jump.  */
307   jump,
308 
309 	/* Same as jump, but marks the end of an alternative.  */
310   jump_past_alt,
311 
312         /* Followed by two-byte relative address of place to resume at
313            in case of failure.  */
314   on_failure_jump,
315 
316         /* Like on_failure_jump, but pushes a placeholder instead of the
317            current string position when executed.  */
318   on_failure_keep_string_jump,
319 
320         /* Throw away latest failure point and then jump to following
321            two-byte relative address.  */
322   pop_failure_jump,
323 
324         /* Change to pop_failure_jump if know won't have to backtrack to
325            match; otherwise change to jump.  This is used to jump
326            back to the beginning of a repeat.  If what follows this jump
327            clearly won't match what the repeat does, such that we can be
328            sure that there is no use backtracking out of repetitions
329            already matched, then we change it to a pop_failure_jump.
330            Followed by two-byte address.  */
331   maybe_pop_jump,
332 
333         /* Jump to following two-byte address, and push a dummy failure
334            point. This failure point will be thrown away if an attempt
335            is made to use it for a failure.  A `+' construct makes this
336            before the first repeat.  Also used as an intermediary kind
337            of jump when compiling an alternative.  */
338   dummy_failure_jump,
339 
340 	/* Push a dummy failure point and continue.  Used at the end of
341 	   alternatives.  */
342   push_dummy_failure,
343 
344         /* Followed by two-byte relative address and two-byte number n.
345            After matching N times, jump to the address upon failure.  */
346   succeed_n,
347 
348         /* Followed by two-byte relative address, and two-byte number n.
349            Jump to the address N times, then fail.  */
350   jump_n,
351 
352         /* Set the following two-byte relative address to the
353            subsequent two-byte number.  The address *includes* the two
354            bytes of number.  */
355   set_number_at,
356 
357   wordchar,	/* Matches any word-constituent character.  */
358   notwordchar,	/* Matches any char that is not a word-constituent.  */
359 
360   wordbeg,	/* Succeeds if at word beginning.  */
361   wordend,	/* Succeeds if at word end.  */
362 
363   wordbound,	/* Succeeds if at a word boundary.  */
364   notwordbound	/* Succeeds if not at a word boundary.  */
365 
366 #ifdef emacs
367   ,before_dot,	/* Succeeds if before point.  */
368   at_dot,	/* Succeeds if at point.  */
369   after_dot,	/* Succeeds if after point.  */
370 
371 	/* Matches any character whose syntax is specified.  Followed by
372            a byte which contains a syntax code, e.g., Sword.  */
373   syntaxspec,
374 
375 	/* Matches any character whose syntax is not that specified.  */
376   notsyntaxspec
377 #endif /* emacs */
378 } re_opcode_t;
379 
380 /* Common operations on the compiled pattern.  */
381 
382 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
383 
384 #define STORE_NUMBER(destination, number)				\
385   do {									\
386     (destination)[0] = (number) & 0377;					\
387     (destination)[1] = (number) >> 8;					\
388   } while (0)
389 
390 /* Same as STORE_NUMBER, except increment DESTINATION to
391    the byte after where the number is stored.  Therefore, DESTINATION
392    must be an lvalue.  */
393 
394 #define STORE_NUMBER_AND_INCR(destination, number)			\
395   do {									\
396     STORE_NUMBER (destination, number);					\
397     (destination) += 2;							\
398   } while (0)
399 
400 /* Put into DESTINATION a number stored in two contiguous bytes starting
401    at SOURCE.  */
402 
403 #define EXTRACT_NUMBER(destination, source)				\
404   do {									\
405     (destination) = *(source) & 0377;					\
406     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
407   } while (0)
408 
409 #ifdef DEBUG
410 static void
extract_number(dest,source)411 extract_number (dest, source)
412     int *dest;
413     unsigned char *source;
414 {
415   int temp = SIGN_EXTEND_CHAR (*(source + 1));
416   *dest = *source & 0377;
417   *dest += temp << 8;
418 }
419 
420 #ifndef EXTRACT_MACROS /* To debug the macros.  */
421 #undef EXTRACT_NUMBER
422 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
423 #endif /* not EXTRACT_MACROS */
424 
425 #endif /* DEBUG */
426 
427 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
428    SOURCE must be an lvalue.  */
429 
430 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
431   do {									\
432     EXTRACT_NUMBER (destination, source);				\
433     (source) += 2; 							\
434   } while (0)
435 
436 #ifdef DEBUG
437 static void
extract_number_and_incr(destination,source)438 extract_number_and_incr (destination, source)
439     int *destination;
440     unsigned char **source;
441 {
442   extract_number (destination, *source);
443   *source += 2;
444 }
445 
446 #ifndef EXTRACT_MACROS
447 #undef EXTRACT_NUMBER_AND_INCR
448 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
449   extract_number_and_incr (&dest, &src)
450 #endif /* not EXTRACT_MACROS */
451 
452 #endif /* DEBUG */
453 
454 /* If DEBUG is defined, Regex prints many voluminous messages about what
455    it is doing (if the variable `debug' is nonzero).  If linked with the
456    main program in `iregex.c', you can enter patterns and strings
457    interactively.  And if linked with the main program in `main.c' and
458    the other test files, you can run the already-written tests.  */
459 
460 #ifdef DEBUG
461 
462 /* We use standard I/O for debugging.  */
463 #include <stdio.h>
464 
465 /* It is useful to test things that ``must'' be true when debugging.  */
466 #include <assert.h>
467 
468 static int debug = 0;
469 
470 #define DEBUG_STATEMENT(e) e
471 #define DEBUG_PRINT1(x) if (debug) printf (x)
472 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
473 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
474 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
475 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
476   if (debug) print_partial_compiled_pattern (s, e)
477 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
478   if (debug) print_double_string (w, s1, sz1, s2, sz2)
479 
480 
481 extern void printchar ();
482 
483 /* Print the fastmap in human-readable form.  */
484 
485 void
print_fastmap(fastmap)486 print_fastmap (fastmap)
487     char *fastmap;
488 {
489   unsigned was_a_range = 0;
490   unsigned i = 0;
491 
492   while (i < (1 << BYTEWIDTH))
493     {
494       if (fastmap[i++])
495 	{
496 	  was_a_range = 0;
497           printchar (i - 1);
498           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
499             {
500               was_a_range = 1;
501               i++;
502             }
503 	  if (was_a_range)
504             {
505               printf ("-");
506               printchar (i - 1);
507             }
508         }
509     }
510   putchar ('\n');
511 }
512 
513 
514 /* Print a compiled pattern string in human-readable form, starting at
515    the START pointer into it and ending just before the pointer END.  */
516 
517 void
print_partial_compiled_pattern(start,end)518 print_partial_compiled_pattern (start, end)
519     unsigned char *start;
520     unsigned char *end;
521 {
522   int mcnt, mcnt2;
523   unsigned char *p = start;
524   unsigned char *pend = end;
525 
526   if (start == NULL)
527     {
528       printf ("(null)\n");
529       return;
530     }
531 
532   /* Loop over pattern commands.  */
533   while (p < pend)
534     {
535       switch ((re_opcode_t) *p++)
536 	{
537         case no_op:
538           printf ("/no_op");
539           break;
540 
541 	case exactn:
542 	  mcnt = *p++;
543           printf ("/exactn/%d", mcnt);
544           do
545 	    {
546               putchar ('/');
547 	      printchar (*p++);
548             }
549           while (--mcnt);
550           break;
551 
552 	case start_memory:
553           mcnt = *p++;
554           printf ("/start_memory/%d/%d", mcnt, *p++);
555           break;
556 
557 	case stop_memory:
558           mcnt = *p++;
559 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
560           break;
561 
562 	case duplicate:
563 	  printf ("/duplicate/%d", *p++);
564 	  break;
565 
566 	case anychar:
567 	  printf ("/anychar");
568 	  break;
569 
570 	case charset:
571         case charset_not:
572           {
573             register int c;
574 
575             printf ("/charset%s",
576 	            (re_opcode_t) *(p - 1) == charset_not ? "_not" : "");
577 
578             assert (p + *p < pend);
579 
580             for (c = 0; c < *p; c++)
581               {
582                 unsigned bit;
583                 unsigned char map_byte = p[1 + c];
584 
585                 putchar ('/');
586 
587 		for (bit = 0; bit < BYTEWIDTH; bit++)
588                   if (map_byte & (1 << bit))
589                     printchar (c * BYTEWIDTH + bit);
590               }
591 	    p += 1 + *p;
592 	    break;
593 	  }
594 
595 	case begline:
596 	  printf ("/begline");
597           break;
598 
599 	case endline:
600           printf ("/endline");
601           break;
602 
603 	case on_failure_jump:
604           extract_number_and_incr (&mcnt, &p);
605   	  printf ("/on_failure_jump/0/%d", mcnt);
606           break;
607 
608 	case on_failure_keep_string_jump:
609           extract_number_and_incr (&mcnt, &p);
610   	  printf ("/on_failure_keep_string_jump/0/%d", mcnt);
611           break;
612 
613 	case dummy_failure_jump:
614           extract_number_and_incr (&mcnt, &p);
615   	  printf ("/dummy_failure_jump/0/%d", mcnt);
616           break;
617 
618 	case push_dummy_failure:
619           printf ("/push_dummy_failure");
620           break;
621 
622         case maybe_pop_jump:
623           extract_number_and_incr (&mcnt, &p);
624   	  printf ("/maybe_pop_jump/0/%d", mcnt);
625 	  break;
626 
627         case pop_failure_jump:
628 	  extract_number_and_incr (&mcnt, &p);
629   	  printf ("/pop_failure_jump/0/%d", mcnt);
630 	  break;
631 
632         case jump_past_alt:
633 	  extract_number_and_incr (&mcnt, &p);
634   	  printf ("/jump_past_alt/0/%d", mcnt);
635 	  break;
636 
637         case jump:
638 	  extract_number_and_incr (&mcnt, &p);
639   	  printf ("/jump/0/%d", mcnt);
640 	  break;
641 
642         case succeed_n:
643           extract_number_and_incr (&mcnt, &p);
644           extract_number_and_incr (&mcnt2, &p);
645  	  printf ("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
646           break;
647 
648         case jump_n:
649           extract_number_and_incr (&mcnt, &p);
650           extract_number_and_incr (&mcnt2, &p);
651  	  printf ("/jump_n/0/%d/0/%d", mcnt, mcnt2);
652           break;
653 
654         case set_number_at:
655           extract_number_and_incr (&mcnt, &p);
656           extract_number_and_incr (&mcnt2, &p);
657  	  printf ("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
658           break;
659 
660         case wordbound:
661 	  printf ("/wordbound");
662 	  break;
663 
664 	case notwordbound:
665 	  printf ("/notwordbound");
666           break;
667 
668 	case wordbeg:
669 	  printf ("/wordbeg");
670 	  break;
671 
672 	case wordend:
673 	  printf ("/wordend");
674 
675 #ifdef emacs
676 	case before_dot:
677 	  printf ("/before_dot");
678           break;
679 
680 	case at_dot:
681 	  printf ("/at_dot");
682           break;
683 
684 	case after_dot:
685 	  printf ("/after_dot");
686           break;
687 
688 	case syntaxspec:
689           printf ("/syntaxspec");
690 	  mcnt = *p++;
691 	  printf ("/%d", mcnt);
692           break;
693 
694 	case notsyntaxspec:
695           printf ("/notsyntaxspec");
696 	  mcnt = *p++;
697 	  printf ("/%d", mcnt);
698 	  break;
699 #endif /* emacs */
700 
701 	case wordchar:
702 	  printf ("/wordchar");
703           break;
704 
705 	case notwordchar:
706 	  printf ("/notwordchar");
707           break;
708 
709 	case begbuf:
710 	  printf ("/begbuf");
711           break;
712 
713 	case endbuf:
714 	  printf ("/endbuf");
715           break;
716 
717         default:
718           printf ("?%d", *(p-1));
719 	}
720     }
721   printf ("/\n");
722 }
723 
724 
725 void
print_compiled_pattern(bufp)726 print_compiled_pattern (bufp)
727     struct re_pattern_buffer *bufp;
728 {
729   unsigned char *buffer = bufp->buffer;
730 
731   print_partial_compiled_pattern (buffer, buffer + bufp->used);
732   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
733 
734   if (bufp->fastmap_accurate && bufp->fastmap)
735     {
736       printf ("fastmap: ");
737       print_fastmap (bufp->fastmap);
738     }
739 
740   printf ("re_nsub: %d\t", bufp->re_nsub);
741   printf ("regs_alloc: %d\t", bufp->regs_allocated);
742   printf ("can_be_null: %d\t", bufp->can_be_null);
743   printf ("newline_anchor: %d\n", bufp->newline_anchor);
744   printf ("no_sub: %d\t", bufp->no_sub);
745   printf ("not_bol: %d\t", bufp->not_bol);
746   printf ("not_eol: %d\t", bufp->not_eol);
747   printf ("syntax: %d\n", bufp->syntax);
748   /* Perhaps we should print the translate table?  */
749 }
750 
751 
752 void
print_double_string(where,string1,size1,string2,size2)753 print_double_string (where, string1, size1, string2, size2)
754     const char *where;
755     const char *string1;
756     const char *string2;
757     int size1;
758     int size2;
759 {
760   unsigned this_char;
761 
762   if (where == NULL)
763     printf ("(null)");
764   else
765     {
766       if (FIRST_STRING_P (where))
767         {
768           for (this_char = where - string1; this_char < size1; this_char++)
769             printchar (string1[this_char]);
770 
771           where = string2;
772         }
773 
774       for (this_char = where - string2; this_char < size2; this_char++)
775         printchar (string2[this_char]);
776     }
777 }
778 
779 #else /* not DEBUG */
780 
781 #undef assert
782 #define assert(e)
783 
784 #define DEBUG_STATEMENT(e)
785 #define DEBUG_PRINT1(x)
786 #define DEBUG_PRINT2(x1, x2)
787 #define DEBUG_PRINT3(x1, x2, x3)
788 #define DEBUG_PRINT4(x1, x2, x3, x4)
789 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
790 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
791 
792 #endif /* not DEBUG */
793 
794 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
795    also be assigned to arbitrarily: each pattern buffer stores its own
796    syntax, so it can be changed between regex compilations.  */
797 reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
798 
799 
800 /* Specify the precise syntax of regexps for compilation.  This provides
801    for compatibility for various utilities which historically have
802    different, incompatible syntaxes.
803 
804    The argument SYNTAX is a bit mask comprised of the various bits
805    defined in regex.h.  We return the old syntax.  */
806 
807 reg_syntax_t
re_set_syntax(syntax)808 re_set_syntax (syntax)
809     reg_syntax_t syntax;
810 {
811   reg_syntax_t ret = re_syntax_options;
812 
813   re_syntax_options = syntax;
814   return ret;
815 }
816 
817 /* This table gives an error message for each of the error codes listed
818    in regex.h.  Obviously the order here has to be same as there.  */
819 
820 static const char *re_error_msg[] =
821   { NULL,					/* REG_NOERROR */
822     "No match",					/* REG_NOMATCH */
823     "Invalid regular expression",		/* REG_BADPAT */
824     "Invalid collation character",		/* REG_ECOLLATE */
825     "Invalid character class name",		/* REG_ECTYPE */
826     "Trailing backslash",			/* REG_EESCAPE */
827     "Invalid back reference",			/* REG_ESUBREG */
828     "Unmatched [ or [^",			/* REG_EBRACK */
829     "Unmatched ( or \\(",			/* REG_EPAREN */
830     "Unmatched \\{",				/* REG_EBRACE */
831     "Invalid content of \\{\\}",		/* REG_BADBR */
832     "Invalid range end",			/* REG_ERANGE */
833     "Memory exhausted",				/* REG_ESPACE */
834     "Invalid preceding regular expression",	/* REG_BADRPT */
835     "Premature end of regular expression",	/* REG_EEND */
836     "Regular expression too big",		/* REG_ESIZE */
837     "Unmatched ) or \\)",			/* REG_ERPAREN */
838   };
839 
840 /* Subroutine declarations and macros for regex_compile.  */
841 
842 static void store_op1 (), store_op2 ();
843 static void insert_op1 (), insert_op2 ();
844 static boolean at_begline_loc_p (), at_endline_loc_p ();
845 static boolean group_in_compile_stack ();
846 static reg_errcode_t compile_range ();
847 
848 /* Fetch the next character in the uncompiled pattern---translating it
849    if necessary.  Also cast from a signed character in the constant
850    string passed to us by the user to an unsigned char that we can use
851    as an array index (in, e.g., `translate').  */
852 #define PATFETCH(c)							\
853   do {if (p == pend) return REG_EEND;					\
854     c = (unsigned char) *p++;						\
855     if (translate) c = translate[c]; 					\
856   } while (0)
857 
858 /* Fetch the next character in the uncompiled pattern, with no
859    translation.  */
860 #define PATFETCH_RAW(c)							\
861   do {if (p == pend) return REG_EEND;					\
862     c = (unsigned char) *p++; 						\
863   } while (0)
864 
865 /* Go backwards one character in the pattern.  */
866 #define PATUNFETCH p--
867 
868 
869 /* If `translate' is non-null, return translate[D], else just D.  We
870    cast the subscript to translate because some data is declared as
871    `char *', to avoid warnings when a string constant is passed.  But
872    when we use a character as a subscript we must make it unsigned.  */
873 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
874 
875 
876 /* Macros for outputting the compiled pattern into `buffer'.  */
877 
878 /* If the buffer isn't allocated when it comes in, use this.  */
879 #define INIT_BUF_SIZE  32
880 
881 /* Make sure we have at least N more bytes of space in buffer.  */
882 #define GET_BUFFER_SPACE(n)						\
883     while (b - bufp->buffer + (n) > bufp->allocated)			\
884       EXTEND_BUFFER ()
885 
886 /* Make sure we have one more byte of buffer space and then add C to it.  */
887 #define BUF_PUSH(c)							\
888   do {									\
889     GET_BUFFER_SPACE (1);						\
890     *b++ = (unsigned char) (c);						\
891   } while (0)
892 
893 
894 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
895 #define BUF_PUSH_2(c1, c2)						\
896   do {									\
897     GET_BUFFER_SPACE (2);						\
898     *b++ = (unsigned char) (c1);					\
899     *b++ = (unsigned char) (c2);					\
900   } while (0)
901 
902 
903 /* As with BUF_PUSH_2, except for three bytes.  */
904 #define BUF_PUSH_3(c1, c2, c3)						\
905   do {									\
906     GET_BUFFER_SPACE (3);						\
907     *b++ = (unsigned char) (c1);					\
908     *b++ = (unsigned char) (c2);					\
909     *b++ = (unsigned char) (c3);					\
910   } while (0)
911 
912 
913 /* Store a jump with opcode OP at LOC to location TO.  We store a
914    relative address offset by the three bytes the jump itself occupies.  */
915 #define STORE_JUMP(op, loc, to) \
916   store_op1 (op, loc, (to) - (loc) - 3)
917 
918 /* Likewise, for a two-argument jump.  */
919 #define STORE_JUMP2(op, loc, to, arg) \
920   store_op2 (op, loc, (to) - (loc) - 3, arg)
921 
922 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
923 #define INSERT_JUMP(op, loc, to) \
924   insert_op1 (op, loc, (to) - (loc) - 3, b)
925 
926 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
927 #define INSERT_JUMP2(op, loc, to, arg) \
928   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
929 
930 
931 /* This is not an arbitrary limit: the arguments which represent offsets
932    into the pattern are two bytes long.  So if 2^16 bytes turns out to
933    be too small, many things would have to change.  */
934 #define MAX_BUF_SIZE (1L << 16)
935 
936 
937 /* Extend the buffer by twice its current size via realloc and
938    reset the pointers that pointed into the old block to point to the
939    correct places in the new one.  If extending the buffer results in it
940    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
941 #define EXTEND_BUFFER()							\
942   do { 									\
943     unsigned char *old_buffer = bufp->buffer;				\
944     if (bufp->allocated == MAX_BUF_SIZE) 				\
945       return REG_ESIZE;							\
946     bufp->allocated <<= 1;						\
947     if (bufp->allocated > MAX_BUF_SIZE)					\
948       bufp->allocated = MAX_BUF_SIZE; 					\
949     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
950     if (bufp->buffer == NULL)						\
951       return REG_ESPACE;						\
952     /* If the buffer moved, move all the pointers into it.  */		\
953     if (old_buffer != bufp->buffer)					\
954       {									\
955         b = (b - old_buffer) + bufp->buffer;				\
956         begalt = (begalt - old_buffer) + bufp->buffer;			\
957         if (fixup_alt_jump)						\
958           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
959         if (laststart)							\
960           laststart = (laststart - old_buffer) + bufp->buffer;		\
961         if (pending_exact)						\
962           pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
963       }									\
964   } while (0)
965 
966 
967 /* Since we have one byte reserved for the register number argument to
968    {start,stop}_memory, the maximum number of groups we can report
969    things about is what fits in that byte.  */
970 #define MAX_REGNUM 255
971 
972 /* But patterns can have more than `MAX_REGNUM' registers.  We just
973    ignore the excess.  */
974 typedef unsigned regnum_t;
975 
976 
977 /* Macros for the compile stack.  */
978 
979 /* Since offsets can go either forwards or backwards, this type needs to
980    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
981 typedef int pattern_offset_t;
982 
983 typedef struct
984 {
985   pattern_offset_t begalt_offset;
986   pattern_offset_t fixup_alt_jump;
987   pattern_offset_t inner_group_offset;
988   pattern_offset_t laststart_offset;
989   regnum_t regnum;
990 } compile_stack_elt_t;
991 
992 
993 typedef struct
994 {
995   compile_stack_elt_t *stack;
996   unsigned size;
997   unsigned avail;			/* Offset of next open position.  */
998 } compile_stack_type;
999 
1000 
1001 #define INIT_COMPILE_STACK_SIZE 32
1002 
1003 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1004 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1005 
1006 /* The next available element.  */
1007 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1008 
1009 
1010 /* Set the bit for character C in a list.  */
1011 #define SET_LIST_BIT(c)                               \
1012   (b[((unsigned char) (c)) / BYTEWIDTH]               \
1013    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1014 
1015 
1016 /* Get the next unsigned number in the uncompiled pattern.  */
1017 #define GET_UNSIGNED_NUMBER(num) 					\
1018   { if (p != pend)							\
1019      {									\
1020        PATFETCH (c); 							\
1021        while (ISDIGIT (c)) 						\
1022          { 								\
1023            if (num < 0)							\
1024               num = 0;							\
1025            num = num * 10 + c - '0'; 					\
1026            if (p == pend) 						\
1027               break; 							\
1028            PATFETCH (c);						\
1029          } 								\
1030        } 								\
1031     }
1032 
1033 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1034 
1035 #define IS_CHAR_CLASS(string)						\
1036    (STREQ (string, "alpha") || STREQ (string, "upper")			\
1037     || STREQ (string, "lower") || STREQ (string, "digit")		\
1038     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1039     || STREQ (string, "space") || STREQ (string, "print")		\
1040     || STREQ (string, "punct") || STREQ (string, "graph")		\
1041     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1042 
1043 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1044    Returns one of error codes defined in `regex.h', or zero for success.
1045 
1046    Assumes the `allocated' (and perhaps `buffer') and `translate'
1047    fields are set in BUFP on entry.
1048 
1049    If it succeeds, results are put in BUFP (if it returns an error, the
1050    contents of BUFP are undefined):
1051      `buffer' is the compiled pattern;
1052      `syntax' is set to SYNTAX;
1053      `used' is set to the length of the compiled pattern;
1054      `fastmap_accurate' is zero;
1055      `re_nsub' is the number of subexpressions in PATTERN;
1056      `not_bol' and `not_eol' are zero;
1057 
1058    The `fastmap' and `newline_anchor' fields are neither
1059    examined nor set.  */
1060 
1061 static reg_errcode_t
regex_compile(pattern,size,syntax,bufp)1062 regex_compile (pattern, size, syntax, bufp)
1063      const char *pattern;
1064      int size;
1065      reg_syntax_t syntax;
1066      struct re_pattern_buffer *bufp;
1067 {
1068   /* We fetch characters from PATTERN here.  Even though PATTERN is
1069      `char *' (i.e., signed), we declare these variables as unsigned, so
1070      they can be reliably used as array indices.  */
1071   register unsigned char c, c1;
1072 
1073   /* A random tempory spot in PATTERN.  */
1074   const char *p1;
1075 
1076   /* Points to the end of the buffer, where we should append.  */
1077   register unsigned char *b;
1078 
1079   /* Keeps track of unclosed groups.  */
1080   compile_stack_type compile_stack;
1081 
1082   /* Points to the current (ending) position in the pattern.  */
1083   const char *p = pattern;
1084   const char *pend = pattern + size;
1085 
1086   /* How to translate the characters in the pattern.  */
1087   char *translate = bufp->translate;
1088 
1089   /* Address of the count-byte of the most recently inserted `exactn'
1090      command.  This makes it possible to tell if a new exact-match
1091      character can be added to that command or if the character requires
1092      a new `exactn' command.  */
1093   unsigned char *pending_exact = 0;
1094 
1095   /* Address of start of the most recently finished expression.
1096      This tells, e.g., postfix * where to find the start of its
1097      operand.  Reset at the beginning of groups and alternatives.  */
1098   unsigned char *laststart = 0;
1099 
1100   /* Address of beginning of regexp, or inside of last group.  */
1101   unsigned char *begalt;
1102 
1103   /* Place in the uncompiled pattern (i.e., the {) to
1104      which to go back if the interval is invalid.  */
1105   const char *beg_interval;
1106 
1107   /* Address of the place where a forward jump should go to the end of
1108      the containing expression.  Each alternative of an `or' -- except the
1109      last -- ends with a forward jump of this sort.  */
1110   unsigned char *fixup_alt_jump = 0;
1111 
1112   /* Counts open-groups as they are encountered.  Remembered for the
1113      matching close-group on the compile stack, so the same register
1114      number is put in the stop_memory as the start_memory.  */
1115   regnum_t regnum = 0;
1116 
1117 #ifdef DEBUG
1118   DEBUG_PRINT1 ("\nCompiling pattern: ");
1119   if (debug)
1120     {
1121       unsigned debug_count;
1122 
1123       for (debug_count = 0; debug_count < size; debug_count++)
1124         printchar (pattern[debug_count]);
1125       putchar ('\n');
1126     }
1127 #endif /* DEBUG */
1128 
1129   /* Initialize the compile stack.  */
1130   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1131   if (compile_stack.stack == NULL)
1132     return REG_ESPACE;
1133 
1134   compile_stack.size = INIT_COMPILE_STACK_SIZE;
1135   compile_stack.avail = 0;
1136 
1137   /* Initialize the pattern buffer.  */
1138   bufp->syntax = syntax;
1139   bufp->fastmap_accurate = 0;
1140   bufp->not_bol = bufp->not_eol = 0;
1141 
1142   /* Set `used' to zero, so that if we return an error, the pattern
1143      printer (for debugging) will think there's no pattern.  We reset it
1144      at the end.  */
1145   bufp->used = 0;
1146 
1147   /* Always count groups, whether or not bufp->no_sub is set.  */
1148   bufp->re_nsub = 0;
1149 
1150 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1151   /* Initialize the syntax table.  */
1152    init_syntax_once ();
1153 #endif
1154 
1155   if (bufp->allocated == 0)
1156     {
1157       if (bufp->buffer)
1158 	{ /* If zero allocated, but buffer is non-null, try to realloc
1159              enough space.  This loses if buffer's address is bogus, but
1160              that is the user's responsibility.  */
1161           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1162         }
1163       else
1164         { /* Caller did not allocate a buffer.  Do it for them.  */
1165           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1166         }
1167       if (!bufp->buffer) return REG_ESPACE;
1168 
1169       bufp->allocated = INIT_BUF_SIZE;
1170     }
1171 
1172   begalt = b = bufp->buffer;
1173 
1174   /* Loop through the uncompiled pattern until we're at the end.  */
1175   while (p != pend)
1176     {
1177       PATFETCH (c);
1178 
1179       switch (c)
1180         {
1181         case '^':
1182           {
1183             if (   /* If at start of pattern, it's an operator.  */
1184                    p == pattern + 1
1185                    /* If context independent, it's an operator.  */
1186                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1187                    /* Otherwise, depends on what's come before.  */
1188                 || at_begline_loc_p (pattern, p, syntax))
1189               BUF_PUSH (begline);
1190             else
1191               goto normal_char;
1192           }
1193           break;
1194 
1195 
1196         case '$':
1197           {
1198             if (   /* If at end of pattern, it's an operator.  */
1199                    p == pend
1200                    /* If context independent, it's an operator.  */
1201                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1202                    /* Otherwise, depends on what's next.  */
1203                 || at_endline_loc_p (p, pend, syntax))
1204                BUF_PUSH (endline);
1205              else
1206                goto normal_char;
1207            }
1208            break;
1209 
1210 
1211 	case '+':
1212         case '?':
1213           if ((syntax & RE_BK_PLUS_QM)
1214               || (syntax & RE_LIMITED_OPS))
1215             goto normal_char;
1216         handle_plus:
1217         case '*':
1218           /* If there is no previous pattern... */
1219           if (!laststart)
1220             {
1221               if (syntax & RE_CONTEXT_INVALID_OPS)
1222                 return REG_BADRPT;
1223               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1224                 goto normal_char;
1225             }
1226 
1227           {
1228             /* Are we optimizing this jump?  */
1229             boolean keep_string_p = false;
1230 
1231             /* 1 means zero (many) matches is allowed.  */
1232             char zero_times_ok = 0, many_times_ok = 0;
1233 
1234             /* If there is a sequence of repetition chars, collapse it
1235                down to just one (the right one).  We can't combine
1236                interval operators with these because of, e.g., `a{2}*',
1237                which should only match an even number of `a's.  */
1238 
1239             for (;;)
1240               {
1241                 zero_times_ok |= c != '+';
1242                 many_times_ok |= c != '?';
1243 
1244                 if (p == pend)
1245                   break;
1246 
1247                 PATFETCH (c);
1248 
1249                 if (c == '*'
1250                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1251                   ;
1252 
1253                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1254                   {
1255                     if (p == pend) return REG_EESCAPE;
1256 
1257                     PATFETCH (c1);
1258                     if (!(c1 == '+' || c1 == '?'))
1259                       {
1260                         PATUNFETCH;
1261                         PATUNFETCH;
1262                         break;
1263                       }
1264 
1265                     c = c1;
1266                   }
1267                 else
1268                   {
1269                     PATUNFETCH;
1270                     break;
1271                   }
1272 
1273                 /* If we get here, we found another repeat character.  */
1274                }
1275 
1276             /* Star, etc. applied to an empty pattern is equivalent
1277                to an empty pattern.  */
1278             if (!laststart)
1279               break;
1280 
1281             /* Now we know whether or not zero matches is allowed
1282                and also whether or not two or more matches is allowed.  */
1283             if (many_times_ok)
1284               { /* More than one repetition is allowed, so put in at the
1285                    end a backward relative jump from `b' to before the next
1286                    jump we're going to put in below (which jumps from
1287                    laststart to after this jump).
1288 
1289                    But if we are at the `*' in the exact sequence `.*\n',
1290                    insert an unconditional jump backwards to the .,
1291                    instead of the beginning of the loop.  This way we only
1292                    push a failure point once, instead of every time
1293                    through the loop.  */
1294                 assert (p - 1 > pattern);
1295 
1296                 /* Allocate the space for the jump.  */
1297                 GET_BUFFER_SPACE (3);
1298 
1299                 /* We know we are not at the first character of the pattern,
1300                    because laststart was nonzero.  And we've already
1301                    incremented `p', by the way, to be the character after
1302                    the `*'.  Do we have to do something analogous here
1303                    for null bytes, because of RE_DOT_NOT_NULL?  */
1304                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1305 		    && zero_times_ok
1306                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1307                     && !(syntax & RE_DOT_NEWLINE))
1308                   { /* We have .*\n.  */
1309                     STORE_JUMP (jump, b, laststart);
1310                     keep_string_p = true;
1311                   }
1312                 else
1313                   /* Anything else.  */
1314                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1315 
1316                 /* We've added more stuff to the buffer.  */
1317                 b += 3;
1318               }
1319 
1320             /* On failure, jump from laststart to b + 3, which will be the
1321                end of the buffer after this jump is inserted.  */
1322             GET_BUFFER_SPACE (3);
1323             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1324                                        : on_failure_jump,
1325                          laststart, b + 3);
1326             pending_exact = 0;
1327             b += 3;
1328 
1329             if (!zero_times_ok)
1330               {
1331                 /* At least one repetition is required, so insert a
1332                    `dummy_failure_jump' before the initial
1333                    `on_failure_jump' instruction of the loop. This
1334                    effects a skip over that instruction the first time
1335                    we hit that loop.  */
1336                 GET_BUFFER_SPACE (3);
1337                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1338                 b += 3;
1339               }
1340             }
1341 	  break;
1342 
1343 
1344 	case '.':
1345           laststart = b;
1346           BUF_PUSH (anychar);
1347           break;
1348 
1349 
1350         case '[':
1351           {
1352             boolean had_char_class = false;
1353 
1354             if (p == pend) return REG_EBRACK;
1355 
1356             /* Ensure that we have enough space to push a charset: the
1357                opcode, the length count, and the bitset; 34 bytes in all.  */
1358 	    GET_BUFFER_SPACE (34);
1359 
1360             laststart = b;
1361 
1362             /* We test `*p == '^' twice, instead of using an if
1363                statement, so we only need one BUF_PUSH.  */
1364             BUF_PUSH (*p == '^' ? charset_not : charset);
1365             if (*p == '^')
1366               p++;
1367 
1368             /* Remember the first position in the bracket expression.  */
1369             p1 = p;
1370 
1371             /* Push the number of bytes in the bitmap.  */
1372             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1373 
1374             /* Clear the whole map.  */
1375             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1376 
1377             /* charset_not matches newline according to a syntax bit.  */
1378             if ((re_opcode_t) b[-2] == charset_not
1379                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1380               SET_LIST_BIT ('\n');
1381 
1382             /* Read in characters and ranges, setting map bits.  */
1383             for (;;)
1384               {
1385                 if (p == pend) return REG_EBRACK;
1386 
1387                 PATFETCH (c);
1388 
1389                 /* \ might escape characters inside [...] and [^...].  */
1390                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1391                   {
1392                     if (p == pend) return REG_EESCAPE;
1393 
1394                     PATFETCH (c1);
1395                     SET_LIST_BIT (c1);
1396                     continue;
1397                   }
1398 
1399                 /* Could be the end of the bracket expression.  If it's
1400                    not (i.e., when the bracket expression is `[]' so
1401                    far), the ']' character bit gets set way below.  */
1402                 if (c == ']' && p != p1 + 1)
1403                   break;
1404 
1405                 /* Look ahead to see if it's a range when the last thing
1406                    was a character class.  */
1407                 if (had_char_class && c == '-' && *p != ']')
1408                   return REG_ERANGE;
1409 
1410                 /* Look ahead to see if it's a range when the last thing
1411                    was a character: if this is a hyphen not at the
1412                    beginning or the end of a list, then it's the range
1413                    operator.  */
1414                 if (c == '-'
1415                     && !(p - 2 >= pattern && p[-2] == '[')
1416                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1417                     && *p != ']')
1418                   {
1419                     reg_errcode_t ret
1420                       = compile_range (&p, pend, translate, syntax, b);
1421                     if (ret != REG_NOERROR) return ret;
1422                   }
1423 
1424                 else if (p[0] == '-' && p[1] != ']')
1425                   { /* This handles ranges made up of characters only.  */
1426                     reg_errcode_t ret;
1427 
1428 		    /* Move past the `-'.  */
1429                     PATFETCH (c1);
1430 
1431                     ret = compile_range (&p, pend, translate, syntax, b);
1432                     if (ret != REG_NOERROR) return ret;
1433                   }
1434 
1435                 /* See if we're at the beginning of a possible character
1436                    class.  */
1437 
1438                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1439                   { /* Leave room for the null.  */
1440                     char str[CHAR_CLASS_MAX_LENGTH + 1];
1441 
1442                     PATFETCH (c);
1443                     c1 = 0;
1444 
1445                     /* If pattern is `[[:'.  */
1446                     if (p == pend) return REG_EBRACK;
1447 
1448                     for (;;)
1449                       {
1450                         PATFETCH (c);
1451                         if (c == ':' || c == ']' || p == pend
1452                             || c1 == CHAR_CLASS_MAX_LENGTH)
1453                           break;
1454                         str[c1++] = c;
1455                       }
1456                     str[c1] = '\0';
1457 
1458                     /* If isn't a word bracketed by `[:' and:`]':
1459                        undo the ending character, the letters, and leave
1460                        the leading `:' and `[' (but set bits for them).  */
1461                     if (c == ':' && *p == ']')
1462                       {
1463                         int ch;
1464                         boolean is_alnum = STREQ (str, "alnum");
1465                         boolean is_alpha = STREQ (str, "alpha");
1466                         boolean is_blank = STREQ (str, "blank");
1467                         boolean is_cntrl = STREQ (str, "cntrl");
1468                         boolean is_digit = STREQ (str, "digit");
1469                         boolean is_graph = STREQ (str, "graph");
1470                         boolean is_lower = STREQ (str, "lower");
1471                         boolean is_print = STREQ (str, "print");
1472                         boolean is_punct = STREQ (str, "punct");
1473                         boolean is_space = STREQ (str, "space");
1474                         boolean is_upper = STREQ (str, "upper");
1475                         boolean is_xdigit = STREQ (str, "xdigit");
1476 
1477                         if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
1478 
1479                         /* Throw away the ] at the end of the character
1480                            class.  */
1481                         PATFETCH (c);
1482 
1483                         if (p == pend) return REG_EBRACK;
1484 
1485                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
1486                           {
1487                             if (   (is_alnum  && ISALNUM (ch))
1488                                 || (is_alpha  && ISALPHA (ch))
1489                                 || (is_blank  && ISBLANK (ch))
1490                                 || (is_cntrl  && ISCNTRL (ch))
1491                                 || (is_digit  && ISDIGIT (ch))
1492                                 || (is_graph  && ISGRAPH (ch))
1493                                 || (is_lower  && ISLOWER (ch))
1494                                 || (is_print  && ISPRINT (ch))
1495                                 || (is_punct  && ISPUNCT (ch))
1496                                 || (is_space  && ISSPACE (ch))
1497                                 || (is_upper  && ISUPPER (ch))
1498                                 || (is_xdigit && ISXDIGIT (ch)))
1499                             SET_LIST_BIT (ch);
1500                           }
1501                         had_char_class = true;
1502                       }
1503                     else
1504                       {
1505                         c1++;
1506                         while (c1--)
1507                           PATUNFETCH;
1508                         SET_LIST_BIT ('[');
1509                         SET_LIST_BIT (':');
1510                         had_char_class = false;
1511                       }
1512                   }
1513                 else
1514                   {
1515                     had_char_class = false;
1516                     SET_LIST_BIT (c);
1517                   }
1518               }
1519 
1520             /* Discard any (non)matching list bytes that are all 0 at the
1521                end of the map.  Decrease the map-length byte too.  */
1522             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1523               b[-1]--;
1524             b += b[-1];
1525           }
1526           break;
1527 
1528 
1529 	case '(':
1530           if (syntax & RE_NO_BK_PARENS)
1531             goto handle_open;
1532           else
1533             goto normal_char;
1534 
1535 
1536         case ')':
1537           if (syntax & RE_NO_BK_PARENS)
1538             goto handle_close;
1539           else
1540             goto normal_char;
1541 
1542 
1543         case '\n':
1544           if (syntax & RE_NEWLINE_ALT)
1545             goto handle_alt;
1546           else
1547             goto normal_char;
1548 
1549 
1550 	case '|':
1551           if (syntax & RE_NO_BK_VBAR)
1552             goto handle_alt;
1553           else
1554             goto normal_char;
1555 
1556 
1557         case '{':
1558            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1559              goto handle_interval;
1560            else
1561              goto normal_char;
1562 
1563 
1564         case '\\':
1565           if (p == pend) return REG_EESCAPE;
1566 
1567           /* Do not translate the character after the \, so that we can
1568              distinguish, e.g., \B from \b, even if we normally would
1569              translate, e.g., B to b.  */
1570           PATFETCH_RAW (c);
1571 
1572           switch (c)
1573             {
1574             case '(':
1575               if (syntax & RE_NO_BK_PARENS)
1576                 goto normal_backslash;
1577 
1578             handle_open:
1579               bufp->re_nsub++;
1580               regnum++;
1581 
1582               if (COMPILE_STACK_FULL)
1583                 {
1584                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
1585                             compile_stack_elt_t);
1586                   if (compile_stack.stack == NULL) return REG_ESPACE;
1587 
1588                   compile_stack.size <<= 1;
1589                 }
1590 
1591               /* These are the values to restore when we hit end of this
1592                  group.  They are all relative offsets, so that if the
1593                  whole pattern moves because of realloc, they will still
1594                  be valid.  */
1595               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1596               COMPILE_STACK_TOP.fixup_alt_jump
1597                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1598               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1599               COMPILE_STACK_TOP.regnum = regnum;
1600 
1601               /* We will eventually replace the 0 with the number of
1602                  groups inner to this one.  But do not push a
1603                  start_memory for groups beyond the last one we can
1604                  represent in the compiled pattern.  */
1605               if (regnum <= MAX_REGNUM)
1606                 {
1607                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1608                   BUF_PUSH_3 (start_memory, regnum, 0);
1609                 }
1610 
1611               compile_stack.avail++;
1612 
1613               fixup_alt_jump = 0;
1614               laststart = 0;
1615               begalt = b;
1616 	      /* If we've reached MAX_REGNUM groups, then this open
1617 		 won't actually generate any code, so we'll have to
1618 		 clear pending_exact explicitly.  */
1619 	      pending_exact = 0;
1620               break;
1621 
1622 
1623             case ')':
1624               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
1625 
1626               if (COMPILE_STACK_EMPTY)
1627                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1628                   goto normal_backslash;
1629                 else
1630                   return REG_ERPAREN;
1631 
1632             handle_close:
1633               if (fixup_alt_jump)
1634                 { /* Push a dummy failure point at the end of the
1635                      alternative for a possible future
1636                      `pop_failure_jump' to pop.  See comments at
1637                      `push_dummy_failure' in `re_match_2'.  */
1638                   BUF_PUSH (push_dummy_failure);
1639 
1640                   /* We allocated space for this jump when we assigned
1641                      to `fixup_alt_jump', in the `handle_alt' case below.  */
1642                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
1643                 }
1644 
1645               /* See similar code for backslashed left paren above.  */
1646               if (COMPILE_STACK_EMPTY)
1647                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1648                   goto normal_char;
1649                 else
1650                   return REG_ERPAREN;
1651 
1652               /* Since we just checked for an empty stack above, this
1653                  ``can't happen''.  */
1654               assert (compile_stack.avail != 0);
1655               {
1656                 /* We don't just want to restore into `regnum', because
1657                    later groups should continue to be numbered higher,
1658                    as in `(ab)c(de)' -- the second group is #2.  */
1659                 regnum_t this_group_regnum;
1660 
1661                 compile_stack.avail--;
1662                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1663                 fixup_alt_jump
1664                   = COMPILE_STACK_TOP.fixup_alt_jump
1665                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1666                     : 0;
1667                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1668                 this_group_regnum = COMPILE_STACK_TOP.regnum;
1669 		/* If we've reached MAX_REGNUM groups, then this open
1670 		   won't actually generate any code, so we'll have to
1671 		   clear pending_exact explicitly.  */
1672 		pending_exact = 0;
1673 
1674                 /* We're at the end of the group, so now we know how many
1675                    groups were inside this one.  */
1676                 if (this_group_regnum <= MAX_REGNUM)
1677                   {
1678                     unsigned char *inner_group_loc
1679                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1680 
1681                     *inner_group_loc = regnum - this_group_regnum;
1682                     BUF_PUSH_3 (stop_memory, this_group_regnum,
1683                                 regnum - this_group_regnum);
1684                   }
1685               }
1686               break;
1687 
1688 
1689             case '|':					/* `\|'.  */
1690               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1691                 goto normal_backslash;
1692             handle_alt:
1693               if (syntax & RE_LIMITED_OPS)
1694                 goto normal_char;
1695 
1696               /* Insert before the previous alternative a jump which
1697                  jumps to this alternative if the former fails.  */
1698               GET_BUFFER_SPACE (3);
1699               INSERT_JUMP (on_failure_jump, begalt, b + 6);
1700               pending_exact = 0;
1701               b += 3;
1702 
1703               /* The alternative before this one has a jump after it
1704                  which gets executed if it gets matched.  Adjust that
1705                  jump so it will jump to this alternative's analogous
1706                  jump (put in below, which in turn will jump to the next
1707                  (if any) alternative's such jump, etc.).  The last such
1708                  jump jumps to the correct final destination.  A picture:
1709                           _____ _____
1710                           |   | |   |
1711                           |   v |   v
1712                          a | b   | c
1713 
1714                  If we are at `b', then fixup_alt_jump right now points to a
1715                  three-byte space after `a'.  We'll put in the jump, set
1716                  fixup_alt_jump to right after `b', and leave behind three
1717                  bytes which we'll fill in when we get to after `c'.  */
1718 
1719               if (fixup_alt_jump)
1720                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
1721 
1722               /* Mark and leave space for a jump after this alternative,
1723                  to be filled in later either by next alternative or
1724                  when know we're at the end of a series of alternatives.  */
1725               fixup_alt_jump = b;
1726               GET_BUFFER_SPACE (3);
1727               b += 3;
1728 
1729               laststart = 0;
1730               begalt = b;
1731               break;
1732 
1733 
1734             case '{':
1735               /* If \{ is a literal.  */
1736               if (!(syntax & RE_INTERVALS)
1737                      /* If we're at `\{' and it's not the open-interval
1738                         operator.  */
1739                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1740                   || (p - 2 == pattern  &&  p == pend))
1741                 goto normal_backslash;
1742 
1743             handle_interval:
1744               {
1745                 /* If got here, then the syntax allows intervals.  */
1746 
1747                 /* At least (most) this many matches must be made.  */
1748                 int lower_bound = -1, upper_bound = -1;
1749 
1750                 beg_interval = p - 1;
1751 
1752                 if (p == pend)
1753                   {
1754                     if (syntax & RE_NO_BK_BRACES)
1755                       goto unfetch_interval;
1756                     else
1757                       return REG_EBRACE;
1758                   }
1759 
1760                 GET_UNSIGNED_NUMBER (lower_bound);
1761 
1762                 if (c == ',')
1763                   {
1764                     GET_UNSIGNED_NUMBER (upper_bound);
1765                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1766                   }
1767                 else
1768                   /* Interval such as `{1}' => match exactly once. */
1769                   upper_bound = lower_bound;
1770 
1771                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1772                     || lower_bound > upper_bound)
1773                   {
1774                     if (syntax & RE_NO_BK_BRACES)
1775                       goto unfetch_interval;
1776                     else
1777                       return REG_BADBR;
1778                   }
1779 
1780                 if (!(syntax & RE_NO_BK_BRACES))
1781                   {
1782                     if (c != '\\') return REG_EBRACE;
1783 
1784                     PATFETCH (c);
1785                   }
1786 
1787                 if (c != '}')
1788                   {
1789                     if (syntax & RE_NO_BK_BRACES)
1790                       goto unfetch_interval;
1791                     else
1792                       return REG_BADBR;
1793                   }
1794 
1795                 /* We just parsed a valid interval.  */
1796 
1797                 /* If it's invalid to have no preceding re.  */
1798                 if (!laststart)
1799                   {
1800                     if (syntax & RE_CONTEXT_INVALID_OPS)
1801                       return REG_BADRPT;
1802                     else if (syntax & RE_CONTEXT_INDEP_OPS)
1803                       laststart = b;
1804                     else
1805                       goto unfetch_interval;
1806                   }
1807 
1808                 /* If the upper bound is zero, don't want to succeed at
1809                    all; jump from `laststart' to `b + 3', which will be
1810                    the end of the buffer after we insert the jump.  */
1811                  if (upper_bound == 0)
1812                    {
1813                      GET_BUFFER_SPACE (3);
1814                      INSERT_JUMP (jump, laststart, b + 3);
1815                      b += 3;
1816                    }
1817 
1818                  /* Otherwise, we have a nontrivial interval.  When
1819                     we're all done, the pattern will look like:
1820                       set_number_at <jump count> <upper bound>
1821                       set_number_at <succeed_n count> <lower bound>
1822                       succeed_n <after jump addr> <succed_n count>
1823                       <body of loop>
1824                       jump_n <succeed_n addr> <jump count>
1825                     (The upper bound and `jump_n' are omitted if
1826                     `upper_bound' is 1, though.)  */
1827                  else
1828                    { /* If the upper bound is > 1, we need to insert
1829                         more at the end of the loop.  */
1830                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
1831 
1832                      GET_BUFFER_SPACE (nbytes);
1833 
1834                      /* Initialize lower bound of the `succeed_n', even
1835                         though it will be set during matching by its
1836                         attendant `set_number_at' (inserted next),
1837                         because `re_compile_fastmap' needs to know.
1838                         Jump to the `jump_n' we might insert below.  */
1839                      INSERT_JUMP2 (succeed_n, laststart,
1840                                    b + 5 + (upper_bound > 1) * 5,
1841                                    lower_bound);
1842                      b += 5;
1843 
1844                      /* Code to initialize the lower bound.  Insert
1845                         before the `succeed_n'.  The `5' is the last two
1846                         bytes of this `set_number_at', plus 3 bytes of
1847                         the following `succeed_n'.  */
1848                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
1849                      b += 5;
1850 
1851                      if (upper_bound > 1)
1852                        { /* More than one repetition is allowed, so
1853                             append a backward jump to the `succeed_n'
1854                             that starts this interval.
1855 
1856                             When we've reached this during matching,
1857                             we'll have matched the interval once, so
1858                             jump back only `upper_bound - 1' times.  */
1859                          STORE_JUMP2 (jump_n, b, laststart + 5,
1860                                       upper_bound - 1);
1861                          b += 5;
1862 
1863                          /* The location we want to set is the second
1864                             parameter of the `jump_n'; that is `b-2' as
1865                             an absolute address.  `laststart' will be
1866                             the `set_number_at' we're about to insert;
1867                             `laststart+3' the number to set, the source
1868                             for the relative address.  But we are
1869                             inserting into the middle of the pattern --
1870                             so everything is getting moved up by 5.
1871                             Conclusion: (b - 2) - (laststart + 3) + 5,
1872                             i.e., b - laststart.
1873 
1874                             We insert this at the beginning of the loop
1875                             so that if we fail during matching, we'll
1876                             reinitialize the bounds.  */
1877                          insert_op2 (set_number_at, laststart, b - laststart,
1878                                      upper_bound - 1, b);
1879                          b += 5;
1880                        }
1881                    }
1882                 pending_exact = 0;
1883                 beg_interval = NULL;
1884               }
1885               break;
1886 
1887             unfetch_interval:
1888               /* If an invalid interval, match the characters as literals.  */
1889                assert (beg_interval);
1890                p = beg_interval;
1891                beg_interval = NULL;
1892 
1893                /* normal_char and normal_backslash need `c'.  */
1894                PATFETCH (c);
1895 
1896                if (!(syntax & RE_NO_BK_BRACES))
1897                  {
1898                    if (p > pattern  &&  p[-1] == '\\')
1899                      goto normal_backslash;
1900                  }
1901                goto normal_char;
1902 
1903 #ifdef emacs
1904             /* There is no way to specify the before_dot and after_dot
1905                operators.  rms says this is ok.  --karl  */
1906             case '=':
1907               BUF_PUSH (at_dot);
1908               break;
1909 
1910             case 's':
1911               laststart = b;
1912               PATFETCH (c);
1913               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
1914               break;
1915 
1916             case 'S':
1917               laststart = b;
1918               PATFETCH (c);
1919               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
1920               break;
1921 #endif /* emacs */
1922 
1923 
1924             case 'w':
1925               laststart = b;
1926               BUF_PUSH (wordchar);
1927               break;
1928 
1929 
1930             case 'W':
1931               laststart = b;
1932               BUF_PUSH (notwordchar);
1933               break;
1934 
1935 
1936             case '<':
1937               BUF_PUSH (wordbeg);
1938               break;
1939 
1940             case '>':
1941               BUF_PUSH (wordend);
1942               break;
1943 
1944             case 'b':
1945               BUF_PUSH (wordbound);
1946               break;
1947 
1948             case 'B':
1949               BUF_PUSH (notwordbound);
1950               break;
1951 
1952             case '`':
1953               BUF_PUSH (begbuf);
1954               break;
1955 
1956             case '\'':
1957               BUF_PUSH (endbuf);
1958               break;
1959 
1960             case '1': case '2': case '3': case '4': case '5':
1961             case '6': case '7': case '8': case '9':
1962               if (syntax & RE_NO_BK_REFS)
1963                 goto normal_char;
1964 
1965               c1 = c - '0';
1966 
1967               if (c1 > regnum)
1968                 return REG_ESUBREG;
1969 
1970               /* Can't back reference to a subexpression if inside of it.  */
1971               if (group_in_compile_stack (compile_stack, c1))
1972                 goto normal_char;
1973 
1974               laststart = b;
1975               BUF_PUSH_2 (duplicate, c1);
1976               break;
1977 
1978 
1979             case '+':
1980             case '?':
1981               if (syntax & RE_BK_PLUS_QM)
1982                 goto handle_plus;
1983               else
1984                 goto normal_backslash;
1985 
1986             default:
1987             normal_backslash:
1988               /* You might think it would be useful for \ to mean
1989                  not to translate; but if we don't translate it
1990                  it will never match anything.  */
1991               c = TRANSLATE (c);
1992               goto normal_char;
1993             }
1994           break;
1995 
1996 
1997 	default:
1998         /* Expects the character in `c'.  */
1999 	normal_char:
2000 	      /* If no exactn currently being built.  */
2001           if (!pending_exact
2002 
2003               /* If last exactn not at current position.  */
2004               || pending_exact + *pending_exact + 1 != b
2005 
2006               /* We have only one byte following the exactn for the count.  */
2007 	      || *pending_exact == (1 << BYTEWIDTH) - 1
2008 
2009               /* If followed by a repetition operator.  */
2010               || *p == '*' || *p == '^'
2011 	      || ((syntax & RE_BK_PLUS_QM)
2012 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2013 		  : (*p == '+' || *p == '?'))
2014 	      || ((syntax & RE_INTERVALS)
2015                   && ((syntax & RE_NO_BK_BRACES)
2016 		      ? *p == '{'
2017                       : (p[0] == '\\' && p[1] == '{'))))
2018 	    {
2019 	      /* Start building a new exactn.  */
2020 
2021               laststart = b;
2022 
2023 	      BUF_PUSH_2 (exactn, 0);
2024 	      pending_exact = b - 1;
2025             }
2026 
2027 	  BUF_PUSH (c);
2028           (*pending_exact)++;
2029 	  break;
2030         } /* switch (c) */
2031     } /* while p != pend */
2032 
2033 
2034   /* Through the pattern now.  */
2035 
2036   if (fixup_alt_jump)
2037     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2038 
2039   if (!COMPILE_STACK_EMPTY)
2040     return REG_EPAREN;
2041 
2042   free (compile_stack.stack);
2043 
2044   /* We have succeeded; set the length of the buffer.  */
2045   bufp->used = b - bufp->buffer;
2046 
2047 #ifdef DEBUG
2048   if (debug)
2049     {
2050       DEBUG_PRINT1 ("\nCompiled pattern: ");
2051       print_compiled_pattern (bufp);
2052     }
2053 #endif /* DEBUG */
2054 
2055   return REG_NOERROR;
2056 } /* regex_compile */
2057 
2058 /* Subroutines for `regex_compile'.  */
2059 
2060 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
2061 
2062 static void
store_op1(op,loc,arg)2063 store_op1 (op, loc, arg)
2064     re_opcode_t op;
2065     unsigned char *loc;
2066     int arg;
2067 {
2068   *loc = (unsigned char) op;
2069   STORE_NUMBER (loc + 1, arg);
2070 }
2071 
2072 
2073 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2074 
2075 static void
store_op2(op,loc,arg1,arg2)2076 store_op2 (op, loc, arg1, arg2)
2077     re_opcode_t op;
2078     unsigned char *loc;
2079     int arg1, arg2;
2080 {
2081   *loc = (unsigned char) op;
2082   STORE_NUMBER (loc + 1, arg1);
2083   STORE_NUMBER (loc + 3, arg2);
2084 }
2085 
2086 
2087 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2088    for OP followed by two-byte integer parameter ARG.  */
2089 
2090 static void
insert_op1(op,loc,arg,end)2091 insert_op1 (op, loc, arg, end)
2092     re_opcode_t op;
2093     unsigned char *loc;
2094     int arg;
2095     unsigned char *end;
2096 {
2097   register unsigned char *pfrom = end;
2098   register unsigned char *pto = end + 3;
2099 
2100   while (pfrom != loc)
2101     *--pto = *--pfrom;
2102 
2103   store_op1 (op, loc, arg);
2104 }
2105 
2106 
2107 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2108 
2109 static void
insert_op2(op,loc,arg1,arg2,end)2110 insert_op2 (op, loc, arg1, arg2, end)
2111     re_opcode_t op;
2112     unsigned char *loc;
2113     int arg1, arg2;
2114     unsigned char *end;
2115 {
2116   register unsigned char *pfrom = end;
2117   register unsigned char *pto = end + 5;
2118 
2119   while (pfrom != loc)
2120     *--pto = *--pfrom;
2121 
2122   store_op2 (op, loc, arg1, arg2);
2123 }
2124 
2125 
2126 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2127    after an alternative or a begin-subexpression.  We assume there is at
2128    least one character before the ^.  */
2129 
2130 static boolean
at_begline_loc_p(pattern,p,syntax)2131 at_begline_loc_p (pattern, p, syntax)
2132     const char *pattern, *p;
2133     reg_syntax_t syntax;
2134 {
2135   const char *prev = p - 2;
2136   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2137 
2138   return
2139        /* After a subexpression?  */
2140        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2141        /* After an alternative?  */
2142     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2143 }
2144 
2145 
2146 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2147    at least one character after the $, i.e., `P < PEND'.  */
2148 
2149 static boolean
at_endline_loc_p(p,pend,syntax)2150 at_endline_loc_p (p, pend, syntax)
2151     const char *p, *pend;
2152     int syntax;
2153 {
2154   const char *next = p;
2155   boolean next_backslash = *next == '\\';
2156   const char *next_next = p + 1 < pend ? p + 1 : NULL;
2157 
2158   return
2159        /* Before a subexpression?  */
2160        (syntax & RE_NO_BK_PARENS ? *next == ')'
2161         : next_backslash && next_next && *next_next == ')')
2162        /* Before an alternative?  */
2163     || (syntax & RE_NO_BK_VBAR ? *next == '|'
2164         : next_backslash && next_next && *next_next == '|');
2165 }
2166 
2167 
2168 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2169    false if it's not.  */
2170 
2171 static boolean
group_in_compile_stack(compile_stack,regnum)2172 group_in_compile_stack (compile_stack, regnum)
2173     compile_stack_type compile_stack;
2174     regnum_t regnum;
2175 {
2176   int this_element;
2177 
2178   for (this_element = compile_stack.avail - 1;
2179        this_element >= 0;
2180        this_element--)
2181     if (compile_stack.stack[this_element].regnum == regnum)
2182       return true;
2183 
2184   return false;
2185 }
2186 
2187 
2188 /* Read the ending character of a range (in a bracket expression) from the
2189    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2190    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2191    Then we set the translation of all bits between the starting and
2192    ending characters (inclusive) in the compiled pattern B.
2193 
2194    Return an error code.
2195 
2196    We use these short variable names so we can use the same macros as
2197    `regex_compile' itself.  */
2198 
2199 static reg_errcode_t
compile_range(p_ptr,pend,translate,syntax,b)2200 compile_range (p_ptr, pend, translate, syntax, b)
2201     const char **p_ptr, *pend;
2202     char *translate;
2203     reg_syntax_t syntax;
2204     unsigned char *b;
2205 {
2206   unsigned this_char;
2207 
2208   const char *p = *p_ptr;
2209   int range_start, range_end;
2210 
2211   if (p == pend)
2212     return REG_ERANGE;
2213 
2214   /* Even though the pattern is a signed `char *', we need to fetch
2215      with unsigned char *'s; if the high bit of the pattern character
2216      is set, the range endpoints will be negative if we fetch using a
2217      signed char *.
2218 
2219      We also want to fetch the endpoints without translating them; the
2220      appropriate translation is done in the bit-setting loop below.  */
2221   range_start = ((unsigned char *) p)[-2];
2222   range_end   = ((unsigned char *) p)[0];
2223 
2224   /* Have to increment the pointer into the pattern string, so the
2225      caller isn't still at the ending character.  */
2226   (*p_ptr)++;
2227 
2228   /* If the start is after the end, the range is empty.  */
2229   if (range_start > range_end)
2230     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2231 
2232   /* Here we see why `this_char' has to be larger than an `unsigned
2233      char' -- the range is inclusive, so if `range_end' == 0xff
2234      (assuming 8-bit characters), we would otherwise go into an infinite
2235      loop, since all characters <= 0xff.  */
2236   for (this_char = range_start; this_char <= range_end; this_char++)
2237     {
2238       SET_LIST_BIT (TRANSLATE (this_char));
2239     }
2240 
2241   return REG_NOERROR;
2242 }
2243 
2244 /* Failure stack declarations and macros; both re_compile_fastmap and
2245    re_match_2 use a failure stack.  These have to be macros because of
2246    REGEX_ALLOCATE.  */
2247 
2248 
2249 /* Number of failure points for which to initially allocate space
2250    when matching.  If this number is exceeded, we allocate more
2251    space, so it is not a hard limit.  */
2252 #ifndef INIT_FAILURE_ALLOC
2253 #define INIT_FAILURE_ALLOC 5
2254 #endif
2255 
2256 /* Roughly the maximum number of failure points on the stack.  Would be
2257    exactly that if always used MAX_FAILURE_SPACE each time we failed.
2258    This is a variable only so users of regex can assign to it; we never
2259    change it ourselves.  */
2260 int re_max_failures = 2000;
2261 
2262 typedef const unsigned char *fail_stack_elt_t;
2263 
2264 typedef struct
2265 {
2266   fail_stack_elt_t *stack;
2267   unsigned size;
2268   unsigned avail;			/* Offset of next open position.  */
2269 } fail_stack_type;
2270 
2271 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
2272 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2273 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
2274 #define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
2275 
2276 
2277 /* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
2278 
2279 #define INIT_FAIL_STACK()						\
2280   do {									\
2281     fail_stack.stack = (fail_stack_elt_t *)				\
2282       REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
2283 									\
2284     if (fail_stack.stack == NULL)					\
2285       return -2;							\
2286 									\
2287     fail_stack.size = INIT_FAILURE_ALLOC;				\
2288     fail_stack.avail = 0;						\
2289   } while (0)
2290 
2291 
2292 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2293 
2294    Return 1 if succeeds, and 0 if either ran out of memory
2295    allocating space for it or it was already too large.
2296 
2297    REGEX_REALLOCATE requires `destination' be declared.   */
2298 
2299 #define DOUBLE_FAIL_STACK(fail_stack)					\
2300   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
2301    ? 0									\
2302    : ((fail_stack).stack = (fail_stack_elt_t *)				\
2303         REGEX_REALLOCATE ((fail_stack).stack, 				\
2304           (fail_stack).size * sizeof (fail_stack_elt_t),		\
2305           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
2306 									\
2307       (fail_stack).stack == NULL					\
2308       ? 0								\
2309       : ((fail_stack).size <<= 1, 					\
2310          1)))
2311 
2312 
2313 /* Push PATTERN_OP on FAIL_STACK.
2314 
2315    Return 1 if was able to do so and 0 if ran out of memory allocating
2316    space to do so.  */
2317 #define PUSH_PATTERN_OP(pattern_op, fail_stack)				\
2318   ((FAIL_STACK_FULL ()							\
2319     && !DOUBLE_FAIL_STACK (fail_stack))					\
2320     ? 0									\
2321     : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,		\
2322        1))
2323 
2324 /* This pushes an item onto the failure stack.  Must be a four-byte
2325    value.  Assumes the variable `fail_stack'.  Probably should only
2326    be called from within `PUSH_FAILURE_POINT'.  */
2327 #define PUSH_FAILURE_ITEM(item)						\
2328   fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2329 
2330 /* The complement operation.  Assumes `fail_stack' is nonempty.  */
2331 #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2332 
2333 /* Used to omit pushing failure point id's when we're not debugging.  */
2334 #ifdef DEBUG
2335 #define DEBUG_PUSH PUSH_FAILURE_ITEM
2336 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2337 #else
2338 #define DEBUG_PUSH(item)
2339 #define DEBUG_POP(item_addr)
2340 #endif
2341 
2342 
2343 /* Push the information about the state we will need
2344    if we ever fail back to it.
2345 
2346    Requires variables fail_stack, regstart, regend, reg_info, and
2347    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
2348    declared.
2349 
2350    Does `return FAILURE_CODE' if runs out of memory.  */
2351 
2352 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
2353   do {									\
2354     char *destination;							\
2355     /* Must be int, so when we don't save any registers, the arithmetic	\
2356        of 0 + -1 isn't done as unsigned.  */				\
2357     int this_reg;							\
2358     									\
2359     DEBUG_STATEMENT (failure_id++);					\
2360     DEBUG_STATEMENT (nfailure_points_pushed++);				\
2361     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
2362     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
2363     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
2364 									\
2365     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
2366     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
2367 									\
2368     /* Ensure we have enough space allocated for what we will push.  */	\
2369     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
2370       {									\
2371         if (!DOUBLE_FAIL_STACK (fail_stack))			\
2372           return failure_code;						\
2373 									\
2374         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
2375 		       (fail_stack).size);				\
2376         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2377       }									\
2378 									\
2379     /* Push the info, starting with the registers.  */			\
2380     DEBUG_PRINT1 ("\n");						\
2381 									\
2382     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;	\
2383          this_reg++)							\
2384       {									\
2385 	DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);			\
2386         DEBUG_STATEMENT (num_regs_pushed++);				\
2387 									\
2388 	DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);		\
2389         PUSH_FAILURE_ITEM (regstart[this_reg]);				\
2390                                                                         \
2391 	DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
2392         PUSH_FAILURE_ITEM (regend[this_reg]);				\
2393 									\
2394 	DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
2395         DEBUG_PRINT2 (" match_null=%d",					\
2396                       REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
2397         DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
2398         DEBUG_PRINT2 (" matched_something=%d",				\
2399                       MATCHED_SOMETHING (reg_info[this_reg]));		\
2400         DEBUG_PRINT2 (" ever_matched=%d",				\
2401                       EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
2402 	DEBUG_PRINT1 ("\n");						\
2403         PUSH_FAILURE_ITEM (reg_info[this_reg].word);			\
2404       }									\
2405 									\
2406     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
2407     PUSH_FAILURE_ITEM (lowest_active_reg);				\
2408 									\
2409     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
2410     PUSH_FAILURE_ITEM (highest_active_reg);				\
2411 									\
2412     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
2413     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
2414     PUSH_FAILURE_ITEM (pattern_place);					\
2415 									\
2416     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
2417     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
2418 				 size2);				\
2419     DEBUG_PRINT1 ("'\n");						\
2420     PUSH_FAILURE_ITEM (string_place);					\
2421 									\
2422     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
2423     DEBUG_PUSH (failure_id);						\
2424   } while (0)
2425 
2426 /* This is the number of items that are pushed and popped on the stack
2427    for each register.  */
2428 #define NUM_REG_ITEMS  3
2429 
2430 /* Individual items aside from the registers.  */
2431 #ifdef DEBUG
2432 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
2433 #else
2434 #define NUM_NONREG_ITEMS 4
2435 #endif
2436 
2437 /* We push at most this many items on the stack.  */
2438 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2439 
2440 /* We actually push this many items.  */
2441 #define NUM_FAILURE_ITEMS						\
2442   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
2443     + NUM_NONREG_ITEMS)
2444 
2445 /* How many items can still be added to the stack without overflowing it.  */
2446 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2447 
2448 
2449 /* Pops what PUSH_FAIL_STACK pushes.
2450 
2451    We restore into the parameters, all of which should be lvalues:
2452      STR -- the saved data position.
2453      PAT -- the saved pattern position.
2454      LOW_REG, HIGH_REG -- the highest and lowest active registers.
2455      REGSTART, REGEND -- arrays of string positions.
2456      REG_INFO -- array of information about each subexpression.
2457 
2458    Also assumes the variables `fail_stack' and (if debugging), `bufp',
2459    `pend', `string1', `size1', `string2', and `size2'.  */
2460 
2461 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2462 {									\
2463   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
2464   int this_reg;								\
2465   const unsigned char *string_temp;					\
2466 									\
2467   assert (!FAIL_STACK_EMPTY ());					\
2468 									\
2469   /* Remove failure points and point to how many regs pushed.  */	\
2470   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
2471   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
2472   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
2473 									\
2474   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
2475 									\
2476   DEBUG_POP (&failure_id);						\
2477   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
2478 									\
2479   /* If the saved string location is NULL, it came from an		\
2480      on_failure_keep_string_jump opcode, and we want to throw away the	\
2481      saved NULL, thus retaining our current position in the string.  */	\
2482   string_temp = POP_FAILURE_ITEM ();					\
2483   if (string_temp != NULL)						\
2484     str = (const char *) string_temp;					\
2485 									\
2486   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
2487   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
2488   DEBUG_PRINT1 ("'\n");							\
2489 									\
2490   pat = (unsigned char *) POP_FAILURE_ITEM ();				\
2491   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
2492   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
2493 									\
2494   /* Restore register info.  */						\
2495   high_reg = (unsigned) POP_FAILURE_ITEM ();				\
2496   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
2497 									\
2498   low_reg = (unsigned) POP_FAILURE_ITEM ();				\
2499   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
2500 									\
2501   for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
2502     {									\
2503       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);			\
2504 									\
2505       reg_info[this_reg].word = POP_FAILURE_ITEM ();			\
2506       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);		\
2507 									\
2508       regend[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2509       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
2510 									\
2511       regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();		\
2512       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);		\
2513     }									\
2514 									\
2515   DEBUG_STATEMENT (nfailure_points_popped++);				\
2516 } /* POP_FAILURE_POINT */
2517 
2518 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2519    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2520    characters can start a string that matches the pattern.  This fastmap
2521    is used by re_search to skip quickly over impossible starting points.
2522 
2523    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2524    area as BUFP->fastmap.
2525 
2526    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2527    the pattern buffer.
2528 
2529    Returns 0 if we succeed, -2 if an internal error.   */
2530 
2531 int
re_compile_fastmap(bufp)2532 re_compile_fastmap (bufp)
2533      struct re_pattern_buffer *bufp;
2534 {
2535   int j, k;
2536   fail_stack_type fail_stack;
2537 #ifndef REGEX_MALLOC
2538   char *destination;
2539 #endif
2540   /* We don't push any register information onto the failure stack.  */
2541   unsigned num_regs = 0;
2542 
2543   register char *fastmap = bufp->fastmap;
2544   unsigned char *pattern = bufp->buffer;
2545   unsigned long size = bufp->used;
2546   const unsigned char *p = pattern;
2547   register unsigned char *pend = pattern + size;
2548 
2549   /* Assume that each path through the pattern can be null until
2550      proven otherwise.  We set this false at the bottom of switch
2551      statement, to which we get only if a particular path doesn't
2552      match the empty string.  */
2553   boolean path_can_be_null = true;
2554 
2555   /* We aren't doing a `succeed_n' to begin with.  */
2556   boolean succeed_n_p = false;
2557 
2558   assert (fastmap != NULL && p != NULL);
2559 
2560   INIT_FAIL_STACK ();
2561   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2562   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
2563   bufp->can_be_null = 0;
2564 
2565   while (p != pend || !FAIL_STACK_EMPTY ())
2566     {
2567       if (p == pend)
2568         {
2569           bufp->can_be_null |= path_can_be_null;
2570 
2571           /* Reset for next path.  */
2572           path_can_be_null = true;
2573 
2574           p = fail_stack.stack[--fail_stack.avail];
2575 	}
2576 
2577       /* We should never be about to go beyond the end of the pattern.  */
2578       assert (p < pend);
2579 
2580 #ifdef SWITCH_ENUM_BUG
2581       switch ((int) ((re_opcode_t) *p++))
2582 #else
2583       switch ((re_opcode_t) *p++)
2584 #endif
2585 	{
2586 
2587         /* I guess the idea here is to simply not bother with a fastmap
2588            if a backreference is used, since it's too hard to figure out
2589            the fastmap for the corresponding group.  Setting
2590            `can_be_null' stops `re_search_2' from using the fastmap, so
2591            that is all we do.  */
2592 	case duplicate:
2593 	  bufp->can_be_null = 1;
2594           return 0;
2595 
2596 
2597       /* Following are the cases which match a character.  These end
2598          with `break'.  */
2599 
2600 	case exactn:
2601           fastmap[p[1]] = 1;
2602 	  break;
2603 
2604 
2605         case charset:
2606           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2607 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2608               fastmap[j] = 1;
2609 	  break;
2610 
2611 
2612 	case charset_not:
2613 	  /* Chars beyond end of map must be allowed.  */
2614 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2615             fastmap[j] = 1;
2616 
2617 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2618 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2619               fastmap[j] = 1;
2620           break;
2621 
2622 
2623 	case wordchar:
2624 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2625 	    if (SYNTAX (j) == Sword)
2626 	      fastmap[j] = 1;
2627 	  break;
2628 
2629 
2630 	case notwordchar:
2631 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2632 	    if (SYNTAX (j) != Sword)
2633 	      fastmap[j] = 1;
2634 	  break;
2635 
2636 
2637         case anychar:
2638           /* `.' matches anything ...  */
2639 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2640             fastmap[j] = 1;
2641 
2642           /* ... except perhaps newline.  */
2643           if (!(bufp->syntax & RE_DOT_NEWLINE))
2644             fastmap['\n'] = 0;
2645 
2646           /* Return if we have already set `can_be_null'; if we have,
2647              then the fastmap is irrelevant.  Something's wrong here.  */
2648 	  else if (bufp->can_be_null)
2649 	    return 0;
2650 
2651           /* Otherwise, have to check alternative paths.  */
2652 	  break;
2653 
2654 
2655 #ifdef emacs
2656         case syntaxspec:
2657 	  k = *p++;
2658 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2659 	    if (SYNTAX (j) == (enum syntaxcode) k)
2660 	      fastmap[j] = 1;
2661 	  break;
2662 
2663 
2664 	case notsyntaxspec:
2665 	  k = *p++;
2666 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2667 	    if (SYNTAX (j) != (enum syntaxcode) k)
2668 	      fastmap[j] = 1;
2669 	  break;
2670 
2671 
2672       /* All cases after this match the empty string.  These end with
2673          `continue'.  */
2674 
2675 
2676 	case before_dot:
2677 	case at_dot:
2678 	case after_dot:
2679           continue;
2680 #endif /* not emacs */
2681 
2682 
2683         case no_op:
2684         case begline:
2685         case endline:
2686 	case begbuf:
2687 	case endbuf:
2688 	case wordbound:
2689 	case notwordbound:
2690 	case wordbeg:
2691 	case wordend:
2692         case push_dummy_failure:
2693           continue;
2694 
2695 
2696 	case jump_n:
2697         case pop_failure_jump:
2698 	case maybe_pop_jump:
2699 	case jump:
2700         case jump_past_alt:
2701 	case dummy_failure_jump:
2702           EXTRACT_NUMBER_AND_INCR (j, p);
2703 	  p += j;
2704 	  if (j > 0)
2705 	    continue;
2706 
2707           /* Jump backward implies we just went through the body of a
2708              loop and matched nothing.  Opcode jumped to should be
2709              `on_failure_jump' or `succeed_n'.  Just treat it like an
2710              ordinary jump.  For a * loop, it has pushed its failure
2711              point already; if so, discard that as redundant.  */
2712           if ((re_opcode_t) *p != on_failure_jump
2713 	      && (re_opcode_t) *p != succeed_n)
2714 	    continue;
2715 
2716           p++;
2717           EXTRACT_NUMBER_AND_INCR (j, p);
2718           p += j;
2719 
2720           /* If what's on the stack is where we are now, pop it.  */
2721           if (!FAIL_STACK_EMPTY ()
2722 	      && fail_stack.stack[fail_stack.avail - 1] == p)
2723             fail_stack.avail--;
2724 
2725           continue;
2726 
2727 
2728         case on_failure_jump:
2729         case on_failure_keep_string_jump:
2730 	handle_on_failure_jump:
2731           EXTRACT_NUMBER_AND_INCR (j, p);
2732 
2733           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2734              end of the pattern.  We don't want to push such a point,
2735              since when we restore it above, entering the switch will
2736              increment `p' past the end of the pattern.  We don't need
2737              to push such a point since we obviously won't find any more
2738              fastmap entries beyond `pend'.  Such a pattern can match
2739              the null string, though.  */
2740           if (p + j < pend)
2741             {
2742               if (!PUSH_PATTERN_OP (p + j, fail_stack))
2743                 return -2;
2744             }
2745           else
2746             bufp->can_be_null = 1;
2747 
2748           if (succeed_n_p)
2749             {
2750               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
2751               succeed_n_p = false;
2752 	    }
2753 
2754           continue;
2755 
2756 
2757 	case succeed_n:
2758           /* Get to the number of times to succeed.  */
2759           p += 2;
2760 
2761           /* Increment p past the n for when k != 0.  */
2762           EXTRACT_NUMBER_AND_INCR (k, p);
2763           if (k == 0)
2764 	    {
2765               p -= 4;
2766   	      succeed_n_p = true;  /* Spaghetti code alert.  */
2767               goto handle_on_failure_jump;
2768             }
2769           continue;
2770 
2771 
2772 	case set_number_at:
2773           p += 4;
2774           continue;
2775 
2776 
2777 	case start_memory:
2778         case stop_memory:
2779 	  p += 2;
2780 	  continue;
2781 
2782 
2783 	default:
2784           abort (); /* We have listed all the cases.  */
2785         } /* switch *p++ */
2786 
2787       /* Getting here means we have found the possible starting
2788          characters for one path of the pattern -- and that the empty
2789          string does not match.  We need not follow this path further.
2790          Instead, look at the next alternative (remembered on the
2791          stack), or quit if no more.  The test at the top of the loop
2792          does these things.  */
2793       path_can_be_null = false;
2794       p = pend;
2795     } /* while p */
2796 
2797   /* Set `can_be_null' for the last path (also the first path, if the
2798      pattern is empty).  */
2799   bufp->can_be_null |= path_can_be_null;
2800   return 0;
2801 } /* re_compile_fastmap */
2802 
2803 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
2804    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
2805    this memory for recording register information.  STARTS and ENDS
2806    must be allocated using the malloc library routine, and must each
2807    be at least NUM_REGS * sizeof (regoff_t) bytes long.
2808 
2809    If NUM_REGS == 0, then subsequent matches should allocate their own
2810    register data.
2811 
2812    Unless this function is called, the first search or match using
2813    PATTERN_BUFFER will allocate its own register data, without
2814    freeing the old data.  */
2815 
2816 void
re_set_registers(bufp,regs,num_regs,starts,ends)2817 re_set_registers (bufp, regs, num_regs, starts, ends)
2818     struct re_pattern_buffer *bufp;
2819     struct re_registers *regs;
2820     unsigned num_regs;
2821     regoff_t *starts, *ends;
2822 {
2823   if (num_regs)
2824     {
2825       bufp->regs_allocated = REGS_REALLOCATE;
2826       regs->num_regs = num_regs;
2827       regs->start = starts;
2828       regs->end = ends;
2829     }
2830   else
2831     {
2832       bufp->regs_allocated = REGS_UNALLOCATED;
2833       regs->num_regs = 0;
2834       regs->start = regs->end = (regoff_t) 0;
2835     }
2836 }
2837 
2838 /* Searching routines.  */
2839 
2840 /* Like re_search_2, below, but only one string is specified, and
2841    doesn't let you say where to stop matching. */
2842 
2843 int
re_search(bufp,string,size,startpos,range,regs)2844 re_search (bufp, string, size, startpos, range, regs)
2845      struct re_pattern_buffer *bufp;
2846      const char *string;
2847      int size, startpos, range;
2848      struct re_registers *regs;
2849 {
2850   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
2851 		      regs, size);
2852 }
2853 
2854 
2855 /* Using the compiled pattern in BUFP->buffer, first tries to match the
2856    virtual concatenation of STRING1 and STRING2, starting first at index
2857    STARTPOS, then at STARTPOS + 1, and so on.
2858 
2859    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2860 
2861    RANGE is how far to scan while trying to match.  RANGE = 0 means try
2862    only at STARTPOS; in general, the last start tried is STARTPOS +
2863    RANGE.
2864 
2865    In REGS, return the indices of the virtual concatenation of STRING1
2866    and STRING2 that matched the entire BUFP->buffer and its contained
2867    subexpressions.
2868 
2869    Do not consider matching one past the index STOP in the virtual
2870    concatenation of STRING1 and STRING2.
2871 
2872    We return either the position in the strings at which the match was
2873    found, -1 if no match, or -2 if error (such as failure
2874    stack overflow).  */
2875 
2876 int
re_search_2(bufp,string1,size1,string2,size2,startpos,range,regs,stop)2877 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2878      struct re_pattern_buffer *bufp;
2879      const char *string1, *string2;
2880      int size1, size2;
2881      int startpos;
2882      int range;
2883      struct re_registers *regs;
2884      int stop;
2885 {
2886   int val;
2887   register char *fastmap = bufp->fastmap;
2888   register char *translate = bufp->translate;
2889   int total_size = size1 + size2;
2890   int endpos = startpos + range;
2891 
2892   /* Check for out-of-range STARTPOS.  */
2893   if (startpos < 0 || startpos > total_size)
2894     return -1;
2895 
2896   /* Fix up RANGE if it might eventually take us outside
2897      the virtual concatenation of STRING1 and STRING2.  */
2898   if (endpos < -1)
2899     range = -1 - startpos;
2900   else if (endpos > total_size)
2901     range = total_size - startpos;
2902 
2903   /* If the search isn't to be a backwards one, don't waste time in a
2904      search for a pattern that must be anchored.  */
2905   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
2906     {
2907       if (startpos > 0)
2908 	return -1;
2909       else
2910 	range = 1;
2911     }
2912 
2913   /* Update the fastmap now if not correct already.  */
2914   if (fastmap && !bufp->fastmap_accurate)
2915     if (re_compile_fastmap (bufp) == -2)
2916       return -2;
2917 
2918   /* Loop through the string, looking for a place to start matching.  */
2919   for (;;)
2920     {
2921       /* If a fastmap is supplied, skip quickly over characters that
2922          cannot be the start of a match.  If the pattern can match the
2923          null string, however, we don't need to skip characters; we want
2924          the first null string.  */
2925       if (fastmap && startpos < total_size && !bufp->can_be_null)
2926 	{
2927 	  if (range > 0)	/* Searching forwards.  */
2928 	    {
2929 	      register const char *d;
2930 	      register int lim = 0;
2931 	      int irange = range;
2932 
2933               if (startpos < size1 && startpos + range >= size1)
2934                 lim = range - (size1 - startpos);
2935 
2936 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2937 
2938               /* Written out as an if-else to avoid testing `translate'
2939                  inside the loop.  */
2940 	      if (translate)
2941                 while (range > lim
2942                        && !fastmap[(unsigned char)
2943 				   translate[(unsigned char) *d++]])
2944                   range--;
2945 	      else
2946                 while (range > lim && !fastmap[(unsigned char) *d++])
2947                   range--;
2948 
2949 	      startpos += irange - range;
2950 	    }
2951 	  else				/* Searching backwards.  */
2952 	    {
2953 	      register char c = (size1 == 0 || startpos >= size1
2954                                  ? string2[startpos - size1]
2955                                  : string1[startpos]);
2956 
2957 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
2958 		goto advance;
2959 	    }
2960 	}
2961 
2962       /* If can't match the null string, and that's all we have left, fail.  */
2963       if (range >= 0 && startpos == total_size && fastmap
2964           && !bufp->can_be_null)
2965 	return -1;
2966 
2967       val = re_match_2 (bufp, string1, size1, string2, size2,
2968 	                startpos, regs, stop);
2969       if (val >= 0)
2970 	return startpos;
2971 
2972       if (val == -2)
2973 	return -2;
2974 
2975     advance:
2976       if (!range)
2977         break;
2978       else if (range > 0)
2979         {
2980           range--;
2981           startpos++;
2982         }
2983       else
2984         {
2985           range++;
2986           startpos--;
2987         }
2988     }
2989   return -1;
2990 } /* re_search_2 */
2991 
2992 /* Declarations and macros for re_match_2.  */
2993 
2994 static int bcmp_translate ();
2995 static boolean alt_match_null_string_p (),
2996                common_op_match_null_string_p (),
2997                group_match_null_string_p ();
2998 
2999 /* Structure for per-register (a.k.a. per-group) information.
3000    This must not be longer than one word, because we push this value
3001    onto the failure stack.  Other register information, such as the
3002    starting and ending positions (which are addresses), and the list of
3003    inner groups (which is a bits list) are maintained in separate
3004    variables.
3005 
3006    We are making a (strictly speaking) nonportable assumption here: that
3007    the compiler will pack our bit fields into something that fits into
3008    the type of `word', i.e., is something that fits into one item on the
3009    failure stack.  */
3010 typedef union
3011 {
3012   fail_stack_elt_t word;
3013   struct
3014   {
3015       /* This field is one if this group can match the empty string,
3016          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
3017 #define MATCH_NULL_UNSET_VALUE 3
3018     unsigned match_null_string_p : 2;
3019     unsigned is_active : 1;
3020     unsigned matched_something : 1;
3021     unsigned ever_matched_something : 1;
3022   } bits;
3023 } register_info_type;
3024 
3025 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
3026 #define IS_ACTIVE(R)  ((R).bits.is_active)
3027 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
3028 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
3029 
3030 
3031 /* Call this when have matched a real character; it sets `matched' flags
3032    for the subexpressions which we are currently inside.  Also records
3033    that those subexprs have matched.  */
3034 #define SET_REGS_MATCHED()						\
3035   do									\
3036     {									\
3037       unsigned r;							\
3038       for (r = lowest_active_reg; r <= highest_active_reg; r++)		\
3039         {								\
3040           MATCHED_SOMETHING (reg_info[r])				\
3041             = EVER_MATCHED_SOMETHING (reg_info[r])			\
3042             = 1;							\
3043         }								\
3044     }									\
3045   while (0)
3046 
3047 
3048 /* This converts PTR, a pointer into one of the search strings `string1'
3049    and `string2' into an offset from the beginning of that string.  */
3050 #define POINTER_TO_OFFSET(ptr)						\
3051   (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
3052 
3053 /* Registers are set to a sentinel when they haven't yet matched.  */
3054 #define REG_UNSET_VALUE ((char *) -1)
3055 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3056 
3057 
3058 /* Macros for dealing with the split strings in re_match_2.  */
3059 
3060 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3061 
3062 /* Call before fetching a character with *d.  This switches over to
3063    string2 if necessary.  */
3064 #define PREFETCH()							\
3065   while (d == dend)						    	\
3066     {									\
3067       /* End of string2 => fail.  */					\
3068       if (dend == end_match_2) 						\
3069         goto fail;							\
3070       /* End of string1 => advance to string2.  */ 			\
3071       d = string2;						        \
3072       dend = end_match_2;						\
3073     }
3074 
3075 
3076 /* Test if at very beginning or at very end of the virtual concatenation
3077    of `string1' and `string2'.  If only one string, it's `string2'.  */
3078 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3079 #define AT_STRINGS_END(d) ((d) == end2)
3080 
3081 
3082 /* Test if D points to a character which is word-constituent.  We have
3083    two special cases to check for: if past the end of string1, look at
3084    the first character in string2; and if before the beginning of
3085    string2, look at the last character in string1.  */
3086 #define WORDCHAR_P(d)							\
3087   (SYNTAX ((d) == end1 ? *string2					\
3088            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3089    == Sword)
3090 
3091 /* Test if the character before D and the one at D differ with respect
3092    to being word-constituent.  */
3093 #define AT_WORD_BOUNDARY(d)						\
3094   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3095    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3096 
3097 
3098 /* Free everything we malloc.  */
3099 #ifdef REGEX_MALLOC
3100 #define FREE_VAR(var) if (var) free (var); var = NULL
3101 #define FREE_VARIABLES()						\
3102   do {									\
3103     FREE_VAR (fail_stack.stack);					\
3104     FREE_VAR (regstart);						\
3105     FREE_VAR (regend);							\
3106     FREE_VAR (old_regstart);						\
3107     FREE_VAR (old_regend);						\
3108     FREE_VAR (best_regstart);						\
3109     FREE_VAR (best_regend);						\
3110     FREE_VAR (reg_info);						\
3111     FREE_VAR (reg_dummy);						\
3112     FREE_VAR (reg_info_dummy);						\
3113   } while (0)
3114 #else /* not REGEX_MALLOC */
3115 /* Some MIPS systems (at least) want this to free alloca'd storage.  */
3116 #define FREE_VARIABLES() alloca (0)
3117 #endif /* not REGEX_MALLOC */
3118 
3119 
3120 /* These values must meet several constraints.  They must not be valid
3121    register values; since we have a limit of 255 registers (because
3122    we use only one byte in the pattern for the register number), we can
3123    use numbers larger than 255.  They must differ by 1, because of
3124    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3125    be larger than the value for the highest register, so we do not try
3126    to actually save any registers when none are active.  */
3127 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3128 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3129 
3130 /* Matching routines.  */
3131 
3132 #ifndef emacs   /* Emacs never uses this.  */
3133 /* re_match is like re_match_2 except it takes only a single string.  */
3134 
3135 int
re_match(bufp,string,size,pos,regs)3136 re_match (bufp, string, size, pos, regs)
3137      struct re_pattern_buffer *bufp;
3138      const char *string;
3139      int size, pos;
3140      struct re_registers *regs;
3141  {
3142   return re_match_2 (bufp, NULL, 0, string, size, pos, regs, size);
3143 }
3144 #endif /* not emacs */
3145 
3146 
3147 /* re_match_2 matches the compiled pattern in BUFP against the
3148    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3149    and SIZE2, respectively).  We start matching at POS, and stop
3150    matching at STOP.
3151 
3152    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3153    store offsets for the substring each group matched in REGS.  See the
3154    documentation for exactly how many groups we fill.
3155 
3156    We return -1 if no match, -2 if an internal error (such as the
3157    failure stack overflowing).  Otherwise, we return the length of the
3158    matched substring.  */
3159 
3160 int
re_match_2(bufp,string1,size1,string2,size2,pos,regs,stop)3161 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3162      struct re_pattern_buffer *bufp;
3163      const char *string1, *string2;
3164      int size1, size2;
3165      int pos;
3166      struct re_registers *regs;
3167      int stop;
3168 {
3169   /* General temporaries.  */
3170   int mcnt;
3171   unsigned char *p1;
3172 
3173   /* Just past the end of the corresponding string.  */
3174   const char *end1, *end2;
3175 
3176   /* Pointers into string1 and string2, just past the last characters in
3177      each to consider matching.  */
3178   const char *end_match_1, *end_match_2;
3179 
3180   /* Where we are in the data, and the end of the current string.  */
3181   const char *d, *dend;
3182 
3183   /* Where we are in the pattern, and the end of the pattern.  */
3184   unsigned char *p = bufp->buffer;
3185   register unsigned char *pend = p + bufp->used;
3186 
3187   /* We use this to map every character in the string.  */
3188   char *translate = bufp->translate;
3189 
3190   /* Failure point stack.  Each place that can handle a failure further
3191      down the line pushes a failure point on this stack.  It consists of
3192      restart, regend, and reg_info for all registers corresponding to
3193      the subexpressions we're currently inside, plus the number of such
3194      registers, and, finally, two char *'s.  The first char * is where
3195      to resume scanning the pattern; the second one is where to resume
3196      scanning the strings.  If the latter is zero, the failure point is
3197      a ``dummy''; if a failure happens and the failure point is a dummy,
3198      it gets discarded and the next next one is tried.  */
3199   fail_stack_type fail_stack;
3200 #ifdef DEBUG
3201   static unsigned failure_id = 0;
3202   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3203 #endif
3204 
3205   /* We fill all the registers internally, independent of what we
3206      return, for use in backreferences.  The number here includes
3207      an element for register zero.  */
3208   unsigned num_regs = bufp->re_nsub + 1;
3209 
3210   /* The currently active registers.  */
3211   unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3212   unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3213 
3214   /* Information on the contents of registers. These are pointers into
3215      the input strings; they record just what was matched (on this
3216      attempt) by a subexpression part of the pattern, that is, the
3217      regnum-th regstart pointer points to where in the pattern we began
3218      matching and the regnum-th regend points to right after where we
3219      stopped matching the regnum-th subexpression.  (The zeroth register
3220      keeps track of what the whole pattern matches.)  */
3221   const char **regstart, **regend;
3222 
3223   /* If a group that's operated upon by a repetition operator fails to
3224      match anything, then the register for its start will need to be
3225      restored because it will have been set to wherever in the string we
3226      are when we last see its open-group operator.  Similarly for a
3227      register's end.  */
3228   const char **old_regstart, **old_regend;
3229 
3230   /* The is_active field of reg_info helps us keep track of which (possibly
3231      nested) subexpressions we are currently in. The matched_something
3232      field of reg_info[reg_num] helps us tell whether or not we have
3233      matched any of the pattern so far this time through the reg_num-th
3234      subexpression.  These two fields get reset each time through any
3235      loop their register is in.  */
3236   register_info_type *reg_info;
3237 
3238   /* The following record the register info as found in the above
3239      variables when we find a match better than any we've seen before.
3240      This happens as we backtrack through the failure points, which in
3241      turn happens only if we have not yet matched the entire string. */
3242   unsigned best_regs_set = false;
3243   const char **best_regstart, **best_regend;
3244 
3245   /* Logically, this is `best_regend[0]'.  But we don't want to have to
3246      allocate space for that if we're not allocating space for anything
3247      else (see below).  Also, we never need info about register 0 for
3248      any of the other register vectors, and it seems rather a kludge to
3249      treat `best_regend' differently than the rest.  So we keep track of
3250      the end of the best match so far in a separate variable.  We
3251      initialize this to NULL so that when we backtrack the first time
3252      and need to test it, it's not garbage.  */
3253   const char *match_end = NULL;
3254 
3255   /* Used when we pop values we don't care about.  */
3256   const char **reg_dummy;
3257   register_info_type *reg_info_dummy;
3258 
3259 #ifdef DEBUG
3260   /* Counts the total number of registers pushed.  */
3261   unsigned num_regs_pushed = 0;
3262 #endif
3263 
3264   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3265 
3266   INIT_FAIL_STACK ();
3267 
3268   /* Do not bother to initialize all the register variables if there are
3269      no groups in the pattern, as it takes a fair amount of time.  If
3270      there are groups, we include space for register 0 (the whole
3271      pattern), even though we never use it, since it simplifies the
3272      array indexing.  We should fix this.  */
3273   if (bufp->re_nsub)
3274     {
3275       regstart = REGEX_TALLOC (num_regs, const char *);
3276       regend = REGEX_TALLOC (num_regs, const char *);
3277       old_regstart = REGEX_TALLOC (num_regs, const char *);
3278       old_regend = REGEX_TALLOC (num_regs, const char *);
3279       best_regstart = REGEX_TALLOC (num_regs, const char *);
3280       best_regend = REGEX_TALLOC (num_regs, const char *);
3281       reg_info = REGEX_TALLOC (num_regs, register_info_type);
3282       reg_dummy = REGEX_TALLOC (num_regs, const char *);
3283       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3284 
3285       if (!(regstart && regend && old_regstart && old_regend && reg_info
3286             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3287         {
3288           FREE_VARIABLES ();
3289           return -2;
3290         }
3291     }
3292 #ifdef REGEX_MALLOC
3293   else
3294     {
3295       /* We must initialize all our variables to NULL, so that
3296          `FREE_VARIABLES' doesn't try to free them.  */
3297       regstart = regend = old_regstart = old_regend = best_regstart
3298         = best_regend = reg_dummy = NULL;
3299       reg_info = reg_info_dummy = (register_info_type *) NULL;
3300     }
3301 #endif /* REGEX_MALLOC */
3302 
3303   /* The starting position is bogus.  */
3304   if (pos < 0 || pos > size1 + size2)
3305     {
3306       FREE_VARIABLES ();
3307       return -1;
3308     }
3309 
3310   /* Initialize subexpression text positions to -1 to mark ones that no
3311      start_memory/stop_memory has been seen for. Also initialize the
3312      register information struct.  */
3313   for (mcnt = 1; mcnt < num_regs; mcnt++)
3314     {
3315       regstart[mcnt] = regend[mcnt]
3316         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3317 
3318       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3319       IS_ACTIVE (reg_info[mcnt]) = 0;
3320       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3321       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3322     }
3323 
3324   /* We move `string1' into `string2' if the latter's empty -- but not if
3325      `string1' is null.  */
3326   if (size2 == 0 && string1 != NULL)
3327     {
3328       string2 = string1;
3329       size2 = size1;
3330       string1 = 0;
3331       size1 = 0;
3332     }
3333   end1 = string1 + size1;
3334   end2 = string2 + size2;
3335 
3336   /* Compute where to stop matching, within the two strings.  */
3337   if (stop <= size1)
3338     {
3339       end_match_1 = string1 + stop;
3340       end_match_2 = string2;
3341     }
3342   else
3343     {
3344       end_match_1 = end1;
3345       end_match_2 = string2 + stop - size1;
3346     }
3347 
3348   /* `p' scans through the pattern as `d' scans through the data.
3349      `dend' is the end of the input string that `d' points within.  `d'
3350      is advanced into the following input string whenever necessary, but
3351      this happens before fetching; therefore, at the beginning of the
3352      loop, `d' can be pointing at the end of a string, but it cannot
3353      equal `string2'.  */
3354   if (size1 > 0 && pos <= size1)
3355     {
3356       d = string1 + pos;
3357       dend = end_match_1;
3358     }
3359   else
3360     {
3361       d = string2 + pos - size1;
3362       dend = end_match_2;
3363     }
3364 
3365   DEBUG_PRINT1 ("The compiled pattern is: ");
3366   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3367   DEBUG_PRINT1 ("The string to match is: `");
3368   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3369   DEBUG_PRINT1 ("'\n");
3370 
3371   /* This loops over pattern commands.  It exits by returning from the
3372      function if the match is complete, or it drops through if the match
3373      fails at this starting point in the input data.  */
3374   for (;;)
3375     {
3376       DEBUG_PRINT2 ("\n0x%x: ", p);
3377 
3378       if (p == pend)
3379 	{ /* End of pattern means we might have succeeded.  */
3380           DEBUG_PRINT1 ("end of pattern ... ");
3381 
3382 	  /* If we haven't matched the entire string, and we want the
3383              longest match, try backtracking.  */
3384           if (d != end_match_2)
3385 	    {
3386               DEBUG_PRINT1 ("backtracking.\n");
3387 
3388               if (!FAIL_STACK_EMPTY ())
3389                 { /* More failure points to try.  */
3390                   boolean same_str_p = (FIRST_STRING_P (match_end)
3391 	        	                == MATCHING_IN_FIRST_STRING);
3392 
3393                   /* If exceeds best match so far, save it.  */
3394                   if (!best_regs_set
3395                       || (same_str_p && d > match_end)
3396                       || (!same_str_p && !MATCHING_IN_FIRST_STRING))
3397                     {
3398                       best_regs_set = true;
3399                       match_end = d;
3400 
3401                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3402 
3403                       for (mcnt = 1; mcnt < num_regs; mcnt++)
3404                         {
3405                           best_regstart[mcnt] = regstart[mcnt];
3406                           best_regend[mcnt] = regend[mcnt];
3407                         }
3408                     }
3409                   goto fail;
3410                 }
3411 
3412               /* If no failure points, don't restore garbage.  */
3413               else if (best_regs_set)
3414                 {
3415   	        restore_best_regs:
3416                   /* Restore best match.  It may happen that `dend ==
3417                      end_match_1' while the restored d is in string2.
3418                      For example, the pattern `x.*y.*z' against the
3419                      strings `x-' and `y-z-', if the two strings are
3420                      not consecutive in memory.  */
3421                   DEBUG_PRINT1 ("Restoring best registers.\n");
3422 
3423                   d = match_end;
3424                   dend = ((d >= string1 && d <= end1)
3425 		           ? end_match_1 : end_match_2);
3426 
3427 		  for (mcnt = 1; mcnt < num_regs; mcnt++)
3428 		    {
3429 		      regstart[mcnt] = best_regstart[mcnt];
3430 		      regend[mcnt] = best_regend[mcnt];
3431 		    }
3432                 }
3433             } /* d != end_match_2 */
3434 
3435           DEBUG_PRINT1 ("Accepting match.\n");
3436 
3437           /* If caller wants register contents data back, do it.  */
3438           if (regs && !bufp->no_sub)
3439 	    {
3440               /* Have the register data arrays been allocated?  */
3441               if (bufp->regs_allocated == REGS_UNALLOCATED)
3442                 { /* No.  So allocate them with malloc.  We need one
3443                      extra element beyond `num_regs' for the `-1' marker
3444                      GNU code uses.  */
3445                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3446                   regs->start = TALLOC (regs->num_regs, regoff_t);
3447                   regs->end = TALLOC (regs->num_regs, regoff_t);
3448                   if (regs->start == NULL || regs->end == NULL)
3449                     return -2;
3450                   bufp->regs_allocated = REGS_REALLOCATE;
3451                 }
3452               else if (bufp->regs_allocated == REGS_REALLOCATE)
3453                 { /* Yes.  If we need more elements than were already
3454                      allocated, reallocate them.  If we need fewer, just
3455                      leave it alone.  */
3456                   if (regs->num_regs < num_regs + 1)
3457                     {
3458                       regs->num_regs = num_regs + 1;
3459                       RETALLOC (regs->start, regs->num_regs, regoff_t);
3460                       RETALLOC (regs->end, regs->num_regs, regoff_t);
3461                       if (regs->start == NULL || regs->end == NULL)
3462                         return -2;
3463                     }
3464                 }
3465               else
3466                 assert (bufp->regs_allocated == REGS_FIXED);
3467 
3468               /* Convert the pointer data in `regstart' and `regend' to
3469                  indices.  Register zero has to be set differently,
3470                  since we haven't kept track of any info for it.  */
3471               if (regs->num_regs > 0)
3472                 {
3473                   regs->start[0] = pos;
3474                   regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3475 			          : d - string2 + size1);
3476                 }
3477 
3478               /* Go through the first `min (num_regs, regs->num_regs)'
3479                  registers, since that is all we initialized.  */
3480 	      for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3481 		{
3482                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3483                     regs->start[mcnt] = regs->end[mcnt] = -1;
3484                   else
3485                     {
3486 		      regs->start[mcnt] = POINTER_TO_OFFSET (regstart[mcnt]);
3487                       regs->end[mcnt] = POINTER_TO_OFFSET (regend[mcnt]);
3488                     }
3489 		}
3490 
3491               /* If the regs structure we return has more elements than
3492                  were in the pattern, set the extra elements to -1.  If
3493                  we (re)allocated the registers, this is the case,
3494                  because we always allocate enough to have at least one
3495                  -1 at the end.  */
3496               for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3497                 regs->start[mcnt] = regs->end[mcnt] = -1;
3498 	    } /* regs && !bufp->no_sub */
3499 
3500           FREE_VARIABLES ();
3501           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3502                         nfailure_points_pushed, nfailure_points_popped,
3503                         nfailure_points_pushed - nfailure_points_popped);
3504           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3505 
3506           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3507 			    ? string1
3508 			    : string2 - size1);
3509 
3510           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3511 
3512           return mcnt;
3513         }
3514 
3515       /* Otherwise match next pattern command.  */
3516 #ifdef SWITCH_ENUM_BUG
3517       switch ((int) ((re_opcode_t) *p++))
3518 #else
3519       switch ((re_opcode_t) *p++)
3520 #endif
3521 	{
3522         /* Ignore these.  Used to ignore the n of succeed_n's which
3523            currently have n == 0.  */
3524         case no_op:
3525           DEBUG_PRINT1 ("EXECUTING no_op.\n");
3526           break;
3527 
3528 
3529         /* Match the next n pattern characters exactly.  The following
3530            byte in the pattern defines n, and the n bytes after that
3531            are the characters to match.  */
3532 	case exactn:
3533 	  mcnt = *p++;
3534           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3535 
3536           /* This is written out as an if-else so we don't waste time
3537              testing `translate' inside the loop.  */
3538           if (translate)
3539 	    {
3540 	      do
3541 		{
3542 		  PREFETCH ();
3543 		  if (translate[(unsigned char) *d++] != (char) *p++)
3544                     goto fail;
3545 		}
3546 	      while (--mcnt);
3547 	    }
3548 	  else
3549 	    {
3550 	      do
3551 		{
3552 		  PREFETCH ();
3553 		  if (*d++ != (char) *p++) goto fail;
3554 		}
3555 	      while (--mcnt);
3556 	    }
3557 	  SET_REGS_MATCHED ();
3558           break;
3559 
3560 
3561         /* Match any character except possibly a newline or a null.  */
3562 	case anychar:
3563           DEBUG_PRINT1 ("EXECUTING anychar.\n");
3564 
3565           PREFETCH ();
3566 
3567           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3568               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3569 	    goto fail;
3570 
3571           SET_REGS_MATCHED ();
3572           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3573           d++;
3574 	  break;
3575 
3576 
3577 	case charset:
3578 	case charset_not:
3579 	  {
3580 	    register unsigned char c;
3581 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
3582 
3583             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3584 
3585 	    PREFETCH ();
3586 	    c = TRANSLATE (*d); /* The character to match.  */
3587 
3588             /* Cast to `unsigned' instead of `unsigned char' in case the
3589                bit list is a full 32 bytes long.  */
3590 	    if (c < (unsigned) (*p * BYTEWIDTH)
3591 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3592 	      not = !not;
3593 
3594 	    p += 1 + *p;
3595 
3596 	    if (!not) goto fail;
3597 
3598 	    SET_REGS_MATCHED ();
3599             d++;
3600 	    break;
3601 	  }
3602 
3603 
3604         /* The beginning of a group is represented by start_memory.
3605            The arguments are the register number in the next byte, and the
3606            number of groups inner to this one in the next.  The text
3607            matched within the group is recorded (in the internal
3608            registers data structure) under the register number.  */
3609         case start_memory:
3610 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3611 
3612           /* Find out if this group can match the empty string.  */
3613 	  p1 = p;		/* To send to group_match_null_string_p.  */
3614 
3615           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3616             REG_MATCH_NULL_STRING_P (reg_info[*p])
3617               = group_match_null_string_p (&p1, pend, reg_info);
3618 
3619           /* Save the position in the string where we were the last time
3620              we were at this open-group operator in case the group is
3621              operated upon by a repetition operator, e.g., with `(a*)*b'
3622              against `ab'; then we want to ignore where we are now in
3623              the string in case this attempt to match fails.  */
3624           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3625                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3626                              : regstart[*p];
3627 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
3628 			 POINTER_TO_OFFSET (old_regstart[*p]));
3629 
3630           regstart[*p] = d;
3631 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
3632 
3633           IS_ACTIVE (reg_info[*p]) = 1;
3634           MATCHED_SOMETHING (reg_info[*p]) = 0;
3635 
3636           /* This is the new highest active register.  */
3637           highest_active_reg = *p;
3638 
3639           /* If nothing was active before, this is the new lowest active
3640              register.  */
3641           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3642             lowest_active_reg = *p;
3643 
3644           /* Move past the register number and inner group count.  */
3645           p += 2;
3646           break;
3647 
3648 
3649         /* The stop_memory opcode represents the end of a group.  Its
3650            arguments are the same as start_memory's: the register
3651            number, and the number of inner groups.  */
3652 	case stop_memory:
3653 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3654 
3655           /* We need to save the string position the last time we were at
3656              this close-group operator in case the group is operated
3657              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3658              against `aba'; then we want to ignore where we are now in
3659              the string in case this attempt to match fails.  */
3660           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3661                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
3662 			   : regend[*p];
3663 	  DEBUG_PRINT2 ("      old_regend: %d\n",
3664 			 POINTER_TO_OFFSET (old_regend[*p]));
3665 
3666           regend[*p] = d;
3667 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
3668 
3669           /* This register isn't active anymore.  */
3670           IS_ACTIVE (reg_info[*p]) = 0;
3671 
3672           /* If this was the only register active, nothing is active
3673              anymore.  */
3674           if (lowest_active_reg == highest_active_reg)
3675             {
3676               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3677               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3678             }
3679           else
3680             { /* We must scan for the new highest active register, since
3681                  it isn't necessarily one less than now: consider
3682                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3683                  new highest active register is 1.  */
3684               unsigned char r = *p - 1;
3685               while (r > 0 && !IS_ACTIVE (reg_info[r]))
3686                 r--;
3687 
3688               /* If we end up at register zero, that means that we saved
3689                  the registers as the result of an `on_failure_jump', not
3690                  a `start_memory', and we jumped to past the innermost
3691                  `stop_memory'.  For example, in ((.)*) we save
3692                  registers 1 and 2 as a result of the *, but when we pop
3693                  back to the second ), we are at the stop_memory 1.
3694                  Thus, nothing is active.  */
3695 	      if (r == 0)
3696                 {
3697                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3698                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3699                 }
3700               else
3701                 highest_active_reg = r;
3702             }
3703 
3704           /* If just failed to match something this time around with a
3705              group that's operated on by a repetition operator, try to
3706              force exit from the ``loop'', and restore the register
3707              information for this group that we had before trying this
3708              last match.  */
3709           if ((!MATCHED_SOMETHING (reg_info[*p])
3710                || (re_opcode_t) p[-3] == start_memory)
3711 	      && (p + 2) < pend)
3712             {
3713               boolean is_a_jump_n = false;
3714 
3715               p1 = p + 2;
3716               mcnt = 0;
3717               switch ((re_opcode_t) *p1++)
3718                 {
3719                   case jump_n:
3720 		    is_a_jump_n = true;
3721                   case pop_failure_jump:
3722 		  case maybe_pop_jump:
3723 		  case jump:
3724 		  case dummy_failure_jump:
3725                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3726 		    if (is_a_jump_n)
3727 		      p1 += 2;
3728                     break;
3729 
3730                   default:
3731                     /* do nothing */ ;
3732                 }
3733 	      p1 += mcnt;
3734 
3735               /* If the next operation is a jump backwards in the pattern
3736 	         to an on_failure_jump right before the start_memory
3737                  corresponding to this stop_memory, exit from the loop
3738                  by forcing a failure after pushing on the stack the
3739                  on_failure_jump's jump in the pattern, and d.  */
3740               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
3741                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
3742 		{
3743                   /* If this group ever matched anything, then restore
3744                      what its registers were before trying this last
3745                      failed match, e.g., with `(a*)*b' against `ab' for
3746                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
3747                      against `aba' for regend[3].
3748 
3749                      Also restore the registers for inner groups for,
3750                      e.g., `((a*)(b*))*' against `aba' (register 3 would
3751                      otherwise get trashed).  */
3752 
3753                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3754 		    {
3755 		      unsigned r;
3756 
3757                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3758 
3759 		      /* Restore this and inner groups' (if any) registers.  */
3760                       for (r = *p; r < *p + *(p + 1); r++)
3761                         {
3762                           regstart[r] = old_regstart[r];
3763 
3764                           /* xx why this test?  */
3765                           if ((int) old_regend[r] >= (int) regstart[r])
3766                             regend[r] = old_regend[r];
3767                         }
3768                     }
3769 		  p1++;
3770                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3771                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
3772 
3773                   goto fail;
3774                 }
3775             }
3776 
3777           /* Move past the register number and the inner group count.  */
3778           p += 2;
3779           break;
3780 
3781 
3782 	/* \<digit> has been turned into a `duplicate' command which is
3783            followed by the numeric value of <digit> as the register number.  */
3784         case duplicate:
3785 	  {
3786 	    register const char *d2, *dend2;
3787 	    int regno = *p++;   /* Get which register to match against.  */
3788 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
3789 
3790 	    /* Can't back reference a group which we've never matched.  */
3791             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
3792               goto fail;
3793 
3794             /* Where in input to try to start matching.  */
3795             d2 = regstart[regno];
3796 
3797             /* Where to stop matching; if both the place to start and
3798                the place to stop matching are in the same string, then
3799                set to the place to stop, otherwise, for now have to use
3800                the end of the first string.  */
3801 
3802             dend2 = ((FIRST_STRING_P (regstart[regno])
3803 		      == FIRST_STRING_P (regend[regno]))
3804 		     ? regend[regno] : end_match_1);
3805 	    for (;;)
3806 	      {
3807 		/* If necessary, advance to next segment in register
3808                    contents.  */
3809 		while (d2 == dend2)
3810 		  {
3811 		    if (dend2 == end_match_2) break;
3812 		    if (dend2 == regend[regno]) break;
3813 
3814                     /* End of string1 => advance to string2. */
3815                     d2 = string2;
3816                     dend2 = regend[regno];
3817 		  }
3818 		/* At end of register contents => success */
3819 		if (d2 == dend2) break;
3820 
3821 		/* If necessary, advance to next segment in data.  */
3822 		PREFETCH ();
3823 
3824 		/* How many characters left in this segment to match.  */
3825 		mcnt = dend - d;
3826 
3827 		/* Want how many consecutive characters we can match in
3828                    one shot, so, if necessary, adjust the count.  */
3829                 if (mcnt > dend2 - d2)
3830 		  mcnt = dend2 - d2;
3831 
3832 		/* Compare that many; failure if mismatch, else move
3833                    past them.  */
3834 		if (translate
3835                     ? bcmp_translate (d, d2, mcnt, translate)
3836                     : bcmp (d, d2, mcnt))
3837 		  goto fail;
3838 		d += mcnt, d2 += mcnt;
3839 	      }
3840 	  }
3841 	  break;
3842 
3843 
3844         /* begline matches the empty string at the beginning of the string
3845            (unless `not_bol' is set in `bufp'), and, if
3846            `newline_anchor' is set, after newlines.  */
3847 	case begline:
3848           DEBUG_PRINT1 ("EXECUTING begline.\n");
3849 
3850           if (AT_STRINGS_BEG (d))
3851             {
3852               if (!bufp->not_bol) break;
3853             }
3854           else if (d[-1] == '\n' && bufp->newline_anchor)
3855             {
3856               break;
3857             }
3858           /* In all other cases, we fail.  */
3859           goto fail;
3860 
3861 
3862         /* endline is the dual of begline.  */
3863 	case endline:
3864           DEBUG_PRINT1 ("EXECUTING endline.\n");
3865 
3866           if (AT_STRINGS_END (d))
3867             {
3868               if (!bufp->not_eol) break;
3869             }
3870 
3871           /* We have to ``prefetch'' the next character.  */
3872           else if ((d == end1 ? *string2 : *d) == '\n'
3873                    && bufp->newline_anchor)
3874             {
3875               break;
3876             }
3877           goto fail;
3878 
3879 
3880 	/* Match at the very beginning of the data.  */
3881         case begbuf:
3882           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
3883           if (AT_STRINGS_BEG (d))
3884             break;
3885           goto fail;
3886 
3887 
3888 	/* Match at the very end of the data.  */
3889         case endbuf:
3890           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
3891 	  if (AT_STRINGS_END (d))
3892 	    break;
3893           goto fail;
3894 
3895 
3896         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
3897            pushes NULL as the value for the string on the stack.  Then
3898            `pop_failure_point' will keep the current value for the
3899            string, instead of restoring it.  To see why, consider
3900            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
3901            then the . fails against the \n.  But the next thing we want
3902            to do is match the \n against the \n; if we restored the
3903            string value, we would be back at the foo.
3904 
3905            Because this is used only in specific cases, we don't need to
3906            check all the things that `on_failure_jump' does, to make
3907            sure the right things get saved on the stack.  Hence we don't
3908            share its code.  The only reason to push anything on the
3909            stack at all is that otherwise we would have to change
3910            `anychar's code to do something besides goto fail in this
3911            case; that seems worse than this.  */
3912         case on_failure_keep_string_jump:
3913           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
3914 
3915           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3916           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
3917 
3918           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
3919           break;
3920 
3921 
3922 	/* Uses of on_failure_jump:
3923 
3924            Each alternative starts with an on_failure_jump that points
3925            to the beginning of the next alternative.  Each alternative
3926            except the last ends with a jump that in effect jumps past
3927            the rest of the alternatives.  (They really jump to the
3928            ending jump of the following alternative, because tensioning
3929            these jumps is a hassle.)
3930 
3931            Repeats start with an on_failure_jump that points past both
3932            the repetition text and either the following jump or
3933            pop_failure_jump back to this on_failure_jump.  */
3934 	case on_failure_jump:
3935         on_failure:
3936           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
3937 
3938           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3939           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
3940 
3941           /* If this on_failure_jump comes right before a group (i.e.,
3942              the original * applied to a group), save the information
3943              for that group and all inner ones, so that if we fail back
3944              to this point, the group's information will be correct.
3945              For example, in \(a*\)*\1, we need the preceding group,
3946              and in \(\(a*\)b*\)\2, we need the inner group.  */
3947 
3948           /* We can't use `p' to check ahead because we push
3949              a failure point to `p + mcnt' after we do this.  */
3950           p1 = p;
3951 
3952           /* We need to skip no_op's before we look for the
3953              start_memory in case this on_failure_jump is happening as
3954              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3955              against aba.  */
3956           while (p1 < pend && (re_opcode_t) *p1 == no_op)
3957             p1++;
3958 
3959           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
3960             {
3961               /* We have a new highest active register now.  This will
3962                  get reset at the start_memory we are about to get to,
3963                  but we will have saved all the registers relevant to
3964                  this repetition op, as described above.  */
3965               highest_active_reg = *(p1 + 1) + *(p1 + 2);
3966               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3967                 lowest_active_reg = *(p1 + 1);
3968             }
3969 
3970           DEBUG_PRINT1 (":\n");
3971           PUSH_FAILURE_POINT (p + mcnt, d, -2);
3972           break;
3973 
3974 
3975         /* A smart repeat ends with `maybe_pop_jump'.
3976 	   We change it to either `pop_failure_jump' or `jump'.  */
3977         case maybe_pop_jump:
3978           EXTRACT_NUMBER_AND_INCR (mcnt, p);
3979           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
3980           {
3981 	    register unsigned char *p2 = p;
3982 
3983             /* Compare the beginning of the repeat with what in the
3984                pattern follows its end. If we can establish that there
3985                is nothing that they would both match, i.e., that we
3986                would have to backtrack because of (as in, e.g., `a*a')
3987                then we can change to pop_failure_jump, because we'll
3988                never have to backtrack.
3989 
3990                This is not true in the case of alternatives: in
3991                `(a|ab)*' we do need to backtrack to the `ab' alternative
3992                (e.g., if the string was `ab').  But instead of trying to
3993                detect that here, the alternative has put on a dummy
3994                failure point which is what we will end up popping.  */
3995 
3996 	    /* Skip over open/close-group commands.  */
3997 	    while (p2 + 2 < pend
3998 		   && ((re_opcode_t) *p2 == stop_memory
3999 		       || (re_opcode_t) *p2 == start_memory))
4000 	      p2 += 3;			/* Skip over args, too.  */
4001 
4002             /* If we're at the end of the pattern, we can change.  */
4003             if (p2 == pend)
4004 	      {
4005 		/* Consider what happens when matching ":\(.*\)"
4006 		   against ":/".  I don't really understand this code
4007 		   yet.  */
4008   	        p[-3] = (unsigned char) pop_failure_jump;
4009                 DEBUG_PRINT1
4010                   ("  End of pattern: change to `pop_failure_jump'.\n");
4011               }
4012 
4013             else if ((re_opcode_t) *p2 == exactn
4014 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4015 	      {
4016 		register unsigned char c
4017                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4018 		p1 = p + mcnt;
4019 
4020                 /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4021                    to the `maybe_finalize_jump' of this case.  Examine what
4022                    follows.  */
4023                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4024                   {
4025   		    p[-3] = (unsigned char) pop_failure_jump;
4026                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4027                                   c, p1[5]);
4028                   }
4029 
4030 		else if ((re_opcode_t) p1[3] == charset
4031 			 || (re_opcode_t) p1[3] == charset_not)
4032 		  {
4033 		    int not = (re_opcode_t) p1[3] == charset_not;
4034 
4035 		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4036 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4037 		      not = !not;
4038 
4039                     /* `not' is equal to 1 if c would match, which means
4040                         that we can't change to pop_failure_jump.  */
4041 		    if (!not)
4042                       {
4043   		        p[-3] = (unsigned char) pop_failure_jump;
4044                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4045                       }
4046 		  }
4047 	      }
4048 	  }
4049 	  p -= 2;		/* Point at relative address again.  */
4050 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4051 	    {
4052 	      p[-1] = (unsigned char) jump;
4053               DEBUG_PRINT1 ("  Match => jump.\n");
4054 	      goto unconditional_jump;
4055 	    }
4056         /* Note fall through.  */
4057 
4058 
4059 	/* The end of a simple repeat has a pop_failure_jump back to
4060            its matching on_failure_jump, where the latter will push a
4061            failure point.  The pop_failure_jump takes off failure
4062            points put on by this pop_failure_jump's matching
4063            on_failure_jump; we got through the pattern to here from the
4064            matching on_failure_jump, so didn't fail.  */
4065         case pop_failure_jump:
4066           {
4067             /* We need to pass separate storage for the lowest and
4068                highest registers, even though we don't care about the
4069                actual values.  Otherwise, we will restore only one
4070                register from the stack, since lowest will == highest in
4071                `pop_failure_point'.  */
4072             unsigned dummy_low_reg, dummy_high_reg;
4073             unsigned char *pdummy;
4074             const char *sdummy;
4075 
4076             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4077             POP_FAILURE_POINT (sdummy, pdummy,
4078                                dummy_low_reg, dummy_high_reg,
4079                                reg_dummy, reg_dummy, reg_info_dummy);
4080           }
4081           /* Note fall through.  */
4082 
4083 
4084         /* Unconditionally jump (without popping any failure points).  */
4085         case jump:
4086 	unconditional_jump:
4087 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4088           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4089 	  p += mcnt;				/* Do the jump.  */
4090           DEBUG_PRINT2 ("(to 0x%x).\n", p);
4091 	  break;
4092 
4093 
4094         /* We need this opcode so we can detect where alternatives end
4095            in `group_match_null_string_p' et al.  */
4096         case jump_past_alt:
4097           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4098           goto unconditional_jump;
4099 
4100 
4101         /* Normally, the on_failure_jump pushes a failure point, which
4102            then gets popped at pop_failure_jump.  We will end up at
4103            pop_failure_jump, also, and with a pattern of, say, `a+', we
4104            are skipping over the on_failure_jump, so we have to push
4105            something meaningless for pop_failure_jump to pop.  */
4106         case dummy_failure_jump:
4107           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4108           /* It doesn't matter what we push for the string here.  What
4109              the code at `fail' tests is the value for the pattern.  */
4110           PUSH_FAILURE_POINT (0, 0, -2);
4111           goto unconditional_jump;
4112 
4113 
4114         /* At the end of an alternative, we need to push a dummy failure
4115            point in case we are followed by a `pop_failure_jump', because
4116            we don't want the failure point for the alternative to be
4117            popped.  For example, matching `(a|ab)*' against `aab'
4118            requires that we match the `ab' alternative.  */
4119         case push_dummy_failure:
4120           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4121           /* See comments just above at `dummy_failure_jump' about the
4122              two zeroes.  */
4123           PUSH_FAILURE_POINT (0, 0, -2);
4124           break;
4125 
4126         /* Have to succeed matching what follows at least n times.
4127            After that, handle like `on_failure_jump'.  */
4128         case succeed_n:
4129           EXTRACT_NUMBER (mcnt, p + 2);
4130           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4131 
4132           assert (mcnt >= 0);
4133           /* Originally, this is how many times we HAVE to succeed.  */
4134           if (mcnt > 0)
4135             {
4136                mcnt--;
4137 	       p += 2;
4138                STORE_NUMBER_AND_INCR (p, mcnt);
4139                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4140             }
4141 	  else if (mcnt == 0)
4142             {
4143               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4144 	      p[2] = (unsigned char) no_op;
4145               p[3] = (unsigned char) no_op;
4146               goto on_failure;
4147             }
4148           break;
4149 
4150         case jump_n:
4151           EXTRACT_NUMBER (mcnt, p + 2);
4152           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4153 
4154           /* Originally, this is how many times we CAN jump.  */
4155           if (mcnt)
4156             {
4157                mcnt--;
4158                STORE_NUMBER (p + 2, mcnt);
4159 	       goto unconditional_jump;
4160             }
4161           /* If don't have to jump any more, skip over the rest of command.  */
4162 	  else
4163 	    p += 4;
4164           break;
4165 
4166 	case set_number_at:
4167 	  {
4168             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4169 
4170             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4171             p1 = p + mcnt;
4172             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4173             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4174 	    STORE_NUMBER (p1, mcnt);
4175             break;
4176           }
4177 
4178         case wordbound:
4179           DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4180           if (AT_WORD_BOUNDARY (d))
4181 	    break;
4182           goto fail;
4183 
4184 	case notwordbound:
4185           DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4186 	  if (AT_WORD_BOUNDARY (d))
4187 	    goto fail;
4188           break;
4189 
4190 	case wordbeg:
4191           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4192 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4193 	    break;
4194           goto fail;
4195 
4196 	case wordend:
4197           DEBUG_PRINT1 ("EXECUTING wordend.\n");
4198 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4199               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4200 	    break;
4201           goto fail;
4202 
4203 #ifdef emacs
4204 #ifdef emacs19
4205   	case before_dot:
4206           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4207  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4208   	    goto fail;
4209   	  break;
4210 
4211   	case at_dot:
4212           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4213  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
4214   	    goto fail;
4215   	  break;
4216 
4217   	case after_dot:
4218           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4219           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4220   	    goto fail;
4221   	  break;
4222 #else /* not emacs19 */
4223 	case at_dot:
4224           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4225 	  if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4226 	    goto fail;
4227 	  break;
4228 #endif /* not emacs19 */
4229 
4230 	case syntaxspec:
4231           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4232 	  mcnt = *p++;
4233 	  goto matchsyntax;
4234 
4235         case wordchar:
4236           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4237 	  mcnt = (int) Sword;
4238         matchsyntax:
4239 	  PREFETCH ();
4240 	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt)
4241             goto fail;
4242           SET_REGS_MATCHED ();
4243 	  break;
4244 
4245 	case notsyntaxspec:
4246           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4247 	  mcnt = *p++;
4248 	  goto matchnotsyntax;
4249 
4250         case notwordchar:
4251           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4252 	  mcnt = (int) Sword;
4253         matchnotsyntax:
4254 	  PREFETCH ();
4255 	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt)
4256             goto fail;
4257 	  SET_REGS_MATCHED ();
4258           break;
4259 
4260 #else /* not emacs */
4261 	case wordchar:
4262           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4263 	  PREFETCH ();
4264           if (!WORDCHAR_P (d))
4265             goto fail;
4266 	  SET_REGS_MATCHED ();
4267           d++;
4268 	  break;
4269 
4270 	case notwordchar:
4271           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4272 	  PREFETCH ();
4273 	  if (WORDCHAR_P (d))
4274             goto fail;
4275           SET_REGS_MATCHED ();
4276           d++;
4277 	  break;
4278 #endif /* not emacs */
4279 
4280         default:
4281           abort ();
4282 	}
4283       continue;  /* Successfully executed one pattern command; keep going.  */
4284 
4285 
4286     /* We goto here if a matching operation fails. */
4287     fail:
4288       if (!FAIL_STACK_EMPTY ())
4289 	{ /* A restart point is known.  Restore to that state.  */
4290           DEBUG_PRINT1 ("\nFAIL:\n");
4291           POP_FAILURE_POINT (d, p,
4292                              lowest_active_reg, highest_active_reg,
4293                              regstart, regend, reg_info);
4294 
4295           /* If this failure point is a dummy, try the next one.  */
4296           if (!p)
4297 	    goto fail;
4298 
4299           /* If we failed to the end of the pattern, don't examine *p.  */
4300 	  assert (p <= pend);
4301           if (p < pend)
4302             {
4303               boolean is_a_jump_n = false;
4304 
4305               /* If failed to a backwards jump that's part of a repetition
4306                  loop, need to pop this failure point and use the next one.  */
4307               switch ((re_opcode_t) *p)
4308                 {
4309                 case jump_n:
4310                   is_a_jump_n = true;
4311                 case maybe_pop_jump:
4312                 case pop_failure_jump:
4313                 case jump:
4314                   p1 = p + 1;
4315                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4316                   p1 += mcnt;
4317 
4318                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4319                       || (!is_a_jump_n
4320                           && (re_opcode_t) *p1 == on_failure_jump))
4321                     goto fail;
4322                   break;
4323                 default:
4324                   /* do nothing */ ;
4325                 }
4326             }
4327 
4328           if (d >= string1 && d <= end1)
4329 	    dend = end_match_1;
4330         }
4331       else
4332         break;   /* Matching at this starting point really fails.  */
4333     } /* for (;;) */
4334 
4335   if (best_regs_set)
4336     goto restore_best_regs;
4337 
4338   FREE_VARIABLES ();
4339 
4340   return -1;         			/* Failure to match.  */
4341 } /* re_match_2 */
4342 
4343 /* Subroutine definitions for re_match_2.  */
4344 
4345 
4346 /* We are passed P pointing to a register number after a start_memory.
4347 
4348    Return true if the pattern up to the corresponding stop_memory can
4349    match the empty string, and false otherwise.
4350 
4351    If we find the matching stop_memory, sets P to point to one past its number.
4352    Otherwise, sets P to an undefined byte less than or equal to END.
4353 
4354    We don't handle duplicates properly (yet).  */
4355 
4356 static boolean
group_match_null_string_p(p,end,reg_info)4357 group_match_null_string_p (p, end, reg_info)
4358     unsigned char **p, *end;
4359     register_info_type *reg_info;
4360 {
4361   int mcnt;
4362   /* Point to after the args to the start_memory.  */
4363   unsigned char *p1 = *p + 2;
4364 
4365   while (p1 < end)
4366     {
4367       /* Skip over opcodes that can match nothing, and return true or
4368 	 false, as appropriate, when we get to one that can't, or to the
4369          matching stop_memory.  */
4370 
4371       switch ((re_opcode_t) *p1)
4372         {
4373         /* Could be either a loop or a series of alternatives.  */
4374         case on_failure_jump:
4375           p1++;
4376           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4377 
4378           /* If the next operation is not a jump backwards in the
4379 	     pattern.  */
4380 
4381 	  if (mcnt >= 0)
4382 	    {
4383               /* Go through the on_failure_jumps of the alternatives,
4384                  seeing if any of the alternatives cannot match nothing.
4385                  The last alternative starts with only a jump,
4386                  whereas the rest start with on_failure_jump and end
4387                  with a jump, e.g., here is the pattern for `a|b|c':
4388 
4389                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4390                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4391                  /exactn/1/c
4392 
4393                  So, we have to first go through the first (n-1)
4394                  alternatives and then deal with the last one separately.  */
4395 
4396 
4397               /* Deal with the first (n-1) alternatives, which start
4398                  with an on_failure_jump (see above) that jumps to right
4399                  past a jump_past_alt.  */
4400 
4401               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4402                 {
4403                   /* `mcnt' holds how many bytes long the alternative
4404                      is, including the ending `jump_past_alt' and
4405                      its number.  */
4406 
4407                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4408 				                      reg_info))
4409                     return false;
4410 
4411                   /* Move to right after this alternative, including the
4412 		     jump_past_alt.  */
4413                   p1 += mcnt;
4414 
4415                   /* Break if it's the beginning of an n-th alternative
4416                      that doesn't begin with an on_failure_jump.  */
4417                   if ((re_opcode_t) *p1 != on_failure_jump)
4418                     break;
4419 
4420 		  /* Still have to check that it's not an n-th
4421 		     alternative that starts with an on_failure_jump.  */
4422 		  p1++;
4423                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4424                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4425                     {
4426 		      /* Get to the beginning of the n-th alternative.  */
4427                       p1 -= 3;
4428                       break;
4429                     }
4430                 }
4431 
4432               /* Deal with the last alternative: go back and get number
4433                  of the `jump_past_alt' just before it.  `mcnt' contains
4434                  the length of the alternative.  */
4435               EXTRACT_NUMBER (mcnt, p1 - 2);
4436 
4437               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4438                 return false;
4439 
4440               p1 += mcnt;	/* Get past the n-th alternative.  */
4441             } /* if mcnt > 0 */
4442           break;
4443 
4444 
4445         case stop_memory:
4446 	  assert (p1[1] == **p);
4447           *p = p1 + 2;
4448           return true;
4449 
4450 
4451         default:
4452           if (!common_op_match_null_string_p (&p1, end, reg_info))
4453             return false;
4454         }
4455     } /* while p1 < end */
4456 
4457   return false;
4458 } /* group_match_null_string_p */
4459 
4460 
4461 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4462    It expects P to be the first byte of a single alternative and END one
4463    byte past the last. The alternative can contain groups.  */
4464 
4465 static boolean
alt_match_null_string_p(p,end,reg_info)4466 alt_match_null_string_p (p, end, reg_info)
4467     unsigned char *p, *end;
4468     register_info_type *reg_info;
4469 {
4470   int mcnt;
4471   unsigned char *p1 = p;
4472 
4473   while (p1 < end)
4474     {
4475       /* Skip over opcodes that can match nothing, and break when we get
4476          to one that can't.  */
4477 
4478       switch ((re_opcode_t) *p1)
4479         {
4480 	/* It's a loop.  */
4481         case on_failure_jump:
4482           p1++;
4483           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4484           p1 += mcnt;
4485           break;
4486 
4487 	default:
4488           if (!common_op_match_null_string_p (&p1, end, reg_info))
4489             return false;
4490         }
4491     }  /* while p1 < end */
4492 
4493   return true;
4494 } /* alt_match_null_string_p */
4495 
4496 
4497 /* Deals with the ops common to group_match_null_string_p and
4498    alt_match_null_string_p.
4499 
4500    Sets P to one after the op and its arguments, if any.  */
4501 
4502 static boolean
common_op_match_null_string_p(p,end,reg_info)4503 common_op_match_null_string_p (p, end, reg_info)
4504     unsigned char **p, *end;
4505     register_info_type *reg_info;
4506 {
4507   int mcnt;
4508   boolean ret;
4509   int reg_no;
4510   unsigned char *p1 = *p;
4511 
4512   switch ((re_opcode_t) *p1++)
4513     {
4514     case no_op:
4515     case begline:
4516     case endline:
4517     case begbuf:
4518     case endbuf:
4519     case wordbeg:
4520     case wordend:
4521     case wordbound:
4522     case notwordbound:
4523 #ifdef emacs
4524     case before_dot:
4525     case at_dot:
4526     case after_dot:
4527 #endif
4528       break;
4529 
4530     case start_memory:
4531       reg_no = *p1;
4532       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4533       ret = group_match_null_string_p (&p1, end, reg_info);
4534 
4535       /* Have to set this here in case we're checking a group which
4536          contains a group and a back reference to it.  */
4537 
4538       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4539         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4540 
4541       if (!ret)
4542         return false;
4543       break;
4544 
4545     /* If this is an optimized succeed_n for zero times, make the jump.  */
4546     case jump:
4547       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4548       if (mcnt >= 0)
4549         p1 += mcnt;
4550       else
4551         return false;
4552       break;
4553 
4554     case succeed_n:
4555       /* Get to the number of times to succeed.  */
4556       p1 += 2;
4557       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4558 
4559       if (mcnt == 0)
4560         {
4561           p1 -= 4;
4562           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4563           p1 += mcnt;
4564         }
4565       else
4566         return false;
4567       break;
4568 
4569     case duplicate:
4570       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4571         return false;
4572       break;
4573 
4574     case set_number_at:
4575       p1 += 4;
4576 
4577     default:
4578       /* All other opcodes mean we cannot match the empty string.  */
4579       return false;
4580   }
4581 
4582   *p = p1;
4583   return true;
4584 } /* common_op_match_null_string_p */
4585 
4586 
4587 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4588    bytes; nonzero otherwise.  */
4589 
4590 static int
bcmp_translate(s1,s2,len,translate)4591 bcmp_translate (s1, s2, len, translate)
4592      unsigned char *s1, *s2;
4593      register int len;
4594      char *translate;
4595 {
4596   register unsigned char *p1 = s1, *p2 = s2;
4597   while (len)
4598     {
4599       if (translate[*p1++] != translate[*p2++]) return 1;
4600       len--;
4601     }
4602   return 0;
4603 }
4604 
4605 /* Entry points for GNU code.  */
4606 
4607 /* re_compile_pattern is the GNU regular expression compiler: it
4608    compiles PATTERN (of length SIZE) and puts the result in BUFP.
4609    Returns 0 if the pattern was valid, otherwise an error string.
4610 
4611    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
4612    are set in BUFP on entry.
4613 
4614    We call regex_compile to do the actual compilation.  */
4615 
4616 const char *
re_compile_pattern(pattern,length,bufp)4617 re_compile_pattern (pattern, length, bufp)
4618      const char *pattern;
4619      int length;
4620      struct re_pattern_buffer *bufp;
4621 {
4622   reg_errcode_t ret;
4623 
4624   /* GNU code is written to assume at least RE_NREGS registers will be set
4625      (and at least one extra will be -1).  */
4626   bufp->regs_allocated = REGS_UNALLOCATED;
4627 
4628   /* And GNU code determines whether or not to get register information
4629      by passing null for the REGS argument to re_match, etc., not by
4630      setting no_sub.  */
4631   bufp->no_sub = 0;
4632 
4633   /* Match anchors at newline.  */
4634   bufp->newline_anchor = 1;
4635 
4636   ret = regex_compile (pattern, length, re_syntax_options, bufp);
4637 
4638   return re_error_msg[(int) ret];
4639 }
4640 
4641 /* Entry points compatible with 4.2 BSD regex library.  We don't define
4642    them if this is an Emacs or POSIX compilation.  */
4643 
4644 #if !defined (emacs) && !defined (_POSIX_SOURCE)
4645 
4646 /* BSD has one and only one pattern buffer.  */
4647 static struct re_pattern_buffer re_comp_buf;
4648 
4649 char *
re_comp(s)4650 re_comp (s)
4651     const char *s;
4652 {
4653   reg_errcode_t ret;
4654 
4655   if (!s)
4656     {
4657       if (!re_comp_buf.buffer)
4658 	return "No previous regular expression";
4659       return 0;
4660     }
4661 
4662   if (!re_comp_buf.buffer)
4663     {
4664       re_comp_buf.buffer = (unsigned char *) malloc (200);
4665       if (re_comp_buf.buffer == NULL)
4666         return "Memory exhausted";
4667       re_comp_buf.allocated = 200;
4668 
4669       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
4670       if (re_comp_buf.fastmap == NULL)
4671 	return "Memory exhausted";
4672     }
4673 
4674   /* Since `re_exec' always passes NULL for the `regs' argument, we
4675      don't need to initialize the pattern buffer fields which affect it.  */
4676 
4677   /* Match anchors at newlines.  */
4678   re_comp_buf.newline_anchor = 1;
4679 
4680   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
4681 
4682   /* Yes, we're discarding `const' here.  */
4683   return (char *) re_error_msg[(int) ret];
4684 }
4685 
4686 
4687 int
re_exec(s)4688 re_exec (s)
4689     const char *s;
4690 {
4691   const int len = strlen (s);
4692   return
4693     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
4694 }
4695 #endif /* not emacs and not _POSIX_SOURCE */
4696 
4697 /* POSIX.2 functions.  Don't define these for Emacs.  */
4698 
4699 #ifndef emacs
4700 
4701 /* regcomp takes a regular expression as a string and compiles it.
4702 
4703    PREG is a regex_t *.  We do not expect any fields to be initialized,
4704    since POSIX says we shouldn't.  Thus, we set
4705 
4706      `buffer' to the compiled pattern;
4707      `used' to the length of the compiled pattern;
4708      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4709        REG_EXTENDED bit in CFLAGS is set; otherwise, to
4710        RE_SYNTAX_POSIX_BASIC;
4711      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4712      `fastmap' and `fastmap_accurate' to zero;
4713      `re_nsub' to the number of subexpressions in PATTERN.
4714 
4715    PATTERN is the address of the pattern string.
4716 
4717    CFLAGS is a series of bits which affect compilation.
4718 
4719      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4720      use POSIX basic syntax.
4721 
4722      If REG_NEWLINE is set, then . and [^...] don't match newline.
4723      Also, regexec will try a match beginning after every newline.
4724 
4725      If REG_ICASE is set, then we considers upper- and lowercase
4726      versions of letters to be equivalent when matching.
4727 
4728      If REG_NOSUB is set, then when PREG is passed to regexec, that
4729      routine will report only success or failure, and nothing about the
4730      registers.
4731 
4732    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
4733    the return codes and their meanings.)  */
4734 
4735 int
regcomp(preg,pattern,cflags)4736 regcomp (preg, pattern, cflags)
4737     regex_t *preg;
4738     const char *pattern;
4739     int cflags;
4740 {
4741   reg_errcode_t ret;
4742   unsigned syntax
4743     = (cflags & REG_EXTENDED) ?
4744       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4745 
4746   /* regex_compile will allocate the space for the compiled pattern.  */
4747   preg->buffer = 0;
4748   preg->allocated = 0;
4749 
4750   /* Don't bother to use a fastmap when searching.  This simplifies the
4751      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4752      characters after newlines into the fastmap.  This way, we just try
4753      every character.  */
4754   preg->fastmap = 0;
4755 
4756   if (cflags & REG_ICASE)
4757     {
4758       unsigned i;
4759 
4760       preg->translate = (char *) malloc (CHAR_SET_SIZE);
4761       if (preg->translate == NULL)
4762         return (int) REG_ESPACE;
4763 
4764       /* Map uppercase characters to corresponding lowercase ones.  */
4765       for (i = 0; i < CHAR_SET_SIZE; i++)
4766         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
4767     }
4768   else
4769     preg->translate = NULL;
4770 
4771   /* If REG_NEWLINE is set, newlines are treated differently.  */
4772   if (cflags & REG_NEWLINE)
4773     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
4774       syntax &= ~RE_DOT_NEWLINE;
4775       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4776       /* It also changes the matching behavior.  */
4777       preg->newline_anchor = 1;
4778     }
4779   else
4780     preg->newline_anchor = 0;
4781 
4782   preg->no_sub = !!(cflags & REG_NOSUB);
4783 
4784   /* POSIX says a null character in the pattern terminates it, so we
4785      can use strlen here in compiling the pattern.  */
4786   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
4787 
4788   /* POSIX doesn't distinguish between an unmatched open-group and an
4789      unmatched close-group: both are REG_EPAREN.  */
4790   if (ret == REG_ERPAREN) ret = REG_EPAREN;
4791 
4792   return (int) ret;
4793 }
4794 
4795 
4796 /* regexec searches for a given pattern, specified by PREG, in the
4797    string STRING.
4798 
4799    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4800    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
4801    least NMATCH elements, and we set them to the offsets of the
4802    corresponding matched substrings.
4803 
4804    EFLAGS specifies `execution flags' which affect matching: if
4805    REG_NOTBOL is set, then ^ does not match at the beginning of the
4806    string; if REG_NOTEOL is set, then $ does not match at the end.
4807 
4808    We return 0 if we find a match and REG_NOMATCH if not.  */
4809 
4810 int
regexec(preg,string,nmatch,pmatch,eflags)4811 regexec (preg, string, nmatch, pmatch, eflags)
4812     const regex_t *preg;
4813     const char *string;
4814     size_t nmatch;
4815     regmatch_t pmatch[];
4816     int eflags;
4817 {
4818   int ret;
4819   struct re_registers regs;
4820   regex_t private_preg;
4821   int len = strlen (string);
4822   boolean want_reg_info = !preg->no_sub && nmatch > 0;
4823 
4824   private_preg = *preg;
4825 
4826   private_preg.not_bol = !!(eflags & REG_NOTBOL);
4827   private_preg.not_eol = !!(eflags & REG_NOTEOL);
4828 
4829   /* The user has told us exactly how many registers to return
4830      information about, via `nmatch'.  We have to pass that on to the
4831      matching routines.  */
4832   private_preg.regs_allocated = REGS_FIXED;
4833 
4834   if (want_reg_info)
4835     {
4836       regs.num_regs = nmatch;
4837       regs.start = TALLOC (nmatch, regoff_t);
4838       regs.end = TALLOC (nmatch, regoff_t);
4839       if (regs.start == NULL || regs.end == NULL)
4840         return (int) REG_NOMATCH;
4841     }
4842 
4843   /* Perform the searching operation.  */
4844   ret = re_search (&private_preg, string, len,
4845                    /* start: */ 0, /* range: */ len,
4846                    want_reg_info ? &regs : (struct re_registers *) 0);
4847 
4848   /* Copy the register information to the POSIX structure.  */
4849   if (want_reg_info)
4850     {
4851       if (ret >= 0)
4852         {
4853           unsigned r;
4854 
4855           for (r = 0; r < nmatch; r++)
4856             {
4857               pmatch[r].rm_so = regs.start[r];
4858               pmatch[r].rm_eo = regs.end[r];
4859             }
4860         }
4861 
4862       /* If we needed the temporary register info, free the space now.  */
4863       free (regs.start);
4864       free (regs.end);
4865     }
4866 
4867   /* We want zero return to mean success, unlike `re_search'.  */
4868   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4869 }
4870 
4871 
4872 /* Returns a message corresponding to an error code, ERRCODE, returned
4873    from either regcomp or regexec.   We don't use PREG here.  */
4874 
4875 size_t
regerror(errcode,preg,errbuf,errbuf_size)4876 regerror (errcode, preg, errbuf, errbuf_size)
4877     int errcode;
4878     const regex_t *preg;
4879     char *errbuf;
4880     size_t errbuf_size;
4881 {
4882   const char *msg;
4883   size_t msg_size;
4884 
4885   if (errcode < 0
4886       || errcode >= (sizeof (re_error_msg) / sizeof (re_error_msg[0])))
4887     /* Only error codes returned by the rest of the code should be passed
4888        to this routine.  If we are given anything else, or if other regex
4889        code generates an invalid error code, then the program has a bug.
4890        Dump core so we can fix it.  */
4891     abort ();
4892 
4893   msg = re_error_msg[errcode];
4894 
4895   /* POSIX doesn't require that we do anything in this case, but why
4896      not be nice.  */
4897   if (! msg)
4898     msg = "Success";
4899 
4900   msg_size = strlen (msg) + 1; /* Includes the null.  */
4901 
4902   if (errbuf_size != 0)
4903     {
4904       if (msg_size > errbuf_size)
4905         {
4906           strncpy (errbuf, msg, errbuf_size - 1);
4907           errbuf[errbuf_size - 1] = 0;
4908         }
4909       else
4910         strcpy (errbuf, msg);
4911     }
4912 
4913   return msg_size;
4914 }
4915 
4916 
4917 /* Free dynamically allocated space used by PREG.  */
4918 
4919 void
regfree(preg)4920 regfree (preg)
4921     regex_t *preg;
4922 {
4923   if (preg->buffer != NULL)
4924     free (preg->buffer);
4925   preg->buffer = NULL;
4926 
4927   preg->allocated = 0;
4928   preg->used = 0;
4929 
4930   if (preg->fastmap != NULL)
4931     free (preg->fastmap);
4932   preg->fastmap = NULL;
4933   preg->fastmap_accurate = 0;
4934 
4935   if (preg->translate != NULL)
4936     free (preg->translate);
4937   preg->translate = NULL;
4938 }
4939 
4940 #endif /* not emacs  */
4941 
4942 /*
4943 Local variables:
4944 make-backup-files: t
4945 version-control: t
4946 trim-versions-without-asking: nil
4947 End:
4948 */
4949 
4950 void
printchar(c)4951 printchar (c)
4952      char c;
4953 {
4954   if (c < 040 || c >= 0177)
4955     {
4956       putchar ('\\');
4957       putchar (((c >> 6) & 3) + '0');
4958       putchar (((c >> 3) & 7) + '0');
4959       putchar ((c & 7) + '0');
4960     }
4961   else
4962     putchar (c);
4963 }
4964 
4965