1 /* Extended regular expression matching and search library,
2    version 0.12.
3    (Implements POSIX draft P10003.2/D11.2, except for
4    internationalization features.)
5 
6    Copyright (C) 1993, 1994, 1995 Free Software Foundation, Inc.
7 
8    This program is free software; you can redistribute it and/or modify
9    it under the terms of the GNU General Public License as published by
10    the Free Software Foundation; either version 2, or (at your option)
11    any later version.
12 
13    This program is distributed in the hope that it will be useful,
14    but WITHOUT ANY WARRANTY; without even the implied warranty of
15    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
16    GNU General Public License for more details.
17 
18    You should have received a copy of the GNU General Public License
19    along with this program; if not, write to the Free Software
20    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
21 
22 /* AIX requires this to be the first thing in the file. */
23 #if defined (_AIX) && !defined (REGEX_MALLOC)
24   #pragma alloca
25 #endif
26 
27 #define _GNU_SOURCE
28 
29 #ifdef HAVE_CONFIG_H
30 #include <config.h>
31 #endif
32 
33 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
34 #include <sys/types.h>
35 
36 /* This is for other GNU distributions with internationalized messages.  */
37 #if HAVE_LIBINTL_H || defined (_LIBC)
38 # include <libintl.h>
39 #else
40 # define gettext(msgid) (msgid)
41 #endif
42 
43 /* The `emacs' switch turns on certain matching commands
44    that make sense only in Emacs. */
45 #ifdef emacs
46 
47 #include "lisp.h"
48 #include "buffer.h"
49 #include "syntax.h"
50 
51 #else  /* not emacs */
52 
53 /* If we are not linking with Emacs proper,
54    we can't use the relocating allocator
55    even if config.h says that we can.  */
56 #undef REL_ALLOC
57 
58 #include <stdlib.h>
59 
60 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
61    If nothing else has been done, use the method below.  */
62 #ifdef INHIBIT_STRING_HEADER
63 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
64 #if !defined (bzero) && !defined (bcopy)
65 #undef INHIBIT_STRING_HEADER
66 #endif
67 #endif
68 #endif
69 
70 /* This is the normal way of making sure we have a bcopy and a bzero.
71    This is used in most programs--a few other programs avoid this
72    by defining INHIBIT_STRING_HEADER.  */
73 #ifndef INHIBIT_STRING_HEADER
74 /*#if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)*/
75 #include <string.h>
76 #ifndef bcmp
77 #define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
78 #endif
79 #ifndef bcopy
80 #define bcopy(s, d, n)	memcpy ((d), (s), (n))
81 #endif
82 #ifndef bzero
83 #define bzero(s, n)	memset ((s), 0, (n))
84 #endif
85 /*#else
86 #include <strings.h>
87 #endif*/
88 #endif
89 
90 /* Define the syntax stuff for \<, \>, etc.  */
91 
92 /* This must be nonzero for the wordchar and notwordchar pattern
93    commands in re_match_2.  */
94 #ifndef Sword
95 #define Sword 1
96 #endif
97 
98 #ifdef SWITCH_ENUM_BUG
99 #define SWITCH_ENUM_CAST(x) ((int)(x))
100 #else
101 #define SWITCH_ENUM_CAST(x) (x)
102 #endif
103 
104 #ifdef SYNTAX_TABLE
105 
106 extern char *re_syntax_table;
107 
108 #else /* not SYNTAX_TABLE */
109 
110 /* How many characters in the character set.  */
111 #define CHAR_SET_SIZE 256
112 
113 static char re_syntax_table[CHAR_SET_SIZE];
114 
115 static void
init_syntax_once()116 init_syntax_once ()
117 {
118    register int c;
119    static int done = 0;
120 
121    if (done)
122 	return;
123 
124    bzero (re_syntax_table, sizeof re_syntax_table);
125 
126    for (c = 'a'; c <= 'z'; c++)
127 	re_syntax_table[c] = Sword;
128 
129    for (c = 'A'; c <= 'Z'; c++)
130 	re_syntax_table[c] = Sword;
131 
132    for (c = '0'; c <= '9'; c++)
133 	re_syntax_table[c] = Sword;
134 
135    re_syntax_table['_'] = Sword;
136 
137    done = 1;
138 }
139 
140 #endif /* not SYNTAX_TABLE */
141 
142 #define SYNTAX(c) re_syntax_table[c]
143 
144 #endif /* not emacs */
145 
146 /* Get the interface, including the syntax bits.  */
147 #include "regex.h"
148 
149 /* isalpha etc. are used for the character classes.  */
150 #include <ctype.h>
151 
152 /* Jim Meyering writes:
153 
154    "... Some ctype macros are valid only for character codes that
155    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
156    using /bin/cc or gcc but without giving an ansi option).  So, all
157    ctype uses should be through macros like ISPRINT...  If
158    STDC_HEADERS is defined, then autoconf has verified that the ctype
159    macros don't need to be guarded with references to isascii. ...
160    Defining isascii to 1 should let any compiler worth its salt
161    eliminate the && through constant folding."  */
162 
163 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
164 #define ISASCII(c) 1
165 #else
166 #define ISASCII(c) isascii(c)
167 #endif
168 
169 #ifdef isblank
170 #define ISBLANK(c) (ISASCII (c) && isblank (c))
171 #else
172 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
173 #endif
174 #ifdef isgraph
175 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
176 #else
177 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
178 #endif
179 
180 #define ISPRINT(c) (ISASCII (c) && isprint (c))
181 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
182 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
183 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
184 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
185 #define ISLOWER(c) (ISASCII (c) && islower (c))
186 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
187 #define ISSPACE(c) (ISASCII (c) && isspace (c))
188 #define ISUPPER(c) (ISASCII (c) && isupper (c))
189 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
190 
191 #ifndef NULL
192 #define NULL (void *)0
193 #endif
194 
195 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
196    since ours (we hope) works properly with all combinations of
197    machines, compilers, `char' and `unsigned char' argument types.
198    (Per Bothner suggested the basic approach.)  */
199 #undef SIGN_EXTEND_CHAR
200 #if __STDC__
201 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
202 #else  /* not __STDC__ */
203 /* As in Harbison and Steele.  */
204 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
205 #endif
206 
207 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
208    use `alloca' instead of `malloc'.  This is because using malloc in
209    re_search* or re_match* could cause memory leaks when C-g is used in
210    Emacs; also, malloc is slower and causes storage fragmentation.  On
211    the other hand, malloc is more portable, and easier to debug.
212 
213    Because we sometimes use alloca, some routines have to be macros,
214    not functions -- `alloca'-allocated space disappears at the end of the
215    function it is called in.  */
216 
217 #ifdef REGEX_MALLOC
218 
219 #define REGEX_ALLOCATE malloc
220 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
221 #define REGEX_FREE free
222 
223 #else /* not REGEX_MALLOC  */
224 
225 /* Emacs already defines alloca, sometimes.  */
226 #ifndef alloca
227 
228 /* Make alloca work the best possible way.  */
229 #ifdef __GNUC__
230 #define alloca __builtin_alloca
231 #else /* not __GNUC__ */
232 #if HAVE_ALLOCA_H
233 #include <alloca.h>
234 #else /* not __GNUC__ or HAVE_ALLOCA_H */
235 #ifndef _AIX /* Already did AIX, up at the top.  */
236 char *alloca ();
237 #endif /* not _AIX */
238 #endif /* not HAVE_ALLOCA_H */
239 #endif /* not __GNUC__ */
240 
241 #endif /* not alloca */
242 
243 #define REGEX_ALLOCATE alloca
244 
245 /* Assumes a `char *destination' variable.  */
246 #define REGEX_REALLOCATE(source, osize, nsize)				\
247   (destination = (char *) alloca (nsize),				\
248    bcopy (source, destination, osize),					\
249    destination)
250 
251 /* No need to do anything to free, after alloca.  */
252 #define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
253 
254 #endif /* not REGEX_MALLOC */
255 
256 /* Define how to allocate the failure stack.  */
257 
258 #ifdef REL_ALLOC
259 #define REGEX_ALLOCATE_STACK(size)				\
260   r_alloc (&failure_stack_ptr, (size))
261 #define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
262   r_re_alloc (&failure_stack_ptr, (nsize))
263 #define REGEX_FREE_STACK(ptr)					\
264   r_alloc_free (&failure_stack_ptr)
265 
266 #else /* not REL_ALLOC */
267 
268 #ifdef REGEX_MALLOC
269 
270 #define REGEX_ALLOCATE_STACK malloc
271 #define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
272 #define REGEX_FREE_STACK free
273 
274 #else /* not REGEX_MALLOC */
275 
276 #define REGEX_ALLOCATE_STACK alloca
277 
278 #define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
279    REGEX_REALLOCATE (source, osize, nsize)
280 /* No need to explicitly free anything.  */
281 #define REGEX_FREE_STACK(arg)
282 
283 #endif /* not REGEX_MALLOC */
284 #endif /* not REL_ALLOC */
285 
286 
287 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
288    `string1' or just past its end.  This works if PTR is NULL, which is
289    a good thing.  */
290 #define FIRST_STRING_P(ptr) 					\
291   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
292 
293 /* (Re)Allocate N items of type T using malloc, or fail.  */
294 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
295 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
296 #define RETALLOC_IF(addr, n, t) \
297   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
298 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
299 
300 #define BYTEWIDTH 8 /* In bits.  */
301 
302 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
303 
304 #undef MAX
305 #undef MIN
306 #define MAX(a, b) ((a) > (b) ? (a) : (b))
307 #define MIN(a, b) ((a) < (b) ? (a) : (b))
308 
309 typedef char boolean;
310 #define false 0
311 #define true 1
312 
313 static int re_match_2_internal ();
314 
315 /* These are the command codes that appear in compiled regular
316    expressions.  Some opcodes are followed by argument bytes.  A
317    command code can specify any interpretation whatsoever for its
318    arguments.  Zero bytes may appear in the compiled regular expression.  */
319 
320 typedef enum
321 {
322   no_op = 0,
323 
324   /* Succeed right away--no more backtracking.  */
325   succeed,
326 
327 	   /* Followed by one byte giving n, then by n literal bytes.  */
328   exactn,
329 
330 	   /* Matches any (more or less) character.  */
331   anychar,
332 
333 	   /* Matches any one char belonging to specified set.  First
334 		 following byte is number of bitmap bytes.  Then come bytes
335 		 for a bitmap saying which chars are in.  Bits in each byte
336 		 are ordered low-bit-first.  A character is in the set if its
337 		 bit is 1.  A character too large to have a bit in the map is
338 		 automatically not in the set.  */
339   charset,
340 
341 	   /* Same parameters as charset, but match any character that is
342 		 not one of those specified.  */
343   charset_not,
344 
345 	   /* Start remembering the text that is matched, for storing in a
346 		 register.  Followed by one byte with the register number, in
347 		 the range 0 to one less than the pattern buffer's re_nsub
348 		 field.  Then followed by one byte with the number of groups
349 		 inner to this one.  (This last has to be part of the
350 		 start_memory only because we need it in the on_failure_jump
351 		 of re_match_2.)  */
352   start_memory,
353 
354 	   /* Stop remembering the text that is matched and store it in a
355 		 memory register.  Followed by one byte with the register
356 		 number, in the range 0 to one less than `re_nsub' in the
357 		 pattern buffer, and one byte with the number of inner groups,
358 		 just like `start_memory'.  (We need the number of inner
359 		 groups here because we don't have any easy way of finding the
360 		 corresponding start_memory when we're at a stop_memory.)  */
361   stop_memory,
362 
363 	   /* Match a duplicate of something remembered. Followed by one
364 		 byte containing the register number.  */
365   duplicate,
366 
367 	   /* Fail unless at beginning of line.  */
368   begline,
369 
370 	   /* Fail unless at end of line.  */
371   endline,
372 
373 	   /* Succeeds if at beginning of buffer (if emacs) or at beginning
374 		 of string to be matched (if not).  */
375   begbuf,
376 
377 	   /* Analogously, for end of buffer/string.  */
378   endbuf,
379 
380 	   /* Followed by two byte relative address to which to jump.  */
381   jump,
382 
383 	/* Same as jump, but marks the end of an alternative.  */
384   jump_past_alt,
385 
386 	   /* Followed by two-byte relative address of place to resume at
387 		 in case of failure.  */
388   on_failure_jump,
389 
390 	   /* Like on_failure_jump, but pushes a placeholder instead of the
391 		 current string position when executed.  */
392   on_failure_keep_string_jump,
393 
394 	   /* Throw away latest failure point and then jump to following
395 		 two-byte relative address.  */
396   pop_failure_jump,
397 
398 	   /* Change to pop_failure_jump if know won't have to backtrack to
399 		 match; otherwise change to jump.  This is used to jump
400 		 back to the beginning of a repeat.  If what follows this jump
401 		 clearly won't match what the repeat does, such that we can be
402 		 sure that there is no use backtracking out of repetitions
403 		 already matched, then we change it to a pop_failure_jump.
404 		 Followed by two-byte address.  */
405   maybe_pop_jump,
406 
407 	   /* Jump to following two-byte address, and push a dummy failure
408 		 point. This failure point will be thrown away if an attempt
409 		 is made to use it for a failure.  A `+' construct makes this
410 		 before the first repeat.  Also used as an intermediary kind
411 		 of jump when compiling an alternative.  */
412   dummy_failure_jump,
413 
414 	/* Push a dummy failure point and continue.  Used at the end of
415 	   alternatives.  */
416   push_dummy_failure,
417 
418 	   /* Followed by two-byte relative address and two-byte number n.
419 		 After matching N times, jump to the address upon failure.  */
420   succeed_n,
421 
422 	   /* Followed by two-byte relative address, and two-byte number n.
423 		 Jump to the address N times, then fail.  */
424   jump_n,
425 
426 	   /* Set the following two-byte relative address to the
427 		 subsequent two-byte number.  The address *includes* the two
428 		 bytes of number.  */
429   set_number_at,
430 
431   wordchar,	/* Matches any word-constituent character.  */
432   notwordchar,	/* Matches any char that is not a word-constituent.  */
433 
434   wordbeg,	/* Succeeds if at word beginning.  */
435   wordend,	/* Succeeds if at word end.  */
436 
437   wordbound,	/* Succeeds if at a word boundary.  */
438   notwordbound	/* Succeeds if not at a word boundary.  */
439 
440 #ifdef emacs
441   ,before_dot,	/* Succeeds if before point.  */
442   at_dot,	/* Succeeds if at point.  */
443   after_dot,	/* Succeeds if after point.  */
444 
445 	/* Matches any character whose syntax is specified.  Followed by
446 		 a byte which contains a syntax code, e.g., Sword.  */
447   syntaxspec,
448 
449 	/* Matches any character whose syntax is not that specified.  */
450   notsyntaxspec
451 #endif /* emacs */
452 } re_opcode_t;
453 
454 /* Common operations on the compiled pattern.  */
455 
456 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
457 
458 #define STORE_NUMBER(destination, number)				\
459   do {									\
460     (destination)[0] = (number) & 0377;					\
461     (destination)[1] = (number) >> 8;					\
462   } while (0)
463 
464 /* Same as STORE_NUMBER, except increment DESTINATION to
465    the byte after where the number is stored.  Therefore, DESTINATION
466    must be an lvalue.  */
467 
468 #define STORE_NUMBER_AND_INCR(destination, number)			\
469   do {									\
470     STORE_NUMBER (destination, number);					\
471     (destination) += 2;							\
472   } while (0)
473 
474 /* Put into DESTINATION a number stored in two contiguous bytes starting
475    at SOURCE.  */
476 
477 #define EXTRACT_NUMBER(destination, source)				\
478   do {									\
479     (destination) = *(source) & 0377;					\
480     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
481   } while (0)
482 
483 #ifdef DEBUG
484 static void
extract_number(dest,source)485 extract_number (dest, source)
486     int *dest;
487     unsigned char *source;
488 {
489   int temp = SIGN_EXTEND_CHAR (*(source + 1));
490   *dest = *source & 0377;
491   *dest += temp << 8;
492 }
493 
494 #ifndef EXTRACT_MACROS /* To debug the macros.  */
495 #undef EXTRACT_NUMBER
496 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
497 #endif /* not EXTRACT_MACROS */
498 
499 #endif /* DEBUG */
500 
501 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
502    SOURCE must be an lvalue.  */
503 
504 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
505   do {									\
506     EXTRACT_NUMBER (destination, source);				\
507     (source) += 2; 							\
508   } while (0)
509 
510 #ifdef DEBUG
511 static void
extract_number_and_incr(destination,source)512 extract_number_and_incr (destination, source)
513     int *destination;
514     unsigned char **source;
515 {
516   extract_number (destination, *source);
517   *source += 2;
518 }
519 
520 #ifndef EXTRACT_MACROS
521 #undef EXTRACT_NUMBER_AND_INCR
522 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
523   extract_number_and_incr (&dest, &src)
524 #endif /* not EXTRACT_MACROS */
525 
526 #endif /* DEBUG */
527 
528 /* If DEBUG is defined, Regex prints many voluminous messages about what
529    it is doing (if the variable `debug' is nonzero).  If linked with the
530    main program in `iregex.c', you can enter patterns and strings
531    interactively.  And if linked with the main program in `main.c' and
532    the other test files, you can run the already-written tests.  */
533 
534 #ifdef DEBUG
535 
536 /* We use standard I/O for debugging.  */
537 #include <stdio.h>
538 
539 /* It is useful to test things that ``must'' be true when debugging.  */
540 #include <assert.h>
541 
542 static int debug = 0;
543 
544 #define DEBUG_STATEMENT(e) e
545 #define DEBUG_PRINT1(x) if (debug) printf (x)
546 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
547 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
548 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
549 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
550   if (debug) print_partial_compiled_pattern (s, e)
551 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
552   if (debug) print_double_string (w, s1, sz1, s2, sz2)
553 
554 
555 /* Print the fastmap in human-readable form.  */
556 
557 void
print_fastmap(fastmap)558 print_fastmap (fastmap)
559     char *fastmap;
560 {
561   unsigned was_a_range = 0;
562   unsigned i = 0;
563 
564   while (i < (1 << BYTEWIDTH))
565     {
566 	 if (fastmap[i++])
567 	{
568 	  was_a_range = 0;
569 		putchar (i - 1);
570 		while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
571 		  {
572 		    was_a_range = 1;
573 		    i++;
574 		  }
575 	  if (was_a_range)
576 		  {
577 		    printf ("-");
578 		    putchar (i - 1);
579 		  }
580 	   }
581     }
582   putchar ('\n');
583 }
584 
585 
586 /* Print a compiled pattern string in human-readable form, starting at
587    the START pointer into it and ending just before the pointer END.  */
588 
589 void
print_partial_compiled_pattern(start,end)590 print_partial_compiled_pattern (start, end)
591     unsigned char *start;
592     unsigned char *end;
593 {
594   int mcnt, mcnt2;
595   unsigned char *p = start;
596   unsigned char *pend = end;
597 
598   if (start == NULL)
599     {
600 	 printf ("(null)\n");
601 	 return;
602     }
603 
604   /* Loop over pattern commands.  */
605   while (p < pend)
606     {
607 	 printf ("%d:\t", p - start);
608 
609 	 switch ((re_opcode_t) *p++)
610 	{
611 	   case no_op:
612 		printf ("/no_op");
613 		break;
614 
615 	case exactn:
616 	  mcnt = *p++;
617 		printf ("/exactn/%d", mcnt);
618 		do
619 	    {
620 		    putchar ('/');
621 		 putchar (*p++);
622 		  }
623 		while (--mcnt);
624 		break;
625 
626 	case start_memory:
627 		mcnt = *p++;
628 		printf ("/start_memory/%d/%d", mcnt, *p++);
629 		break;
630 
631 	case stop_memory:
632 		mcnt = *p++;
633 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
634 		break;
635 
636 	case duplicate:
637 	  printf ("/duplicate/%d", *p++);
638 	  break;
639 
640 	case anychar:
641 	  printf ("/anychar");
642 	  break;
643 
644 	case charset:
645 	   case charset_not:
646 		{
647 		  register int c, last = -100;
648 	    register int in_range = 0;
649 
650 	    printf ("/charset [%s",
651 			  (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
652 
653 		  assert (p + *p < pend);
654 
655 		  for (c = 0; c < 256; c++)
656 		 if (c / 8 < *p
657 		  && (p[1 + (c/8)] & (1 << (c % 8))))
658 		{
659 		  /* Are we starting a range?  */
660 		  if (last + 1 == c && ! in_range)
661 		    {
662 			 putchar ('-');
663 			 in_range = 1;
664 		    }
665 		  /* Have we broken a range?  */
666 		  else if (last + 1 != c && in_range)
667 		    {
668 			 putchar (last);
669 			 in_range = 0;
670 		    }
671 
672 		  if (! in_range)
673 		    putchar (c);
674 
675 		  last = c;
676 		    }
677 
678 	    if (in_range)
679 		 putchar (last);
680 
681 	    putchar (']');
682 
683 	    p += 1 + *p;
684 	  }
685 	  break;
686 
687 	case begline:
688 	  printf ("/begline");
689 		break;
690 
691 	case endline:
692 		printf ("/endline");
693 		break;
694 
695 	case on_failure_jump:
696 		extract_number_and_incr (&mcnt, &p);
697 	  printf ("/on_failure_jump to %d", p + mcnt - start);
698 		break;
699 
700 	case on_failure_keep_string_jump:
701 		extract_number_and_incr (&mcnt, &p);
702 	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
703 		break;
704 
705 	case dummy_failure_jump:
706 		extract_number_and_incr (&mcnt, &p);
707 	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
708 		break;
709 
710 	case push_dummy_failure:
711 		printf ("/push_dummy_failure");
712 		break;
713 
714 	   case maybe_pop_jump:
715 		extract_number_and_incr (&mcnt, &p);
716 	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
717 	  break;
718 
719 	   case pop_failure_jump:
720 	  extract_number_and_incr (&mcnt, &p);
721 	  printf ("/pop_failure_jump to %d", p + mcnt - start);
722 	  break;
723 
724 	   case jump_past_alt:
725 	  extract_number_and_incr (&mcnt, &p);
726 	  printf ("/jump_past_alt to %d", p + mcnt - start);
727 	  break;
728 
729 	   case jump:
730 	  extract_number_and_incr (&mcnt, &p);
731 	  printf ("/jump to %d", p + mcnt - start);
732 	  break;
733 
734 	   case succeed_n:
735 		extract_number_and_incr (&mcnt, &p);
736 		extract_number_and_incr (&mcnt2, &p);
737 	  printf ("/succeed_n to %d, %d times", p + mcnt - start, mcnt2);
738 		break;
739 
740 	   case jump_n:
741 		extract_number_and_incr (&mcnt, &p);
742 		extract_number_and_incr (&mcnt2, &p);
743 	  printf ("/jump_n to %d, %d times", p + mcnt - start, mcnt2);
744 		break;
745 
746 	   case set_number_at:
747 		extract_number_and_incr (&mcnt, &p);
748 		extract_number_and_incr (&mcnt2, &p);
749 	  printf ("/set_number_at location %d to %d", p + mcnt - start, mcnt2);
750 		break;
751 
752 	   case wordbound:
753 	  printf ("/wordbound");
754 	  break;
755 
756 	case notwordbound:
757 	  printf ("/notwordbound");
758 		break;
759 
760 	case wordbeg:
761 	  printf ("/wordbeg");
762 	  break;
763 
764 	case wordend:
765 	  printf ("/wordend");
766 
767 #ifdef emacs
768 	case before_dot:
769 	  printf ("/before_dot");
770 		break;
771 
772 	case at_dot:
773 	  printf ("/at_dot");
774 		break;
775 
776 	case after_dot:
777 	  printf ("/after_dot");
778 		break;
779 
780 	case syntaxspec:
781 		printf ("/syntaxspec");
782 	  mcnt = *p++;
783 	  printf ("/%d", mcnt);
784 		break;
785 
786 	case notsyntaxspec:
787 		printf ("/notsyntaxspec");
788 	  mcnt = *p++;
789 	  printf ("/%d", mcnt);
790 	  break;
791 #endif /* emacs */
792 
793 	case wordchar:
794 	  printf ("/wordchar");
795 		break;
796 
797 	case notwordchar:
798 	  printf ("/notwordchar");
799 		break;
800 
801 	case begbuf:
802 	  printf ("/begbuf");
803 		break;
804 
805 	case endbuf:
806 	  printf ("/endbuf");
807 		break;
808 
809 	   default:
810 		printf ("?%d", *(p-1));
811 	}
812 
813 	 putchar ('\n');
814     }
815 
816   printf ("%d:\tend of pattern.\n", p - start);
817 }
818 
819 
820 void
print_compiled_pattern(bufp)821 print_compiled_pattern (bufp)
822     struct re_pattern_buffer *bufp;
823 {
824   unsigned char *buffer = bufp->buffer;
825 
826   print_partial_compiled_pattern (buffer, buffer + bufp->used);
827   printf ("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
828 
829   if (bufp->fastmap_accurate && bufp->fastmap)
830     {
831 	 printf ("fastmap: ");
832 	 print_fastmap (bufp->fastmap);
833     }
834 
835   printf ("re_nsub: %d\t", bufp->re_nsub);
836   printf ("regs_alloc: %d\t", bufp->regs_allocated);
837   printf ("can_be_null: %d\t", bufp->can_be_null);
838   printf ("newline_anchor: %d\n", bufp->newline_anchor);
839   printf ("no_sub: %d\t", bufp->no_sub);
840   printf ("not_bol: %d\t", bufp->not_bol);
841   printf ("not_eol: %d\t", bufp->not_eol);
842   printf ("syntax: %d\n", bufp->syntax);
843   /* Perhaps we should print the translate table?  */
844 }
845 
846 
847 void
print_double_string(where,string1,size1,string2,size2)848 print_double_string (where, string1, size1, string2, size2)
849     const char *where;
850     const char *string1;
851     const char *string2;
852     int size1;
853     int size2;
854 {
855   unsigned this_char;
856 
857   if (where == NULL)
858     printf ("(null)");
859   else
860     {
861 	 if (FIRST_STRING_P (where))
862 	   {
863 		for (this_char = where - string1; this_char < size1; this_char++)
864 		  putchar (string1[this_char]);
865 
866 		where = string2;
867 	   }
868 
869 	 for (this_char = where - string2; this_char < size2; this_char++)
870 	   putchar (string2[this_char]);
871     }
872 }
873 
874 #else /* not DEBUG */
875 
876 #undef assert
877 #define assert(e)
878 
879 #define DEBUG_STATEMENT(e)
880 #define DEBUG_PRINT1(x)
881 #define DEBUG_PRINT2(x1, x2)
882 #define DEBUG_PRINT3(x1, x2, x3)
883 #define DEBUG_PRINT4(x1, x2, x3, x4)
884 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
885 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
886 
887 #endif /* not DEBUG */
888 
889 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
890    also be assigned to arbitrarily: each pattern buffer stores its own
891    syntax, so it can be changed between regex compilations.  */
892 /* This has no initializer because initialized variables in Emacs
893    become read-only after dumping.  */
894 reg_syntax_t re_syntax_options;
895 
896 
897 /* Specify the precise syntax of regexps for compilation.  This provides
898    for compatibility for various utilities which historically have
899    different, incompatible syntaxes.
900 
901    The argument SYNTAX is a bit mask comprised of the various bits
902    defined in regex.h.  We return the old syntax.  */
903 
904 reg_syntax_t
re_set_syntax(syntax)905 re_set_syntax (syntax)
906     reg_syntax_t syntax;
907 {
908   reg_syntax_t ret = re_syntax_options;
909 
910   re_syntax_options = syntax;
911   return ret;
912 }
913 
914 /* This table gives an error message for each of the error codes listed
915    in regex.h.  Obviously the order here has to be same as there.
916    POSIX doesn't require that we do anything for REG_NOERROR,
917    but why not be nice?  */
918 
919 static const char *re_error_msgid[] =
920   { "Success",					/* REG_NOERROR */
921     "No match",					/* REG_NOMATCH */
922     "Invalid regular expression",		/* REG_BADPAT */
923     "Invalid collation character",		/* REG_ECOLLATE */
924     "Invalid character class name",		/* REG_ECTYPE */
925     "Trailing backslash",			/* REG_EESCAPE */
926     "Invalid back reference",			/* REG_ESUBREG */
927     "Unmatched [ or [^",			/* REG_EBRACK */
928     "Unmatched ( or \\(",			/* REG_EPAREN */
929     "Unmatched \\{",				/* REG_EBRACE */
930     "Invalid content of \\{\\}",		/* REG_BADBR */
931     "Invalid range end",			/* REG_ERANGE */
932     "Memory exhausted",				/* REG_ESPACE */
933     "Invalid preceding regular expression",	/* REG_BADRPT */
934     "Premature end of regular expression",	/* REG_EEND */
935     "Regular expression too big",		/* REG_ESIZE */
936     "Unmatched ) or \\)",			/* REG_ERPAREN */
937   };
938 
939 /* Avoiding alloca during matching, to placate r_alloc.  */
940 
941 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
942    searching and matching functions should not call alloca.  On some
943    systems, alloca is implemented in terms of malloc, and if we're
944    using the relocating allocator routines, then malloc could cause a
945    relocation, which might (if the strings being searched are in the
946    ralloc heap) shift the data out from underneath the regexp
947    routines.
948 
949    Here's another reason to avoid allocation: Emacs
950    processes input from X in a signal handler; processing X input may
951    call malloc; if input arrives while a matching routine is calling
952    malloc, then we're scrod.  But Emacs can't just block input while
953    calling matching routines; then we don't notice interrupts when
954    they come in.  So, Emacs blocks input around all regexp calls
955    except the matching calls, which it leaves unprotected, in the
956    faith that they will not malloc.  */
957 
958 /* Normally, this is fine.  */
959 #define MATCH_MAY_ALLOCATE
960 
961 /* When using GNU C, we are not REALLY using the C alloca, no matter
962    what config.h may say.  So don't take precautions for it.  */
963 #ifdef __GNUC__
964 #undef C_ALLOCA
965 #endif
966 
967 /* The match routines may not allocate if (1) they would do it with malloc
968    and (2) it's not safe for them to use malloc.
969    Note that if REL_ALLOC is defined, matching would not use malloc for the
970    failure stack, but we would still use it for the register vectors;
971    so REL_ALLOC should not affect this.  */
972 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
973 #undef MATCH_MAY_ALLOCATE
974 #endif
975 
976 
977 /* Failure stack declarations and macros; both re_compile_fastmap and
978    re_match_2 use a failure stack.  These have to be macros because of
979    REGEX_ALLOCATE_STACK.  */
980 
981 
982 /* Number of failure points for which to initially allocate space
983    when matching.  If this number is exceeded, we allocate more
984    space, so it is not a hard limit.  */
985 #ifndef INIT_FAILURE_ALLOC
986 #define INIT_FAILURE_ALLOC 5
987 #endif
988 
989 /* Roughly the maximum number of failure points on the stack.  Would be
990    exactly that if always used MAX_FAILURE_SPACE each time we failed.
991    This is a variable only so users of regex can assign to it; we never
992    change it ourselves.  */
993 #if defined (MATCH_MAY_ALLOCATE)
994 int re_max_failures = 200000;
995 #else
996 int re_max_failures = 2000;
997 #endif
998 
999 union fail_stack_elt
1000 {
1001   unsigned char *pointer;
1002   int integer;
1003 };
1004 
1005 typedef union fail_stack_elt fail_stack_elt_t;
1006 
1007 typedef struct
1008 {
1009   fail_stack_elt_t *stack;
1010   unsigned size;
1011   unsigned avail;			/* Offset of next open position.  */
1012 } fail_stack_type;
1013 
1014 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1015 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1016 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1017 
1018 
1019 /* Define macros to initialize and free the failure stack.
1020    Do `return -2' if the alloc fails.  */
1021 
1022 #ifdef MATCH_MAY_ALLOCATE
1023 #define INIT_FAIL_STACK()						\
1024   do {									\
1025     fail_stack.stack = (fail_stack_elt_t *)				\
1026 	 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
1027 									\
1028     if (fail_stack.stack == NULL)					\
1029 	 return -2;							\
1030 									\
1031     fail_stack.size = INIT_FAILURE_ALLOC;				\
1032     fail_stack.avail = 0;						\
1033   } while (0)
1034 
1035 #define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1036 #else
1037 #define INIT_FAIL_STACK()						\
1038   do {									\
1039     fail_stack.avail = 0;						\
1040   } while (0)
1041 
1042 #define RESET_FAIL_STACK()
1043 #endif
1044 
1045 
1046 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1047 
1048    Return 1 if succeeds, and 0 if either ran out of memory
1049    allocating space for it or it was already too large.
1050 
1051    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1052 
1053 #define DOUBLE_FAIL_STACK(fail_stack)					\
1054   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS		\
1055    ? 0									\
1056    : ((fail_stack).stack = (fail_stack_elt_t *)				\
1057 	   REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
1058 		(fail_stack).size * sizeof (fail_stack_elt_t),		\
1059 		((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
1060 									\
1061 	 (fail_stack).stack == NULL					\
1062 	 ? 0								\
1063 	 : ((fail_stack).size <<= 1, 					\
1064 	    1)))
1065 
1066 
1067 /* Push pointer POINTER on FAIL_STACK.
1068    Return 1 if was able to do so and 0 if ran out of memory allocating
1069    space to do so.  */
1070 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
1071   ((FAIL_STACK_FULL ()							\
1072     && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
1073    ? 0									\
1074    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
1075 	 1))
1076 
1077 /* Push a pointer value onto the failure stack.
1078    Assumes the variable `fail_stack'.  Probably should only
1079    be called from within `PUSH_FAILURE_POINT'.  */
1080 #define PUSH_FAILURE_POINTER(item)					\
1081   fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1082 
1083 /* This pushes an integer-valued item onto the failure stack.
1084    Assumes the variable `fail_stack'.  Probably should only
1085    be called from within `PUSH_FAILURE_POINT'.  */
1086 #define PUSH_FAILURE_INT(item)					\
1087   fail_stack.stack[fail_stack.avail++].integer = (item)
1088 
1089 /* Push a fail_stack_elt_t value onto the failure stack.
1090    Assumes the variable `fail_stack'.  Probably should only
1091    be called from within `PUSH_FAILURE_POINT'.  */
1092 #define PUSH_FAILURE_ELT(item)					\
1093   fail_stack.stack[fail_stack.avail++] =  (item)
1094 
1095 /* These three POP... operations complement the three PUSH... operations.
1096    All assume that `fail_stack' is nonempty.  */
1097 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1098 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1099 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1100 
1101 /* Used to omit pushing failure point id's when we're not debugging.  */
1102 #ifdef DEBUG
1103 #define DEBUG_PUSH PUSH_FAILURE_INT
1104 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
1105 #else
1106 #define DEBUG_PUSH(item)
1107 #define DEBUG_POP(item_addr)
1108 #endif
1109 
1110 
1111 /* Push the information about the state we will need
1112    if we ever fail back to it.
1113 
1114    Requires variables fail_stack, regstart, regend, reg_info, and
1115    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
1116    declared.
1117 
1118    Does `return FAILURE_CODE' if runs out of memory.  */
1119 
1120 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1121   do {									\
1122     char *destination;							\
1123     /* Must be int, so when we don't save any registers, the arithmetic	\
1124 	  of 0 + -1 isn't done as unsigned.  */				\
1125     int this_reg;							\
1126 									\
1127     destination = 0;	/* inhibit possible compiler warning */		\
1128     DEBUG_STATEMENT (failure_id++);					\
1129     DEBUG_STATEMENT (nfailure_points_pushed++);				\
1130     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1131     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1132     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1133 									\
1134     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
1135     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
1136 									\
1137     /* Ensure we have enough space allocated for what we will push.  */	\
1138     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1139 	 {									\
1140 	   if (!DOUBLE_FAIL_STACK (fail_stack))				\
1141 		return failure_code;						\
1142 									\
1143 	   DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1144 			  (fail_stack).size);				\
1145 	   DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1146 	 }									\
1147 									\
1148     /* Push the info, starting with the registers.  */			\
1149     DEBUG_PRINT1 ("\n");						\
1150 									\
1151     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;	\
1152 	    this_reg++)							\
1153 	 {									\
1154 	DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);			\
1155 	   DEBUG_STATEMENT (num_regs_pushed++);				\
1156 									\
1157 	DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);		\
1158 	   PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1159 														  \
1160 	DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
1161 	   PUSH_FAILURE_POINTER (regend[this_reg]);			\
1162 									\
1163 	DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
1164 	   DEBUG_PRINT2 (" match_null=%d",					\
1165 				  REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1166 	   DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1167 	   DEBUG_PRINT2 (" matched_something=%d",				\
1168 				  MATCHED_SOMETHING (reg_info[this_reg]));		\
1169 	   DEBUG_PRINT2 (" ever_matched=%d",				\
1170 				  EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1171 	DEBUG_PRINT1 ("\n");						\
1172 	   PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1173 	 }									\
1174 									\
1175     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
1176     PUSH_FAILURE_INT (lowest_active_reg);				\
1177 									\
1178     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
1179     PUSH_FAILURE_INT (highest_active_reg);				\
1180 									\
1181     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);		\
1182     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1183     PUSH_FAILURE_POINTER (pattern_place);				\
1184 									\
1185     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
1186     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1187 				 size2);				\
1188     DEBUG_PRINT1 ("'\n");						\
1189     PUSH_FAILURE_POINTER (string_place);				\
1190 									\
1191     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1192     DEBUG_PUSH (failure_id);						\
1193   } while (0)
1194 
1195 /* This is the number of items that are pushed and popped on the stack
1196    for each register.  */
1197 #define NUM_REG_ITEMS  3
1198 
1199 /* Individual items aside from the registers.  */
1200 #ifdef DEBUG
1201 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1202 #else
1203 #define NUM_NONREG_ITEMS 4
1204 #endif
1205 
1206 /* We push at most this many items on the stack.  */
1207 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1208 
1209 /* We actually push this many items.  */
1210 #define NUM_FAILURE_ITEMS						\
1211   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS 	\
1212     + NUM_NONREG_ITEMS)
1213 
1214 /* How many items can still be added to the stack without overflowing it.  */
1215 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1216 
1217 
1218 /* Pops what PUSH_FAIL_STACK pushes.
1219 
1220    We restore into the parameters, all of which should be lvalues:
1221 	STR -- the saved data position.
1222 	PAT -- the saved pattern position.
1223 	LOW_REG, HIGH_REG -- the highest and lowest active registers.
1224 	REGSTART, REGEND -- arrays of string positions.
1225 	REG_INFO -- array of information about each subexpression.
1226 
1227    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1228    `pend', `string1', `size1', `string2', and `size2'.  */
1229 
1230 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1231 {									\
1232   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
1233   int this_reg;								\
1234   const unsigned char *string_temp;					\
1235 									\
1236   assert (!FAIL_STACK_EMPTY ());					\
1237 									\
1238   /* Remove failure points and point to how many regs pushed.  */	\
1239   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1240   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1241   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
1242 									\
1243   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1244 									\
1245   DEBUG_POP (&failure_id);						\
1246   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1247 									\
1248   /* If the saved string location is NULL, it came from an		\
1249 	on_failure_keep_string_jump opcode, and we want to throw away the	\
1250 	saved NULL, thus retaining our current position in the string.  */	\
1251   string_temp = POP_FAILURE_POINTER ();					\
1252   if (string_temp != NULL)						\
1253     str = (const char *) string_temp;					\
1254 									\
1255   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
1256   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1257   DEBUG_PRINT1 ("'\n");							\
1258 									\
1259   pat = (unsigned char *) POP_FAILURE_POINTER ();			\
1260   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);			\
1261   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1262 									\
1263   /* Restore register info.  */						\
1264   high_reg = (unsigned) POP_FAILURE_INT ();				\
1265   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
1266 									\
1267   low_reg = (unsigned) POP_FAILURE_INT ();				\
1268   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
1269 									\
1270   for (this_reg = high_reg; this_reg >= low_reg; this_reg--)		\
1271     {									\
1272 	 DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);			\
1273 									\
1274 	 reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1275 	 DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);		\
1276 									\
1277 	 regend[this_reg] = (const char *) POP_FAILURE_POINTER ();		\
1278 	 DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
1279 									\
1280 	 regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1281 	 DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);		\
1282     }									\
1283 									\
1284   set_regs_matched_done = 0;						\
1285   DEBUG_STATEMENT (nfailure_points_popped++);				\
1286 } /* POP_FAILURE_POINT */
1287 
1288 
1289 
1290 /* Structure for per-register (a.k.a. per-group) information.
1291    Other register information, such as the
1292    starting and ending positions (which are addresses), and the list of
1293    inner groups (which is a bits list) are maintained in separate
1294    variables.
1295 
1296    We are making a (strictly speaking) nonportable assumption here: that
1297    the compiler will pack our bit fields into something that fits into
1298    the type of `word', i.e., is something that fits into one item on the
1299    failure stack.  */
1300 
1301 typedef union
1302 {
1303   fail_stack_elt_t word;
1304   struct
1305   {
1306 	 /* This field is one if this group can match the empty string,
1307 	    zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1308 #define MATCH_NULL_UNSET_VALUE 3
1309     unsigned match_null_string_p : 2;
1310     unsigned is_active : 1;
1311     unsigned matched_something : 1;
1312     unsigned ever_matched_something : 1;
1313   } bits;
1314 } register_info_type;
1315 
1316 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1317 #define IS_ACTIVE(R)  ((R).bits.is_active)
1318 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1319 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1320 
1321 
1322 /* Call this when have matched a real character; it sets `matched' flags
1323    for the subexpressions which we are currently inside.  Also records
1324    that those subexprs have matched.  */
1325 #define SET_REGS_MATCHED()						\
1326   do									\
1327     {									\
1328 	 if (!set_regs_matched_done)					\
1329 	{								\
1330 	  unsigned r;							\
1331 	  set_regs_matched_done = 1;					\
1332 	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1333 	    {								\
1334 		 MATCHED_SOMETHING (reg_info[r])				\
1335 		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1336 		= 1;							\
1337 	    }								\
1338 	}								\
1339     }									\
1340   while (0)
1341 
1342 /* Registers are set to a sentinel when they haven't yet matched.  */
1343 static char reg_unset_dummy;
1344 #define REG_UNSET_VALUE (&reg_unset_dummy)
1345 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1346 
1347 /* Subroutine declarations and macros for regex_compile.  */
1348 
1349 static void store_op1 (), store_op2 ();
1350 static void insert_op1 (), insert_op2 ();
1351 static boolean at_begline_loc_p (), at_endline_loc_p ();
1352 static boolean group_in_compile_stack ();
1353 static reg_errcode_t compile_range ();
1354 
1355 /* Fetch the next character in the uncompiled pattern---translating it
1356    if necessary.  Also cast from a signed character in the constant
1357    string passed to us by the user to an unsigned char that we can use
1358    as an array index (in, e.g., `translate').  */
1359 #define PATFETCH(c)							\
1360   do {if (p == pend) return REG_EEND;					\
1361     c = (unsigned char) *p++;						\
1362     if (translate) c = translate[c]; 					\
1363   } while (0)
1364 
1365 /* Fetch the next character in the uncompiled pattern, with no
1366    translation.  */
1367 #define PATFETCH_RAW(c)							\
1368   do {if (p == pend) return REG_EEND;					\
1369     c = (unsigned char) *p++; 						\
1370   } while (0)
1371 
1372 /* Go backwards one character in the pattern.  */
1373 #define PATUNFETCH p--
1374 
1375 
1376 /* If `translate' is non-null, return translate[D], else just D.  We
1377    cast the subscript to translate because some data is declared as
1378    `char *', to avoid warnings when a string constant is passed.  But
1379    when we use a character as a subscript we must make it unsigned.  */
1380 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
1381 
1382 
1383 /* Macros for outputting the compiled pattern into `buffer'.  */
1384 
1385 /* If the buffer isn't allocated when it comes in, use this.  */
1386 #define INIT_BUF_SIZE  32
1387 
1388 /* Make sure we have at least N more bytes of space in buffer.  */
1389 #define GET_BUFFER_SPACE(n)						\
1390     while (b - bufp->buffer + (n) > bufp->allocated)			\
1391 	 EXTEND_BUFFER ()
1392 
1393 /* Make sure we have one more byte of buffer space and then add C to it.  */
1394 #define BUF_PUSH(c)							\
1395   do {									\
1396     GET_BUFFER_SPACE (1);						\
1397     *b++ = (unsigned char) (c);						\
1398   } while (0)
1399 
1400 
1401 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1402 #define BUF_PUSH_2(c1, c2)						\
1403   do {									\
1404     GET_BUFFER_SPACE (2);						\
1405     *b++ = (unsigned char) (c1);					\
1406     *b++ = (unsigned char) (c2);					\
1407   } while (0)
1408 
1409 
1410 /* As with BUF_PUSH_2, except for three bytes.  */
1411 #define BUF_PUSH_3(c1, c2, c3)						\
1412   do {									\
1413     GET_BUFFER_SPACE (3);						\
1414     *b++ = (unsigned char) (c1);					\
1415     *b++ = (unsigned char) (c2);					\
1416     *b++ = (unsigned char) (c3);					\
1417   } while (0)
1418 
1419 
1420 /* Store a jump with opcode OP at LOC to location TO.  We store a
1421    relative address offset by the three bytes the jump itself occupies.  */
1422 #define STORE_JUMP(op, loc, to) \
1423   store_op1 (op, loc, (to) - (loc) - 3)
1424 
1425 /* Likewise, for a two-argument jump.  */
1426 #define STORE_JUMP2(op, loc, to, arg) \
1427   store_op2 (op, loc, (to) - (loc) - 3, arg)
1428 
1429 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1430 #define INSERT_JUMP(op, loc, to) \
1431   insert_op1 (op, loc, (to) - (loc) - 3, b)
1432 
1433 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1434 #define INSERT_JUMP2(op, loc, to, arg) \
1435   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
1436 
1437 
1438 /* This is not an arbitrary limit: the arguments which represent offsets
1439    into the pattern are two bytes long.  So if 2^16 bytes turns out to
1440    be too small, many things would have to change.  */
1441 #define MAX_BUF_SIZE (1L << 16)
1442 
1443 
1444 /* Extend the buffer by twice its current size via realloc and
1445    reset the pointers that pointed into the old block to point to the
1446    correct places in the new one.  If extending the buffer results in it
1447    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1448 #define EXTEND_BUFFER()							\
1449   do { 									\
1450     unsigned char *old_buffer = bufp->buffer;				\
1451     if (bufp->allocated == MAX_BUF_SIZE) 				\
1452 	 return REG_ESIZE;							\
1453     bufp->allocated <<= 1;						\
1454     if (bufp->allocated > MAX_BUF_SIZE)					\
1455 	 bufp->allocated = MAX_BUF_SIZE; 					\
1456     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
1457     if (bufp->buffer == NULL)						\
1458 	 return REG_ESPACE;						\
1459     /* If the buffer moved, move all the pointers into it.  */		\
1460     if (old_buffer != bufp->buffer)					\
1461 	 {									\
1462 	   b = (b - old_buffer) + bufp->buffer;				\
1463 	   begalt = (begalt - old_buffer) + bufp->buffer;			\
1464 	   if (fixup_alt_jump)						\
1465 		fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1466 	   if (laststart)							\
1467 		laststart = (laststart - old_buffer) + bufp->buffer;		\
1468 	   if (pending_exact)						\
1469 		pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
1470 	 }									\
1471   } while (0)
1472 
1473 
1474 /* Since we have one byte reserved for the register number argument to
1475    {start,stop}_memory, the maximum number of groups we can report
1476    things about is what fits in that byte.  */
1477 #define MAX_REGNUM 255
1478 
1479 /* But patterns can have more than `MAX_REGNUM' registers.  We just
1480    ignore the excess.  */
1481 typedef unsigned regnum_t;
1482 
1483 
1484 /* Macros for the compile stack.  */
1485 
1486 /* Since offsets can go either forwards or backwards, this type needs to
1487    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1488 typedef int pattern_offset_t;
1489 
1490 typedef struct
1491 {
1492   pattern_offset_t begalt_offset;
1493   pattern_offset_t fixup_alt_jump;
1494   pattern_offset_t inner_group_offset;
1495   pattern_offset_t laststart_offset;
1496   regnum_t regnum;
1497 } compile_stack_elt_t;
1498 
1499 
1500 typedef struct
1501 {
1502   compile_stack_elt_t *stack;
1503   unsigned size;
1504   unsigned avail;			/* Offset of next open position.  */
1505 } compile_stack_type;
1506 
1507 
1508 #define INIT_COMPILE_STACK_SIZE 32
1509 
1510 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1511 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1512 
1513 /* The next available element.  */
1514 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1515 
1516 
1517 /* Set the bit for character C in a list.  */
1518 #define SET_LIST_BIT(c)                               \
1519   (b[((unsigned char) (c)) / BYTEWIDTH]               \
1520    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1521 
1522 
1523 /* Get the next unsigned number in the uncompiled pattern.  */
1524 #define GET_UNSIGNED_NUMBER(num) 					\
1525   { if (p != pend)							\
1526 	{									\
1527 	  PATFETCH (c); 							\
1528 	  while (ISDIGIT (c)) 						\
1529 	    { 								\
1530 		 if (num < 0)							\
1531 		    num = 0;							\
1532 		 num = num * 10 + c - '0'; 					\
1533 		 if (p == pend) 						\
1534 		    break; 							\
1535 		 PATFETCH (c);						\
1536 	    } 								\
1537 	  } 								\
1538     }
1539 
1540 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1541 
1542 #define IS_CHAR_CLASS(string)						\
1543    (STREQ (string, "alpha") || STREQ (string, "upper")			\
1544     || STREQ (string, "lower") || STREQ (string, "digit")		\
1545     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1546     || STREQ (string, "space") || STREQ (string, "print")		\
1547     || STREQ (string, "punct") || STREQ (string, "graph")		\
1548     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1549 
1550 #ifndef MATCH_MAY_ALLOCATE
1551 
1552 /* If we cannot allocate large objects within re_match_2_internal,
1553    we make the fail stack and register vectors global.
1554    The fail stack, we grow to the maximum size when a regexp
1555    is compiled.
1556    The register vectors, we adjust in size each time we
1557    compile a regexp, according to the number of registers it needs.  */
1558 
1559 static fail_stack_type fail_stack;
1560 
1561 /* Size with which the following vectors are currently allocated.
1562    That is so we can make them bigger as needed,
1563    but never make them smaller.  */
1564 static int regs_allocated_size;
1565 
1566 static const char **     regstart, **     regend;
1567 static const char ** old_regstart, ** old_regend;
1568 static const char **best_regstart, **best_regend;
1569 static register_info_type *reg_info;
1570 static const char **reg_dummy;
1571 static register_info_type *reg_info_dummy;
1572 
1573 /* Make the register vectors big enough for NUM_REGS registers,
1574    but don't make them smaller.  */
1575 
1576 static
regex_grow_registers(num_regs)1577 regex_grow_registers (num_regs)
1578 	int num_regs;
1579 {
1580   if (num_regs > regs_allocated_size)
1581     {
1582 	 RETALLOC_IF (regstart,	 num_regs, const char *);
1583 	 RETALLOC_IF (regend,	 num_regs, const char *);
1584 	 RETALLOC_IF (old_regstart, num_regs, const char *);
1585 	 RETALLOC_IF (old_regend,	 num_regs, const char *);
1586 	 RETALLOC_IF (best_regstart, num_regs, const char *);
1587 	 RETALLOC_IF (best_regend,	 num_regs, const char *);
1588 	 RETALLOC_IF (reg_info,	 num_regs, register_info_type);
1589 	 RETALLOC_IF (reg_dummy,	 num_regs, const char *);
1590 	 RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1591 
1592 	 regs_allocated_size = num_regs;
1593     }
1594 }
1595 
1596 #endif /* not MATCH_MAY_ALLOCATE */
1597 
1598 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1599    Returns one of error codes defined in `regex.h', or zero for success.
1600 
1601    Assumes the `allocated' (and perhaps `buffer') and `translate'
1602    fields are set in BUFP on entry.
1603 
1604    If it succeeds, results are put in BUFP (if it returns an error, the
1605    contents of BUFP are undefined):
1606 	`buffer' is the compiled pattern;
1607 	`syntax' is set to SYNTAX;
1608 	`used' is set to the length of the compiled pattern;
1609 	`fastmap_accurate' is zero;
1610 	`re_nsub' is the number of subexpressions in PATTERN;
1611 	`not_bol' and `not_eol' are zero;
1612 
1613    The `fastmap' and `newline_anchor' fields are neither
1614    examined nor set.  */
1615 
1616 /* Return, freeing storage we allocated.  */
1617 #define FREE_STACK_RETURN(value)		\
1618   return (free (compile_stack.stack), value)
1619 
1620 static reg_errcode_t
regex_compile(pattern,size,syntax,bufp)1621 regex_compile (pattern, size, syntax, bufp)
1622 	const char *pattern;
1623 	int size;
1624 	reg_syntax_t syntax;
1625 	struct re_pattern_buffer *bufp;
1626 {
1627   /* We fetch characters from PATTERN here.  Even though PATTERN is
1628 	`char *' (i.e., signed), we declare these variables as unsigned, so
1629 	they can be reliably used as array indices.  */
1630   register unsigned char c, c1;
1631 
1632   /* A random temporary spot in PATTERN.  */
1633   const char *p1;
1634 
1635   /* Points to the end of the buffer, where we should append.  */
1636   register unsigned char *b;
1637 
1638   /* Keeps track of unclosed groups.  */
1639   compile_stack_type compile_stack;
1640 
1641   /* Points to the current (ending) position in the pattern.  */
1642   const char *p = pattern;
1643   const char *pend = pattern + size;
1644 
1645   /* How to translate the characters in the pattern.  */
1646   char *translate = bufp->translate;
1647 
1648   /* Address of the count-byte of the most recently inserted `exactn'
1649 	command.  This makes it possible to tell if a new exact-match
1650 	character can be added to that command or if the character requires
1651 	a new `exactn' command.  */
1652   unsigned char *pending_exact = 0;
1653 
1654   /* Address of start of the most recently finished expression.
1655 	This tells, e.g., postfix * where to find the start of its
1656 	operand.  Reset at the beginning of groups and alternatives.  */
1657   unsigned char *laststart = 0;
1658 
1659   /* Address of beginning of regexp, or inside of last group.  */
1660   unsigned char *begalt;
1661 
1662   /* Place in the uncompiled pattern (i.e., the {) to
1663 	which to go back if the interval is invalid.  */
1664   const char *beg_interval;
1665 
1666   /* Address of the place where a forward jump should go to the end of
1667 	the containing expression.  Each alternative of an `or' -- except the
1668 	last -- ends with a forward jump of this sort.  */
1669   unsigned char *fixup_alt_jump = 0;
1670 
1671   /* Counts open-groups as they are encountered.  Remembered for the
1672 	matching close-group on the compile stack, so the same register
1673 	number is put in the stop_memory as the start_memory.  */
1674   regnum_t regnum = 0;
1675 
1676 #ifdef DEBUG
1677   DEBUG_PRINT1 ("\nCompiling pattern: ");
1678   if (debug)
1679     {
1680 	 unsigned debug_count;
1681 
1682 	 for (debug_count = 0; debug_count < size; debug_count++)
1683 	   putchar (pattern[debug_count]);
1684 	 putchar ('\n');
1685     }
1686 #endif /* DEBUG */
1687 
1688   /* Initialize the compile stack.  */
1689   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1690   if (compile_stack.stack == NULL)
1691     return REG_ESPACE;
1692 
1693   compile_stack.size = INIT_COMPILE_STACK_SIZE;
1694   compile_stack.avail = 0;
1695 
1696   /* Initialize the pattern buffer.  */
1697   bufp->syntax = syntax;
1698   bufp->fastmap_accurate = 0;
1699   bufp->not_bol = bufp->not_eol = 0;
1700 
1701   /* Set `used' to zero, so that if we return an error, the pattern
1702 	printer (for debugging) will think there's no pattern.  We reset it
1703 	at the end.  */
1704   bufp->used = 0;
1705 
1706   /* Always count groups, whether or not bufp->no_sub is set.  */
1707   bufp->re_nsub = 0;
1708 
1709 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1710   /* Initialize the syntax table.  */
1711    init_syntax_once ();
1712 #endif
1713 
1714   if (bufp->allocated == 0)
1715     {
1716 	 if (bufp->buffer)
1717 	{ /* If zero allocated, but buffer is non-null, try to realloc
1718 		   enough space.  This loses if buffer's address is bogus, but
1719 		   that is the user's responsibility.  */
1720 		RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1721 	   }
1722 	 else
1723 	   { /* Caller did not allocate a buffer.  Do it for them.  */
1724 		bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1725 	   }
1726 	 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1727 
1728 	 bufp->allocated = INIT_BUF_SIZE;
1729     }
1730 
1731   begalt = b = bufp->buffer;
1732 
1733   /* Loop through the uncompiled pattern until we're at the end.  */
1734   while (p != pend)
1735     {
1736 	 PATFETCH (c);
1737 
1738 	 switch (c)
1739 	   {
1740 	   case '^':
1741 		{
1742 		  if (   /* If at start of pattern, it's an operator.  */
1743 			    p == pattern + 1
1744 			    /* If context independent, it's an operator.  */
1745 			 || syntax & RE_CONTEXT_INDEP_ANCHORS
1746 			    /* Otherwise, depends on what's come before.  */
1747 			 || at_begline_loc_p (pattern, p, syntax))
1748 		    BUF_PUSH (begline);
1749 		  else
1750 		    goto normal_char;
1751 		}
1752 		break;
1753 
1754 
1755 	   case '$':
1756 		{
1757 		  if (   /* If at end of pattern, it's an operator.  */
1758 			    p == pend
1759 			    /* If context independent, it's an operator.  */
1760 			 || syntax & RE_CONTEXT_INDEP_ANCHORS
1761 			    /* Otherwise, depends on what's next.  */
1762 			 || at_endline_loc_p (p, pend, syntax))
1763 			BUF_PUSH (endline);
1764 		   else
1765 			goto normal_char;
1766 		 }
1767 		 break;
1768 
1769 
1770 	case '+':
1771 	   case '?':
1772 		if ((syntax & RE_BK_PLUS_QM)
1773 		    || (syntax & RE_LIMITED_OPS))
1774 		  goto normal_char;
1775 	   handle_plus:
1776 	   case '*':
1777 		/* If there is no previous pattern... */
1778 		if (!laststart)
1779 		  {
1780 		    if (syntax & RE_CONTEXT_INVALID_OPS)
1781 			 FREE_STACK_RETURN (REG_BADRPT);
1782 		    else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1783 			 goto normal_char;
1784 		  }
1785 
1786 		{
1787 		  /* Are we optimizing this jump?  */
1788 		  boolean keep_string_p = false;
1789 
1790 		  /* 1 means zero (many) matches is allowed.  */
1791 		  char zero_times_ok = 0, many_times_ok = 0;
1792 
1793 		  /* If there is a sequence of repetition chars, collapse it
1794 			down to just one (the right one).  We can't combine
1795 			interval operators with these because of, e.g., `a{2}*',
1796 			which should only match an even number of `a's.  */
1797 
1798 		  for (;;)
1799 		    {
1800 			 zero_times_ok |= c != '+';
1801 			 many_times_ok |= c != '?';
1802 
1803 			 if (p == pend)
1804 			   break;
1805 
1806 			 PATFETCH (c);
1807 
1808 			 if (c == '*'
1809 				|| (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1810 			   ;
1811 
1812 			 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1813 			   {
1814 				if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1815 
1816 				PATFETCH (c1);
1817 				if (!(c1 == '+' || c1 == '?'))
1818 				  {
1819 				    PATUNFETCH;
1820 				    PATUNFETCH;
1821 				    break;
1822 				  }
1823 
1824 				c = c1;
1825 			   }
1826 			 else
1827 			   {
1828 				PATUNFETCH;
1829 				break;
1830 			   }
1831 
1832 			 /* If we get here, we found another repeat character.  */
1833 			}
1834 
1835 		  /* Star, etc. applied to an empty pattern is equivalent
1836 			to an empty pattern.  */
1837 		  if (!laststart)
1838 		    break;
1839 
1840 		  /* Now we know whether or not zero matches is allowed
1841 			and also whether or not two or more matches is allowed.  */
1842 		  if (many_times_ok)
1843 		    { /* More than one repetition is allowed, so put in at the
1844 			    end a backward relative jump from `b' to before the next
1845 			    jump we're going to put in below (which jumps from
1846 			    laststart to after this jump).
1847 
1848 			    But if we are at the `*' in the exact sequence `.*\n',
1849 			    insert an unconditional jump backwards to the .,
1850 			    instead of the beginning of the loop.  This way we only
1851 			    push a failure point once, instead of every time
1852 			    through the loop.  */
1853 			 assert (p - 1 > pattern);
1854 
1855 			 /* Allocate the space for the jump.  */
1856 			 GET_BUFFER_SPACE (3);
1857 
1858 			 /* We know we are not at the first character of the pattern,
1859 			    because laststart was nonzero.  And we've already
1860 			    incremented `p', by the way, to be the character after
1861 			    the `*'.  Do we have to do something analogous here
1862 			    for null bytes, because of RE_DOT_NOT_NULL?  */
1863 			 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
1864 		    && zero_times_ok
1865 				&& p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
1866 				&& !(syntax & RE_DOT_NEWLINE))
1867 			   { /* We have .*\n.  */
1868 				STORE_JUMP (jump, b, laststart);
1869 				keep_string_p = true;
1870 			   }
1871 			 else
1872 			   /* Anything else.  */
1873 			   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
1874 
1875 			 /* We've added more stuff to the buffer.  */
1876 			 b += 3;
1877 		    }
1878 
1879 		  /* On failure, jump from laststart to b + 3, which will be the
1880 			end of the buffer after this jump is inserted.  */
1881 		  GET_BUFFER_SPACE (3);
1882 		  INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
1883 							    : on_failure_jump,
1884 					laststart, b + 3);
1885 		  pending_exact = 0;
1886 		  b += 3;
1887 
1888 		  if (!zero_times_ok)
1889 		    {
1890 			 /* At least one repetition is required, so insert a
1891 			    `dummy_failure_jump' before the initial
1892 			    `on_failure_jump' instruction of the loop. This
1893 			    effects a skip over that instruction the first time
1894 			    we hit that loop.  */
1895 			 GET_BUFFER_SPACE (3);
1896 			 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
1897 			 b += 3;
1898 		    }
1899 		  }
1900 	  break;
1901 
1902 
1903 	case '.':
1904 		laststart = b;
1905 		BUF_PUSH (anychar);
1906 		break;
1907 
1908 
1909 	   case '[':
1910 		{
1911 		  boolean had_char_class = false;
1912 
1913 		  if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
1914 
1915 		  /* Ensure that we have enough space to push a charset: the
1916 			opcode, the length count, and the bitset; 34 bytes in all.  */
1917 	    GET_BUFFER_SPACE (34);
1918 
1919 		  laststart = b;
1920 
1921 		  /* We test `*p == '^' twice, instead of using an if
1922 			statement, so we only need one BUF_PUSH.  */
1923 		  BUF_PUSH (*p == '^' ? charset_not : charset);
1924 		  if (*p == '^')
1925 		    p++;
1926 
1927 		  /* Remember the first position in the bracket expression.  */
1928 		  p1 = p;
1929 
1930 		  /* Push the number of bytes in the bitmap.  */
1931 		  BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
1932 
1933 		  /* Clear the whole map.  */
1934 		  bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
1935 
1936 		  /* charset_not matches newline according to a syntax bit.  */
1937 		  if ((re_opcode_t) b[-2] == charset_not
1938 			 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1939 		    SET_LIST_BIT ('\n');
1940 
1941 		  /* Read in characters and ranges, setting map bits.  */
1942 		  for (;;)
1943 		    {
1944 			 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
1945 
1946 			 PATFETCH (c);
1947 
1948 			 /* \ might escape characters inside [...] and [^...].  */
1949 			 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
1950 			   {
1951 				if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1952 
1953 				PATFETCH (c1);
1954 				SET_LIST_BIT (c1);
1955 				continue;
1956 			   }
1957 
1958 			 /* Could be the end of the bracket expression.  If it's
1959 			    not (i.e., when the bracket expression is `[]' so
1960 			    far), the ']' character bit gets set way below.  */
1961 			 if (c == ']' && p != p1 + 1)
1962 			   break;
1963 
1964 			 /* Look ahead to see if it's a range when the last thing
1965 			    was a character class.  */
1966 			 if (had_char_class && c == '-' && *p != ']')
1967 			   FREE_STACK_RETURN (REG_ERANGE);
1968 
1969 			 /* Look ahead to see if it's a range when the last thing
1970 			    was a character: if this is a hyphen not at the
1971 			    beginning or the end of a list, then it's the range
1972 			    operator.  */
1973 			 if (c == '-'
1974 				&& !(p - 2 >= pattern && p[-2] == '[')
1975 				&& !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1976 				&& *p != ']')
1977 			   {
1978 				reg_errcode_t ret
1979 				  = compile_range (&p, pend, translate, syntax, b);
1980 				if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
1981 			   }
1982 
1983 			 else if (p[0] == '-' && p[1] != ']')
1984 			   { /* This handles ranges made up of characters only.  */
1985 				reg_errcode_t ret;
1986 
1987 		    /* Move past the `-'.  */
1988 				PATFETCH (c1);
1989 
1990 				ret = compile_range (&p, pend, translate, syntax, b);
1991 				if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
1992 			   }
1993 
1994 			 /* See if we're at the beginning of a possible character
1995 			    class.  */
1996 
1997 			 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
1998 			   { /* Leave room for the null.  */
1999 				char str[CHAR_CLASS_MAX_LENGTH + 1];
2000 
2001 				PATFETCH (c);
2002 				c1 = 0;
2003 
2004 				/* If pattern is `[[:'.  */
2005 				if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2006 
2007 				for (;;)
2008 				  {
2009 				    PATFETCH (c);
2010 				    if (c == ':' || c == ']' || p == pend
2011 					   || c1 == CHAR_CLASS_MAX_LENGTH)
2012 					 break;
2013 				    str[c1++] = c;
2014 				  }
2015 				str[c1] = '\0';
2016 
2017 				/* If isn't a word bracketed by `[:' and:`]':
2018 				   undo the ending character, the letters, and leave
2019 				   the leading `:' and `[' (but set bits for them).  */
2020 				if (c == ':' && *p == ']')
2021 				  {
2022 				    int ch;
2023 				    boolean is_alnum = STREQ (str, "alnum");
2024 				    boolean is_alpha = STREQ (str, "alpha");
2025 				    boolean is_blank = STREQ (str, "blank");
2026 				    boolean is_cntrl = STREQ (str, "cntrl");
2027 				    boolean is_digit = STREQ (str, "digit");
2028 				    boolean is_graph = STREQ (str, "graph");
2029 				    boolean is_lower = STREQ (str, "lower");
2030 				    boolean is_print = STREQ (str, "print");
2031 				    boolean is_punct = STREQ (str, "punct");
2032 				    boolean is_space = STREQ (str, "space");
2033 				    boolean is_upper = STREQ (str, "upper");
2034 				    boolean is_xdigit = STREQ (str, "xdigit");
2035 
2036 				    if (!IS_CHAR_CLASS (str))
2037 			  FREE_STACK_RETURN (REG_ECTYPE);
2038 
2039 				    /* Throw away the ] at the end of the character
2040 					  class.  */
2041 				    PATFETCH (c);
2042 
2043 				    if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2044 
2045 				    for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2046 					 {
2047 			    /* This was split into 3 if's to
2048 				  avoid an arbitrary limit in some compiler.  */
2049 					   if (   (is_alnum  && ISALNUM (ch))
2050 						  || (is_alpha  && ISALPHA (ch))
2051 						  || (is_blank  && ISBLANK (ch))
2052 						  || (is_cntrl  && ISCNTRL (ch)))
2053 				 SET_LIST_BIT (ch);
2054 			    if (   (is_digit  && ISDIGIT (ch))
2055 						  || (is_graph  && ISGRAPH (ch))
2056 						  || (is_lower  && ISLOWER (ch))
2057 						  || (is_print  && ISPRINT (ch)))
2058 				 SET_LIST_BIT (ch);
2059 			    if (   (is_punct  && ISPUNCT (ch))
2060 						  || (is_space  && ISSPACE (ch))
2061 						  || (is_upper  && ISUPPER (ch))
2062 						  || (is_xdigit && ISXDIGIT (ch)))
2063 				 SET_LIST_BIT (ch);
2064 					 }
2065 				    had_char_class = true;
2066 				  }
2067 				else
2068 				  {
2069 				    c1++;
2070 				    while (c1--)
2071 					 PATUNFETCH;
2072 				    SET_LIST_BIT ('[');
2073 				    SET_LIST_BIT (':');
2074 				    had_char_class = false;
2075 				  }
2076 			   }
2077 			 else
2078 			   {
2079 				had_char_class = false;
2080 				SET_LIST_BIT (c);
2081 			   }
2082 		    }
2083 
2084 		  /* Discard any (non)matching list bytes that are all 0 at the
2085 			end of the map.  Decrease the map-length byte too.  */
2086 		  while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2087 		    b[-1]--;
2088 		  b += b[-1];
2089 		}
2090 		break;
2091 
2092 
2093 	case '(':
2094 		if (syntax & RE_NO_BK_PARENS)
2095 		  goto handle_open;
2096 		else
2097 		  goto normal_char;
2098 
2099 
2100 	   case ')':
2101 		if (syntax & RE_NO_BK_PARENS)
2102 		  goto handle_close;
2103 		else
2104 		  goto normal_char;
2105 
2106 
2107 	   case '\n':
2108 		if (syntax & RE_NEWLINE_ALT)
2109 		  goto handle_alt;
2110 		else
2111 		  goto normal_char;
2112 
2113 
2114 	case '|':
2115 		if (syntax & RE_NO_BK_VBAR)
2116 		  goto handle_alt;
2117 		else
2118 		  goto normal_char;
2119 
2120 
2121 	   case '{':
2122 		 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2123 		   goto handle_interval;
2124 		 else
2125 		   goto normal_char;
2126 
2127 
2128 	   case '\\':
2129 		if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2130 
2131 		/* Do not translate the character after the \, so that we can
2132 		   distinguish, e.g., \B from \b, even if we normally would
2133 		   translate, e.g., B to b.  */
2134 		PATFETCH_RAW (c);
2135 
2136 		switch (c)
2137 		  {
2138 		  case '(':
2139 		    if (syntax & RE_NO_BK_PARENS)
2140 			 goto normal_backslash;
2141 
2142 		  handle_open:
2143 		    bufp->re_nsub++;
2144 		    regnum++;
2145 
2146 		    if (COMPILE_STACK_FULL)
2147 			 {
2148 			   RETALLOC (compile_stack.stack, compile_stack.size << 1,
2149 					   compile_stack_elt_t);
2150 			   if (compile_stack.stack == NULL) return REG_ESPACE;
2151 
2152 			   compile_stack.size <<= 1;
2153 			 }
2154 
2155 		    /* These are the values to restore when we hit end of this
2156 			  group.  They are all relative offsets, so that if the
2157 			  whole pattern moves because of realloc, they will still
2158 			  be valid.  */
2159 		    COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2160 		    COMPILE_STACK_TOP.fixup_alt_jump
2161 			 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2162 		    COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2163 		    COMPILE_STACK_TOP.regnum = regnum;
2164 
2165 		    /* We will eventually replace the 0 with the number of
2166 			  groups inner to this one.  But do not push a
2167 			  start_memory for groups beyond the last one we can
2168 			  represent in the compiled pattern.  */
2169 		    if (regnum <= MAX_REGNUM)
2170 			 {
2171 			   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2172 			   BUF_PUSH_3 (start_memory, regnum, 0);
2173 			 }
2174 
2175 		    compile_stack.avail++;
2176 
2177 		    fixup_alt_jump = 0;
2178 		    laststart = 0;
2179 		    begalt = b;
2180 		 /* If we've reached MAX_REGNUM groups, then this open
2181 		 won't actually generate any code, so we'll have to
2182 		 clear pending_exact explicitly.  */
2183 		 pending_exact = 0;
2184 		    break;
2185 
2186 
2187 		  case ')':
2188 		    if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2189 
2190 		    if (COMPILE_STACK_EMPTY)
2191 			 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2192 			   goto normal_backslash;
2193 			 else
2194 			   FREE_STACK_RETURN (REG_ERPAREN);
2195 
2196 		  handle_close:
2197 		    if (fixup_alt_jump)
2198 			 { /* Push a dummy failure point at the end of the
2199 				 alternative for a possible future
2200 				 `pop_failure_jump' to pop.  See comments at
2201 				 `push_dummy_failure' in `re_match_2'.  */
2202 			   BUF_PUSH (push_dummy_failure);
2203 
2204 			   /* We allocated space for this jump when we assigned
2205 				 to `fixup_alt_jump', in the `handle_alt' case below.  */
2206 			   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2207 			 }
2208 
2209 		    /* See similar code for backslashed left paren above.  */
2210 		    if (COMPILE_STACK_EMPTY)
2211 			 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2212 			   goto normal_char;
2213 			 else
2214 			   FREE_STACK_RETURN (REG_ERPAREN);
2215 
2216 		    /* Since we just checked for an empty stack above, this
2217 			  ``can't happen''.  */
2218 		    assert (compile_stack.avail != 0);
2219 		    {
2220 			 /* We don't just want to restore into `regnum', because
2221 			    later groups should continue to be numbered higher,
2222 			    as in `(ab)c(de)' -- the second group is #2.  */
2223 			 regnum_t this_group_regnum;
2224 
2225 			 compile_stack.avail--;
2226 			 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2227 			 fixup_alt_jump
2228 			   = COMPILE_STACK_TOP.fixup_alt_jump
2229 				? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2230 				: 0;
2231 			 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2232 			 this_group_regnum = COMPILE_STACK_TOP.regnum;
2233 		/* If we've reached MAX_REGNUM groups, then this open
2234 		   won't actually generate any code, so we'll have to
2235 		   clear pending_exact explicitly.  */
2236 		pending_exact = 0;
2237 
2238 			 /* We're at the end of the group, so now we know how many
2239 			    groups were inside this one.  */
2240 			 if (this_group_regnum <= MAX_REGNUM)
2241 			   {
2242 				unsigned char *inner_group_loc
2243 				  = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2244 
2245 				*inner_group_loc = regnum - this_group_regnum;
2246 				BUF_PUSH_3 (stop_memory, this_group_regnum,
2247 						  regnum - this_group_regnum);
2248 			   }
2249 		    }
2250 		    break;
2251 
2252 
2253 		  case '|':					/* `\|'.  */
2254 		    if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2255 			 goto normal_backslash;
2256 		  handle_alt:
2257 		    if (syntax & RE_LIMITED_OPS)
2258 			 goto normal_char;
2259 
2260 		    /* Insert before the previous alternative a jump which
2261 			  jumps to this alternative if the former fails.  */
2262 		    GET_BUFFER_SPACE (3);
2263 		    INSERT_JUMP (on_failure_jump, begalt, b + 6);
2264 		    pending_exact = 0;
2265 		    b += 3;
2266 
2267 		    /* The alternative before this one has a jump after it
2268 			  which gets executed if it gets matched.  Adjust that
2269 			  jump so it will jump to this alternative's analogous
2270 			  jump (put in below, which in turn will jump to the next
2271 			  (if any) alternative's such jump, etc.).  The last such
2272 			  jump jumps to the correct final destination.  A picture:
2273 					 _____ _____
2274 					 |   | |   |
2275 					 |   v |   v
2276 					a | b   | c
2277 
2278 			  If we are at `b', then fixup_alt_jump right now points to a
2279 			  three-byte space after `a'.  We'll put in the jump, set
2280 			  fixup_alt_jump to right after `b', and leave behind three
2281 			  bytes which we'll fill in when we get to after `c'.  */
2282 
2283 		    if (fixup_alt_jump)
2284 			 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2285 
2286 		    /* Mark and leave space for a jump after this alternative,
2287 			  to be filled in later either by next alternative or
2288 			  when know we're at the end of a series of alternatives.  */
2289 		    fixup_alt_jump = b;
2290 		    GET_BUFFER_SPACE (3);
2291 		    b += 3;
2292 
2293 		    laststart = 0;
2294 		    begalt = b;
2295 		    break;
2296 
2297 
2298 		  case '{':
2299 		    /* If \{ is a literal.  */
2300 		    if (!(syntax & RE_INTERVALS)
2301 				 /* If we're at `\{' and it's not the open-interval
2302 				    operator.  */
2303 			   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2304 			   || (p - 2 == pattern  &&  p == pend))
2305 			 goto normal_backslash;
2306 
2307 		  handle_interval:
2308 		    {
2309 			 /* If got here, then the syntax allows intervals.  */
2310 
2311 			 /* At least (most) this many matches must be made.  */
2312 			 int lower_bound = -1, upper_bound = -1;
2313 
2314 			 beg_interval = p - 1;
2315 
2316 			 if (p == pend)
2317 			   {
2318 				if (syntax & RE_NO_BK_BRACES)
2319 				  goto unfetch_interval;
2320 				else
2321 				  FREE_STACK_RETURN (REG_EBRACE);
2322 			   }
2323 
2324 			 GET_UNSIGNED_NUMBER (lower_bound);
2325 
2326 			 if (c == ',')
2327 			   {
2328 				GET_UNSIGNED_NUMBER (upper_bound);
2329 				if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2330 			   }
2331 			 else
2332 			   /* Interval such as `{1}' => match exactly once. */
2333 			   upper_bound = lower_bound;
2334 
2335 			 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2336 				|| lower_bound > upper_bound)
2337 			   {
2338 				if (syntax & RE_NO_BK_BRACES)
2339 				  goto unfetch_interval;
2340 				else
2341 				  FREE_STACK_RETURN (REG_BADBR);
2342 			   }
2343 
2344 			 if (!(syntax & RE_NO_BK_BRACES))
2345 			   {
2346 				if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2347 
2348 				PATFETCH (c);
2349 			   }
2350 
2351 			 if (c != '}')
2352 			   {
2353 				if (syntax & RE_NO_BK_BRACES)
2354 				  goto unfetch_interval;
2355 				else
2356 				  FREE_STACK_RETURN (REG_BADBR);
2357 			   }
2358 
2359 			 /* We just parsed a valid interval.  */
2360 
2361 			 /* If it's invalid to have no preceding re.  */
2362 			 if (!laststart)
2363 			   {
2364 				if (syntax & RE_CONTEXT_INVALID_OPS)
2365 				  FREE_STACK_RETURN (REG_BADRPT);
2366 				else if (syntax & RE_CONTEXT_INDEP_OPS)
2367 				  laststart = b;
2368 				else
2369 				  goto unfetch_interval;
2370 			   }
2371 
2372 			 /* If the upper bound is zero, don't want to succeed at
2373 			    all; jump from `laststart' to `b + 3', which will be
2374 			    the end of the buffer after we insert the jump.  */
2375 			  if (upper_bound == 0)
2376 			    {
2377 				 GET_BUFFER_SPACE (3);
2378 				 INSERT_JUMP (jump, laststart, b + 3);
2379 				 b += 3;
2380 			    }
2381 
2382 			  /* Otherwise, we have a nontrivial interval.  When
2383 				we're all done, the pattern will look like:
2384 				  set_number_at <jump count> <upper bound>
2385 				  set_number_at <succeed_n count> <lower bound>
2386 				  succeed_n <after jump addr> <succeed_n count>
2387 				  <body of loop>
2388 				  jump_n <succeed_n addr> <jump count>
2389 				(The upper bound and `jump_n' are omitted if
2390 				`upper_bound' is 1, though.)  */
2391 			  else
2392 			    { /* If the upper bound is > 1, we need to insert
2393 				    more at the end of the loop.  */
2394 				 unsigned nbytes = 10 + (upper_bound > 1) * 10;
2395 
2396 				 GET_BUFFER_SPACE (nbytes);
2397 
2398 				 /* Initialize lower bound of the `succeed_n', even
2399 				    though it will be set during matching by its
2400 				    attendant `set_number_at' (inserted next),
2401 				    because `re_compile_fastmap' needs to know.
2402 				    Jump to the `jump_n' we might insert below.  */
2403 				 INSERT_JUMP2 (succeed_n, laststart,
2404 							b + 5 + (upper_bound > 1) * 5,
2405 							lower_bound);
2406 				 b += 5;
2407 
2408 				 /* Code to initialize the lower bound.  Insert
2409 				    before the `succeed_n'.  The `5' is the last two
2410 				    bytes of this `set_number_at', plus 3 bytes of
2411 				    the following `succeed_n'.  */
2412 				 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2413 				 b += 5;
2414 
2415 				 if (upper_bound > 1)
2416 				   { /* More than one repetition is allowed, so
2417 					   append a backward jump to the `succeed_n'
2418 					   that starts this interval.
2419 
2420 					   When we've reached this during matching,
2421 					   we'll have matched the interval once, so
2422 					   jump back only `upper_bound - 1' times.  */
2423 					STORE_JUMP2 (jump_n, b, laststart + 5,
2424 							   upper_bound - 1);
2425 					b += 5;
2426 
2427 					/* The location we want to set is the second
2428 					   parameter of the `jump_n'; that is `b-2' as
2429 					   an absolute address.  `laststart' will be
2430 					   the `set_number_at' we're about to insert;
2431 					   `laststart+3' the number to set, the source
2432 					   for the relative address.  But we are
2433 					   inserting into the middle of the pattern --
2434 					   so everything is getting moved up by 5.
2435 					   Conclusion: (b - 2) - (laststart + 3) + 5,
2436 					   i.e., b - laststart.
2437 
2438 					   We insert this at the beginning of the loop
2439 					   so that if we fail during matching, we'll
2440 					   reinitialize the bounds.  */
2441 					insert_op2 (set_number_at, laststart, b - laststart,
2442 							  upper_bound - 1, b);
2443 					b += 5;
2444 				   }
2445 			    }
2446 			 pending_exact = 0;
2447 			 beg_interval = NULL;
2448 		    }
2449 		    break;
2450 
2451 		  unfetch_interval:
2452 		    /* If an invalid interval, match the characters as literals.  */
2453 			assert (beg_interval);
2454 			p = beg_interval;
2455 			beg_interval = NULL;
2456 
2457 			/* normal_char and normal_backslash need `c'.  */
2458 			PATFETCH (c);
2459 
2460 			if (!(syntax & RE_NO_BK_BRACES))
2461 			  {
2462 			    if (p > pattern  &&  p[-1] == '\\')
2463 				 goto normal_backslash;
2464 			  }
2465 			goto normal_char;
2466 
2467 #ifdef emacs
2468 		  /* There is no way to specify the before_dot and after_dot
2469 			operators.  rms says this is ok.  --karl  */
2470 		  case '=':
2471 		    BUF_PUSH (at_dot);
2472 		    break;
2473 
2474 		  case 's':
2475 		    laststart = b;
2476 		    PATFETCH (c);
2477 		    BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2478 		    break;
2479 
2480 		  case 'S':
2481 		    laststart = b;
2482 		    PATFETCH (c);
2483 		    BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2484 		    break;
2485 #endif /* emacs */
2486 
2487 
2488 		  case 'w':
2489 		    laststart = b;
2490 		    BUF_PUSH (wordchar);
2491 		    break;
2492 
2493 
2494 		  case 'W':
2495 		    laststart = b;
2496 		    BUF_PUSH (notwordchar);
2497 		    break;
2498 
2499 
2500 		  case '<':
2501 		    BUF_PUSH (wordbeg);
2502 		    break;
2503 
2504 		  case '>':
2505 		    BUF_PUSH (wordend);
2506 		    break;
2507 
2508 		  case 'b':
2509 		    BUF_PUSH (wordbound);
2510 		    break;
2511 
2512 		  case 'B':
2513 		    BUF_PUSH (notwordbound);
2514 		    break;
2515 
2516 		  case '`':
2517 		    BUF_PUSH (begbuf);
2518 		    break;
2519 
2520 		  case '\'':
2521 		    BUF_PUSH (endbuf);
2522 		    break;
2523 
2524 		  case '1': case '2': case '3': case '4': case '5':
2525 		  case '6': case '7': case '8': case '9':
2526 		    if (syntax & RE_NO_BK_REFS)
2527 			 goto normal_char;
2528 
2529 		    c1 = c - '0';
2530 
2531 		    if (c1 > regnum)
2532 			 FREE_STACK_RETURN (REG_ESUBREG);
2533 
2534 		    /* Can't back reference to a subexpression if inside of it.  */
2535 		    if (group_in_compile_stack (compile_stack, c1))
2536 			 goto normal_char;
2537 
2538 		    laststart = b;
2539 		    BUF_PUSH_2 (duplicate, c1);
2540 		    break;
2541 
2542 
2543 		  case '+':
2544 		  case '?':
2545 		    if (syntax & RE_BK_PLUS_QM)
2546 			 goto handle_plus;
2547 		    else
2548 			 goto normal_backslash;
2549 
2550 		  default:
2551 		  normal_backslash:
2552 		    /* You might think it would be useful for \ to mean
2553 			  not to translate; but if we don't translate it
2554 			  it will never match anything.  */
2555 		    c = TRANSLATE (c);
2556 		    goto normal_char;
2557 		  }
2558 		break;
2559 
2560 
2561 	default:
2562 	   /* Expects the character in `c'.  */
2563 	normal_char:
2564 		 /* If no exactn currently being built.  */
2565 		if (!pending_exact
2566 
2567 		    /* If last exactn not at current position.  */
2568 		    || pending_exact + *pending_exact + 1 != b
2569 
2570 		    /* We have only one byte following the exactn for the count.  */
2571 		 || *pending_exact == (1 << BYTEWIDTH) - 1
2572 
2573 		    /* If followed by a repetition operator.  */
2574 		    || *p == '*' || *p == '^'
2575 		 || ((syntax & RE_BK_PLUS_QM)
2576 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2577 		  : (*p == '+' || *p == '?'))
2578 		 || ((syntax & RE_INTERVALS)
2579 			   && ((syntax & RE_NO_BK_BRACES)
2580 			 ? *p == '{'
2581 				  : (p[0] == '\\' && p[1] == '{'))))
2582 	    {
2583 		 /* Start building a new exactn.  */
2584 
2585 		    laststart = b;
2586 
2587 		 BUF_PUSH_2 (exactn, 0);
2588 		 pending_exact = b - 1;
2589 		  }
2590 
2591 	  BUF_PUSH (c);
2592 		(*pending_exact)++;
2593 	  break;
2594 	   } /* switch (c) */
2595     } /* while p != pend */
2596 
2597 
2598   /* Through the pattern now.  */
2599 
2600   if (fixup_alt_jump)
2601     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2602 
2603   if (!COMPILE_STACK_EMPTY)
2604     FREE_STACK_RETURN (REG_EPAREN);
2605 
2606   /* If we don't want backtracking, force success
2607 	the first time we reach the end of the compiled pattern.  */
2608   if (syntax & RE_NO_POSIX_BACKTRACKING)
2609     BUF_PUSH (succeed);
2610 
2611   free (compile_stack.stack);
2612 
2613   /* We have succeeded; set the length of the buffer.  */
2614   bufp->used = b - bufp->buffer;
2615 
2616 #ifdef DEBUG
2617   if (debug)
2618     {
2619 	 DEBUG_PRINT1 ("\nCompiled pattern: \n");
2620 	 print_compiled_pattern (bufp);
2621     }
2622 #endif /* DEBUG */
2623 
2624 #ifndef MATCH_MAY_ALLOCATE
2625   /* Initialize the failure stack to the largest possible stack.  This
2626 	isn't necessary unless we're trying to avoid calling alloca in
2627 	the search and match routines.  */
2628   {
2629     int num_regs = bufp->re_nsub + 1;
2630 
2631     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2632 	  is strictly greater than re_max_failures, the largest possible stack
2633 	  is 2 * re_max_failures failure points.  */
2634     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2635 	 {
2636 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2637 
2638 #ifdef emacs
2639 	if (! fail_stack.stack)
2640 	  fail_stack.stack
2641 	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
2642 					    * sizeof (fail_stack_elt_t));
2643 	else
2644 	  fail_stack.stack
2645 	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2646 						(fail_stack.size
2647 						 * sizeof (fail_stack_elt_t)));
2648 #else /* not emacs */
2649 	if (! fail_stack.stack)
2650 	  fail_stack.stack
2651 	    = (fail_stack_elt_t *) malloc (fail_stack.size
2652 					   * sizeof (fail_stack_elt_t));
2653 	else
2654 	  fail_stack.stack
2655 	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
2656 					    (fail_stack.size
2657 						* sizeof (fail_stack_elt_t)));
2658 #endif /* not emacs */
2659 	 }
2660 
2661     regex_grow_registers (num_regs);
2662   }
2663 #endif /* not MATCH_MAY_ALLOCATE */
2664 
2665   return REG_NOERROR;
2666 } /* regex_compile */
2667 
2668 /* Subroutines for `regex_compile'.  */
2669 
2670 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
2671 
2672 static void
store_op1(op,loc,arg)2673 store_op1 (op, loc, arg)
2674     re_opcode_t op;
2675     unsigned char *loc;
2676     int arg;
2677 {
2678   *loc = (unsigned char) op;
2679   STORE_NUMBER (loc + 1, arg);
2680 }
2681 
2682 
2683 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2684 
2685 static void
store_op2(op,loc,arg1,arg2)2686 store_op2 (op, loc, arg1, arg2)
2687     re_opcode_t op;
2688     unsigned char *loc;
2689     int arg1, arg2;
2690 {
2691   *loc = (unsigned char) op;
2692   STORE_NUMBER (loc + 1, arg1);
2693   STORE_NUMBER (loc + 3, arg2);
2694 }
2695 
2696 
2697 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2698    for OP followed by two-byte integer parameter ARG.  */
2699 
2700 static void
insert_op1(op,loc,arg,end)2701 insert_op1 (op, loc, arg, end)
2702     re_opcode_t op;
2703     unsigned char *loc;
2704     int arg;
2705     unsigned char *end;
2706 {
2707   register unsigned char *pfrom = end;
2708   register unsigned char *pto = end + 3;
2709 
2710   while (pfrom != loc)
2711     *--pto = *--pfrom;
2712 
2713   store_op1 (op, loc, arg);
2714 }
2715 
2716 
2717 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2718 
2719 static void
insert_op2(op,loc,arg1,arg2,end)2720 insert_op2 (op, loc, arg1, arg2, end)
2721     re_opcode_t op;
2722     unsigned char *loc;
2723     int arg1, arg2;
2724     unsigned char *end;
2725 {
2726   register unsigned char *pfrom = end;
2727   register unsigned char *pto = end + 5;
2728 
2729   while (pfrom != loc)
2730     *--pto = *--pfrom;
2731 
2732   store_op2 (op, loc, arg1, arg2);
2733 }
2734 
2735 
2736 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2737    after an alternative or a begin-subexpression.  We assume there is at
2738    least one character before the ^.  */
2739 
2740 static boolean
at_begline_loc_p(pattern,p,syntax)2741 at_begline_loc_p (pattern, p, syntax)
2742     const char *pattern, *p;
2743     reg_syntax_t syntax;
2744 {
2745   const char *prev = p - 2;
2746   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2747 
2748   return
2749 	  /* After a subexpression?  */
2750 	  (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2751 	  /* After an alternative?  */
2752     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2753 }
2754 
2755 
2756 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2757    at least one character after the $, i.e., `P < PEND'.  */
2758 
2759 static boolean
at_endline_loc_p(p,pend,syntax)2760 at_endline_loc_p (p, pend, syntax)
2761     const char *p, *pend;
2762     int syntax;
2763 {
2764   const char *next = p;
2765   boolean next_backslash = *next == '\\';
2766   const char *next_next = p + 1 < pend ? p + 1 : 0;
2767 
2768   return
2769 	  /* Before a subexpression?  */
2770 	  (syntax & RE_NO_BK_PARENS ? *next == ')'
2771 	   : next_backslash && next_next && *next_next == ')')
2772 	  /* Before an alternative?  */
2773     || (syntax & RE_NO_BK_VBAR ? *next == '|'
2774 	   : next_backslash && next_next && *next_next == '|');
2775 }
2776 
2777 
2778 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
2779    false if it's not.  */
2780 
2781 static boolean
group_in_compile_stack(compile_stack,regnum)2782 group_in_compile_stack (compile_stack, regnum)
2783     compile_stack_type compile_stack;
2784     regnum_t regnum;
2785 {
2786   int this_element;
2787 
2788   for (this_element = compile_stack.avail - 1;
2789 	  this_element >= 0;
2790 	  this_element--)
2791     if (compile_stack.stack[this_element].regnum == regnum)
2792 	 return true;
2793 
2794   return false;
2795 }
2796 
2797 
2798 /* Read the ending character of a range (in a bracket expression) from the
2799    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
2800    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
2801    Then we set the translation of all bits between the starting and
2802    ending characters (inclusive) in the compiled pattern B.
2803 
2804    Return an error code.
2805 
2806    We use these short variable names so we can use the same macros as
2807    `regex_compile' itself.  */
2808 
2809 static reg_errcode_t
compile_range(p_ptr,pend,translate,syntax,b)2810 compile_range (p_ptr, pend, translate, syntax, b)
2811     const char **p_ptr, *pend;
2812     char *translate;
2813     reg_syntax_t syntax;
2814     unsigned char *b;
2815 {
2816   unsigned this_char;
2817 
2818   const char *p = *p_ptr;
2819   int range_start, range_end;
2820 
2821   if (p == pend)
2822     return REG_ERANGE;
2823 
2824   /* Even though the pattern is a signed `char *', we need to fetch
2825 	with unsigned char *'s; if the high bit of the pattern character
2826 	is set, the range endpoints will be negative if we fetch using a
2827 	signed char *.
2828 
2829 	We also want to fetch the endpoints without translating them; the
2830 	appropriate translation is done in the bit-setting loop below.  */
2831   /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
2832   range_start = ((const unsigned char *) p)[-2];
2833   range_end   = ((const unsigned char *) p)[0];
2834 
2835   /* Have to increment the pointer into the pattern string, so the
2836 	caller isn't still at the ending character.  */
2837   (*p_ptr)++;
2838 
2839   /* If the start is after the end, the range is empty.  */
2840   if (range_start > range_end)
2841     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2842 
2843   /* Here we see why `this_char' has to be larger than an `unsigned
2844 	char' -- the range is inclusive, so if `range_end' == 0xff
2845 	(assuming 8-bit characters), we would otherwise go into an infinite
2846 	loop, since all characters <= 0xff.  */
2847   for (this_char = range_start; this_char <= range_end; this_char++)
2848     {
2849 	 SET_LIST_BIT (TRANSLATE (this_char));
2850     }
2851 
2852   return REG_NOERROR;
2853 }
2854 
2855 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2856    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2857    characters can start a string that matches the pattern.  This fastmap
2858    is used by re_search to skip quickly over impossible starting points.
2859 
2860    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2861    area as BUFP->fastmap.
2862 
2863    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2864    the pattern buffer.
2865 
2866    Returns 0 if we succeed, -2 if an internal error.   */
2867 
2868 int
re_compile_fastmap(bufp)2869 re_compile_fastmap (bufp)
2870 	struct re_pattern_buffer *bufp;
2871 {
2872   int j, k;
2873 #ifdef MATCH_MAY_ALLOCATE
2874   fail_stack_type fail_stack;
2875 #endif
2876 #ifndef REGEX_MALLOC
2877   char *destination;
2878 #endif
2879   /* We don't push any register information onto the failure stack.  */
2880   unsigned num_regs = 0;
2881 
2882   register char *fastmap = bufp->fastmap;
2883   unsigned char *pattern = bufp->buffer;
2884   unsigned long size = bufp->used;
2885   unsigned char *p = pattern;
2886   register unsigned char *pend = pattern + size;
2887 
2888 #ifdef REL_ALLOC
2889   /* This holds the pointer to the failure stack, when
2890 	it is allocated relocatably.  */
2891   fail_stack_elt_t *failure_stack_ptr;
2892 #endif
2893 
2894   /* Assume that each path through the pattern can be null until
2895 	proven otherwise.  We set this false at the bottom of switch
2896 	statement, to which we get only if a particular path doesn't
2897 	match the empty string.  */
2898   boolean path_can_be_null = true;
2899 
2900   /* We aren't doing a `succeed_n' to begin with.  */
2901   boolean succeed_n_p = false;
2902 
2903   assert (fastmap != NULL && p != NULL);
2904 
2905   INIT_FAIL_STACK ();
2906   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
2907   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
2908   bufp->can_be_null = 0;
2909 
2910   while (1)
2911     {
2912 	 if (p == pend || *p == succeed)
2913 	{
2914 	  /* We have reached the (effective) end of pattern.  */
2915 	  if (!FAIL_STACK_EMPTY ())
2916 	    {
2917 		 bufp->can_be_null |= path_can_be_null;
2918 
2919 		 /* Reset for next path.  */
2920 		 path_can_be_null = true;
2921 
2922 		 p = fail_stack.stack[--fail_stack.avail].pointer;
2923 
2924 		 continue;
2925 	    }
2926 	  else
2927 	    break;
2928 	}
2929 
2930 	 /* We should never be about to go beyond the end of the pattern.  */
2931 	 assert (p < pend);
2932 
2933 	 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
2934 	{
2935 
2936 	   /* I guess the idea here is to simply not bother with a fastmap
2937 		 if a backreference is used, since it's too hard to figure out
2938 		 the fastmap for the corresponding group.  Setting
2939 		 `can_be_null' stops `re_search_2' from using the fastmap, so
2940 		 that is all we do.  */
2941 	case duplicate:
2942 	  bufp->can_be_null = 1;
2943 		goto done;
2944 
2945 
2946 	 /* Following are the cases which match a character.  These end
2947 	    with `break'.  */
2948 
2949 	case exactn:
2950 		fastmap[p[1]] = 1;
2951 	  break;
2952 
2953 
2954 	   case charset:
2955 		for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2956 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2957 		    fastmap[j] = 1;
2958 	  break;
2959 
2960 
2961 	case charset_not:
2962 	  /* Chars beyond end of map must be allowed.  */
2963 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2964 		  fastmap[j] = 1;
2965 
2966 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2967 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2968 		    fastmap[j] = 1;
2969 		break;
2970 
2971 
2972 	case wordchar:
2973 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2974 	    if (SYNTAX (j) == Sword)
2975 		 fastmap[j] = 1;
2976 	  break;
2977 
2978 
2979 	case notwordchar:
2980 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2981 	    if (SYNTAX (j) != Sword)
2982 		 fastmap[j] = 1;
2983 	  break;
2984 
2985 
2986 	   case anychar:
2987 	  {
2988 	    int fastmap_newline = fastmap['\n'];
2989 
2990 	    /* `.' matches anything ...  */
2991 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
2992 		 fastmap[j] = 1;
2993 
2994 	    /* ... except perhaps newline.  */
2995 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
2996 		 fastmap['\n'] = fastmap_newline;
2997 
2998 	    /* Return if we have already set `can_be_null'; if we have,
2999 		  then the fastmap is irrelevant.  Something's wrong here.  */
3000 	    else if (bufp->can_be_null)
3001 		 goto done;
3002 
3003 	    /* Otherwise, have to check alternative paths.  */
3004 	    break;
3005 	  }
3006 
3007 #ifdef emacs
3008 	   case syntaxspec:
3009 	  k = *p++;
3010 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3011 	    if (SYNTAX (j) == (enum syntaxcode) k)
3012 		 fastmap[j] = 1;
3013 	  break;
3014 
3015 
3016 	case notsyntaxspec:
3017 	  k = *p++;
3018 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3019 	    if (SYNTAX (j) != (enum syntaxcode) k)
3020 		 fastmap[j] = 1;
3021 	  break;
3022 
3023 
3024 	 /* All cases after this match the empty string.  These end with
3025 	    `continue'.  */
3026 
3027 
3028 	case before_dot:
3029 	case at_dot:
3030 	case after_dot:
3031 		continue;
3032 #endif /* not emacs */
3033 
3034 
3035 	   case no_op:
3036 	   case begline:
3037 	   case endline:
3038 	case begbuf:
3039 	case endbuf:
3040 	case wordbound:
3041 	case notwordbound:
3042 	case wordbeg:
3043 	case wordend:
3044 	   case push_dummy_failure:
3045 		continue;
3046 
3047 
3048 	case jump_n:
3049 	   case pop_failure_jump:
3050 	case maybe_pop_jump:
3051 	case jump:
3052 	   case jump_past_alt:
3053 	case dummy_failure_jump:
3054 		EXTRACT_NUMBER_AND_INCR (j, p);
3055 	  p += j;
3056 	  if (j > 0)
3057 	    continue;
3058 
3059 		/* Jump backward implies we just went through the body of a
3060 		   loop and matched nothing.  Opcode jumped to should be
3061 		   `on_failure_jump' or `succeed_n'.  Just treat it like an
3062 		   ordinary jump.  For a * loop, it has pushed its failure
3063 		   point already; if so, discard that as redundant.  */
3064 		if ((re_opcode_t) *p != on_failure_jump
3065 		 && (re_opcode_t) *p != succeed_n)
3066 	    continue;
3067 
3068 		p++;
3069 		EXTRACT_NUMBER_AND_INCR (j, p);
3070 		p += j;
3071 
3072 		/* If what's on the stack is where we are now, pop it.  */
3073 		if (!FAIL_STACK_EMPTY ()
3074 		 && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3075 		  fail_stack.avail--;
3076 
3077 		continue;
3078 
3079 
3080 	   case on_failure_jump:
3081 	   case on_failure_keep_string_jump:
3082 	handle_on_failure_jump:
3083 		EXTRACT_NUMBER_AND_INCR (j, p);
3084 
3085 		/* For some patterns, e.g., `(a?)?', `p+j' here points to the
3086 		   end of the pattern.  We don't want to push such a point,
3087 		   since when we restore it above, entering the switch will
3088 		   increment `p' past the end of the pattern.  We don't need
3089 		   to push such a point since we obviously won't find any more
3090 		   fastmap entries beyond `pend'.  Such a pattern can match
3091 		   the null string, though.  */
3092 		if (p + j < pend)
3093 		  {
3094 		    if (!PUSH_PATTERN_OP (p + j, fail_stack))
3095 		{
3096 		  RESET_FAIL_STACK ();
3097 		  return -2;
3098 		}
3099 		  }
3100 		else
3101 		  bufp->can_be_null = 1;
3102 
3103 		if (succeed_n_p)
3104 		  {
3105 		    EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
3106 		    succeed_n_p = false;
3107 	    }
3108 
3109 		continue;
3110 
3111 
3112 	case succeed_n:
3113 		/* Get to the number of times to succeed.  */
3114 		p += 2;
3115 
3116 		/* Increment p past the n for when k != 0.  */
3117 		EXTRACT_NUMBER_AND_INCR (k, p);
3118 		if (k == 0)
3119 	    {
3120 		    p -= 4;
3121 		 succeed_n_p = true;  /* Spaghetti code alert.  */
3122 		    goto handle_on_failure_jump;
3123 		  }
3124 		continue;
3125 
3126 
3127 	case set_number_at:
3128 		p += 4;
3129 		continue;
3130 
3131 
3132 	case start_memory:
3133 	   case stop_memory:
3134 	  p += 2;
3135 	  continue;
3136 
3137 
3138 	default:
3139 		abort (); /* We have listed all the cases.  */
3140 	   } /* switch *p++ */
3141 
3142 	 /* Getting here means we have found the possible starting
3143 	    characters for one path of the pattern -- and that the empty
3144 	    string does not match.  We need not follow this path further.
3145 	    Instead, look at the next alternative (remembered on the
3146 	    stack), or quit if no more.  The test at the top of the loop
3147 	    does these things.  */
3148 	 path_can_be_null = false;
3149 	 p = pend;
3150     } /* while p */
3151 
3152   /* Set `can_be_null' for the last path (also the first path, if the
3153 	pattern is empty).  */
3154   bufp->can_be_null |= path_can_be_null;
3155 
3156  done:
3157   RESET_FAIL_STACK ();
3158   return 0;
3159 } /* re_compile_fastmap */
3160 
3161 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3162    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3163    this memory for recording register information.  STARTS and ENDS
3164    must be allocated using the malloc library routine, and must each
3165    be at least NUM_REGS * sizeof (regoff_t) bytes long.
3166 
3167    If NUM_REGS == 0, then subsequent matches should allocate their own
3168    register data.
3169 
3170    Unless this function is called, the first search or match using
3171    PATTERN_BUFFER will allocate its own register data, without
3172    freeing the old data.  */
3173 
3174 void
re_set_registers(bufp,regs,num_regs,starts,ends)3175 re_set_registers (bufp, regs, num_regs, starts, ends)
3176     struct re_pattern_buffer *bufp;
3177     struct re_registers *regs;
3178     unsigned num_regs;
3179     regoff_t *starts, *ends;
3180 {
3181   if (num_regs)
3182     {
3183 	 bufp->regs_allocated = REGS_REALLOCATE;
3184 	 regs->num_regs = num_regs;
3185 	 regs->start = starts;
3186 	 regs->end = ends;
3187     }
3188   else
3189     {
3190 	 bufp->regs_allocated = REGS_UNALLOCATED;
3191 	 regs->num_regs = 0;
3192 	 regs->start = regs->end = (regoff_t *) 0;
3193     }
3194 }
3195 
3196 /* Searching routines.  */
3197 
3198 /* Like re_search_2, below, but only one string is specified, and
3199    doesn't let you say where to stop matching. */
3200 
3201 int
re_search(bufp,string,size,startpos,range,regs)3202 re_search (bufp, string, size, startpos, range, regs)
3203 	struct re_pattern_buffer *bufp;
3204 	const char *string;
3205 	int size, startpos, range;
3206 	struct re_registers *regs;
3207 {
3208   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3209 			 regs, size);
3210 }
3211 
3212 
3213 /* Using the compiled pattern in BUFP->buffer, first tries to match the
3214    virtual concatenation of STRING1 and STRING2, starting first at index
3215    STARTPOS, then at STARTPOS + 1, and so on.
3216 
3217    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3218 
3219    RANGE is how far to scan while trying to match.  RANGE = 0 means try
3220    only at STARTPOS; in general, the last start tried is STARTPOS +
3221    RANGE.
3222 
3223    In REGS, return the indices of the virtual concatenation of STRING1
3224    and STRING2 that matched the entire BUFP->buffer and its contained
3225    subexpressions.
3226 
3227    Do not consider matching one past the index STOP in the virtual
3228    concatenation of STRING1 and STRING2.
3229 
3230    We return either the position in the strings at which the match was
3231    found, -1 if no match, or -2 if error (such as failure
3232    stack overflow).  */
3233 
3234 int
re_search_2(bufp,string1,size1,string2,size2,startpos,range,regs,stop)3235 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3236 	struct re_pattern_buffer *bufp;
3237 	const char *string1, *string2;
3238 	int size1, size2;
3239 	int startpos;
3240 	int range;
3241 	struct re_registers *regs;
3242 	int stop;
3243 {
3244   int val;
3245   register char *fastmap = bufp->fastmap;
3246   register char *translate = bufp->translate;
3247   int total_size = size1 + size2;
3248   int endpos = startpos + range;
3249 
3250   /* Check for out-of-range STARTPOS.  */
3251   if (startpos < 0 || startpos > total_size)
3252     return -1;
3253 
3254   /* Fix up RANGE if it might eventually take us outside
3255 	the virtual concatenation of STRING1 and STRING2.  */
3256   if (endpos < -1)
3257     range = -1 - startpos;
3258   else if (endpos > total_size)
3259     range = total_size - startpos;
3260 
3261   /* If the search isn't to be a backwards one, don't waste time in a
3262 	search for a pattern that must be anchored.  */
3263   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3264     {
3265 	 if (startpos > 0)
3266 	return -1;
3267 	 else
3268 	range = 1;
3269     }
3270 
3271   /* Update the fastmap now if not correct already.  */
3272   if (fastmap && !bufp->fastmap_accurate)
3273     if (re_compile_fastmap (bufp) == -2)
3274 	 return -2;
3275 
3276   /* Loop through the string, looking for a place to start matching.  */
3277   for (;;)
3278     {
3279 	 /* If a fastmap is supplied, skip quickly over characters that
3280 	    cannot be the start of a match.  If the pattern can match the
3281 	    null string, however, we don't need to skip characters; we want
3282 	    the first null string.  */
3283 	 if (fastmap && startpos < total_size && !bufp->can_be_null)
3284 	{
3285 	  if (range > 0)	/* Searching forwards.  */
3286 	    {
3287 		 register const char *d;
3288 		 register int lim = 0;
3289 		 int irange = range;
3290 
3291 		    if (startpos < size1 && startpos + range >= size1)
3292 			 lim = range - (size1 - startpos);
3293 
3294 		 d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3295 
3296 		    /* Written out as an if-else to avoid testing `translate'
3297 			  inside the loop.  */
3298 		 if (translate)
3299 			 while (range > lim
3300 				   && !fastmap[(unsigned char)
3301 				   translate[(unsigned char) *d++]])
3302 			   range--;
3303 		 else
3304 			 while (range > lim && !fastmap[(unsigned char) *d++])
3305 			   range--;
3306 
3307 		 startpos += irange - range;
3308 	    }
3309 	  else				/* Searching backwards.  */
3310 	    {
3311 		 register char c = (size1 == 0 || startpos >= size1
3312 						   ? string2[startpos - size1]
3313 						   : string1[startpos]);
3314 
3315 		 if (!fastmap[(unsigned char) TRANSLATE (c)])
3316 		goto advance;
3317 	    }
3318 	}
3319 
3320 	 /* If can't match the null string, and that's all we have left, fail.  */
3321 	 if (range >= 0 && startpos == total_size && fastmap
3322 		&& !bufp->can_be_null)
3323 	return -1;
3324 
3325 	 val = re_match_2_internal (bufp, string1, size1, string2, size2,
3326 				 startpos, regs, stop);
3327 #ifndef REGEX_MALLOC
3328 #ifdef C_ALLOCA
3329 	 alloca (0);
3330 #endif
3331 #endif
3332 
3333 	 if (val >= 0)
3334 	return startpos;
3335 
3336 	 if (val == -2)
3337 	return -2;
3338 
3339     advance:
3340 	 if (!range)
3341 	   break;
3342 	 else if (range > 0)
3343 	   {
3344 		range--;
3345 		startpos++;
3346 	   }
3347 	 else
3348 	   {
3349 		range++;
3350 		startpos--;
3351 	   }
3352     }
3353   return -1;
3354 } /* re_search_2 */
3355 
3356 /* Declarations and macros for re_match_2.  */
3357 
3358 static int bcmp_translate ();
3359 static boolean alt_match_null_string_p (),
3360 			common_op_match_null_string_p (),
3361 			group_match_null_string_p ();
3362 
3363 /* This converts PTR, a pointer into one of the search strings `string1'
3364    and `string2' into an offset from the beginning of that string.  */
3365 #define POINTER_TO_OFFSET(ptr)			\
3366   (FIRST_STRING_P (ptr)				\
3367    ? ((regoff_t) ((ptr) - string1))		\
3368    : ((regoff_t) ((ptr) - string2 + size1)))
3369 
3370 /* Macros for dealing with the split strings in re_match_2.  */
3371 
3372 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3373 
3374 /* Call before fetching a character with *d.  This switches over to
3375    string2 if necessary.  */
3376 #define PREFETCH()							\
3377   while (d == dend)						    	\
3378     {									\
3379 	 /* End of string2 => fail.  */					\
3380 	 if (dend == end_match_2) 						\
3381 	   goto fail;							\
3382 	 /* End of string1 => advance to string2.  */ 			\
3383 	 d = string2;						        \
3384 	 dend = end_match_2;						\
3385     }
3386 
3387 
3388 /* Test if at very beginning or at very end of the virtual concatenation
3389    of `string1' and `string2'.  If only one string, it's `string2'.  */
3390 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3391 #define AT_STRINGS_END(d) ((d) == end2)
3392 
3393 
3394 /* Test if D points to a character which is word-constituent.  We have
3395    two special cases to check for: if past the end of string1, look at
3396    the first character in string2; and if before the beginning of
3397    string2, look at the last character in string1.  */
3398 #define WORDCHAR_P(d)							\
3399   (SYNTAX ((d) == end1 ? *string2					\
3400 		 : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3401    == Sword)
3402 
3403 /* Test if the character before D and the one at D differ with respect
3404    to being word-constituent.  */
3405 #define AT_WORD_BOUNDARY(d)						\
3406   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3407    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3408 
3409 
3410 /* Free everything we malloc.  */
3411 #ifdef MATCH_MAY_ALLOCATE
3412 #define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
3413 #define FREE_VARIABLES()						\
3414   do {									\
3415     REGEX_FREE_STACK (fail_stack.stack);				\
3416     FREE_VAR (regstart);						\
3417     FREE_VAR (regend);							\
3418     FREE_VAR (old_regstart);						\
3419     FREE_VAR (old_regend);						\
3420     FREE_VAR (best_regstart);						\
3421     FREE_VAR (best_regend);						\
3422     FREE_VAR (reg_info);						\
3423     FREE_VAR (reg_dummy);						\
3424     FREE_VAR (reg_info_dummy);						\
3425   } while (0)
3426 #else
3427 #define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
3428 #endif /* not MATCH_MAY_ALLOCATE */
3429 
3430 /* These values must meet several constraints.  They must not be valid
3431    register values; since we have a limit of 255 registers (because
3432    we use only one byte in the pattern for the register number), we can
3433    use numbers larger than 255.  They must differ by 1, because of
3434    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3435    be larger than the value for the highest register, so we do not try
3436    to actually save any registers when none are active.  */
3437 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3438 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3439 
3440 /* Matching routines.  */
3441 
3442 #ifndef emacs   /* Emacs never uses this.  */
3443 /* re_match is like re_match_2 except it takes only a single string.  */
3444 
3445 int
re_match(bufp,string,size,pos,regs)3446 re_match (bufp, string, size, pos, regs)
3447 	struct re_pattern_buffer *bufp;
3448 	const char *string;
3449 	int size, pos;
3450 	struct re_registers *regs;
3451 {
3452   int result = re_match_2_internal (bufp, NULL, 0, string, size,
3453 				    pos, regs, size);
3454 #ifndef REGEX_MALLOC
3455   alloca (0);
3456 #endif
3457   return result;
3458 }
3459 #endif /* not emacs */
3460 
3461 
3462 /* re_match_2 matches the compiled pattern in BUFP against the
3463    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3464    and SIZE2, respectively).  We start matching at POS, and stop
3465    matching at STOP.
3466 
3467    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3468    store offsets for the substring each group matched in REGS.  See the
3469    documentation for exactly how many groups we fill.
3470 
3471    We return -1 if no match, -2 if an internal error (such as the
3472    failure stack overflowing).  Otherwise, we return the length of the
3473    matched substring.  */
3474 
3475 int
re_match_2(bufp,string1,size1,string2,size2,pos,regs,stop)3476 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3477 	struct re_pattern_buffer *bufp;
3478 	const char *string1, *string2;
3479 	int size1, size2;
3480 	int pos;
3481 	struct re_registers *regs;
3482 	int stop;
3483 {
3484   int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3485 				    pos, regs, stop);
3486 #ifndef REGEX_MALLOC
3487   alloca (0);
3488 #endif
3489   return result;
3490 }
3491 
3492 /* This is a separate function so that we can force an alloca cleanup
3493    afterwards.  */
3494 static int
re_match_2_internal(bufp,string1,size1,string2,size2,pos,regs,stop)3495 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3496 	struct re_pattern_buffer *bufp;
3497 	const char *string1, *string2;
3498 	int size1, size2;
3499 	int pos;
3500 	struct re_registers *regs;
3501 	int stop;
3502 {
3503   /* General temporaries.  */
3504   int mcnt;
3505   unsigned char *p1;
3506 
3507   /* Just past the end of the corresponding string.  */
3508   const char *end1, *end2;
3509 
3510   /* Pointers into string1 and string2, just past the last characters in
3511 	each to consider matching.  */
3512   const char *end_match_1, *end_match_2;
3513 
3514   /* Where we are in the data, and the end of the current string.  */
3515   const char *d, *dend;
3516 
3517   /* Where we are in the pattern, and the end of the pattern.  */
3518   unsigned char *p = bufp->buffer;
3519   register unsigned char *pend = p + bufp->used;
3520 
3521   /* Mark the opcode just after a start_memory, so we can test for an
3522 	empty subpattern when we get to the stop_memory.  */
3523   unsigned char *just_past_start_mem = 0;
3524 
3525   /* We use this to map every character in the string.  */
3526   char *translate = bufp->translate;
3527 
3528   /* Failure point stack.  Each place that can handle a failure further
3529 	down the line pushes a failure point on this stack.  It consists of
3530 	restart, regend, and reg_info for all registers corresponding to
3531 	the subexpressions we're currently inside, plus the number of such
3532 	registers, and, finally, two char *'s.  The first char * is where
3533 	to resume scanning the pattern; the second one is where to resume
3534 	scanning the strings.  If the latter is zero, the failure point is
3535 	a ``dummy''; if a failure happens and the failure point is a dummy,
3536 	it gets discarded and the next next one is tried.  */
3537 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3538   fail_stack_type fail_stack;
3539 #endif
3540 #ifdef DEBUG
3541   static unsigned failure_id = 0;
3542   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3543 #endif
3544 
3545 #ifdef REL_ALLOC
3546   /* This holds the pointer to the failure stack, when
3547 	it is allocated relocatably.  */
3548   fail_stack_elt_t *failure_stack_ptr;
3549 #endif
3550 
3551   /* We fill all the registers internally, independent of what we
3552 	return, for use in backreferences.  The number here includes
3553 	an element for register zero.  */
3554   unsigned num_regs = bufp->re_nsub + 1;
3555 
3556   /* The currently active registers.  */
3557   unsigned lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3558   unsigned highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3559 
3560   /* Information on the contents of registers. These are pointers into
3561 	the input strings; they record just what was matched (on this
3562 	attempt) by a subexpression part of the pattern, that is, the
3563 	regnum-th regstart pointer points to where in the pattern we began
3564 	matching and the regnum-th regend points to right after where we
3565 	stopped matching the regnum-th subexpression.  (The zeroth register
3566 	keeps track of what the whole pattern matches.)  */
3567 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3568   const char **regstart, **regend;
3569 #endif
3570 
3571   /* If a group that's operated upon by a repetition operator fails to
3572 	match anything, then the register for its start will need to be
3573 	restored because it will have been set to wherever in the string we
3574 	are when we last see its open-group operator.  Similarly for a
3575 	register's end.  */
3576 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3577   const char **old_regstart, **old_regend;
3578 #endif
3579 
3580   /* The is_active field of reg_info helps us keep track of which (possibly
3581 	nested) subexpressions we are currently in. The matched_something
3582 	field of reg_info[reg_num] helps us tell whether or not we have
3583 	matched any of the pattern so far this time through the reg_num-th
3584 	subexpression.  These two fields get reset each time through any
3585 	loop their register is in.  */
3586 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3587   register_info_type *reg_info;
3588 #endif
3589 
3590   /* The following record the register info as found in the above
3591 	variables when we find a match better than any we've seen before.
3592 	This happens as we backtrack through the failure points, which in
3593 	turn happens only if we have not yet matched the entire string. */
3594   unsigned best_regs_set = false;
3595 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3596   const char **best_regstart, **best_regend;
3597 #endif
3598 
3599   /* Logically, this is `best_regend[0]'.  But we don't want to have to
3600 	allocate space for that if we're not allocating space for anything
3601 	else (see below).  Also, we never need info about register 0 for
3602 	any of the other register vectors, and it seems rather a kludge to
3603 	treat `best_regend' differently than the rest.  So we keep track of
3604 	the end of the best match so far in a separate variable.  We
3605 	initialize this to NULL so that when we backtrack the first time
3606 	and need to test it, it's not garbage.  */
3607   const char *match_end = NULL;
3608 
3609   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
3610   int set_regs_matched_done = 0;
3611 
3612   /* Used when we pop values we don't care about.  */
3613 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3614   const char **reg_dummy;
3615   register_info_type *reg_info_dummy;
3616 #endif
3617 
3618 #ifdef DEBUG
3619   /* Counts the total number of registers pushed.  */
3620   unsigned num_regs_pushed = 0;
3621 #endif
3622 
3623   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3624 
3625   INIT_FAIL_STACK ();
3626 
3627 #ifdef MATCH_MAY_ALLOCATE
3628   /* Do not bother to initialize all the register variables if there are
3629 	no groups in the pattern, as it takes a fair amount of time.  If
3630 	there are groups, we include space for register 0 (the whole
3631 	pattern), even though we never use it, since it simplifies the
3632 	array indexing.  We should fix this.  */
3633   if (bufp->re_nsub)
3634     {
3635 	 regstart = REGEX_TALLOC (num_regs, const char *);
3636 	 regend = REGEX_TALLOC (num_regs, const char *);
3637 	 old_regstart = REGEX_TALLOC (num_regs, const char *);
3638 	 old_regend = REGEX_TALLOC (num_regs, const char *);
3639 	 best_regstart = REGEX_TALLOC (num_regs, const char *);
3640 	 best_regend = REGEX_TALLOC (num_regs, const char *);
3641 	 reg_info = REGEX_TALLOC (num_regs, register_info_type);
3642 	 reg_dummy = REGEX_TALLOC (num_regs, const char *);
3643 	 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3644 
3645 	 if (!(regstart && regend && old_regstart && old_regend && reg_info
3646 		  && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3647 	   {
3648 		FREE_VARIABLES ();
3649 		return -2;
3650 	   }
3651     }
3652   else
3653     {
3654 	 /* We must initialize all our variables to NULL, so that
3655 	    `FREE_VARIABLES' doesn't try to free them.  */
3656 	 regstart = regend = old_regstart = old_regend = best_regstart
3657 	   = best_regend = reg_dummy = NULL;
3658 	 reg_info = reg_info_dummy = (register_info_type *) NULL;
3659     }
3660 #endif /* MATCH_MAY_ALLOCATE */
3661 
3662   /* The starting position is bogus.  */
3663   if (pos < 0 || pos > size1 + size2)
3664     {
3665 	 FREE_VARIABLES ();
3666 	 return -1;
3667     }
3668 
3669   /* Initialize subexpression text positions to -1 to mark ones that no
3670 	start_memory/stop_memory has been seen for. Also initialize the
3671 	register information struct.  */
3672   for (mcnt = 1; mcnt < num_regs; mcnt++)
3673     {
3674 	 regstart[mcnt] = regend[mcnt]
3675 	   = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3676 
3677 	 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3678 	 IS_ACTIVE (reg_info[mcnt]) = 0;
3679 	 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3680 	 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3681     }
3682 
3683   /* We move `string1' into `string2' if the latter's empty -- but not if
3684 	`string1' is null.  */
3685   if (size2 == 0 && string1 != NULL)
3686     {
3687 	 string2 = string1;
3688 	 size2 = size1;
3689 	 string1 = 0;
3690 	 size1 = 0;
3691     }
3692   end1 = string1 + size1;
3693   end2 = string2 + size2;
3694 
3695   /* Compute where to stop matching, within the two strings.  */
3696   if (stop <= size1)
3697     {
3698 	 end_match_1 = string1 + stop;
3699 	 end_match_2 = string2;
3700     }
3701   else
3702     {
3703 	 end_match_1 = end1;
3704 	 end_match_2 = string2 + stop - size1;
3705     }
3706 
3707   /* `p' scans through the pattern as `d' scans through the data.
3708 	`dend' is the end of the input string that `d' points within.  `d'
3709 	is advanced into the following input string whenever necessary, but
3710 	this happens before fetching; therefore, at the beginning of the
3711 	loop, `d' can be pointing at the end of a string, but it cannot
3712 	equal `string2'.  */
3713   if (size1 > 0 && pos <= size1)
3714     {
3715 	 d = string1 + pos;
3716 	 dend = end_match_1;
3717     }
3718   else
3719     {
3720 	 d = string2 + pos - size1;
3721 	 dend = end_match_2;
3722     }
3723 
3724   DEBUG_PRINT1 ("The compiled pattern is: ");
3725   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3726   DEBUG_PRINT1 ("The string to match is: `");
3727   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3728   DEBUG_PRINT1 ("'\n");
3729 
3730   /* This loops over pattern commands.  It exits by returning from the
3731 	function if the match is complete, or it drops through if the match
3732 	fails at this starting point in the input data.  */
3733   for (;;)
3734     {
3735 	 DEBUG_PRINT2 ("\n0x%x: ", p);
3736 
3737 	 if (p == pend)
3738 	{ /* End of pattern means we might have succeeded.  */
3739 		DEBUG_PRINT1 ("end of pattern ... ");
3740 
3741 	  /* If we haven't matched the entire string, and we want the
3742 		   longest match, try backtracking.  */
3743 		if (d != end_match_2)
3744 	    {
3745 		 /* 1 if this match ends in the same string (string1 or string2)
3746 		 as the best previous match.  */
3747 		 boolean same_str_p = (FIRST_STRING_P (match_end)
3748 				    == MATCHING_IN_FIRST_STRING);
3749 		 /* 1 if this match is the best seen so far.  */
3750 		 boolean best_match_p;
3751 
3752 		 /* AIX compiler got confused when this was combined
3753 		 with the previous declaration.  */
3754 		 if (same_str_p)
3755 		best_match_p = d > match_end;
3756 		 else
3757 		best_match_p = !MATCHING_IN_FIRST_STRING;
3758 
3759 		    DEBUG_PRINT1 ("backtracking.\n");
3760 
3761 		    if (!FAIL_STACK_EMPTY ())
3762 			 { /* More failure points to try.  */
3763 
3764 			   /* If exceeds best match so far, save it.  */
3765 			   if (!best_regs_set || best_match_p)
3766 				{
3767 				  best_regs_set = true;
3768 				  match_end = d;
3769 
3770 				  DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
3771 
3772 				  for (mcnt = 1; mcnt < num_regs; mcnt++)
3773 				    {
3774 					 best_regstart[mcnt] = regstart[mcnt];
3775 					 best_regend[mcnt] = regend[mcnt];
3776 				    }
3777 				}
3778 			   goto fail;
3779 			 }
3780 
3781 		    /* If no failure points, don't restore garbage.  And if
3782 			  last match is real best match, don't restore second
3783 			  best one. */
3784 		    else if (best_regs_set && !best_match_p)
3785 			 {
3786 		   restore_best_regs:
3787 			   /* Restore best match.  It may happen that `dend ==
3788 				 end_match_1' while the restored d is in string2.
3789 				 For example, the pattern `x.*y.*z' against the
3790 				 strings `x-' and `y-z-', if the two strings are
3791 				 not consecutive in memory.  */
3792 			   DEBUG_PRINT1 ("Restoring best registers.\n");
3793 
3794 			   d = match_end;
3795 			   dend = ((d >= string1 && d <= end1)
3796 				 ? end_match_1 : end_match_2);
3797 
3798 		  for (mcnt = 1; mcnt < num_regs; mcnt++)
3799 		    {
3800 			 regstart[mcnt] = best_regstart[mcnt];
3801 			 regend[mcnt] = best_regend[mcnt];
3802 		    }
3803 			 }
3804 		  } /* d != end_match_2 */
3805 
3806 	succeed_label:
3807 		DEBUG_PRINT1 ("Accepting match.\n");
3808 
3809 		/* If caller wants register contents data back, do it.  */
3810 		if (regs && !bufp->no_sub)
3811 	    {
3812 		    /* Have the register data arrays been allocated?  */
3813 		    if (bufp->regs_allocated == REGS_UNALLOCATED)
3814 			 { /* No.  So allocate them with malloc.  We need one
3815 				 extra element beyond `num_regs' for the `-1' marker
3816 				 GNU code uses.  */
3817 			   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
3818 			   regs->start = TALLOC (regs->num_regs, regoff_t);
3819 			   regs->end = TALLOC (regs->num_regs, regoff_t);
3820 			   if (regs->start == NULL || regs->end == NULL)
3821 		    {
3822 			 FREE_VARIABLES ();
3823 			 return -2;
3824 		    }
3825 			   bufp->regs_allocated = REGS_REALLOCATE;
3826 			 }
3827 		    else if (bufp->regs_allocated == REGS_REALLOCATE)
3828 			 { /* Yes.  If we need more elements than were already
3829 				 allocated, reallocate them.  If we need fewer, just
3830 				 leave it alone.  */
3831 			   if (regs->num_regs < num_regs + 1)
3832 				{
3833 				  regs->num_regs = num_regs + 1;
3834 				  RETALLOC (regs->start, regs->num_regs, regoff_t);
3835 				  RETALLOC (regs->end, regs->num_regs, regoff_t);
3836 				  if (regs->start == NULL || regs->end == NULL)
3837 			{
3838 			  FREE_VARIABLES ();
3839 			  return -2;
3840 			}
3841 				}
3842 			 }
3843 		    else
3844 		{
3845 		  /* These braces fend off a "empty body in an else-statement"
3846 			warning under GCC when assert expands to nothing.  */
3847 		  assert (bufp->regs_allocated == REGS_FIXED);
3848 		}
3849 
3850 		    /* Convert the pointer data in `regstart' and `regend' to
3851 			  indices.  Register zero has to be set differently,
3852 			  since we haven't kept track of any info for it.  */
3853 		    if (regs->num_regs > 0)
3854 			 {
3855 			   regs->start[0] = pos;
3856 			   regs->end[0] = (MATCHING_IN_FIRST_STRING
3857 				  ? ((regoff_t) (d - string1))
3858 					: ((regoff_t) (d - string2 + size1)));
3859 			 }
3860 
3861 		    /* Go through the first `min (num_regs, regs->num_regs)'
3862 			  registers, since that is all we initialized.  */
3863 		 for (mcnt = 1; mcnt < MIN (num_regs, regs->num_regs); mcnt++)
3864 		{
3865 			   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
3866 				regs->start[mcnt] = regs->end[mcnt] = -1;
3867 			   else
3868 				{
3869 			 regs->start[mcnt]
3870 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
3871 				  regs->end[mcnt]
3872 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
3873 				}
3874 		}
3875 
3876 		    /* If the regs structure we return has more elements than
3877 			  were in the pattern, set the extra elements to -1.  If
3878 			  we (re)allocated the registers, this is the case,
3879 			  because we always allocate enough to have at least one
3880 			  -1 at the end.  */
3881 		    for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3882 			 regs->start[mcnt] = regs->end[mcnt] = -1;
3883 	    } /* regs && !bufp->no_sub */
3884 
3885 		DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
3886 				    nfailure_points_pushed, nfailure_points_popped,
3887 				    nfailure_points_pushed - nfailure_points_popped);
3888 		DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
3889 
3890 		mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3891 			    ? string1
3892 			    : string2 - size1);
3893 
3894 		DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
3895 
3896 		FREE_VARIABLES ();
3897 		return mcnt;
3898 	   }
3899 
3900 	 /* Otherwise match next pattern command.  */
3901 	 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3902 	{
3903 	   /* Ignore these.  Used to ignore the n of succeed_n's which
3904 		 currently have n == 0.  */
3905 	   case no_op:
3906 		DEBUG_PRINT1 ("EXECUTING no_op.\n");
3907 		break;
3908 
3909 	case succeed:
3910 		DEBUG_PRINT1 ("EXECUTING succeed.\n");
3911 	  goto succeed_label;
3912 
3913 	   /* Match the next n pattern characters exactly.  The following
3914 		 byte in the pattern defines n, and the n bytes after that
3915 		 are the characters to match.  */
3916 	case exactn:
3917 	  mcnt = *p++;
3918 		DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
3919 
3920 		/* This is written out as an if-else so we don't waste time
3921 		   testing `translate' inside the loop.  */
3922 		if (translate)
3923 	    {
3924 		 do
3925 		{
3926 		  PREFETCH ();
3927 		  if (translate[(unsigned char) *d++] != (char) *p++)
3928 				goto fail;
3929 		}
3930 		 while (--mcnt);
3931 	    }
3932 	  else
3933 	    {
3934 		 do
3935 		{
3936 		  PREFETCH ();
3937 		  if (*d++ != (char) *p++) goto fail;
3938 		}
3939 		 while (--mcnt);
3940 	    }
3941 	  SET_REGS_MATCHED ();
3942 		break;
3943 
3944 
3945 	   /* Match any character except possibly a newline or a null.  */
3946 	case anychar:
3947 		DEBUG_PRINT1 ("EXECUTING anychar.\n");
3948 
3949 		PREFETCH ();
3950 
3951 		if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
3952 		    || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
3953 	    goto fail;
3954 
3955 		SET_REGS_MATCHED ();
3956 		DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
3957 		d++;
3958 	  break;
3959 
3960 
3961 	case charset:
3962 	case charset_not:
3963 	  {
3964 	    register unsigned char c;
3965 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
3966 
3967 		  DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
3968 
3969 	    PREFETCH ();
3970 	    c = TRANSLATE (*d); /* The character to match.  */
3971 
3972 		  /* Cast to `unsigned' instead of `unsigned char' in case the
3973 			bit list is a full 32 bytes long.  */
3974 	    if (c < (unsigned) (*p * BYTEWIDTH)
3975 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3976 		 not = !not;
3977 
3978 	    p += 1 + *p;
3979 
3980 	    if (!not) goto fail;
3981 
3982 	    SET_REGS_MATCHED ();
3983 		  d++;
3984 	    break;
3985 	  }
3986 
3987 
3988 	   /* The beginning of a group is represented by start_memory.
3989 		 The arguments are the register number in the next byte, and the
3990 		 number of groups inner to this one in the next.  The text
3991 		 matched within the group is recorded (in the internal
3992 		 registers data structure) under the register number.  */
3993 	   case start_memory:
3994 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3995 
3996 		/* Find out if this group can match the empty string.  */
3997 	  p1 = p;		/* To send to group_match_null_string_p.  */
3998 
3999 		if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4000 		  REG_MATCH_NULL_STRING_P (reg_info[*p])
4001 		    = group_match_null_string_p (&p1, pend, reg_info);
4002 
4003 		/* Save the position in the string where we were the last time
4004 		   we were at this open-group operator in case the group is
4005 		   operated upon by a repetition operator, e.g., with `(a*)*b'
4006 		   against `ab'; then we want to ignore where we are now in
4007 		   the string in case this attempt to match fails.  */
4008 		old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4009 					    ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4010 					    : regstart[*p];
4011 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
4012 			 POINTER_TO_OFFSET (old_regstart[*p]));
4013 
4014 		regstart[*p] = d;
4015 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4016 
4017 		IS_ACTIVE (reg_info[*p]) = 1;
4018 		MATCHED_SOMETHING (reg_info[*p]) = 0;
4019 
4020 	  /* Clear this whenever we change the register activity status.  */
4021 	  set_regs_matched_done = 0;
4022 
4023 		/* This is the new highest active register.  */
4024 		highest_active_reg = *p;
4025 
4026 		/* If nothing was active before, this is the new lowest active
4027 		   register.  */
4028 		if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4029 		  lowest_active_reg = *p;
4030 
4031 		/* Move past the register number and inner group count.  */
4032 		p += 2;
4033 	  just_past_start_mem = p;
4034 
4035 		break;
4036 
4037 
4038 	   /* The stop_memory opcode represents the end of a group.  Its
4039 		 arguments are the same as start_memory's: the register
4040 		 number, and the number of inner groups.  */
4041 	case stop_memory:
4042 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4043 
4044 		/* We need to save the string position the last time we were at
4045 		   this close-group operator in case the group is operated
4046 		   upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4047 		   against `aba'; then we want to ignore where we are now in
4048 		   the string in case this attempt to match fails.  */
4049 		old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4050 					  ? REG_UNSET (regend[*p]) ? d : regend[*p]
4051 			   : regend[*p];
4052 	  DEBUG_PRINT2 ("      old_regend: %d\n",
4053 			 POINTER_TO_OFFSET (old_regend[*p]));
4054 
4055 		regend[*p] = d;
4056 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4057 
4058 		/* This register isn't active anymore.  */
4059 		IS_ACTIVE (reg_info[*p]) = 0;
4060 
4061 	  /* Clear this whenever we change the register activity status.  */
4062 	  set_regs_matched_done = 0;
4063 
4064 		/* If this was the only register active, nothing is active
4065 		   anymore.  */
4066 		if (lowest_active_reg == highest_active_reg)
4067 		  {
4068 		    lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4069 		    highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4070 		  }
4071 		else
4072 		  { /* We must scan for the new highest active register, since
4073 			  it isn't necessarily one less than now: consider
4074 			  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4075 			  new highest active register is 1.  */
4076 		    unsigned char r = *p - 1;
4077 		    while (r > 0 && !IS_ACTIVE (reg_info[r]))
4078 			 r--;
4079 
4080 		    /* If we end up at register zero, that means that we saved
4081 			  the registers as the result of an `on_failure_jump', not
4082 			  a `start_memory', and we jumped to past the innermost
4083 			  `stop_memory'.  For example, in ((.)*) we save
4084 			  registers 1 and 2 as a result of the *, but when we pop
4085 			  back to the second ), we are at the stop_memory 1.
4086 			  Thus, nothing is active.  */
4087 		 if (r == 0)
4088 			 {
4089 			   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4090 			   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4091 			 }
4092 		    else
4093 			 highest_active_reg = r;
4094 		  }
4095 
4096 		/* If just failed to match something this time around with a
4097 		   group that's operated on by a repetition operator, try to
4098 		   force exit from the ``loop'', and restore the register
4099 		   information for this group that we had before trying this
4100 		   last match.  */
4101 		if ((!MATCHED_SOMETHING (reg_info[*p])
4102 			|| just_past_start_mem == p - 1)
4103 		 && (p + 2) < pend)
4104 		  {
4105 		    boolean is_a_jump_n = false;
4106 
4107 		    p1 = p + 2;
4108 		    mcnt = 0;
4109 		    switch ((re_opcode_t) *p1++)
4110 			 {
4111 			   case jump_n:
4112 		    is_a_jump_n = true;
4113 			   case pop_failure_jump:
4114 		  case maybe_pop_jump:
4115 		  case jump:
4116 		  case dummy_failure_jump:
4117 				EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4118 		    if (is_a_jump_n)
4119 			 p1 += 2;
4120 				break;
4121 
4122 			   default:
4123 				/* do nothing */ ;
4124 			 }
4125 		 p1 += mcnt;
4126 
4127 		    /* If the next operation is a jump backwards in the pattern
4128 		    to an on_failure_jump right before the start_memory
4129 			  corresponding to this stop_memory, exit from the loop
4130 			  by forcing a failure after pushing on the stack the
4131 			  on_failure_jump's jump in the pattern, and d.  */
4132 		    if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4133 			   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4134 		{
4135 			   /* If this group ever matched anything, then restore
4136 				 what its registers were before trying this last
4137 				 failed match, e.g., with `(a*)*b' against `ab' for
4138 				 regstart[1], and, e.g., with `((a*)*(b*)*)*'
4139 				 against `aba' for regend[3].
4140 
4141 				 Also restore the registers for inner groups for,
4142 				 e.g., `((a*)(b*))*' against `aba' (register 3 would
4143 				 otherwise get trashed).  */
4144 
4145 			   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4146 		    {
4147 			 unsigned r;
4148 
4149 				  EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4150 
4151 			 /* Restore this and inner groups' (if any) registers.  */
4152 				  for (r = *p; r < *p + *(p + 1); r++)
4153 				    {
4154 					 regstart[r] = old_regstart[r];
4155 
4156 					 /* xx why this test?  */
4157 					 if (old_regend[r] >= regstart[r])
4158 					   regend[r] = old_regend[r];
4159 				    }
4160 				}
4161 		  p1++;
4162 			   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4163 			   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4164 
4165 			   goto fail;
4166 			 }
4167 		  }
4168 
4169 		/* Move past the register number and the inner group count.  */
4170 		p += 2;
4171 		break;
4172 
4173 
4174 	/* \<digit> has been turned into a `duplicate' command which is
4175 		 followed by the numeric value of <digit> as the register number.  */
4176 	   case duplicate:
4177 	  {
4178 	    register const char *d2, *dend2;
4179 	    int regno = *p++;   /* Get which register to match against.  */
4180 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4181 
4182 	    /* Can't back reference a group which we've never matched.  */
4183 		  if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4184 		    goto fail;
4185 
4186 		  /* Where in input to try to start matching.  */
4187 		  d2 = regstart[regno];
4188 
4189 		  /* Where to stop matching; if both the place to start and
4190 			the place to stop matching are in the same string, then
4191 			set to the place to stop, otherwise, for now have to use
4192 			the end of the first string.  */
4193 
4194 		  dend2 = ((FIRST_STRING_P (regstart[regno])
4195 			 == FIRST_STRING_P (regend[regno]))
4196 			? regend[regno] : end_match_1);
4197 	    for (;;)
4198 		 {
4199 		/* If necessary, advance to next segment in register
4200 			    contents.  */
4201 		while (d2 == dend2)
4202 		  {
4203 		    if (dend2 == end_match_2) break;
4204 		    if (dend2 == regend[regno]) break;
4205 
4206 				/* End of string1 => advance to string2. */
4207 				d2 = string2;
4208 				dend2 = regend[regno];
4209 		  }
4210 		/* At end of register contents => success */
4211 		if (d2 == dend2) break;
4212 
4213 		/* If necessary, advance to next segment in data.  */
4214 		PREFETCH ();
4215 
4216 		/* How many characters left in this segment to match.  */
4217 		mcnt = dend - d;
4218 
4219 		/* Want how many consecutive characters we can match in
4220 			    one shot, so, if necessary, adjust the count.  */
4221 			 if (mcnt > dend2 - d2)
4222 		  mcnt = dend2 - d2;
4223 
4224 		/* Compare that many; failure if mismatch, else move
4225 			    past them.  */
4226 		if (translate
4227 				? bcmp_translate (d, d2, mcnt, translate)
4228 				: bcmp (d, d2, mcnt))
4229 		  goto fail;
4230 		d += mcnt, d2 += mcnt;
4231 
4232 		/* Do this because we've match some characters.  */
4233 		SET_REGS_MATCHED ();
4234 		 }
4235 	  }
4236 	  break;
4237 
4238 
4239 	   /* begline matches the empty string at the beginning of the string
4240 		 (unless `not_bol' is set in `bufp'), and, if
4241 		 `newline_anchor' is set, after newlines.  */
4242 	case begline:
4243 		DEBUG_PRINT1 ("EXECUTING begline.\n");
4244 
4245 		if (AT_STRINGS_BEG (d))
4246 		  {
4247 		    if (!bufp->not_bol) break;
4248 		  }
4249 		else if (d[-1] == '\n' && bufp->newline_anchor)
4250 		  {
4251 		    break;
4252 		  }
4253 		/* In all other cases, we fail.  */
4254 		goto fail;
4255 
4256 
4257 	   /* endline is the dual of begline.  */
4258 	case endline:
4259 		DEBUG_PRINT1 ("EXECUTING endline.\n");
4260 
4261 		if (AT_STRINGS_END (d))
4262 		  {
4263 		    if (!bufp->not_eol) break;
4264 		  }
4265 
4266 		/* We have to ``prefetch'' the next character.  */
4267 		else if ((d == end1 ? *string2 : *d) == '\n'
4268 			    && bufp->newline_anchor)
4269 		  {
4270 		    break;
4271 		  }
4272 		goto fail;
4273 
4274 
4275 	/* Match at the very beginning of the data.  */
4276 	   case begbuf:
4277 		DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4278 		if (AT_STRINGS_BEG (d))
4279 		  break;
4280 		goto fail;
4281 
4282 
4283 	/* Match at the very end of the data.  */
4284 	   case endbuf:
4285 		DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4286 	  if (AT_STRINGS_END (d))
4287 	    break;
4288 		goto fail;
4289 
4290 
4291 	   /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4292 		 pushes NULL as the value for the string on the stack.  Then
4293 		 `pop_failure_point' will keep the current value for the
4294 		 string, instead of restoring it.  To see why, consider
4295 		 matching `foo\nbar' against `.*\n'.  The .* matches the foo;
4296 		 then the . fails against the \n.  But the next thing we want
4297 		 to do is match the \n against the \n; if we restored the
4298 		 string value, we would be back at the foo.
4299 
4300 		 Because this is used only in specific cases, we don't need to
4301 		 check all the things that `on_failure_jump' does, to make
4302 		 sure the right things get saved on the stack.  Hence we don't
4303 		 share its code.  The only reason to push anything on the
4304 		 stack at all is that otherwise we would have to change
4305 		 `anychar's code to do something besides goto fail in this
4306 		 case; that seems worse than this.  */
4307 	   case on_failure_keep_string_jump:
4308 		DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4309 
4310 		EXTRACT_NUMBER_AND_INCR (mcnt, p);
4311 		DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4312 
4313 		PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4314 		break;
4315 
4316 
4317 	/* Uses of on_failure_jump:
4318 
4319 		 Each alternative starts with an on_failure_jump that points
4320 		 to the beginning of the next alternative.  Each alternative
4321 		 except the last ends with a jump that in effect jumps past
4322 		 the rest of the alternatives.  (They really jump to the
4323 		 ending jump of the following alternative, because tensioning
4324 		 these jumps is a hassle.)
4325 
4326 		 Repeats start with an on_failure_jump that points past both
4327 		 the repetition text and either the following jump or
4328 		 pop_failure_jump back to this on_failure_jump.  */
4329 	case on_failure_jump:
4330 	   on_failure:
4331 		DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4332 
4333 		EXTRACT_NUMBER_AND_INCR (mcnt, p);
4334 		DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4335 
4336 		/* If this on_failure_jump comes right before a group (i.e.,
4337 		   the original * applied to a group), save the information
4338 		   for that group and all inner ones, so that if we fail back
4339 		   to this point, the group's information will be correct.
4340 		   For example, in \(a*\)*\1, we need the preceding group,
4341 		   and in \(\(a*\)b*\)\2, we need the inner group.  */
4342 
4343 		/* We can't use `p' to check ahead because we push
4344 		   a failure point to `p + mcnt' after we do this.  */
4345 		p1 = p;
4346 
4347 		/* We need to skip no_op's before we look for the
4348 		   start_memory in case this on_failure_jump is happening as
4349 		   the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4350 		   against aba.  */
4351 		while (p1 < pend && (re_opcode_t) *p1 == no_op)
4352 		  p1++;
4353 
4354 		if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4355 		  {
4356 		    /* We have a new highest active register now.  This will
4357 			  get reset at the start_memory we are about to get to,
4358 			  but we will have saved all the registers relevant to
4359 			  this repetition op, as described above.  */
4360 		    highest_active_reg = *(p1 + 1) + *(p1 + 2);
4361 		    if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4362 			 lowest_active_reg = *(p1 + 1);
4363 		  }
4364 
4365 		DEBUG_PRINT1 (":\n");
4366 		PUSH_FAILURE_POINT (p + mcnt, d, -2);
4367 		break;
4368 
4369 
4370 	   /* A smart repeat ends with `maybe_pop_jump'.
4371 	   We change it to either `pop_failure_jump' or `jump'.  */
4372 	   case maybe_pop_jump:
4373 		EXTRACT_NUMBER_AND_INCR (mcnt, p);
4374 		DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4375 		{
4376 	    register unsigned char *p2 = p;
4377 
4378 		  /* Compare the beginning of the repeat with what in the
4379 			pattern follows its end. If we can establish that there
4380 			is nothing that they would both match, i.e., that we
4381 			would have to backtrack because of (as in, e.g., `a*a')
4382 			then we can change to pop_failure_jump, because we'll
4383 			never have to backtrack.
4384 
4385 			This is not true in the case of alternatives: in
4386 			`(a|ab)*' we do need to backtrack to the `ab' alternative
4387 			(e.g., if the string was `ab').  But instead of trying to
4388 			detect that here, the alternative has put on a dummy
4389 			failure point which is what we will end up popping.  */
4390 
4391 	    /* Skip over open/close-group commands.
4392 		  If what follows this loop is a ...+ construct,
4393 		  look at what begins its body, since we will have to
4394 		  match at least one of that.  */
4395 	    while (1)
4396 		 {
4397 		if (p2 + 2 < pend
4398 		    && ((re_opcode_t) *p2 == stop_memory
4399 			|| (re_opcode_t) *p2 == start_memory))
4400 		  p2 += 3;
4401 		else if (p2 + 6 < pend
4402 			 && (re_opcode_t) *p2 == dummy_failure_jump)
4403 		  p2 += 6;
4404 		else
4405 		  break;
4406 		 }
4407 
4408 	    p1 = p + mcnt;
4409 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4410 		  to the `maybe_finalize_jump' of this case.  Examine what
4411 		  follows.  */
4412 
4413 		  /* If we're at the end of the pattern, we can change.  */
4414 		  if (p2 == pend)
4415 		 {
4416 		/* Consider what happens when matching ":\(.*\)"
4417 		   against ":/".  I don't really understand this code
4418 		   yet.  */
4419 		   p[-3] = (unsigned char) pop_failure_jump;
4420 			 DEBUG_PRINT1
4421 			   ("  End of pattern: change to `pop_failure_jump'.\n");
4422 		    }
4423 
4424 		  else if ((re_opcode_t) *p2 == exactn
4425 			|| (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4426 		 {
4427 		register unsigned char c
4428 			   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4429 
4430 			 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4431 			   {
4432 		    p[-3] = (unsigned char) pop_failure_jump;
4433 				DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4434 						    c, p1[5]);
4435 			   }
4436 
4437 		else if ((re_opcode_t) p1[3] == charset
4438 			 || (re_opcode_t) p1[3] == charset_not)
4439 		  {
4440 		    int not = (re_opcode_t) p1[3] == charset_not;
4441 
4442 		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4443 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4444 			 not = !not;
4445 
4446 				/* `not' is equal to 1 if c would match, which means
4447 				    that we can't change to pop_failure_jump.  */
4448 		    if (!not)
4449 				  {
4450 			   p[-3] = (unsigned char) pop_failure_jump;
4451 				    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4452 				  }
4453 		  }
4454 		 }
4455 		  else if ((re_opcode_t) *p2 == charset)
4456 		 {
4457 #ifdef DEBUG
4458 		register unsigned char c
4459 			   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4460 #endif
4461 
4462 			 if ((re_opcode_t) p1[3] == exactn
4463 		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
4464 			  && (p2[1 + p1[4] / BYTEWIDTH]
4465 				 & (1 << (p1[4] % BYTEWIDTH)))))
4466 			   {
4467 		    p[-3] = (unsigned char) pop_failure_jump;
4468 				DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4469 						    c, p1[5]);
4470 			   }
4471 
4472 		else if ((re_opcode_t) p1[3] == charset_not)
4473 		  {
4474 		    int idx;
4475 		    /* We win if the charset_not inside the loop
4476 			  lists every character listed in the charset after.  */
4477 		    for (idx = 0; idx < (int) p2[1]; idx++)
4478 			 if (! (p2[2 + idx] == 0
4479 				|| (idx < (int) p1[4]
4480 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4481 			break;
4482 
4483 		    if (idx == p2[1])
4484 				  {
4485 			   p[-3] = (unsigned char) pop_failure_jump;
4486 				    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4487 				  }
4488 		  }
4489 		else if ((re_opcode_t) p1[3] == charset)
4490 		  {
4491 		    int idx;
4492 		    /* We win if the charset inside the loop
4493 			  has no overlap with the one after the loop.  */
4494 		    for (idx = 0;
4495 			 idx < (int) p2[1] && idx < (int) p1[4];
4496 			 idx++)
4497 			 if ((p2[2 + idx] & p1[5 + idx]) != 0)
4498 			break;
4499 
4500 		    if (idx == p2[1] || idx == p1[4])
4501 				  {
4502 			   p[-3] = (unsigned char) pop_failure_jump;
4503 				    DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4504 				  }
4505 		  }
4506 		 }
4507 	  }
4508 	  p -= 2;		/* Point at relative address again.  */
4509 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4510 	    {
4511 		 p[-1] = (unsigned char) jump;
4512 		    DEBUG_PRINT1 ("  Match => jump.\n");
4513 		 goto unconditional_jump;
4514 	    }
4515 	   /* Note fall through.  */
4516 
4517 
4518 	/* The end of a simple repeat has a pop_failure_jump back to
4519 		 its matching on_failure_jump, where the latter will push a
4520 		 failure point.  The pop_failure_jump takes off failure
4521 		 points put on by this pop_failure_jump's matching
4522 		 on_failure_jump; we got through the pattern to here from the
4523 		 matching on_failure_jump, so didn't fail.  */
4524 	   case pop_failure_jump:
4525 		{
4526 		  /* We need to pass separate storage for the lowest and
4527 			highest registers, even though we don't care about the
4528 			actual values.  Otherwise, we will restore only one
4529 			register from the stack, since lowest will == highest in
4530 			`pop_failure_point'.  */
4531 		  unsigned dummy_low_reg, dummy_high_reg;
4532 		  unsigned char *pdummy;
4533 		  const char *sdummy;
4534 
4535 		  DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4536 		  POP_FAILURE_POINT (sdummy, pdummy,
4537 						 dummy_low_reg, dummy_high_reg,
4538 						 reg_dummy, reg_dummy, reg_info_dummy);
4539 		}
4540 		/* Note fall through.  */
4541 
4542 
4543 	   /* Unconditionally jump (without popping any failure points).  */
4544 	   case jump:
4545 	unconditional_jump:
4546 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4547 		DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4548 	  p += mcnt;				/* Do the jump.  */
4549 		DEBUG_PRINT2 ("(to 0x%x).\n", p);
4550 	  break;
4551 
4552 
4553 	   /* We need this opcode so we can detect where alternatives end
4554 		 in `group_match_null_string_p' et al.  */
4555 	   case jump_past_alt:
4556 		DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4557 		goto unconditional_jump;
4558 
4559 
4560 	   /* Normally, the on_failure_jump pushes a failure point, which
4561 		 then gets popped at pop_failure_jump.  We will end up at
4562 		 pop_failure_jump, also, and with a pattern of, say, `a+', we
4563 		 are skipping over the on_failure_jump, so we have to push
4564 		 something meaningless for pop_failure_jump to pop.  */
4565 	   case dummy_failure_jump:
4566 		DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4567 		/* It doesn't matter what we push for the string here.  What
4568 		   the code at `fail' tests is the value for the pattern.  */
4569 		PUSH_FAILURE_POINT (0, 0, -2);
4570 		goto unconditional_jump;
4571 
4572 
4573 	   /* At the end of an alternative, we need to push a dummy failure
4574 		 point in case we are followed by a `pop_failure_jump', because
4575 		 we don't want the failure point for the alternative to be
4576 		 popped.  For example, matching `(a|ab)*' against `aab'
4577 		 requires that we match the `ab' alternative.  */
4578 	   case push_dummy_failure:
4579 		DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4580 		/* See comments just above at `dummy_failure_jump' about the
4581 		   two zeroes.  */
4582 		PUSH_FAILURE_POINT (0, 0, -2);
4583 		break;
4584 
4585 	   /* Have to succeed matching what follows at least n times.
4586 		 After that, handle like `on_failure_jump'.  */
4587 	   case succeed_n:
4588 		EXTRACT_NUMBER (mcnt, p + 2);
4589 		DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4590 
4591 		assert (mcnt >= 0);
4592 		/* Originally, this is how many times we HAVE to succeed.  */
4593 		if (mcnt > 0)
4594 		  {
4595 			mcnt--;
4596 		  p += 2;
4597 			STORE_NUMBER_AND_INCR (p, mcnt);
4598 			DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p, mcnt);
4599 		  }
4600 	  else if (mcnt == 0)
4601 		  {
4602 		    DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4603 		 p[2] = (unsigned char) no_op;
4604 		    p[3] = (unsigned char) no_op;
4605 		    goto on_failure;
4606 		  }
4607 		break;
4608 
4609 	   case jump_n:
4610 		EXTRACT_NUMBER (mcnt, p + 2);
4611 		DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4612 
4613 		/* Originally, this is how many times we CAN jump.  */
4614 		if (mcnt)
4615 		  {
4616 			mcnt--;
4617 			STORE_NUMBER (p + 2, mcnt);
4618 		  goto unconditional_jump;
4619 		  }
4620 		/* If don't have to jump any more, skip over the rest of command.  */
4621 	  else
4622 	    p += 4;
4623 		break;
4624 
4625 	case set_number_at:
4626 	  {
4627 		  DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4628 
4629 		  EXTRACT_NUMBER_AND_INCR (mcnt, p);
4630 		  p1 = p + mcnt;
4631 		  EXTRACT_NUMBER_AND_INCR (mcnt, p);
4632 		  DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4633 	    STORE_NUMBER (p1, mcnt);
4634 		  break;
4635 		}
4636 
4637 	   case wordbound:
4638 		DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4639 		if (AT_WORD_BOUNDARY (d))
4640 	    break;
4641 		goto fail;
4642 
4643 	case notwordbound:
4644 		DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4645 	  if (AT_WORD_BOUNDARY (d))
4646 	    goto fail;
4647 		break;
4648 
4649 	case wordbeg:
4650 		DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4651 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4652 	    break;
4653 		goto fail;
4654 
4655 	case wordend:
4656 		DEBUG_PRINT1 ("EXECUTING wordend.\n");
4657 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4658 		    && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4659 	    break;
4660 		goto fail;
4661 
4662 #ifdef emacs
4663 	case before_dot:
4664 		DEBUG_PRINT1 ("EXECUTING before_dot.\n");
4665 	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
4666 	    goto fail;
4667 	  break;
4668 
4669 	case at_dot:
4670 		DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4671 	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
4672 	    goto fail;
4673 	  break;
4674 
4675 	case after_dot:
4676 		DEBUG_PRINT1 ("EXECUTING after_dot.\n");
4677 		if (PTR_CHAR_POS ((unsigned char *) d) <= point)
4678 	    goto fail;
4679 	  break;
4680 #if 0 /* not emacs19 */
4681 	case at_dot:
4682 		DEBUG_PRINT1 ("EXECUTING at_dot.\n");
4683 	  if (PTR_CHAR_POS ((unsigned char *) d) + 1 != point)
4684 	    goto fail;
4685 	  break;
4686 #endif /* not emacs19 */
4687 
4688 	case syntaxspec:
4689 		DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
4690 	  mcnt = *p++;
4691 	  goto matchsyntax;
4692 
4693 	   case wordchar:
4694 		DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
4695 	  mcnt = (int) Sword;
4696 	   matchsyntax:
4697 	  PREFETCH ();
4698 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
4699 	  d++;
4700 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
4701 	    goto fail;
4702 		SET_REGS_MATCHED ();
4703 	  break;
4704 
4705 	case notsyntaxspec:
4706 		DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
4707 	  mcnt = *p++;
4708 	  goto matchnotsyntax;
4709 
4710 	   case notwordchar:
4711 		DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
4712 	  mcnt = (int) Sword;
4713 	   matchnotsyntax:
4714 	  PREFETCH ();
4715 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
4716 	  d++;
4717 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
4718 	    goto fail;
4719 	  SET_REGS_MATCHED ();
4720 		break;
4721 
4722 #else /* not emacs */
4723 	case wordchar:
4724 		DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
4725 	  PREFETCH ();
4726 		if (!WORDCHAR_P (d))
4727 		  goto fail;
4728 	  SET_REGS_MATCHED ();
4729 		d++;
4730 	  break;
4731 
4732 	case notwordchar:
4733 		DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
4734 	  PREFETCH ();
4735 	  if (WORDCHAR_P (d))
4736 		  goto fail;
4737 		SET_REGS_MATCHED ();
4738 		d++;
4739 	  break;
4740 #endif /* not emacs */
4741 
4742 	   default:
4743 		abort ();
4744 	}
4745 	 continue;  /* Successfully executed one pattern command; keep going.  */
4746 
4747 
4748     /* We goto here if a matching operation fails. */
4749     fail:
4750 	 if (!FAIL_STACK_EMPTY ())
4751 	{ /* A restart point is known.  Restore to that state.  */
4752 		DEBUG_PRINT1 ("\nFAIL:\n");
4753 		POP_FAILURE_POINT (d, p,
4754 					    lowest_active_reg, highest_active_reg,
4755 					    regstart, regend, reg_info);
4756 
4757 		/* If this failure point is a dummy, try the next one.  */
4758 		if (!p)
4759 	    goto fail;
4760 
4761 		/* If we failed to the end of the pattern, don't examine *p.  */
4762 	  assert (p <= pend);
4763 		if (p < pend)
4764 		  {
4765 		    boolean is_a_jump_n = false;
4766 
4767 		    /* If failed to a backwards jump that's part of a repetition
4768 			  loop, need to pop this failure point and use the next one.  */
4769 		    switch ((re_opcode_t) *p)
4770 			 {
4771 			 case jump_n:
4772 			   is_a_jump_n = true;
4773 			 case maybe_pop_jump:
4774 			 case pop_failure_jump:
4775 			 case jump:
4776 			   p1 = p + 1;
4777 			   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4778 			   p1 += mcnt;
4779 
4780 			   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
4781 				  || (!is_a_jump_n
4782 					 && (re_opcode_t) *p1 == on_failure_jump))
4783 				goto fail;
4784 			   break;
4785 			 default:
4786 			   /* do nothing */ ;
4787 			 }
4788 		  }
4789 
4790 		if (d >= string1 && d <= end1)
4791 	    dend = end_match_1;
4792 	   }
4793 	 else
4794 	   break;   /* Matching at this starting point really fails.  */
4795     } /* for (;;) */
4796 
4797   if (best_regs_set)
4798     goto restore_best_regs;
4799 
4800   FREE_VARIABLES ();
4801 
4802   return -1;         			/* Failure to match.  */
4803 } /* re_match_2 */
4804 
4805 /* Subroutine definitions for re_match_2.  */
4806 
4807 
4808 /* We are passed P pointing to a register number after a start_memory.
4809 
4810    Return true if the pattern up to the corresponding stop_memory can
4811    match the empty string, and false otherwise.
4812 
4813    If we find the matching stop_memory, sets P to point to one past its number.
4814    Otherwise, sets P to an undefined byte less than or equal to END.
4815 
4816    We don't handle duplicates properly (yet).  */
4817 
4818 static boolean
group_match_null_string_p(p,end,reg_info)4819 group_match_null_string_p (p, end, reg_info)
4820     unsigned char **p, *end;
4821     register_info_type *reg_info;
4822 {
4823   int mcnt;
4824   /* Point to after the args to the start_memory.  */
4825   unsigned char *p1 = *p + 2;
4826 
4827   while (p1 < end)
4828     {
4829 	 /* Skip over opcodes that can match nothing, and return true or
4830 	 false, as appropriate, when we get to one that can't, or to the
4831 	    matching stop_memory.  */
4832 
4833 	 switch ((re_opcode_t) *p1)
4834 	   {
4835 	   /* Could be either a loop or a series of alternatives.  */
4836 	   case on_failure_jump:
4837 		p1++;
4838 		EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4839 
4840 		/* If the next operation is not a jump backwards in the
4841 		pattern.  */
4842 
4843 	  if (mcnt >= 0)
4844 	    {
4845 		    /* Go through the on_failure_jumps of the alternatives,
4846 			  seeing if any of the alternatives cannot match nothing.
4847 			  The last alternative starts with only a jump,
4848 			  whereas the rest start with on_failure_jump and end
4849 			  with a jump, e.g., here is the pattern for `a|b|c':
4850 
4851 			  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4852 			  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4853 			  /exactn/1/c
4854 
4855 			  So, we have to first go through the first (n-1)
4856 			  alternatives and then deal with the last one separately.  */
4857 
4858 
4859 		    /* Deal with the first (n-1) alternatives, which start
4860 			  with an on_failure_jump (see above) that jumps to right
4861 			  past a jump_past_alt.  */
4862 
4863 		    while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
4864 			 {
4865 			   /* `mcnt' holds how many bytes long the alternative
4866 				 is, including the ending `jump_past_alt' and
4867 				 its number.  */
4868 
4869 			   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4870 								  reg_info))
4871 				return false;
4872 
4873 			   /* Move to right after this alternative, including the
4874 			jump_past_alt.  */
4875 			   p1 += mcnt;
4876 
4877 			   /* Break if it's the beginning of an n-th alternative
4878 				 that doesn't begin with an on_failure_jump.  */
4879 			   if ((re_opcode_t) *p1 != on_failure_jump)
4880 				break;
4881 
4882 		  /* Still have to check that it's not an n-th
4883 			alternative that starts with an on_failure_jump.  */
4884 		  p1++;
4885 			   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4886 			   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
4887 				{
4888 			 /* Get to the beginning of the n-th alternative.  */
4889 				  p1 -= 3;
4890 				  break;
4891 				}
4892 			 }
4893 
4894 		    /* Deal with the last alternative: go back and get number
4895 			  of the `jump_past_alt' just before it.  `mcnt' contains
4896 			  the length of the alternative.  */
4897 		    EXTRACT_NUMBER (mcnt, p1 - 2);
4898 
4899 		    if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4900 			 return false;
4901 
4902 		    p1 += mcnt;	/* Get past the n-th alternative.  */
4903 		  } /* if mcnt > 0 */
4904 		break;
4905 
4906 
4907 	   case stop_memory:
4908 	  assert (p1[1] == **p);
4909 		*p = p1 + 2;
4910 		return true;
4911 
4912 
4913 	   default:
4914 		if (!common_op_match_null_string_p (&p1, end, reg_info))
4915 		  return false;
4916 	   }
4917     } /* while p1 < end */
4918 
4919   return false;
4920 } /* group_match_null_string_p */
4921 
4922 
4923 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4924    It expects P to be the first byte of a single alternative and END one
4925    byte past the last. The alternative can contain groups.  */
4926 
4927 static boolean
alt_match_null_string_p(p,end,reg_info)4928 alt_match_null_string_p (p, end, reg_info)
4929     unsigned char *p, *end;
4930     register_info_type *reg_info;
4931 {
4932   int mcnt;
4933   unsigned char *p1 = p;
4934 
4935   while (p1 < end)
4936     {
4937 	 /* Skip over opcodes that can match nothing, and break when we get
4938 	    to one that can't.  */
4939 
4940 	 switch ((re_opcode_t) *p1)
4941 	   {
4942 	/* It's a loop.  */
4943 	   case on_failure_jump:
4944 		p1++;
4945 		EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4946 		p1 += mcnt;
4947 		break;
4948 
4949 	default:
4950 		if (!common_op_match_null_string_p (&p1, end, reg_info))
4951 		  return false;
4952 	   }
4953     }  /* while p1 < end */
4954 
4955   return true;
4956 } /* alt_match_null_string_p */
4957 
4958 
4959 /* Deals with the ops common to group_match_null_string_p and
4960    alt_match_null_string_p.
4961 
4962    Sets P to one after the op and its arguments, if any.  */
4963 
4964 static boolean
common_op_match_null_string_p(p,end,reg_info)4965 common_op_match_null_string_p (p, end, reg_info)
4966     unsigned char **p, *end;
4967     register_info_type *reg_info;
4968 {
4969   int mcnt;
4970   boolean ret;
4971   int reg_no;
4972   unsigned char *p1 = *p;
4973 
4974   switch ((re_opcode_t) *p1++)
4975     {
4976     case no_op:
4977     case begline:
4978     case endline:
4979     case begbuf:
4980     case endbuf:
4981     case wordbeg:
4982     case wordend:
4983     case wordbound:
4984     case notwordbound:
4985 #ifdef emacs
4986     case before_dot:
4987     case at_dot:
4988     case after_dot:
4989 #endif
4990 	 break;
4991 
4992     case start_memory:
4993 	 reg_no = *p1;
4994 	 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
4995 	 ret = group_match_null_string_p (&p1, end, reg_info);
4996 
4997 	 /* Have to set this here in case we're checking a group which
4998 	    contains a group and a back reference to it.  */
4999 
5000 	 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5001 	   REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5002 
5003 	 if (!ret)
5004 	   return false;
5005 	 break;
5006 
5007     /* If this is an optimized succeed_n for zero times, make the jump.  */
5008     case jump:
5009 	 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5010 	 if (mcnt >= 0)
5011 	   p1 += mcnt;
5012 	 else
5013 	   return false;
5014 	 break;
5015 
5016     case succeed_n:
5017 	 /* Get to the number of times to succeed.  */
5018 	 p1 += 2;
5019 	 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5020 
5021 	 if (mcnt == 0)
5022 	   {
5023 		p1 -= 4;
5024 		EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5025 		p1 += mcnt;
5026 	   }
5027 	 else
5028 	   return false;
5029 	 break;
5030 
5031     case duplicate:
5032 	 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5033 	   return false;
5034 	 break;
5035 
5036     case set_number_at:
5037 	 p1 += 4;
5038 
5039     default:
5040 	 /* All other opcodes mean we cannot match the empty string.  */
5041 	 return false;
5042   }
5043 
5044   *p = p1;
5045   return true;
5046 } /* common_op_match_null_string_p */
5047 
5048 
5049 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5050    bytes; nonzero otherwise.  */
5051 
5052 static int
bcmp_translate(s1,s2,len,translate)5053 bcmp_translate (s1, s2, len, translate)
5054 	unsigned char *s1, *s2;
5055 	register int len;
5056 	char *translate;
5057 {
5058   register unsigned char *p1 = s1, *p2 = s2;
5059   while (len)
5060     {
5061 	 if (translate[*p1++] != translate[*p2++]) return 1;
5062 	 len--;
5063     }
5064   return 0;
5065 }
5066 
5067 /* Entry points for GNU code.  */
5068 
5069 /* re_compile_pattern is the GNU regular expression compiler: it
5070    compiles PATTERN (of length SIZE) and puts the result in BUFP.
5071    Returns 0 if the pattern was valid, otherwise an error string.
5072 
5073    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5074    are set in BUFP on entry.
5075 
5076    We call regex_compile to do the actual compilation.  */
5077 
5078 const char *
re_compile_pattern(pattern,length,bufp)5079 re_compile_pattern (pattern, length, bufp)
5080 	const char *pattern;
5081 	int length;
5082 	struct re_pattern_buffer *bufp;
5083 {
5084   reg_errcode_t ret;
5085 
5086   /* GNU code is written to assume at least RE_NREGS registers will be set
5087 	(and at least one extra will be -1).  */
5088   bufp->regs_allocated = REGS_UNALLOCATED;
5089 
5090   /* And GNU code determines whether or not to get register information
5091 	by passing null for the REGS argument to re_match, etc., not by
5092 	setting no_sub.  */
5093   bufp->no_sub = 0;
5094 
5095   /* Match anchors at newline.  */
5096   bufp->newline_anchor = 1;
5097 
5098   ret = regex_compile (pattern, length, re_syntax_options, bufp);
5099 
5100   if (!ret)
5101     return NULL;
5102   return gettext (re_error_msgid[(int) ret]);
5103 }
5104 
5105 /* Entry points compatible with 4.2 BSD regex library.  We don't define
5106    them unless specifically requested.  */
5107 
5108 #ifdef _REGEX_RE_COMP
5109 
5110 /* BSD has one and only one pattern buffer.  */
5111 static struct re_pattern_buffer re_comp_buf;
5112 
5113 char *
re_comp(s)5114 re_comp (s)
5115     const char *s;
5116 {
5117   reg_errcode_t ret;
5118 
5119   if (!s)
5120     {
5121 	 if (!re_comp_buf.buffer)
5122 	return gettext ("No previous regular expression");
5123 	 return 0;
5124     }
5125 
5126   if (!re_comp_buf.buffer)
5127     {
5128 	 re_comp_buf.buffer = (unsigned char *) malloc (200);
5129 	 if (re_comp_buf.buffer == NULL)
5130 	   return gettext (re_error_msgid[(int) REG_ESPACE]);
5131 	 re_comp_buf.allocated = 200;
5132 
5133 	 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5134 	 if (re_comp_buf.fastmap == NULL)
5135 	return gettext (re_error_msgid[(int) REG_ESPACE]);
5136     }
5137 
5138   /* Since `re_exec' always passes NULL for the `regs' argument, we
5139 	don't need to initialize the pattern buffer fields which affect it.  */
5140 
5141   /* Match anchors at newlines.  */
5142   re_comp_buf.newline_anchor = 1;
5143 
5144   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5145 
5146   if (!ret)
5147     return NULL;
5148 
5149   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5150   return (char *) gettext (re_error_msgid[(int) ret]);
5151 }
5152 
5153 
5154 int
re_exec(s)5155 re_exec (s)
5156     const char *s;
5157 {
5158   const int len = strlen (s);
5159   return
5160     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5161 }
5162 #endif /* _REGEX_RE_COMP */
5163 
5164 /* POSIX.2 functions.  Don't define these for Emacs.  */
5165 
5166 #ifndef emacs
5167 
5168 /* regcomp takes a regular expression as a string and compiles it.
5169 
5170    PREG is a regex_t *.  We do not expect any fields to be initialized,
5171    since POSIX says we shouldn't.  Thus, we set
5172 
5173 	`buffer' to the compiled pattern;
5174 	`used' to the length of the compiled pattern;
5175 	`syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5176 	  REG_EXTENDED bit in CFLAGS is set; otherwise, to
5177 	  RE_SYNTAX_POSIX_BASIC;
5178 	`newline_anchor' to REG_NEWLINE being set in CFLAGS;
5179 	`fastmap' and `fastmap_accurate' to zero;
5180 	`re_nsub' to the number of subexpressions in PATTERN.
5181 
5182    PATTERN is the address of the pattern string.
5183 
5184    CFLAGS is a series of bits which affect compilation.
5185 
5186 	If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5187 	use POSIX basic syntax.
5188 
5189 	If REG_NEWLINE is set, then . and [^...] don't match newline.
5190 	Also, regexec will try a match beginning after every newline.
5191 
5192 	If REG_ICASE is set, then we considers upper- and lowercase
5193 	versions of letters to be equivalent when matching.
5194 
5195 	If REG_NOSUB is set, then when PREG is passed to regexec, that
5196 	routine will report only success or failure, and nothing about the
5197 	registers.
5198 
5199    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
5200    the return codes and their meanings.)  */
5201 
5202 int
regcomp(preg,pattern,cflags)5203 regcomp (preg, pattern, cflags)
5204     regex_t *preg;
5205     const char *pattern;
5206     int cflags;
5207 {
5208   reg_errcode_t ret;
5209   unsigned syntax
5210     = (cflags & REG_EXTENDED) ?
5211 	 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5212 
5213   /* regex_compile will allocate the space for the compiled pattern.  */
5214   preg->buffer = 0;
5215   preg->allocated = 0;
5216   preg->used = 0;
5217 
5218   /* Don't bother to use a fastmap when searching.  This simplifies the
5219 	REG_NEWLINE case: if we used a fastmap, we'd have to put all the
5220 	characters after newlines into the fastmap.  This way, we just try
5221 	every character.  */
5222   preg->fastmap = 0;
5223 
5224   if (cflags & REG_ICASE)
5225     {
5226 	 unsigned i;
5227 
5228 	 preg->translate = (char *) malloc (CHAR_SET_SIZE);
5229 	 if (preg->translate == NULL)
5230 	   return (int) REG_ESPACE;
5231 
5232 	 /* Map uppercase characters to corresponding lowercase ones.  */
5233 	 for (i = 0; i < CHAR_SET_SIZE; i++)
5234 	   preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5235     }
5236   else
5237     preg->translate = NULL;
5238 
5239   /* If REG_NEWLINE is set, newlines are treated differently.  */
5240   if (cflags & REG_NEWLINE)
5241     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5242 	 syntax &= ~RE_DOT_NEWLINE;
5243 	 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5244 	 /* It also changes the matching behavior.  */
5245 	 preg->newline_anchor = 1;
5246     }
5247   else
5248     preg->newline_anchor = 0;
5249 
5250   preg->no_sub = !!(cflags & REG_NOSUB);
5251 
5252   /* POSIX says a null character in the pattern terminates it, so we
5253 	can use strlen here in compiling the pattern.  */
5254   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5255 
5256   /* POSIX doesn't distinguish between an unmatched open-group and an
5257 	unmatched close-group: both are REG_EPAREN.  */
5258   if (ret == REG_ERPAREN) ret = REG_EPAREN;
5259 
5260   return (int) ret;
5261 }
5262 
5263 
5264 /* regexec searches for a given pattern, specified by PREG, in the
5265    string STRING.
5266 
5267    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5268    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5269    least NMATCH elements, and we set them to the offsets of the
5270    corresponding matched substrings.
5271 
5272    EFLAGS specifies `execution flags' which affect matching: if
5273    REG_NOTBOL is set, then ^ does not match at the beginning of the
5274    string; if REG_NOTEOL is set, then $ does not match at the end.
5275 
5276    We return 0 if we find a match and REG_NOMATCH if not.  */
5277 
5278 int
regexec(preg,string,nmatch,pmatch,eflags)5279 regexec (preg, string, nmatch, pmatch, eflags)
5280     const regex_t *preg;
5281     const char *string;
5282     size_t nmatch;
5283     regmatch_t pmatch[];
5284     int eflags;
5285 {
5286   int ret;
5287   struct re_registers regs;
5288   regex_t private_preg;
5289   int len = strlen (string);
5290   boolean want_reg_info = !preg->no_sub && nmatch > 0;
5291 
5292   private_preg = *preg;
5293 
5294   private_preg.not_bol = !!(eflags & REG_NOTBOL);
5295   private_preg.not_eol = !!(eflags & REG_NOTEOL);
5296 
5297   /* The user has told us exactly how many registers to return
5298 	information about, via `nmatch'.  We have to pass that on to the
5299 	matching routines.  */
5300   private_preg.regs_allocated = REGS_FIXED;
5301 
5302   if (want_reg_info)
5303     {
5304 	 regs.num_regs = nmatch;
5305 	 regs.start = TALLOC (nmatch, regoff_t);
5306 	 regs.end = TALLOC (nmatch, regoff_t);
5307 	 if (regs.start == NULL || regs.end == NULL)
5308 	   return (int) REG_NOMATCH;
5309     }
5310 
5311   /* Perform the searching operation.  */
5312   ret = re_search (&private_preg, string, len,
5313 			    /* start: */ 0, /* range: */ len,
5314 			    want_reg_info ? &regs : (struct re_registers *) 0);
5315 
5316   /* Copy the register information to the POSIX structure.  */
5317   if (want_reg_info)
5318     {
5319 	 if (ret >= 0)
5320 	   {
5321 		unsigned r;
5322 
5323 		for (r = 0; r < nmatch; r++)
5324 		  {
5325 		    pmatch[r].rm_so = regs.start[r];
5326 		    pmatch[r].rm_eo = regs.end[r];
5327 		  }
5328 	   }
5329 
5330 	 /* If we needed the temporary register info, free the space now.  */
5331 	 free (regs.start);
5332 	 free (regs.end);
5333     }
5334 
5335   /* We want zero return to mean success, unlike `re_search'.  */
5336   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5337 }
5338 
5339 
5340 /* Returns a message corresponding to an error code, ERRCODE, returned
5341    from either regcomp or regexec.   We don't use PREG here.  */
5342 
5343 size_t
regerror(errcode,preg,errbuf,errbuf_size)5344 regerror (errcode, preg, errbuf, errbuf_size)
5345     int errcode;
5346     const regex_t *preg;
5347     char *errbuf;
5348     size_t errbuf_size;
5349 {
5350   const char *msg;
5351   size_t msg_size;
5352 
5353   if (errcode < 0
5354 	 || errcode >= (sizeof (re_error_msgid) / sizeof (re_error_msgid[0])))
5355     /* Only error codes returned by the rest of the code should be passed
5356 	  to this routine.  If we are given anything else, or if other regex
5357 	  code generates an invalid error code, then the program has a bug.
5358 	  Dump core so we can fix it.  */
5359     abort ();
5360 
5361   msg = gettext (re_error_msgid[errcode]);
5362 
5363   msg_size = strlen (msg) + 1; /* Includes the null.  */
5364 
5365   if (errbuf_size != 0)
5366     {
5367 	 if (msg_size > errbuf_size)
5368 	   {
5369 		strncpy (errbuf, msg, errbuf_size - 1);
5370 		errbuf[errbuf_size - 1] = 0;
5371 	   }
5372 	 else
5373 	   strcpy (errbuf, msg);
5374     }
5375 
5376   return msg_size;
5377 }
5378 
5379 
5380 /* Free dynamically allocated space used by PREG.  */
5381 
5382 void
regfree(preg)5383 regfree (preg)
5384     regex_t *preg;
5385 {
5386   if (preg->buffer != NULL)
5387     free (preg->buffer);
5388   preg->buffer = NULL;
5389 
5390   preg->allocated = 0;
5391   preg->used = 0;
5392 
5393   if (preg->fastmap != NULL)
5394     free (preg->fastmap);
5395   preg->fastmap = NULL;
5396   preg->fastmap_accurate = 0;
5397 
5398   if (preg->translate != NULL)
5399     free (preg->translate);
5400   preg->translate = NULL;
5401 }
5402 
5403 #endif /* not emacs  */
5404 
5405 /*
5406 Local variables:
5407 make-backup-files: t
5408 version-control: t
5409 trim-versions-without-asking: nil
5410 End:
5411 */
5412