1 /*
2  *   Extended regular expression matching and search library,
3  *   version 0.12.
4  *   (Implements POSIX draft P1003.2/D11.2, except for some of the
5  *   internationalization features.)
6  *
7  *   This code will be used by Epos only if the operating system offers
8  *   no regular expressions as a standard library.
9  *
10  *   rx.c is (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
11  *
12  *   This file is free software; you can redistribute it and/or
13  *   modify it under the terms of the GNU Library General Public License as
14  *   published by the Free Software Foundation; either version 2 of the
15  *   License, or (at your option) any later version.
16  *
17  *   The GNU C Library is distributed in the hope that it will be useful,
18  *   but WITHOUT ANY WARRANTY; without even the implied warranty of
19  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
20  *   Library General Public License in libs/regex/COPYING for more details.
21  *
22  */
23 
24 #define REGEX_MALLOC
25 
26 /* AIX requires this to be the first thing in the file. */
27 #if defined (_AIX) && !defined (REGEX_MALLOC)
28   #pragma alloca
29 #endif
30 
31 #undef	_GNU_SOURCE
32 #define _GNU_SOURCE
33 
34 #ifdef DEBUG
35 	#undef DEBUG
36 #endif
37 
38 #include "config.h"
39 
40 #ifdef BROKEN_LOCALE
isupper(int c)41 	int isupper(int c) { return 0; }
isdigit(int c)42 	int isdigit(int c) { return c <= '9' && c >= '0'; }
43 #endif
44 
45 
46 
47 #ifndef const
48 #define const		/* Visual C++ for example dislikes free()ing const data */
49 #endif
50 
51 #ifndef  __STDC__
52 #define  __STDC__  1
53 #endif
54 
55 #if defined(STDC_HEADERS) && !defined(emacs)
56 #include <stddef.h>
57 #else
58 /* We need this for `rx.h', and perhaps for the Emacs include files.  */
59 #include <sys/types.h>
60 #endif
61 
62 /* For platform which support the ISO C amendement 1 functionality we
63    support user defined character classes.  */
64 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
65 # include <wctype.h>
66 # include <wchar.h>
67 #endif
68 
69 /* This is for other GNU distributions with internationalized messages.  */
70 #if HAVE_LIBINTL_H || defined (_LIBC)
71 # include <libintl.h>
72 #else
73 # define gettext(msgid) (msgid)
74 #endif
75 
76 #ifndef gettext_noop
77 /* This define is so xgettext can find the internationalizable
78    strings.  */
79 #define gettext_noop(String) String
80 #endif
81 
82 /* The `emacs' switch turns on certain matching commands
83    that make sense only in Emacs. */
84 #ifdef emacs
85 
86 #include "lisp.h"
87 #include "buffer.h"
88 #include "syntax.h"
89 
90 #else  /* not emacs */
91 
92 /* If we are not linking with Emacs proper,
93    we can't use the relocating allocator
94    even if config.h says that we can.  */
95 #undef REL_ALLOC
96 
97 #if defined (STDC_HEADERS) || defined (_LIBC)
98 #include <stdlib.h>
99 #else
100 char *malloc ();
101 char *realloc ();
102 void free(void *ptr);
103 #endif
104 
call_abort(void)105 static void call_abort(void)
106 {
107 #ifdef HAVE_ABORT
108 	abort();
109 #else
110 	while (1) ;
111 #endif
112 }
113 
114 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
115    If nothing else has been done, use the method below.  */
116 #ifdef INHIBIT_STRING_HEADER
117 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
118 #if !defined (bzero) && !defined (bcopy)
119 #undef INHIBIT_STRING_HEADER
120 #endif
121 #endif
122 #endif
123 
124 /* This is the normal way of making sure we have a bcopy and a bzero.
125    This is used in most programs--a few other programs avoid this
126    by defining INHIBIT_STRING_HEADER.  */
127 #ifndef INHIBIT_STRING_HEADER
128 #if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC)
129 #include <string.h>
130 #ifndef bcmp
131 #define bcmp(s1, s2, n)	memcmp ((s1), (s2), (n))
132 #endif
133 #ifndef bcopy
134 #define bcopy(s, d, n)	memcpy ((d), (s), (n))
135 #endif
136 #ifndef bzero
137 #define bzero(s, n)	memset ((s), 0, (n))
138 #endif
139 #else
140 #include <strings.h>
141 #endif
142 #endif
143 
144 /* Define the syntax stuff for \<, \>, etc.  */
145 
146 /* This must be nonzero for the wordchar and notwordchar pattern
147    commands in re_match_2.  */
148 #ifndef Sword
149 #define Sword 1
150 #endif
151 
152 #ifdef SWITCH_ENUM_BUG
153 #define SWITCH_ENUM_CAST(x) ((int)(x))
154 #else
155 #define SWITCH_ENUM_CAST(x) (x)
156 #endif
157 
158 #ifdef SYNTAX_TABLE
159 
160 extern char *re_syntax_table;
161 
162 #else /* not SYNTAX_TABLE */
163 
164 /* How many characters in the character set.  */
165 #define CHAR_SET_SIZE 256
166 
167 static char re_syntax_table[CHAR_SET_SIZE];
168 
169 static void
init_syntax_once()170 init_syntax_once ()
171 {
172    register int c;
173    static int done = 0;
174 
175    if (done)
176      return;
177 
178    bzero (re_syntax_table, sizeof re_syntax_table);
179 
180    for (c = 'a'; c <= 'z'; c++)
181      re_syntax_table[c] = Sword;
182 
183    for (c = 'A'; c <= 'Z'; c++)
184      re_syntax_table[c] = Sword;
185 
186    for (c = '0'; c <= '9'; c++)
187      re_syntax_table[c] = Sword;
188 
189    re_syntax_table['_'] = Sword;
190 
191    done = 1;
192 }
193 
194 #endif /* not SYNTAX_TABLE */
195 
196 #define SYNTAX(c) re_syntax_table[c]
197 
198 #endif /* not emacs */
199 
200 /* Get the interface, including the syntax bits.  */
201 #include "rx.h"
202 
203 /* isalpha etc. are used for the character classes.  */
204 #include <ctype.h>
205 
206 /* Jim Meyering writes:
207 
208    "... Some ctype macros are valid only for character codes that
209    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
210    using /bin/cc or gcc but without giving an ansi option).  So, all
211    ctype uses should be through macros like ISPRINT...  If
212    STDC_HEADERS is defined, then autoconf has verified that the ctype
213    macros don't need to be guarded with references to isascii. ...
214    Defining isascii to 1 should let any compiler worth its salt
215    eliminate the && through constant folding."  */
216 
217 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
218 #define ISASCII(c) 1
219 #else
220 #define ISASCII(c) isascii(c)
221 #endif
222 
223 #ifdef isblank
224 #define ISBLANK(c) (ISASCII (c) && isblank (c))
225 #else
226 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
227 #endif
228 #ifdef isgraph
229 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
230 #else
231 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
232 #endif
233 
234 #define ISPRINT(c) (ISASCII (c) && isprint (c))
235 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
236 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
237 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
238 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
239 #define ISLOWER(c) (ISASCII (c) && islower (c))
240 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
241 #define ISSPACE(c) (ISASCII (c) && isspace (c))
242 #define ISUPPER(c) (ISASCII (c) && isupper (c))
243 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
244 
245 #ifndef NULL
246 #define NULL (void *)0
247 #endif
248 
249 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
250    since ours (we hope) works properly with all combinations of
251    machines, compilers, `char' and `unsigned char' argument types.
252    (Per Bothner suggested the basic approach.)  */
253 #undef SIGN_EXTEND_CHAR
254 #if __STDC__
255 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
256 #else  /* not __STDC__ */
257 /* As in Harbison and Steele.  */
258 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
259 #endif
260 
261 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
262    use `alloca' instead of `malloc'.  This is because using malloc in
263    re_search* or re_match* could cause memory leaks when C-g is used in
264    Emacs; also, malloc is slower and causes storage fragmentation.  On
265    the other hand, malloc is more portable, and easier to debug.
266 
267    Because we sometimes use alloca, some routines have to be macros,
268    not functions -- `alloca'-allocated space disappears at the end of the
269    function it is called in.  */
270 
271 #ifdef REGEX_MALLOC
272 
273 #define REGEX_ALLOCATE malloc
274 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
275 #define REGEX_FREE free
276 
277 #else /* not REGEX_MALLOC  */
278 
279 /* Emacs already defines alloca, sometimes.  */
280 #ifndef alloca
281 
282 /* Make alloca work the best possible way.  */
283 #ifdef __GNUC__
284 #define alloca __builtin_alloca
285 #else /* not __GNUC__ */
286 #if HAVE_ALLOCA_H
287 #include <alloca.h>
288 #else /* not __GNUC__ or HAVE_ALLOCA_H */
289 #if 0 /* It is a bad idea to declare alloca.  We always cast the result.  */
290 #ifndef _AIX /* Already did AIX, up at the top.  */
291 char *alloca ();
292 #endif /* not _AIX */
293 #endif
294 #endif /* not HAVE_ALLOCA_H */
295 #endif /* not __GNUC__ */
296 
297 #endif /* not alloca */
298 
299 #define REGEX_ALLOCATE alloca
300 
301 /* Assumes a `char *destination' variable.  */
302 #define REGEX_REALLOCATE(source, osize, nsize)				\
303   (destination = (char *) alloca (nsize),				\
304    bcopy (source, destination, osize),					\
305    destination)
306 
307 /* No need to do anything to free, after alloca.  */
308 #define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
309 
310 #endif /* not REGEX_MALLOC */
311 
312 /* Define how to allocate the failure stack.  */
313 
314 #if defined (REL_ALLOC) && defined (REGEX_MALLOC)
315 
316 #define REGEX_ALLOCATE_STACK(size)				\
317   r_alloc (&failure_stack_ptr, (size))
318 #define REGEX_REALLOCATE_STACK(source, osize, nsize)		\
319   r_re_alloc (&failure_stack_ptr, (nsize))
320 #define REGEX_FREE_STACK(ptr)					\
321   r_alloc_free (&failure_stack_ptr)
322 
323 #else /* not using relocating allocator */
324 
325 #ifdef REGEX_MALLOC
326 
327 #define REGEX_ALLOCATE_STACK malloc
328 #define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
329 #define REGEX_FREE_STACK free
330 
331 #else /* not REGEX_MALLOC */
332 
333 #define REGEX_ALLOCATE_STACK alloca
334 
335 #define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
336    REGEX_REALLOCATE (source, osize, nsize)
337 /* No need to explicitly free anything.  */
338 #define REGEX_FREE_STACK(arg)
339 
340 #endif /* not REGEX_MALLOC */
341 #endif /* not using relocating allocator */
342 
343 
344 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
345    `string1' or just past its end.  This works if PTR is NULL, which is
346    a good thing.  */
347 #define FIRST_STRING_P(ptr) 					\
348   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
349 
350 /* (Re)Allocate N items of type T using malloc, or fail.  */
351 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
352 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
353 #define RETALLOC_IF(addr, n, t) \
354   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
355 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
356 
357 #define BYTEWIDTH 8 /* In bits.  */
358 
359 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
360 
361 #undef MAX
362 #undef MIN
363 #define MAX(a, b) ((a) > (b) ? (a) : (b))
364 #define MIN(a, b) ((a) < (b) ? (a) : (b))
365 
366 typedef char boolean;
367 #define false 0
368 #define true 1
369 
370 struct re_pattern_buffer;
371 
372 static int
373 re_match_2_internal (struct re_pattern_buffer *bufp, const char *string1, int size1,
374 	const char *string2, int size2, int pos, struct re_registers *regs, int stop);
375 
376 
377 /* These are the command codes that appear in compiled regular
378    expressions.  Some opcodes are followed by argument bytes.  A
379    command code can specify any interpretation whatsoever for its
380    arguments.  Zero bytes may appear in the compiled regular expression.  */
381 
382 typedef enum
383 {
384   no_op = 0,
385 
386   /* Succeed right away--no more backtracking.  */
387   succeed,
388 
389         /* Followed by one byte giving n, then by n literal bytes.  */
390   exactn,
391 
392         /* Matches any (more or less) character.  */
393   anychar,
394 
395         /* Matches any one char belonging to specified set.  First
396            following byte is number of bitmap bytes.  Then come bytes
397            for a bitmap saying which chars are in.  Bits in each byte
398            are ordered low-bit-first.  A character is in the set if its
399            bit is 1.  A character too large to have a bit in the map is
400            automatically not in the set.  */
401   charset,
402 
403         /* Same parameters as charset, but match any character that is
404            not one of those specified.  */
405   charset_not,
406 
407         /* Start remembering the text that is matched, for storing in a
408            register.  Followed by one byte with the register number, in
409            the range 0 to one less than the pattern buffer's re_nsub
410            field.  Then followed by one byte with the number of groups
411            inner to this one.  (This last has to be part of the
412            start_memory only because we need it in the on_failure_jump
413            of re_match_2.)  */
414   start_memory,
415 
416         /* Stop remembering the text that is matched and store it in a
417            memory register.  Followed by one byte with the register
418            number, in the range 0 to one less than `re_nsub' in the
419            pattern buffer, and one byte with the number of inner groups,
420            just like `start_memory'.  (We need the number of inner
421            groups here because we don't have any easy way of finding the
422            corresponding start_memory when we're at a stop_memory.)  */
423   stop_memory,
424 
425         /* Match a duplicate of something remembered. Followed by one
426            byte containing the register number.  */
427   duplicate,
428 
429         /* Fail unless at beginning of line.  */
430   begline,
431 
432         /* Fail unless at end of line.  */
433   endline,
434 
435         /* Succeeds if at beginning of buffer (if emacs) or at beginning
436            of string to be matched (if not).  */
437   begbuf,
438 
439         /* Analogously, for end of buffer/string.  */
440   endbuf,
441 
442         /* Followed by two byte relative address to which to jump.  */
443   jump,
444 
445 	/* Same as jump, but marks the end of an alternative.  */
446   jump_past_alt,
447 
448         /* Followed by two-byte relative address of place to resume at
449            in case of failure.  */
450   on_failure_jump,
451 
452         /* Like on_failure_jump, but pushes a placeholder instead of the
453            current string position when executed.  */
454   on_failure_keep_string_jump,
455 
456         /* Throw away latest failure point and then jump to following
457            two-byte relative address.  */
458   pop_failure_jump,
459 
460         /* Change to pop_failure_jump if know won't have to backtrack to
461            match; otherwise change to jump.  This is used to jump
462            back to the beginning of a repeat.  If what follows this jump
463            clearly won't match what the repeat does, such that we can be
464            sure that there is no use backtracking out of repetitions
465            already matched, then we change it to a pop_failure_jump.
466            Followed by two-byte address.  */
467   maybe_pop_jump,
468 
469         /* Jump to following two-byte address, and push a dummy failure
470            point. This failure point will be thrown away if an attempt
471            is made to use it for a failure.  A `+' construct makes this
472            before the first repeat.  Also used as an intermediary kind
473            of jump when compiling an alternative.  */
474   dummy_failure_jump,
475 
476 	/* Push a dummy failure point and continue.  Used at the end of
477 	   alternatives.  */
478   push_dummy_failure,
479 
480         /* Followed by two-byte relative address and two-byte number n.
481            After matching N times, jump to the address upon failure.  */
482   succeed_n,
483 
484         /* Followed by two-byte relative address, and two-byte number n.
485            Jump to the address N times, then fail.  */
486   jump_n,
487 
488         /* Set the following two-byte relative address to the
489            subsequent two-byte number.  The address *includes* the two
490            bytes of number.  */
491   set_number_at,
492 
493   wordchar,	/* Matches any word-constituent character.  */
494   notwordchar,	/* Matches any char that is not a word-constituent.  */
495 
496   wordbeg,	/* Succeeds if at word beginning.  */
497   wordend,	/* Succeeds if at word end.  */
498 
499   wordbound,	/* Succeeds if at a word boundary.  */
500   notwordbound	/* Succeeds if not at a word boundary.  */
501 
502 #ifdef emacs
503   ,before_dot,	/* Succeeds if before point.  */
504   at_dot,	/* Succeeds if at point.  */
505   after_dot,	/* Succeeds if after point.  */
506 
507 	/* Matches any character whose syntax is specified.  Followed by
508            a byte which contains a syntax code, e.g., Sword.  */
509   syntaxspec,
510 
511 	/* Matches any character whose syntax is not that specified.  */
512   notsyntaxspec
513 #endif /* emacs */
514 } re_opcode_t;
515 
516 /* Common operations on the compiled pattern.  */
517 
518 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
519 
520 #define STORE_NUMBER(destination, number)				\
521   do {									\
522     (destination)[0] = (number) & 0377;					\
523     (destination)[1] = (number) >> 8;					\
524   } while (0)
525 
526 /* Same as STORE_NUMBER, except increment DESTINATION to
527    the byte after where the number is stored.  Therefore, DESTINATION
528    must be an lvalue.  */
529 
530 #define STORE_NUMBER_AND_INCR(destination, number)			\
531   do {									\
532     STORE_NUMBER (destination, number);					\
533     (destination) += 2;							\
534   } while (0)
535 
536 /* Put into DESTINATION a number stored in two contiguous bytes starting
537    at SOURCE.  */
538 
539 #define EXTRACT_NUMBER(destination, source)				\
540   do {									\
541     (destination) = *(source) & 0377;					\
542     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;		\
543   } while (0)
544 
545 #ifdef DEBUG
546 static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
547 static void
extract_number(dest,source)548 extract_number (dest, source)
549     int *dest;
550     unsigned char *source;
551 {
552   int temp = SIGN_EXTEND_CHAR (*(source + 1));
553   *dest = *source & 0377;
554   *dest += temp << 8;
555 }
556 
557 #ifndef EXTRACT_MACROS /* To debug the macros.  */
558 #undef EXTRACT_NUMBER
559 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
560 #endif /* not EXTRACT_MACROS */
561 
562 #endif /* DEBUG */
563 
564 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
565    SOURCE must be an lvalue.  */
566 
567 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
568   do {									\
569     EXTRACT_NUMBER (destination, source);				\
570     (source) += 2; 							\
571   } while (0)
572 
573 #ifdef DEBUG
574 static void extract_number_and_incr _RE_ARGS ((int *destination,
575 					       unsigned char **source));
576 static void
extract_number_and_incr(destination,source)577 extract_number_and_incr (destination, source)
578     int *destination;
579     unsigned char **source;
580 {
581   extract_number (destination, *source);
582   *source += 2;
583 }
584 
585 #ifndef EXTRACT_MACROS
586 #undef EXTRACT_NUMBER_AND_INCR
587 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
588   extract_number_and_incr (&dest, &src)
589 #endif /* not EXTRACT_MACROS */
590 
591 #endif /* DEBUG */
592 
593 /* If DEBUG is defined, Regex prints many voluminous messages about what
594    it is doing (if the variable `debug' is nonzero).  If linked with the
595    main program in `iregex.c', you can enter patterns and strings
596    interactively.  And if linked with the main program in `main.c' and
597    the other test files, you can run the already-written tests.  */
598 
599 #ifdef DEBUG
600 
601 /* We use standard I/O for debugging.  */
602 #include <stdio.h>
603 
604 /* It is useful to test things that ``must'' be true when debugging.  */
605 #include <assert.h>
606 
607 static int debug = 0;
608 
609 #define DEBUG_STATEMENT(e) e
610 #define DEBUG_PRINT1(x) if (debug) printf (x)
611 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
612 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
613 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
614 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) 				\
615   if (debug) print_partial_compiled_pattern (s, e)
616 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)			\
617   if (debug) print_double_string (w, s1, sz1, s2, sz2)
618 
619 
620 /* Print the fastmap in human-readable form.  */
621 
622 void
print_fastmap(fastmap)623 print_fastmap (fastmap)
624     char *fastmap;
625 {
626   unsigned was_a_range = 0;
627   unsigned i = 0;
628 
629   while (i < (1 << BYTEWIDTH))
630     {
631       if (fastmap[i++])
632 	{
633 	  was_a_range = 0;
634           putchar (i - 1);
635           while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
636             {
637               was_a_range = 1;
638               i++;
639             }
640 	  if (was_a_range)
641             {
642               printf ("-");
643               putchar (i - 1);
644             }
645         }
646     }
647   putchar ('\n');
648 }
649 
650 
651 /* Print a compiled pattern string in human-readable form, starting at
652    the START pointer into it and ending just before the pointer END.  */
653 
654 void
print_partial_compiled_pattern(start,end)655 print_partial_compiled_pattern (start, end)
656     unsigned char *start;
657     unsigned char *end;
658 {
659   int mcnt, mcnt2;
660   unsigned char *p1;
661   unsigned char *p = start;
662   unsigned char *pend = end;
663 
664   if (start == NULL)
665     {
666       printf ("(null)\n");
667       return;
668     }
669 
670   /* Loop over pattern commands.  */
671   while (p < pend)
672     {
673       printf ("%d:\t", p - start);
674 
675       switch ((re_opcode_t) *p++)
676 	{
677         case no_op:
678           printf ("/no_op");
679           break;
680 
681 	case exactn:
682 	  mcnt = *p++;
683           printf ("/exactn/%d", mcnt);
684           do
685 	    {
686               putchar ('/');
687 	      putchar (*p++);
688             }
689           while (--mcnt);
690           break;
691 
692 	case start_memory:
693           mcnt = *p++;
694           printf ("/start_memory/%d/%d", mcnt, *p++);
695           break;
696 
697 	case stop_memory:
698           mcnt = *p++;
699 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
700           break;
701 
702 	case duplicate:
703 	  printf ("/duplicate/%d", *p++);
704 	  break;
705 
706 	case anychar:
707 	  printf ("/anychar");
708 	  break;
709 
710 	case charset:
711         case charset_not:
712           {
713             register int c, last = -100;
714 	    register int in_range = 0;
715 
716 	    printf ("/charset [%s",
717 	            (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
718 
719             assert (p + *p < pend);
720 
721             for (c = 0; c < 256; c++)
722 	      if (c / 8 < *p
723 		  && (p[1 + (c/8)] & (1 << (c % 8))))
724 		{
725 		  /* Are we starting a range?  */
726 		  if (last + 1 == c && ! in_range)
727 		    {
728 		      putchar ('-');
729 		      in_range = 1;
730 		    }
731 		  /* Have we broken a range?  */
732 		  else if (last + 1 != c && in_range)
733               {
734 		      putchar (last);
735 		      in_range = 0;
736 		    }
737 
738 		  if (! in_range)
739 		    putchar (c);
740 
741 		  last = c;
742               }
743 
744 	    if (in_range)
745 	      putchar (last);
746 
747 	    putchar (']');
748 
749 	    p += 1 + *p;
750 	  }
751 	  break;
752 
753 	case begline:
754 	  printf ("/begline");
755           break;
756 
757 	case endline:
758           printf ("/endline");
759           break;
760 
761 	case on_failure_jump:
762           extract_number_and_incr (&mcnt, &p);
763   	  printf ("/on_failure_jump to %d", p + mcnt - start);
764           break;
765 
766 	case on_failure_keep_string_jump:
767           extract_number_and_incr (&mcnt, &p);
768   	  printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
769           break;
770 
771 	case dummy_failure_jump:
772           extract_number_and_incr (&mcnt, &p);
773   	  printf ("/dummy_failure_jump to %d", p + mcnt - start);
774           break;
775 
776 	case push_dummy_failure:
777           printf ("/push_dummy_failure");
778           break;
779 
780         case maybe_pop_jump:
781           extract_number_and_incr (&mcnt, &p);
782   	  printf ("/maybe_pop_jump to %d", p + mcnt - start);
783 	  break;
784 
785         case pop_failure_jump:
786 	  extract_number_and_incr (&mcnt, &p);
787   	  printf ("/pop_failure_jump to %d", p + mcnt - start);
788 	  break;
789 
790         case jump_past_alt:
791 	  extract_number_and_incr (&mcnt, &p);
792   	  printf ("/jump_past_alt to %d", p + mcnt - start);
793 	  break;
794 
795         case jump:
796 	  extract_number_and_incr (&mcnt, &p);
797   	  printf ("/jump to %d", p + mcnt - start);
798 	  break;
799 
800         case succeed_n:
801           extract_number_and_incr (&mcnt, &p);
802 	  p1 = p + mcnt;
803           extract_number_and_incr (&mcnt2, &p);
804 	  printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
805           break;
806 
807         case jump_n:
808           extract_number_and_incr (&mcnt, &p);
809 	  p1 = p + mcnt;
810           extract_number_and_incr (&mcnt2, &p);
811 	  printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
812           break;
813 
814         case set_number_at:
815           extract_number_and_incr (&mcnt, &p);
816 	  p1 = p + mcnt;
817           extract_number_and_incr (&mcnt2, &p);
818 	  printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
819           break;
820 
821         case wordbound:
822 	  printf ("/wordbound");
823 	  break;
824 
825 	case notwordbound:
826 	  printf ("/notwordbound");
827           break;
828 
829 	case wordbeg:
830 	  printf ("/wordbeg");
831 	  break;
832 
833 	case wordend:
834 	  printf ("/wordend");
835 
836 #ifdef emacs
837 	case before_dot:
838 	  printf ("/before_dot");
839           break;
840 
841 	case at_dot:
842 	  printf ("/at_dot");
843           break;
844 
845 	case after_dot:
846 	  printf ("/after_dot");
847           break;
848 
849 	case syntaxspec:
850           printf ("/syntaxspec");
851 	  mcnt = *p++;
852 	  printf ("/%d", mcnt);
853           break;
854 
855 	case notsyntaxspec:
856           printf ("/notsyntaxspec");
857 	  mcnt = *p++;
858 	  printf ("/%d", mcnt);
859 	  break;
860 #endif /* emacs */
861 
862 	case wordchar:
863 	  printf ("/wordchar");
864           break;
865 
866 	case notwordchar:
867 	  printf ("/notwordchar");
868           break;
869 
870 	case begbuf:
871 	  printf ("/begbuf");
872           break;
873 
874 	case endbuf:
875 	  printf ("/endbuf");
876           break;
877 
878         default:
879           printf ("?%d", *(p-1));
880 	}
881 
882       putchar ('\n');
883     }
884 
885   printf ("%d:\tend of pattern.\n", p - start);
886 }
887 
888 
889 void
print_compiled_pattern(bufp)890 print_compiled_pattern (bufp)
891     struct re_pattern_buffer *bufp;
892 {
893   unsigned char *buffer = bufp->buffer;
894 
895   print_partial_compiled_pattern (buffer, buffer + bufp->used);
896   printf ("%ld bytes used/%ld bytes allocated.\n",
897 	  bufp->used, bufp->allocated);
898 
899   if (bufp->fastmap_accurate && bufp->fastmap)
900     {
901       printf ("fastmap: ");
902       print_fastmap (bufp->fastmap);
903     }
904 
905   printf ("re_nsub: %d\t", bufp->re_nsub);
906   printf ("regs_alloc: %d\t", bufp->regs_allocated);
907   printf ("can_be_null: %d\t", bufp->can_be_null);
908   printf ("newline_anchor: %d\n", bufp->newline_anchor);
909   printf ("no_sub: %d\t", bufp->no_sub);
910   printf ("not_bol: %d\t", bufp->not_bol);
911   printf ("not_eol: %d\t", bufp->not_eol);
912   printf ("syntax: %lx\n", bufp->syntax);
913   /* Perhaps we should print the translate table?  */
914 }
915 
916 
917 void
print_double_string(where,string1,size1,string2,size2)918 print_double_string (where, string1, size1, string2, size2)
919     const char *where;
920     const char *string1;
921     const char *string2;
922     int size1;
923     int size2;
924 {
925   int this_char;
926 
927   if (where == NULL)
928     printf ("(null)");
929   else
930     {
931       if (FIRST_STRING_P (where))
932         {
933           for (this_char = where - string1; this_char < size1; this_char++)
934             putchar (string1[this_char]);
935 
936           where = string2;
937         }
938 
939       for (this_char = where - string2; this_char < size2; this_char++)
940         putchar (string2[this_char]);
941     }
942 }
943 
944 void
printchar(c)945 printchar (c)
946      int c;
947 {
948   putc (c, stderr);
949 }
950 
951 #else /* not DEBUG */
952 
953 #undef assert
954 #define assert(e)
955 
956 #define DEBUG_STATEMENT(e)
957 #define DEBUG_PRINT1(x)
958 #define DEBUG_PRINT2(x1, x2)
959 #define DEBUG_PRINT3(x1, x2, x3)
960 #define DEBUG_PRINT4(x1, x2, x3, x4)
961 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
962 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
963 
964 #endif /* not DEBUG */
965 
966 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
967    also be assigned to arbitrarily: each pattern buffer stores its own
968    syntax, so it can be changed between regex compilations.  */
969 /* This has no initializer because initialized variables in Emacs
970    become read-only after dumping.  */
971 reg_syntax_t re_syntax_options;
972 
973 
974 /* Specify the precise syntax of regexps for compilation.  This provides
975    for compatibility for various utilities which historically have
976    different, incompatible syntaxes.
977 
978    The argument SYNTAX is a bit mask comprised of the various bits
979    defined in rx.h.  We return the old syntax.  */
980 
981 reg_syntax_t
re_set_syntax(syntax)982 re_set_syntax (syntax)
983     reg_syntax_t syntax;
984 {
985   reg_syntax_t ret = re_syntax_options;
986 
987   re_syntax_options = syntax;
988 #ifdef DEBUG
989   if (syntax & RE_DEBUG)
990     debug = 1;
991   else if (debug) /* was on but now is not */
992     debug = 0;
993 #endif /* DEBUG */
994   return ret;
995 }
996 
997 /* This table gives an error message for each of the error codes listed
998    in rx.h.  Obviously the order here has to be same as there.
999    POSIX doesn't require that we do anything for REG_NOERROR,
1000    but why not be nice?  */
1001 
1002 static const char *re_error_msgid[] =
1003   {
1004     gettext_noop ("Success"),	/* REG_NOERROR */
1005     gettext_noop ("No match"),	/* REG_NOMATCH */
1006     gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1007     gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1008     gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1009     gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1010     gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1011     gettext_noop ("Unmatched [ or [^"),	/* REG_EBRACK */
1012     gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1013     gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1014     gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1015     gettext_noop ("Invalid range end"),	/* REG_ERANGE */
1016     gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1017     gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1018     gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1019     gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1020     gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1021   };
1022 
1023 /* Avoiding alloca during matching, to placate r_alloc.  */
1024 
1025 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1026    searching and matching functions should not call alloca.  On some
1027    systems, alloca is implemented in terms of malloc, and if we're
1028    using the relocating allocator routines, then malloc could cause a
1029    relocation, which might (if the strings being searched are in the
1030    ralloc heap) shift the data out from underneath the regexp
1031    routines.
1032 
1033    Here's another reason to avoid allocation: Emacs
1034    processes input from X in a signal handler; processing X input may
1035    call malloc; if input arrives while a matching routine is calling
1036    malloc, then we're scrod.  But Emacs can't just block input while
1037    calling matching routines; then we don't notice interrupts when
1038    they come in.  So, Emacs blocks input around all regexp calls
1039    except the matching calls, which it leaves unprotected, in the
1040    faith that they will not malloc.  */
1041 
1042 /* Normally, this is fine.  */
1043 #define MATCH_MAY_ALLOCATE
1044 
1045 /* When using GNU C, we are not REALLY using the C alloca, no matter
1046    what config.h may say.  So don't take precautions for it.  */
1047 #ifdef __GNUC__
1048 #undef C_ALLOCA
1049 #endif
1050 
1051 /* The match routines may not allocate if (1) they would do it with malloc
1052    and (2) it's not safe for them to use malloc.
1053    Note that if REL_ALLOC is defined, matching would not use malloc for the
1054    failure stack, but we would still use it for the register vectors;
1055    so REL_ALLOC should not affect this.  */
1056 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
1057 #undef MATCH_MAY_ALLOCATE
1058 #endif
1059 
1060 
1061 /* Failure stack declarations and macros; both re_compile_fastmap and
1062    re_match_2 use a failure stack.  These have to be macros because of
1063    REGEX_ALLOCATE_STACK.  */
1064 
1065 
1066 /* Number of failure points for which to initially allocate space
1067    when matching.  If this number is exceeded, we allocate more
1068    space, so it is not a hard limit.  */
1069 #ifndef INIT_FAILURE_ALLOC
1070 #define INIT_FAILURE_ALLOC 5
1071 #endif
1072 
1073 /* Roughly the maximum number of failure points on the stack.  Would be
1074    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1075    This is a variable only so users of regex can assign to it; we never
1076    change it ourselves.  */
1077 
1078 #ifdef INT_IS_16BIT
1079 
1080 #if defined (MATCH_MAY_ALLOCATE)
1081 /* 4400 was enough to cause a crash on Alpha OSF/1,
1082    whose default stack limit is 2mb.  */
1083 long int re_max_failures = 4000;
1084 #else
1085 long int re_max_failures = 2000;
1086 #endif
1087 
1088 union fail_stack_elt
1089 {
1090   unsigned char *pointer;
1091   long int integer;
1092 };
1093 
1094 typedef union fail_stack_elt fail_stack_elt_t;
1095 
1096 typedef struct
1097 {
1098   fail_stack_elt_t *stack;
1099   unsigned long int size;
1100   unsigned long int avail;		/* Offset of next open position.  */
1101 } fail_stack_type;
1102 
1103 #else /* not INT_IS_16BIT */
1104 
1105 #if defined (MATCH_MAY_ALLOCATE)
1106 /* 4400 was enough to cause a crash on Alpha OSF/1,
1107    whose default stack limit is 2mb.  */
1108 int re_max_failures = 20000;
1109 #else
1110 int re_max_failures = 2000;
1111 #endif
1112 
1113 union fail_stack_elt
1114 {
1115   unsigned char *pointer;
1116   int integer;
1117 };
1118 
1119 typedef union fail_stack_elt fail_stack_elt_t;
1120 
1121 typedef struct
1122 {
1123   fail_stack_elt_t *stack;
1124   unsigned size;
1125   unsigned avail;			/* Offset of next open position.  */
1126 } fail_stack_type;
1127 
1128 #endif /* INT_IS_16BIT */
1129 
1130 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1131 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1132 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1133 
1134 
1135 /* Define macros to initialize and free the failure stack.
1136    Do `return -2' if the alloc fails.  */
1137 
1138 #ifdef MATCH_MAY_ALLOCATE
1139 #define INIT_FAIL_STACK()						\
1140   do {									\
1141     fail_stack.stack = (fail_stack_elt_t *)				\
1142       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));	\
1143 									\
1144     if (fail_stack.stack == NULL)					\
1145       return -2;							\
1146 									\
1147     fail_stack.size = INIT_FAILURE_ALLOC;				\
1148     fail_stack.avail = 0;						\
1149   } while (0)
1150 
1151 #define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1152 #else
1153 #define INIT_FAIL_STACK()						\
1154   do {									\
1155     fail_stack.avail = 0;						\
1156   } while (0)
1157 
1158 #define RESET_FAIL_STACK()
1159 #endif
1160 
1161 
1162 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1163 
1164    Return 1 if succeeds, and 0 if either ran out of memory
1165    allocating space for it or it was already too large.
1166 
1167    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1168 
1169 #define DOUBLE_FAIL_STACK(fail_stack)					\
1170   ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS)	\
1171    ? 0									\
1172    : ((fail_stack).stack = (fail_stack_elt_t *)				\
1173         REGEX_REALLOCATE_STACK ((fail_stack).stack, 			\
1174           (fail_stack).size * sizeof (fail_stack_elt_t),		\
1175           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),	\
1176 									\
1177       (fail_stack).stack == NULL					\
1178       ? 0								\
1179       : ((fail_stack).size <<= 1, 					\
1180          1)))
1181 
1182 
1183 /* Push pointer POINTER on FAIL_STACK.
1184    Return 1 if was able to do so and 0 if ran out of memory allocating
1185    space to do so.  */
1186 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)				\
1187   ((FAIL_STACK_FULL ()							\
1188     && !DOUBLE_FAIL_STACK (FAIL_STACK))					\
1189    ? 0									\
1190    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,	\
1191       1))
1192 
1193 /* Push a pointer value onto the failure stack.
1194    Assumes the variable `fail_stack'.  Probably should only
1195    be called from within `PUSH_FAILURE_POINT'.  */
1196 #define PUSH_FAILURE_POINTER(item)					\
1197   fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1198 
1199 /* This pushes an integer-valued item onto the failure stack.
1200    Assumes the variable `fail_stack'.  Probably should only
1201    be called from within `PUSH_FAILURE_POINT'.  */
1202 #define PUSH_FAILURE_INT(item)					\
1203   fail_stack.stack[fail_stack.avail++].integer = (item)
1204 
1205 /* Push a fail_stack_elt_t value onto the failure stack.
1206    Assumes the variable `fail_stack'.  Probably should only
1207    be called from within `PUSH_FAILURE_POINT'.  */
1208 #define PUSH_FAILURE_ELT(item)					\
1209   fail_stack.stack[fail_stack.avail++] =  (item)
1210 
1211 /* These three POP... operations complement the three PUSH... operations.
1212    All assume that `fail_stack' is nonempty.  */
1213 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1214 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1215 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1216 
1217 /* Used to omit pushing failure point id's when we're not debugging.  */
1218 #ifdef DEBUG
1219 #define DEBUG_PUSH PUSH_FAILURE_INT
1220 #define DEBUG_POP(item_addr) (item_addr)->integer = POP_FAILURE_INT ()
1221 #else
1222 #define DEBUG_PUSH(item)
1223 #define DEBUG_POP(item_addr)
1224 #endif
1225 
1226 
1227 /* Push the information about the state we will need
1228    if we ever fail back to it.
1229 
1230    Requires variables fail_stack, regstart, regend, reg_info, and
1231    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
1232    declared.
1233 
1234    Does `return FAILURE_CODE' if runs out of memory.  */
1235 
1236 /* following is a mini hack by geo */
1237 #ifdef REGEX_MALLOC
1238 	#define DECL_DESTINATION_PLEASE /**/
1239 #else
1240 	#define DECL_DESTINATION_PLEASE  char *destination;
1241 #endif
1242 
1243 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)	\
1244   do {									\
1245     DECL_DESTINATION_PLEASE						\
1246     /* Must be int, so when we don't save any registers, the arithmetic	\
1247        of 0 + -1 isn't done as unsigned.  */				\
1248     /* Can't be int, since there is not a shred of a guarantee that int	\
1249        is wide enough to hold a value of something to which pointer can	\
1250        be assigned */							\
1251     s_reg_t this_reg;							\
1252     									\
1253     DEBUG_STATEMENT (failure_id++);					\
1254     DEBUG_STATEMENT (nfailure_points_pushed++);				\
1255     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);		\
1256     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1257     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1258 									\
1259     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);		\
1260     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);	\
1261 									\
1262     /* Ensure we have enough space allocated for what we will push.  */	\
1263     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)			\
1264       {									\
1265         if (!DOUBLE_FAIL_STACK (fail_stack))				\
1266           return failure_code;						\
1267 									\
1268         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",		\
1269 		       (fail_stack).size);				\
1270         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1271       }									\
1272 									\
1273     /* Push the info, starting with the registers.  */			\
1274     DEBUG_PRINT1 ("\n");						\
1275 									\
1276     if (1)								\
1277       for (this_reg = lowest_active_reg; this_reg <= (s_reg_t) highest_active_reg; \
1278 	   this_reg++)							\
1279 	{								\
1280 	  DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);		\
1281 	  DEBUG_STATEMENT (num_regs_pushed++);				\
1282 									\
1283 	  DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);	\
1284 	  PUSH_FAILURE_POINTER (regstart[this_reg]);			\
1285 									\
1286 	  DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);		\
1287 	  PUSH_FAILURE_POINTER (regend[this_reg]);			\
1288 									\
1289 	  DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);	\
1290 	  DEBUG_PRINT2 (" match_null=%d",				\
1291 			REG_MATCH_NULL_STRING_P (reg_info[this_reg]));	\
1292 	  DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));	\
1293 	  DEBUG_PRINT2 (" matched_something=%d",			\
1294 			MATCHED_SOMETHING (reg_info[this_reg]));	\
1295 	  DEBUG_PRINT2 (" ever_matched=%d",				\
1296 			EVER_MATCHED_SOMETHING (reg_info[this_reg]));	\
1297 	  DEBUG_PRINT1 ("\n");						\
1298 	  PUSH_FAILURE_ELT (reg_info[this_reg].word);			\
1299 	}								\
1300 									\
1301     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
1302     PUSH_FAILURE_INT (lowest_active_reg);				\
1303 									\
1304     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
1305     PUSH_FAILURE_INT (highest_active_reg);				\
1306 									\
1307     DEBUG_PRINT2 ("  Pushing pattern 0x%x:\n", pattern_place);		\
1308     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);		\
1309     PUSH_FAILURE_POINTER (pattern_place);				\
1310 									\
1311     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);		\
1312     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1313 				 size2);				\
1314     DEBUG_PRINT1 ("'\n");						\
1315     PUSH_FAILURE_POINTER (string_place);				\
1316 									\
1317     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);		\
1318     DEBUG_PUSH (failure_id);						\
1319   } while (0)
1320 
1321 /* This is the number of items that are pushed and popped on the stack
1322    for each register.  */
1323 #define NUM_REG_ITEMS  3
1324 
1325 /* Individual items aside from the registers.  */
1326 #ifdef DEBUG
1327 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1328 #else
1329 #define NUM_NONREG_ITEMS 4
1330 #endif
1331 
1332 /* We push at most this many items on the stack.  */
1333 /* We used to use (num_regs - 1), which is the number of registers
1334    this regexp will save; but that was changed to 5
1335    to avoid stack overflow for a regexp with lots of parens.  */
1336 #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1337 
1338 /* We actually push this many items.  */
1339 #define NUM_FAILURE_ITEMS				\
1340   (((0							\
1341      ? 0 : highest_active_reg - lowest_active_reg + 1)	\
1342     * NUM_REG_ITEMS)					\
1343    + NUM_NONREG_ITEMS)
1344 
1345 /* How many items can still be added to the stack without overflowing it.  */
1346 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1347 
1348 
1349 /* Pops what PUSH_FAIL_STACK pushes.
1350 
1351    We restore into the parameters, all of which should be lvalues:
1352      STR -- the saved data position.
1353      PAT -- the saved pattern position.
1354      LOW_REG, HIGH_REG -- the highest and lowest active registers.
1355      REGSTART, REGEND -- arrays of string positions.
1356      REG_INFO -- array of information about each subexpression.
1357 
1358    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1359    `pend', `string1', `size1', `string2', and `size2'.  */
1360 
1361 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1362 {									\
1363   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)			\
1364   s_reg_t this_reg;							\
1365   const unsigned char *string_temp;					\
1366 									\
1367   assert (!FAIL_STACK_EMPTY ());					\
1368 									\
1369   /* Remove failure points and point to how many regs pushed.  */	\
1370   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");				\
1371   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);	\
1372   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);	\
1373 									\
1374   assert (fail_stack.avail >= NUM_NONREG_ITEMS);			\
1375 									\
1376   DEBUG_POP (&failure_id);						\
1377   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);		\
1378 									\
1379   /* If the saved string location is NULL, it came from an		\
1380      on_failure_keep_string_jump opcode, and we want to throw away the	\
1381      saved NULL, thus retaining our current position in the string.  */	\
1382   string_temp = POP_FAILURE_POINTER ();					\
1383   if (string_temp != NULL)						\
1384     str = (const char *) string_temp;					\
1385 									\
1386   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);			\
1387   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);	\
1388   DEBUG_PRINT1 ("'\n");							\
1389 									\
1390   pat = (unsigned char *) POP_FAILURE_POINTER ();			\
1391   DEBUG_PRINT2 ("  Popping pattern 0x%x:\n", pat);			\
1392   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);			\
1393 									\
1394   /* Restore register info.  */						\
1395   high_reg = (active_reg_t) POP_FAILURE_INT ();				\
1396   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);		\
1397 									\
1398   low_reg = (active_reg_t) POP_FAILURE_INT ();				\
1399   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);		\
1400 									\
1401   if (1)								\
1402     for (this_reg = high_reg; this_reg >= (s_reg_t)low_reg; this_reg--)		\
1403       {									\
1404 	DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);		\
1405 									\
1406 	reg_info[this_reg].word = POP_FAILURE_ELT ();			\
1407 	DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);	\
1408 									\
1409 	regend[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1410 	DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);		\
1411 									\
1412 	regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();	\
1413 	DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);	\
1414       }									\
1415   else									\
1416     {									\
1417       for (this_reg = highest_active_reg; this_reg > (s_reg_t)high_reg; this_reg--) \
1418 	{								\
1419 	  reg_info[this_reg].word.integer = 0;				\
1420 	  regend[this_reg] = 0;						\
1421 	  regstart[this_reg] = 0;					\
1422 	}								\
1423       highest_active_reg = high_reg;					\
1424     }									\
1425 									\
1426   set_regs_matched_done = 0;						\
1427   DEBUG_STATEMENT (nfailure_points_popped++);				\
1428 } /* POP_FAILURE_POINT */
1429 
1430 
1431 
1432 /* Structure for per-register (a.k.a. per-group) information.
1433    Other register information, such as the
1434    starting and ending positions (which are addresses), and the list of
1435    inner groups (which is a bits list) are maintained in separate
1436    variables.
1437 
1438    We are making a (strictly speaking) nonportable assumption here: that
1439    the compiler will pack our bit fields into something that fits into
1440    the type of `word', i.e., is something that fits into one item on the
1441    failure stack.  */
1442 
1443 
1444 /* Declarations and macros for re_match_2.  */
1445 
1446 typedef union
1447 {
1448   fail_stack_elt_t word;
1449   struct
1450   {
1451       /* This field is one if this group can match the empty string,
1452          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1453 #define MATCH_NULL_UNSET_VALUE 3
1454     unsigned match_null_string_p : 2;
1455     unsigned is_active : 1;
1456     unsigned matched_something : 1;
1457     unsigned ever_matched_something : 1;
1458   } bits;
1459 } register_info_type;
1460 
1461 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1462 #define IS_ACTIVE(R)  ((R).bits.is_active)
1463 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1464 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1465 
1466 
1467 /* Call this when have matched a real character; it sets `matched' flags
1468    for the subexpressions which we are currently inside.  Also records
1469    that those subexprs have matched.  */
1470 #define SET_REGS_MATCHED()						\
1471   do									\
1472     {									\
1473       if (!set_regs_matched_done)					\
1474 	{								\
1475 	  active_reg_t r;						\
1476 	  set_regs_matched_done = 1;					\
1477 	  for (r = lowest_active_reg; r <= highest_active_reg; r++)	\
1478 	    {								\
1479 	      MATCHED_SOMETHING (reg_info[r])				\
1480 		= EVER_MATCHED_SOMETHING (reg_info[r])			\
1481 		= 1;							\
1482 	    }								\
1483 	}								\
1484     }									\
1485   while (0)
1486 
1487 /* Registers are set to a sentinel when they haven't yet matched.  */
1488 static char reg_unset_dummy;
1489 #define REG_UNSET_VALUE (&reg_unset_dummy)
1490 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1491 
1492 /* Subroutine declarations and macros for regex_compile.  */
1493 
1494 static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1495 					      reg_syntax_t syntax,
1496 					      struct re_pattern_buffer *bufp));
1497 static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1498 static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1499 				 int arg1, int arg2));
1500 static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1501 				  int arg, unsigned char *end));
1502 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1503 				  int arg1, int arg2, unsigned char *end));
1504 static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1505 					   reg_syntax_t syntax));
1506 static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1507 					   reg_syntax_t syntax));
1508 static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
1509 					      const char *pend,
1510 					      char *translate,
1511 					      reg_syntax_t syntax,
1512 					      unsigned char *b));
1513 
1514 /* Fetch the next character in the uncompiled pattern---translating it
1515    if necessary.  Also cast from a signed character in the constant
1516    string passed to us by the user to an unsigned char that we can use
1517    as an array index (in, e.g., `translate').  */
1518 #ifndef PATFETCH
1519 #define PATFETCH(c)							\
1520   do {if (p == pend) return REG_EEND;					\
1521     c = (unsigned char) *p++;						\
1522     if (translate) c = (unsigned char) translate[c];			\
1523   } while (0)
1524 #endif
1525 
1526 /* Fetch the next character in the uncompiled pattern, with no
1527    translation.  */
1528 #define PATFETCH_RAW(c)							\
1529   do {if (p == pend) return REG_EEND;					\
1530     c = (unsigned char) *p++; 						\
1531   } while (0)
1532 
1533 /* Go backwards one character in the pattern.  */
1534 #define PATUNFETCH p--
1535 
1536 
1537 /* If `translate' is non-null, return translate[D], else just D.  We
1538    cast the subscript to translate because some data is declared as
1539    `char *', to avoid warnings when a string constant is passed.  But
1540    when we use a character as a subscript we must make it unsigned.  */
1541 #ifndef TRANSLATE
1542 #define TRANSLATE(d) \
1543   (translate ? (char) translate[(unsigned char) (d)] : (d))
1544 #endif
1545 
1546 
1547 /* Macros for outputting the compiled pattern into `buffer'.  */
1548 
1549 /* If the buffer isn't allocated when it comes in, use this.  */
1550 #define INIT_BUF_SIZE  32
1551 
1552 /* Make sure we have at least N more bytes of space in buffer.  */
1553 #define GET_BUFFER_SPACE(n)						\
1554     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)	\
1555       EXTEND_BUFFER ()
1556 
1557 /* Make sure we have one more byte of buffer space and then add C to it.  */
1558 #define BUF_PUSH(c)							\
1559   do {									\
1560     GET_BUFFER_SPACE (1);						\
1561     *b++ = (unsigned char) (c);						\
1562   } while (0)
1563 
1564 
1565 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1566 #define BUF_PUSH_2(c1, c2)						\
1567   do {									\
1568     GET_BUFFER_SPACE (2);						\
1569     *b++ = (unsigned char) (c1);					\
1570     *b++ = (unsigned char) (c2);					\
1571   } while (0)
1572 
1573 
1574 /* As with BUF_PUSH_2, except for three bytes.  */
1575 #define BUF_PUSH_3(c1, c2, c3)						\
1576   do {									\
1577     GET_BUFFER_SPACE (3);						\
1578     *b++ = (unsigned char) (c1);					\
1579     *b++ = (unsigned char) (c2);					\
1580     *b++ = (unsigned char) (c3);					\
1581   } while (0)
1582 
1583 
1584 /* Store a jump with opcode OP at LOC to location TO.  We store a
1585    relative address offset by the three bytes the jump itself occupies.  */
1586 #define STORE_JUMP(op, loc, to) \
1587   store_op1 (op, loc, (int) ((to) - (loc) - 3))
1588 
1589 /* Likewise, for a two-argument jump.  */
1590 #define STORE_JUMP2(op, loc, to, arg) \
1591   store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
1592 
1593 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1594 #define INSERT_JUMP(op, loc, to) \
1595   insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
1596 
1597 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1598 #define INSERT_JUMP2(op, loc, to, arg) \
1599   insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
1600 
1601 
1602 /* This is not an arbitrary limit: the arguments which represent offsets
1603    into the pattern are two bytes long.  So if 2^16 bytes turns out to
1604    be too small, many things would have to change.  */
1605 /* Any other compiler which, like MSC, has allocation limit below 2^16
1606    bytes will have to use approach similar to what was done below for
1607    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1608    reallocating to 0 bytes.  Such thing is not going to work too well.
1609    You have been warned!!  */
1610 #if defined(_MSC_VER)  && !defined(WIN32)
1611 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1612    The REALLOC define eliminates a flurry of conversion warnings,
1613    but is not required. */
1614 #define MAX_BUF_SIZE  65500L
1615 #define REALLOC(p,s) realloc ((p), (size_t) (s))
1616 #else
1617 #define MAX_BUF_SIZE (1L << 16)
1618 #define REALLOC(p,s) realloc ((p), (s))
1619 #endif
1620 
1621 /* Extend the buffer by twice its current size via realloc and
1622    reset the pointers that pointed into the old block to point to the
1623    correct places in the new one.  If extending the buffer results in it
1624    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1625 #define EXTEND_BUFFER()							\
1626   do { 									\
1627     unsigned char *old_buffer = bufp->buffer;				\
1628     if (bufp->allocated == MAX_BUF_SIZE) 				\
1629       return REG_ESIZE;							\
1630     bufp->allocated <<= 1;						\
1631     if (bufp->allocated > MAX_BUF_SIZE)					\
1632       bufp->allocated = MAX_BUF_SIZE; 					\
1633     bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
1634     if (bufp->buffer == NULL)						\
1635       return REG_ESPACE;						\
1636     /* If the buffer moved, move all the pointers into it.  */		\
1637     if (old_buffer != bufp->buffer)					\
1638       {									\
1639         b = (b - old_buffer) + bufp->buffer;				\
1640         begalt = (begalt - old_buffer) + bufp->buffer;			\
1641         if (fixup_alt_jump)						\
1642           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1643         if (laststart)							\
1644           laststart = (laststart - old_buffer) + bufp->buffer;		\
1645         if (pending_exact)						\
1646           pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
1647       }									\
1648   } while (0)
1649 
1650 
1651 /* Since we have one byte reserved for the register number argument to
1652    {start,stop}_memory, the maximum number of groups we can report
1653    things about is what fits in that byte.  */
1654 #define MAX_REGNUM 255
1655 
1656 /* But patterns can have more than `MAX_REGNUM' registers.  We just
1657    ignore the excess.  */
1658 typedef unsigned regnum_t;
1659 
1660 
1661 /* Macros for the compile stack.  */
1662 
1663 /* Since offsets can go either forwards or backwards, this type needs to
1664    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1665 /* int may be not enough when sizeof(int) == 2.  */
1666 typedef long pattern_offset_t;
1667 
1668 typedef struct
1669 {
1670   pattern_offset_t begalt_offset;
1671   pattern_offset_t fixup_alt_jump;
1672   pattern_offset_t inner_group_offset;
1673   pattern_offset_t laststart_offset;
1674   regnum_t regnum;
1675 } compile_stack_elt_t;
1676 
1677 
1678 typedef struct
1679 {
1680   compile_stack_elt_t *stack;
1681   unsigned size;
1682   unsigned avail;			/* Offset of next open position.  */
1683 } compile_stack_type;
1684 
1685 
1686 #define INIT_COMPILE_STACK_SIZE 32
1687 
1688 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1689 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1690 
1691 /* The next available element.  */
1692 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1693 
1694 
1695 /* Set the bit for character C in a list.  */
1696 #define SET_LIST_BIT(c)                               \
1697   (b[((unsigned char) (c)) / BYTEWIDTH]               \
1698    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1699 
1700 
1701 /* Get the next unsigned number in the uncompiled pattern.  */
1702 #define GET_UNSIGNED_NUMBER(num) 					\
1703   { if (p != pend)							\
1704      {									\
1705        PATFETCH (c); 							\
1706        while (ISDIGIT (c)) 						\
1707          { 								\
1708            if (num < 0)							\
1709               num = 0;							\
1710            num = num * 10 + c - '0'; 					\
1711            if (p == pend) 						\
1712               break; 							\
1713            PATFETCH (c);						\
1714          } 								\
1715        } 								\
1716     }
1717 
1718 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
1719 /* The GNU C library provides support for user-defined character classes
1720    and the functions from ISO C amendement 1.  */
1721 # ifdef CHARCLASS_NAME_MAX
1722 #  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1723 # else
1724 /* This shouldn't happen but some implementation might still have this
1725    problem.  Use a reasonable default value.  */
1726 #  define CHAR_CLASS_MAX_LENGTH 256
1727 # endif
1728 
1729 # define IS_CHAR_CLASS(string) wctype (string)
1730 #else
1731 # define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1732 
1733 # define IS_CHAR_CLASS(string)						\
1734    (STREQ (string, "alpha") || STREQ (string, "upper")			\
1735     || STREQ (string, "lower") || STREQ (string, "digit")		\
1736     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
1737     || STREQ (string, "space") || STREQ (string, "print")		\
1738     || STREQ (string, "punct") || STREQ (string, "graph")		\
1739     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1740 #endif
1741 
1742 #ifndef MATCH_MAY_ALLOCATE
1743 
1744 /* If we cannot allocate large objects within re_match_2_internal,
1745    we make the fail stack and register vectors global.
1746    The fail stack, we grow to the maximum size when a regexp
1747    is compiled.
1748    The register vectors, we adjust in size each time we
1749    compile a regexp, according to the number of registers it needs.  */
1750 
1751 static fail_stack_type fail_stack;
1752 
1753 /* Size with which the following vectors are currently allocated.
1754    That is so we can make them bigger as needed,
1755    but never make them smaller.  */
1756 static int regs_allocated_size;
1757 
1758 static const char **     regstart, **     regend;
1759 static const char ** old_regstart, ** old_regend;
1760 static const char **best_regstart, **best_regend;
1761 static register_info_type *reg_info;
1762 static const char **reg_dummy;
1763 static register_info_type *reg_info_dummy;
1764 
1765 /* Make the register vectors big enough for NUM_REGS registers,
1766    but don't make them smaller.  */
1767 
1768 static
regex_grow_registers(num_regs)1769 regex_grow_registers (num_regs)
1770      int num_regs;
1771 {
1772   if (num_regs > regs_allocated_size)
1773     {
1774       RETALLOC_IF (regstart,	 num_regs, const char *);
1775       RETALLOC_IF (regend,	 num_regs, const char *);
1776       RETALLOC_IF (old_regstart, num_regs, const char *);
1777       RETALLOC_IF (old_regend,	 num_regs, const char *);
1778       RETALLOC_IF (best_regstart, num_regs, const char *);
1779       RETALLOC_IF (best_regend,	 num_regs, const char *);
1780       RETALLOC_IF (reg_info,	 num_regs, register_info_type);
1781       RETALLOC_IF (reg_dummy,	 num_regs, const char *);
1782       RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1783 
1784       regs_allocated_size = num_regs;
1785     }
1786 }
1787 
1788 #endif /* not MATCH_MAY_ALLOCATE */
1789 
1790 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1791 						 compile_stack,
1792 						 regnum_t regnum));
1793 
1794 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1795    Returns one of error codes defined in `rx.h', or zero for success.
1796 
1797    Assumes the `allocated' (and perhaps `buffer') and `translate'
1798    fields are set in BUFP on entry.
1799 
1800    If it succeeds, results are put in BUFP (if it returns an error, the
1801    contents of BUFP are undefined):
1802      `buffer' is the compiled pattern;
1803      `syntax' is set to SYNTAX;
1804      `used' is set to the length of the compiled pattern;
1805      `fastmap_accurate' is zero;
1806      `re_nsub' is the number of subexpressions in PATTERN;
1807      `not_bol' and `not_eol' are zero;
1808 
1809    The `fastmap' and `newline_anchor' fields are neither
1810    examined nor set.  */
1811 
1812 /* Return, freeing storage we allocated.  */
1813 #define FREE_STACK_RETURN(value)		\
1814   return (free (compile_stack.stack), value)
1815 
1816 static reg_errcode_t
regex_compile(pattern,size,syntax,bufp)1817 regex_compile (pattern, size, syntax, bufp)
1818      const char *pattern;
1819      size_t size;
1820      reg_syntax_t syntax;
1821      struct re_pattern_buffer *bufp;
1822 {
1823   /* We fetch characters from PATTERN here.  Even though PATTERN is
1824      `char *' (i.e., signed), we declare these variables as unsigned, so
1825      they can be reliably used as array indices.  */
1826   register unsigned char c, c1;
1827 
1828   /* A random temporary spot in PATTERN.  */
1829   const char *p1;
1830 
1831   /* Points to the end of the buffer, where we should append.  */
1832   register unsigned char *b;
1833 
1834   /* Keeps track of unclosed groups.  */
1835   compile_stack_type compile_stack;
1836 
1837   /* Points to the current (ending) position in the pattern.  */
1838   const char *p = pattern;
1839   const char *pend = pattern + size;
1840 
1841   /* How to translate the characters in the pattern.  */
1842   RE_TRANSLATE_TYPE translate = bufp->translate;
1843 
1844   /* Address of the count-byte of the most recently inserted `exactn'
1845      command.  This makes it possible to tell if a new exact-match
1846      character can be added to that command or if the character requires
1847      a new `exactn' command.  */
1848   unsigned char *pending_exact = 0;
1849 
1850   /* Address of start of the most recently finished expression.
1851      This tells, e.g., postfix * where to find the start of its
1852      operand.  Reset at the beginning of groups and alternatives.  */
1853   unsigned char *laststart = 0;
1854 
1855   /* Address of beginning of regexp, or inside of last group.  */
1856   unsigned char *begalt;
1857 
1858   /* Place in the uncompiled pattern (i.e., the {) to
1859      which to go back if the interval is invalid.  */
1860   const char *beg_interval;
1861 
1862   /* Address of the place where a forward jump should go to the end of
1863      the containing expression.  Each alternative of an `or' -- except the
1864      last -- ends with a forward jump of this sort.  */
1865   unsigned char *fixup_alt_jump = 0;
1866 
1867   /* Counts open-groups as they are encountered.  Remembered for the
1868      matching close-group on the compile stack, so the same register
1869      number is put in the stop_memory as the start_memory.  */
1870   regnum_t regnum = 0;
1871 
1872 #ifdef DEBUG
1873   DEBUG_PRINT1 ("\nCompiling pattern: ");
1874   if (debug)
1875     {
1876       unsigned debug_count;
1877 
1878       for (debug_count = 0; debug_count < size; debug_count++)
1879         putchar (pattern[debug_count]);
1880       putchar ('\n');
1881     }
1882 #endif /* DEBUG */
1883 
1884   /* Initialize the compile stack.  */
1885   compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1886   if (compile_stack.stack == NULL)
1887     return REG_ESPACE;
1888 
1889   compile_stack.size = INIT_COMPILE_STACK_SIZE;
1890   compile_stack.avail = 0;
1891 
1892   /* Initialize the pattern buffer.  */
1893   bufp->syntax = syntax;
1894   bufp->fastmap_accurate = 0;
1895   bufp->not_bol = bufp->not_eol = 0;
1896 
1897   /* Set `used' to zero, so that if we return an error, the pattern
1898      printer (for debugging) will think there's no pattern.  We reset it
1899      at the end.  */
1900   bufp->used = 0;
1901 
1902   /* Always count groups, whether or not bufp->no_sub is set.  */
1903   bufp->re_nsub = 0;
1904 
1905 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1906   /* Initialize the syntax table.  */
1907    init_syntax_once ();
1908 #endif
1909 
1910   if (bufp->allocated == 0)
1911     {
1912       if (bufp->buffer)
1913 	{ /* If zero allocated, but buffer is non-null, try to realloc
1914              enough space.  This loses if buffer's address is bogus, but
1915              that is the user's responsibility.  */
1916           RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1917         }
1918       else
1919         { /* Caller did not allocate a buffer.  Do it for them.  */
1920           bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1921         }
1922       if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1923 
1924       bufp->allocated = INIT_BUF_SIZE;
1925     }
1926 
1927   begalt = b = bufp->buffer;
1928 
1929   /* Loop through the uncompiled pattern until we're at the end.  */
1930   while (p != pend)
1931     {
1932       PATFETCH (c);
1933 
1934       switch (c)
1935         {
1936         case '^':
1937           {
1938             if (   /* If at start of pattern, it's an operator.  */
1939                    p == pattern + 1
1940                    /* If context independent, it's an operator.  */
1941                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1942                    /* Otherwise, depends on what's come before.  */
1943                 || at_begline_loc_p (pattern, p, syntax))
1944               BUF_PUSH (begline);
1945             else
1946               goto normal_char;
1947           }
1948           break;
1949 
1950 
1951         case '$':
1952           {
1953             if (   /* If at end of pattern, it's an operator.  */
1954                    p == pend
1955                    /* If context independent, it's an operator.  */
1956                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1957                    /* Otherwise, depends on what's next.  */
1958                 || at_endline_loc_p (p, pend, syntax))
1959                BUF_PUSH (endline);
1960              else
1961                goto normal_char;
1962            }
1963            break;
1964 
1965 
1966 	case '+':
1967         case '?':
1968           if ((syntax & RE_BK_PLUS_QM)
1969               || (syntax & RE_LIMITED_OPS))
1970             goto normal_char;
1971         handle_plus:
1972         case '*':
1973           /* If there is no previous pattern... */
1974           if (!laststart)
1975             {
1976               if (syntax & RE_CONTEXT_INVALID_OPS)
1977                 FREE_STACK_RETURN (REG_BADRPT);
1978               else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1979                 goto normal_char;
1980             }
1981 
1982           {
1983             /* Are we optimizing this jump?  */
1984             boolean keep_string_p = false;
1985 
1986             /* 1 means zero (many) matches is allowed.  */
1987             char zero_times_ok = 0, many_times_ok = 0;
1988 
1989             /* If there is a sequence of repetition chars, collapse it
1990                down to just one (the right one).  We can't combine
1991                interval operators with these because of, e.g., `a{2}*',
1992                which should only match an even number of `a's.  */
1993 
1994             for (;;)
1995               {
1996                 zero_times_ok |= c != '+';
1997                 many_times_ok |= c != '?';
1998 
1999                 if (p == pend)
2000                   break;
2001 
2002                 PATFETCH (c);
2003 
2004                 if (c == '*'
2005                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
2006                   ;
2007 
2008                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
2009                   {
2010                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2011 
2012                     PATFETCH (c1);
2013                     if (!(c1 == '+' || c1 == '?'))
2014                       {
2015                         PATUNFETCH;
2016                         PATUNFETCH;
2017                         break;
2018                       }
2019 
2020                     c = c1;
2021                   }
2022                 else
2023                   {
2024                     PATUNFETCH;
2025                     break;
2026                   }
2027 
2028                 /* If we get here, we found another repeat character.  */
2029                }
2030 
2031             /* Star, etc. applied to an empty pattern is equivalent
2032                to an empty pattern.  */
2033             if (!laststart)
2034               break;
2035 
2036             /* Now we know whether or not zero matches is allowed
2037                and also whether or not two or more matches is allowed.  */
2038             if (many_times_ok)
2039               { /* More than one repetition is allowed, so put in at the
2040                    end a backward relative jump from `b' to before the next
2041                    jump we're going to put in below (which jumps from
2042                    laststart to after this jump).
2043 
2044                    But if we are at the `*' in the exact sequence `.*\n',
2045                    insert an unconditional jump backwards to the .,
2046                    instead of the beginning of the loop.  This way we only
2047                    push a failure point once, instead of every time
2048                    through the loop.  */
2049                 assert (p - 1 > pattern);
2050 
2051                 /* Allocate the space for the jump.  */
2052                 GET_BUFFER_SPACE (3);
2053 
2054                 /* We know we are not at the first character of the pattern,
2055                    because laststart was nonzero.  And we've already
2056                    incremented `p', by the way, to be the character after
2057                    the `*'.  Do we have to do something analogous here
2058                    for null bytes, because of RE_DOT_NOT_NULL?  */
2059                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2060 		    && zero_times_ok
2061                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2062                     && !(syntax & RE_DOT_NEWLINE))
2063                   { /* We have .*\n.  */
2064                     STORE_JUMP (jump, b, laststart);
2065                     keep_string_p = true;
2066                   }
2067                 else
2068                   /* Anything else.  */
2069                   STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2070 
2071                 /* We've added more stuff to the buffer.  */
2072                 b += 3;
2073               }
2074 
2075             /* On failure, jump from laststart to b + 3, which will be the
2076                end of the buffer after this jump is inserted.  */
2077             GET_BUFFER_SPACE (3);
2078             INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2079                                        : on_failure_jump,
2080                          laststart, b + 3);
2081             pending_exact = 0;
2082             b += 3;
2083 
2084             if (!zero_times_ok)
2085               {
2086                 /* At least one repetition is required, so insert a
2087                    `dummy_failure_jump' before the initial
2088                    `on_failure_jump' instruction of the loop. This
2089                    effects a skip over that instruction the first time
2090                    we hit that loop.  */
2091                 GET_BUFFER_SPACE (3);
2092                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2093                 b += 3;
2094               }
2095             }
2096 	  break;
2097 
2098 
2099 	case '.':
2100           laststart = b;
2101           BUF_PUSH (anychar);
2102           break;
2103 
2104 
2105         case '[':
2106           {
2107             boolean had_char_class = false;
2108 
2109             if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2110 
2111             /* Ensure that we have enough space to push a charset: the
2112                opcode, the length count, and the bitset; 34 bytes in all.  */
2113 	    GET_BUFFER_SPACE (34);
2114 
2115             laststart = b;
2116 
2117             /* We test `*p == '^' twice, instead of using an if
2118                statement, so we only need one BUF_PUSH.  */
2119             BUF_PUSH (*p == '^' ? charset_not : charset);
2120             if (*p == '^')
2121               p++;
2122 
2123             /* Remember the first position in the bracket expression.  */
2124             p1 = p;
2125 
2126             /* Push the number of bytes in the bitmap.  */
2127             BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2128 
2129             /* Clear the whole map.  */
2130             bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2131 
2132             /* charset_not matches newline according to a syntax bit.  */
2133             if ((re_opcode_t) b[-2] == charset_not
2134                 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2135               SET_LIST_BIT ('\n');
2136 
2137             /* Read in characters and ranges, setting map bits.  */
2138             for (;;)
2139               {
2140                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2141 
2142                 PATFETCH (c);
2143 
2144                 /* \ might escape characters inside [...] and [^...].  */
2145                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2146                   {
2147                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2148 
2149                     PATFETCH (c1);
2150                     SET_LIST_BIT (c1);
2151                     continue;
2152                   }
2153 
2154                 /* Could be the end of the bracket expression.  If it's
2155                    not (i.e., when the bracket expression is `[]' so
2156                    far), the ']' character bit gets set way below.  */
2157                 if (c == ']' && p != p1 + 1)
2158                   break;
2159 
2160                 /* Look ahead to see if it's a range when the last thing
2161                    was a character class.  */
2162                 if (had_char_class && c == '-' && *p != ']')
2163                   FREE_STACK_RETURN (REG_ERANGE);
2164 
2165                 /* Look ahead to see if it's a range when the last thing
2166                    was a character: if this is a hyphen not at the
2167                    beginning or the end of a list, then it's the range
2168                    operator.  */
2169                 if (c == '-'
2170                     && !(p - 2 >= pattern && p[-2] == '[')
2171                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2172                     && *p != ']')
2173                   {
2174                     reg_errcode_t ret
2175                       = compile_range (&p, pend, translate, syntax, b);
2176                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2177                   }
2178 
2179                 else if (p[0] == '-' && p[1] != ']')
2180                   { /* This handles ranges made up of characters only.  */
2181                     reg_errcode_t ret;
2182 
2183 		    /* Move past the `-'.  */
2184                     PATFETCH (c1);
2185 
2186                     ret = compile_range (&p, pend, translate, syntax, b);
2187                     if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2188                   }
2189 
2190                 /* See if we're at the beginning of a possible character
2191                    class.  */
2192 
2193                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2194                   { /* Leave room for the null.  */
2195                     char str[CHAR_CLASS_MAX_LENGTH + 1];
2196 
2197                     PATFETCH (c);
2198                     c1 = 0;
2199 
2200                     /* If pattern is `[[:'.  */
2201                     if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2202 
2203                     for (;;)
2204                       {
2205                         PATFETCH (c);
2206                         if (c == ':' || c == ']' || p == pend
2207                             || c1 == CHAR_CLASS_MAX_LENGTH)
2208                           break;
2209                         str[c1++] = c;
2210                       }
2211                     str[c1] = '\0';
2212 
2213                     /* If isn't a word bracketed by `[:' and:`]':
2214                        undo the ending character, the letters, and leave
2215                        the leading `:' and `[' (but set bits for them).  */
2216                     if (c == ':' && *p == ']')
2217                       {
2218 #ifdef BROKEN_LOCALE
2219 			FREE_STACK_RETURN(REG_ECTYPE);
2220 #else
2221 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
2222                         boolean is_lower = STREQ (str, "lower");
2223                         boolean is_upper = STREQ (str, "upper");
2224 			wctype_t wt;
2225                         int ch;
2226 
2227 			wt = wctype (str);
2228 			if (wt == 0)
2229 			  FREE_STACK_RETURN (REG_ECTYPE);
2230 
2231                         /* Throw away the ] at the end of the character
2232                            class.  */
2233                         PATFETCH (c);
2234 
2235                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2236 
2237                         for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2238 			  {
2239 			    if (iswctype (btowc (ch), wt))
2240 			      SET_LIST_BIT (ch);
2241 
2242 			    if (translate && (is_upper || is_lower)
2243 				&& (ISUPPER (ch) || ISLOWER (ch)))
2244 			      SET_LIST_BIT (ch);
2245 			  }
2246 
2247                         had_char_class = true;
2248 #else
2249                         int ch;
2250                         boolean is_alnum = STREQ (str, "alnum");
2251                         boolean is_alpha = STREQ (str, "alpha");
2252                         boolean is_blank = STREQ (str, "blank");
2253                         boolean is_cntrl = STREQ (str, "cntrl");
2254                         boolean is_digit = STREQ (str, "digit");
2255                         boolean is_graph = STREQ (str, "graph");
2256                         boolean is_lower = STREQ (str, "lower");
2257                         boolean is_print = STREQ (str, "print");
2258                         boolean is_punct = STREQ (str, "punct");
2259                         boolean is_space = STREQ (str, "space");
2260                         boolean is_upper = STREQ (str, "upper");
2261                         boolean is_xdigit = STREQ (str, "xdigit");
2262 
2263                         if (!IS_CHAR_CLASS (str))
2264 			  FREE_STACK_RETURN (REG_ECTYPE);
2265 
2266                         /* Throw away the ] at the end of the character
2267                            class.  */
2268                         PATFETCH (c);
2269 
2270                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2271 
2272                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2273                           {
2274 			    /* This was split into 3 if's to
2275 			       avoid an arbitrary limit in some compiler.  */
2276                             if (   (is_alnum  && ISALNUM (ch))
2277                                 || (is_alpha  && ISALPHA (ch))
2278                                 || (is_blank  && ISBLANK (ch))
2279                                 || (is_cntrl  && ISCNTRL (ch)))
2280 			      SET_LIST_BIT (ch);
2281 			    if (   (is_digit  && ISDIGIT (ch))
2282                                 || (is_graph  && ISGRAPH (ch))
2283                                 || (is_lower  && ISLOWER (ch))
2284                                 || (is_print  && ISPRINT (ch)))
2285 			      SET_LIST_BIT (ch);
2286 			    if (   (is_punct  && ISPUNCT (ch))
2287                                 || (is_space  && ISSPACE (ch))
2288                                 || (is_upper  && ISUPPER (ch))
2289                                 || (is_xdigit && ISXDIGIT (ch)))
2290 			      SET_LIST_BIT (ch);
2291 			    if (   translate && (is_upper || is_lower)
2292 				&& (ISUPPER (ch) || ISLOWER (ch)))
2293 			      SET_LIST_BIT (ch);
2294                           }
2295                         had_char_class = true;
2296 #endif	/* libc || wctype.h */
2297 #endif  /* BROKEN_LOCALE */
2298                       }
2299                     else
2300                       {
2301                         c1++;
2302                         while (c1--)
2303                           PATUNFETCH;
2304                         SET_LIST_BIT ('[');
2305                         SET_LIST_BIT (':');
2306                         had_char_class = false;
2307                       }
2308                   }
2309                 else
2310                   {
2311                     had_char_class = false;
2312                     SET_LIST_BIT (c);
2313                   }
2314               }
2315 
2316             /* Discard any (non)matching list bytes that are all 0 at the
2317                end of the map.  Decrease the map-length byte too.  */
2318             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2319               b[-1]--;
2320             b += b[-1];
2321           }
2322           break;
2323 
2324 
2325 	case '(':
2326           if (syntax & RE_NO_BK_PARENS)
2327             goto handle_open;
2328           else
2329             goto normal_char;
2330 
2331 
2332         case ')':
2333           if (syntax & RE_NO_BK_PARENS)
2334             goto handle_close;
2335           else
2336             goto normal_char;
2337 
2338 
2339         case '\n':
2340           if (syntax & RE_NEWLINE_ALT)
2341             goto handle_alt;
2342           else
2343             goto normal_char;
2344 
2345 
2346 	case '|':
2347           if (syntax & RE_NO_BK_VBAR)
2348             goto handle_alt;
2349           else
2350             goto normal_char;
2351 
2352 
2353         case '{':
2354            if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2355              goto handle_interval;
2356            else
2357              goto normal_char;
2358 
2359 
2360         case '\\':
2361           if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2362 
2363           /* Do not translate the character after the \, so that we can
2364              distinguish, e.g., \B from \b, even if we normally would
2365              translate, e.g., B to b.  */
2366           PATFETCH_RAW (c);
2367 
2368           switch (c)
2369             {
2370             case '(':
2371               if (syntax & RE_NO_BK_PARENS)
2372                 goto normal_backslash;
2373 
2374             handle_open:
2375               bufp->re_nsub++;
2376               regnum++;
2377 
2378               if (COMPILE_STACK_FULL)
2379                 {
2380                   RETALLOC (compile_stack.stack, compile_stack.size << 1,
2381                             compile_stack_elt_t);
2382                   if (compile_stack.stack == NULL) return REG_ESPACE;
2383 
2384                   compile_stack.size <<= 1;
2385                 }
2386 
2387               /* These are the values to restore when we hit end of this
2388                  group.  They are all relative offsets, so that if the
2389                  whole pattern moves because of realloc, they will still
2390                  be valid.  */
2391               COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2392               COMPILE_STACK_TOP.fixup_alt_jump
2393                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2394               COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2395               COMPILE_STACK_TOP.regnum = regnum;
2396 
2397               /* We will eventually replace the 0 with the number of
2398                  groups inner to this one.  But do not push a
2399                  start_memory for groups beyond the last one we can
2400                  represent in the compiled pattern.  */
2401               if (regnum <= MAX_REGNUM)
2402                 {
2403                   COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2404                   BUF_PUSH_3 (start_memory, regnum, 0);
2405                 }
2406 
2407               compile_stack.avail++;
2408 
2409               fixup_alt_jump = 0;
2410               laststart = 0;
2411               begalt = b;
2412 	      /* If we've reached MAX_REGNUM groups, then this open
2413 		 won't actually generate any code, so we'll have to
2414 		 clear pending_exact explicitly.  */
2415 	      pending_exact = 0;
2416               break;
2417 
2418 
2419             case ')':
2420               if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2421 
2422               if (COMPILE_STACK_EMPTY)
2423                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2424                   goto normal_backslash;
2425                 else
2426                   FREE_STACK_RETURN (REG_ERPAREN);
2427 
2428             handle_close:
2429               if (fixup_alt_jump)
2430                 { /* Push a dummy failure point at the end of the
2431                      alternative for a possible future
2432                      `pop_failure_jump' to pop.  See comments at
2433                      `push_dummy_failure' in `re_match_2'.  */
2434                   BUF_PUSH (push_dummy_failure);
2435 
2436                   /* We allocated space for this jump when we assigned
2437                      to `fixup_alt_jump', in the `handle_alt' case below.  */
2438                   STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2439                 }
2440 
2441               /* See similar code for backslashed left paren above.  */
2442               if (COMPILE_STACK_EMPTY)
2443                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2444                   goto normal_char;
2445                 else
2446                   FREE_STACK_RETURN (REG_ERPAREN);
2447 
2448               /* Since we just checked for an empty stack above, this
2449                  ``can't happen''.  */
2450               assert (compile_stack.avail != 0);
2451               {
2452                 /* We don't just want to restore into `regnum', because
2453                    later groups should continue to be numbered higher,
2454                    as in `(ab)c(de)' -- the second group is #2.  */
2455                 regnum_t this_group_regnum;
2456 
2457                 compile_stack.avail--;
2458                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2459                 fixup_alt_jump
2460                   = COMPILE_STACK_TOP.fixup_alt_jump
2461                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2462                     : 0;
2463                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2464                 this_group_regnum = COMPILE_STACK_TOP.regnum;
2465 		/* If we've reached MAX_REGNUM groups, then this open
2466 		   won't actually generate any code, so we'll have to
2467 		   clear pending_exact explicitly.  */
2468 		pending_exact = 0;
2469 
2470                 /* We're at the end of the group, so now we know how many
2471                    groups were inside this one.  */
2472                 if (this_group_regnum <= MAX_REGNUM)
2473                   {
2474                     unsigned char *inner_group_loc
2475                       = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2476 
2477                     *inner_group_loc = regnum - this_group_regnum;
2478                     BUF_PUSH_3 (stop_memory, this_group_regnum,
2479                                 regnum - this_group_regnum);
2480                   }
2481               }
2482               break;
2483 
2484 
2485             case '|':					/* `\|'.  */
2486               if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2487                 goto normal_backslash;
2488             handle_alt:
2489               if (syntax & RE_LIMITED_OPS)
2490                 goto normal_char;
2491 
2492               /* Insert before the previous alternative a jump which
2493                  jumps to this alternative if the former fails.  */
2494               GET_BUFFER_SPACE (3);
2495               INSERT_JUMP (on_failure_jump, begalt, b + 6);
2496               pending_exact = 0;
2497               b += 3;
2498 
2499               /* The alternative before this one has a jump after it
2500                  which gets executed if it gets matched.  Adjust that
2501                  jump so it will jump to this alternative's analogous
2502                  jump (put in below, which in turn will jump to the next
2503                  (if any) alternative's such jump, etc.).  The last such
2504                  jump jumps to the correct final destination.  A picture:
2505                           _____ _____
2506                           |   | |   |
2507                           |   v |   v
2508                          a | b   | c
2509 
2510                  If we are at `b', then fixup_alt_jump right now points to a
2511                  three-byte space after `a'.  We'll put in the jump, set
2512                  fixup_alt_jump to right after `b', and leave behind three
2513                  bytes which we'll fill in when we get to after `c'.  */
2514 
2515               if (fixup_alt_jump)
2516                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2517 
2518               /* Mark and leave space for a jump after this alternative,
2519                  to be filled in later either by next alternative or
2520                  when know we're at the end of a series of alternatives.  */
2521               fixup_alt_jump = b;
2522               GET_BUFFER_SPACE (3);
2523               b += 3;
2524 
2525               laststart = 0;
2526               begalt = b;
2527               break;
2528 
2529 
2530             case '{':
2531               /* If \{ is a literal.  */
2532               if (!(syntax & RE_INTERVALS)
2533                      /* If we're at `\{' and it's not the open-interval
2534                         operator.  */
2535                   || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2536                   || (p - 2 == pattern  &&  p == pend))
2537                 goto normal_backslash;
2538 
2539             handle_interval:
2540               {
2541                 /* If got here, then the syntax allows intervals.  */
2542 
2543                 /* At least (most) this many matches must be made.  */
2544                 int lower_bound = -1, upper_bound = -1;
2545 
2546                 beg_interval = p - 1;
2547 
2548                 if (p == pend)
2549                   {
2550                     if (syntax & RE_NO_BK_BRACES)
2551                       goto unfetch_interval;
2552                     else
2553                       FREE_STACK_RETURN (REG_EBRACE);
2554                   }
2555 
2556                 GET_UNSIGNED_NUMBER (lower_bound);
2557 
2558                 if (c == ',')
2559                   {
2560                     GET_UNSIGNED_NUMBER (upper_bound);
2561                     if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2562                   }
2563                 else
2564                   /* Interval such as `{1}' => match exactly once. */
2565                   upper_bound = lower_bound;
2566 
2567                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2568                     || lower_bound > upper_bound)
2569                   {
2570                     if (syntax & RE_NO_BK_BRACES)
2571                       goto unfetch_interval;
2572                     else
2573                       FREE_STACK_RETURN (REG_BADBR);
2574                   }
2575 
2576                 if (!(syntax & RE_NO_BK_BRACES))
2577                   {
2578                     if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2579 
2580                     PATFETCH (c);
2581                   }
2582 
2583                 if (c != '}')
2584                   {
2585                     if (syntax & RE_NO_BK_BRACES)
2586                       goto unfetch_interval;
2587                     else
2588                       FREE_STACK_RETURN (REG_BADBR);
2589                   }
2590 
2591                 /* We just parsed a valid interval.  */
2592 
2593                 /* If it's invalid to have no preceding re.  */
2594                 if (!laststart)
2595                   {
2596                     if (syntax & RE_CONTEXT_INVALID_OPS)
2597                       FREE_STACK_RETURN (REG_BADRPT);
2598                     else if (syntax & RE_CONTEXT_INDEP_OPS)
2599                       laststart = b;
2600                     else
2601                       goto unfetch_interval;
2602                   }
2603 
2604                 /* If the upper bound is zero, don't want to succeed at
2605                    all; jump from `laststart' to `b + 3', which will be
2606                    the end of the buffer after we insert the jump.  */
2607                  if (upper_bound == 0)
2608                    {
2609                      GET_BUFFER_SPACE (3);
2610                      INSERT_JUMP (jump, laststart, b + 3);
2611                      b += 3;
2612                    }
2613 
2614                  /* Otherwise, we have a nontrivial interval.  When
2615                     we're all done, the pattern will look like:
2616                       set_number_at <jump count> <upper bound>
2617                       set_number_at <succeed_n count> <lower bound>
2618                       succeed_n <after jump addr> <succeed_n count>
2619                       <body of loop>
2620                       jump_n <succeed_n addr> <jump count>
2621                     (The upper bound and `jump_n' are omitted if
2622                     `upper_bound' is 1, though.)  */
2623                  else
2624                    { /* If the upper bound is > 1, we need to insert
2625                         more at the end of the loop.  */
2626                      unsigned nbytes = 10 + (upper_bound > 1) * 10;
2627 
2628                      GET_BUFFER_SPACE (nbytes);
2629 
2630                      /* Initialize lower bound of the `succeed_n', even
2631                         though it will be set during matching by its
2632                         attendant `set_number_at' (inserted next),
2633                         because `re_compile_fastmap' needs to know.
2634                         Jump to the `jump_n' we might insert below.  */
2635                      INSERT_JUMP2 (succeed_n, laststart,
2636                                    b + 5 + (upper_bound > 1) * 5,
2637                                    lower_bound);
2638                      b += 5;
2639 
2640                      /* Code to initialize the lower bound.  Insert
2641                         before the `succeed_n'.  The `5' is the last two
2642                         bytes of this `set_number_at', plus 3 bytes of
2643                         the following `succeed_n'.  */
2644                      insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2645                      b += 5;
2646 
2647                      if (upper_bound > 1)
2648                        { /* More than one repetition is allowed, so
2649                             append a backward jump to the `succeed_n'
2650                             that starts this interval.
2651 
2652                             When we've reached this during matching,
2653                             we'll have matched the interval once, so
2654                             jump back only `upper_bound - 1' times.  */
2655                          STORE_JUMP2 (jump_n, b, laststart + 5,
2656                                       upper_bound - 1);
2657                          b += 5;
2658 
2659                          /* The location we want to set is the second
2660                             parameter of the `jump_n'; that is `b-2' as
2661                             an absolute address.  `laststart' will be
2662                             the `set_number_at' we're about to insert;
2663                             `laststart+3' the number to set, the source
2664                             for the relative address.  But we are
2665                             inserting into the middle of the pattern --
2666                             so everything is getting moved up by 5.
2667                             Conclusion: (b - 2) - (laststart + 3) + 5,
2668                             i.e., b - laststart.
2669 
2670                             We insert this at the beginning of the loop
2671                             so that if we fail during matching, we'll
2672                             reinitialize the bounds.  */
2673                          insert_op2 (set_number_at, laststart, b - laststart,
2674                                      upper_bound - 1, b);
2675                          b += 5;
2676                        }
2677                    }
2678                 pending_exact = 0;
2679                 beg_interval = NULL;
2680               }
2681               break;
2682 
2683             unfetch_interval:
2684               /* If an invalid interval, match the characters as literals.  */
2685                assert (beg_interval);
2686                p = beg_interval;
2687                beg_interval = NULL;
2688 
2689                /* normal_char and normal_backslash need `c'.  */
2690                PATFETCH (c);
2691 
2692                if (!(syntax & RE_NO_BK_BRACES))
2693                  {
2694                    if (p > pattern  &&  p[-1] == '\\')
2695                      goto normal_backslash;
2696                  }
2697                goto normal_char;
2698 
2699 #ifdef emacs
2700             /* There is no way to specify the before_dot and after_dot
2701                operators.  rms says this is ok.  --karl  */
2702             case '=':
2703               BUF_PUSH (at_dot);
2704               break;
2705 
2706             case 's':
2707               laststart = b;
2708               PATFETCH (c);
2709               BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2710               break;
2711 
2712             case 'S':
2713               laststart = b;
2714               PATFETCH (c);
2715               BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2716               break;
2717 #endif /* emacs */
2718 
2719 
2720             case 'w':
2721 	      if (re_syntax_options & RE_NO_GNU_OPS)
2722 		goto normal_char;
2723               laststart = b;
2724               BUF_PUSH (wordchar);
2725               break;
2726 
2727 
2728             case 'W':
2729 	      if (re_syntax_options & RE_NO_GNU_OPS)
2730 		goto normal_char;
2731               laststart = b;
2732               BUF_PUSH (notwordchar);
2733               break;
2734 
2735 
2736             case '<':
2737 	      if (re_syntax_options & RE_NO_GNU_OPS)
2738 		goto normal_char;
2739               BUF_PUSH (wordbeg);
2740               break;
2741 
2742             case '>':
2743 	      if (re_syntax_options & RE_NO_GNU_OPS)
2744 		goto normal_char;
2745               BUF_PUSH (wordend);
2746               break;
2747 
2748             case 'b':
2749 	      if (re_syntax_options & RE_NO_GNU_OPS)
2750 		goto normal_char;
2751               BUF_PUSH (wordbound);
2752               break;
2753 
2754             case 'B':
2755 	      if (re_syntax_options & RE_NO_GNU_OPS)
2756 		goto normal_char;
2757               BUF_PUSH (notwordbound);
2758               break;
2759 
2760             case '`':
2761 	      if (re_syntax_options & RE_NO_GNU_OPS)
2762 		goto normal_char;
2763               BUF_PUSH (begbuf);
2764               break;
2765 
2766             case '\'':
2767 	      if (re_syntax_options & RE_NO_GNU_OPS)
2768 		goto normal_char;
2769               BUF_PUSH (endbuf);
2770               break;
2771 
2772             case '1': case '2': case '3': case '4': case '5':
2773             case '6': case '7': case '8': case '9':
2774               if (syntax & RE_NO_BK_REFS)
2775                 goto normal_char;
2776 
2777               c1 = c - '0';
2778 
2779               if (c1 > regnum)
2780                 FREE_STACK_RETURN (REG_ESUBREG);
2781 
2782               /* Can't back reference to a subexpression if inside of it.  */
2783               if (group_in_compile_stack (compile_stack, (regnum_t) c1))
2784                 goto normal_char;
2785 
2786               laststart = b;
2787               BUF_PUSH_2 (duplicate, c1);
2788               break;
2789 
2790 
2791             case '+':
2792             case '?':
2793               if (syntax & RE_BK_PLUS_QM)
2794                 goto handle_plus;
2795               else
2796                 goto normal_backslash;
2797 
2798             default:
2799             normal_backslash:
2800               /* You might think it would be useful for \ to mean
2801                  not to translate; but if we don't translate it
2802                  it will never match anything.  */
2803               c = TRANSLATE (c);
2804               goto normal_char;
2805             }
2806           break;
2807 
2808 
2809 	default:
2810         /* Expects the character in `c'.  */
2811 	normal_char:
2812 	      /* If no exactn currently being built.  */
2813           if (!pending_exact
2814 
2815               /* If last exactn not at current position.  */
2816               || pending_exact + *pending_exact + 1 != b
2817 
2818               /* We have only one byte following the exactn for the count.  */
2819 	      || *pending_exact == (1 << BYTEWIDTH) - 1
2820 
2821               /* If followed by a repetition operator.  */
2822               || *p == '*' || *p == '^'
2823 	      || ((syntax & RE_BK_PLUS_QM)
2824 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2825 		  : (*p == '+' || *p == '?'))
2826 	      || ((syntax & RE_INTERVALS)
2827                   && ((syntax & RE_NO_BK_BRACES)
2828 		      ? *p == '{'
2829                       : (p[0] == '\\' && p[1] == '{'))))
2830 	    {
2831 	      /* Start building a new exactn.  */
2832 
2833               laststart = b;
2834 
2835 	      BUF_PUSH_2 (exactn, 0);
2836 	      pending_exact = b - 1;
2837             }
2838 
2839 	  BUF_PUSH (c);
2840           (*pending_exact)++;
2841 	  break;
2842         } /* switch (c) */
2843     } /* while p != pend */
2844 
2845 
2846   /* Through the pattern now.  */
2847 
2848   if (fixup_alt_jump)
2849     STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2850 
2851   if (!COMPILE_STACK_EMPTY)
2852     FREE_STACK_RETURN (REG_EPAREN);
2853 
2854   /* If we don't want backtracking, force success
2855      the first time we reach the end of the compiled pattern.  */
2856   if (syntax & RE_NO_POSIX_BACKTRACKING)
2857     BUF_PUSH (succeed);
2858 
2859   free (compile_stack.stack);
2860 
2861   /* We have succeeded; set the length of the buffer.  */
2862   bufp->used = b - bufp->buffer;
2863 
2864 #ifdef DEBUG
2865   if (debug)
2866     {
2867       DEBUG_PRINT1 ("\nCompiled pattern: \n");
2868       print_compiled_pattern (bufp);
2869     }
2870 #endif /* DEBUG */
2871 
2872 #ifndef MATCH_MAY_ALLOCATE
2873   /* Initialize the failure stack to the largest possible stack.  This
2874      isn't necessary unless we're trying to avoid calling alloca in
2875      the search and match routines.  */
2876   {
2877     int num_regs = bufp->re_nsub + 1;
2878 
2879     /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2880        is strictly greater than re_max_failures, the largest possible stack
2881        is 2 * re_max_failures failure points.  */
2882     if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2883       {
2884 	fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2885 
2886 #ifdef emacs
2887 	if (! fail_stack.stack)
2888 	  fail_stack.stack
2889 	    = (fail_stack_elt_t *) xmalloc (fail_stack.size
2890 					    * sizeof (fail_stack_elt_t));
2891 	else
2892 	  fail_stack.stack
2893 	    = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2894 					     (fail_stack.size
2895 					      * sizeof (fail_stack_elt_t)));
2896 #else /* not emacs */
2897 	if (! fail_stack.stack)
2898 	  fail_stack.stack
2899 	    = (fail_stack_elt_t *) malloc (fail_stack.size
2900 					   * sizeof (fail_stack_elt_t));
2901 	else
2902 	  fail_stack.stack
2903 	    = (fail_stack_elt_t *) realloc (fail_stack.stack,
2904 					    (fail_stack.size
2905 					     * sizeof (fail_stack_elt_t)));
2906 #endif /* not emacs */
2907       }
2908 
2909     regex_grow_registers (num_regs);
2910   }
2911 #endif /* not MATCH_MAY_ALLOCATE */
2912 
2913   return REG_NOERROR;
2914 } /* regex_compile */
2915 
2916 /* Subroutines for `regex_compile'.  */
2917 
2918 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
2919 
2920 static void
store_op1(op,loc,arg)2921 store_op1 (op, loc, arg)
2922     re_opcode_t op;
2923     unsigned char *loc;
2924     int arg;
2925 {
2926   *loc = (unsigned char) op;
2927   STORE_NUMBER (loc + 1, arg);
2928 }
2929 
2930 
2931 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2932 
2933 static void
store_op2(op,loc,arg1,arg2)2934 store_op2 (op, loc, arg1, arg2)
2935     re_opcode_t op;
2936     unsigned char *loc;
2937     int arg1, arg2;
2938 {
2939   *loc = (unsigned char) op;
2940   STORE_NUMBER (loc + 1, arg1);
2941   STORE_NUMBER (loc + 3, arg2);
2942 }
2943 
2944 
2945 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2946    for OP followed by two-byte integer parameter ARG.  */
2947 
2948 static void
insert_op1(op,loc,arg,end)2949 insert_op1 (op, loc, arg, end)
2950     re_opcode_t op;
2951     unsigned char *loc;
2952     int arg;
2953     unsigned char *end;
2954 {
2955   register unsigned char *pfrom = end;
2956   register unsigned char *pto = end + 3;
2957 
2958   while (pfrom != loc)
2959     *--pto = *--pfrom;
2960 
2961   store_op1 (op, loc, arg);
2962 }
2963 
2964 
2965 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2966 
2967 static void
insert_op2(op,loc,arg1,arg2,end)2968 insert_op2 (op, loc, arg1, arg2, end)
2969     re_opcode_t op;
2970     unsigned char *loc;
2971     int arg1, arg2;
2972     unsigned char *end;
2973 {
2974   register unsigned char *pfrom = end;
2975   register unsigned char *pto = end + 5;
2976 
2977   while (pfrom != loc)
2978     *--pto = *--pfrom;
2979 
2980   store_op2 (op, loc, arg1, arg2);
2981 }
2982 
2983 
2984 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2985    after an alternative or a begin-subexpression.  We assume there is at
2986    least one character before the ^.  */
2987 
2988 static boolean
at_begline_loc_p(pattern,p,syntax)2989 at_begline_loc_p (pattern, p, syntax)
2990     const char *pattern, *p;
2991     reg_syntax_t syntax;
2992 {
2993   const char *prev = p - 2;
2994   boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2995 
2996   return
2997        /* After a subexpression?  */
2998        (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2999        /* After an alternative?  */
3000     || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
3001 }
3002 
3003 
3004 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
3005    at least one character after the $, i.e., `P < PEND'.  */
3006 
3007 static boolean
at_endline_loc_p(p,pend,syntax)3008 at_endline_loc_p (p, pend, syntax)
3009     const char *p, *pend;
3010     reg_syntax_t syntax;
3011 {
3012   const char *next = p;
3013   boolean next_backslash = *next == '\\';
3014   const char *next_next = p + 1 < pend ? p + 1 : 0;
3015 
3016   return
3017        /* Before a subexpression?  */
3018        (syntax & RE_NO_BK_PARENS ? *next == ')'
3019         : next_backslash && next_next && *next_next == ')')
3020        /* Before an alternative?  */
3021     || (syntax & RE_NO_BK_VBAR ? *next == '|'
3022         : next_backslash && next_next && *next_next == '|');
3023 }
3024 
3025 
3026 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3027    false if it's not.  */
3028 
3029 static boolean
group_in_compile_stack(compile_stack,regnum)3030 group_in_compile_stack (compile_stack, regnum)
3031     compile_stack_type compile_stack;
3032     regnum_t regnum;
3033 {
3034   int this_element;
3035 
3036   for (this_element = compile_stack.avail - 1;
3037        this_element >= 0;
3038        this_element--)
3039     if (compile_stack.stack[this_element].regnum == regnum)
3040       return true;
3041 
3042   return false;
3043 }
3044 
3045 
3046 /* Read the ending character of a range (in a bracket expression) from the
3047    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3048    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3049    Then we set the translation of all bits between the starting and
3050    ending characters (inclusive) in the compiled pattern B.
3051 
3052    Return an error code.
3053 
3054    We use these short variable names so we can use the same macros as
3055    `regex_compile' itself.  */
3056 
3057 static reg_errcode_t
compile_range(p_ptr,pend,translate,syntax,b)3058 compile_range (p_ptr, pend, translate, syntax, b)
3059     const char **p_ptr, *pend;
3060     RE_TRANSLATE_TYPE translate;
3061     reg_syntax_t syntax;
3062     unsigned char *b;
3063 {
3064   unsigned this_char;
3065 
3066   const char *p = *p_ptr;
3067   unsigned int range_start, range_end;
3068 
3069   if (p == pend)
3070     return REG_ERANGE;
3071 
3072   /* Even though the pattern is a signed `char *', we need to fetch
3073      with unsigned char *'s; if the high bit of the pattern character
3074      is set, the range endpoints will be negative if we fetch using a
3075      signed char *.
3076 
3077      We also want to fetch the endpoints without translating them; the
3078      appropriate translation is done in the bit-setting loop below.  */
3079   /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3080   range_start = ((const unsigned char *) p)[-2];
3081   range_end   = ((const unsigned char *) p)[0];
3082 
3083   /* Have to increment the pointer into the pattern string, so the
3084      caller isn't still at the ending character.  */
3085   (*p_ptr)++;
3086 
3087   /* If the start is after the end, the range is empty.  */
3088   if (range_start > range_end)
3089     return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3090 
3091   /* Here we see why `this_char' has to be larger than an `unsigned
3092      char' -- the range is inclusive, so if `range_end' == 0xff
3093      (assuming 8-bit characters), we would otherwise go into an infinite
3094      loop, since all characters <= 0xff.  */
3095   for (this_char = range_start; this_char <= range_end; this_char++)
3096     {
3097       SET_LIST_BIT (TRANSLATE (this_char));
3098     }
3099 
3100   return REG_NOERROR;
3101 }
3102 
3103 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3104    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3105    characters can start a string that matches the pattern.  This fastmap
3106    is used by re_search to skip quickly over impossible starting points.
3107 
3108    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3109    area as BUFP->fastmap.
3110 
3111    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3112    the pattern buffer.
3113 
3114    Returns 0 if we succeed, -2 if an internal error.   */
3115 
3116 int
re_compile_fastmap(bufp)3117 re_compile_fastmap (bufp)
3118      struct re_pattern_buffer *bufp;
3119 {
3120   int j, k;
3121 #ifdef MATCH_MAY_ALLOCATE
3122   fail_stack_type fail_stack;
3123 #endif
3124 #ifndef REGEX_MALLOC
3125   char *destination;
3126 #endif
3127   /* We don't push any register information onto the failure stack.  */
3128 /*  unsigned num_regs = 0;	superfluous */
3129 
3130   register char *fastmap = bufp->fastmap;
3131   unsigned char *pattern = bufp->buffer;
3132   unsigned char *p = pattern;
3133   register unsigned char *pend = pattern + bufp->used;
3134 
3135 #ifdef REL_ALLOC
3136   /* This holds the pointer to the failure stack, when
3137      it is allocated relocatably.  */
3138   fail_stack_elt_t *failure_stack_ptr;
3139 #endif
3140 
3141   /* Assume that each path through the pattern can be null until
3142      proven otherwise.  We set this false at the bottom of switch
3143      statement, to which we get only if a particular path doesn't
3144      match the empty string.  */
3145   boolean path_can_be_null = true;
3146 
3147   /* We aren't doing a `succeed_n' to begin with.  */
3148   boolean succeed_n_p = false;
3149 
3150   assert (fastmap != NULL && p != NULL);
3151 
3152   INIT_FAIL_STACK ();
3153   bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
3154   bufp->fastmap_accurate = 1;	    /* It will be when we're done.  */
3155   bufp->can_be_null = 0;
3156 
3157   while (1)
3158     {
3159       if (p == pend || *p == succeed)
3160 	{
3161 	  /* We have reached the (effective) end of pattern.  */
3162 	  if (!FAIL_STACK_EMPTY ())
3163 	    {
3164 	      bufp->can_be_null |= path_can_be_null;
3165 
3166 	      /* Reset for next path.  */
3167 	      path_can_be_null = true;
3168 
3169 	      p = fail_stack.stack[--fail_stack.avail].pointer;
3170 
3171 	      continue;
3172 	    }
3173 	  else
3174 	    break;
3175 	}
3176 
3177       /* We should never be about to go beyond the end of the pattern.  */
3178       assert (p < pend);
3179 
3180       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3181 	{
3182 
3183         /* I guess the idea here is to simply not bother with a fastmap
3184            if a backreference is used, since it's too hard to figure out
3185            the fastmap for the corresponding group.  Setting
3186            `can_be_null' stops `re_search_2' from using the fastmap, so
3187            that is all we do.  */
3188 	case duplicate:
3189 	  bufp->can_be_null = 1;
3190           goto done;
3191 
3192 
3193       /* Following are the cases which match a character.  These end
3194          with `break'.  */
3195 
3196 	case exactn:
3197           fastmap[p[1]] = 1;
3198 	  break;
3199 
3200 
3201         case charset:
3202           for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3203 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3204               fastmap[j] = 1;
3205 	  break;
3206 
3207 
3208 	case charset_not:
3209 	  /* Chars beyond end of map must be allowed.  */
3210 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3211             fastmap[j] = 1;
3212 
3213 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3214 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3215               fastmap[j] = 1;
3216           break;
3217 
3218 
3219 	case wordchar:
3220 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3221 	    if (SYNTAX (j) == Sword)
3222 	      fastmap[j] = 1;
3223 	  break;
3224 
3225 
3226 	case notwordchar:
3227 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3228 	    if (SYNTAX (j) != Sword)
3229 	      fastmap[j] = 1;
3230 	  break;
3231 
3232 
3233         case anychar:
3234 	  {
3235 	    int fastmap_newline = fastmap['\n'];
3236 
3237 	    /* `.' matches anything ...  */
3238 	    for (j = 0; j < (1 << BYTEWIDTH); j++)
3239 	      fastmap[j] = 1;
3240 
3241 	    /* ... except perhaps newline.  */
3242 	    if (!(bufp->syntax & RE_DOT_NEWLINE))
3243 	      fastmap['\n'] = fastmap_newline;
3244 
3245 	    /* Return if we have already set `can_be_null'; if we have,
3246 	       then the fastmap is irrelevant.  Something's wrong here.  */
3247 	    else if (bufp->can_be_null)
3248 	      goto done;
3249 
3250 	    /* Otherwise, have to check alternative paths.  */
3251 	    break;
3252 	  }
3253 
3254 #ifdef emacs
3255         case syntaxspec:
3256 	  k = *p++;
3257 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3258 	    if (SYNTAX (j) == (enum syntaxcode) k)
3259 	      fastmap[j] = 1;
3260 	  break;
3261 
3262 
3263 	case notsyntaxspec:
3264 	  k = *p++;
3265 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
3266 	    if (SYNTAX (j) != (enum syntaxcode) k)
3267 	      fastmap[j] = 1;
3268 	  break;
3269 
3270 
3271       /* All cases after this match the empty string.  These end with
3272          `continue'.  */
3273 
3274 
3275 	case before_dot:
3276 	case at_dot:
3277 	case after_dot:
3278           continue;
3279 #endif /* emacs */
3280 
3281 
3282         case no_op:
3283         case begline:
3284         case endline:
3285 	case begbuf:
3286 	case endbuf:
3287 	case wordbound:
3288 	case notwordbound:
3289 	case wordbeg:
3290 	case wordend:
3291         case push_dummy_failure:
3292           continue;
3293 
3294 
3295 	case jump_n:
3296         case pop_failure_jump:
3297 	case maybe_pop_jump:
3298 	case jump:
3299         case jump_past_alt:
3300 	case dummy_failure_jump:
3301           EXTRACT_NUMBER_AND_INCR (j, p);
3302 	  p += j;
3303 	  if (j > 0)
3304 	    continue;
3305 
3306           /* Jump backward implies we just went through the body of a
3307              loop and matched nothing.  Opcode jumped to should be
3308              `on_failure_jump' or `succeed_n'.  Just treat it like an
3309              ordinary jump.  For a * loop, it has pushed its failure
3310              point already; if so, discard that as redundant.  */
3311           if ((re_opcode_t) *p != on_failure_jump
3312 	      && (re_opcode_t) *p != succeed_n)
3313 	    continue;
3314 
3315           p++;
3316           EXTRACT_NUMBER_AND_INCR (j, p);
3317           p += j;
3318 
3319           /* If what's on the stack is where we are now, pop it.  */
3320           if (!FAIL_STACK_EMPTY ()
3321 	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3322             fail_stack.avail--;
3323 
3324           continue;
3325 
3326 
3327         case on_failure_jump:
3328         case on_failure_keep_string_jump:
3329 	handle_on_failure_jump:
3330           EXTRACT_NUMBER_AND_INCR (j, p);
3331 
3332           /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3333              end of the pattern.  We don't want to push such a point,
3334              since when we restore it above, entering the switch will
3335              increment `p' past the end of the pattern.  We don't need
3336              to push such a point since we obviously won't find any more
3337              fastmap entries beyond `pend'.  Such a pattern can match
3338              the null string, though.  */
3339           if (p + j < pend)
3340             {
3341               if (!PUSH_PATTERN_OP (p + j, fail_stack))
3342 		{
3343 		  RESET_FAIL_STACK ();
3344 		  return -2;
3345 		}
3346             }
3347           else
3348             bufp->can_be_null = 1;
3349 
3350           if (succeed_n_p)
3351             {
3352               EXTRACT_NUMBER_AND_INCR (k, p);	/* Skip the n.  */
3353               succeed_n_p = false;
3354 	    }
3355 
3356           continue;
3357 
3358 
3359 	case succeed_n:
3360           /* Get to the number of times to succeed.  */
3361           p += 2;
3362 
3363           /* Increment p past the n for when k != 0.  */
3364           EXTRACT_NUMBER_AND_INCR (k, p);
3365           if (k == 0)
3366 	    {
3367               p -= 4;
3368   	      succeed_n_p = true;  /* Spaghetti code alert.  */
3369               goto handle_on_failure_jump;
3370             }
3371           continue;
3372 
3373 
3374 	case set_number_at:
3375           p += 4;
3376           continue;
3377 
3378 
3379 	case start_memory:
3380         case stop_memory:
3381 	  p += 2;
3382 	  continue;
3383 
3384 
3385 	default:
3386           call_abort (); /* We have listed all the cases.  */
3387         } /* switch *p++ */
3388 
3389       /* Getting here means we have found the possible starting
3390          characters for one path of the pattern -- and that the empty
3391          string does not match.  We need not follow this path further.
3392          Instead, look at the next alternative (remembered on the
3393          stack), or quit if no more.  The test at the top of the loop
3394          does these things.  */
3395       path_can_be_null = false;
3396       p = pend;
3397     } /* while p */
3398 
3399   /* Set `can_be_null' for the last path (also the first path, if the
3400      pattern is empty).  */
3401   bufp->can_be_null |= path_can_be_null;
3402 
3403  done:
3404   RESET_FAIL_STACK ();
3405   return 0;
3406 } /* re_compile_fastmap */
3407 
3408 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3409    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3410    this memory for recording register information.  STARTS and ENDS
3411    must be allocated using the malloc library routine, and must each
3412    be at least NUM_REGS * sizeof (regoff_t) bytes long.
3413 
3414    If NUM_REGS == 0, then subsequent matches should allocate their own
3415    register data.
3416 
3417    Unless this function is called, the first search or match using
3418    PATTERN_BUFFER will allocate its own register data, without
3419    freeing the old data.  */
3420 
3421 void
re_set_registers(bufp,regs,num_regs,starts,ends)3422 re_set_registers (bufp, regs, num_regs, starts, ends)
3423     struct re_pattern_buffer *bufp;
3424     struct re_registers *regs;
3425     unsigned num_regs;
3426     regoff_t *starts, *ends;
3427 {
3428   if (num_regs)
3429     {
3430       bufp->regs_allocated = REGS_REALLOCATE;
3431       regs->num_regs = num_regs;
3432       regs->start = starts;
3433       regs->end = ends;
3434     }
3435   else
3436     {
3437       bufp->regs_allocated = REGS_UNALLOCATED;
3438       regs->num_regs = 0;
3439       regs->start = regs->end = (regoff_t *) 0;
3440     }
3441 }
3442 
3443 /* Searching routines.  */
3444 
3445 /* Like re_search_2, below, but only one string is specified, and
3446    doesn't let you say where to stop matching. */
3447 
3448 int
re_search(bufp,string,size,startpos,range,regs)3449 re_search (bufp, string, size, startpos, range, regs)
3450      struct re_pattern_buffer *bufp;
3451      const char *string;
3452      int size, startpos, range;
3453      struct re_registers *regs;
3454 {
3455   return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3456 		      regs, size);
3457 }
3458 
3459 
3460 /* Using the compiled pattern in BUFP->buffer, first tries to match the
3461    virtual concatenation of STRING1 and STRING2, starting first at index
3462    STARTPOS, then at STARTPOS + 1, and so on.
3463 
3464    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3465 
3466    RANGE is how far to scan while trying to match.  RANGE = 0 means try
3467    only at STARTPOS; in general, the last start tried is STARTPOS +
3468    RANGE.
3469 
3470    In REGS, return the indices of the virtual concatenation of STRING1
3471    and STRING2 that matched the entire BUFP->buffer and its contained
3472    subexpressions.
3473 
3474    Do not consider matching one past the index STOP in the virtual
3475    concatenation of STRING1 and STRING2.
3476 
3477    We return either the position in the strings at which the match was
3478    found, -1 if no match, or -2 if error (such as failure
3479    stack overflow).  */
3480 
3481 int
re_search_2(bufp,string1,size1,string2,size2,startpos,range,regs,stop)3482 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
3483      struct re_pattern_buffer *bufp;
3484      const char *string1, *string2;
3485      int size1, size2;
3486      int startpos;
3487      int range;
3488      struct re_registers *regs;
3489      int stop;
3490 {
3491   int val;
3492   register char *fastmap = bufp->fastmap;
3493   register RE_TRANSLATE_TYPE translate = bufp->translate;
3494   int total_size = size1 + size2;
3495   int endpos = startpos + range;
3496 
3497   /* Check for out-of-range STARTPOS.  */
3498   if (startpos < 0 || startpos > total_size)
3499     return -1;
3500 
3501   /* Fix up RANGE if it might eventually take us outside
3502      the virtual concatenation of STRING1 and STRING2.
3503      Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3504   if (endpos < 0)
3505     range = 0 - startpos;
3506   else if (endpos > total_size)
3507     range = total_size - startpos;
3508 
3509   /* If the search isn't to be a backwards one, don't waste time in a
3510      search for a pattern that must be anchored.  */
3511   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3512     {
3513       if (startpos > 0)
3514 	return -1;
3515       else
3516 	range = 1;
3517     }
3518 
3519 #ifdef emacs
3520   /* In a forward search for something that starts with \=.
3521      don't keep searching past point.  */
3522   if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3523     {
3524       range = PT - startpos;
3525       if (range <= 0)
3526 	return -1;
3527     }
3528 #endif /* emacs */
3529 
3530   /* Update the fastmap now if not correct already.  */
3531   if (fastmap && !bufp->fastmap_accurate)
3532     if (re_compile_fastmap (bufp) == -2)
3533       return -2;
3534 
3535   /* Loop through the string, looking for a place to start matching.  */
3536   for (;;)
3537     {
3538       /* If a fastmap is supplied, skip quickly over characters that
3539          cannot be the start of a match.  If the pattern can match the
3540          null string, however, we don't need to skip characters; we want
3541          the first null string.  */
3542       if (fastmap && startpos < total_size && !bufp->can_be_null)
3543 	{
3544 	  if (range > 0)	/* Searching forwards.  */
3545 	    {
3546 	      register const char *d;
3547 	      register int lim = 0;
3548 	      int irange = range;
3549 
3550               if (startpos < size1 && startpos + range >= size1)
3551                 lim = range - (size1 - startpos);
3552 
3553 	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3554 
3555               /* Written out as an if-else to avoid testing `translate'
3556                  inside the loop.  */
3557 	      if (translate)
3558                 while (range > lim
3559                        && !fastmap[(unsigned char)
3560 				   translate[(unsigned char) *d++]])
3561                   range--;
3562 	      else
3563                 while (range > lim && !fastmap[(unsigned char) *d++])
3564                   range--;
3565 
3566 	      startpos += irange - range;
3567 	    }
3568 	  else				/* Searching backwards.  */
3569 	    {
3570 	      register char c = (size1 == 0 || startpos >= size1
3571                                  ? string2[startpos - size1]
3572                                  : string1[startpos]);
3573 
3574 	      if (!fastmap[(unsigned char) TRANSLATE (c)])
3575 		goto advance;
3576 	    }
3577 	}
3578 
3579       /* If can't match the null string, and that's all we have left, fail.  */
3580       if (range >= 0 && startpos == total_size && fastmap
3581           && !bufp->can_be_null)
3582 	return -1;
3583 
3584       val = re_match_2_internal (bufp, string1, size1, string2, size2,
3585 				 startpos, regs, stop);
3586 #ifndef REGEX_MALLOC
3587 #ifdef C_ALLOCA
3588       alloca (0);
3589 #endif
3590 #endif
3591 
3592       if (val >= 0)
3593 	return startpos;
3594 
3595       if (val == -2)
3596 	return -2;
3597 
3598     advance:
3599       if (!range)
3600         break;
3601       else if (range > 0)
3602         {
3603           range--;
3604           startpos++;
3605         }
3606       else
3607         {
3608           range++;
3609           startpos--;
3610         }
3611     }
3612   return -1;
3613 } /* re_search_2 */
3614 
3615 /* This converts PTR, a pointer into one of the search strings `string1'
3616    and `string2' into an offset from the beginning of that string.  */
3617 #define POINTER_TO_OFFSET(ptr)			\
3618   (FIRST_STRING_P (ptr)				\
3619    ? ((regoff_t) ((ptr) - string1))		\
3620    : ((regoff_t) ((ptr) - string2 + size1)))
3621 
3622 /* Macros for dealing with the split strings in re_match_2.  */
3623 
3624 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3625 
3626 /* Call before fetching a character with *d.  This switches over to
3627    string2 if necessary.  */
3628 #define PREFETCH()							\
3629   while (d == dend)						    	\
3630     {									\
3631       /* End of string2 => fail.  */					\
3632       if (dend == end_match_2) 						\
3633         goto fail;							\
3634       /* End of string1 => advance to string2.  */ 			\
3635       d = string2;						        \
3636       dend = end_match_2;						\
3637     }
3638 
3639 
3640 /* Test if at very beginning or at very end of the virtual concatenation
3641    of `string1' and `string2'.  If only one string, it's `string2'.  */
3642 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3643 #define AT_STRINGS_END(d) ((d) == end2)
3644 
3645 
3646 /* Test if D points to a character which is word-constituent.  We have
3647    two special cases to check for: if past the end of string1, look at
3648    the first character in string2; and if before the beginning of
3649    string2, look at the last character in string1.  */
3650 #define WORDCHAR_P(d)							\
3651   (SYNTAX ((d) == end1 ? *string2					\
3652            : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
3653    == Sword)
3654 
3655 /* Disabled due to a compiler bug -- see comment at case wordbound */
3656 #if 0
3657 /* Test if the character before D and the one at D differ with respect
3658    to being word-constituent.  */
3659 #define AT_WORD_BOUNDARY(d)						\
3660   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
3661    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3662 #endif
3663 
3664 /* Free everything we malloc.  */
3665 #ifdef MATCH_MAY_ALLOCATE
3666 #define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
3667 #define FREE_VARIABLES()						\
3668   do {									\
3669     REGEX_FREE_STACK (fail_stack.stack);				\
3670     FREE_VAR (regstart);						\
3671     FREE_VAR (regend);							\
3672     FREE_VAR (old_regstart);						\
3673     FREE_VAR (old_regend);						\
3674     FREE_VAR (best_regstart);						\
3675     FREE_VAR (best_regend);						\
3676     FREE_VAR (reg_info);						\
3677     FREE_VAR (reg_dummy);						\
3678     FREE_VAR (reg_info_dummy);						\
3679   } while (0)
3680 #else
3681 #define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
3682 #endif /* not MATCH_MAY_ALLOCATE */
3683 
3684 /* These values must meet several constraints.  They must not be valid
3685    register values; since we have a limit of 255 registers (because
3686    we use only one byte in the pattern for the register number), we can
3687    use numbers larger than 255.  They must differ by 1, because of
3688    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3689    be larger than the value for the highest register, so we do not try
3690    to actually save any registers when none are active.  */
3691 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3692 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3693 
3694 /* Matching routines.  */
3695 
3696 #ifndef emacs   /* Emacs never uses this.  */
3697 /* re_match is like re_match_2 except it takes only a single string.  */
3698 
3699 int
re_match(bufp,string,size,pos,regs)3700 re_match (bufp, string, size, pos, regs)
3701      struct re_pattern_buffer *bufp;
3702      const char *string;
3703      int size, pos;
3704      struct re_registers *regs;
3705 {
3706   int result = re_match_2_internal (bufp, NULL, 0, string, size,
3707 				    pos, regs, size);
3708 #ifndef REGEX_MALLOC
3709 #ifdef C_ALLOCA
3710   alloca (0);
3711 #endif
3712 #endif
3713   return result;
3714 }
3715 #endif /* not emacs */
3716 
3717 static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
3718 						    unsigned char *end,
3719 						register_info_type *reg_info));
3720 static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
3721 						  unsigned char *end,
3722 						register_info_type *reg_info));
3723 static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
3724 							unsigned char *end,
3725 						register_info_type *reg_info));
3726 static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
3727 				     int len, char *translate));
3728 
3729 /* re_match_2 matches the compiled pattern in BUFP against the
3730    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3731    and SIZE2, respectively).  We start matching at POS, and stop
3732    matching at STOP.
3733 
3734    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3735    store offsets for the substring each group matched in REGS.  See the
3736    documentation for exactly how many groups we fill.
3737 
3738    We return -1 if no match, -2 if an internal error (such as the
3739    failure stack overflowing).  Otherwise, we return the length of the
3740    matched substring.  */
3741 
3742 int
re_match_2(bufp,string1,size1,string2,size2,pos,regs,stop)3743 re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
3744      struct re_pattern_buffer *bufp;
3745      const char *string1, *string2;
3746      int size1, size2;
3747      int pos;
3748      struct re_registers *regs;
3749      int stop;
3750 {
3751   int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3752 				    pos, regs, stop);
3753 #ifndef REGEX_MALLOC
3754 #ifdef C_ALLOCA
3755   alloca (0);
3756 #endif
3757 #endif
3758   return result;
3759 }
3760 
3761 /* This is a separate function so that we can force an alloca cleanup
3762    afterwards.  */
3763 static int
re_match_2_internal(bufp,string1,size1,string2,size2,pos,regs,stop)3764 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
3765      struct re_pattern_buffer *bufp;
3766      const char *string1, *string2;
3767      int size1, size2;
3768      int pos;
3769      struct re_registers *regs;
3770      int stop;
3771 {
3772   /* General temporaries.  */
3773   int mcnt;
3774   unsigned char *p1;
3775 
3776   /* Just past the end of the corresponding string.  */
3777   const char *end1, *end2;
3778 
3779   /* Pointers into string1 and string2, just past the last characters in
3780      each to consider matching.  */
3781   const char *end_match_1, *end_match_2;
3782 
3783   /* Where we are in the data, and the end of the current string.  */
3784   const char *d, *dend;
3785 
3786   /* Where we are in the pattern, and the end of the pattern.  */
3787   unsigned char *p = bufp->buffer;
3788   register unsigned char *pend = p + bufp->used;
3789 
3790   /* Mark the opcode just after a start_memory, so we can test for an
3791      empty subpattern when we get to the stop_memory.  */
3792   unsigned char *just_past_start_mem = 0;
3793 
3794   /* We use this to map every character in the string.  */
3795   RE_TRANSLATE_TYPE translate = bufp->translate;
3796 
3797   /* Failure point stack.  Each place that can handle a failure further
3798      down the line pushes a failure point on this stack.  It consists of
3799      restart, regend, and reg_info for all registers corresponding to
3800      the subexpressions we're currently inside, plus the number of such
3801      registers, and, finally, two char *'s.  The first char * is where
3802      to resume scanning the pattern; the second one is where to resume
3803      scanning the strings.  If the latter is zero, the failure point is
3804      a ``dummy''; if a failure happens and the failure point is a dummy,
3805      it gets discarded and the next next one is tried.  */
3806 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3807   fail_stack_type fail_stack;
3808 #endif
3809 #ifdef DEBUG
3810   static unsigned failure_id = 0;
3811   unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3812 #endif
3813 
3814 #ifdef REL_ALLOC
3815   /* This holds the pointer to the failure stack, when
3816      it is allocated relocatably.  */
3817   fail_stack_elt_t *failure_stack_ptr;
3818 #endif
3819 
3820   /* We fill all the registers internally, independent of what we
3821      return, for use in backreferences.  The number here includes
3822      an element for register zero.  */
3823   size_t num_regs = bufp->re_nsub + 1;
3824 
3825   /* The currently active registers.  */
3826   active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3827   active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3828 
3829   /* Information on the contents of registers. These are pointers into
3830      the input strings; they record just what was matched (on this
3831      attempt) by a subexpression part of the pattern, that is, the
3832      regnum-th regstart pointer points to where in the pattern we began
3833      matching and the regnum-th regend points to right after where we
3834      stopped matching the regnum-th subexpression.  (The zeroth register
3835      keeps track of what the whole pattern matches.)  */
3836 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3837   const char **regstart, **regend;
3838 #endif
3839 
3840   /* If a group that's operated upon by a repetition operator fails to
3841      match anything, then the register for its start will need to be
3842      restored because it will have been set to wherever in the string we
3843      are when we last see its open-group operator.  Similarly for a
3844      register's end.  */
3845 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3846   const char **old_regstart, **old_regend;
3847 #endif
3848 
3849   /* The is_active field of reg_info helps us keep track of which (possibly
3850      nested) subexpressions we are currently in. The matched_something
3851      field of reg_info[reg_num] helps us tell whether or not we have
3852      matched any of the pattern so far this time through the reg_num-th
3853      subexpression.  These two fields get reset each time through any
3854      loop their register is in.  */
3855 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3856   register_info_type *reg_info;
3857 #endif
3858 
3859   /* The following record the register info as found in the above
3860      variables when we find a match better than any we've seen before.
3861      This happens as we backtrack through the failure points, which in
3862      turn happens only if we have not yet matched the entire string. */
3863   unsigned best_regs_set = false;
3864 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3865   const char **best_regstart, **best_regend;
3866 #endif
3867 
3868   /* Logically, this is `best_regend[0]'.  But we don't want to have to
3869      allocate space for that if we're not allocating space for anything
3870      else (see below).  Also, we never need info about register 0 for
3871      any of the other register vectors, and it seems rather a kludge to
3872      treat `best_regend' differently than the rest.  So we keep track of
3873      the end of the best match so far in a separate variable.  We
3874      initialize this to NULL so that when we backtrack the first time
3875      and need to test it, it's not garbage.  */
3876   const char *match_end = NULL;
3877 
3878   /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
3879   int set_regs_matched_done = 0;
3880 
3881   /* Used when we pop values we don't care about.  */
3882 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3883   const char **reg_dummy;
3884   register_info_type *reg_info_dummy;
3885 #endif
3886 
3887 #ifdef DEBUG
3888   /* Counts the total number of registers pushed.  */
3889   unsigned num_regs_pushed = 0;
3890 #endif
3891 
3892   DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3893 
3894   INIT_FAIL_STACK ();
3895 
3896 #ifdef MATCH_MAY_ALLOCATE
3897   /* Do not bother to initialize all the register variables if there are
3898      no groups in the pattern, as it takes a fair amount of time.  If
3899      there are groups, we include space for register 0 (the whole
3900      pattern), even though we never use it, since it simplifies the
3901      array indexing.  We should fix this.  */
3902   if (bufp->re_nsub)
3903     {
3904       regstart = REGEX_TALLOC (num_regs, const char *);
3905       regend = REGEX_TALLOC (num_regs, const char *);
3906       old_regstart = REGEX_TALLOC (num_regs, const char *);
3907       old_regend = REGEX_TALLOC (num_regs, const char *);
3908       best_regstart = REGEX_TALLOC (num_regs, const char *);
3909       best_regend = REGEX_TALLOC (num_regs, const char *);
3910       reg_info = REGEX_TALLOC (num_regs, register_info_type);
3911       reg_dummy = REGEX_TALLOC (num_regs, const char *);
3912       reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3913 
3914       if (!(regstart && regend && old_regstart && old_regend && reg_info
3915             && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3916         {
3917           FREE_VARIABLES ();
3918           return -2;
3919         }
3920     }
3921   else
3922     {
3923       /* We must initialize all our variables to NULL, so that
3924          `FREE_VARIABLES' doesn't try to free them.  */
3925       regstart = regend = old_regstart = old_regend = best_regstart
3926         = best_regend = reg_dummy = NULL;
3927       reg_info = reg_info_dummy = (register_info_type *) NULL;
3928     }
3929 #endif /* MATCH_MAY_ALLOCATE */
3930 
3931   /* The starting position is bogus.  */
3932   if (pos < 0 || pos > size1 + size2)
3933     {
3934       FREE_VARIABLES ();
3935       return -1;
3936     }
3937 
3938   /* Initialize subexpression text positions to -1 to mark ones that no
3939      start_memory/stop_memory has been seen for. Also initialize the
3940      register information struct.  */
3941   for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
3942     {
3943       regstart[mcnt] = regend[mcnt]
3944         = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3945 
3946       REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3947       IS_ACTIVE (reg_info[mcnt]) = 0;
3948       MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3949       EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3950     }
3951 
3952   /* We move `string1' into `string2' if the latter's empty -- but not if
3953      `string1' is null.  */
3954   if (size2 == 0 && string1 != NULL)
3955     {
3956       string2 = string1;
3957       size2 = size1;
3958       string1 = 0;
3959       size1 = 0;
3960     }
3961   end1 = string1 + size1;
3962   end2 = string2 + size2;
3963 
3964   /* Compute where to stop matching, within the two strings.  */
3965   if (stop <= size1)
3966     {
3967       end_match_1 = string1 + stop;
3968       end_match_2 = string2;
3969     }
3970   else
3971     {
3972       end_match_1 = end1;
3973       end_match_2 = string2 + stop - size1;
3974     }
3975 
3976   /* `p' scans through the pattern as `d' scans through the data.
3977      `dend' is the end of the input string that `d' points within.  `d'
3978      is advanced into the following input string whenever necessary, but
3979      this happens before fetching; therefore, at the beginning of the
3980      loop, `d' can be pointing at the end of a string, but it cannot
3981      equal `string2'.  */
3982   if (size1 > 0 && pos <= size1)
3983     {
3984       d = string1 + pos;
3985       dend = end_match_1;
3986     }
3987   else
3988     {
3989       d = string2 + pos - size1;
3990       dend = end_match_2;
3991     }
3992 
3993   DEBUG_PRINT1 ("The compiled pattern is:\n");
3994   DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3995   DEBUG_PRINT1 ("The string to match is: `");
3996   DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3997   DEBUG_PRINT1 ("'\n");
3998 
3999   /* This loops over pattern commands.  It exits by returning from the
4000      function if the match is complete, or it drops through if the match
4001      fails at this starting point in the input data.  */
4002   for (;;)
4003     {
4004 #ifdef _LIBC
4005       DEBUG_PRINT2 ("\n%p: ", p);
4006 #else
4007       DEBUG_PRINT2 ("\n0x%x: ", p);
4008 #endif
4009 
4010       if (p == pend)
4011 	{ /* End of pattern means we might have succeeded.  */
4012           DEBUG_PRINT1 ("end of pattern ... ");
4013 
4014 	  /* If we haven't matched the entire string, and we want the
4015              longest match, try backtracking.  */
4016           if (d != end_match_2)
4017 	    {
4018 	      /* 1 if this match ends in the same string (string1 or string2)
4019 		 as the best previous match.  */
4020 	      boolean same_str_p;
4021 	      /* 1 if this match is the best seen so far.  */
4022 	      boolean best_match_p;
4023 
4024               same_str_p = (FIRST_STRING_P (match_end)
4025 				    == MATCHING_IN_FIRST_STRING);
4026 
4027 	      /* AIX compiler got confused when this was combined
4028 		 with the previous declaration.  */
4029 	      if (same_str_p)
4030 		best_match_p = d > match_end;
4031 	      else
4032 		best_match_p = !MATCHING_IN_FIRST_STRING;
4033 
4034               DEBUG_PRINT1 ("backtracking.\n");
4035 
4036               if (!FAIL_STACK_EMPTY ())
4037                 { /* More failure points to try.  */
4038 
4039                   /* If exceeds best match so far, save it.  */
4040                   if (!best_regs_set || best_match_p)
4041                     {
4042                       best_regs_set = true;
4043                       match_end = d;
4044 
4045                       DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4046 
4047                       for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4048                         {
4049                           best_regstart[mcnt] = regstart[mcnt];
4050                           best_regend[mcnt] = regend[mcnt];
4051                         }
4052                     }
4053                   goto fail;
4054                 }
4055 
4056               /* If no failure points, don't restore garbage.  And if
4057                  last match is real best match, don't restore second
4058                  best one. */
4059               else if (best_regs_set && !best_match_p)
4060                 {
4061   	        restore_best_regs:
4062                   /* Restore best match.  It may happen that `dend ==
4063                      end_match_1' while the restored d is in string2.
4064                      For example, the pattern `x.*y.*z' against the
4065                      strings `x-' and `y-z-', if the two strings are
4066                      not consecutive in memory.  */
4067                   DEBUG_PRINT1 ("Restoring best registers.\n");
4068 
4069                   d = match_end;
4070                   dend = ((d >= string1 && d <= end1)
4071 		           ? end_match_1 : end_match_2);
4072 
4073 		  for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4074 		    {
4075 		      regstart[mcnt] = best_regstart[mcnt];
4076 		      regend[mcnt] = best_regend[mcnt];
4077 		    }
4078                 }
4079             } /* d != end_match_2 */
4080 
4081 	succeed_label:
4082           DEBUG_PRINT1 ("Accepting match.\n");
4083 
4084           /* If caller wants register contents data back, do it.  */
4085           if (regs && !bufp->no_sub)
4086 	    {
4087               /* Have the register data arrays been allocated?  */
4088               if (bufp->regs_allocated == REGS_UNALLOCATED)
4089                 { /* No.  So allocate them with malloc.  We need one
4090                      extra element beyond `num_regs' for the `-1' marker
4091                      GNU code uses.  */
4092                   regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4093                   regs->start = TALLOC (regs->num_regs, regoff_t);
4094                   regs->end = TALLOC (regs->num_regs, regoff_t);
4095                   if (regs->start == NULL || regs->end == NULL)
4096 		    {
4097 		      FREE_VARIABLES ();
4098 		      return -2;
4099 		    }
4100                   bufp->regs_allocated = REGS_REALLOCATE;
4101                 }
4102               else if (bufp->regs_allocated == REGS_REALLOCATE)
4103                 { /* Yes.  If we need more elements than were already
4104                      allocated, reallocate them.  If we need fewer, just
4105                      leave it alone.  */
4106                   if (regs->num_regs < num_regs + 1)
4107                     {
4108                       regs->num_regs = num_regs + 1;
4109                       RETALLOC (regs->start, regs->num_regs, regoff_t);
4110                       RETALLOC (regs->end, regs->num_regs, regoff_t);
4111                       if (regs->start == NULL || regs->end == NULL)
4112 			{
4113 			  FREE_VARIABLES ();
4114 			  return -2;
4115 			}
4116                     }
4117                 }
4118               else
4119 		{
4120 		  /* These braces fend off a "empty body in an else-statement"
4121 		     warning under GCC when assert expands to nothing.  */
4122 		  assert (bufp->regs_allocated == REGS_FIXED);
4123 		}
4124 
4125               /* Convert the pointer data in `regstart' and `regend' to
4126                  indices.  Register zero has to be set differently,
4127                  since we haven't kept track of any info for it.  */
4128               if (regs->num_regs > 0)
4129                 {
4130                   regs->start[0] = pos;
4131                   regs->end[0] = (MATCHING_IN_FIRST_STRING
4132 				  ? ((regoff_t) (d - string1))
4133 			          : ((regoff_t) (d - string2 + size1)));
4134                 }
4135 
4136               /* Go through the first `min (num_regs, regs->num_regs)'
4137                  registers, since that is all we initialized.  */
4138 	      for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4139 		   mcnt++)
4140 		{
4141                   if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4142                     regs->start[mcnt] = regs->end[mcnt] = -1;
4143                   else
4144                     {
4145 		      regs->start[mcnt]
4146 			= (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4147                       regs->end[mcnt]
4148 			= (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4149                     }
4150 		}
4151 
4152               /* If the regs structure we return has more elements than
4153                  were in the pattern, set the extra elements to -1.  If
4154                  we (re)allocated the registers, this is the case,
4155                  because we always allocate enough to have at least one
4156                  -1 at the end.  */
4157               for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
4158                 regs->start[mcnt] = regs->end[mcnt] = -1;
4159 	    } /* regs && !bufp->no_sub */
4160 
4161           DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4162                         nfailure_points_pushed, nfailure_points_popped,
4163                         nfailure_points_pushed - nfailure_points_popped);
4164           DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4165 
4166           mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4167 			    ? string1
4168 			    : string2 - size1);
4169 
4170           DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4171 
4172           FREE_VARIABLES ();
4173           return mcnt;
4174         }
4175 
4176       /* Otherwise match next pattern command.  */
4177       switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4178 	{
4179         /* Ignore these.  Used to ignore the n of succeed_n's which
4180            currently have n == 0.  */
4181         case no_op:
4182           DEBUG_PRINT1 ("EXECUTING no_op.\n");
4183           break;
4184 
4185 	case succeed:
4186           DEBUG_PRINT1 ("EXECUTING succeed.\n");
4187 	  goto succeed_label;
4188 
4189         /* Match the next n pattern characters exactly.  The following
4190            byte in the pattern defines n, and the n bytes after that
4191            are the characters to match.  */
4192 	case exactn:
4193 	  mcnt = *p++;
4194           DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4195 
4196           /* This is written out as an if-else so we don't waste time
4197              testing `translate' inside the loop.  */
4198           if (translate)
4199 	    {
4200 	      do
4201 		{
4202 		  PREFETCH ();
4203 		  if ((unsigned char) translate[(unsigned char) *d++]
4204 		      != (unsigned char) *p++)
4205                     goto fail;
4206 		}
4207 	      while (--mcnt);
4208 	    }
4209 	  else
4210 	    {
4211 	      do
4212 		{
4213 		  PREFETCH ();
4214 		  if (*d++ != (char) *p++) goto fail;
4215 		}
4216 	      while (--mcnt);
4217 	    }
4218 	  SET_REGS_MATCHED ();
4219           break;
4220 
4221 
4222         /* Match any character except possibly a newline or a null.  */
4223 	case anychar:
4224           DEBUG_PRINT1 ("EXECUTING anychar.\n");
4225 
4226           PREFETCH ();
4227 
4228           if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4229               || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4230 	    goto fail;
4231 
4232           SET_REGS_MATCHED ();
4233           DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4234           d++;
4235 	  break;
4236 
4237 
4238 	case charset:
4239 	case charset_not:
4240 	  {
4241 	    register unsigned char c;
4242 	    boolean not = (re_opcode_t) *(p - 1) == charset_not;
4243 
4244             DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4245 
4246 	    PREFETCH ();
4247 	    c = TRANSLATE (*d); /* The character to match.  */
4248 
4249             /* Cast to `unsigned' instead of `unsigned char' in case the
4250                bit list is a full 32 bytes long.  */
4251 	    if (c < (unsigned) (*p * BYTEWIDTH)
4252 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4253 	      not = !not;
4254 
4255 	    p += 1 + *p;
4256 
4257 	    if (!not) goto fail;
4258 
4259 	    SET_REGS_MATCHED ();
4260             d++;
4261 	    break;
4262 	  }
4263 
4264 
4265         /* The beginning of a group is represented by start_memory.
4266            The arguments are the register number in the next byte, and the
4267            number of groups inner to this one in the next.  The text
4268            matched within the group is recorded (in the internal
4269            registers data structure) under the register number.  */
4270         case start_memory:
4271 	  DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4272 
4273           /* Find out if this group can match the empty string.  */
4274 	  p1 = p;		/* To send to group_match_null_string_p.  */
4275 
4276           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4277             REG_MATCH_NULL_STRING_P (reg_info[*p])
4278               = group_match_null_string_p (&p1, pend, reg_info);
4279 
4280           /* Save the position in the string where we were the last time
4281              we were at this open-group operator in case the group is
4282              operated upon by a repetition operator, e.g., with `(a*)*b'
4283              against `ab'; then we want to ignore where we are now in
4284              the string in case this attempt to match fails.  */
4285           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4286                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4287                              : regstart[*p];
4288 	  DEBUG_PRINT2 ("  old_regstart: %d\n",
4289 			 POINTER_TO_OFFSET (old_regstart[*p]));
4290 
4291           regstart[*p] = d;
4292 	  DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4293 
4294           IS_ACTIVE (reg_info[*p]) = 1;
4295           MATCHED_SOMETHING (reg_info[*p]) = 0;
4296 
4297 	  /* Clear this whenever we change the register activity status.  */
4298 	  set_regs_matched_done = 0;
4299 
4300           /* This is the new highest active register.  */
4301           highest_active_reg = *p;
4302 
4303           /* If nothing was active before, this is the new lowest active
4304              register.  */
4305           if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4306             lowest_active_reg = *p;
4307 
4308           /* Move past the register number and inner group count.  */
4309           p += 2;
4310 	  just_past_start_mem = p;
4311 
4312           break;
4313 
4314 
4315         /* The stop_memory opcode represents the end of a group.  Its
4316            arguments are the same as start_memory's: the register
4317            number, and the number of inner groups.  */
4318 	case stop_memory:
4319 	  DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4320 
4321           /* We need to save the string position the last time we were at
4322              this close-group operator in case the group is operated
4323              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4324              against `aba'; then we want to ignore where we are now in
4325              the string in case this attempt to match fails.  */
4326           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4327                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
4328 			   : regend[*p];
4329 	  DEBUG_PRINT2 ("      old_regend: %d\n",
4330 			 POINTER_TO_OFFSET (old_regend[*p]));
4331 
4332           regend[*p] = d;
4333 	  DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4334 
4335           /* This register isn't active anymore.  */
4336           IS_ACTIVE (reg_info[*p]) = 0;
4337 
4338 	  /* Clear this whenever we change the register activity status.  */
4339 	  set_regs_matched_done = 0;
4340 
4341           /* If this was the only register active, nothing is active
4342              anymore.  */
4343           if (lowest_active_reg == highest_active_reg)
4344             {
4345               lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4346               highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4347             }
4348           else
4349             { /* We must scan for the new highest active register, since
4350                  it isn't necessarily one less than now: consider
4351                  (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4352                  new highest active register is 1.  */
4353               unsigned char r = *p - 1;
4354               while (r > 0 && !IS_ACTIVE (reg_info[r]))
4355                 r--;
4356 
4357               /* If we end up at register zero, that means that we saved
4358                  the registers as the result of an `on_failure_jump', not
4359                  a `start_memory', and we jumped to past the innermost
4360                  `stop_memory'.  For example, in ((.)*) we save
4361                  registers 1 and 2 as a result of the *, but when we pop
4362                  back to the second ), we are at the stop_memory 1.
4363                  Thus, nothing is active.  */
4364 	      if (r == 0)
4365                 {
4366                   lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4367                   highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4368                 }
4369               else
4370                 highest_active_reg = r;
4371             }
4372 
4373           /* If just failed to match something this time around with a
4374              group that's operated on by a repetition operator, try to
4375              force exit from the ``loop'', and restore the register
4376              information for this group that we had before trying this
4377              last match.  */
4378           if ((!MATCHED_SOMETHING (reg_info[*p])
4379                || just_past_start_mem == p - 1)
4380 	      && (p + 2) < pend)
4381             {
4382               boolean is_a_jump_n = false;
4383 
4384               p1 = p + 2;
4385               mcnt = 0;
4386               switch ((re_opcode_t) *p1++)
4387                 {
4388                   case jump_n:
4389 		    is_a_jump_n = true;
4390                   case pop_failure_jump:
4391 		  case maybe_pop_jump:
4392 		  case jump:
4393 		  case dummy_failure_jump:
4394                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4395 		    if (is_a_jump_n)
4396 		      p1 += 2;
4397                     break;
4398 
4399                   default:
4400                     /* do nothing */ ;
4401                 }
4402 	      p1 += mcnt;
4403 
4404               /* If the next operation is a jump backwards in the pattern
4405 	         to an on_failure_jump right before the start_memory
4406                  corresponding to this stop_memory, exit from the loop
4407                  by forcing a failure after pushing on the stack the
4408                  on_failure_jump's jump in the pattern, and d.  */
4409               if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4410                   && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4411 		{
4412                   /* If this group ever matched anything, then restore
4413                      what its registers were before trying this last
4414                      failed match, e.g., with `(a*)*b' against `ab' for
4415                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
4416                      against `aba' for regend[3].
4417 
4418                      Also restore the registers for inner groups for,
4419                      e.g., `((a*)(b*))*' against `aba' (register 3 would
4420                      otherwise get trashed).  */
4421 
4422                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4423 		    {
4424 		      unsigned r;
4425 
4426                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4427 
4428 		      /* Restore this and inner groups' (if any) registers.  */
4429                       for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4430 			   r++)
4431                         {
4432                           regstart[r] = old_regstart[r];
4433 
4434                           /* xx why this test?  */
4435                           if (old_regend[r] >= regstart[r])
4436                             regend[r] = old_regend[r];
4437                         }
4438                     }
4439 		  p1++;
4440                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4441                   PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4442 
4443                   goto fail;
4444                 }
4445             }
4446 
4447           /* Move past the register number and the inner group count.  */
4448           p += 2;
4449           break;
4450 
4451 
4452 	/* \<digit> has been turned into a `duplicate' command which is
4453            followed by the numeric value of <digit> as the register number.  */
4454         case duplicate:
4455 	  {
4456 	    register const char *d2, *dend2;
4457 	    int regno = *p++;   /* Get which register to match against.  */
4458 	    DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4459 
4460 	    /* Can't back reference a group which we've never matched.  */
4461             if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4462               goto fail;
4463 
4464             /* Where in input to try to start matching.  */
4465             d2 = regstart[regno];
4466 
4467             /* Where to stop matching; if both the place to start and
4468                the place to stop matching are in the same string, then
4469                set to the place to stop, otherwise, for now have to use
4470                the end of the first string.  */
4471 
4472             dend2 = ((FIRST_STRING_P (regstart[regno])
4473 		      == FIRST_STRING_P (regend[regno]))
4474 		     ? regend[regno] : end_match_1);
4475 	    for (;;)
4476 	      {
4477 		/* If necessary, advance to next segment in register
4478                    contents.  */
4479 		while (d2 == dend2)
4480 		  {
4481 		    if (dend2 == end_match_2) break;
4482 		    if (dend2 == regend[regno]) break;
4483 
4484                     /* End of string1 => advance to string2. */
4485                     d2 = string2;
4486                     dend2 = regend[regno];
4487 		  }
4488 		/* At end of register contents => success */
4489 		if (d2 == dend2) break;
4490 
4491 		/* If necessary, advance to next segment in data.  */
4492 		PREFETCH ();
4493 
4494 		/* How many characters left in this segment to match.  */
4495 		mcnt = dend - d;
4496 
4497 		/* Want how many consecutive characters we can match in
4498                    one shot, so, if necessary, adjust the count.  */
4499                 if (mcnt > dend2 - d2)
4500 		  mcnt = dend2 - d2;
4501 
4502 		/* Compare that many; failure if mismatch, else move
4503                    past them.  */
4504 		if (translate
4505                     ? bcmp_translate (d, d2, mcnt, translate)
4506                     : bcmp (d, d2, mcnt))
4507 		  goto fail;
4508 		d += mcnt, d2 += mcnt;
4509 
4510 		/* Do this because we've match some characters.  */
4511 		SET_REGS_MATCHED ();
4512 	      }
4513 	  }
4514 	  break;
4515 
4516 
4517         /* begline matches the empty string at the beginning of the string
4518            (unless `not_bol' is set in `bufp'), and, if
4519            `newline_anchor' is set, after newlines.  */
4520 	case begline:
4521           DEBUG_PRINT1 ("EXECUTING begline.\n");
4522 
4523           if (AT_STRINGS_BEG (d))
4524             {
4525               if (!bufp->not_bol) break;
4526             }
4527           else if (d[-1] == '\n' && bufp->newline_anchor)
4528             {
4529               break;
4530             }
4531           /* In all other cases, we fail.  */
4532           goto fail;
4533 
4534 
4535         /* endline is the dual of begline.  */
4536 	case endline:
4537           DEBUG_PRINT1 ("EXECUTING endline.\n");
4538 
4539           if (AT_STRINGS_END (d))
4540             {
4541               if (!bufp->not_eol) break;
4542             }
4543 
4544           /* We have to ``prefetch'' the next character.  */
4545           else if ((d == end1 ? *string2 : *d) == '\n'
4546                    && bufp->newline_anchor)
4547             {
4548               break;
4549             }
4550           goto fail;
4551 
4552 
4553 	/* Match at the very beginning of the data.  */
4554         case begbuf:
4555           DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4556           if (AT_STRINGS_BEG (d))
4557             break;
4558           goto fail;
4559 
4560 
4561 	/* Match at the very end of the data.  */
4562         case endbuf:
4563           DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4564 	  if (AT_STRINGS_END (d))
4565 	    break;
4566           goto fail;
4567 
4568 
4569         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4570            pushes NULL as the value for the string on the stack.  Then
4571            `pop_failure_point' will keep the current value for the
4572            string, instead of restoring it.  To see why, consider
4573            matching `foo\nbar' against `.*\n'.  The .* matches the foo;
4574            then the . fails against the \n.  But the next thing we want
4575            to do is match the \n against the \n; if we restored the
4576            string value, we would be back at the foo.
4577 
4578            Because this is used only in specific cases, we don't need to
4579            check all the things that `on_failure_jump' does, to make
4580            sure the right things get saved on the stack.  Hence we don't
4581            share its code.  The only reason to push anything on the
4582            stack at all is that otherwise we would have to change
4583            `anychar's code to do something besides goto fail in this
4584            case; that seems worse than this.  */
4585         case on_failure_keep_string_jump:
4586           DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4587 
4588           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4589 #ifdef _LIBC
4590           DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
4591 #else
4592           DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4593 #endif
4594 
4595           PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4596           break;
4597 
4598 
4599 	/* Uses of on_failure_jump:
4600 
4601            Each alternative starts with an on_failure_jump that points
4602            to the beginning of the next alternative.  Each alternative
4603            except the last ends with a jump that in effect jumps past
4604            the rest of the alternatives.  (They really jump to the
4605            ending jump of the following alternative, because tensioning
4606            these jumps is a hassle.)
4607 
4608            Repeats start with an on_failure_jump that points past both
4609            the repetition text and either the following jump or
4610            pop_failure_jump back to this on_failure_jump.  */
4611 	case on_failure_jump:
4612         on_failure:
4613           DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4614 
4615           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4616 #ifdef _LIBC
4617           DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
4618 #else
4619           DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4620 #endif
4621 
4622           /* If this on_failure_jump comes right before a group (i.e.,
4623              the original * applied to a group), save the information
4624              for that group and all inner ones, so that if we fail back
4625              to this point, the group's information will be correct.
4626              For example, in \(a*\)*\1, we need the preceding group,
4627              and in \(zz\(a*\)b*\)\2, we need the inner group.  */
4628 
4629           /* We can't use `p' to check ahead because we push
4630              a failure point to `p + mcnt' after we do this.  */
4631           p1 = p;
4632 
4633           /* We need to skip no_op's before we look for the
4634              start_memory in case this on_failure_jump is happening as
4635              the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4636              against aba.  */
4637           while (p1 < pend && (re_opcode_t) *p1 == no_op)
4638             p1++;
4639 
4640           if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4641             {
4642               /* We have a new highest active register now.  This will
4643                  get reset at the start_memory we are about to get to,
4644                  but we will have saved all the registers relevant to
4645                  this repetition op, as described above.  */
4646               highest_active_reg = *(p1 + 1) + *(p1 + 2);
4647               if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4648                 lowest_active_reg = *(p1 + 1);
4649             }
4650 
4651           DEBUG_PRINT1 (":\n");
4652           PUSH_FAILURE_POINT (p + mcnt, d, -2);
4653           break;
4654 
4655 
4656         /* A smart repeat ends with `maybe_pop_jump'.
4657 	   We change it to either `pop_failure_jump' or `jump'.  */
4658         case maybe_pop_jump:
4659           EXTRACT_NUMBER_AND_INCR (mcnt, p);
4660           DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4661           {
4662 	    register unsigned char *p2 = p;
4663 
4664             /* Compare the beginning of the repeat with what in the
4665                pattern follows its end. If we can establish that there
4666                is nothing that they would both match, i.e., that we
4667                would have to backtrack because of (as in, e.g., `a*a')
4668                then we can change to pop_failure_jump, because we'll
4669                never have to backtrack.
4670 
4671                This is not true in the case of alternatives: in
4672                `(a|ab)*' we do need to backtrack to the `ab' alternative
4673                (e.g., if the string was `ab').  But instead of trying to
4674                detect that here, the alternative has put on a dummy
4675                failure point which is what we will end up popping.  */
4676 
4677 	    /* Skip over open/close-group commands.
4678 	       If what follows this loop is a ...+ construct,
4679 	       look at what begins its body, since we will have to
4680 	       match at least one of that.  */
4681 	    while (1)
4682 	      {
4683 		if (p2 + 2 < pend
4684 		    && ((re_opcode_t) *p2 == stop_memory
4685 			|| (re_opcode_t) *p2 == start_memory))
4686 		  p2 += 3;
4687 		else if (p2 + 6 < pend
4688 			 && (re_opcode_t) *p2 == dummy_failure_jump)
4689 		  p2 += 6;
4690 		else
4691 		  break;
4692 	      }
4693 
4694 	    p1 = p + mcnt;
4695 	    /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4696 	       to the `maybe_finalize_jump' of this case.  Examine what
4697 	       follows.  */
4698 
4699             /* If we're at the end of the pattern, we can change.  */
4700             if (p2 == pend)
4701 	      {
4702 		/* Consider what happens when matching ":\(.*\)"
4703 		   against ":/".  I don't really understand this code
4704 		   yet.  */
4705   	        p[-3] = (unsigned char) pop_failure_jump;
4706                 DEBUG_PRINT1
4707                   ("  End of pattern: change to `pop_failure_jump'.\n");
4708               }
4709 
4710             else if ((re_opcode_t) *p2 == exactn
4711 		     || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4712 	      {
4713 		register unsigned char c
4714                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4715 
4716                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4717                   {
4718   		    p[-3] = (unsigned char) pop_failure_jump;
4719                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4720                                   c, p1[5]);
4721                   }
4722 
4723 		else if ((re_opcode_t) p1[3] == charset
4724 			 || (re_opcode_t) p1[3] == charset_not)
4725 		  {
4726 		    int not = (re_opcode_t) p1[3] == charset_not;
4727 
4728 		    if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4729 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4730 		      not = !not;
4731 
4732                     /* `not' is equal to 1 if c would match, which means
4733                         that we can't change to pop_failure_jump.  */
4734 		    if (!not)
4735                       {
4736   		        p[-3] = (unsigned char) pop_failure_jump;
4737                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4738                       }
4739 		  }
4740 	      }
4741             else if ((re_opcode_t) *p2 == charset)
4742 	      {
4743 #ifdef DEBUG
4744 		register unsigned char c
4745                   = *p2 == (unsigned char) endline ? '\n' : p2[2];
4746 #endif
4747 
4748 #if 0
4749                 if ((re_opcode_t) p1[3] == exactn
4750 		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4751 			  && (p2[2 + p1[5] / BYTEWIDTH]
4752 			      & (1 << (p1[5] % BYTEWIDTH)))))
4753 #else
4754                 if ((re_opcode_t) p1[3] == exactn
4755 		    && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
4756 			  && (p2[2 + p1[4] / BYTEWIDTH]
4757 			      & (1 << (p1[4] % BYTEWIDTH)))))
4758 #endif
4759                   {
4760   		    p[-3] = (unsigned char) pop_failure_jump;
4761                     DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4762                                   c, p1[5]);
4763                   }
4764 
4765 		else if ((re_opcode_t) p1[3] == charset_not)
4766 		  {
4767 		    int idx;
4768 		    /* We win if the charset_not inside the loop
4769 		       lists every character listed in the charset after.  */
4770 		    for (idx = 0; idx < (int) p2[1]; idx++)
4771 		      if (! (p2[2 + idx] == 0
4772 			     || (idx < (int) p1[4]
4773 				 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4774 			break;
4775 
4776 		    if (idx == p2[1])
4777                       {
4778   		        p[-3] = (unsigned char) pop_failure_jump;
4779                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4780                       }
4781 		  }
4782 		else if ((re_opcode_t) p1[3] == charset)
4783 		  {
4784 		    int idx;
4785 		    /* We win if the charset inside the loop
4786 		       has no overlap with the one after the loop.  */
4787 		    for (idx = 0;
4788 			 idx < (int) p2[1] && idx < (int) p1[4];
4789 			 idx++)
4790 		      if ((p2[2 + idx] & p1[5 + idx]) != 0)
4791 			break;
4792 
4793 		    if (idx == p2[1] || idx == p1[4])
4794                       {
4795   		        p[-3] = (unsigned char) pop_failure_jump;
4796                         DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4797                       }
4798 		  }
4799 	      }
4800 	  }
4801 	  p -= 2;		/* Point at relative address again.  */
4802 	  if ((re_opcode_t) p[-1] != pop_failure_jump)
4803 	    {
4804 	      p[-1] = (unsigned char) jump;
4805               DEBUG_PRINT1 ("  Match => jump.\n");
4806 	      goto unconditional_jump;
4807 	    }
4808         /* Note fall through.  */
4809 
4810 
4811 	/* The end of a simple repeat has a pop_failure_jump back to
4812            its matching on_failure_jump, where the latter will push a
4813            failure point.  The pop_failure_jump takes off failure
4814            points put on by this pop_failure_jump's matching
4815            on_failure_jump; we got through the pattern to here from the
4816            matching on_failure_jump, so didn't fail.  */
4817         case pop_failure_jump:
4818           {
4819             /* We need to pass separate storage for the lowest and
4820                highest registers, even though we don't care about the
4821                actual values.  Otherwise, we will restore only one
4822                register from the stack, since lowest will == highest in
4823                `pop_failure_point'.  */
4824             active_reg_t dummy_low_reg, dummy_high_reg;
4825             unsigned char *pdummy;
4826             const char *sdummy;
4827 
4828             DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4829             POP_FAILURE_POINT (sdummy, pdummy,
4830                                dummy_low_reg, dummy_high_reg,
4831                                reg_dummy, reg_dummy, reg_info_dummy);
4832           }
4833 	  /* Note fall through.  */
4834 
4835 	unconditional_jump:
4836 #ifdef _LIBC
4837 	  DEBUG_PRINT2 ("\n%p: ", p);
4838 #else
4839 	  DEBUG_PRINT2 ("\n0x%x: ", p);
4840 #endif
4841           /* Note fall through.  */
4842 
4843         /* Unconditionally jump (without popping any failure points).  */
4844         case jump:
4845 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);	/* Get the amount to jump.  */
4846           DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4847 	  p += mcnt;				/* Do the jump.  */
4848 #ifdef _LIBC
4849           DEBUG_PRINT2 ("(to %p).\n", p);
4850 #else
4851           DEBUG_PRINT2 ("(to 0x%x).\n", p);
4852 #endif
4853 	  break;
4854 
4855 
4856         /* We need this opcode so we can detect where alternatives end
4857            in `group_match_null_string_p' et al.  */
4858         case jump_past_alt:
4859           DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4860           goto unconditional_jump;
4861 
4862 
4863         /* Normally, the on_failure_jump pushes a failure point, which
4864            then gets popped at pop_failure_jump.  We will end up at
4865            pop_failure_jump, also, and with a pattern of, say, `a+', we
4866            are skipping over the on_failure_jump, so we have to push
4867            something meaningless for pop_failure_jump to pop.  */
4868         case dummy_failure_jump:
4869           DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4870           /* It doesn't matter what we push for the string here.  What
4871              the code at `fail' tests is the value for the pattern.  */
4872           PUSH_FAILURE_POINT (0, 0, -2);
4873           goto unconditional_jump;
4874 
4875 
4876         /* At the end of an alternative, we need to push a dummy failure
4877            point in case we are followed by a `pop_failure_jump', because
4878            we don't want the failure point for the alternative to be
4879            popped.  For example, matching `(a|ab)*' against `aab'
4880            requires that we match the `ab' alternative.  */
4881         case push_dummy_failure:
4882           DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4883           /* See comments just above at `dummy_failure_jump' about the
4884              two zeroes.  */
4885           PUSH_FAILURE_POINT (0, 0, -2);
4886           break;
4887 
4888         /* Have to succeed matching what follows at least n times.
4889            After that, handle like `on_failure_jump'.  */
4890         case succeed_n:
4891           EXTRACT_NUMBER (mcnt, p + 2);
4892           DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4893 
4894           assert (mcnt >= 0);
4895           /* Originally, this is how many times we HAVE to succeed.  */
4896           if (mcnt > 0)
4897             {
4898                mcnt--;
4899 	       p += 2;
4900                STORE_NUMBER_AND_INCR (p, mcnt);
4901 #ifdef _LIBC
4902                DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
4903 #else
4904                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
4905 #endif
4906             }
4907 	  else if (mcnt == 0)
4908             {
4909 #ifdef _LIBC
4910               DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
4911 #else
4912               DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4913 #endif
4914 	      p[2] = (unsigned char) no_op;
4915               p[3] = (unsigned char) no_op;
4916               goto on_failure;
4917             }
4918           break;
4919 
4920         case jump_n:
4921           EXTRACT_NUMBER (mcnt, p + 2);
4922           DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4923 
4924           /* Originally, this is how many times we CAN jump.  */
4925           if (mcnt)
4926             {
4927                mcnt--;
4928                STORE_NUMBER (p + 2, mcnt);
4929 #ifdef _LIBC
4930                DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
4931 #else
4932                DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
4933 #endif
4934 	       goto unconditional_jump;
4935             }
4936           /* If don't have to jump any more, skip over the rest of command.  */
4937 	  else
4938 	    p += 4;
4939           break;
4940 
4941 	case set_number_at:
4942 	  {
4943             DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4944 
4945             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4946             p1 = p + mcnt;
4947             EXTRACT_NUMBER_AND_INCR (mcnt, p);
4948 #ifdef _LIBC
4949             DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
4950 #else
4951             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4952 #endif
4953 	    STORE_NUMBER (p1, mcnt);
4954             break;
4955           }
4956 
4957 #if 0
4958 	/* The DEC Alpha C compiler 3.x generates incorrect code for the
4959 	   test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
4960 	   AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
4961 	   macro and introducing temporary variables works around the bug.  */
4962 
4963 	case wordbound:
4964 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4965 	  if (AT_WORD_BOUNDARY (d))
4966 	    break;
4967 	  goto fail;
4968 
4969 	case notwordbound:
4970 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4971 	  if (AT_WORD_BOUNDARY (d))
4972 	    goto fail;
4973 	  break;
4974 #else
4975 	case wordbound:
4976 	{
4977 	  boolean prevchar, thischar;
4978 
4979 	  DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4980 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4981 	    break;
4982 
4983 	  prevchar = WORDCHAR_P (d - 1);
4984 	  thischar = WORDCHAR_P (d);
4985 	  if (prevchar != thischar)
4986 	    break;
4987 	  goto fail;
4988 	}
4989 
4990       case notwordbound:
4991 	{
4992 	  boolean prevchar, thischar;
4993 
4994 	  DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4995 	  if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4996 	    goto fail;
4997 
4998 	  prevchar = WORDCHAR_P (d - 1);
4999 	  thischar = WORDCHAR_P (d);
5000 	  if (prevchar != thischar)
5001 	    goto fail;
5002 	  break;
5003 	}
5004 #endif
5005 
5006 	case wordbeg:
5007           DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
5008 	  if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
5009 	    break;
5010           goto fail;
5011 
5012 	case wordend:
5013           DEBUG_PRINT1 ("EXECUTING wordend.\n");
5014 	  if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
5015               && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
5016 	    break;
5017           goto fail;
5018 
5019 #ifdef emacs
5020   	case before_dot:
5021           DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5022  	  if (PTR_CHAR_POS ((unsigned char *) d) >= point)
5023   	    goto fail;
5024   	  break;
5025 
5026   	case at_dot:
5027           DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5028  	  if (PTR_CHAR_POS ((unsigned char *) d) != point)
5029   	    goto fail;
5030   	  break;
5031 
5032   	case after_dot:
5033           DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5034           if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5035   	    goto fail;
5036   	  break;
5037 
5038 	case syntaxspec:
5039           DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5040 	  mcnt = *p++;
5041 	  goto matchsyntax;
5042 
5043         case wordchar:
5044           DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5045 	  mcnt = (int) Sword;
5046         matchsyntax:
5047 	  PREFETCH ();
5048 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5049 	  d++;
5050 	  if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5051 	    goto fail;
5052           SET_REGS_MATCHED ();
5053 	  break;
5054 
5055 	case notsyntaxspec:
5056           DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5057 	  mcnt = *p++;
5058 	  goto matchnotsyntax;
5059 
5060         case notwordchar:
5061           DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5062 	  mcnt = (int) Sword;
5063         matchnotsyntax:
5064 	  PREFETCH ();
5065 	  /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5066 	  d++;
5067 	  if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5068 	    goto fail;
5069 	  SET_REGS_MATCHED ();
5070           break;
5071 
5072 #else /* not emacs */
5073 	case wordchar:
5074           DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5075 	  PREFETCH ();
5076           if (!WORDCHAR_P (d))
5077             goto fail;
5078 	  SET_REGS_MATCHED ();
5079           d++;
5080 	  break;
5081 
5082 	case notwordchar:
5083           DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5084 	  PREFETCH ();
5085 	  if (WORDCHAR_P (d))
5086             goto fail;
5087           SET_REGS_MATCHED ();
5088           d++;
5089 	  break;
5090 #endif /* not emacs */
5091 
5092         default:
5093           call_abort ();
5094 	}
5095       continue;  /* Successfully executed one pattern command; keep going.  */
5096 
5097 
5098     /* We goto here if a matching operation fails. */
5099     fail:
5100       if (!FAIL_STACK_EMPTY ())
5101 	{ /* A restart point is known.  Restore to that state.  */
5102           DEBUG_PRINT1 ("\nFAIL:\n");
5103           POP_FAILURE_POINT (d, p,
5104                              lowest_active_reg, highest_active_reg,
5105                              regstart, regend, reg_info);
5106 
5107           /* If this failure point is a dummy, try the next one.  */
5108           if (!p)
5109 	    goto fail;
5110 
5111           /* If we failed to the end of the pattern, don't examine *p.  */
5112 	  assert (p <= pend);
5113           if (p < pend)
5114             {
5115               boolean is_a_jump_n = false;
5116 
5117               /* If failed to a backwards jump that's part of a repetition
5118                  loop, need to pop this failure point and use the next one.  */
5119               switch ((re_opcode_t) *p)
5120                 {
5121                 case jump_n:
5122                   is_a_jump_n = true;
5123                 case maybe_pop_jump:
5124                 case pop_failure_jump:
5125                 case jump:
5126                   p1 = p + 1;
5127                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5128                   p1 += mcnt;
5129 
5130                   if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5131                       || (!is_a_jump_n
5132                           && (re_opcode_t) *p1 == on_failure_jump))
5133                     goto fail;
5134                   break;
5135                 default:
5136                   /* do nothing */ ;
5137                 }
5138             }
5139 
5140           if (d >= string1 && d <= end1)
5141 	    dend = end_match_1;
5142         }
5143       else
5144         break;   /* Matching at this starting point really fails.  */
5145     } /* for (;;) */
5146 
5147   if (best_regs_set)
5148     goto restore_best_regs;
5149 
5150   FREE_VARIABLES ();
5151 
5152   return -1;         			/* Failure to match.  */
5153 } /* re_match_2 */
5154 
5155 /* Subroutine definitions for re_match_2.  */
5156 
5157 
5158 /* We are passed P pointing to a register number after a start_memory.
5159 
5160    Return true if the pattern up to the corresponding stop_memory can
5161    match the empty string, and false otherwise.
5162 
5163    If we find the matching stop_memory, sets P to point to one past its number.
5164    Otherwise, sets P to an undefined byte less than or equal to END.
5165 
5166    We don't handle duplicates properly (yet).  */
5167 
5168 static boolean
group_match_null_string_p(p,end,reg_info)5169 group_match_null_string_p (p, end, reg_info)
5170     unsigned char **p, *end;
5171     register_info_type *reg_info;
5172 {
5173   int mcnt;
5174   /* Point to after the args to the start_memory.  */
5175   unsigned char *p1 = *p + 2;
5176 
5177   while (p1 < end)
5178     {
5179       /* Skip over opcodes that can match nothing, and return true or
5180 	 false, as appropriate, when we get to one that can't, or to the
5181          matching stop_memory.  */
5182 
5183       switch ((re_opcode_t) *p1)
5184         {
5185         /* Could be either a loop or a series of alternatives.  */
5186         case on_failure_jump:
5187           p1++;
5188           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5189 
5190           /* If the next operation is not a jump backwards in the
5191 	     pattern.  */
5192 
5193 	  if (mcnt >= 0)
5194 	    {
5195               /* Go through the on_failure_jumps of the alternatives,
5196                  seeing if any of the alternatives cannot match nothing.
5197                  The last alternative starts with only a jump,
5198                  whereas the rest start with on_failure_jump and end
5199                  with a jump, e.g., here is the pattern for `a|b|c':
5200 
5201                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5202                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5203                  /exactn/1/c
5204 
5205                  So, we have to first go through the first (n-1)
5206                  alternatives and then deal with the last one separately.  */
5207 
5208 
5209               /* Deal with the first (n-1) alternatives, which start
5210                  with an on_failure_jump (see above) that jumps to right
5211                  past a jump_past_alt.  */
5212 
5213               while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5214                 {
5215                   /* `mcnt' holds how many bytes long the alternative
5216                      is, including the ending `jump_past_alt' and
5217                      its number.  */
5218 
5219                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5220 				                      reg_info))
5221                     return false;
5222 
5223                   /* Move to right after this alternative, including the
5224 		     jump_past_alt.  */
5225                   p1 += mcnt;
5226 
5227                   /* Break if it's the beginning of an n-th alternative
5228                      that doesn't begin with an on_failure_jump.  */
5229                   if ((re_opcode_t) *p1 != on_failure_jump)
5230                     break;
5231 
5232 		  /* Still have to check that it's not an n-th
5233 		     alternative that starts with an on_failure_jump.  */
5234 		  p1++;
5235                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5236                   if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5237                     {
5238 		      /* Get to the beginning of the n-th alternative.  */
5239                       p1 -= 3;
5240                       break;
5241                     }
5242                 }
5243 
5244               /* Deal with the last alternative: go back and get number
5245                  of the `jump_past_alt' just before it.  `mcnt' contains
5246                  the length of the alternative.  */
5247               EXTRACT_NUMBER (mcnt, p1 - 2);
5248 
5249               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5250                 return false;
5251 
5252               p1 += mcnt;	/* Get past the n-th alternative.  */
5253             } /* if mcnt > 0 */
5254           break;
5255 
5256 
5257         case stop_memory:
5258 	  assert (p1[1] == **p);
5259           *p = p1 + 2;
5260           return true;
5261 
5262 
5263         default:
5264           if (!common_op_match_null_string_p (&p1, end, reg_info))
5265             return false;
5266         }
5267     } /* while p1 < end */
5268 
5269   return false;
5270 } /* group_match_null_string_p */
5271 
5272 
5273 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5274    It expects P to be the first byte of a single alternative and END one
5275    byte past the last. The alternative can contain groups.  */
5276 
5277 static boolean
alt_match_null_string_p(p,end,reg_info)5278 alt_match_null_string_p (p, end, reg_info)
5279     unsigned char *p, *end;
5280     register_info_type *reg_info;
5281 {
5282   int mcnt;
5283   unsigned char *p1 = p;
5284 
5285   while (p1 < end)
5286     {
5287       /* Skip over opcodes that can match nothing, and break when we get
5288          to one that can't.  */
5289 
5290       switch ((re_opcode_t) *p1)
5291         {
5292 	/* It's a loop.  */
5293         case on_failure_jump:
5294           p1++;
5295           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5296           p1 += mcnt;
5297           break;
5298 
5299 	default:
5300           if (!common_op_match_null_string_p (&p1, end, reg_info))
5301             return false;
5302         }
5303     }  /* while p1 < end */
5304 
5305   return true;
5306 } /* alt_match_null_string_p */
5307 
5308 
5309 /* Deals with the ops common to group_match_null_string_p and
5310    alt_match_null_string_p.
5311 
5312    Sets P to one after the op and its arguments, if any.  */
5313 
5314 static boolean
common_op_match_null_string_p(p,end,reg_info)5315 common_op_match_null_string_p (p, end, reg_info)
5316     unsigned char **p, *end;
5317     register_info_type *reg_info;
5318 {
5319   int mcnt;
5320   boolean ret;
5321   int reg_no;
5322   unsigned char *p1 = *p;
5323 
5324   switch ((re_opcode_t) *p1++)
5325     {
5326     case no_op:
5327     case begline:
5328     case endline:
5329     case begbuf:
5330     case endbuf:
5331     case wordbeg:
5332     case wordend:
5333     case wordbound:
5334     case notwordbound:
5335 #ifdef emacs
5336     case before_dot:
5337     case at_dot:
5338     case after_dot:
5339 #endif
5340       break;
5341 
5342     case start_memory:
5343       reg_no = *p1;
5344       assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5345       ret = group_match_null_string_p (&p1, end, reg_info);
5346 
5347       /* Have to set this here in case we're checking a group which
5348          contains a group and a back reference to it.  */
5349 
5350       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5351         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5352 
5353       if (!ret)
5354         return false;
5355       break;
5356 
5357     /* If this is an optimized succeed_n for zero times, make the jump.  */
5358     case jump:
5359       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5360       if (mcnt >= 0)
5361         p1 += mcnt;
5362       else
5363         return false;
5364       break;
5365 
5366     case succeed_n:
5367       /* Get to the number of times to succeed.  */
5368       p1 += 2;
5369       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5370 
5371       if (mcnt == 0)
5372         {
5373           p1 -= 4;
5374           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5375           p1 += mcnt;
5376         }
5377       else
5378         return false;
5379       break;
5380 
5381     case duplicate:
5382       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5383         return false;
5384       break;
5385 
5386     case set_number_at:
5387       p1 += 4;
5388 
5389     default:
5390       /* All other opcodes mean we cannot match the empty string.  */
5391       return false;
5392   }
5393 
5394   *p = p1;
5395   return true;
5396 } /* common_op_match_null_string_p */
5397 
5398 
5399 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5400    bytes; nonzero otherwise.  */
5401 
5402 static int
bcmp_translate(s1,s2,len,translate)5403 bcmp_translate (s1, s2, len, translate)
5404      const char *s1, *s2;
5405      register int len;
5406      RE_TRANSLATE_TYPE translate;
5407 {
5408   register const unsigned char *p1 = (const unsigned char *) s1;
5409   register const unsigned char *p2 = (const unsigned char *) s2;
5410   while (len)
5411     {
5412       if (translate[*p1++] != translate[*p2++]) return 1;
5413       len--;
5414     }
5415   return 0;
5416 }
5417 
5418 /* Entry points for GNU code.  */
5419 
5420 /* re_compile_pattern is the GNU regular expression compiler: it
5421    compiles PATTERN (of length SIZE) and puts the result in BUFP.
5422    Returns 0 if the pattern was valid, otherwise an error string.
5423 
5424    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5425    are set in BUFP on entry.
5426 
5427    We call regex_compile to do the actual compilation.  */
5428 
5429 const char *
re_compile_pattern(pattern,length,bufp)5430 re_compile_pattern (pattern, length, bufp)
5431      const char *pattern;
5432      size_t length;
5433      struct re_pattern_buffer *bufp;
5434 {
5435   reg_errcode_t ret;
5436 
5437   /* GNU code is written to assume at least RE_NREGS registers will be set
5438      (and at least one extra will be -1).  */
5439   bufp->regs_allocated = REGS_UNALLOCATED;
5440 
5441   /* And GNU code determines whether or not to get register information
5442      by passing null for the REGS argument to re_match, etc., not by
5443      setting no_sub.  */
5444   bufp->no_sub = 0;
5445 
5446   /* Match anchors at newline.  */
5447   bufp->newline_anchor = 1;
5448 
5449   ret = regex_compile (pattern, length, re_syntax_options, bufp);
5450 
5451   if (!ret)
5452     return NULL;
5453   return gettext (re_error_msgid[(int) ret]);
5454 }
5455 
5456 /* Entry points compatible with 4.2 BSD regex library.  We don't define
5457    them unless specifically requested.  */
5458 
5459 #if defined (_REGEX_RE_COMP) || defined (_LIBC)
5460 
5461 /* BSD has one and only one pattern buffer.  */
5462 static struct re_pattern_buffer re_comp_buf;
5463 
5464 char *
5465 #ifdef _LIBC
5466 /* Make these definitions weak in libc, so POSIX programs can redefine
5467    these names if they don't use our functions, and still use
5468    regcomp/regexec below without link errors.  */
5469 weak_function
5470 #endif
re_comp(s)5471 re_comp (s)
5472     const char *s;
5473 {
5474   reg_errcode_t ret;
5475 
5476   if (!s)
5477     {
5478       if (!re_comp_buf.buffer)
5479 	return gettext ("No previous regular expression");
5480       return 0;
5481     }
5482 
5483   if (!re_comp_buf.buffer)
5484     {
5485       re_comp_buf.buffer = (unsigned char *) malloc (200);
5486       if (re_comp_buf.buffer == NULL)
5487         return gettext (re_error_msgid[(int) REG_ESPACE]);
5488       re_comp_buf.allocated = 200;
5489 
5490       re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5491       if (re_comp_buf.fastmap == NULL)
5492 	return gettext (re_error_msgid[(int) REG_ESPACE]);
5493     }
5494 
5495   /* Since `re_exec' always passes NULL for the `regs' argument, we
5496      don't need to initialize the pattern buffer fields which affect it.  */
5497 
5498   /* Match anchors at newlines.  */
5499   re_comp_buf.newline_anchor = 1;
5500 
5501   ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5502 
5503   if (!ret)
5504     return NULL;
5505 
5506   /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5507   return (char *) gettext (re_error_msgid[(int) ret]);
5508 }
5509 
5510 
5511 int
5512 #ifdef _LIBC
5513 weak_function
5514 #endif
re_exec(s)5515 re_exec (s)
5516     const char *s;
5517 {
5518   const int len = strlen (s);
5519   return
5520     0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5521 }
5522 
5523 #endif /* _REGEX_RE_COMP */
5524 
5525 /* POSIX.2 functions.  Don't define these for Emacs.  */
5526 
5527 #ifndef emacs
5528 
5529 /* regcomp takes a regular expression as a string and compiles it.
5530 
5531    PREG is a regex_t *.  We do not expect any fields to be initialized,
5532    since POSIX says we shouldn't.  Thus, we set
5533 
5534      `buffer' to the compiled pattern;
5535      `used' to the length of the compiled pattern;
5536      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5537        REG_EXTENDED bit in CFLAGS is set; otherwise, to
5538        RE_SYNTAX_POSIX_BASIC;
5539      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5540      `fastmap' and `fastmap_accurate' to zero;
5541      `re_nsub' to the number of subexpressions in PATTERN.
5542 
5543    PATTERN is the address of the pattern string.
5544 
5545    CFLAGS is a series of bits which affect compilation.
5546 
5547      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5548      use POSIX basic syntax.
5549 
5550      If REG_NEWLINE is set, then . and [^...] don't match newline.
5551      Also, regexec will try a match beginning after every newline.
5552 
5553      If REG_ICASE is set, then we considers upper- and lowercase
5554      versions of letters to be equivalent when matching.
5555 
5556      If REG_NOSUB is set, then when PREG is passed to regexec, that
5557      routine will report only success or failure, and nothing about the
5558      registers.
5559 
5560    It returns 0 if it succeeds, nonzero if it doesn't.  (See rx.h for
5561    the return codes and their meanings.)  */
5562 
5563 int
regcomp(preg,pattern,cflags)5564 regcomp (preg, pattern, cflags)
5565     regex_t *preg;
5566     const char *pattern;
5567     int cflags;
5568 {
5569   reg_errcode_t ret;
5570   reg_syntax_t syntax
5571     = (cflags & REG_EXTENDED) ?
5572       RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5573 
5574   /* regex_compile will allocate the space for the compiled pattern.  */
5575   preg->buffer = 0;
5576   preg->allocated = 0;
5577   preg->used = 0;
5578 
5579   /* Don't bother to use a fastmap when searching.  This simplifies the
5580      REG_NEWLINE case: if we used a fastmap, we'd have to put all the
5581      characters after newlines into the fastmap.  This way, we just try
5582      every character.  */
5583   preg->fastmap = 0;
5584 
5585   if (cflags & REG_ICASE)
5586     {
5587       unsigned i;
5588 
5589       preg->translate
5590 	= (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5591 				      * sizeof (*(RE_TRANSLATE_TYPE)0));
5592       if (preg->translate == NULL)
5593         return (int) REG_ESPACE;
5594 
5595       /* Map uppercase characters to corresponding lowercase ones.  */
5596       for (i = 0; i < CHAR_SET_SIZE; i++)
5597         preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5598     }
5599   else
5600     preg->translate = NULL;
5601 
5602   /* If REG_NEWLINE is set, newlines are treated differently.  */
5603   if (cflags & REG_NEWLINE)
5604     { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5605       syntax &= ~RE_DOT_NEWLINE;
5606       syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5607       /* It also changes the matching behavior.  */
5608       preg->newline_anchor = 1;
5609     }
5610   else
5611     preg->newline_anchor = 0;
5612 
5613   preg->no_sub = !!(cflags & REG_NOSUB);
5614 
5615   /* POSIX says a null character in the pattern terminates it, so we
5616      can use strlen here in compiling the pattern.  */
5617   ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5618 
5619   /* POSIX doesn't distinguish between an unmatched open-group and an
5620      unmatched close-group: both are REG_EPAREN.  */
5621   if (ret == REG_ERPAREN) ret = REG_EPAREN;
5622 
5623   return (int) ret;
5624 }
5625 
5626 
5627 /* regexec searches for a given pattern, specified by PREG, in the
5628    string STRING.
5629 
5630    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5631    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5632    least NMATCH elements, and we set them to the offsets of the
5633    corresponding matched substrings.
5634 
5635    EFLAGS specifies `execution flags' which affect matching: if
5636    REG_NOTBOL is set, then ^ does not match at the beginning of the
5637    string; if REG_NOTEOL is set, then $ does not match at the end.
5638 
5639    We return 0 if we find a match and REG_NOMATCH if not.  */
5640 
5641 int
regexec(preg,string,nmatch,pmatch,eflags)5642 regexec (preg, string, nmatch, pmatch, eflags)
5643     const regex_t *preg;
5644     const char *string;
5645     size_t nmatch;
5646     regmatch_t pmatch[];
5647     int eflags;
5648 {
5649   int ret;
5650   struct re_registers regs;
5651   regex_t private_preg;
5652   int len = strlen (string);
5653   boolean want_reg_info = !preg->no_sub && nmatch > 0;
5654 
5655   private_preg = *preg;
5656 
5657   private_preg.not_bol = !!(eflags & REG_NOTBOL);
5658   private_preg.not_eol = !!(eflags & REG_NOTEOL);
5659 
5660   /* The user has told us exactly how many registers to return
5661      information about, via `nmatch'.  We have to pass that on to the
5662      matching routines.  */
5663   private_preg.regs_allocated = REGS_FIXED;
5664 
5665   if (want_reg_info)
5666     {
5667       regs.num_regs = nmatch;
5668       regs.start = TALLOC (nmatch, regoff_t);
5669       regs.end = TALLOC (nmatch, regoff_t);
5670       if (regs.start == NULL || regs.end == NULL)
5671         return (int) REG_NOMATCH;
5672     }
5673 
5674   /* Perform the searching operation.  */
5675   ret = re_search (&private_preg, string, len,
5676                    /* start: */ 0, /* range: */ len,
5677                    want_reg_info ? &regs : (struct re_registers *) 0);
5678 
5679   /* Copy the register information to the POSIX structure.  */
5680   if (want_reg_info)
5681     {
5682       if (ret >= 0)
5683         {
5684           unsigned r;
5685 
5686           for (r = 0; r < nmatch; r++)
5687             {
5688               pmatch[r].rm_so = regs.start[r];
5689               pmatch[r].rm_eo = regs.end[r];
5690             }
5691         }
5692 
5693       /* If we needed the temporary register info, free the space now.  */
5694       free (regs.start);
5695       free (regs.end);
5696     }
5697 
5698   /* We want zero return to mean success, unlike `re_search'.  */
5699   return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5700 }
5701 
5702 
5703 /* Returns a message corresponding to an error code, ERRCODE, returned
5704    from either regcomp or regexec.   We don't use PREG here.  */
5705 
5706 size_t
regerror(errcode,preg,errbuf,errbuf_size)5707 regerror (errcode, preg, errbuf, errbuf_size)
5708     int errcode;
5709     const regex_t *preg;
5710     char *errbuf;
5711     size_t errbuf_size;
5712 {
5713   const char *msg;
5714   size_t msg_size;
5715 
5716   if (errcode < 0
5717       || errcode >= (int) (sizeof (re_error_msgid)
5718 			   / sizeof (re_error_msgid[0])))
5719     /* Only error codes returned by the rest of the code should be passed
5720        to this routine.  If we are given anything else, or if other regex
5721        code generates an invalid error code, then the program has a bug.
5722        Dump core so we can fix it.  */
5723     call_abort ();
5724 
5725   msg = gettext (re_error_msgid[errcode]);
5726 
5727   msg_size = strlen (msg) + 1; /* Includes the null.  */
5728 
5729   if (errbuf_size != 0)
5730     {
5731       if (msg_size > errbuf_size)
5732         {
5733           strncpy (errbuf, msg, errbuf_size - 1);
5734           errbuf[errbuf_size - 1] = 0;
5735         }
5736       else
5737         strcpy (errbuf, msg);
5738     }
5739 
5740   return msg_size;
5741 }
5742 
5743 
5744 /* Free dynamically allocated space used by PREG.  */
5745 
5746 void
regfree(preg)5747 regfree (preg)
5748     regex_t *preg;
5749 {
5750   if (preg->buffer != NULL)
5751     free (preg->buffer);
5752   preg->buffer = NULL;
5753 
5754   preg->allocated = 0;
5755   preg->used = 0;
5756 
5757   if (preg->fastmap != NULL)
5758     free (preg->fastmap);
5759   preg->fastmap = NULL;
5760   preg->fastmap_accurate = 0;
5761 
5762   if (preg->translate != NULL)
5763     free (preg->translate);
5764   preg->translate = NULL;
5765 }
5766 
5767 #endif /* not emacs  */
5768