1 /* LGPLd GNU regex for Native WIN32 */
2 
3 /* Part of the ht://Dig package   <http://www.htdig.org/> */
4 /* Copyright (c) 2003 The ht://Dig Group */
5 /* For copyright details, see the file COPYING in your distribution */
6 /* or the GNU Library General Public License (LGPL) version 2 or later or later */
7 /* <http://www.gnu.org/copyleft/lgpl.html> */
8 
9 /* Added June 2003 Neal Richter, RightNow Technologies */
10 
11 /* note that this version is significantly different from the original  */
12 /* version 0.12 GNU source code.  It compiles and works on Native WIN32. */
13 
14 /* Extended regular expression matching and search library,
15    version 0.12.
16    (Implements POSIX draft P1003.2/D11.2, except for some of the
17    internationalization features.)
18 
19    Copyright (C) 1993, 1994, 1995, 1996, 1997 Free Software Foundation, Inc.
20 
21    This file is part of the GNU C Library.  Its master source is NOT part of
22    the C library, however.  The master source lives in /gd/gnu/lib.
23 
24    The GNU C Library is free software; you can redistribute it and/or
25    modify it under the terms of the GNU Library General Public License as
26    published by the Free Software Foundation; either version 2 of the
27    License, or (at your option) any later version.
28 
29    The GNU C Library is distributed in the hope that it will be useful,
30    but WITHOUT ANY WARRANTY; without even the implied warranty of
31    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
32    Library General Public License for more details.
33 
34    You should have received a copy of the GNU Library General Public
35    License along with the GNU C Library; see the file COPYING.LIB.  If not,
36    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
37    Boston, MA 02111-1307, USA.  */
38 
39 #if defined(_WIN32)
40 #pragma warning(disable: 4018 4101)
41 #endif
42 
43 /* AIX requires this to be the first thing in the file. */
44 #if defined (_AIX) && !defined (REGEX_MALLOC)
45 #pragma alloca
46 #endif
47 
48 #undef  _GNU_SOURCE
49 #define _GNU_SOURCE
50 
51 #if defined(LINUX)
52 #define STDC_HEADERS
53 #endif
54 
55 #if defined(STDC_HEADERS) && !defined(emacs)
56 #include <stddef.h>
57 #else
58 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
59 #include <sys/types.h>
60 #endif
61 
62 /* For platform which support the ISO C amendement 1 functionality we
63    support user defined character classes.  */
64 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
65 # include <wctype.h>
66 # include <wchar.h>
67 #endif
68 
69 /* This is for other GNU distributions with internationalized messages.  */
70 #if HAVE_LIBINTL_H || defined (_LIBC)
71 # include <libintl.h>
72 #else
73 # define gettext(msgid) (msgid)
74 #endif
75 
76 #ifndef gettext_noop
77 /* This define is so xgettext can find the internationalizable
78    strings.  */
79 #define gettext_noop(String) String
80 #endif
81 
82 /* The `emacs' switch turns on certain matching commands
83    that make sense only in Emacs. */
84 #ifdef emacs
85 
86 #include "lisp.h"
87 #include "buffer.h"
88 #include "syntax.h"
89 
90 #else  /* not emacs */
91 
92 /* If we are not linking with Emacs proper,
93    we can't use the relocating allocator
94    even if config.h says that we can.  */
95 #undef REL_ALLOC
96 
97 #if defined (STDC_HEADERS) || defined (_LIBC) || defined(_WIN32)
98 #include <stdlib.h>
99 #else
100 char *malloc ();
101 char *realloc ();
102 void free();
103 #endif
104 
105 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
106    If nothing else has been done, use the method below.  */
107 #ifdef INHIBIT_STRING_HEADER
108 #if !(defined (HAVE_BZERO) && defined (HAVE_BCOPY))
109 #if !defined (bzero) && !defined (bcopy)
110 #undef INHIBIT_STRING_HEADER
111 #endif
112 #endif
113 #endif
114 
115 #include <string.h>
116 
117 /* This is the normal way of making sure we have a bcopy and a bzero.
118    This is used in most programs--a few other programs avoid this
119    by defining INHIBIT_STRING_HEADER.  */
120 #ifndef INHIBIT_STRING_HEADER
121 #if defined (HAVE_STRING_H) || defined (STDC_HEADERS) || defined (_LIBC) || defined (_WIN32)
122 #ifndef bcmp
123 #define bcmp(s1, s2, n) memcmp ((s1), (s2), (n))
124 #endif
125 #ifndef bcopy
126 #define bcopy(s, d, n)  memcpy ((d), (s), (n))
127 #endif
128 #ifndef bzero
129 #define bzero(s, n)     memset ((s), 0, (n))
130 #endif
131 #else
132 #include <strings.h>
133 #endif
134 #endif
135 
136 /* Define the syntax stuff for \<, \>, etc.  */
137 
138 /* This must be nonzero for the wordchar and notwordchar pattern
139    commands in re_match_2.  */
140 #ifndef Sword
141 #define Sword 1
142 #endif
143 
144 #ifdef SWITCH_ENUM_BUG
145 #define SWITCH_ENUM_CAST(x) ((int)(x))
146 #else
147 #define SWITCH_ENUM_CAST(x) (x)
148 #endif
149 
150 #ifdef SYNTAX_TABLE
151 
152 extern char *re_syntax_table;
153 
154 #else /* not SYNTAX_TABLE */
155 
156 /* How many characters in the character set.  */
157 #define CHAR_SET_SIZE 256
158 
159 static char re_syntax_table[CHAR_SET_SIZE];
160 
161 static void
init_syntax_once()162 init_syntax_once ()
163 {
164     register int c;
165     static int done = 0;
166 
167     if (done)
168         return;
169 
170     bzero (re_syntax_table, sizeof re_syntax_table);
171 
172     for (c = 'a'; c <= 'z'; c++)
173         re_syntax_table[c] = Sword;
174 
175     for (c = 'A'; c <= 'Z'; c++)
176         re_syntax_table[c] = Sword;
177 
178     for (c = '0'; c <= '9'; c++)
179         re_syntax_table[c] = Sword;
180 
181     re_syntax_table['_'] = Sword;
182 
183     done = 1;
184 }
185 
186 #endif /* not SYNTAX_TABLE */
187 
188 #define SYNTAX(c) re_syntax_table[c]
189 
190 #endif /* not emacs */
191 
192 /* Get the interface, including the syntax bits.  */
193 /* #include "regex.h" */
194 #include "regex_win32.h"
195 
196 /* isalpha etc. are used for the character classes.  */
197 #include <ctype.h>
198 
199 /* Jim Meyering writes:
200 
201    "... Some ctype macros are valid only for character codes that
202    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
203    using /bin/cc or gcc but without giving an ansi option).  So, all
204    ctype uses should be through macros like ISPRINT...  If
205    STDC_HEADERS is defined, then autoconf has verified that the ctype
206    macros don't need to be guarded with references to isascii. ...
207    Defining isascii to 1 should let any compiler worth its salt
208    eliminate the && through constant folding."  */
209 
210 #if defined (STDC_HEADERS) || (!defined (isascii) && !defined (HAVE_ISASCII))
211 #define ISASCII(c) 1
212 #else
213 #define ISASCII(c) isascii(c)
214 #endif
215 
216 #ifdef isblank
217 #define ISBLANK(c) (ISASCII (c) && isblank (c))
218 #else
219 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
220 #endif
221 #ifdef isgraph
222 #define ISGRAPH(c) (ISASCII (c) && isgraph (c))
223 #else
224 #define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
225 #endif
226 
227 #define ISPRINT(c) (ISASCII (c) && isprint (c))
228 #define ISDIGIT(c) (ISASCII (c) && isdigit (c))
229 #define ISALNUM(c) (ISASCII (c) && isalnum (c))
230 #define ISALPHA(c) (ISASCII (c) && isalpha (c))
231 #define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
232 #define ISLOWER(c) (ISASCII (c) && islower (c))
233 #define ISPUNCT(c) (ISASCII (c) && ispunct (c))
234 #define ISSPACE(c) (ISASCII (c) && isspace (c))
235 #define ISUPPER(c) (ISASCII (c) && isupper (c))
236 #define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
237 
238 #ifndef NULL
239 #define NULL (void *)0
240 #endif
241 
242 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
243    since ours (we hope) works properly with all combinations of
244    machines, compilers, `char' and `unsigned char' argument types.
245    (Per Bothner suggested the basic approach.)  */
246 #undef SIGN_EXTEND_CHAR
247 #if __STDC__
248 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
249 #else  /* not __STDC__ */
250 /* As in Harbison and Steele.  */
251 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
252 #endif
253 
254 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
255    use `alloca' instead of `malloc'.  This is because using malloc in
256    re_search* or re_match* could cause memory leaks when C-g is used in
257    Emacs; also, malloc is slower and causes storage fragmentation.  On
258    the other hand, malloc is more portable, and easier to debug.
259 
260    Because we sometimes use alloca, some routines have to be macros,
261    not functions -- `alloca'-allocated space disappears at the end of the
262    function it is called in.  */
263 
264 #if defined(REGEX_MALLOC) || defined(_WIN32)
265 
266 #define REGEX_ALLOCATE malloc
267 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
268 #define REGEX_FREE free
269 #define REGEX_MALLOC
270 
271 #else /* not REGEX_MALLOC  */
272 
273 /* Emacs already defines alloca, sometimes.  */
274 #ifndef alloca
275 
276 /* Make alloca work the best possible way.  */
277 #ifdef __GNUC__
278 #define alloca __builtin_alloca
279 #else /* not __GNUC__ */
280 #if HAVE_ALLOCA_H
281 #include <alloca.h>
282 #else /* not __GNUC__ or HAVE_ALLOCA_H */
283 #if 0 /* It is a bad idea to declare alloca.  We always cast the result.  */
284 #ifndef _AIX /* Already did AIX, up at the top.  */
285 char *alloca ();
286 #endif /* not _AIX */
287 #endif
288 #endif /* not HAVE_ALLOCA_H */
289 #endif /* not __GNUC__ */
290 
291 #endif /* not alloca */
292 
293 #define REGEX_ALLOCATE alloca
294 
295 /* Assumes a `char *destination' variable.  */
296 #define REGEX_REALLOCATE(source, osize, nsize)                          \
297   (destination = (char *) alloca (nsize),                               \
298    bcopy (source, destination, osize),                                  \
299    destination)
300 
301 /* No need to do anything to free, after alloca.  */
302 #define REGEX_FREE(arg) ((void)0) /* Do nothing!  But inhibit gcc warning.  */
303 
304 #endif /* not REGEX_MALLOC */
305 
306 /* Define how to allocate the failure stack.  */
307 
308 #if defined (REL_ALLOC) && defined (REGEX_MALLOC)
309 
310 #define REGEX_ALLOCATE_STACK(size)                              \
311   r_alloc (&failure_stack_ptr, (size))
312 #define REGEX_REALLOCATE_STACK(source, osize, nsize)            \
313   r_re_alloc (&failure_stack_ptr, (nsize))
314 #define REGEX_FREE_STACK(ptr)                                   \
315   r_alloc_free (&failure_stack_ptr)
316 
317 #else /* not using relocating allocator */
318 
319 #ifdef REGEX_MALLOC
320 
321 #define REGEX_ALLOCATE_STACK malloc
322 #define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
323 #define REGEX_FREE_STACK free
324 
325 #else /* not REGEX_MALLOC */
326 
327 #define REGEX_ALLOCATE_STACK alloca
328 
329 #define REGEX_REALLOCATE_STACK(source, osize, nsize)                    \
330    REGEX_REALLOCATE (source, osize, nsize)
331 /* No need to explicitly free anything.  */
332 #define REGEX_FREE_STACK(arg)
333 
334 #endif /* not REGEX_MALLOC */
335 #endif /* not using relocating allocator */
336 
337 
338 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
339    `string1' or just past its end.  This works if PTR is NULL, which is
340    a good thing.  */
341 #define FIRST_STRING_P(ptr)                                     \
342   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
343 
344 /* (Re)Allocate N items of type T using malloc, or fail.  */
345 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
346 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
347 #define RETALLOC_IF(addr, n, t) \
348   if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
349 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
350 
351 #define BYTEWIDTH 8 /* In bits.  */
352 
353 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
354 
355 #undef MAX
356 #undef MIN
357 #define MAX(a, b) ((a) > (b) ? (a) : (b))
358 #define MIN(a, b) ((a) < (b) ? (a) : (b))
359 
360 typedef char boolean;
361 #define false 0
362 #define true 1
363 
364 static int
365 re_match_2_internal(struct re_pattern_buffer *bufp,
366                     const char *string1,
367                     int size1,
368                     const char *string2,
369                     int size2,
370                     int pos,
371                     struct re_registers *regs,
372                     int stop);
373 
374 /* These are the command codes that appear in compiled regular
375    expressions.  Some opcodes are followed by argument bytes.  A
376    command code can specify any interpretation whatsoever for its
377    arguments.  Zero bytes may appear in the compiled regular expression.  */
378 
379 typedef enum
380 {
381     no_op = 0,
382 
383     /* Succeed right away--no more backtracking.  */
384     succeed,
385 
386     /* Followed by one byte giving n, then by n literal bytes.  */
387     exactn,
388 
389     /* Matches any (more or less) character.  */
390     anychar,
391 
392     /* Matches any one char belonging to specified set.  First
393        following byte is number of bitmap bytes.  Then come bytes
394        for a bitmap saying which chars are in.  Bits in each byte
395        are ordered low-bit-first.  A character is in the set if its
396        bit is 1.  A character too large to have a bit in the map is
397        automatically not in the set.  */
398     charset,
399 
400     /* Same parameters as charset, but match any character that is
401        not one of those specified.  */
402     charset_not,
403 
404     /* Start remembering the text that is matched, for storing in a
405        register.  Followed by one byte with the register number, in
406        the range 0 to one less than the pattern buffer's re_nsub
407        field.  Then followed by one byte with the number of groups
408        inner to this one.  (This last has to be part of the
409        start_memory only because we need it in the on_failure_jump
410        of re_match_2.)  */
411     start_memory,
412 
413     /* Stop remembering the text that is matched and store it in a
414        memory register.  Followed by one byte with the register
415        number, in the range 0 to one less than `re_nsub' in the
416        pattern buffer, and one byte with the number of inner groups,
417        just like `start_memory'.  (We need the number of inner
418        groups here because we don't have any easy way of finding the
419        corresponding start_memory when we're at a stop_memory.)  */
420     stop_memory,
421 
422     /* Match a duplicate of something remembered. Followed by one
423        byte containing the register number.  */
424     duplicate,
425 
426     /* Fail unless at beginning of line.  */
427     begline,
428 
429     /* Fail unless at end of line.  */
430     endline,
431 
432     /* Succeeds if at beginning of buffer (if emacs) or at beginning
433        of string to be matched (if not).  */
434     begbuf,
435 
436     /* Analogously, for end of buffer/string.  */
437     endbuf,
438 
439     /* Followed by two byte relative address to which to jump.  */
440     jump,
441 
442     /* Same as jump, but marks the end of an alternative.  */
443     jump_past_alt,
444 
445     /* Followed by two-byte relative address of place to resume at
446        in case of failure.  */
447     on_failure_jump,
448 
449     /* Like on_failure_jump, but pushes a placeholder instead of the
450        current string position when executed.  */
451     on_failure_keep_string_jump,
452 
453     /* Throw away latest failure point and then jump to following
454        two-byte relative address.  */
455     pop_failure_jump,
456 
457     /* Change to pop_failure_jump if know won't have to backtrack to
458        match; otherwise change to jump.  This is used to jump
459        back to the beginning of a repeat.  If what follows this jump
460        clearly won't match what the repeat does, such that we can be
461        sure that there is no use backtracking out of repetitions
462        already matched, then we change it to a pop_failure_jump.
463        Followed by two-byte address.  */
464     maybe_pop_jump,
465 
466     /* Jump to following two-byte address, and push a dummy failure
467        point. This failure point will be thrown away if an attempt
468        is made to use it for a failure.  A `+' construct makes this
469        before the first repeat.  Also used as an intermediary kind
470        of jump when compiling an alternative.  */
471     dummy_failure_jump,
472 
473     /* Push a dummy failure point and continue.  Used at the end of
474        alternatives.  */
475     push_dummy_failure,
476 
477     /* Followed by two-byte relative address and two-byte number n.
478        After matching N times, jump to the address upon failure.  */
479     succeed_n,
480 
481     /* Followed by two-byte relative address, and two-byte number n.
482        Jump to the address N times, then fail.  */
483     jump_n,
484 
485     /* Set the following two-byte relative address to the
486        subsequent two-byte number.  The address *includes* the two
487        bytes of number.  */
488     set_number_at,
489 
490     wordchar,   /* Matches any word-constituent character.  */
491     notwordchar,        /* Matches any char that is not a word-constituent.  */
492 
493     wordbeg,    /* Succeeds if at word beginning.  */
494     wordend,    /* Succeeds if at word end.  */
495 
496     wordbound,  /* Succeeds if at a word boundary.  */
497     notwordbound        /* Succeeds if not at a word boundary.  */
498 
499 #ifdef emacs
500     ,before_dot,        /* Succeeds if before point.  */
501     at_dot,     /* Succeeds if at point.  */
502     after_dot,  /* Succeeds if after point.  */
503 
504     /* Matches any character whose syntax is specified.  Followed by
505        a byte which contains a syntax code, e.g., Sword.  */
506     syntaxspec,
507 
508     /* Matches any character whose syntax is not that specified.  */
509     notsyntaxspec
510 #endif /* emacs */
511 } re_opcode_t;
512 
513 /* Common operations on the compiled pattern.  */
514 
515 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
516 
517 #define STORE_NUMBER(destination, number)                               \
518   do {                                                                  \
519     (destination)[0] = (number) & 0377;                                 \
520     (destination)[1] = (number) >> 8;                                   \
521   } while (0)
522 
523 /* Same as STORE_NUMBER, except increment DESTINATION to
524    the byte after where the number is stored.  Therefore, DESTINATION
525    must be an lvalue.  */
526 
527 #define STORE_NUMBER_AND_INCR(destination, number)                      \
528   do {                                                                  \
529     STORE_NUMBER (destination, number);                                 \
530     (destination) += 2;                                                 \
531   } while (0)
532 
533 /* Put into DESTINATION a number stored in two contiguous bytes starting
534    at SOURCE.  */
535 
536 #define EXTRACT_NUMBER(destination, source)                             \
537   do {                                                                  \
538     (destination) = *(source) & 0377;                                   \
539     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;           \
540   } while (0)
541 
542 #ifdef DEBUG
543 static void extract_number _RE_ARGS ((int *dest, unsigned char *source));
544 static void
extract_number(dest,source)545 extract_number (dest, source)
546      int *dest;
547      unsigned char *source;
548 {
549     int temp = SIGN_EXTEND_CHAR (*(source + 1));
550     *dest = *source & 0377;
551     *dest += temp << 8;
552 }
553 
554 #ifndef EXTRACT_MACROS /* To debug the macros.  */
555 #undef EXTRACT_NUMBER
556 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
557 #endif /* not EXTRACT_MACROS */
558 
559 #endif /* DEBUG */
560 
561 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
562    SOURCE must be an lvalue.  */
563 
564 #define EXTRACT_NUMBER_AND_INCR(destination, source)                    \
565   do {                                                                  \
566     EXTRACT_NUMBER (destination, source);                               \
567     (source) += 2;                                                      \
568   } while (0)
569 
570 #ifdef DEBUG
571 static void extract_number_and_incr _RE_ARGS ((int *destination,
572                                                unsigned char **source));
573 static void
extract_number_and_incr(destination,source)574 extract_number_and_incr (destination, source)
575      int *destination;
576      unsigned char **source;
577 {
578     extract_number (destination, *source);
579     *source += 2;
580 }
581 
582 #ifndef EXTRACT_MACROS
583 #undef EXTRACT_NUMBER_AND_INCR
584 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
585   extract_number_and_incr (&dest, &src)
586 #endif /* not EXTRACT_MACROS */
587 
588 #endif /* DEBUG */
589 
590 /* If DEBUG is defined, Regex prints many voluminous messages about what
591    it is doing (if the variable `debug' is nonzero).  If linked with the
592    main program in `iregex.c', you can enter patterns and strings
593    interactively.  And if linked with the main program in `main.c' and
594    the other test files, you can run the already-written tests.  */
595 
596 #ifdef DEBUG
597 
598 /* We use standard I/O for debugging.  */
599 #include <stdio.h>
600 
601 /* It is useful to test things that ``must'' be true when debugging.  */
602 #include <assert.h>
603 
604 static int debug = 0;
605 
606 #define DEBUG_STATEMENT(e) e
607 #define DEBUG_PRINT1(x) if (debug) printf (x)
608 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
609 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
610 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
611 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)                           \
612   if (debug) print_partial_compiled_pattern (s, e)
613 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)                  \
614   if (debug) print_double_string (w, s1, sz1, s2, sz2)
615 
616 
617 /* Print the fastmap in human-readable form.  */
618 
619 void
print_fastmap(fastmap)620 print_fastmap (fastmap)
621      char *fastmap;
622 {
623     unsigned was_a_range = 0;
624     unsigned i = 0;
625 
626     while (i < (1 << BYTEWIDTH))
627         {
628             if (fastmap[i++])
629                 {
630                     was_a_range = 0;
631                     putchar (i - 1);
632                     while (i < (1 << BYTEWIDTH)  &&  fastmap[i])
633                         {
634                             was_a_range = 1;
635                             i++;
636                         }
637                     if (was_a_range)
638                         {
639                             printf ("-");
640                             putchar (i - 1);
641                         }
642                 }
643         }
644     putchar ('\n');
645 }
646 
647 
648 /* Print a compiled pattern string in human-readable form, starting at
649    the START pointer into it and ending just before the pointer END.  */
650 
651 void
print_partial_compiled_pattern(start,end)652 print_partial_compiled_pattern (start, end)
653      unsigned char *start;
654      unsigned char *end;
655 {
656     int mcnt, mcnt2;
657     unsigned char *p1;
658     unsigned char *p = start;
659     unsigned char *pend = end;
660 
661     if (start == NULL)
662         {
663             printf ("(null)\n");
664             return;
665         }
666 
667     /* Loop over pattern commands.  */
668     while (p < pend)
669         {
670             printf ("%d:\t", p - start);
671 
672             switch ((re_opcode_t) *p++)
673                 {
674                 case no_op:
675                     printf ("/no_op");
676                     break;
677 
678                 case exactn:
679                     mcnt = *p++;
680                     printf ("/exactn/%d", mcnt);
681                     do
682                         {
683                             putchar ('/');
684                             putchar (*p++);
685                         }
686                     while (--mcnt);
687                     break;
688 
689                 case start_memory:
690                     mcnt = *p++;
691                     printf ("/start_memory/%d/%d", mcnt, *p++);
692                     break;
693 
694                 case stop_memory:
695                     mcnt = *p++;
696                     printf ("/stop_memory/%d/%d", mcnt, *p++);
697                     break;
698 
699                 case duplicate:
700                     printf ("/duplicate/%d", *p++);
701                     break;
702 
703                 case anychar:
704                     printf ("/anychar");
705                     break;
706 
707                 case charset:
708                 case charset_not:
709                     {
710                         register int c, last = -100;
711                         register int in_range = 0;
712 
713                         printf ("/charset [%s",
714                                 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
715 
716                         assert (p + *p < pend);
717 
718                         for (c = 0; c < 256; c++)
719                             if (c / 8 < *p
720                                 && (p[1 + (c/8)] & (1 << (c % 8))))
721                                 {
722                                     /* Are we starting a range?  */
723                                     if (last + 1 == c && ! in_range)
724                                         {
725                                             putchar ('-');
726                                             in_range = 1;
727                                         }
728                                     /* Have we broken a range?  */
729                                     else if (last + 1 != c && in_range)
730                                         {
731                                             putchar (last);
732                                             in_range = 0;
733                                         }
734 
735                                     if (! in_range)
736                                         putchar (c);
737 
738                                     last = c;
739                                 }
740 
741                         if (in_range)
742                             putchar (last);
743 
744                         putchar (']');
745 
746                         p += 1 + *p;
747                     }
748                     break;
749 
750                 case begline:
751                     printf ("/begline");
752                     break;
753 
754                 case endline:
755                     printf ("/endline");
756                     break;
757 
758                 case on_failure_jump:
759                     extract_number_and_incr (&mcnt, &p);
760                     printf ("/on_failure_jump to %d", p + mcnt - start);
761                     break;
762 
763                 case on_failure_keep_string_jump:
764                     extract_number_and_incr (&mcnt, &p);
765                     printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
766                     break;
767 
768                 case dummy_failure_jump:
769                     extract_number_and_incr (&mcnt, &p);
770                     printf ("/dummy_failure_jump to %d", p + mcnt - start);
771                     break;
772 
773                 case push_dummy_failure:
774                     printf ("/push_dummy_failure");
775                     break;
776 
777                 case maybe_pop_jump:
778                     extract_number_and_incr (&mcnt, &p);
779                     printf ("/maybe_pop_jump to %d", p + mcnt - start);
780                     break;
781 
782                 case pop_failure_jump:
783                     extract_number_and_incr (&mcnt, &p);
784                     printf ("/pop_failure_jump to %d", p + mcnt - start);
785                     break;
786 
787                 case jump_past_alt:
788                     extract_number_and_incr (&mcnt, &p);
789                     printf ("/jump_past_alt to %d", p + mcnt - start);
790                     break;
791 
792                 case jump:
793                     extract_number_and_incr (&mcnt, &p);
794                     printf ("/jump to %d", p + mcnt - start);
795                     break;
796 
797                 case succeed_n:
798                     extract_number_and_incr (&mcnt, &p);
799                     p1 = p + mcnt;
800                     extract_number_and_incr (&mcnt2, &p);
801                     printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
802                     break;
803 
804                 case jump_n:
805                     extract_number_and_incr (&mcnt, &p);
806                     p1 = p + mcnt;
807                     extract_number_and_incr (&mcnt2, &p);
808                     printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
809                     break;
810 
811                 case set_number_at:
812                     extract_number_and_incr (&mcnt, &p);
813                     p1 = p + mcnt;
814                     extract_number_and_incr (&mcnt2, &p);
815                     printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
816                     break;
817 
818                 case wordbound:
819                     printf ("/wordbound");
820                     break;
821 
822                 case notwordbound:
823                     printf ("/notwordbound");
824                     break;
825 
826                 case wordbeg:
827                     printf ("/wordbeg");
828                     break;
829 
830                 case wordend:
831                     printf ("/wordend");
832 
833 #ifdef emacs
834                 case before_dot:
835                     printf ("/before_dot");
836                     break;
837 
838                 case at_dot:
839                     printf ("/at_dot");
840                     break;
841 
842                 case after_dot:
843                     printf ("/after_dot");
844                     break;
845 
846                 case syntaxspec:
847                     printf ("/syntaxspec");
848                     mcnt = *p++;
849                     printf ("/%d", mcnt);
850                     break;
851 
852                 case notsyntaxspec:
853                     printf ("/notsyntaxspec");
854                     mcnt = *p++;
855                     printf ("/%d", mcnt);
856                     break;
857 #endif /* emacs */
858 
859                 case wordchar:
860                     printf ("/wordchar");
861                     break;
862 
863                 case notwordchar:
864                     printf ("/notwordchar");
865                     break;
866 
867                 case begbuf:
868                     printf ("/begbuf");
869                     break;
870 
871                 case endbuf:
872                     printf ("/endbuf");
873                     break;
874 
875                 default:
876                     printf ("?%d", *(p-1));
877                 }
878 
879             putchar ('\n');
880         }
881 
882     printf ("%d:\tend of pattern.\n", p - start);
883 }
884 
885 
886 void
print_compiled_pattern(bufp)887 print_compiled_pattern (bufp)
888      struct re_pattern_buffer *bufp;
889 {
890     unsigned char *buffer = bufp->buffer;
891 
892     print_partial_compiled_pattern (buffer, buffer + bufp->used);
893     printf ("%ld bytes used/%ld bytes allocated.\n",
894             bufp->used, bufp->allocated);
895 
896     if (bufp->fastmap_accurate && bufp->fastmap)
897         {
898             printf ("fastmap: ");
899             print_fastmap (bufp->fastmap);
900         }
901 
902     printf ("re_nsub: %d\t", bufp->re_nsub);
903     printf ("regs_alloc: %d\t", bufp->regs_allocated);
904     printf ("can_be_null: %d\t", bufp->can_be_null);
905     printf ("newline_anchor: %d\n", bufp->newline_anchor);
906     printf ("no_sub: %d\t", bufp->no_sub);
907     printf ("not_bol: %d\t", bufp->not_bol);
908     printf ("not_eol: %d\t", bufp->not_eol);
909     printf ("syntax: %lx\n", bufp->syntax);
910     /* Perhaps we should print the translate table?  */
911 }
912 
913 
914 void
print_double_string(where,string1,size1,string2,size2)915 print_double_string (where, string1, size1, string2, size2)
916      const char *where;
917      const char *string1;
918      const char *string2;
919      int size1;
920      int size2;
921 {
922     int this_char;
923 
924     if (where == NULL)
925         printf ("(null)");
926     else
927         {
928             if (FIRST_STRING_P (where))
929                 {
930                     for (this_char = where - string1; this_char < size1; this_char++)
931                         putchar (string1[this_char]);
932 
933                     where = string2;
934                 }
935 
936             for (this_char = where - string2; this_char < size2; this_char++)
937                 putchar (string2[this_char]);
938         }
939 }
940 
941 void
printchar(c)942 printchar (c)
943      int c;
944 {
945     putc (c, stderr);
946 }
947 
948 #else /* not DEBUG */
949 
950 #undef assert
951 #define assert(e)
952 
953 #define DEBUG_STATEMENT(e)
954 #define DEBUG_PRINT1(x)
955 #define DEBUG_PRINT2(x1, x2)
956 #define DEBUG_PRINT3(x1, x2, x3)
957 #define DEBUG_PRINT4(x1, x2, x3, x4)
958 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
959 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
960 
961 #endif /* not DEBUG */
962 
963 /* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
964    also be assigned to arbitrarily: each pattern buffer stores its own
965    syntax, so it can be changed between regex compilations.  */
966 /* This has no initializer because initialized variables in Emacs
967    become read-only after dumping.  */
968 reg_syntax_t re_syntax_options;
969 
970 
971 /* Specify the precise syntax of regexps for compilation.  This provides
972    for compatibility for various utilities which historically have
973    different, incompatible syntaxes.
974 
975    The argument SYNTAX is a bit mask comprised of the various bits
976    defined in regex.h.  We return the old syntax.  */
977 
978 reg_syntax_t
re_set_syntax(reg_syntax_t syntax)979 re_set_syntax(reg_syntax_t syntax)
980 {
981     reg_syntax_t ret = re_syntax_options;
982 
983     re_syntax_options = syntax;
984 #ifdef DEBUG
985     if (syntax & RE_DEBUG)
986         debug = 1;
987     else if (debug) /* was on but now is not */
988         debug = 0;
989 #endif /* DEBUG */
990     return ret;
991 }
992 
993 /* This table gives an error message for each of the error codes listed
994    in regex.h.  Obviously the order here has to be same as there.
995    POSIX doesn't require that we do anything for REG_NOERROR,
996    but why not be nice?  */
997 
998 static const char *re_error_msgid[] =
999 {
1000     gettext_noop ("Success"),   /* REG_NOERROR */
1001     gettext_noop ("No match"),  /* REG_NOMATCH */
1002     gettext_noop ("Invalid regular expression"), /* REG_BADPAT */
1003     gettext_noop ("Invalid collation character"), /* REG_ECOLLATE */
1004     gettext_noop ("Invalid character class name"), /* REG_ECTYPE */
1005     gettext_noop ("Trailing backslash"), /* REG_EESCAPE */
1006     gettext_noop ("Invalid back reference"), /* REG_ESUBREG */
1007     gettext_noop ("Unmatched [ or [^"), /* REG_EBRACK */
1008     gettext_noop ("Unmatched ( or \\("), /* REG_EPAREN */
1009     gettext_noop ("Unmatched \\{"), /* REG_EBRACE */
1010     gettext_noop ("Invalid content of \\{\\}"), /* REG_BADBR */
1011     gettext_noop ("Invalid range end"), /* REG_ERANGE */
1012     gettext_noop ("Memory exhausted"), /* REG_ESPACE */
1013     gettext_noop ("Invalid preceding regular expression"), /* REG_BADRPT */
1014     gettext_noop ("Premature end of regular expression"), /* REG_EEND */
1015     gettext_noop ("Regular expression too big"), /* REG_ESIZE */
1016     gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
1017 };
1018 
1019 /* Avoiding alloca during matching, to placate r_alloc.  */
1020 
1021 /* Define MATCH_MAY_ALLOCATE unless we need to make sure that the
1022    searching and matching functions should not call alloca.  On some
1023    systems, alloca is implemented in terms of malloc, and if we're
1024    using the relocating allocator routines, then malloc could cause a
1025    relocation, which might (if the strings being searched are in the
1026    ralloc heap) shift the data out from underneath the regexp
1027    routines.
1028 
1029    Here's another reason to avoid allocation: Emacs
1030    processes input from X in a signal handler; processing X input may
1031    call malloc; if input arrives while a matching routine is calling
1032    malloc, then we're scrod.  But Emacs can't just block input while
1033    calling matching routines; then we don't notice interrupts when
1034    they come in.  So, Emacs blocks input around all regexp calls
1035    except the matching calls, which it leaves unprotected, in the
1036    faith that they will not malloc.  */
1037 
1038 /* Normally, this is fine.  */
1039 #define MATCH_MAY_ALLOCATE
1040 
1041 /* When using GNU C, we are not REALLY using the C alloca, no matter
1042    what config.h may say.  So don't take precautions for it.  */
1043 #ifdef __GNUC__
1044 #undef C_ALLOCA
1045 #endif
1046 
1047 /* The match routines may not allocate if (1) they would do it with malloc
1048    and (2) it's not safe for them to use malloc.
1049    Note that if REL_ALLOC is defined, matching would not use malloc for the
1050    failure stack, but we would still use it for the register vectors;
1051    so REL_ALLOC should not affect this.  */
1052 #if (defined (C_ALLOCA) || defined (REGEX_MALLOC)) && defined (emacs)
1053 #undef MATCH_MAY_ALLOCATE
1054 #endif
1055 
1056 
1057 /* Failure stack declarations and macros; both re_compile_fastmap and
1058    re_match_2 use a failure stack.  These have to be macros because of
1059    REGEX_ALLOCATE_STACK.  */
1060 
1061 
1062 /* Number of failure points for which to initially allocate space
1063    when matching.  If this number is exceeded, we allocate more
1064    space, so it is not a hard limit.  */
1065 #ifndef INIT_FAILURE_ALLOC
1066 #define INIT_FAILURE_ALLOC 5
1067 #endif
1068 
1069 /* Roughly the maximum number of failure points on the stack.  Would be
1070    exactly that if always used MAX_FAILURE_ITEMS items each time we failed.
1071    This is a variable only so users of regex can assign to it; we never
1072    change it ourselves.  */
1073 
1074 #ifdef INT_IS_16BIT
1075 
1076 #if defined (MATCH_MAY_ALLOCATE)
1077 /* 4400 was enough to cause a crash on Alpha OSF/1,
1078    whose default stack limit is 2mb.  */
1079 static long int re_max_failures = 4000;
1080 #else
1081 static long int re_max_failures = 2000;
1082 #endif
1083 
1084 union fail_stack_elt
1085 {
1086     unsigned char *pointer;
1087     long int integer;
1088 };
1089 
1090 typedef union fail_stack_elt fail_stack_elt_t;
1091 
1092 typedef struct
1093 {
1094     fail_stack_elt_t *stack;
1095     unsigned long int size;
1096     unsigned long int avail;            /* Offset of next open position.  */
1097 } fail_stack_type;
1098 
1099 #else /* not INT_IS_16BIT */
1100 
1101 #if defined (MATCH_MAY_ALLOCATE)
1102 /* 4400 was enough to cause a crash on Alpha OSF/1,
1103    whose default stack limit is 2mb.  */
1104 static int re_max_failures = 20000;
1105 #else
1106 static int re_max_failures = 2000;
1107 #endif
1108 
1109 union fail_stack_elt
1110 {
1111     unsigned char *pointer;
1112     int integer;
1113 };
1114 
1115 typedef union fail_stack_elt fail_stack_elt_t;
1116 
1117 typedef struct
1118 {
1119     fail_stack_elt_t *stack;
1120     unsigned size;
1121     unsigned avail;                     /* Offset of next open position.  */
1122 } fail_stack_type;
1123 
1124 #endif /* INT_IS_16BIT */
1125 
1126 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
1127 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
1128 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
1129 
1130 
1131 /* Define macros to initialize and free the failure stack.
1132    Do `return -2' if the alloc fails.  */
1133 
1134 #ifdef MATCH_MAY_ALLOCATE
1135 #define INIT_FAIL_STACK()                                               \
1136   do {                                                                  \
1137     fail_stack.stack = (fail_stack_elt_t *)                             \
1138       REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));    \
1139                                                                         \
1140     if (fail_stack.stack == NULL)                                       \
1141       return -2;                                                        \
1142                                                                         \
1143     fail_stack.size = INIT_FAILURE_ALLOC;                               \
1144     fail_stack.avail = 0;                                               \
1145   } while (0)
1146 
1147 #define RESET_FAIL_STACK()  REGEX_FREE_STACK (fail_stack.stack)
1148 #else
1149 #define INIT_FAIL_STACK()                                               \
1150   do {                                                                  \
1151     fail_stack.avail = 0;                                               \
1152   } while (0)
1153 
1154 #define RESET_FAIL_STACK()
1155 #endif
1156 
1157 
1158 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
1159 
1160    Return 1 if succeeds, and 0 if either ran out of memory
1161    allocating space for it or it was already too large.
1162 
1163    REGEX_REALLOCATE_STACK requires `destination' be declared.   */
1164 
1165 #define DOUBLE_FAIL_STACK(fail_stack)                                   \
1166   ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
1167    ? 0                                                                  \
1168    : ((fail_stack).stack = (fail_stack_elt_t *)                         \
1169         REGEX_REALLOCATE_STACK ((fail_stack).stack,                     \
1170           (fail_stack).size * sizeof (fail_stack_elt_t),                \
1171           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),        \
1172                                                                         \
1173       (fail_stack).stack == NULL                                        \
1174       ? 0                                                               \
1175       : ((fail_stack).size <<= 1,                                       \
1176          1)))
1177 
1178 
1179 /* Push pointer POINTER on FAIL_STACK.
1180    Return 1 if was able to do so and 0 if ran out of memory allocating
1181    space to do so.  */
1182 #define PUSH_PATTERN_OP(POINTER, FAIL_STACK)                            \
1183   ((FAIL_STACK_FULL ()                                                  \
1184     && !DOUBLE_FAIL_STACK (FAIL_STACK))                                 \
1185    ? 0                                                                  \
1186    : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER,       \
1187       1))
1188 
1189 /* Push a pointer value onto the failure stack.
1190    Assumes the variable `fail_stack'.  Probably should only
1191    be called from within `PUSH_FAILURE_POINT'.  */
1192 #define PUSH_FAILURE_POINTER(item)                                      \
1193   fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)
1194 
1195 /* This pushes an integer-valued item onto the failure stack.
1196    Assumes the variable `fail_stack'.  Probably should only
1197    be called from within `PUSH_FAILURE_POINT'.  */
1198 #define PUSH_FAILURE_INT(item)                                  \
1199   fail_stack.stack[fail_stack.avail++].integer = (item)
1200 
1201 /* Push a fail_stack_elt_t value onto the failure stack.
1202    Assumes the variable `fail_stack'.  Probably should only
1203    be called from within `PUSH_FAILURE_POINT'.  */
1204 #define PUSH_FAILURE_ELT(item)                                  \
1205   fail_stack.stack[fail_stack.avail++] =  (item)
1206 
1207 /* These three POP... operations complement the three PUSH... operations.
1208    All assume that `fail_stack' is nonempty.  */
1209 #define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
1210 #define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
1211 #define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
1212 
1213 /* Used to omit pushing failure point id's when we're not debugging.  */
1214 #ifdef DEBUG
1215 #define DEBUG_PUSH PUSH_FAILURE_INT
1216 #define DEBUG_POP(item_addr) (item_addr)->integer = POP_FAILURE_INT ()
1217 #else
1218 #define DEBUG_PUSH(item)
1219 #define DEBUG_POP(item_addr)
1220 #endif
1221 
1222 
1223 /* Push the information about the state we will need
1224    if we ever fail back to it.
1225 
1226    Requires variables fail_stack, regstart, regend, reg_info, and
1227    num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
1228    declared.
1229 
1230    Does `return FAILURE_CODE' if runs out of memory.  */
1231 
1232 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
1233   do {                                                                  \
1234     char *destination;                                                  \
1235     /* Must be int, so when we don't save any registers, the arithmetic \
1236        of 0 + -1 isn't done as unsigned.  */                            \
1237     /* Can't be int, since there is not a shred of a guarantee that int \
1238        is wide enough to hold a value of something to which pointer can \
1239        be assigned */                                                   \
1240     s_reg_t this_reg;                                                   \
1241                                                                         \
1242     DEBUG_STATEMENT (failure_id++);                                     \
1243     DEBUG_STATEMENT (nfailure_points_pushed++);                         \
1244     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);           \
1245     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
1246     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
1247                                                                         \
1248     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);           \
1249     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);       \
1250                                                                         \
1251     /* Ensure we have enough space allocated for what we will push.  */ \
1252     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)                   \
1253       {                                                                 \
1254         if (!DOUBLE_FAIL_STACK (fail_stack))                            \
1255           return failure_code;                                          \
1256                                                                         \
1257         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",              \
1258                        (fail_stack).size);                              \
1259         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
1260       }                                                                 \
1261                                                                         \
1262     /* Push the info, starting with the registers.  */                  \
1263     DEBUG_PRINT1 ("\n");                                                \
1264                                                                         \
1265     if (1)                                                              \
1266       for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
1267            this_reg++)                                                  \
1268         {                                                               \
1269           DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);               \
1270           DEBUG_STATEMENT (num_regs_pushed++);                          \
1271                                                                         \
1272           DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);       \
1273           PUSH_FAILURE_POINTER (regstart[this_reg]);                    \
1274                                                                         \
1275           DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);           \
1276           PUSH_FAILURE_POINTER (regend[this_reg]);                      \
1277                                                                         \
1278           DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);  \
1279           DEBUG_PRINT2 (" match_null=%d",                               \
1280                         REG_MATCH_NULL_STRING_P (reg_info[this_reg]));  \
1281           DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));  \
1282           DEBUG_PRINT2 (" matched_something=%d",                        \
1283                         MATCHED_SOMETHING (reg_info[this_reg]));        \
1284           DEBUG_PRINT2 (" ever_matched=%d",                             \
1285                         EVER_MATCHED_SOMETHING (reg_info[this_reg]));   \
1286           DEBUG_PRINT1 ("\n");                                          \
1287           PUSH_FAILURE_ELT (reg_info[this_reg].word);                   \
1288         }                                                               \
1289                                                                         \
1290     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
1291     PUSH_FAILURE_INT (lowest_active_reg);                               \
1292                                                                         \
1293     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
1294     PUSH_FAILURE_INT (highest_active_reg);                              \
1295                                                                         \
1296     DEBUG_PRINT2 ("  Pushing pattern 0x%x:\n", pattern_place);          \
1297     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);           \
1298     PUSH_FAILURE_POINTER (pattern_place);                               \
1299                                                                         \
1300     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);            \
1301     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
1302                                  size2);                                \
1303     DEBUG_PRINT1 ("'\n");                                               \
1304     PUSH_FAILURE_POINTER (string_place);                                \
1305                                                                         \
1306     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);            \
1307     DEBUG_PUSH (failure_id);                                            \
1308   } while (0)
1309 
1310 /* This is the number of items that are pushed and popped on the stack
1311    for each register.  */
1312 #define NUM_REG_ITEMS  3
1313 
1314 /* Individual items aside from the registers.  */
1315 #ifdef DEBUG
1316 #define NUM_NONREG_ITEMS 5 /* Includes failure point id.  */
1317 #else
1318 #define NUM_NONREG_ITEMS 4
1319 #endif
1320 
1321 /* We push at most this many items on the stack.  */
1322 /* We used to use (num_regs - 1), which is the number of registers
1323    this regexp will save; but that was changed to 5
1324    to avoid stack overflow for a regexp with lots of parens.  */
1325 #define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
1326 
1327 /* We actually push this many items.  */
1328 #define NUM_FAILURE_ITEMS                               \
1329   (((0                                                  \
1330      ? 0 : highest_active_reg - lowest_active_reg + 1)  \
1331     * NUM_REG_ITEMS)                                    \
1332    + NUM_NONREG_ITEMS)
1333 
1334 /* How many items can still be added to the stack without overflowing it.  */
1335 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
1336 
1337 
1338 /* Pops what PUSH_FAIL_STACK pushes.
1339 
1340    We restore into the parameters, all of which should be lvalues:
1341      STR -- the saved data position.
1342      PAT -- the saved pattern position.
1343      LOW_REG, HIGH_REG -- the highest and lowest active registers.
1344      REGSTART, REGEND -- arrays of string positions.
1345      REG_INFO -- array of information about each subexpression.
1346 
1347    Also assumes the variables `fail_stack' and (if debugging), `bufp',
1348    `pend', `string1', `size1', `string2', and `size2'.  */
1349 
1350 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
1351 {                                                                       \
1352   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)                        \
1353   s_reg_t this_reg;                                                     \
1354   const unsigned char *string_temp;                                     \
1355                                                                         \
1356   assert (!FAIL_STACK_EMPTY ());                                        \
1357                                                                         \
1358   /* Remove failure points and point to how many regs pushed.  */       \
1359   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                                \
1360   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
1361   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size);     \
1362                                                                         \
1363   assert (fail_stack.avail >= NUM_NONREG_ITEMS);                        \
1364                                                                         \
1365   DEBUG_POP (&failure_id);                                              \
1366   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);              \
1367                                                                         \
1368   /* If the saved string location is NULL, it came from an              \
1369      on_failure_keep_string_jump opcode, and we want to throw away the  \
1370      saved NULL, thus retaining our current position in the string.  */ \
1371   string_temp = POP_FAILURE_POINTER ();                                 \
1372   if (string_temp != NULL)                                              \
1373     str = (const char *) string_temp;                                   \
1374                                                                         \
1375   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);                       \
1376   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);      \
1377   DEBUG_PRINT1 ("'\n");                                                 \
1378                                                                         \
1379   pat = (unsigned char *) POP_FAILURE_POINTER ();                       \
1380   DEBUG_PRINT2 ("  Popping pattern 0x%x:\n", pat);                      \
1381   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);                       \
1382                                                                         \
1383   /* Restore register info.  */                                         \
1384   high_reg = (active_reg_t) POP_FAILURE_INT ();                         \
1385   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);           \
1386                                                                         \
1387   low_reg = (active_reg_t) POP_FAILURE_INT ();                          \
1388   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);            \
1389                                                                         \
1390   if (1)                                                                \
1391     for (this_reg = high_reg; this_reg >= low_reg; this_reg--)          \
1392       {                                                                 \
1393         DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);               \
1394                                                                         \
1395         reg_info[this_reg].word = POP_FAILURE_ELT ();                   \
1396         DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);        \
1397                                                                         \
1398         regend[this_reg] = (const char *) POP_FAILURE_POINTER ();       \
1399         DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);           \
1400                                                                         \
1401         regstart[this_reg] = (const char *) POP_FAILURE_POINTER ();     \
1402         DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);       \
1403       }                                                                 \
1404   else                                                                  \
1405     {                                                                   \
1406       for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
1407         {                                                               \
1408           reg_info[this_reg].word.integer = 0;                          \
1409           regend[this_reg] = 0;                                         \
1410           regstart[this_reg] = 0;                                       \
1411         }                                                               \
1412       highest_active_reg = high_reg;                                    \
1413     }                                                                   \
1414                                                                         \
1415   set_regs_matched_done = 0;                                            \
1416   DEBUG_STATEMENT (nfailure_points_popped++);                           \
1417 } /* POP_FAILURE_POINT */
1418 
1419 
1420 
1421 /* Structure for per-register (a.k.a. per-group) information.
1422    Other register information, such as the
1423    starting and ending positions (which are addresses), and the list of
1424    inner groups (which is a bits list) are maintained in separate
1425    variables.
1426 
1427    We are making a (strictly speaking) nonportable assumption here: that
1428    the compiler will pack our bit fields into something that fits into
1429    the type of `word', i.e., is something that fits into one item on the
1430    failure stack.  */
1431 
1432 
1433 /* Declarations and macros for re_match_2.  */
1434 
1435 typedef union
1436 {
1437     fail_stack_elt_t word;
1438     struct
1439     {
1440         /* This field is one if this group can match the empty string,
1441            zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
1442 #define MATCH_NULL_UNSET_VALUE 3
1443         unsigned match_null_string_p : 2;
1444         unsigned is_active : 1;
1445         unsigned matched_something : 1;
1446         unsigned ever_matched_something : 1;
1447     } bits;
1448 } register_info_type;
1449 
1450 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
1451 #define IS_ACTIVE(R)  ((R).bits.is_active)
1452 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
1453 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
1454 
1455 
1456 /* Call this when have matched a real character; it sets `matched' flags
1457    for the subexpressions which we are currently inside.  Also records
1458    that those subexprs have matched.  */
1459 #define SET_REGS_MATCHED()                                              \
1460   do                                                                    \
1461     {                                                                   \
1462       if (!set_regs_matched_done)                                       \
1463         {                                                               \
1464           active_reg_t r;                                               \
1465           set_regs_matched_done = 1;                                    \
1466           for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
1467             {                                                           \
1468               MATCHED_SOMETHING (reg_info[r])                           \
1469                 = EVER_MATCHED_SOMETHING (reg_info[r])                  \
1470                 = 1;                                                    \
1471             }                                                           \
1472         }                                                               \
1473     }                                                                   \
1474   while (0)
1475 
1476 /* Registers are set to a sentinel when they haven't yet matched.  */
1477 static char reg_unset_dummy;
1478 #define REG_UNSET_VALUE (&reg_unset_dummy)
1479 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
1480 
1481 /* Subroutine declarations and macros for regex_compile.  */
1482 
1483 static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
1484                                               reg_syntax_t syntax,
1485                                               struct re_pattern_buffer *bufp));
1486 static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
1487 static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1488                                  int arg1, int arg2));
1489 static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1490                                   int arg, unsigned char *end));
1491 static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
1492                                   int arg1, int arg2, unsigned char *end));
1493 static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
1494                                            reg_syntax_t syntax));
1495 static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
1496                                            reg_syntax_t syntax));
1497 static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
1498                                               const char *pend,
1499                                               char *translate,
1500                                               reg_syntax_t syntax,
1501                                               unsigned char *b));
1502 
1503 /* Fetch the next character in the uncompiled pattern---translating it
1504    if necessary.  Also cast from a signed character in the constant
1505    string passed to us by the user to an unsigned char that we can use
1506    as an array index (in, e.g., `translate').  */
1507 #ifndef PATFETCH
1508 #define PATFETCH(c)                                                     \
1509   do {if (p == pend) return REG_EEND;                                   \
1510     c = (unsigned char) *p++;                                           \
1511     if (translate) c = (unsigned char) translate[c];                    \
1512   } while (0)
1513 #endif
1514 
1515 /* Fetch the next character in the uncompiled pattern, with no
1516    translation.  */
1517 #define PATFETCH_RAW(c)                                                 \
1518   do {if (p == pend) return REG_EEND;                                   \
1519     c = (unsigned char) *p++;                                           \
1520   } while (0)
1521 
1522 /* Go backwards one character in the pattern.  */
1523 #define PATUNFETCH p--
1524 
1525 
1526 /* If `translate' is non-null, return translate[D], else just D.  We
1527    cast the subscript to translate because some data is declared as
1528    `char *', to avoid warnings when a string constant is passed.  But
1529    when we use a character as a subscript we must make it unsigned.  */
1530 #ifndef TRANSLATE
1531 #define TRANSLATE(d) \
1532   (translate ? (char) translate[(unsigned char) (d)] : (d))
1533 #endif
1534 
1535 
1536 /* Macros for outputting the compiled pattern into `buffer'.  */
1537 
1538 /* If the buffer isn't allocated when it comes in, use this.  */
1539 #define INIT_BUF_SIZE  32
1540 
1541 /* Make sure we have at least N more bytes of space in buffer.  */
1542 #define GET_BUFFER_SPACE(n)                                             \
1543     while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated)  \
1544       EXTEND_BUFFER ()
1545 
1546 /* Make sure we have one more byte of buffer space and then add C to it.  */
1547 #define BUF_PUSH(c)                                                     \
1548   do {                                                                  \
1549     GET_BUFFER_SPACE (1);                                               \
1550     *b++ = (unsigned char) (c);                                         \
1551   } while (0)
1552 
1553 
1554 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
1555 #define BUF_PUSH_2(c1, c2)                                              \
1556   do {                                                                  \
1557     GET_BUFFER_SPACE (2);                                               \
1558     *b++ = (unsigned char) (c1);                                        \
1559     *b++ = (unsigned char) (c2);                                        \
1560   } while (0)
1561 
1562 
1563 /* As with BUF_PUSH_2, except for three bytes.  */
1564 #define BUF_PUSH_3(c1, c2, c3)                                          \
1565   do {                                                                  \
1566     GET_BUFFER_SPACE (3);                                               \
1567     *b++ = (unsigned char) (c1);                                        \
1568     *b++ = (unsigned char) (c2);                                        \
1569     *b++ = (unsigned char) (c3);                                        \
1570   } while (0)
1571 
1572 
1573 /* Store a jump with opcode OP at LOC to location TO.  We store a
1574    relative address offset by the three bytes the jump itself occupies.  */
1575 #define STORE_JUMP(op, loc, to) \
1576   store_op1 (op, loc, (int) ((to) - (loc) - 3))
1577 
1578 /* Likewise, for a two-argument jump.  */
1579 #define STORE_JUMP2(op, loc, to, arg) \
1580   store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)
1581 
1582 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
1583 #define INSERT_JUMP(op, loc, to) \
1584   insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)
1585 
1586 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
1587 #define INSERT_JUMP2(op, loc, to, arg) \
1588   insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
1589 
1590 
1591 /* This is not an arbitrary limit: the arguments which represent offsets
1592    into the pattern are two bytes long.  So if 2^16 bytes turns out to
1593    be too small, many things would have to change.  */
1594 /* Any other compiler which, like MSC, has allocation limit below 2^16
1595    bytes will have to use approach similar to what was done below for
1596    MSC and drop MAX_BUF_SIZE a bit.  Otherwise you may end up
1597    reallocating to 0 bytes.  Such thing is not going to work too well.
1598    You have been warned!!  */
1599 #if defined(_MSC_VER)  && !defined(_WIN32)
1600 /* Microsoft C 16-bit versions limit malloc to approx 65512 bytes.
1601    The REALLOC define eliminates a flurry of conversion warnings,
1602    but is not required. */
1603 #define MAX_BUF_SIZE  65500L
1604 #define REALLOC(p,s) realloc ((p), (size_t) (s))
1605 #else
1606 #define MAX_BUF_SIZE (1L << 16)
1607 #define REALLOC(p,s) realloc ((p), (s))
1608 #endif
1609 
1610 /* Extend the buffer by twice its current size via realloc and
1611    reset the pointers that pointed into the old block to point to the
1612    correct places in the new one.  If extending the buffer results in it
1613    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
1614 #define EXTEND_BUFFER()                                                 \
1615   do {                                                                  \
1616     unsigned char *old_buffer = bufp->buffer;                           \
1617     if (bufp->allocated == MAX_BUF_SIZE)                                \
1618       return REG_ESIZE;                                                 \
1619     bufp->allocated <<= 1;                                              \
1620     if (bufp->allocated > MAX_BUF_SIZE)                                 \
1621       bufp->allocated = MAX_BUF_SIZE;                                   \
1622     bufp->buffer = (unsigned char *) REALLOC (bufp->buffer, bufp->allocated);\
1623     if (bufp->buffer == NULL)                                           \
1624       return REG_ESPACE;                                                \
1625     /* If the buffer moved, move all the pointers into it.  */          \
1626     if (old_buffer != bufp->buffer)                                     \
1627       {                                                                 \
1628         b = (b - old_buffer) + bufp->buffer;                            \
1629         begalt = (begalt - old_buffer) + bufp->buffer;                  \
1630         if (fixup_alt_jump)                                             \
1631           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
1632         if (laststart)                                                  \
1633           laststart = (laststart - old_buffer) + bufp->buffer;          \
1634         if (pending_exact)                                              \
1635           pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
1636       }                                                                 \
1637   } while (0)
1638 
1639 
1640 /* Since we have one byte reserved for the register number argument to
1641    {start,stop}_memory, the maximum number of groups we can report
1642    things about is what fits in that byte.  */
1643 #define MAX_REGNUM 255
1644 
1645 /* But patterns can have more than `MAX_REGNUM' registers.  We just
1646    ignore the excess.  */
1647 typedef unsigned regnum_t;
1648 
1649 
1650 /* Macros for the compile stack.  */
1651 
1652 /* Since offsets can go either forwards or backwards, this type needs to
1653    be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
1654 /* int may be not enough when sizeof(int) == 2.  */
1655 typedef long pattern_offset_t;
1656 
1657 typedef struct
1658 {
1659     pattern_offset_t begalt_offset;
1660     pattern_offset_t fixup_alt_jump;
1661     pattern_offset_t inner_group_offset;
1662     pattern_offset_t laststart_offset;
1663     regnum_t regnum;
1664 } compile_stack_elt_t;
1665 
1666 
1667 typedef struct
1668 {
1669     compile_stack_elt_t *stack;
1670     unsigned size;
1671     unsigned avail;                     /* Offset of next open position.  */
1672 } compile_stack_type;
1673 
1674 
1675 #define INIT_COMPILE_STACK_SIZE 32
1676 
1677 #define COMPILE_STACK_EMPTY  (compile_stack.avail == 0)
1678 #define COMPILE_STACK_FULL  (compile_stack.avail == compile_stack.size)
1679 
1680 /* The next available element.  */
1681 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
1682 
1683 
1684 /* Set the bit for character C in a list.  */
1685 #define SET_LIST_BIT(c)                               \
1686   (b[((unsigned char) (c)) / BYTEWIDTH]               \
1687    |= 1 << (((unsigned char) c) % BYTEWIDTH))
1688 
1689 
1690 /* Get the next unsigned number in the uncompiled pattern.  */
1691 #define GET_UNSIGNED_NUMBER(num)                                        \
1692   { if (p != pend)                                                      \
1693      {                                                                  \
1694        PATFETCH (c);                                                    \
1695        while (ISDIGIT (c))                                              \
1696          {                                                              \
1697            if (num < 0)                                                 \
1698               num = 0;                                                  \
1699            num = num * 10 + c - '0';                                    \
1700            if (p == pend)                                               \
1701               break;                                                    \
1702            PATFETCH (c);                                                \
1703          }                                                              \
1704        }                                                                \
1705     }
1706 
1707 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
1708 /* The GNU C library provides support for user-defined character classes
1709    and the functions from ISO C amendement 1.  */
1710 # ifdef CHARCLASS_NAME_MAX
1711 #  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
1712 # else
1713 /* This shouldn't happen but some implementation might still have this
1714    problem.  Use a reasonable default value.  */
1715 #  define CHAR_CLASS_MAX_LENGTH 256
1716 # endif
1717 
1718 # define IS_CHAR_CLASS(string) wctype (string)
1719 #else
1720 # define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
1721 
1722 # define IS_CHAR_CLASS(string)                                          \
1723    (STREQ (string, "alpha") || STREQ (string, "upper")                  \
1724     || STREQ (string, "lower") || STREQ (string, "digit")               \
1725     || STREQ (string, "alnum") || STREQ (string, "xdigit")              \
1726     || STREQ (string, "space") || STREQ (string, "print")               \
1727     || STREQ (string, "punct") || STREQ (string, "graph")               \
1728     || STREQ (string, "cntrl") || STREQ (string, "blank"))
1729 #endif
1730 
1731 #ifndef MATCH_MAY_ALLOCATE
1732 
1733 /* If we cannot allocate large objects within re_match_2_internal,
1734    we make the fail stack and register vectors global.
1735    The fail stack, we grow to the maximum size when a regexp
1736    is compiled.
1737    The register vectors, we adjust in size each time we
1738    compile a regexp, according to the number of registers it needs.  */
1739 
1740 static fail_stack_type fail_stack;
1741 
1742 /* Size with which the following vectors are currently allocated.
1743    That is so we can make them bigger as needed,
1744    but never make them smaller.  */
1745 static int regs_allocated_size;
1746 
1747 static const char **     regstart, **     regend;
1748 static const char ** old_regstart, ** old_regend;
1749 static const char **best_regstart, **best_regend;
1750 static register_info_type *reg_info;
1751 static const char **reg_dummy;
1752 static register_info_type *reg_info_dummy;
1753 
1754 /* Make the register vectors big enough for NUM_REGS registers,
1755    but don't make them smaller.  */
1756 
1757 static
regex_grow_registers(num_regs)1758 regex_grow_registers (num_regs)
1759      int num_regs;
1760 {
1761     if (num_regs > regs_allocated_size)
1762         {
1763             RETALLOC_IF (regstart,       num_regs, const char *);
1764             RETALLOC_IF (regend,         num_regs, const char *);
1765             RETALLOC_IF (old_regstart, num_regs, const char *);
1766             RETALLOC_IF (old_regend,     num_regs, const char *);
1767             RETALLOC_IF (best_regstart, num_regs, const char *);
1768             RETALLOC_IF (best_regend,    num_regs, const char *);
1769             RETALLOC_IF (reg_info,       num_regs, register_info_type);
1770             RETALLOC_IF (reg_dummy,      num_regs, const char *);
1771             RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
1772 
1773             regs_allocated_size = num_regs;
1774         }
1775 }
1776 
1777 #endif /* not MATCH_MAY_ALLOCATE */
1778 
1779 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
1780                                                  compile_stack,
1781                                                  regnum_t regnum));
1782 
1783 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
1784    Returns one of error codes defined in `regex.h', or zero for success.
1785 
1786    Assumes the `allocated' (and perhaps `buffer') and `translate'
1787    fields are set in BUFP on entry.
1788 
1789    If it succeeds, results are put in BUFP (if it returns an error, the
1790    contents of BUFP are undefined):
1791      `buffer' is the compiled pattern;
1792      `syntax' is set to SYNTAX;
1793      `used' is set to the length of the compiled pattern;
1794      `fastmap_accurate' is zero;
1795      `re_nsub' is the number of subexpressions in PATTERN;
1796      `not_bol' and `not_eol' are zero;
1797 
1798    The `fastmap' and `newline_anchor' fields are neither
1799    examined nor set.  */
1800 
1801 /* Return, freeing storage we allocated.  */
1802 #define FREE_STACK_RETURN(value)                \
1803   return (free (compile_stack.stack), value)
1804 
1805 static reg_errcode_t
regex_compile(const char * pattern,size_t size,reg_syntax_t syntax,struct re_pattern_buffer * bufp)1806 regex_compile (const char *pattern,
1807                size_t size,
1808                reg_syntax_t syntax,
1809                struct re_pattern_buffer *bufp)
1810 {
1811     /* We fetch characters from PATTERN here.  Even though PATTERN is
1812        `char *' (i.e., signed), we declare these variables as unsigned, so
1813        they can be reliably used as array indices.  */
1814     register unsigned char c, c1;
1815 
1816     /* A random temporary spot in PATTERN.  */
1817     const char *p1;
1818 
1819     /* Points to the end of the buffer, where we should append.  */
1820     register unsigned char *b;
1821 
1822     /* Keeps track of unclosed groups.  */
1823     compile_stack_type compile_stack;
1824 
1825     /* Points to the current (ending) position in the pattern.  */
1826     const char *p = pattern;
1827     const char *pend = pattern + size;
1828 
1829   /* How to translate the characters in the pattern.  */
1830     RE_TRANSLATE_TYPE translate = bufp->translate;
1831 
1832     /* Address of the count-byte of the most recently inserted `exactn'
1833      command.  This makes it possible to tell if a new exact-match
1834      character can be added to that command or if the character requires
1835      a new `exactn' command.  */
1836     unsigned char *pending_exact = 0;
1837 
1838     /* Address of start of the most recently finished expression.
1839      This tells, e.g., postfix * where to find the start of its
1840      operand.  Reset at the beginning of groups and alternatives.  */
1841     unsigned char *laststart = 0;
1842 
1843     /* Address of beginning of regexp, or inside of last group.  */
1844     unsigned char *begalt;
1845 
1846     /* Place in the uncompiled pattern (i.e., the {) to
1847      which to go back if the interval is invalid.  */
1848     const char *beg_interval;
1849 
1850     /* Address of the place where a forward jump should go to the end of
1851      the containing expression.  Each alternative of an `or' -- except the
1852      last -- ends with a forward jump of this sort.  */
1853     unsigned char *fixup_alt_jump = 0;
1854 
1855     /* Counts open-groups as they are encountered.  Remembered for the
1856      matching close-group on the compile stack, so the same register
1857      number is put in the stop_memory as the start_memory.  */
1858     regnum_t regnum = 0;
1859 
1860 #ifdef DEBUG
1861     DEBUG_PRINT1 ("\nCompiling pattern: ");
1862     if (debug)
1863         {
1864             unsigned debug_count;
1865 
1866             for (debug_count = 0; debug_count < size; debug_count++)
1867                 putchar (pattern[debug_count]);
1868             putchar ('\n');
1869         }
1870 #endif /* DEBUG */
1871 
1872     /* Initialize the compile stack.  */
1873     compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1874     if (compile_stack.stack == NULL)
1875         return REG_ESPACE;
1876 
1877     compile_stack.size = INIT_COMPILE_STACK_SIZE;
1878     compile_stack.avail = 0;
1879 
1880   /* Initialize the pattern buffer.  */
1881     bufp->syntax = syntax;
1882     bufp->fastmap_accurate = 0;
1883     bufp->not_bol = bufp->not_eol = 0;
1884 
1885     /* Set `used' to zero, so that if we return an error, the pattern
1886      printer (for debugging) will think there's no pattern.  We reset it
1887      at the end.  */
1888     bufp->used = 0;
1889 
1890     /* Always count groups, whether or not bufp->no_sub is set.  */
1891     bufp->re_nsub = 0;
1892 
1893 #if !defined (emacs) && !defined (SYNTAX_TABLE)
1894     /* Initialize the syntax table.  */
1895     init_syntax_once ();
1896 #endif
1897 
1898     if (bufp->allocated == 0)
1899         {
1900             if (bufp->buffer)
1901                 { /* If zero allocated, but buffer is non-null, try to realloc
1902                      enough space.  This loses if buffer's address is bogus, but
1903                      that is the user's responsibility.  */
1904                     RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
1905                 }
1906             else
1907                 { /* Caller did not allocate a buffer.  Do it for them.  */
1908                     bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
1909                 }
1910             if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
1911 
1912             bufp->allocated = INIT_BUF_SIZE;
1913         }
1914 
1915     begalt = b = bufp->buffer;
1916 
1917   /* Loop through the uncompiled pattern until we're at the end.  */
1918     while (p != pend)
1919         {
1920             PATFETCH (c);
1921 
1922             switch (c)
1923                 {
1924                 case '^':
1925                     {
1926                         if (   /* If at start of pattern, it's an operator.  */
1927                             p == pattern + 1
1928                             /* If context independent, it's an operator.  */
1929                             || syntax & RE_CONTEXT_INDEP_ANCHORS
1930                             /* Otherwise, depends on what's come before.  */
1931                             || at_begline_loc_p (pattern, p, syntax))
1932                             BUF_PUSH (begline);
1933                         else
1934                             goto normal_char;
1935                     }
1936                     break;
1937 
1938 
1939                 case '$':
1940                     {
1941                         if (   /* If at end of pattern, it's an operator.  */
1942                             p == pend
1943                             /* If context independent, it's an operator.  */
1944                             || syntax & RE_CONTEXT_INDEP_ANCHORS
1945                             /* Otherwise, depends on what's next.  */
1946                             || at_endline_loc_p (p, pend, syntax))
1947                             BUF_PUSH (endline);
1948                         else
1949                             goto normal_char;
1950                     }
1951                     break;
1952 
1953 
1954                 case '+':
1955                 case '?':
1956                     if ((syntax & RE_BK_PLUS_QM)
1957                         || (syntax & RE_LIMITED_OPS))
1958                         goto normal_char;
1959                 handle_plus:
1960                 case '*':
1961                     /* If there is no previous pattern... */
1962                     if (!laststart)
1963                         {
1964                             if (syntax & RE_CONTEXT_INVALID_OPS)
1965                                 FREE_STACK_RETURN (REG_BADRPT);
1966                             else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1967                                 goto normal_char;
1968                         }
1969 
1970                     {
1971                         /* Are we optimizing this jump?  */
1972                         boolean keep_string_p = false;
1973 
1974                         /* 1 means zero (many) matches is allowed.  */
1975                         char zero_times_ok = 0, many_times_ok = 0;
1976 
1977                         /* If there is a sequence of repetition chars, collapse it
1978                            down to just one (the right one).  We can't combine
1979                            interval operators with these because of, e.g., `a{2}*',
1980                            which should only match an even number of `a's.  */
1981 
1982                         for (;;)
1983                             {
1984                                 zero_times_ok |= c != '+';
1985                                 many_times_ok |= c != '?';
1986 
1987                                 if (p == pend)
1988                                     break;
1989 
1990                                 PATFETCH (c);
1991 
1992                                 if (c == '*'
1993                                     || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
1994                                     ;
1995 
1996                                 else if (syntax & RE_BK_PLUS_QM  &&  c == '\\')
1997                                     {
1998                                         if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
1999 
2000                                         PATFETCH (c1);
2001                                         if (!(c1 == '+' || c1 == '?'))
2002                                             {
2003                                                 PATUNFETCH;
2004                                                 PATUNFETCH;
2005                                                 break;
2006                                             }
2007 
2008                                         c = c1;
2009                                     }
2010                                 else
2011                                     {
2012                                         PATUNFETCH;
2013                                         break;
2014                                     }
2015 
2016                                 /* If we get here, we found another repeat character.  */
2017                             }
2018 
2019                         /* Star, etc. applied to an empty pattern is equivalent
2020                            to an empty pattern.  */
2021                         if (!laststart)
2022                             break;
2023 
2024                         /* Now we know whether or not zero matches is allowed
2025                            and also whether or not two or more matches is allowed.  */
2026                         if (many_times_ok)
2027                             { /* More than one repetition is allowed, so put in at the
2028                                  end a backward relative jump from `b' to before the next
2029                                  jump we're going to put in below (which jumps from
2030                                  laststart to after this jump).
2031 
2032                                  But if we are at the `*' in the exact sequence `.*\n',
2033                                  insert an unconditional jump backwards to the .,
2034                                  instead of the beginning of the loop.  This way we only
2035                                  push a failure point once, instead of every time
2036                                  through the loop.  */
2037                                 assert (p - 1 > pattern);
2038 
2039                                 /* Allocate the space for the jump.  */
2040                                 GET_BUFFER_SPACE (3);
2041 
2042                                 /* We know we are not at the first character of the pattern,
2043                                    because laststart was nonzero.  And we've already
2044                                    incremented `p', by the way, to be the character after
2045                                    the `*'.  Do we have to do something analogous here
2046                                    for null bytes, because of RE_DOT_NOT_NULL?  */
2047                                 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
2048                                     && zero_times_ok
2049                                     && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
2050                                     && !(syntax & RE_DOT_NEWLINE))
2051                                     { /* We have .*\n.  */
2052                                         STORE_JUMP (jump, b, laststart);
2053                                         keep_string_p = true;
2054                                     }
2055                                 else
2056                                     /* Anything else.  */
2057                                     STORE_JUMP (maybe_pop_jump, b, laststart - 3);
2058 
2059                                 /* We've added more stuff to the buffer.  */
2060                                 b += 3;
2061                             }
2062 
2063                         /* On failure, jump from laststart to b + 3, which will be the
2064                            end of the buffer after this jump is inserted.  */
2065                         GET_BUFFER_SPACE (3);
2066                         INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
2067                                      : on_failure_jump,
2068                                      laststart, b + 3);
2069                         pending_exact = 0;
2070                         b += 3;
2071 
2072                         if (!zero_times_ok)
2073                             {
2074                                 /* At least one repetition is required, so insert a
2075                                    `dummy_failure_jump' before the initial
2076                                    `on_failure_jump' instruction of the loop. This
2077                                    effects a skip over that instruction the first time
2078                                    we hit that loop.  */
2079                                 GET_BUFFER_SPACE (3);
2080                                 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
2081                                 b += 3;
2082                             }
2083                     }
2084                     break;
2085 
2086 
2087                 case '.':
2088                     laststart = b;
2089                     BUF_PUSH (anychar);
2090                     break;
2091 
2092 
2093                 case '[':
2094                     {
2095                         boolean had_char_class = false;
2096 
2097                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2098 
2099                         /* Ensure that we have enough space to push a charset: the
2100                            opcode, the length count, and the bitset; 34 bytes in all.  */
2101                         GET_BUFFER_SPACE (34);
2102 
2103                         laststart = b;
2104 
2105                         /* We test `*p == '^' twice, instead of using an if
2106                            statement, so we only need one BUF_PUSH.  */
2107                         BUF_PUSH (*p == '^' ? charset_not : charset);
2108                         if (*p == '^')
2109                             p++;
2110 
2111                         /* Remember the first position in the bracket expression.  */
2112                         p1 = p;
2113 
2114                         /* Push the number of bytes in the bitmap.  */
2115                         BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
2116 
2117                         /* Clear the whole map.  */
2118                         bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
2119 
2120                         /* charset_not matches newline according to a syntax bit.  */
2121                         if ((re_opcode_t) b[-2] == charset_not
2122                             && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
2123                             SET_LIST_BIT ('\n');
2124 
2125                         /* Read in characters and ranges, setting map bits.  */
2126                         for (;;)
2127                             {
2128                                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2129 
2130                                 PATFETCH (c);
2131 
2132                                 /* \ might escape characters inside [...] and [^...].  */
2133                                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
2134                                     {
2135                                         if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2136 
2137                                         PATFETCH (c1);
2138                                         SET_LIST_BIT (c1);
2139                                         continue;
2140                                     }
2141 
2142                                 /* Could be the end of the bracket expression.  If it's
2143                                    not (i.e., when the bracket expression is `[]' so
2144                                    far), the ']' character bit gets set way below.  */
2145                                 if (c == ']' && p != p1 + 1)
2146                                     break;
2147 
2148                                 /* Look ahead to see if it's a range when the last thing
2149                                    was a character class.  */
2150                                 if (had_char_class && c == '-' && *p != ']')
2151                                     FREE_STACK_RETURN (REG_ERANGE);
2152 
2153                                 /* Look ahead to see if it's a range when the last thing
2154                                    was a character: if this is a hyphen not at the
2155                                    beginning or the end of a list, then it's the range
2156                                    operator.  */
2157                                 if (c == '-'
2158                                     && !(p - 2 >= pattern && p[-2] == '[')
2159                                     && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
2160                                     && *p != ']')
2161                                     {
2162                                         reg_errcode_t ret
2163                                             = compile_range (&p, pend, translate, syntax, b);
2164                                         if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2165                                     }
2166 
2167                                 else if (p[0] == '-' && p[1] != ']')
2168                                     { /* This handles ranges made up of characters only.  */
2169                                         reg_errcode_t ret;
2170 
2171                                         /* Move past the `-'.  */
2172                                         PATFETCH (c1);
2173 
2174                                         ret = compile_range (&p, pend, translate, syntax, b);
2175                                         if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
2176                                     }
2177 
2178                                 /* See if we're at the beginning of a possible character
2179                                    class.  */
2180 
2181                                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
2182                                     { /* Leave room for the null.  */
2183                                         char str[CHAR_CLASS_MAX_LENGTH + 1];
2184 
2185                                         PATFETCH (c);
2186                                         c1 = 0;
2187 
2188                                         /* If pattern is `[[:'.  */
2189                                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2190 
2191                                         for (;;)
2192                                             {
2193                                                 PATFETCH (c);
2194                                                 if (c == ':' || c == ']' || p == pend
2195                                                     || c1 == CHAR_CLASS_MAX_LENGTH)
2196                                                     break;
2197                                                 str[c1++] = c;
2198                                             }
2199                                         str[c1] = '\0';
2200 
2201                                         /* If isn't a word bracketed by `[:' and:`]':
2202                                            undo the ending character, the letters, and leave
2203                                            the leading `:' and `[' (but set bits for them).  */
2204                                         if (c == ':' && *p == ']')
2205                                             {
2206 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
2207                                                 boolean is_lower = STREQ (str, "lower");
2208                                                 boolean is_upper = STREQ (str, "upper");
2209                                                 wctype_t wt;
2210                                                 int ch;
2211 
2212                                                 wt = wctype (str);
2213                                                 if (wt == 0)
2214                                                     FREE_STACK_RETURN (REG_ECTYPE);
2215 
2216                                                 /* Throw away the ] at the end of the character
2217                                                    class.  */
2218                                                 PATFETCH (c);
2219 
2220                                                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2221 
2222                                                 for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
2223                                                     {
2224                                                         if (iswctype (btowc (ch), wt))
2225                                                             SET_LIST_BIT (ch);
2226 
2227                                                         if (translate && (is_upper || is_lower)
2228                                                             && (ISUPPER (ch) || ISLOWER (ch)))
2229                                                             SET_LIST_BIT (ch);
2230                                                     }
2231 
2232                                                 had_char_class = true;
2233 #else
2234                                                 int ch;
2235                                                 boolean is_alnum = STREQ (str, "alnum");
2236                                                 boolean is_alpha = STREQ (str, "alpha");
2237                                                 boolean is_blank = STREQ (str, "blank");
2238                                                 boolean is_cntrl = STREQ (str, "cntrl");
2239                                                 boolean is_digit = STREQ (str, "digit");
2240                                                 boolean is_graph = STREQ (str, "graph");
2241                                                 boolean is_lower = STREQ (str, "lower");
2242                                                 boolean is_print = STREQ (str, "print");
2243                                                 boolean is_punct = STREQ (str, "punct");
2244                                                 boolean is_space = STREQ (str, "space");
2245                                                 boolean is_upper = STREQ (str, "upper");
2246                                                 boolean is_xdigit = STREQ (str, "xdigit");
2247 
2248                                                 if (!IS_CHAR_CLASS (str))
2249                                                     FREE_STACK_RETURN (REG_ECTYPE);
2250 
2251                                                 /* Throw away the ] at the end of the character
2252                                                    class.  */
2253                                                 PATFETCH (c);
2254 
2255                                                 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
2256 
2257                                                 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
2258                                                     {
2259                                                         /* This was split into 3 if's to
2260                                                            avoid an arbitrary limit in some compiler.  */
2261                                                         if (   (is_alnum  && ISALNUM (ch))
2262                                                                || (is_alpha  && ISALPHA (ch))
2263                                                                || (is_blank  && ISBLANK (ch))
2264                                                                || (is_cntrl  && ISCNTRL (ch)))
2265                                                             SET_LIST_BIT (ch);
2266                                                         if (   (is_digit  && ISDIGIT (ch))
2267                                                                || (is_graph  && ISGRAPH (ch))
2268                                                                || (is_lower  && ISLOWER (ch))
2269                                                                || (is_print  && ISPRINT (ch)))
2270                                                             SET_LIST_BIT (ch);
2271                                                         if (   (is_punct  && ISPUNCT (ch))
2272                                                                || (is_space  && ISSPACE (ch))
2273                                                                || (is_upper  && ISUPPER (ch))
2274                                                                || (is_xdigit && ISXDIGIT (ch)))
2275                                                             SET_LIST_BIT (ch);
2276                                                         if (   translate && (is_upper || is_lower)
2277                                                                && (ISUPPER (ch) || ISLOWER (ch)))
2278                                                             SET_LIST_BIT (ch);
2279                                                     }
2280                                                 had_char_class = true;
2281 #endif  /* libc || wctype.h */
2282                                             }
2283                                         else
2284                                             {
2285                                                 c1++;
2286                                                 while (c1--)
2287                                                     PATUNFETCH;
2288                                                 SET_LIST_BIT ('[');
2289                                                 SET_LIST_BIT (':');
2290                                                 had_char_class = false;
2291                                             }
2292                                     }
2293                                 else
2294                                     {
2295                                         had_char_class = false;
2296                                         SET_LIST_BIT (c);
2297                                     }
2298                             }
2299 
2300                         /* Discard any (non)matching list bytes that are all 0 at the
2301                            end of the map.  Decrease the map-length byte too.  */
2302                         while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
2303                             b[-1]--;
2304                         b += b[-1];
2305                     }
2306                     break;
2307 
2308 
2309                 case '(':
2310                     if (syntax & RE_NO_BK_PARENS)
2311                         goto handle_open;
2312                     else
2313                         goto normal_char;
2314 
2315 
2316                 case ')':
2317                     if (syntax & RE_NO_BK_PARENS)
2318                         goto handle_close;
2319                     else
2320                         goto normal_char;
2321 
2322 
2323                 case '\n':
2324                     if (syntax & RE_NEWLINE_ALT)
2325                         goto handle_alt;
2326                     else
2327                         goto normal_char;
2328 
2329 
2330                 case '|':
2331                     if (syntax & RE_NO_BK_VBAR)
2332                         goto handle_alt;
2333                     else
2334                         goto normal_char;
2335 
2336 
2337                 case '{':
2338                     if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
2339                         goto handle_interval;
2340                     else
2341                         goto normal_char;
2342 
2343 
2344                 case '\\':
2345                     if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
2346 
2347                     /* Do not translate the character after the \, so that we can
2348                        distinguish, e.g., \B from \b, even if we normally would
2349                        translate, e.g., B to b.  */
2350                     PATFETCH_RAW (c);
2351 
2352                     switch (c)
2353                         {
2354                         case '(':
2355                             if (syntax & RE_NO_BK_PARENS)
2356                                 goto normal_backslash;
2357 
2358                         handle_open:
2359                             bufp->re_nsub++;
2360                             regnum++;
2361 
2362                             if (COMPILE_STACK_FULL)
2363                                 {
2364                                     RETALLOC (compile_stack.stack, compile_stack.size << 1,
2365                                               compile_stack_elt_t);
2366                                     if (compile_stack.stack == NULL) return REG_ESPACE;
2367 
2368                                     compile_stack.size <<= 1;
2369                                 }
2370 
2371                             /* These are the values to restore when we hit end of this
2372                                group.  They are all relative offsets, so that if the
2373                                whole pattern moves because of realloc, they will still
2374                                be valid.  */
2375                             COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
2376                             COMPILE_STACK_TOP.fixup_alt_jump
2377                                 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
2378                             COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
2379                             COMPILE_STACK_TOP.regnum = regnum;
2380 
2381                             /* We will eventually replace the 0 with the number of
2382                                groups inner to this one.  But do not push a
2383                                start_memory for groups beyond the last one we can
2384                                represent in the compiled pattern.  */
2385                             if (regnum <= MAX_REGNUM)
2386                                 {
2387                                     COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
2388                                     BUF_PUSH_3 (start_memory, regnum, 0);
2389                                 }
2390 
2391                             compile_stack.avail++;
2392 
2393                             fixup_alt_jump = 0;
2394                             laststart = 0;
2395                             begalt = b;
2396                             /* If we've reached MAX_REGNUM groups, then this open
2397                                won't actually generate any code, so we'll have to
2398                                clear pending_exact explicitly.  */
2399                             pending_exact = 0;
2400                             break;
2401 
2402 
2403                         case ')':
2404                             if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
2405 
2406                             if (COMPILE_STACK_EMPTY) {
2407                                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2408                                     goto normal_backslash;
2409                                 else
2410                                     FREE_STACK_RETURN (REG_ERPAREN);
2411                             }
2412                         handle_close:
2413                             if (fixup_alt_jump)
2414                                 { /* Push a dummy failure point at the end of the
2415                                      alternative for a possible future
2416                                      `pop_failure_jump' to pop.  See comments at
2417                                      `push_dummy_failure' in `re_match_2'.  */
2418                                     BUF_PUSH (push_dummy_failure);
2419 
2420                                     /* We allocated space for this jump when we assigned
2421                                        to `fixup_alt_jump', in the `handle_alt' case below.  */
2422                                     STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
2423                                 }
2424 
2425                             /* See similar code for backslashed left paren above.  */
2426                             if (COMPILE_STACK_EMPTY) {
2427                                 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
2428                                     goto normal_char;
2429                                 else
2430                                     FREE_STACK_RETURN (REG_ERPAREN);
2431                             }
2432                             /* Since we just checked for an empty stack above, this
2433                                ``can't happen''.  */
2434                             assert (compile_stack.avail != 0);
2435                             {
2436                                 /* We don't just want to restore into `regnum', because
2437                                    later groups should continue to be numbered higher,
2438                                    as in `(ab)c(de)' -- the second group is #2.  */
2439                                 regnum_t this_group_regnum;
2440 
2441                                 compile_stack.avail--;
2442                                 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
2443                                 fixup_alt_jump
2444                                     = COMPILE_STACK_TOP.fixup_alt_jump
2445                                     ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
2446                                     : 0;
2447                                 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
2448                                 this_group_regnum = COMPILE_STACK_TOP.regnum;
2449                                 /* If we've reached MAX_REGNUM groups, then this open
2450                                    won't actually generate any code, so we'll have to
2451                                    clear pending_exact explicitly.  */
2452                                 pending_exact = 0;
2453 
2454                                 /* We're at the end of the group, so now we know how many
2455                                    groups were inside this one.  */
2456                                 if (this_group_regnum <= MAX_REGNUM)
2457                                     {
2458                                         unsigned char *inner_group_loc
2459                                             = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
2460 
2461                                         *inner_group_loc = regnum - this_group_regnum;
2462                                         BUF_PUSH_3 (stop_memory, this_group_regnum,
2463                                                     regnum - this_group_regnum);
2464                                     }
2465                             }
2466                             break;
2467 
2468 
2469                         case '|':                                       /* `\|'.  */
2470                             if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
2471                                 goto normal_backslash;
2472                         handle_alt:
2473                             if (syntax & RE_LIMITED_OPS)
2474                                 goto normal_char;
2475 
2476                             /* Insert before the previous alternative a jump which
2477                                jumps to this alternative if the former fails.  */
2478                             GET_BUFFER_SPACE (3);
2479                             INSERT_JUMP (on_failure_jump, begalt, b + 6);
2480                             pending_exact = 0;
2481                             b += 3;
2482 
2483                             /* The alternative before this one has a jump after it
2484                                which gets executed if it gets matched.  Adjust that
2485                                jump so it will jump to this alternative's analogous
2486                                jump (put in below, which in turn will jump to the next
2487                                (if any) alternative's such jump, etc.).  The last such
2488                                jump jumps to the correct final destination.  A picture:
2489                                _____ _____
2490                                |   | |   |
2491                                |   v |   v
2492                                a | b   | c
2493 
2494                                If we are at `b', then fixup_alt_jump right now points to a
2495                                three-byte space after `a'.  We'll put in the jump, set
2496                                fixup_alt_jump to right after `b', and leave behind three
2497                                bytes which we'll fill in when we get to after `c'.  */
2498 
2499                             if (fixup_alt_jump)
2500                                 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2501 
2502                             /* Mark and leave space for a jump after this alternative,
2503                                to be filled in later either by next alternative or
2504                                when know we're at the end of a series of alternatives.  */
2505                             fixup_alt_jump = b;
2506                             GET_BUFFER_SPACE (3);
2507                             b += 3;
2508 
2509                             laststart = 0;
2510                             begalt = b;
2511                             break;
2512 
2513 
2514                         case '{':
2515                             /* If \{ is a literal.  */
2516                             if (!(syntax & RE_INTERVALS)
2517                                 /* If we're at `\{' and it's not the open-interval
2518                                    operator.  */
2519                                 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
2520                                 || (p - 2 == pattern  &&  p == pend))
2521                                 goto normal_backslash;
2522 
2523                         handle_interval:
2524                             {
2525                                 /* If got here, then the syntax allows intervals.  */
2526 
2527                                 /* At least (most) this many matches must be made.  */
2528                                 int lower_bound = -1, upper_bound = -1;
2529 
2530                                 beg_interval = p - 1;
2531 
2532                                 if (p == pend)
2533                                     {
2534                                         if (syntax & RE_NO_BK_BRACES)
2535                                             goto unfetch_interval;
2536                                         else
2537                                             FREE_STACK_RETURN (REG_EBRACE);
2538                                     }
2539 
2540                                 GET_UNSIGNED_NUMBER (lower_bound);
2541 
2542                                 if (c == ',')
2543                                     {
2544                                         GET_UNSIGNED_NUMBER (upper_bound);
2545                                         if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2546                                     }
2547                                 else
2548                                     /* Interval such as `{1}' => match exactly once. */
2549                                     upper_bound = lower_bound;
2550 
2551                                 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
2552                                     || lower_bound > upper_bound)
2553                                     {
2554                                         if (syntax & RE_NO_BK_BRACES)
2555                                             goto unfetch_interval;
2556                                         else
2557                                             FREE_STACK_RETURN (REG_BADBR);
2558                                     }
2559 
2560                                 if (!(syntax & RE_NO_BK_BRACES))
2561                                     {
2562                                         if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
2563 
2564                                         PATFETCH (c);
2565                                     }
2566 
2567                                 if (c != '}')
2568                                     {
2569                                         if (syntax & RE_NO_BK_BRACES)
2570                                             goto unfetch_interval;
2571                                         else
2572                                             FREE_STACK_RETURN (REG_BADBR);
2573                                     }
2574 
2575                                 /* We just parsed a valid interval.  */
2576 
2577                                 /* If it's invalid to have no preceding re.  */
2578                                 if (!laststart)
2579                                     {
2580                                         if (syntax & RE_CONTEXT_INVALID_OPS)
2581                                             FREE_STACK_RETURN (REG_BADRPT);
2582                                         else if (syntax & RE_CONTEXT_INDEP_OPS)
2583                                             laststart = b;
2584                                         else
2585                                             goto unfetch_interval;
2586                                     }
2587 
2588                                 /* If the upper bound is zero, don't want to succeed at
2589                                    all; jump from `laststart' to `b + 3', which will be
2590                                    the end of the buffer after we insert the jump.  */
2591                                 if (upper_bound == 0)
2592                                     {
2593                                         GET_BUFFER_SPACE (3);
2594                                         INSERT_JUMP (jump, laststart, b + 3);
2595                                         b += 3;
2596                                     }
2597 
2598                                 /* Otherwise, we have a nontrivial interval.  When
2599                                    we're all done, the pattern will look like:
2600                                    set_number_at <jump count> <upper bound>
2601                                    set_number_at <succeed_n count> <lower bound>
2602                                    succeed_n <after jump addr> <succeed_n count>
2603                                    <body of loop>
2604                                    jump_n <succeed_n addr> <jump count>
2605                                    (The upper bound and `jump_n' are omitted if
2606                                    `upper_bound' is 1, though.)  */
2607                                 else
2608                                     { /* If the upper bound is > 1, we need to insert
2609                                          more at the end of the loop.  */
2610                                         unsigned nbytes = 10 + (upper_bound > 1) * 10;
2611 
2612                                         GET_BUFFER_SPACE (nbytes);
2613 
2614                                         /* Initialize lower bound of the `succeed_n', even
2615                                            though it will be set during matching by its
2616                                            attendant `set_number_at' (inserted next),
2617                                            because `re_compile_fastmap' needs to know.
2618                                            Jump to the `jump_n' we might insert below.  */
2619                                         INSERT_JUMP2 (succeed_n, laststart,
2620                                                       b + 5 + (upper_bound > 1) * 5,
2621                                                       lower_bound);
2622                                         b += 5;
2623 
2624                                         /* Code to initialize the lower bound.  Insert
2625                                            before the `succeed_n'.  The `5' is the last two
2626                                            bytes of this `set_number_at', plus 3 bytes of
2627                                            the following `succeed_n'.  */
2628                                         insert_op2 (set_number_at, laststart, 5, lower_bound, b);
2629                                         b += 5;
2630 
2631                                         if (upper_bound > 1)
2632                                             { /* More than one repetition is allowed, so
2633                                                  append a backward jump to the `succeed_n'
2634                                                  that starts this interval.
2635 
2636                                                  When we've reached this during matching,
2637                                                  we'll have matched the interval once, so
2638                                                  jump back only `upper_bound - 1' times.  */
2639                                                 STORE_JUMP2 (jump_n, b, laststart + 5,
2640                                                              upper_bound - 1);
2641                                                 b += 5;
2642 
2643                                                 /* The location we want to set is the second
2644                                                    parameter of the `jump_n'; that is `b-2' as
2645                                                    an absolute address.  `laststart' will be
2646                                                    the `set_number_at' we're about to insert;
2647                                                    `laststart+3' the number to set, the source
2648                                                    for the relative address.  But we are
2649                                                    inserting into the middle of the pattern --
2650                                                    so everything is getting moved up by 5.
2651                                                    Conclusion: (b - 2) - (laststart + 3) + 5,
2652                                                    i.e., b - laststart.
2653 
2654                                                    We insert this at the beginning of the loop
2655                                                    so that if we fail during matching, we'll
2656                                                    reinitialize the bounds.  */
2657                                                 insert_op2 (set_number_at, laststart, b - laststart,
2658                                                             upper_bound - 1, b);
2659                                                 b += 5;
2660                                             }
2661                                     }
2662                                 pending_exact = 0;
2663                                 beg_interval = NULL;
2664                             }
2665                             break;
2666 
2667                         unfetch_interval:
2668                             /* If an invalid interval, match the characters as literals.  */
2669                             assert (beg_interval);
2670                             p = beg_interval;
2671                             beg_interval = NULL;
2672 
2673                             /* normal_char and normal_backslash need `c'.  */
2674                             PATFETCH (c);
2675 
2676                             if (!(syntax & RE_NO_BK_BRACES))
2677                                 {
2678                                     if (p > pattern  &&  p[-1] == '\\')
2679                                         goto normal_backslash;
2680                                 }
2681                             goto normal_char;
2682 
2683 #ifdef emacs
2684                             /* There is no way to specify the before_dot and after_dot
2685                                operators.  rms says this is ok.  --karl  */
2686                         case '=':
2687                             BUF_PUSH (at_dot);
2688                             break;
2689 
2690                         case 's':
2691                             laststart = b;
2692                             PATFETCH (c);
2693                             BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
2694                             break;
2695 
2696                         case 'S':
2697                             laststart = b;
2698                             PATFETCH (c);
2699                             BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
2700                             break;
2701 #endif /* emacs */
2702 
2703 
2704                         case 'w':
2705                             if (re_syntax_options & RE_NO_GNU_OPS)
2706                                 goto normal_char;
2707                             laststart = b;
2708                             BUF_PUSH (wordchar);
2709                             break;
2710 
2711 
2712                         case 'W':
2713                             if (re_syntax_options & RE_NO_GNU_OPS)
2714                                 goto normal_char;
2715                             laststart = b;
2716                             BUF_PUSH (notwordchar);
2717                             break;
2718 
2719 
2720                         case '<':
2721                             if (re_syntax_options & RE_NO_GNU_OPS)
2722                                 goto normal_char;
2723                             BUF_PUSH (wordbeg);
2724                             break;
2725 
2726                         case '>':
2727                             if (re_syntax_options & RE_NO_GNU_OPS)
2728                                 goto normal_char;
2729                             BUF_PUSH (wordend);
2730                             break;
2731 
2732                         case 'b':
2733                             if (re_syntax_options & RE_NO_GNU_OPS)
2734                                 goto normal_char;
2735                             BUF_PUSH (wordbound);
2736                             break;
2737 
2738                         case 'B':
2739                             if (re_syntax_options & RE_NO_GNU_OPS)
2740                                 goto normal_char;
2741                             BUF_PUSH (notwordbound);
2742                             break;
2743 
2744                         case '`':
2745                             if (re_syntax_options & RE_NO_GNU_OPS)
2746                                 goto normal_char;
2747                             BUF_PUSH (begbuf);
2748                             break;
2749 
2750                         case '\'':
2751                             if (re_syntax_options & RE_NO_GNU_OPS)
2752                                 goto normal_char;
2753                             BUF_PUSH (endbuf);
2754                             break;
2755 
2756                         case '1': case '2': case '3': case '4': case '5':
2757                         case '6': case '7': case '8': case '9':
2758                             if (syntax & RE_NO_BK_REFS)
2759                                 goto normal_char;
2760 
2761                             c1 = c - '0';
2762 
2763                             if (c1 > regnum)
2764                                 FREE_STACK_RETURN (REG_ESUBREG);
2765 
2766                             /* Can't back reference to a subexpression if inside of it.  */
2767                             if (group_in_compile_stack (compile_stack, (regnum_t) c1))
2768                                 goto normal_char;
2769 
2770                             laststart = b;
2771                             BUF_PUSH_2 (duplicate, c1);
2772                             break;
2773 
2774 
2775                         case '+':
2776                         case '?':
2777                             if (syntax & RE_BK_PLUS_QM)
2778                                 goto handle_plus;
2779                             else
2780                                 goto normal_backslash;
2781 
2782                         default:
2783                         normal_backslash:
2784                             /* You might think it would be useful for \ to mean
2785                                not to translate; but if we don't translate it
2786                                it will never match anything.  */
2787                             c = TRANSLATE (c);
2788                             goto normal_char;
2789                         }
2790                     break;
2791 
2792 
2793                 default:
2794                     /* Expects the character in `c'.  */
2795                 normal_char:
2796                     /* If no exactn currently being built.  */
2797                     if (!pending_exact
2798 
2799                         /* If last exactn not at current position.  */
2800                         || pending_exact + *pending_exact + 1 != b
2801 
2802                         /* We have only one byte following the exactn for the count.  */
2803                         || *pending_exact == (1 << BYTEWIDTH) - 1
2804 
2805                         /* If followed by a repetition operator.  */
2806                         || *p == '*' || *p == '^'
2807                         || ((syntax & RE_BK_PLUS_QM)
2808                             ? *p == '\\' && (p[1] == '+' || p[1] == '?')
2809                             : (*p == '+' || *p == '?'))
2810                         || ((syntax & RE_INTERVALS)
2811                             && ((syntax & RE_NO_BK_BRACES)
2812                                 ? *p == '{'
2813                                 : (p[0] == '\\' && p[1] == '{'))))
2814                         {
2815                             /* Start building a new exactn.  */
2816 
2817                             laststart = b;
2818 
2819                             BUF_PUSH_2 (exactn, 0);
2820                             pending_exact = b - 1;
2821                         }
2822 
2823                     BUF_PUSH (c);
2824                     (*pending_exact)++;
2825                     break;
2826                 } /* switch (c) */
2827         } /* while p != pend */
2828 
2829 
2830     /* Through the pattern now.  */
2831 
2832     if (fixup_alt_jump)
2833         STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
2834 
2835     if (!COMPILE_STACK_EMPTY)
2836         FREE_STACK_RETURN (REG_EPAREN);
2837 
2838   /* If we don't want backtracking, force success
2839      the first time we reach the end of the compiled pattern.  */
2840     if (syntax & RE_NO_POSIX_BACKTRACKING)
2841         BUF_PUSH (succeed);
2842 
2843     free (compile_stack.stack);
2844 
2845     /* We have succeeded; set the length of the buffer.  */
2846     bufp->used = b - bufp->buffer;
2847 
2848 #ifdef DEBUG
2849     if (debug)
2850         {
2851             DEBUG_PRINT1 ("\nCompiled pattern: \n");
2852             print_compiled_pattern (bufp);
2853         }
2854 #endif /* DEBUG */
2855 
2856 #ifndef MATCH_MAY_ALLOCATE
2857     /* Initialize the failure stack to the largest possible stack.  This
2858      isn't necessary unless we're trying to avoid calling alloca in
2859      the search and match routines.  */
2860     {
2861         int num_regs = bufp->re_nsub + 1;
2862 
2863         /* Since DOUBLE_FAIL_STACK refuses to double only if the current size
2864            is strictly greater than re_max_failures, the largest possible stack
2865            is 2 * re_max_failures failure points.  */
2866         if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
2867             {
2868                 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
2869 
2870 #ifdef emacs
2871                 if (! fail_stack.stack)
2872                     fail_stack.stack
2873                         = (fail_stack_elt_t *) xmalloc (fail_stack.size
2874                                                         * sizeof (fail_stack_elt_t));
2875                 else
2876                     fail_stack.stack
2877                         = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
2878                                                          (fail_stack.size
2879                                                           * sizeof (fail_stack_elt_t)));
2880 #else /* not emacs */
2881                 if (! fail_stack.stack)
2882                     fail_stack.stack
2883                         = (fail_stack_elt_t *) malloc (fail_stack.size
2884                                                        * sizeof (fail_stack_elt_t));
2885                 else
2886                     fail_stack.stack
2887                         = (fail_stack_elt_t *) realloc (fail_stack.stack,
2888                                                         (fail_stack.size
2889                                                          * sizeof (fail_stack_elt_t)));
2890 #endif /* not emacs */
2891             }
2892 
2893         regex_grow_registers (num_regs);
2894     }
2895 #endif /* not MATCH_MAY_ALLOCATE */
2896 
2897     return REG_NOERROR;
2898 } /* regex_compile */
2899 
2900 /* Subroutines for `regex_compile'.  */
2901 
2902 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
2903 
2904 static void
store_op1(re_opcode_t op,unsigned char * loc,int arg)2905 store_op1 (re_opcode_t op,
2906            unsigned char *loc,
2907            int arg)
2908 {
2909     *loc = (unsigned char) op;
2910     STORE_NUMBER (loc + 1, arg);
2911 }
2912 
2913 
2914 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
2915 
2916 static void
store_op2(re_opcode_t op,unsigned char * loc,int arg1,int arg2)2917 store_op2(re_opcode_t op,
2918           unsigned char *loc,
2919           int arg1,
2920           int arg2)
2921 {
2922     *loc = (unsigned char) op;
2923     STORE_NUMBER (loc + 1, arg1);
2924     STORE_NUMBER (loc + 3, arg2);
2925 }
2926 
2927 
2928 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
2929    for OP followed by two-byte integer parameter ARG.  */
2930 
2931 static void
insert_op1(re_opcode_t op,unsigned char * loc,int arg,unsigned char * end)2932 insert_op1(re_opcode_t op,
2933            unsigned char *loc,
2934            int arg,
2935            unsigned char *end)
2936 {
2937     register unsigned char *pfrom = end;
2938     register unsigned char *pto = end + 3;
2939 
2940     while (pfrom != loc)
2941         *--pto = *--pfrom;
2942 
2943     store_op1 (op, loc, arg);
2944 }
2945 
2946 
2947 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
2948 
2949 static void
insert_op2(re_opcode_t op,unsigned char * loc,int arg1,int arg2,unsigned char * end)2950 insert_op2(re_opcode_t op,
2951            unsigned char *loc,
2952            int arg1,
2953            int arg2,
2954            unsigned char *end)
2955 {
2956     register unsigned char *pfrom = end;
2957     register unsigned char *pto = end + 5;
2958 
2959     while (pfrom != loc)
2960         *--pto = *--pfrom;
2961 
2962     store_op2 (op, loc, arg1, arg2);
2963 }
2964 
2965 
2966 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
2967    after an alternative or a begin-subexpression.  We assume there is at
2968    least one character before the ^.  */
2969 
2970 static boolean
at_begline_loc_p(const char * pattern,const char * p,reg_syntax_t syntax)2971 at_begline_loc_p(const char *pattern,
2972                  const char *p,
2973                  reg_syntax_t syntax)
2974 {
2975     const char *prev = p - 2;
2976     boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
2977 
2978     return
2979         /* After a subexpression?  */
2980         (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
2981         /* After an alternative?  */
2982         || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
2983 }
2984 
2985 
2986 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
2987    at least one character after the $, i.e., `P < PEND'.  */
2988 
2989 static boolean
at_endline_loc_p(const char * p,const char * pend,reg_syntax_t syntax)2990 at_endline_loc_p(const char *p,
2991                  const char *pend,
2992                  reg_syntax_t syntax)
2993 {
2994     const char *next = p;
2995     boolean next_backslash = *next == '\\';
2996     const char *next_next = p + 1 < pend ? p + 1 : 0;
2997 
2998     return
2999         /* Before a subexpression?  */
3000         (syntax & RE_NO_BK_PARENS ? *next == ')'
3001          : next_backslash && next_next && *next_next == ')')
3002         /* Before an alternative?  */
3003         || (syntax & RE_NO_BK_VBAR ? *next == '|'
3004             : next_backslash && next_next && *next_next == '|');
3005 }
3006 
3007 
3008 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
3009    false if it's not.  */
3010 
3011 static boolean
group_in_compile_stack(compile_stack_type compile_stack,regnum_t regnum)3012 group_in_compile_stack(compile_stack_type compile_stack,
3013                        regnum_t regnum)
3014 {
3015     int this_element;
3016 
3017     for (this_element = compile_stack.avail - 1;
3018          this_element >= 0;
3019          this_element--)
3020         if (compile_stack.stack[this_element].regnum == regnum)
3021             return true;
3022 
3023     return false;
3024 }
3025 
3026 
3027 /* Read the ending character of a range (in a bracket expression) from the
3028    uncompiled pattern *P_PTR (which ends at PEND).  We assume the
3029    starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
3030    Then we set the translation of all bits between the starting and
3031    ending characters (inclusive) in the compiled pattern B.
3032 
3033    Return an error code.
3034 
3035    We use these short variable names so we can use the same macros as
3036    `regex_compile' itself.  */
3037 
3038 static reg_errcode_t
compile_range(const char ** p_ptr,const char * pend,RE_TRANSLATE_TYPE translate,reg_syntax_t syntax,unsigned char * b)3039 compile_range(const char **p_ptr,
3040               const char *pend,
3041               RE_TRANSLATE_TYPE translate,
3042               reg_syntax_t syntax,
3043               unsigned char *b)
3044 {
3045     unsigned this_char;
3046 
3047     const char *p = *p_ptr;
3048     unsigned int range_start, range_end;
3049 
3050     if (p == pend)
3051         return REG_ERANGE;
3052 
3053     /* Even though the pattern is a signed `char *', we need to fetch
3054        with unsigned char *'s; if the high bit of the pattern character
3055        is set, the range endpoints will be negative if we fetch using a
3056        signed char *.
3057 
3058        We also want to fetch the endpoints without translating them; the
3059        appropriate translation is done in the bit-setting loop below.  */
3060     /* The SVR4 compiler on the 3B2 had trouble with unsigned const char *.  */
3061     range_start = ((const unsigned char *) p)[-2];
3062     range_end   = ((const unsigned char *) p)[0];
3063 
3064     /* Have to increment the pointer into the pattern string, so the
3065        caller isn't still at the ending character.  */
3066     (*p_ptr)++;
3067 
3068     /* If the start is after the end, the range is empty.  */
3069     if (range_start > range_end)
3070         return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
3071 
3072     /* Here we see why `this_char' has to be larger than an `unsigned
3073        char' -- the range is inclusive, so if `range_end' == 0xff
3074        (assuming 8-bit characters), we would otherwise go into an infinite
3075        loop, since all characters <= 0xff.  */
3076     for (this_char = range_start; this_char <= range_end; this_char++)
3077         {
3078             SET_LIST_BIT (TRANSLATE (this_char));
3079         }
3080 
3081     return REG_NOERROR;
3082 }
3083 
3084 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
3085    BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
3086    characters can start a string that matches the pattern.  This fastmap
3087    is used by re_search to skip quickly over impossible starting points.
3088 
3089    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
3090    area as BUFP->fastmap.
3091 
3092    We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
3093    the pattern buffer.
3094 
3095    Returns 0 if we succeed, -2 if an internal error.   */
3096 
3097 int
re_compile_fastmap(struct re_pattern_buffer * bufp)3098 re_compile_fastmap(struct re_pattern_buffer *bufp)
3099 {
3100     int j, k;
3101 #ifdef MATCH_MAY_ALLOCATE
3102     fail_stack_type fail_stack;
3103 #endif
3104 #ifndef REGEX_MALLOC
3105     char *destination;
3106 #endif
3107 
3108     register char *fastmap = bufp->fastmap;
3109     unsigned char *pattern = bufp->buffer;
3110     unsigned char *p = pattern;
3111     register unsigned char *pend = pattern + bufp->used;
3112 
3113 #ifdef REL_ALLOC
3114     /* This holds the pointer to the failure stack, when
3115        it is allocated relocatably.  */
3116     fail_stack_elt_t *failure_stack_ptr;
3117 #endif
3118 
3119     /* Assume that each path through the pattern can be null until
3120        proven otherwise.  We set this false at the bottom of switch
3121        statement, to which we get only if a particular path doesn't
3122        match the empty string.  */
3123     boolean path_can_be_null = true;
3124 
3125     /* We aren't doing a `succeed_n' to begin with.  */
3126     boolean succeed_n_p = false;
3127 
3128     assert (fastmap != NULL && p != NULL);
3129 
3130     INIT_FAIL_STACK ();
3131     bzero (fastmap, 1 << BYTEWIDTH);  /* Assume nothing's valid.  */
3132     bufp->fastmap_accurate = 1;     /* It will be when we're done.  */
3133     bufp->can_be_null = 0;
3134 
3135     while (1)
3136         {
3137             if (p == pend || *p == succeed)
3138                 {
3139                     /* We have reached the (effective) end of pattern.  */
3140                     if (!FAIL_STACK_EMPTY ())
3141                         {
3142                             bufp->can_be_null |= path_can_be_null;
3143 
3144                             /* Reset for next path.  */
3145                             path_can_be_null = true;
3146 
3147                             p = fail_stack.stack[--fail_stack.avail].pointer;
3148 
3149                             continue;
3150                         }
3151                     else
3152                         break;
3153                 }
3154 
3155             /* We should never be about to go beyond the end of the pattern.  */
3156             assert (p < pend);
3157 
3158             switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
3159                 {
3160 
3161                     /* I guess the idea here is to simply not bother with a fastmap
3162                        if a backreference is used, since it's too hard to figure out
3163                        the fastmap for the corresponding group.  Setting
3164                        `can_be_null' stops `re_search_2' from using the fastmap, so
3165                        that is all we do.  */
3166                 case duplicate:
3167                     bufp->can_be_null = 1;
3168                     goto done;
3169 
3170 
3171                     /* Following are the cases which match a character.  These end
3172                        with `break'.  */
3173 
3174                 case exactn:
3175                     fastmap[p[1]] = 1;
3176                     break;
3177 
3178 
3179                 case charset:
3180                     for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3181                         if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
3182                             fastmap[j] = 1;
3183                     break;
3184 
3185 
3186                 case charset_not:
3187                     /* Chars beyond end of map must be allowed.  */
3188                     for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3189                         fastmap[j] = 1;
3190 
3191                     for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3192                         if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
3193                             fastmap[j] = 1;
3194                     break;
3195 
3196 
3197                 case wordchar:
3198                     for (j = 0; j < (1 << BYTEWIDTH); j++)
3199                         if (SYNTAX (j) == Sword)
3200                             fastmap[j] = 1;
3201                     break;
3202 
3203 
3204                 case notwordchar:
3205                     for (j = 0; j < (1 << BYTEWIDTH); j++)
3206                         if (SYNTAX (j) != Sword)
3207                             fastmap[j] = 1;
3208                     break;
3209 
3210 
3211                 case anychar:
3212                     {
3213                         int fastmap_newline = fastmap['\n'];
3214 
3215                         /* `.' matches anything ...  */
3216                         for (j = 0; j < (1 << BYTEWIDTH); j++)
3217                             fastmap[j] = 1;
3218 
3219                         /* ... except perhaps newline.  */
3220                         if (!(bufp->syntax & RE_DOT_NEWLINE))
3221                             fastmap['\n'] = fastmap_newline;
3222 
3223                         /* Return if we have already set `can_be_null'; if we have,
3224                            then the fastmap is irrelevant.  Something's wrong here.  */
3225                         else if (bufp->can_be_null)
3226                             goto done;
3227 
3228                         /* Otherwise, have to check alternative paths.  */
3229                         break;
3230                     }
3231 
3232 #ifdef emacs
3233                 case syntaxspec:
3234                     k = *p++;
3235                     for (j = 0; j < (1 << BYTEWIDTH); j++)
3236                         if (SYNTAX (j) == (enum syntaxcode) k)
3237                             fastmap[j] = 1;
3238                     break;
3239 
3240 
3241                 case notsyntaxspec:
3242                     k = *p++;
3243                     for (j = 0; j < (1 << BYTEWIDTH); j++)
3244                         if (SYNTAX (j) != (enum syntaxcode) k)
3245                             fastmap[j] = 1;
3246                     break;
3247 
3248 
3249                     /* All cases after this match the empty string.  These end with
3250                        `continue'.  */
3251 
3252 
3253                 case before_dot:
3254                 case at_dot:
3255                 case after_dot:
3256                     continue;
3257 #endif /* emacs */
3258 
3259 
3260                 case no_op:
3261                 case begline:
3262                 case endline:
3263                 case begbuf:
3264                 case endbuf:
3265                 case wordbound:
3266                 case notwordbound:
3267                 case wordbeg:
3268                 case wordend:
3269                 case push_dummy_failure:
3270                     continue;
3271 
3272 
3273                 case jump_n:
3274                 case pop_failure_jump:
3275                 case maybe_pop_jump:
3276                 case jump:
3277                 case jump_past_alt:
3278                 case dummy_failure_jump:
3279                     EXTRACT_NUMBER_AND_INCR (j, p);
3280                     p += j;
3281                     if (j > 0)
3282                         continue;
3283 
3284                     /* Jump backward implies we just went through the body of a
3285                        loop and matched nothing.  Opcode jumped to should be
3286                        `on_failure_jump' or `succeed_n'.  Just treat it like an
3287                        ordinary jump.  For a * loop, it has pushed its failure
3288                        point already; if so, discard that as redundant.  */
3289                     if ((re_opcode_t) *p != on_failure_jump
3290                         && (re_opcode_t) *p != succeed_n)
3291                         continue;
3292 
3293                     p++;
3294                     EXTRACT_NUMBER_AND_INCR (j, p);
3295                     p += j;
3296 
3297                     /* If what's on the stack is where we are now, pop it.  */
3298                     if (!FAIL_STACK_EMPTY ()
3299                         && fail_stack.stack[fail_stack.avail - 1].pointer == p)
3300                         fail_stack.avail--;
3301 
3302                     continue;
3303 
3304 
3305                 case on_failure_jump:
3306                 case on_failure_keep_string_jump:
3307                 handle_on_failure_jump:
3308                 EXTRACT_NUMBER_AND_INCR (j, p);
3309 
3310                 /* For some patterns, e.g., `(a?)?', `p+j' here points to the
3311                    end of the pattern.  We don't want to push such a point,
3312                    since when we restore it above, entering the switch will
3313                    increment `p' past the end of the pattern.  We don't need
3314                    to push such a point since we obviously won't find any more
3315                    fastmap entries beyond `pend'.  Such a pattern can match
3316                    the null string, though.  */
3317                 if (p + j < pend)
3318                     {
3319                         if (!PUSH_PATTERN_OP (p + j, fail_stack))
3320                             {
3321                                 RESET_FAIL_STACK ();
3322                                 return -2;
3323                             }
3324                     }
3325                 else
3326                     bufp->can_be_null = 1;
3327 
3328                 if (succeed_n_p)
3329                     {
3330                         EXTRACT_NUMBER_AND_INCR (k, p); /* Skip the n.  */
3331                         succeed_n_p = false;
3332                     }
3333 
3334                 continue;
3335 
3336 
3337                 case succeed_n:
3338                     /* Get to the number of times to succeed.  */
3339                     p += 2;
3340 
3341                     /* Increment p past the n for when k != 0.  */
3342                     EXTRACT_NUMBER_AND_INCR (k, p);
3343                     if (k == 0)
3344                         {
3345                             p -= 4;
3346                             succeed_n_p = true;  /* Spaghetti code alert.  */
3347                             goto handle_on_failure_jump;
3348                         }
3349                     continue;
3350 
3351 
3352                 case set_number_at:
3353                     p += 4;
3354                     continue;
3355 
3356 
3357                 case start_memory:
3358                 case stop_memory:
3359                     p += 2;
3360                     continue;
3361 
3362 
3363                 default:
3364                     abort (); /* We have listed all the cases.  */
3365                 } /* switch *p++ */
3366 
3367             /* Getting here means we have found the possible starting
3368                characters for one path of the pattern -- and that the empty
3369                string does not match.  We need not follow this path further.
3370                Instead, look at the next alternative (remembered on the
3371                stack), or quit if no more.  The test at the top of the loop
3372                does these things.  */
3373             path_can_be_null = false;
3374             p = pend;
3375         } /* while p */
3376 
3377     /* Set `can_be_null' for the last path (also the first path, if the
3378        pattern is empty).  */
3379     bufp->can_be_null |= path_can_be_null;
3380 
3381  done:
3382     RESET_FAIL_STACK ();
3383     return 0;
3384 } /* re_compile_fastmap */
3385 
3386 /* Set REGS to hold NUM_REGS registers, storing them in STARTS and
3387    ENDS.  Subsequent matches using PATTERN_BUFFER and REGS will use
3388    this memory for recording register information.  STARTS and ENDS
3389    must be allocated using the malloc library routine, and must each
3390    be at least NUM_REGS * sizeof (regoff_t) bytes long.
3391 
3392    If NUM_REGS == 0, then subsequent matches should allocate their own
3393    register data.
3394 
3395    Unless this function is called, the first search or match using
3396    PATTERN_BUFFER will allocate its own register data, without
3397    freeing the old data.  */
3398 
3399 void
re_set_registers(struct re_pattern_buffer * bufp,struct re_registers * regs,unsigned num_regs,regoff_t * starts,regoff_t * ends)3400 re_set_registers(struct re_pattern_buffer *bufp,
3401                  struct re_registers *regs,
3402                  unsigned num_regs,
3403                  regoff_t *starts,
3404                  regoff_t *ends)
3405 {
3406     if (num_regs)
3407         {
3408             bufp->regs_allocated = REGS_REALLOCATE;
3409             regs->num_regs = num_regs;
3410             regs->start = starts;
3411             regs->end = ends;
3412         }
3413     else
3414         {
3415             bufp->regs_allocated = REGS_UNALLOCATED;
3416             regs->num_regs = 0;
3417             regs->start = regs->end = (regoff_t *) 0;
3418         }
3419 }
3420 
3421 /* Searching routines.  */
3422 
3423 /* Like re_search_2, below, but only one string is specified, and
3424    doesn't let you say where to stop matching. */
3425 
3426 int
re_search(struct re_pattern_buffer * bufp,const char * string,int size,int startpos,int range,struct re_registers * regs)3427 re_search(struct re_pattern_buffer *bufp,
3428           const char *string,
3429           int size,
3430           int startpos,
3431           int range,
3432           struct re_registers *regs)
3433 {
3434     return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
3435                         regs, size);
3436 }
3437 
3438 
3439 /* Using the compiled pattern in BUFP->buffer, first tries to match the
3440    virtual concatenation of STRING1 and STRING2, starting first at index
3441    STARTPOS, then at STARTPOS + 1, and so on.
3442 
3443    STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
3444 
3445    RANGE is how far to scan while trying to match.  RANGE = 0 means try
3446    only at STARTPOS; in general, the last start tried is STARTPOS +
3447    RANGE.
3448 
3449    In REGS, return the indices of the virtual concatenation of STRING1
3450    and STRING2 that matched the entire BUFP->buffer and its contained
3451    subexpressions.
3452 
3453    Do not consider matching one past the index STOP in the virtual
3454    concatenation of STRING1 and STRING2.
3455 
3456    We return either the position in the strings at which the match was
3457    found, -1 if no match, or -2 if error (such as failure
3458    stack overflow).  */
3459 
3460 int
re_search_2(struct re_pattern_buffer * bufp,const char * string1,int size1,const char * string2,int size2,int startpos,int range,struct re_registers * regs,int stop)3461 re_search_2(struct re_pattern_buffer *bufp,
3462             const char *string1,
3463             int size1,
3464             const char *string2,
3465             int size2,
3466             int startpos,
3467             int range,
3468             struct re_registers *regs,
3469             int stop)
3470 {
3471     int val;
3472     register char *fastmap = bufp->fastmap;
3473     register RE_TRANSLATE_TYPE translate = bufp->translate;
3474     int total_size = size1 + size2;
3475     int endpos = startpos + range;
3476 
3477     /* Check for out-of-range STARTPOS.  */
3478     if (startpos < 0 || startpos > total_size)
3479         return -1;
3480 
3481     /* Fix up RANGE if it might eventually take us outside
3482        the virtual concatenation of STRING1 and STRING2.
3483        Make sure we won't move STARTPOS below 0 or above TOTAL_SIZE.  */
3484     if (endpos < 0)
3485         range = 0 - startpos;
3486     else if (endpos > total_size)
3487         range = total_size - startpos;
3488 
3489     /* If the search isn't to be a backwards one, don't waste time in a
3490        search for a pattern that must be anchored.  */
3491     if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
3492         {
3493             if (startpos > 0)
3494                 return -1;
3495             else
3496                 range = 1;
3497         }
3498 
3499 #ifdef emacs
3500     /* In a forward search for something that starts with \=.
3501        don't keep searching past point.  */
3502     if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
3503         {
3504             range = PT - startpos;
3505             if (range <= 0)
3506                 return -1;
3507         }
3508 #endif /* emacs */
3509 
3510     /* Update the fastmap now if not correct already.  */
3511     if (fastmap && !bufp->fastmap_accurate)
3512         if (re_compile_fastmap (bufp) == -2)
3513             return -2;
3514 
3515     /* Loop through the string, looking for a place to start matching.  */
3516     for (;;)
3517         {
3518             /* If a fastmap is supplied, skip quickly over characters that
3519                cannot be the start of a match.  If the pattern can match the
3520                null string, however, we don't need to skip characters; we want
3521                the first null string.  */
3522             if (fastmap && startpos < total_size && !bufp->can_be_null)
3523                 {
3524                     if (range > 0)      /* Searching forwards.  */
3525                         {
3526                             register const char *d;
3527                             register int lim = 0;
3528                             int irange = range;
3529 
3530                             if (startpos < size1 && startpos + range >= size1)
3531                                 lim = range - (size1 - startpos);
3532 
3533                             d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
3534 
3535                             /* Written out as an if-else to avoid testing `translate'
3536                                inside the loop.  */
3537                             if (translate)
3538                                 while (range > lim
3539                                        && !fastmap[(unsigned char)
3540                                                   translate[(unsigned char) *d++]])
3541                                     range--;
3542                             else
3543                                 while (range > lim && !fastmap[(unsigned char) *d++])
3544                                     range--;
3545 
3546                             startpos += irange - range;
3547                         }
3548                     else                                /* Searching backwards.  */
3549                         {
3550                             register char c = (size1 == 0 || startpos >= size1
3551                                                ? string2[startpos - size1]
3552                                                : string1[startpos]);
3553 
3554                             if (!fastmap[(unsigned char) TRANSLATE (c)])
3555                                 goto advance;
3556                         }
3557                 }
3558 
3559             /* If can't match the null string, and that's all we have left, fail.  */
3560             if (range >= 0 && startpos == total_size && fastmap
3561                 && !bufp->can_be_null)
3562                 return -1;
3563 
3564             val = re_match_2_internal (bufp, string1, size1, string2, size2,
3565                                        startpos, regs, stop);
3566 #ifndef REGEX_MALLOC
3567 #ifdef C_ALLOCA
3568             alloca (0);
3569 #endif
3570 #endif
3571 
3572             if (val >= 0)
3573                 return startpos;
3574 
3575             if (val == -2)
3576                 return -2;
3577 
3578         advance:
3579             if (!range)
3580                 break;
3581             else if (range > 0)
3582                 {
3583                     range--;
3584                     startpos++;
3585                 }
3586             else
3587                 {
3588                     range++;
3589                     startpos--;
3590                 }
3591         }
3592     return -1;
3593 } /* re_search_2 */
3594 
3595 /* This converts PTR, a pointer into one of the search strings `string1'
3596    and `string2' into an offset from the beginning of that string.  */
3597 #define POINTER_TO_OFFSET(ptr)                  \
3598   (FIRST_STRING_P (ptr)                         \
3599    ? ((regoff_t) ((ptr) - string1))             \
3600    : ((regoff_t) ((ptr) - string2 + size1)))
3601 
3602 /* Macros for dealing with the split strings in re_match_2.  */
3603 
3604 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
3605 
3606 /* Call before fetching a character with *d.  This switches over to
3607    string2 if necessary.  */
3608 #define PREFETCH()                                                      \
3609   while (d == dend)                                                     \
3610     {                                                                   \
3611       /* End of string2 => fail.  */                                    \
3612       if (dend == end_match_2)                                          \
3613         goto fail;                                                      \
3614       /* End of string1 => advance to string2.  */                      \
3615       d = string2;                                                      \
3616       dend = end_match_2;                                               \
3617     }
3618 
3619 
3620 /* Test if at very beginning or at very end of the virtual concatenation
3621    of `string1' and `string2'.  If only one string, it's `string2'.  */
3622 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
3623 #define AT_STRINGS_END(d) ((d) == end2)
3624 
3625 
3626 /* Test if D points to a character which is word-constituent.  We have
3627    two special cases to check for: if past the end of string1, look at
3628    the first character in string2; and if before the beginning of
3629    string2, look at the last character in string1.  */
3630 #define WORDCHAR_P(d)                                                   \
3631   (SYNTAX ((d) == end1 ? *string2                                       \
3632            : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
3633    == Sword)
3634 
3635 /* Disabled due to a compiler bug -- see comment at case wordbound */
3636 #if 0
3637 /* Test if the character before D and the one at D differ with respect
3638    to being word-constituent.  */
3639 #define AT_WORD_BOUNDARY(d)                                             \
3640   (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
3641    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
3642 #endif
3643 
3644 /* Free everything we malloc.  */
3645 #ifdef MATCH_MAY_ALLOCATE
3646 #define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
3647 #define FREE_VARIABLES()                                                \
3648   do {                                                                  \
3649     REGEX_FREE_STACK (fail_stack.stack);                                \
3650     FREE_VAR ((void*) regstart);                                        \
3651     FREE_VAR ((void*) regend);                                          \
3652     FREE_VAR ((void*) old_regstart);                                    \
3653     FREE_VAR ((void*) old_regend);                                      \
3654     FREE_VAR ((void*) best_regstart);                                   \
3655     FREE_VAR ((void*) best_regend);                                     \
3656     FREE_VAR ((void*) reg_info);                                        \
3657     FREE_VAR ((void*) reg_dummy);                                       \
3658     FREE_VAR ((void*) reg_info_dummy);                                  \
3659   } while (0)
3660 #else
3661 #define FREE_VARIABLES() ((void)0) /* Do nothing!  But inhibit gcc warning.  */
3662 #endif /* not MATCH_MAY_ALLOCATE */
3663 
3664 /* These values must meet several constraints.  They must not be valid
3665    register values; since we have a limit of 255 registers (because
3666    we use only one byte in the pattern for the register number), we can
3667    use numbers larger than 255.  They must differ by 1, because of
3668    NUM_FAILURE_ITEMS above.  And the value for the lowest register must
3669    be larger than the value for the highest register, so we do not try
3670    to actually save any registers when none are active.  */
3671 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
3672 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
3673 
3674 /* Matching routines.  */
3675 
3676 #ifndef emacs   /* Emacs never uses this.  */
3677 /* re_match is like re_match_2 except it takes only a single string.  */
3678 
3679 int
re_match(struct re_pattern_buffer * bufp,const char * string,int size,int pos,struct re_registers * regs)3680 re_match(struct re_pattern_buffer *bufp,
3681          const char *string,
3682          int size,
3683          int pos,
3684          struct re_registers *regs)
3685 {
3686     int result = re_match_2_internal (bufp, NULL, 0, string, size,
3687                                       pos, regs, size);
3688 #ifndef REGEX_MALLOC
3689 #ifdef C_ALLOCA
3690     alloca (0);
3691 #endif
3692 #endif
3693     return result;
3694 }
3695 #endif /* not emacs */
3696 
3697 static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
3698                                                     unsigned char *end,
3699                                                     register_info_type *reg_info));
3700 static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
3701                                                   unsigned char *end,
3702                                                   register_info_type *reg_info));
3703 static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
3704                                                         unsigned char *end,
3705                                                         register_info_type *reg_info));
3706 static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
3707                                      int len, char *translate));
3708 
3709 /* re_match_2 matches the compiled pattern in BUFP against the
3710    the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
3711    and SIZE2, respectively).  We start matching at POS, and stop
3712    matching at STOP.
3713 
3714    If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
3715    store offsets for the substring each group matched in REGS.  See the
3716    documentation for exactly how many groups we fill.
3717 
3718    We return -1 if no match, -2 if an internal error (such as the
3719    failure stack overflowing).  Otherwise, we return the length of the
3720    matched substring.  */
3721 
3722 int
re_match_2(struct re_pattern_buffer * bufp,const char * string1,int size1,const char * string2,int size2,int pos,struct re_registers * regs,int stop)3723 re_match_2(struct re_pattern_buffer *bufp,
3724            const char *string1,
3725            int size1,
3726            const char *string2,
3727            int size2,
3728            int pos,
3729            struct re_registers *regs,
3730            int stop)
3731 {
3732     int result = re_match_2_internal (bufp, string1, size1, string2, size2,
3733                                       pos, regs, stop);
3734 #ifndef REGEX_MALLOC
3735 #ifdef C_ALLOCA
3736     alloca (0);
3737 #endif
3738 #endif
3739     return result;
3740 }
3741 
3742 /* This is a separate function so that we can force an alloca cleanup
3743    afterwards.  */
3744 static int
re_match_2_internal(struct re_pattern_buffer * bufp,const char * string1,int size1,const char * string2,int size2,int pos,struct re_registers * regs,int stop)3745 re_match_2_internal(struct re_pattern_buffer *bufp,
3746                     const char *string1,
3747                     int size1,
3748                     const char *string2,
3749                     int size2,
3750                     int pos,
3751                     struct re_registers *regs,
3752                     int stop)
3753 {
3754     /* General temporaries.  */
3755     int mcnt;
3756     unsigned char *p1;
3757 
3758     /* Just past the end of the corresponding string.  */
3759     const char *end1, *end2;
3760 
3761     /* Pointers into string1 and string2, just past the last characters in
3762      each to consider matching.  */
3763     const char *end_match_1, *end_match_2;
3764 
3765     /* Where we are in the data, and the end of the current string.  */
3766     const char *d, *dend;
3767 
3768     /* Where we are in the pattern, and the end of the pattern.  */
3769     unsigned char *p = bufp->buffer;
3770     register unsigned char *pend = p + bufp->used;
3771 
3772   /* Mark the opcode just after a start_memory, so we can test for an
3773      empty subpattern when we get to the stop_memory.  */
3774     unsigned char *just_past_start_mem = 0;
3775 
3776     /* We use this to map every character in the string.  */
3777     RE_TRANSLATE_TYPE translate = bufp->translate;
3778 
3779     /* Failure point stack.  Each place that can handle a failure further
3780      down the line pushes a failure point on this stack.  It consists of
3781      restart, regend, and reg_info for all registers corresponding to
3782      the subexpressions we're currently inside, plus the number of such
3783      registers, and, finally, two char *'s.  The first char * is where
3784      to resume scanning the pattern; the second one is where to resume
3785      scanning the strings.  If the latter is zero, the failure point is
3786      a ``dummy''; if a failure happens and the failure point is a dummy,
3787      it gets discarded and the next next one is tried.  */
3788 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3789     fail_stack_type fail_stack;
3790 #endif
3791 #ifdef DEBUG
3792     static unsigned failure_id = 0;
3793     unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
3794 #endif
3795 
3796 #ifdef REL_ALLOC
3797     /* This holds the pointer to the failure stack, when
3798      it is allocated relocatably.  */
3799     fail_stack_elt_t *failure_stack_ptr;
3800 #endif
3801 
3802     /* We fill all the registers internally, independent of what we
3803      return, for use in backreferences.  The number here includes
3804      an element for register zero.  */
3805     size_t num_regs = bufp->re_nsub + 1;
3806 
3807     /* The currently active registers.  */
3808     active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3809     active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3810 
3811   /* Information on the contents of registers. These are pointers into
3812      the input strings; they record just what was matched (on this
3813      attempt) by a subexpression part of the pattern, that is, the
3814      regnum-th regstart pointer points to where in the pattern we began
3815      matching and the regnum-th regend points to right after where we
3816      stopped matching the regnum-th subexpression.  (The zeroth register
3817      keeps track of what the whole pattern matches.)  */
3818 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3819     const char **regstart, **regend;
3820 #endif
3821 
3822     /* If a group that's operated upon by a repetition operator fails to
3823      match anything, then the register for its start will need to be
3824      restored because it will have been set to wherever in the string we
3825      are when we last see its open-group operator.  Similarly for a
3826      register's end.  */
3827 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3828     const char **old_regstart, **old_regend;
3829 #endif
3830 
3831     /* The is_active field of reg_info helps us keep track of which (possibly
3832      nested) subexpressions we are currently in. The matched_something
3833      field of reg_info[reg_num] helps us tell whether or not we have
3834      matched any of the pattern so far this time through the reg_num-th
3835      subexpression.  These two fields get reset each time through any
3836      loop their register is in.  */
3837 #ifdef MATCH_MAY_ALLOCATE /* otherwise, this is global.  */
3838     register_info_type *reg_info;
3839 #endif
3840 
3841     /* The following record the register info as found in the above
3842      variables when we find a match better than any we've seen before.
3843      This happens as we backtrack through the failure points, which in
3844      turn happens only if we have not yet matched the entire string. */
3845     unsigned best_regs_set = false;
3846 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3847     const char **best_regstart, **best_regend;
3848 #endif
3849 
3850     /* Logically, this is `best_regend[0]'.  But we don't want to have to
3851      allocate space for that if we're not allocating space for anything
3852      else (see below).  Also, we never need info about register 0 for
3853      any of the other register vectors, and it seems rather a kludge to
3854      treat `best_regend' differently than the rest.  So we keep track of
3855      the end of the best match so far in a separate variable.  We
3856      initialize this to NULL so that when we backtrack the first time
3857      and need to test it, it's not garbage.  */
3858     const char *match_end = NULL;
3859 
3860     /* This helps SET_REGS_MATCHED avoid doing redundant work.  */
3861     int set_regs_matched_done = 0;
3862 
3863     /* Used when we pop values we don't care about.  */
3864 #ifdef MATCH_MAY_ALLOCATE /* otherwise, these are global.  */
3865     const char **reg_dummy;
3866     register_info_type *reg_info_dummy;
3867 #endif
3868 
3869 #ifdef DEBUG
3870     /* Counts the total number of registers pushed.  */
3871     unsigned num_regs_pushed = 0;
3872 #endif
3873 
3874     DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
3875 
3876     INIT_FAIL_STACK ();
3877 
3878 #ifdef MATCH_MAY_ALLOCATE
3879     /* Do not bother to initialize all the register variables if there are
3880      no groups in the pattern, as it takes a fair amount of time.  If
3881      there are groups, we include space for register 0 (the whole
3882      pattern), even though we never use it, since it simplifies the
3883      array indexing.  We should fix this.  */
3884     if (bufp->re_nsub)
3885         {
3886             regstart = REGEX_TALLOC (num_regs, const char *);
3887             regend = REGEX_TALLOC (num_regs, const char *);
3888             old_regstart = REGEX_TALLOC (num_regs, const char *);
3889             old_regend = REGEX_TALLOC (num_regs, const char *);
3890             best_regstart = REGEX_TALLOC (num_regs, const char *);
3891             best_regend = REGEX_TALLOC (num_regs, const char *);
3892             reg_info = REGEX_TALLOC (num_regs, register_info_type);
3893             reg_dummy = REGEX_TALLOC (num_regs, const char *);
3894             reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
3895 
3896             if (!(regstart && regend && old_regstart && old_regend && reg_info
3897                   && best_regstart && best_regend && reg_dummy && reg_info_dummy))
3898                 {
3899                     FREE_VARIABLES ();
3900                     return -2;
3901                 }
3902         }
3903     else
3904         {
3905             /* We must initialize all our variables to NULL, so that
3906                `FREE_VARIABLES' doesn't try to free them.  */
3907             regstart = regend = old_regstart = old_regend = best_regstart
3908                 = best_regend = reg_dummy = NULL;
3909             reg_info = reg_info_dummy = (register_info_type *) NULL;
3910         }
3911 #endif /* MATCH_MAY_ALLOCATE */
3912 
3913     /* The starting position is bogus.  */
3914     if (pos < 0 || pos > size1 + size2)
3915         {
3916             FREE_VARIABLES ();
3917             return -1;
3918         }
3919 
3920     /* Initialize subexpression text positions to -1 to mark ones that no
3921      start_memory/stop_memory has been seen for. Also initialize the
3922      register information struct.  */
3923     for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
3924         {
3925             regstart[mcnt] = regend[mcnt]
3926                 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
3927 
3928             REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3929             IS_ACTIVE (reg_info[mcnt]) = 0;
3930             MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3931             EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3932         }
3933 
3934     /* We move `string1' into `string2' if the latter's empty -- but not if
3935      `string1' is null.  */
3936     if (size2 == 0 && string1 != NULL)
3937         {
3938             string2 = string1;
3939             size2 = size1;
3940             string1 = 0;
3941             size1 = 0;
3942         }
3943     end1 = string1 + size1;
3944     end2 = string2 + size2;
3945 
3946     /* Compute where to stop matching, within the two strings.  */
3947     if (stop <= size1)
3948         {
3949             end_match_1 = string1 + stop;
3950             end_match_2 = string2;
3951         }
3952     else
3953         {
3954             end_match_1 = end1;
3955             end_match_2 = string2 + stop - size1;
3956         }
3957 
3958     /* `p' scans through the pattern as `d' scans through the data.
3959      `dend' is the end of the input string that `d' points within.  `d'
3960      is advanced into the following input string whenever necessary, but
3961      this happens before fetching; therefore, at the beginning of the
3962      loop, `d' can be pointing at the end of a string, but it cannot
3963      equal `string2'.  */
3964     if (size1 > 0 && pos <= size1)
3965         {
3966             d = string1 + pos;
3967             dend = end_match_1;
3968         }
3969     else
3970         {
3971             d = string2 + pos - size1;
3972             dend = end_match_2;
3973         }
3974 
3975     DEBUG_PRINT1 ("The compiled pattern is:\n");
3976     DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
3977     DEBUG_PRINT1 ("The string to match is: `");
3978     DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
3979     DEBUG_PRINT1 ("'\n");
3980 
3981   /* This loops over pattern commands.  It exits by returning from the
3982      function if the match is complete, or it drops through if the match
3983      fails at this starting point in the input data.  */
3984     for (;;)
3985         {
3986 #ifdef _LIBC
3987             DEBUG_PRINT2 ("\n%p: ", p);
3988 #else
3989             DEBUG_PRINT2 ("\n0x%x: ", p);
3990 #endif
3991 
3992             if (p == pend)
3993                 { /* End of pattern means we might have succeeded.  */
3994                     DEBUG_PRINT1 ("end of pattern ... ");
3995 
3996                     /* If we haven't matched the entire string, and we want the
3997                        longest match, try backtracking.  */
3998                     if (d != end_match_2)
3999                         {
4000                             /* 1 if this match ends in the same string (string1 or string2)
4001                                as the best previous match.  */
4002                             boolean same_str_p = (FIRST_STRING_P (match_end)
4003                                                   == MATCHING_IN_FIRST_STRING);
4004                             /* 1 if this match is the best seen so far.  */
4005                             boolean best_match_p;
4006 
4007                             /* AIX compiler got confused when this was combined
4008                                with the previous declaration.  */
4009                             if (same_str_p)
4010                                 best_match_p = d > match_end;
4011                             else
4012                                 best_match_p = !MATCHING_IN_FIRST_STRING;
4013 
4014                             DEBUG_PRINT1 ("backtracking.\n");
4015 
4016                             if (!FAIL_STACK_EMPTY ())
4017                                 { /* More failure points to try.  */
4018 
4019                                     /* If exceeds best match so far, save it.  */
4020                                     if (!best_regs_set || best_match_p)
4021                                         {
4022                                             best_regs_set = true;
4023                                             match_end = d;
4024 
4025                                             DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
4026 
4027                                             for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4028                                                 {
4029                                                     best_regstart[mcnt] = regstart[mcnt];
4030                                                     best_regend[mcnt] = regend[mcnt];
4031                                                 }
4032                                         }
4033                                     goto fail;
4034                                 }
4035 
4036                             /* If no failure points, don't restore garbage.  And if
4037                                last match is real best match, don't restore second
4038                                best one. */
4039                             else if (best_regs_set && !best_match_p)
4040                                 {
4041                                 restore_best_regs:
4042                                     /* Restore best match.  It may happen that `dend ==
4043                                        end_match_1' while the restored d is in string2.
4044                                        For example, the pattern `x.*y.*z' against the
4045                                        strings `x-' and `y-z-', if the two strings are
4046                                        not consecutive in memory.  */
4047                                     DEBUG_PRINT1 ("Restoring best registers.\n");
4048 
4049                                     d = match_end;
4050                                     dend = ((d >= string1 && d <= end1)
4051                                             ? end_match_1 : end_match_2);
4052 
4053                                     for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
4054                                         {
4055                                             regstart[mcnt] = best_regstart[mcnt];
4056                                             regend[mcnt] = best_regend[mcnt];
4057                                         }
4058                                 }
4059                         } /* d != end_match_2 */
4060 
4061                 succeed_label:
4062                     DEBUG_PRINT1 ("Accepting match.\n");
4063 
4064                     /* If caller wants register contents data back, do it.  */
4065                     if (regs && !bufp->no_sub)
4066                         {
4067                             /* Have the register data arrays been allocated?  */
4068                             if (bufp->regs_allocated == REGS_UNALLOCATED)
4069                                 { /* No.  So allocate them with malloc.  We need one
4070                                      extra element beyond `num_regs' for the `-1' marker
4071                                      GNU code uses.  */
4072                                     regs->num_regs = MAX (RE_NREGS, num_regs + 1);
4073                                     regs->start = TALLOC (regs->num_regs, regoff_t);
4074                                     regs->end = TALLOC (regs->num_regs, regoff_t);
4075                                     if (regs->start == NULL || regs->end == NULL)
4076                                         {
4077                                             FREE_VARIABLES ();
4078                                             return -2;
4079                                         }
4080                                     bufp->regs_allocated = REGS_REALLOCATE;
4081                                 }
4082                             else if (bufp->regs_allocated == REGS_REALLOCATE)
4083                                 { /* Yes.  If we need more elements than were already
4084                                      allocated, reallocate them.  If we need fewer, just
4085                                      leave it alone.  */
4086                                     if (regs->num_regs < num_regs + 1)
4087                                         {
4088                                             regs->num_regs = num_regs + 1;
4089                                             RETALLOC (regs->start, regs->num_regs, regoff_t);
4090                                             RETALLOC (regs->end, regs->num_regs, regoff_t);
4091                                             if (regs->start == NULL || regs->end == NULL)
4092                                                 {
4093                                                     FREE_VARIABLES ();
4094                                                     return -2;
4095                                                 }
4096                                         }
4097                                 }
4098                             else
4099                                 {
4100                                     /* These braces fend off a "empty body in an else-statement"
4101                                        warning under GCC when assert expands to nothing.  */
4102                                     assert (bufp->regs_allocated == REGS_FIXED);
4103                                 }
4104 
4105                             /* Convert the pointer data in `regstart' and `regend' to
4106                                indices.  Register zero has to be set differently,
4107                                since we haven't kept track of any info for it.  */
4108                             if (regs->num_regs > 0)
4109                                 {
4110                                     regs->start[0] = pos;
4111                                     regs->end[0] = (MATCHING_IN_FIRST_STRING
4112                                                     ? ((regoff_t) (d - string1))
4113                                                     : ((regoff_t) (d - string2 + size1)));
4114                                 }
4115 
4116                             /* Go through the first `min (num_regs, regs->num_regs)'
4117                                registers, since that is all we initialized.  */
4118                             for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
4119                                  mcnt++)
4120                                 {
4121                                     if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
4122                                         regs->start[mcnt] = regs->end[mcnt] = -1;
4123                                     else
4124                                         {
4125                                             regs->start[mcnt]
4126                                                 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
4127                                             regs->end[mcnt]
4128                                                 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
4129                                         }
4130                                 }
4131 
4132                             /* If the regs structure we return has more elements than
4133                                were in the pattern, set the extra elements to -1.  If
4134                                we (re)allocated the registers, this is the case,
4135                                because we always allocate enough to have at least one
4136                                -1 at the end.  */
4137                             for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
4138                                 regs->start[mcnt] = regs->end[mcnt] = -1;
4139                         } /* regs && !bufp->no_sub */
4140 
4141                     DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
4142                                   nfailure_points_pushed, nfailure_points_popped,
4143                                   nfailure_points_pushed - nfailure_points_popped);
4144                     DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
4145 
4146                     mcnt = d - pos - (MATCHING_IN_FIRST_STRING
4147                                       ? string1
4148                                       : string2 - size1);
4149 
4150                     DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
4151 
4152                     FREE_VARIABLES ();
4153                     return mcnt;
4154                 }
4155 
4156             /* Otherwise match next pattern command.  */
4157             switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
4158                 {
4159                     /* Ignore these.  Used to ignore the n of succeed_n's which
4160                        currently have n == 0.  */
4161                 case no_op:
4162                     DEBUG_PRINT1 ("EXECUTING no_op.\n");
4163                     break;
4164 
4165                 case succeed:
4166                     DEBUG_PRINT1 ("EXECUTING succeed.\n");
4167                     goto succeed_label;
4168 
4169                     /* Match the next n pattern characters exactly.  The following
4170                        byte in the pattern defines n, and the n bytes after that
4171                        are the characters to match.  */
4172                 case exactn:
4173                     mcnt = *p++;
4174                     DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
4175 
4176                     /* This is written out as an if-else so we don't waste time
4177                        testing `translate' inside the loop.  */
4178                     if (translate)
4179                         {
4180                             do
4181                                 {
4182                                     PREFETCH ();
4183                                     if ((unsigned char) translate[(unsigned char) *d++]
4184                                         != (unsigned char) *p++)
4185                                         goto fail;
4186                                 }
4187                             while (--mcnt);
4188                         }
4189                     else
4190                         {
4191                             do
4192                                 {
4193                                     PREFETCH ();
4194                                     if (*d++ != (char) *p++) goto fail;
4195                                 }
4196                             while (--mcnt);
4197                         }
4198                     SET_REGS_MATCHED ();
4199                     break;
4200 
4201 
4202                     /* Match any character except possibly a newline or a null.  */
4203                 case anychar:
4204                     DEBUG_PRINT1 ("EXECUTING anychar.\n");
4205 
4206                     PREFETCH ();
4207 
4208                     if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
4209                         || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
4210                         goto fail;
4211 
4212                     SET_REGS_MATCHED ();
4213                     DEBUG_PRINT2 ("  Matched `%d'.\n", *d);
4214                     d++;
4215                     break;
4216 
4217 
4218                 case charset:
4219                 case charset_not:
4220                     {
4221                         register unsigned char c;
4222                         boolean not = (re_opcode_t) *(p - 1) == charset_not;
4223 
4224                         DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
4225 
4226                         PREFETCH ();
4227                         c = TRANSLATE (*d); /* The character to match.  */
4228 
4229                         /* Cast to `unsigned' instead of `unsigned char' in case the
4230                            bit list is a full 32 bytes long.  */
4231                         if (c < (unsigned) (*p * BYTEWIDTH)
4232                             && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4233                             not = !not;
4234 
4235                         p += 1 + *p;
4236 
4237                         if (!not) goto fail;
4238 
4239                         SET_REGS_MATCHED ();
4240                         d++;
4241                         break;
4242                     }
4243 
4244 
4245                     /* The beginning of a group is represented by start_memory.
4246                        The arguments are the register number in the next byte, and the
4247                        number of groups inner to this one in the next.  The text
4248                        matched within the group is recorded (in the internal
4249                        registers data structure) under the register number.  */
4250                 case start_memory:
4251                     DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
4252 
4253                     /* Find out if this group can match the empty string.  */
4254                     p1 = p;             /* To send to group_match_null_string_p.  */
4255 
4256                     if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
4257                         REG_MATCH_NULL_STRING_P (reg_info[*p])
4258                             = group_match_null_string_p (&p1, pend, reg_info);
4259 
4260                     /* Save the position in the string where we were the last time
4261                        we were at this open-group operator in case the group is
4262                        operated upon by a repetition operator, e.g., with `(a*)*b'
4263                        against `ab'; then we want to ignore where we are now in
4264                        the string in case this attempt to match fails.  */
4265                     old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4266                         ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
4267                         : regstart[*p];
4268                     DEBUG_PRINT2 ("  old_regstart: %d\n",
4269                                   POINTER_TO_OFFSET (old_regstart[*p]));
4270 
4271                     regstart[*p] = d;
4272                     DEBUG_PRINT2 ("  regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
4273 
4274                     IS_ACTIVE (reg_info[*p]) = 1;
4275                     MATCHED_SOMETHING (reg_info[*p]) = 0;
4276 
4277                     /* Clear this whenever we change the register activity status.  */
4278                     set_regs_matched_done = 0;
4279 
4280                     /* This is the new highest active register.  */
4281                     highest_active_reg = *p;
4282 
4283                     /* If nothing was active before, this is the new lowest active
4284                        register.  */
4285                     if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4286                         lowest_active_reg = *p;
4287 
4288                     /* Move past the register number and inner group count.  */
4289                     p += 2;
4290                     just_past_start_mem = p;
4291 
4292                     break;
4293 
4294 
4295                     /* The stop_memory opcode represents the end of a group.  Its
4296                        arguments are the same as start_memory's: the register
4297                        number, and the number of inner groups.  */
4298                 case stop_memory:
4299                     DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
4300 
4301                     /* We need to save the string position the last time we were at
4302                        this close-group operator in case the group is operated
4303                        upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
4304                        against `aba'; then we want to ignore where we are now in
4305                        the string in case this attempt to match fails.  */
4306                     old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
4307                         ? REG_UNSET (regend[*p]) ? d : regend[*p]
4308                         : regend[*p];
4309                     DEBUG_PRINT2 ("      old_regend: %d\n",
4310                                   POINTER_TO_OFFSET (old_regend[*p]));
4311 
4312                     regend[*p] = d;
4313                     DEBUG_PRINT2 ("      regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
4314 
4315                     /* This register isn't active anymore.  */
4316                     IS_ACTIVE (reg_info[*p]) = 0;
4317 
4318                     /* Clear this whenever we change the register activity status.  */
4319                     set_regs_matched_done = 0;
4320 
4321                     /* If this was the only register active, nothing is active
4322                        anymore.  */
4323                     if (lowest_active_reg == highest_active_reg)
4324                         {
4325                             lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4326                             highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4327                         }
4328                     else
4329                         { /* We must scan for the new highest active register, since
4330                              it isn't necessarily one less than now: consider
4331                              (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
4332                              new highest active register is 1.  */
4333                             unsigned char r = *p - 1;
4334                             while (r > 0 && !IS_ACTIVE (reg_info[r]))
4335                                 r--;
4336 
4337                             /* If we end up at register zero, that means that we saved
4338                                the registers as the result of an `on_failure_jump', not
4339                                a `start_memory', and we jumped to past the innermost
4340                                `stop_memory'.  For example, in ((.)*) we save
4341                                registers 1 and 2 as a result of the *, but when we pop
4342                                back to the second ), we are at the stop_memory 1.
4343                                Thus, nothing is active.  */
4344                             if (r == 0)
4345                                 {
4346                                     lowest_active_reg = NO_LOWEST_ACTIVE_REG;
4347                                     highest_active_reg = NO_HIGHEST_ACTIVE_REG;
4348                                 }
4349                             else
4350                                 highest_active_reg = r;
4351                         }
4352 
4353                     /* If just failed to match something this time around with a
4354                        group that's operated on by a repetition operator, try to
4355                        force exit from the ``loop'', and restore the register
4356                        information for this group that we had before trying this
4357                        last match.  */
4358                     if ((!MATCHED_SOMETHING (reg_info[*p])
4359                          || just_past_start_mem == p - 1)
4360                         && (p + 2) < pend)
4361                         {
4362                             boolean is_a_jump_n = false;
4363 
4364                             p1 = p + 2;
4365                             mcnt = 0;
4366                             switch ((re_opcode_t) *p1++)
4367                                 {
4368                                 case jump_n:
4369                                     is_a_jump_n = true;
4370                                 case pop_failure_jump:
4371                                 case maybe_pop_jump:
4372                                 case jump:
4373                                 case dummy_failure_jump:
4374                                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4375                                     if (is_a_jump_n)
4376                                         p1 += 2;
4377                                     break;
4378 
4379                                 default:
4380                                     /* do nothing */ ;
4381                                 }
4382                             p1 += mcnt;
4383 
4384                             /* If the next operation is a jump backwards in the pattern
4385                                to an on_failure_jump right before the start_memory
4386                                corresponding to this stop_memory, exit from the loop
4387                                by forcing a failure after pushing on the stack the
4388                                on_failure_jump's jump in the pattern, and d.  */
4389                             if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
4390                                 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
4391                                 {
4392                                     /* If this group ever matched anything, then restore
4393                                        what its registers were before trying this last
4394                                        failed match, e.g., with `(a*)*b' against `ab' for
4395                                        regstart[1], and, e.g., with `((a*)*(b*)*)*'
4396                                        against `aba' for regend[3].
4397 
4398                                        Also restore the registers for inner groups for,
4399                                        e.g., `((a*)(b*))*' against `aba' (register 3 would
4400                                        otherwise get trashed).  */
4401 
4402                                     if (EVER_MATCHED_SOMETHING (reg_info[*p]))
4403                                         {
4404                                             unsigned r;
4405 
4406                                             EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
4407 
4408                                             /* Restore this and inner groups' (if any) registers.  */
4409                                             for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
4410                                                  r++)
4411                                                 {
4412                                                     regstart[r] = old_regstart[r];
4413 
4414                                                     /* xx why this test?  */
4415                                                     if (old_regend[r] >= regstart[r])
4416                                                         regend[r] = old_regend[r];
4417                                                 }
4418                                         }
4419                                     p1++;
4420                                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4421                                     PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
4422 
4423                                     goto fail;
4424                                 }
4425                         }
4426 
4427                     /* Move past the register number and the inner group count.  */
4428                     p += 2;
4429                     break;
4430 
4431 
4432                     /* \<digit> has been turned into a `duplicate' command which is
4433                        followed by the numeric value of <digit> as the register number.  */
4434                 case duplicate:
4435                     {
4436                         register const char *d2, *dend2;
4437                         int regno = *p++;   /* Get which register to match against.  */
4438                         DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
4439 
4440                         /* Can't back reference a group which we've never matched.  */
4441                         if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
4442                             goto fail;
4443 
4444                         /* Where in input to try to start matching.  */
4445                         d2 = regstart[regno];
4446 
4447                         /* Where to stop matching; if both the place to start and
4448                            the place to stop matching are in the same string, then
4449                            set to the place to stop, otherwise, for now have to use
4450                            the end of the first string.  */
4451 
4452                         dend2 = ((FIRST_STRING_P (regstart[regno])
4453                                   == FIRST_STRING_P (regend[regno]))
4454                                  ? regend[regno] : end_match_1);
4455                         for (;;)
4456                             {
4457                                 /* If necessary, advance to next segment in register
4458                                    contents.  */
4459                                 while (d2 == dend2)
4460                                     {
4461                                         if (dend2 == end_match_2) break;
4462                                         if (dend2 == regend[regno]) break;
4463 
4464                                         /* End of string1 => advance to string2. */
4465                                         d2 = string2;
4466                                         dend2 = regend[regno];
4467                                     }
4468                                 /* At end of register contents => success */
4469                                 if (d2 == dend2) break;
4470 
4471                                 /* If necessary, advance to next segment in data.  */
4472                                 PREFETCH ();
4473 
4474                                 /* How many characters left in this segment to match.  */
4475                                 mcnt = dend - d;
4476 
4477                                 /* Want how many consecutive characters we can match in
4478                                    one shot, so, if necessary, adjust the count.  */
4479                                 if (mcnt > dend2 - d2)
4480                                     mcnt = dend2 - d2;
4481 
4482                                 /* Compare that many; failure if mismatch, else move
4483                                    past them.  */
4484                                 if (translate
4485                                     ? bcmp_translate (d, d2, mcnt, translate)
4486                                     : bcmp (d, d2, mcnt))
4487                                     goto fail;
4488                                 d += mcnt, d2 += mcnt;
4489 
4490                                 /* Do this because we've match some characters.  */
4491                                 SET_REGS_MATCHED ();
4492                             }
4493                     }
4494                     break;
4495 
4496 
4497                     /* begline matches the empty string at the beginning of the string
4498                        (unless `not_bol' is set in `bufp'), and, if
4499                        `newline_anchor' is set, after newlines.  */
4500                 case begline:
4501                     DEBUG_PRINT1 ("EXECUTING begline.\n");
4502 
4503                     if (AT_STRINGS_BEG (d))
4504                         {
4505                             if (!bufp->not_bol) break;
4506                         }
4507                     else if (d[-1] == '\n' && bufp->newline_anchor)
4508                         {
4509                             break;
4510                         }
4511                     /* In all other cases, we fail.  */
4512                     goto fail;
4513 
4514 
4515                     /* endline is the dual of begline.  */
4516                 case endline:
4517                     DEBUG_PRINT1 ("EXECUTING endline.\n");
4518 
4519                     if (AT_STRINGS_END (d))
4520                         {
4521                             if (!bufp->not_eol) break;
4522                         }
4523 
4524                     /* We have to ``prefetch'' the next character.  */
4525                     else if ((d == end1 ? *string2 : *d) == '\n'
4526                              && bufp->newline_anchor)
4527                         {
4528                             break;
4529                         }
4530                     goto fail;
4531 
4532 
4533                     /* Match at the very beginning of the data.  */
4534                 case begbuf:
4535                     DEBUG_PRINT1 ("EXECUTING begbuf.\n");
4536                     if (AT_STRINGS_BEG (d))
4537                         break;
4538                     goto fail;
4539 
4540 
4541                     /* Match at the very end of the data.  */
4542                 case endbuf:
4543                     DEBUG_PRINT1 ("EXECUTING endbuf.\n");
4544                     if (AT_STRINGS_END (d))
4545                         break;
4546                     goto fail;
4547 
4548 
4549                     /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
4550                        pushes NULL as the value for the string on the stack.  Then
4551                        `pop_failure_point' will keep the current value for the
4552                        string, instead of restoring it.  To see why, consider
4553                        matching `foo\nbar' against `.*\n'.  The .* matches the foo;
4554                        then the . fails against the \n.  But the next thing we want
4555                        to do is match the \n against the \n; if we restored the
4556                        string value, we would be back at the foo.
4557 
4558                        Because this is used only in specific cases, we don't need to
4559                        check all the things that `on_failure_jump' does, to make
4560                        sure the right things get saved on the stack.  Hence we don't
4561                        share its code.  The only reason to push anything on the
4562                        stack at all is that otherwise we would have to change
4563                        `anychar's code to do something besides goto fail in this
4564                        case; that seems worse than this.  */
4565                 case on_failure_keep_string_jump:
4566                     DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
4567 
4568                     EXTRACT_NUMBER_AND_INCR (mcnt, p);
4569 #ifdef _LIBC
4570                     DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
4571 #else
4572                     DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
4573 #endif
4574 
4575                     PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
4576                     break;
4577 
4578 
4579                     /* Uses of on_failure_jump:
4580 
4581                        Each alternative starts with an on_failure_jump that points
4582                        to the beginning of the next alternative.  Each alternative
4583                        except the last ends with a jump that in effect jumps past
4584                        the rest of the alternatives.  (They really jump to the
4585                        ending jump of the following alternative, because tensioning
4586                        these jumps is a hassle.)
4587 
4588                        Repeats start with an on_failure_jump that points past both
4589                        the repetition text and either the following jump or
4590                        pop_failure_jump back to this on_failure_jump.  */
4591                 case on_failure_jump:
4592                 on_failure:
4593                 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
4594 
4595                 EXTRACT_NUMBER_AND_INCR (mcnt, p);
4596 #ifdef _LIBC
4597                 DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
4598 #else
4599                 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
4600 #endif
4601 
4602                 /* If this on_failure_jump comes right before a group (i.e.,
4603                    the original * applied to a group), save the information
4604                    for that group and all inner ones, so that if we fail back
4605                    to this point, the group's information will be correct.
4606                    For example, in \(a*\)*\1, we need the preceding group,
4607                    and in \(zz\(a*\)b*\)\2, we need the inner group.  */
4608 
4609                 /* We can't use `p' to check ahead because we push
4610                    a failure point to `p + mcnt' after we do this.  */
4611                 p1 = p;
4612 
4613                 /* We need to skip no_op's before we look for the
4614                    start_memory in case this on_failure_jump is happening as
4615                    the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
4616                    against aba.  */
4617                 while (p1 < pend && (re_opcode_t) *p1 == no_op)
4618                     p1++;
4619 
4620                 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
4621                     {
4622                         /* We have a new highest active register now.  This will
4623                            get reset at the start_memory we are about to get to,
4624                            but we will have saved all the registers relevant to
4625                            this repetition op, as described above.  */
4626                         highest_active_reg = *(p1 + 1) + *(p1 + 2);
4627                         if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
4628                             lowest_active_reg = *(p1 + 1);
4629                     }
4630 
4631                 DEBUG_PRINT1 (":\n");
4632                 PUSH_FAILURE_POINT (p + mcnt, d, -2);
4633                 break;
4634 
4635 
4636                 /* A smart repeat ends with `maybe_pop_jump'.
4637                    We change it to either `pop_failure_jump' or `jump'.  */
4638                 case maybe_pop_jump:
4639                     EXTRACT_NUMBER_AND_INCR (mcnt, p);
4640                     DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
4641                     {
4642                         register unsigned char *p2 = p;
4643 
4644                         /* Compare the beginning of the repeat with what in the
4645                            pattern follows its end. If we can establish that there
4646                            is nothing that they would both match, i.e., that we
4647                            would have to backtrack because of (as in, e.g., `a*a')
4648                            then we can change to pop_failure_jump, because we'll
4649                            never have to backtrack.
4650 
4651                            This is not true in the case of alternatives: in
4652                            `(a|ab)*' we do need to backtrack to the `ab' alternative
4653                            (e.g., if the string was `ab').  But instead of trying to
4654                            detect that here, the alternative has put on a dummy
4655                            failure point which is what we will end up popping.  */
4656 
4657                         /* Skip over open/close-group commands.
4658                            If what follows this loop is a ...+ construct,
4659                            look at what begins its body, since we will have to
4660                            match at least one of that.  */
4661                         while (1)
4662                             {
4663                                 if (p2 + 2 < pend
4664                                     && ((re_opcode_t) *p2 == stop_memory
4665                                         || (re_opcode_t) *p2 == start_memory))
4666                                     p2 += 3;
4667                                 else if (p2 + 6 < pend
4668                                          && (re_opcode_t) *p2 == dummy_failure_jump)
4669                                     p2 += 6;
4670                                 else
4671                                     break;
4672                             }
4673 
4674                         p1 = p + mcnt;
4675                         /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
4676                            to the `maybe_finalize_jump' of this case.  Examine what
4677                            follows.  */
4678 
4679                         /* If we're at the end of the pattern, we can change.  */
4680                         if (p2 == pend)
4681                             {
4682                                 /* Consider what happens when matching ":\(.*\)"
4683                                    against ":/".  I don't really understand this code
4684                                    yet.  */
4685                                 p[-3] = (unsigned char) pop_failure_jump;
4686                                 DEBUG_PRINT1
4687                                     ("  End of pattern: change to `pop_failure_jump'.\n");
4688                             }
4689 
4690                         else if ((re_opcode_t) *p2 == exactn
4691                                  || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
4692                             {
4693                                 register unsigned char c
4694                                     = *p2 == (unsigned char) endline ? '\n' : p2[2];
4695 
4696                                 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
4697                                     {
4698                                         p[-3] = (unsigned char) pop_failure_jump;
4699                                         DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4700                                                       c, p1[5]);
4701                                     }
4702 
4703                                 else if ((re_opcode_t) p1[3] == charset
4704                                          || (re_opcode_t) p1[3] == charset_not)
4705                                     {
4706                                         int not = (re_opcode_t) p1[3] == charset_not;
4707 
4708                                         if (c < (unsigned char) (p1[4] * BYTEWIDTH)
4709                                             && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
4710                                             not = !not;
4711 
4712                                         /* `not' is equal to 1 if c would match, which means
4713                                            that we can't change to pop_failure_jump.  */
4714                                         if (!not)
4715                                             {
4716                                                 p[-3] = (unsigned char) pop_failure_jump;
4717                                                 DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4718                                             }
4719                                     }
4720                             }
4721                         else if ((re_opcode_t) *p2 == charset)
4722                             {
4723 #ifdef DEBUG
4724                                 register unsigned char c
4725                                     = *p2 == (unsigned char) endline ? '\n' : p2[2];
4726 #endif
4727 
4728 #if 0
4729                                 if ((re_opcode_t) p1[3] == exactn
4730                                     && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
4731                                           && (p2[2 + p1[5] / BYTEWIDTH]
4732                                               & (1 << (p1[5] % BYTEWIDTH)))))
4733 #else
4734                                     if ((re_opcode_t) p1[3] == exactn
4735                                         && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
4736                                               && (p2[2 + p1[4] / BYTEWIDTH]
4737                                                   & (1 << (p1[4] % BYTEWIDTH)))))
4738 #endif
4739                                         {
4740                                             p[-3] = (unsigned char) pop_failure_jump;
4741                                             DEBUG_PRINT3 ("  %c != %c => pop_failure_jump.\n",
4742                                                           c, p1[5]);
4743                                         }
4744 
4745                                     else if ((re_opcode_t) p1[3] == charset_not)
4746                                         {
4747                                             int idx;
4748                                             /* We win if the charset_not inside the loop
4749                                                lists every character listed in the charset after.  */
4750                                             for (idx = 0; idx < (int) p2[1]; idx++)
4751                                                 if (! (p2[2 + idx] == 0
4752                                                        || (idx < (int) p1[4]
4753                                                            && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
4754                                                     break;
4755 
4756                                             if (idx == p2[1])
4757                                                 {
4758                                                     p[-3] = (unsigned char) pop_failure_jump;
4759                                                     DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4760                                                 }
4761                                         }
4762                                     else if ((re_opcode_t) p1[3] == charset)
4763                                         {
4764                                             int idx;
4765                                             /* We win if the charset inside the loop
4766                                                has no overlap with the one after the loop.  */
4767                                             for (idx = 0;
4768                                                  idx < (int) p2[1] && idx < (int) p1[4];
4769                                                  idx++)
4770                                                 if ((p2[2 + idx] & p1[5 + idx]) != 0)
4771                                                     break;
4772 
4773                                             if (idx == p2[1] || idx == p1[4])
4774                                                 {
4775                                                     p[-3] = (unsigned char) pop_failure_jump;
4776                                                     DEBUG_PRINT1 ("  No match => pop_failure_jump.\n");
4777                                                 }
4778                                         }
4779                             }
4780                     }
4781                     p -= 2;             /* Point at relative address again.  */
4782                     if ((re_opcode_t) p[-1] != pop_failure_jump)
4783                         {
4784                             p[-1] = (unsigned char) jump;
4785                             DEBUG_PRINT1 ("  Match => jump.\n");
4786                             goto unconditional_jump;
4787                         }
4788                     /* Note fall through.  */
4789 
4790 
4791                     /* The end of a simple repeat has a pop_failure_jump back to
4792                        its matching on_failure_jump, where the latter will push a
4793                        failure point.  The pop_failure_jump takes off failure
4794                        points put on by this pop_failure_jump's matching
4795                        on_failure_jump; we got through the pattern to here from the
4796                        matching on_failure_jump, so didn't fail.  */
4797                 case pop_failure_jump:
4798                     {
4799                         /* We need to pass separate storage for the lowest and
4800                            highest registers, even though we don't care about the
4801                            actual values.  Otherwise, we will restore only one
4802                            register from the stack, since lowest will == highest in
4803                            `pop_failure_point'.  */
4804                         active_reg_t dummy_low_reg, dummy_high_reg;
4805                         unsigned char *pdummy;
4806                         const char *sdummy;
4807 
4808                         DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
4809                         POP_FAILURE_POINT (sdummy, pdummy,
4810                                            dummy_low_reg, dummy_high_reg,
4811                                            reg_dummy, reg_dummy, reg_info_dummy);
4812                     }
4813                     /* Note fall through.  */
4814 
4815                 unconditional_jump:
4816 #ifdef _LIBC
4817                 DEBUG_PRINT2 ("\n%p: ", p);
4818 #else
4819                 DEBUG_PRINT2 ("\n0x%x: ", p);
4820 #endif
4821                 /* Note fall through.  */
4822 
4823                 /* Unconditionally jump (without popping any failure points).  */
4824                 case jump:
4825                     EXTRACT_NUMBER_AND_INCR (mcnt, p);  /* Get the amount to jump.  */
4826                     DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
4827                     p += mcnt;                          /* Do the jump.  */
4828 #ifdef _LIBC
4829                     DEBUG_PRINT2 ("(to %p).\n", p);
4830 #else
4831                     DEBUG_PRINT2 ("(to 0x%x).\n", p);
4832 #endif
4833                     break;
4834 
4835 
4836                     /* We need this opcode so we can detect where alternatives end
4837                        in `group_match_null_string_p' et al.  */
4838                 case jump_past_alt:
4839                     DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
4840                     goto unconditional_jump;
4841 
4842 
4843                     /* Normally, the on_failure_jump pushes a failure point, which
4844                        then gets popped at pop_failure_jump.  We will end up at
4845                        pop_failure_jump, also, and with a pattern of, say, `a+', we
4846                        are skipping over the on_failure_jump, so we have to push
4847                        something meaningless for pop_failure_jump to pop.  */
4848                 case dummy_failure_jump:
4849                     DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
4850                     /* It doesn't matter what we push for the string here.  What
4851                        the code at `fail' tests is the value for the pattern.  */
4852                     PUSH_FAILURE_POINT (0, 0, -2);
4853                     goto unconditional_jump;
4854 
4855 
4856                     /* At the end of an alternative, we need to push a dummy failure
4857                        point in case we are followed by a `pop_failure_jump', because
4858                        we don't want the failure point for the alternative to be
4859                        popped.  For example, matching `(a|ab)*' against `aab'
4860                        requires that we match the `ab' alternative.  */
4861                 case push_dummy_failure:
4862                     DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
4863                     /* See comments just above at `dummy_failure_jump' about the
4864                        two zeroes.  */
4865                     PUSH_FAILURE_POINT (0, 0, -2);
4866                     break;
4867 
4868                     /* Have to succeed matching what follows at least n times.
4869                        After that, handle like `on_failure_jump'.  */
4870                 case succeed_n:
4871                     EXTRACT_NUMBER (mcnt, p + 2);
4872                     DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
4873 
4874                     assert (mcnt >= 0);
4875                     /* Originally, this is how many times we HAVE to succeed.  */
4876                     if (mcnt > 0)
4877                         {
4878                             mcnt--;
4879                             p += 2;
4880                             STORE_NUMBER_AND_INCR (p, mcnt);
4881 #ifdef _LIBC
4882                             DEBUG_PRINT3 ("  Setting %p to %d.\n", p - 2, mcnt);
4883 #else
4884                             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p - 2, mcnt);
4885 #endif
4886                         }
4887                     else if (mcnt == 0)
4888                         {
4889 #ifdef _LIBC
4890                             DEBUG_PRINT2 ("  Setting two bytes from %p to no_op.\n", p+2);
4891 #else
4892                             DEBUG_PRINT2 ("  Setting two bytes from 0x%x to no_op.\n", p+2);
4893 #endif
4894                             p[2] = (unsigned char) no_op;
4895                             p[3] = (unsigned char) no_op;
4896                             goto on_failure;
4897                         }
4898                     break;
4899 
4900                 case jump_n:
4901                     EXTRACT_NUMBER (mcnt, p + 2);
4902                     DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
4903 
4904                     /* Originally, this is how many times we CAN jump.  */
4905                     if (mcnt)
4906                         {
4907                             mcnt--;
4908                             STORE_NUMBER (p + 2, mcnt);
4909 #ifdef _LIBC
4910                             DEBUG_PRINT3 ("  Setting %p to %d.\n", p + 2, mcnt);
4911 #else
4912                             DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p + 2, mcnt);
4913 #endif
4914                             goto unconditional_jump;
4915                         }
4916                     /* If don't have to jump any more, skip over the rest of command.  */
4917                     else
4918                         p += 4;
4919                     break;
4920 
4921                 case set_number_at:
4922                     {
4923                         DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
4924 
4925                         EXTRACT_NUMBER_AND_INCR (mcnt, p);
4926                         p1 = p + mcnt;
4927                         EXTRACT_NUMBER_AND_INCR (mcnt, p);
4928 #ifdef _LIBC
4929                         DEBUG_PRINT3 ("  Setting %p to %d.\n", p1, mcnt);
4930 #else
4931                         DEBUG_PRINT3 ("  Setting 0x%x to %d.\n", p1, mcnt);
4932 #endif
4933                         STORE_NUMBER (p1, mcnt);
4934                         break;
4935                     }
4936 
4937 #if 0
4938                     /* The DEC Alpha C compiler 3.x generates incorrect code for the
4939                        test  WORDCHAR_P (d - 1) != WORDCHAR_P (d)  in the expansion of
4940                        AT_WORD_BOUNDARY, so this code is disabled.  Expanding the
4941                        macro and introducing temporary variables works around the bug.  */
4942 
4943                 case wordbound:
4944                     DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4945                     if (AT_WORD_BOUNDARY (d))
4946                         break;
4947                     goto fail;
4948 
4949                 case notwordbound:
4950                     DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4951                     if (AT_WORD_BOUNDARY (d))
4952                         goto fail;
4953                     break;
4954 #else
4955                 case wordbound:
4956                     {
4957                         boolean prevchar, thischar;
4958 
4959                         DEBUG_PRINT1 ("EXECUTING wordbound.\n");
4960                         if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4961                             break;
4962 
4963                         prevchar = WORDCHAR_P (d - 1);
4964                         thischar = WORDCHAR_P (d);
4965                         if (prevchar != thischar)
4966                             break;
4967                         goto fail;
4968                     }
4969 
4970                 case notwordbound:
4971                     {
4972                         boolean prevchar, thischar;
4973 
4974                         DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
4975                         if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
4976                             goto fail;
4977 
4978                         prevchar = WORDCHAR_P (d - 1);
4979                         thischar = WORDCHAR_P (d);
4980                         if (prevchar != thischar)
4981                             goto fail;
4982                         break;
4983                     }
4984 #endif
4985 
4986                 case wordbeg:
4987                     DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
4988                     if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
4989                         break;
4990                     goto fail;
4991 
4992                 case wordend:
4993                     DEBUG_PRINT1 ("EXECUTING wordend.\n");
4994                     if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
4995                         && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
4996                         break;
4997                     goto fail;
4998 
4999 #ifdef emacs
5000                 case before_dot:
5001                     DEBUG_PRINT1 ("EXECUTING before_dot.\n");
5002                     if (PTR_CHAR_POS ((unsigned char *) d) >= point)
5003                         goto fail;
5004                     break;
5005 
5006                 case at_dot:
5007                     DEBUG_PRINT1 ("EXECUTING at_dot.\n");
5008                     if (PTR_CHAR_POS ((unsigned char *) d) != point)
5009                         goto fail;
5010                     break;
5011 
5012                 case after_dot:
5013                     DEBUG_PRINT1 ("EXECUTING after_dot.\n");
5014                     if (PTR_CHAR_POS ((unsigned char *) d) <= point)
5015                         goto fail;
5016                     break;
5017 
5018                 case syntaxspec:
5019                     DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
5020                     mcnt = *p++;
5021                     goto matchsyntax;
5022 
5023                 case wordchar:
5024                     DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
5025                     mcnt = (int) Sword;
5026                 matchsyntax:
5027                     PREFETCH ();
5028                     /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5029                     d++;
5030                     if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
5031                         goto fail;
5032                     SET_REGS_MATCHED ();
5033                     break;
5034 
5035                 case notsyntaxspec:
5036                     DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
5037                     mcnt = *p++;
5038                     goto matchnotsyntax;
5039 
5040                 case notwordchar:
5041                     DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
5042                     mcnt = (int) Sword;
5043                 matchnotsyntax:
5044                     PREFETCH ();
5045                     /* Can't use *d++ here; SYNTAX may be an unsafe macro.  */
5046                     d++;
5047                     if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
5048                         goto fail;
5049                     SET_REGS_MATCHED ();
5050                     break;
5051 
5052 #else /* not emacs */
5053                 case wordchar:
5054                     DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
5055                     PREFETCH ();
5056                     if (!WORDCHAR_P (d))
5057                         goto fail;
5058                     SET_REGS_MATCHED ();
5059                     d++;
5060                     break;
5061 
5062                 case notwordchar:
5063                     DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
5064                     PREFETCH ();
5065                     if (WORDCHAR_P (d))
5066                         goto fail;
5067                     SET_REGS_MATCHED ();
5068                     d++;
5069                     break;
5070 #endif /* not emacs */
5071 
5072                 default:
5073                     abort ();
5074                 }
5075             continue;  /* Successfully executed one pattern command; keep going.  */
5076 
5077 
5078             /* We goto here if a matching operation fails. */
5079         fail:
5080             if (!FAIL_STACK_EMPTY ())
5081                 { /* A restart point is known.  Restore to that state.  */
5082                     DEBUG_PRINT1 ("\nFAIL:\n");
5083                     POP_FAILURE_POINT (d, p,
5084                                        lowest_active_reg, highest_active_reg,
5085                                        regstart, regend, reg_info);
5086 
5087                     /* If this failure point is a dummy, try the next one.  */
5088                     if (!p)
5089                         goto fail;
5090 
5091                     /* If we failed to the end of the pattern, don't examine *p.  */
5092                     assert (p <= pend);
5093                     if (p < pend)
5094                         {
5095                             boolean is_a_jump_n = false;
5096 
5097                             /* If failed to a backwards jump that's part of a repetition
5098                                loop, need to pop this failure point and use the next one.  */
5099                             switch ((re_opcode_t) *p)
5100                                 {
5101                                 case jump_n:
5102                                     is_a_jump_n = true;
5103                                 case maybe_pop_jump:
5104                                 case pop_failure_jump:
5105                                 case jump:
5106                                     p1 = p + 1;
5107                                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5108                                     p1 += mcnt;
5109 
5110                                     if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
5111                                         || (!is_a_jump_n
5112                                             && (re_opcode_t) *p1 == on_failure_jump))
5113                                         goto fail;
5114                                     break;
5115                                 default:
5116                                     /* do nothing */ ;
5117                                 }
5118                         }
5119 
5120                     if (d >= string1 && d <= end1)
5121                         dend = end_match_1;
5122                 }
5123             else
5124                 break;   /* Matching at this starting point really fails.  */
5125         } /* for (;;) */
5126 
5127     if (best_regs_set)
5128         goto restore_best_regs;
5129 
5130     FREE_VARIABLES ();
5131 
5132     return -1;                          /* Failure to match.  */
5133 } /* re_match_2 */
5134 
5135 /* Subroutine definitions for re_match_2.  */
5136 
5137 
5138 /* We are passed P pointing to a register number after a start_memory.
5139 
5140    Return true if the pattern up to the corresponding stop_memory can
5141    match the empty string, and false otherwise.
5142 
5143    If we find the matching stop_memory, sets P to point to one past its number.
5144    Otherwise, sets P to an undefined byte less than or equal to END.
5145 
5146    We don't handle duplicates properly (yet).  */
5147 
5148 static boolean
group_match_null_string_p(unsigned char ** p,unsigned char * end,register_info_type * reg_info)5149 group_match_null_string_p(unsigned char **p,
5150                           unsigned char *end,
5151                           register_info_type *reg_info)
5152 {
5153     int mcnt;
5154     /* Point to after the args to the start_memory.  */
5155     unsigned char *p1 = *p + 2;
5156 
5157     while (p1 < end)
5158         {
5159             /* Skip over opcodes that can match nothing, and return true or
5160                false, as appropriate, when we get to one that can't, or to the
5161                matching stop_memory.  */
5162 
5163             switch ((re_opcode_t) *p1)
5164                 {
5165                     /* Could be either a loop or a series of alternatives.  */
5166                 case on_failure_jump:
5167                     p1++;
5168                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5169 
5170                     /* If the next operation is not a jump backwards in the
5171                        pattern.  */
5172 
5173                     if (mcnt >= 0)
5174                         {
5175                             /* Go through the on_failure_jumps of the alternatives,
5176                                seeing if any of the alternatives cannot match nothing.
5177                                The last alternative starts with only a jump,
5178                                whereas the rest start with on_failure_jump and end
5179                                with a jump, e.g., here is the pattern for `a|b|c':
5180 
5181                                /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
5182                                /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
5183                                /exactn/1/c
5184 
5185                                So, we have to first go through the first (n-1)
5186                                alternatives and then deal with the last one separately.  */
5187 
5188 
5189                             /* Deal with the first (n-1) alternatives, which start
5190                                with an on_failure_jump (see above) that jumps to right
5191                                past a jump_past_alt.  */
5192 
5193                             while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
5194                                 {
5195                                     /* `mcnt' holds how many bytes long the alternative
5196                                        is, including the ending `jump_past_alt' and
5197                                        its number.  */
5198 
5199                                     if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
5200                                                                   reg_info))
5201                                         return false;
5202 
5203                                     /* Move to right after this alternative, including the
5204                                        jump_past_alt.  */
5205                                     p1 += mcnt;
5206 
5207                                     /* Break if it's the beginning of an n-th alternative
5208                                        that doesn't begin with an on_failure_jump.  */
5209                                     if ((re_opcode_t) *p1 != on_failure_jump)
5210                                         break;
5211 
5212                                     /* Still have to check that it's not an n-th
5213                                        alternative that starts with an on_failure_jump.  */
5214                                     p1++;
5215                                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5216                                     if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
5217                                         {
5218                                             /* Get to the beginning of the n-th alternative.  */
5219                                             p1 -= 3;
5220                                             break;
5221                                         }
5222                                 }
5223 
5224                             /* Deal with the last alternative: go back and get number
5225                                of the `jump_past_alt' just before it.  `mcnt' contains
5226                                the length of the alternative.  */
5227                             EXTRACT_NUMBER (mcnt, p1 - 2);
5228 
5229                             if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
5230                                 return false;
5231 
5232                             p1 += mcnt; /* Get past the n-th alternative.  */
5233                         } /* if mcnt > 0 */
5234                     break;
5235 
5236 
5237                 case stop_memory:
5238                     assert (p1[1] == **p);
5239                     *p = p1 + 2;
5240                     return true;
5241 
5242 
5243                 default:
5244                     if (!common_op_match_null_string_p (&p1, end, reg_info))
5245                         return false;
5246                 }
5247         } /* while p1 < end */
5248 
5249     return false;
5250 } /* group_match_null_string_p */
5251 
5252 
5253 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
5254    It expects P to be the first byte of a single alternative and END one
5255    byte past the last. The alternative can contain groups.  */
5256 
5257 static boolean
alt_match_null_string_p(unsigned char * p,unsigned char * end,register_info_type * reg_info)5258 alt_match_null_string_p(unsigned char *p,
5259                         unsigned char *end,
5260                         register_info_type *reg_info)
5261 {
5262     int mcnt;
5263     unsigned char *p1 = p;
5264 
5265     while (p1 < end)
5266         {
5267             /* Skip over opcodes that can match nothing, and break when we get
5268                to one that can't.  */
5269 
5270             switch ((re_opcode_t) *p1)
5271                 {
5272                     /* It's a loop.  */
5273                 case on_failure_jump:
5274                     p1++;
5275                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5276                     p1 += mcnt;
5277                     break;
5278 
5279                 default:
5280                     if (!common_op_match_null_string_p (&p1, end, reg_info))
5281                         return false;
5282                 }
5283         }  /* while p1 < end */
5284 
5285     return true;
5286 } /* alt_match_null_string_p */
5287 
5288 
5289 /* Deals with the ops common to group_match_null_string_p and
5290    alt_match_null_string_p.
5291 
5292    Sets P to one after the op and its arguments, if any.  */
5293 
5294 static boolean
common_op_match_null_string_p(unsigned char ** p,unsigned char * end,register_info_type * reg_info)5295 common_op_match_null_string_p(unsigned char **p,
5296                               unsigned char *end,
5297                               register_info_type *reg_info)
5298 {
5299     int mcnt;
5300     boolean ret;
5301     int reg_no;
5302     unsigned char *p1 = *p;
5303 
5304     switch ((re_opcode_t) *p1++)
5305         {
5306         case no_op:
5307         case begline:
5308         case endline:
5309         case begbuf:
5310         case endbuf:
5311         case wordbeg:
5312         case wordend:
5313         case wordbound:
5314         case notwordbound:
5315 #ifdef emacs
5316         case before_dot:
5317         case at_dot:
5318         case after_dot:
5319 #endif
5320             break;
5321 
5322         case start_memory:
5323             reg_no = *p1;
5324             assert (reg_no > 0 && reg_no <= MAX_REGNUM);
5325             ret = group_match_null_string_p (&p1, end, reg_info);
5326 
5327             /* Have to set this here in case we're checking a group which
5328                contains a group and a back reference to it.  */
5329 
5330             if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
5331                 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
5332 
5333             if (!ret)
5334                 return false;
5335             break;
5336 
5337             /* If this is an optimized succeed_n for zero times, make the jump.  */
5338         case jump:
5339             EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5340             if (mcnt >= 0)
5341                 p1 += mcnt;
5342             else
5343                 return false;
5344             break;
5345 
5346         case succeed_n:
5347             /* Get to the number of times to succeed.  */
5348             p1 += 2;
5349             EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5350 
5351             if (mcnt == 0)
5352                 {
5353                     p1 -= 4;
5354                     EXTRACT_NUMBER_AND_INCR (mcnt, p1);
5355                     p1 += mcnt;
5356                 }
5357             else
5358                 return false;
5359             break;
5360 
5361         case duplicate:
5362             if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
5363                 return false;
5364             break;
5365 
5366         case set_number_at:
5367             p1 += 4;
5368 
5369         default:
5370             /* All other opcodes mean we cannot match the empty string.  */
5371             return false;
5372         }
5373 
5374     *p = p1;
5375     return true;
5376 } /* common_op_match_null_string_p */
5377 
5378 
5379 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
5380    bytes; nonzero otherwise.  */
5381 
5382 static int
bcmp_translate(const char * s1,const char * s2,register int len,RE_TRANSLATE_TYPE translate)5383 bcmp_translate(const char *s1,
5384                const char *s2,
5385                register int len,
5386                RE_TRANSLATE_TYPE translate)
5387 {
5388     register const unsigned char *p1 = (const unsigned char *) s1;
5389     register const unsigned char *p2 = (const unsigned char *) s2;
5390     while (len)
5391         {
5392             if (translate[*p1++] != translate[*p2++]) return 1;
5393             len--;
5394         }
5395     return 0;
5396 }
5397 
5398 /* Entry points for GNU code.  */
5399 
5400 /* re_compile_pattern is the GNU regular expression compiler: it
5401    compiles PATTERN (of length SIZE) and puts the result in BUFP.
5402    Returns 0 if the pattern was valid, otherwise an error string.
5403 
5404    Assumes the `allocated' (and perhaps `buffer') and `translate' fields
5405    are set in BUFP on entry.
5406 
5407    We call regex_compile to do the actual compilation.  */
5408 
5409 const char *
re_compile_pattern(const char * pattern,size_t length,struct re_pattern_buffer * bufp)5410 re_compile_pattern(const char *pattern,
5411                    size_t length,
5412                    struct re_pattern_buffer *bufp)
5413 {
5414     reg_errcode_t ret;
5415 
5416     /* GNU code is written to assume at least RE_NREGS registers will be set
5417        (and at least one extra will be -1).  */
5418     bufp->regs_allocated = REGS_UNALLOCATED;
5419 
5420     /* And GNU code determines whether or not to get register information
5421        by passing null for the REGS argument to re_match, etc., not by
5422        setting no_sub.  */
5423     bufp->no_sub = 0;
5424 
5425     /* Match anchors at newline.  */
5426     bufp->newline_anchor = 1;
5427 
5428     ret = regex_compile (pattern, length, re_syntax_options, bufp);
5429 
5430     if (!ret)
5431         return NULL;
5432     return gettext (re_error_msgid[(int) ret]);
5433 }
5434 
5435 /* Entry points compatible with 4.2 BSD regex library.  We don't define
5436    them unless specifically requested.  */
5437 
5438 #if defined (_REGEX_RE_COMP) || defined (_LIBC)
5439 
5440 /* BSD has one and only one pattern buffer.  */
5441 static struct re_pattern_buffer re_comp_buf;
5442 
5443 char *
5444 #ifdef _LIBC
5445 /* Make these definitions weak in libc, so POSIX programs can redefine
5446    these names if they don't use our functions, and still use
5447    regcomp/regexec below without link errors.  */
5448 weak_function
5449 #endif
re_comp(s)5450 re_comp (s)
5451      const char *s;
5452 {
5453     reg_errcode_t ret;
5454 
5455     if (!s)
5456         {
5457             if (!re_comp_buf.buffer)
5458                 return gettext ("No previous regular expression");
5459             return 0;
5460         }
5461 
5462     if (!re_comp_buf.buffer)
5463         {
5464             re_comp_buf.buffer = (unsigned char *) malloc (200);
5465             if (re_comp_buf.buffer == NULL)
5466                 return gettext (re_error_msgid[(int) REG_ESPACE]);
5467             re_comp_buf.allocated = 200;
5468 
5469             re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
5470             if (re_comp_buf.fastmap == NULL)
5471                 return gettext (re_error_msgid[(int) REG_ESPACE]);
5472         }
5473 
5474     /* Since `re_exec' always passes NULL for the `regs' argument, we
5475        don't need to initialize the pattern buffer fields which affect it.  */
5476 
5477     /* Match anchors at newlines.  */
5478     re_comp_buf.newline_anchor = 1;
5479 
5480     ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
5481 
5482     if (!ret)
5483         return NULL;
5484 
5485     /* Yes, we're discarding `const' here if !HAVE_LIBINTL.  */
5486     return (char *) gettext (re_error_msgid[(int) ret]);
5487 }
5488 
5489 
5490 int
5491 #ifdef _LIBC
5492 weak_function
5493 #endif
re_exec(s)5494 re_exec (s)
5495      const char *s;
5496 {
5497     const int len = strlen (s);
5498     return
5499         0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
5500 }
5501 
5502 #endif /* _REGEX_RE_COMP */
5503 
5504 /* POSIX.2 functions.  Don't define these for Emacs.  */
5505 
5506 #ifndef emacs
5507 
5508 /* regcomp takes a regular expression as a string and compiles it.
5509 
5510    PREG is a regex_t *.  We do not expect any fields to be initialized,
5511    since POSIX says we shouldn't.  Thus, we set
5512 
5513      `buffer' to the compiled pattern;
5514      `used' to the length of the compiled pattern;
5515      `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
5516        REG_EXTENDED bit in CFLAGS is set; otherwise, to
5517        RE_SYNTAX_POSIX_BASIC;
5518      `newline_anchor' to REG_NEWLINE being set in CFLAGS;
5519      `fastmap' and `fastmap_accurate' to zero;
5520      `re_nsub' to the number of subexpressions in PATTERN.
5521 
5522    PATTERN is the address of the pattern string.
5523 
5524    CFLAGS is a series of bits which affect compilation.
5525 
5526      If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
5527      use POSIX basic syntax.
5528 
5529      If REG_NEWLINE is set, then . and [^...] don't match newline.
5530      Also, regexec will try a match beginning after every newline.
5531 
5532      If REG_ICASE is set, then we considers upper- and lowercase
5533      versions of letters to be equivalent when matching.
5534 
5535      If REG_NOSUB is set, then when PREG is passed to regexec, that
5536      routine will report only success or failure, and nothing about the
5537      registers.
5538 
5539    It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
5540    the return codes and their meanings.)  */
5541 
5542 int
regcomp(regex_t * preg,const char * pattern,int cflags)5543 regcomp(regex_t *preg,
5544         const char *pattern,
5545         int cflags)
5546 {
5547     reg_errcode_t ret;
5548     reg_syntax_t syntax
5549         = (cflags & REG_EXTENDED) ?
5550         RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
5551 
5552     /* regex_compile will allocate the space for the compiled pattern.  */
5553     preg->buffer = 0;
5554     preg->allocated = 0;
5555     preg->used = 0;
5556 
5557     /* Don't bother to use a fastmap when searching.  This simplifies the
5558        REG_NEWLINE case: if we used a fastmap, we'd have to put all the
5559        characters after newlines into the fastmap.  This way, we just try
5560        every character.  */
5561     preg->fastmap = 0;
5562 
5563     if (cflags & REG_ICASE)
5564         {
5565             unsigned i;
5566 
5567             preg->translate
5568                 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
5569                                               * sizeof (*(RE_TRANSLATE_TYPE)0));
5570             if (preg->translate == NULL)
5571                 return (int) REG_ESPACE;
5572 
5573             /* Map uppercase characters to corresponding lowercase ones.  */
5574             for (i = 0; i < CHAR_SET_SIZE; i++)
5575                 preg->translate[i] = ISUPPER (i) ? tolower (i) : i;
5576         }
5577     else
5578         preg->translate = NULL;
5579 
5580     /* If REG_NEWLINE is set, newlines are treated differently.  */
5581     if (cflags & REG_NEWLINE)
5582         { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
5583             syntax &= ~RE_DOT_NEWLINE;
5584             syntax |= RE_HAT_LISTS_NOT_NEWLINE;
5585             /* It also changes the matching behavior.  */
5586             preg->newline_anchor = 1;
5587         }
5588     else
5589         preg->newline_anchor = 0;
5590 
5591     preg->no_sub = !!(cflags & REG_NOSUB);
5592 
5593     /* POSIX says a null character in the pattern terminates it, so we
5594        can use strlen here in compiling the pattern.  */
5595     ret = regex_compile (pattern, strlen (pattern), syntax, preg);
5596 
5597     /* POSIX doesn't distinguish between an unmatched open-group and an
5598        unmatched close-group: both are REG_EPAREN.  */
5599     if (ret == REG_ERPAREN) ret = REG_EPAREN;
5600 
5601     return (int) ret;
5602 }
5603 
5604 
5605 /* regexec searches for a given pattern, specified by PREG, in the
5606    string STRING.
5607 
5608    If NMATCH is zero or REG_NOSUB was set in the cflags argument to
5609    `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
5610    least NMATCH elements, and we set them to the offsets of the
5611    corresponding matched substrings.
5612 
5613    EFLAGS specifies `execution flags' which affect matching: if
5614    REG_NOTBOL is set, then ^ does not match at the beginning of the
5615    string; if REG_NOTEOL is set, then $ does not match at the end.
5616 
5617    We return 0 if we find a match and REG_NOMATCH if not.  */
5618 
5619 int
regexec(const regex_t * preg,const char * string,size_t nmatch,regmatch_t pmatch[],int eflags)5620 regexec(const regex_t *preg,
5621         const char *string,
5622         size_t nmatch,
5623         regmatch_t pmatch[],
5624         int eflags)
5625 {
5626     int ret;
5627     struct re_registers regs;
5628     regex_t private_preg;
5629     int len = strlen (string);
5630     boolean want_reg_info = !preg->no_sub && nmatch > 0;
5631 
5632     private_preg = *preg;
5633 
5634     private_preg.not_bol = !!(eflags & REG_NOTBOL);
5635     private_preg.not_eol = !!(eflags & REG_NOTEOL);
5636 
5637     /* The user has told us exactly how many registers to return
5638        information about, via `nmatch'.  We have to pass that on to the
5639        matching routines.  */
5640     private_preg.regs_allocated = REGS_FIXED;
5641 
5642     if (want_reg_info)
5643         {
5644             regs.num_regs = nmatch;
5645             regs.start = TALLOC (nmatch, regoff_t);
5646             regs.end = TALLOC (nmatch, regoff_t);
5647             if (regs.start == NULL || regs.end == NULL)
5648                 return (int) REG_NOMATCH;
5649         }
5650 
5651     /* Perform the searching operation.  */
5652     ret = re_search (&private_preg, string, len,
5653                      /* start: */ 0, /* range: */ len,
5654                      want_reg_info ? &regs : (struct re_registers *) 0);
5655 
5656     /* Copy the register information to the POSIX structure.  */
5657     if (want_reg_info)
5658         {
5659             if (ret >= 0)
5660                 {
5661                     unsigned r;
5662 
5663                     for (r = 0; r < nmatch; r++)
5664                         {
5665                             pmatch[r].rm_so = regs.start[r];
5666                             pmatch[r].rm_eo = regs.end[r];
5667                         }
5668                 }
5669 
5670             /* If we needed the temporary register info, free the space now.  */
5671             free (regs.start);
5672             free (regs.end);
5673         }
5674 
5675     /* We want zero return to mean success, unlike `re_search'.  */
5676     return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
5677 }
5678 
5679 
5680 /* Returns a message corresponding to an error code, ERRCODE, returned
5681    from either regcomp or regexec.   We don't use PREG here.  */
5682 
5683 size_t
regerror(int errcode,const regex_t * preg,char * errbuf,size_t errbuf_size)5684 regerror(int errcode,
5685          const regex_t *preg,
5686          char *errbuf,
5687          size_t errbuf_size)
5688 {
5689     const char *msg;
5690     size_t msg_size;
5691 
5692     if (errcode < 0
5693         || errcode >= (int) (sizeof (re_error_msgid)
5694                              / sizeof (re_error_msgid[0])))
5695         /* Only error codes returned by the rest of the code should be passed
5696            to this routine.  If we are given anything else, or if other regex
5697            code generates an invalid error code, then the program has a bug.
5698            Dump core so we can fix it.  */
5699         abort ();
5700 
5701     msg = gettext (re_error_msgid[errcode]);
5702 
5703     msg_size = strlen (msg) + 1; /* Includes the null.  */
5704 
5705     if (errbuf_size != 0)
5706         {
5707             if (msg_size > errbuf_size)
5708                 {
5709                     strncpy (errbuf, msg, errbuf_size - 1);
5710                     errbuf[errbuf_size - 1] = 0;
5711                 }
5712             else
5713                 strcpy (errbuf, msg);
5714         }
5715 
5716     return msg_size;
5717 }
5718 
5719 
5720 /* Free dynamically allocated space used by PREG.  */
5721 
5722 void
regfree(regex_t * preg)5723 regfree(regex_t *preg)
5724 {
5725     if (preg->buffer != NULL)
5726         free (preg->buffer);
5727     preg->buffer = NULL;
5728 
5729     preg->allocated = 0;
5730     preg->used = 0;
5731 
5732     if (preg->fastmap != NULL)
5733         free (preg->fastmap);
5734     preg->fastmap = NULL;
5735     preg->fastmap_accurate = 0;
5736 
5737     if (preg->translate != NULL)
5738         free (preg->translate);
5739     preg->translate = NULL;
5740 }
5741 
5742 #endif /* not emacs  */
5743