1 /*
2  * Copyright (C) 1996-2021 The Squid Software Foundation and contributors
3  *
4  * Squid software is distributed under GPLv2+ license and includes
5  * contributions from numerous individuals and organizations.
6  * Please see the COPYING and CONTRIBUTORS files for details.
7  */
8 
9 /* Extended regular expression matching and search library,
10  * version 0.12.
11  * (Implements POSIX draft P10003.2/D11.2, except for
12  * internationalization features.)
13  *
14  * Copyright (C) 1993 Free Software Foundation, Inc.
15  *
16  * This program is free software; you can redistribute it and/or modify
17  * it under the terms of the GNU General Public License as published by
18  * the Free Software Foundation; either version 2, or (at your option)
19  * any later version.
20  *
21  * This program is distributed in the hope that it will be useful,
22  * but WITHOUT ANY WARRANTY; without even the implied warranty of
23  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
24  * GNU General Public License for more details.
25  *
26  * You should have received a copy of the GNU General Public License
27  * along with this program; if not, write to the Free Software
28  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111, USA.  */
29 
30 /* AIX requires this to be the first thing in the file. */
31 #if defined (_AIX) && !defined(REGEX_MALLOC)
32 #pragma alloca
33 #endif
34 
35 #ifndef _GNU_SOURCE
36 #define _GNU_SOURCE 1
37 #endif
38 
39 #include "squid.h"
40 
41 #if USE_GNUREGEX /* only if squid needs it. Usually not */
42 
43 #if !HAVE_ALLOCA
44 #define REGEX_MALLOC 1
45 #endif
46 
47 /* We used to test for `BSTRING' here, but only GCC and Emacs define
48  * `BSTRING', as far as I know, and neither of them use this code.  */
49 #if HAVE_STRING_H || STDC_HEADERS
50 #include <string.h>
51 #else
52 #include <strings.h>
53 #endif
54 
55 /* Define the syntax stuff for \<, \>, etc.  */
56 
57 /* This must be nonzero for the wordchar and notwordchar pattern
58  * commands in re_match_2.  */
59 #ifndef Sword
60 #define Sword 1
61 #endif
62 
63 #ifdef SYNTAX_TABLE
64 
65 extern char *re_syntax_table;
66 
67 #else /* not SYNTAX_TABLE */
68 
69 /* How many characters in the character set.  */
70 #define CHAR_SET_SIZE 256
71 
72 static char re_syntax_table[CHAR_SET_SIZE];
73 
74 static void
init_syntax_once(void)75 init_syntax_once(void)
76 {
77     register int c;
78     static int done = 0;
79 
80     if (done)
81         return;
82 
83     memset(re_syntax_table, 0, sizeof re_syntax_table);
84 
85     for (c = 'a'; c <= 'z'; c++)
86         re_syntax_table[c] = Sword;
87 
88     for (c = 'A'; c <= 'Z'; c++)
89         re_syntax_table[c] = Sword;
90 
91     for (c = '0'; c <= '9'; c++)
92         re_syntax_table[c] = Sword;
93 
94     re_syntax_table['_'] = Sword;
95 
96     done = 1;
97 }
98 
99 #endif /* not SYNTAX_TABLE */
100 
101 /* Get the interface, including the syntax bits.  */
102 #include "compat/GnuRegex.h"
103 
104 /* Compile a fastmap for the compiled pattern in BUFFER; used to
105  * accelerate searches.  Return 0 if successful and -2 if was an
106  * internal error.  */
107 static int re_compile_fastmap(struct re_pattern_buffer * buffer);
108 
109 /* Search in the string STRING (with length LENGTH) for the pattern
110  * compiled into BUFFER.  Start searching at position START, for RANGE
111  * characters.  Return the starting position of the match, -1 for no
112  * match, or -2 for an internal error.  Also return register
113  * information in REGS (if REGS and BUFFER->no_sub are nonzero).  */
114 static int re_search(struct re_pattern_buffer * buffer, const char *string,
115                      int length, int start, int range, struct re_registers * regs);
116 
117 /* Like `re_search', but search in the concatenation of STRING1 and
118  * STRING2.  Also, stop searching at index START + STOP.  */
119 static int re_search_2(struct re_pattern_buffer * buffer, const char *string1,
120                        int length1, const char *string2, int length2,
121                        int start, int range, struct re_registers * regs, int stop);
122 
123 /* Like `re_search_2', but return how many characters in STRING the regexp
124  * in BUFFER matched, starting at position START.  */
125 static int re_match_2(struct re_pattern_buffer * buffer, const char *string1,
126                       int length1, const char *string2, int length2,
127                       int start, struct re_registers * regs, int stop);
128 
129 /* isalpha etc. are used for the character classes.  */
130 #include <ctype.h>
131 
132 #ifndef isascii
133 #define isascii(c) 1
134 #endif
135 
136 #ifdef isblank
137 #define ISBLANK(c) (isascii ((unsigned char)c) && isblank ((unsigned char)c))
138 #else
139 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
140 #endif
141 #ifdef isgraph
142 #define ISGRAPH(c) (isascii ((unsigned char)c) && isgraph ((unsigned char)c))
143 #else
144 #define ISGRAPH(c) (isascii ((unsigned char)c) && isprint ((unsigned char)c) && !isspace ((unsigned char)c))
145 #endif
146 
147 #define ISPRINT(c) (isascii ((unsigned char)c) && isprint ((unsigned char)c))
148 #define ISDIGIT(c) (isascii ((unsigned char)c) && isdigit ((unsigned char)c))
149 #define ISALNUM(c) (isascii ((unsigned char)c) && isalnum ((unsigned char)c))
150 #define ISALPHA(c) (isascii ((unsigned char)c) && isalpha ((unsigned char)c))
151 #define ISCNTRL(c) (isascii ((unsigned char)c) && iscntrl ((unsigned char)c))
152 #define ISLOWER(c) (isascii ((unsigned char)c) && islower ((unsigned char)c))
153 #define ISPUNCT(c) (isascii ((unsigned char)c) && ispunct ((unsigned char)c))
154 #define ISSPACE(c) (isascii ((unsigned char)c) && isspace ((unsigned char)c))
155 #define ISUPPER(c) (isascii ((unsigned char)c) && isupper ((unsigned char)c))
156 #define ISXDIGIT(c) (isascii ((unsigned char)c) && isxdigit ((unsigned char)c))
157 
158 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
159  * since ours (we hope) works properly with all combinations of
160  * machines, compilers, `char' and `unsigned char' argument types.
161  * (Per Bothner suggested the basic approach.)  */
162 #undef SIGN_EXTEND_CHAR
163 #ifdef __STDC__
164 #define SIGN_EXTEND_CHAR(c) ((signed char) (c))
165 #else /* not __STDC__ */
166 /* As in Harbison and Steele.  */
167 #define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
168 #endif
169 
170 /* Should we use malloc or alloca?  If REGEX_MALLOC is not defined, we
171  * use `alloca' instead of `malloc'.  This is because using malloc in
172  * re_search* or re_match* could cause memory leaks when C-g is used in
173  * Emacs; also, malloc is slower and causes storage fragmentation.  On
174  * the other hand, malloc is more portable, and easier to debug.
175  *
176  * Because we sometimes use alloca, some routines have to be macros,
177  * not functions -- `alloca'-allocated space disappears at the end of the
178  * function it is called in.  */
179 
180 #ifdef REGEX_MALLOC
181 
182 #define REGEX_ALLOCATE malloc
183 #define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
184 
185 #else /* not REGEX_MALLOC  */
186 
187 /* Emacs already defines alloca, sometimes.  */
188 #ifndef alloca
189 
190 /* Make alloca work the best possible way.  */
191 #ifdef __GNUC__
192 #define alloca __builtin_alloca
193 #else /* not __GNUC__ */
194 #if HAVE_ALLOCA_H
195 #include <alloca.h>
196 #else /* not __GNUC__ or HAVE_ALLOCA_H */
197 #ifndef _AIX            /* Already did AIX, up at the top.  */
198 char *alloca();
199 #endif /* not _AIX */
200 #endif /* not HAVE_ALLOCA_H */
201 #endif /* not __GNUC__ */
202 
203 #endif /* not alloca */
204 
205 #define REGEX_ALLOCATE alloca
206 
207 /* Assumes a `char *destination' variable.  */
208 #define REGEX_REALLOCATE(source, osize, nsize)              \
209   (destination = (char *) alloca (nsize),               \
210    memcpy (destination, source, osize),             \
211    destination)
212 
213 #endif /* not REGEX_MALLOC */
214 
215 /* True if `size1' is non-NULL and PTR is pointing anywhere inside
216  * `string1' or just past its end.  This works if PTR is NULL, which is
217  * a good thing.  */
218 #define FIRST_STRING_P(ptr)                     \
219   (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
220 
221 /* (Re)Allocate N items of type T using malloc, or fail.  */
222 #define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
223 #define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
224 #define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
225 
226 #define BYTEWIDTH 8     /* In bits.  */
227 
228 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
229 
230 #if !defined(__MINGW32__)   /* MinGW defines boolean */
231 typedef char boolean;
232 #endif
233 #define false 0
234 #define true 1
235 
236 /* These are the command codes that appear in compiled regular
237  * expressions.  Some opcodes are followed by argument bytes.  A
238  * command code can specify any interpretation whatsoever for its
239  * arguments.  Zero bytes may appear in the compiled regular expression.
240  *
241  * The value of `exactn' is needed in search.c (search_buffer) in Emacs.
242  * So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
243  * `exactn' we use here must also be 1.  */
244 
245 typedef enum {
246     no_op = 0,
247 
248     /* Followed by one byte giving n, then by n literal bytes.  */
249     exactn = 1,
250 
251     /* Matches any (more or less) character.  */
252     anychar,
253 
254     /* Matches any one char belonging to specified set.  First
255      * following byte is number of bitmap bytes.  Then come bytes
256      * for a bitmap saying which chars are in.  Bits in each byte
257      * are ordered low-bit-first.  A character is in the set if its
258      * bit is 1.  A character too large to have a bit in the map is
259      * automatically not in the set.  */
260     charset,
261 
262     /* Same parameters as charset, but match any character that is
263      * not one of those specified.  */
264     charset_not,
265 
266     /* Start remembering the text that is matched, for storing in a
267      * register.  Followed by one byte with the register number, in
268      * the range 0 to one less than the pattern buffer's re_nsub
269      * field.  Then followed by one byte with the number of groups
270      * inner to this one.  (This last has to be part of the
271      * start_memory only because we need it in the on_failure_jump
272      * of re_match_2.)  */
273     start_memory,
274 
275     /* Stop remembering the text that is matched and store it in a
276      * memory register.  Followed by one byte with the register
277      * number, in the range 0 to one less than `re_nsub' in the
278      * pattern buffer, and one byte with the number of inner groups,
279      * just like `start_memory'.  (We need the number of inner
280      * groups here because we don't have any easy way of finding the
281      * corresponding start_memory when we're at a stop_memory.)  */
282     stop_memory,
283 
284     /* Match a duplicate of something remembered. Followed by one
285      * byte containing the register number.  */
286     duplicate,
287 
288     /* Fail unless at beginning of line.  */
289     begline,
290 
291     /* Fail unless at end of line.  */
292     endline,
293 
294     /* Succeeds if or at beginning of string to be matched.  */
295     begbuf,
296 
297     /* Analogously, for end of buffer/string.  */
298     endbuf,
299 
300     /* Followed by two byte relative address to which to jump.  */
301     jump,
302 
303     /* Same as jump, but marks the end of an alternative.  */
304     jump_past_alt,
305 
306     /* Followed by two-byte relative address of place to resume at
307      * in case of failure.  */
308     on_failure_jump,
309 
310     /* Like on_failure_jump, but pushes a placeholder instead of the
311      * current string position when executed.  */
312     on_failure_keep_string_jump,
313 
314     /* Throw away latest failure point and then jump to following
315      * two-byte relative address.  */
316     pop_failure_jump,
317 
318     /* Change to pop_failure_jump if know won't have to backtrack to
319      * match; otherwise change to jump.  This is used to jump
320      * back to the beginning of a repeat.  If what follows this jump
321      * clearly won't match what the repeat does, such that we can be
322      * sure that there is no use backtracking out of repetitions
323      * already matched, then we change it to a pop_failure_jump.
324      * Followed by two-byte address.  */
325     maybe_pop_jump,
326 
327     /* Jump to following two-byte address, and push a dummy failure
328      * point. This failure point will be thrown away if an attempt
329      * is made to use it for a failure.  A `+' construct makes this
330      * before the first repeat.  Also used as an intermediary kind
331      * of jump when compiling an alternative.  */
332     dummy_failure_jump,
333 
334     /* Push a dummy failure point and continue.  Used at the end of
335      * alternatives.  */
336     push_dummy_failure,
337 
338     /* Followed by two-byte relative address and two-byte number n.
339      * After matching N times, jump to the address upon failure.  */
340     succeed_n,
341 
342     /* Followed by two-byte relative address, and two-byte number n.
343      * Jump to the address N times, then fail.  */
344     jump_n,
345 
346     /* Set the following two-byte relative address to the
347      * subsequent two-byte number.  The address *includes* the two
348      * bytes of number.  */
349     set_number_at,
350 
351     wordchar,           /* Matches any word-constituent character.  */
352     notwordchar,        /* Matches any char that is not a word-constituent.  */
353 
354     wordbeg,            /* Succeeds if at word beginning.  */
355     wordend,            /* Succeeds if at word end.  */
356 
357     wordbound,          /* Succeeds if at a word boundary.  */
358     notwordbound        /* Succeeds if not at a word boundary.  */
359 
360 } re_opcode_t;
361 
362 /* Common operations on the compiled pattern.  */
363 
364 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
365 
366 #define STORE_NUMBER(destination, number)               \
367   do {                                  \
368     (destination)[0] = (number) & 0377;                 \
369     (destination)[1] = (number) >> 8;                   \
370   } while (0)
371 
372 /* Same as STORE_NUMBER, except increment DESTINATION to
373  * the byte after where the number is stored.  Therefore, DESTINATION
374  * must be an lvalue.  */
375 
376 #define STORE_NUMBER_AND_INCR(destination, number)          \
377   do {                                  \
378     STORE_NUMBER (destination, number);                 \
379     (destination) += 2;                         \
380   } while (0)
381 
382 /* Put into DESTINATION a number stored in two contiguous bytes starting
383  * at SOURCE.  */
384 
385 #define EXTRACT_NUMBER(destination, source)             \
386   do {                                  \
387     (destination) = *(source) & 0377;                   \
388     (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8;       \
389   } while (0)
390 
391 #ifdef DEBUG
392 static void
extract_number(dest,source)393 extract_number(dest, source)
394 int *dest;
395 unsigned char *source;
396 {
397     int temp = SIGN_EXTEND_CHAR(*(source + 1));
398     *dest = *source & 0377;
399     *dest += temp << 8;
400 }
401 
402 #ifndef EXTRACT_MACROS      /* To debug the macros.  */
403 #undef EXTRACT_NUMBER
404 #define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
405 #endif /* not EXTRACT_MACROS */
406 
407 #endif /* DEBUG */
408 
409 /* Same as EXTRACT_NUMBER, except increment SOURCE to after the number.
410  * SOURCE must be an lvalue.  */
411 
412 #define EXTRACT_NUMBER_AND_INCR(destination, source)            \
413   do {                                  \
414     EXTRACT_NUMBER (destination, source);               \
415     (source) += 2;                          \
416   } while (0)
417 
418 #ifdef DEBUG
419 static void
extract_number_and_incr(destination,source)420 extract_number_and_incr(destination, source)
421 int *destination;
422 unsigned char **source;
423 {
424     extract_number(destination, *source);
425     *source += 2;
426 }
427 
428 #ifndef EXTRACT_MACROS
429 #undef EXTRACT_NUMBER_AND_INCR
430 #define EXTRACT_NUMBER_AND_INCR(dest, src) \
431   extract_number_and_incr (&dest, &src)
432 #endif /* not EXTRACT_MACROS */
433 
434 #endif /* DEBUG */
435 
436 /* If DEBUG is defined, Regex prints many voluminous messages about what
437  * it is doing (if the variable `debug' is nonzero).  If linked with the
438  * main program in `iregex.c', you can enter patterns and strings
439  * interactively.  And if linked with the main program in `main.c' and
440  * the other test files, you can run the already-written tests.  */
441 
442 #ifdef DEBUG
443 
444 static int debug = 0;
445 
446 #define DEBUG_STATEMENT(e) e
447 #define DEBUG_PRINT1(x) if (debug) printf (x)
448 #define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
449 #define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
450 #define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
451 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)               \
452   if (debug) print_partial_compiled_pattern (s, e)
453 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)          \
454   if (debug) print_double_string (w, s1, sz1, s2, sz2)
455 
456 extern void printchar();
457 
458 /* Print the fastmap in human-readable form.  */
459 
460 void
print_fastmap(fastmap)461 print_fastmap(fastmap)
462 char *fastmap;
463 {
464     unsigned was_a_range = 0;
465     unsigned i = 0;
466 
467     while (i < (1 << BYTEWIDTH)) {
468         if (fastmap[i++]) {
469             was_a_range = 0;
470             printchar(i - 1);
471             while (i < (1 << BYTEWIDTH) && fastmap[i]) {
472                 was_a_range = 1;
473                 i++;
474             }
475             if (was_a_range) {
476                 printf("-");
477                 printchar(i - 1);
478             }
479         }
480     }
481     putchar('\n');
482 }
483 
484 /* Print a compiled pattern string in human-readable form, starting at
485  * the START pointer into it and ending just before the pointer END.  */
486 
487 void
print_partial_compiled_pattern(start,end)488 print_partial_compiled_pattern(start, end)
489 unsigned char *start;
490 unsigned char *end;
491 {
492     int mcnt, mcnt2;
493     unsigned char *p = start;
494     unsigned char *pend = end;
495 
496     if (start == NULL) {
497         printf("(null)\n");
498         return;
499     }
500     /* Loop over pattern commands.  */
501     while (p < pend) {
502         switch ((re_opcode_t) * p++) {
503         case no_op:
504             printf("/no_op");
505             break;
506 
507         case exactn:
508             mcnt = *p++;
509             printf("/exactn/%d", mcnt);
510             do {
511                 putchar('/');
512                 printchar(*p++);
513             } while (--mcnt);
514             break;
515 
516         case start_memory:
517             mcnt = *p++;
518             printf("/start_memory/%d/%d", mcnt, *p++);
519             break;
520 
521         case stop_memory:
522             mcnt = *p++;
523             printf("/stop_memory/%d/%d", mcnt, *p++);
524             break;
525 
526         case duplicate:
527             printf("/duplicate/%d", *p++);
528             break;
529 
530         case anychar:
531             printf("/anychar");
532             break;
533 
534         case charset:
535         case charset_not: {
536             register int c;
537 
538             printf("/charset%s",
539                    (re_opcode_t) * (p - 1) == charset_not ? "_not" : "");
540 
541             assert(p + *p < pend);
542 
543             for (c = 0; c < *p; c++) {
544                 unsigned bit;
545                 unsigned char map_byte = p[1 + c];
546 
547                 putchar('/');
548 
549                 for (bit = 0; bit < BYTEWIDTH; bit++)
550                     if (map_byte & (1 << bit))
551                         printchar(c * BYTEWIDTH + bit);
552             }
553             p += 1 + *p;
554             break;
555         }
556 
557         case begline:
558             printf("/begline");
559             break;
560 
561         case endline:
562             printf("/endline");
563             break;
564 
565         case on_failure_jump:
566             extract_number_and_incr(&mcnt, &p);
567             printf("/on_failure_jump/0/%d", mcnt);
568             break;
569 
570         case on_failure_keep_string_jump:
571             extract_number_and_incr(&mcnt, &p);
572             printf("/on_failure_keep_string_jump/0/%d", mcnt);
573             break;
574 
575         case dummy_failure_jump:
576             extract_number_and_incr(&mcnt, &p);
577             printf("/dummy_failure_jump/0/%d", mcnt);
578             break;
579 
580         case push_dummy_failure:
581             printf("/push_dummy_failure");
582             break;
583 
584         case maybe_pop_jump:
585             extract_number_and_incr(&mcnt, &p);
586             printf("/maybe_pop_jump/0/%d", mcnt);
587             break;
588 
589         case pop_failure_jump:
590             extract_number_and_incr(&mcnt, &p);
591             printf("/pop_failure_jump/0/%d", mcnt);
592             break;
593 
594         case jump_past_alt:
595             extract_number_and_incr(&mcnt, &p);
596             printf("/jump_past_alt/0/%d", mcnt);
597             break;
598 
599         case jump:
600             extract_number_and_incr(&mcnt, &p);
601             printf("/jump/0/%d", mcnt);
602             break;
603 
604         case succeed_n:
605             extract_number_and_incr(&mcnt, &p);
606             extract_number_and_incr(&mcnt2, &p);
607             printf("/succeed_n/0/%d/0/%d", mcnt, mcnt2);
608             break;
609 
610         case jump_n:
611             extract_number_and_incr(&mcnt, &p);
612             extract_number_and_incr(&mcnt2, &p);
613             printf("/jump_n/0/%d/0/%d", mcnt, mcnt2);
614             break;
615 
616         case set_number_at:
617             extract_number_and_incr(&mcnt, &p);
618             extract_number_and_incr(&mcnt2, &p);
619             printf("/set_number_at/0/%d/0/%d", mcnt, mcnt2);
620             break;
621 
622         case wordbound:
623             printf("/wordbound");
624             break;
625 
626         case notwordbound:
627             printf("/notwordbound");
628             break;
629 
630         case wordbeg:
631             printf("/wordbeg");
632             break;
633 
634         case wordend:
635             printf("/wordend");
636 
637         case wordchar:
638             printf("/wordchar");
639             break;
640 
641         case notwordchar:
642             printf("/notwordchar");
643             break;
644 
645         case begbuf:
646             printf("/begbuf");
647             break;
648 
649         case endbuf:
650             printf("/endbuf");
651             break;
652 
653         default:
654             printf("?%d", *(p - 1));
655         }
656     }
657     printf("/\n");
658 }
659 
660 void
print_compiled_pattern(bufp)661 print_compiled_pattern(bufp)
662 struct re_pattern_buffer *bufp;
663 {
664     unsigned char *buffer = bufp->buffer;
665 
666     print_partial_compiled_pattern(buffer, buffer + bufp->used);
667     printf("%d bytes used/%d bytes allocated.\n", bufp->used, bufp->allocated);
668 
669     if (bufp->fastmap_accurate && bufp->fastmap) {
670         printf("fastmap: ");
671         print_fastmap(bufp->fastmap);
672     }
673     printf("re_nsub: %d\t", bufp->re_nsub);
674     printf("regs_alloc: %d\t", bufp->regs_allocated);
675     printf("can_be_null: %d\t", bufp->can_be_null);
676     printf("newline_anchor: %d\n", bufp->newline_anchor);
677     printf("no_sub: %d\t", bufp->no_sub);
678     printf("not_bol: %d\t", bufp->not_bol);
679     printf("not_eol: %d\t", bufp->not_eol);
680     printf("syntax: %d\n", bufp->syntax);
681     /* Perhaps we should print the translate table?  */
682 }
683 
684 void
print_double_string(where,string1,size1,string2,size2)685 print_double_string(where, string1, size1, string2, size2)
686 const char *where;
687 const char *string1;
688 const char *string2;
689 int size1;
690 int size2;
691 {
692     unsigned this_char;
693 
694     if (where == NULL)
695         printf("(null)");
696     else {
697         if (FIRST_STRING_P(where)) {
698             for (this_char = where - string1; this_char < size1; this_char++)
699                 printchar(string1[this_char]);
700 
701             where = string2;
702         }
703         for (this_char = where - string2; this_char < size2; this_char++)
704             printchar(string2[this_char]);
705     }
706 }
707 
708 #else /* not DEBUG */
709 
710 #undef assert
711 #define assert(e)
712 
713 #define DEBUG_STATEMENT(e)
714 #define DEBUG_PRINT1(x)
715 #define DEBUG_PRINT2(x1, x2)
716 #define DEBUG_PRINT3(x1, x2, x3)
717 #define DEBUG_PRINT4(x1, x2, x3, x4)
718 #define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
719 #define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
720 
721 #endif /* not DEBUG */
722 
723 /* This table gives an error message for each of the error codes listed
724  * in regex.h.  Obviously the order here has to be same as there.  */
725 
726 static const char *re_error_msg[] = {NULL,              /* REG_NOERROR */
727                                      "No match",            /* REG_NOMATCH */
728                                      "Invalid regular expression",  /* REG_BADPAT */
729                                      "Invalid collation character", /* REG_ECOLLATE */
730                                      "Invalid character class name",    /* REG_ECTYPE */
731                                      "Trailing backslash",  /* REG_EESCAPE */
732                                      "Invalid back reference",  /* REG_ESUBREG */
733                                      "Unmatched [ or [^",   /* REG_EBRACK */
734                                      "Unmatched ( or \\(",  /* REG_EPAREN */
735                                      "Unmatched \\{",       /* REG_EBRACE */
736                                      "Invalid content of \\{\\}",   /* REG_BADBR */
737                                      "Invalid range end",   /* REG_ERANGE */
738                                      "Memory exhausted",        /* REG_ESPACE */
739                                      "Invalid preceding regular expression",    /* REG_BADRPT */
740                                      "Premature end of regular expression", /* REG_EEND */
741                                      "Regular expression too big",  /* REG_ESIZE */
742                                      "Unmatched ) or \\)",  /* REG_ERPAREN */
743                                     };
744 
745 /* Subroutine declarations and macros for regex_compile.  */
746 
747 /* Fetch the next character in the uncompiled pattern---translating it
748  * if necessary.  Also cast from a signed character in the constant
749  * string passed to us by the user to an unsigned char that we can use
750  * as an array index (in, e.g., `translate').  */
751 #define PATFETCH(c)                         \
752   do {if (p == pend) return REG_EEND;                   \
753     c = (unsigned char) *p++;                       \
754     if (translate) c = translate[c];                    \
755   } while (0)
756 
757 /* Fetch the next character in the uncompiled pattern, with no
758  * translation.  */
759 #define PATFETCH_RAW(c)                         \
760   do {if (p == pend) return REG_EEND;                   \
761     c = (unsigned char) *p++;                       \
762   } while (0)
763 
764 /* Go backwards one character in the pattern.  */
765 #define PATUNFETCH p--
766 
767 /* If `translate' is non-null, return translate[D], else just D.  We
768  * cast the subscript to translate because some data is declared as
769  * `char *', to avoid warnings when a string constant is passed.  But
770  * when we use a character as a subscript we must make it unsigned.  */
771 #define TRANSLATE(d) (translate ? translate[(unsigned char) (d)] : (d))
772 
773 /* Macros for outputting the compiled pattern into `buffer'.  */
774 
775 /* If the buffer isn't allocated when it comes in, use this.  */
776 #define INIT_BUF_SIZE  32
777 
778 /* Make sure we have at least N more bytes of space in buffer.  */
779 #define GET_BUFFER_SPACE(n)                     \
780     while (b - bufp->buffer + (n) > bufp->allocated)            \
781       EXTEND_BUFFER ()
782 
783 /* Make sure we have one more byte of buffer space and then add C to it.  */
784 #define BUF_PUSH(c)                         \
785   do {                                  \
786     GET_BUFFER_SPACE (1);                       \
787     *b++ = (unsigned char) (c);                     \
788   } while (0)
789 
790 /* Ensure we have two more bytes of buffer space and then append C1 and C2.  */
791 #define BUF_PUSH_2(c1, c2)                      \
792   do {                                  \
793     GET_BUFFER_SPACE (2);                       \
794     *b++ = (unsigned char) (c1);                    \
795     *b++ = (unsigned char) (c2);                    \
796   } while (0)
797 
798 /* As with BUF_PUSH_2, except for three bytes.  */
799 #define BUF_PUSH_3(c1, c2, c3)                      \
800   do {                                  \
801     GET_BUFFER_SPACE (3);                       \
802     *b++ = (unsigned char) (c1);                    \
803     *b++ = (unsigned char) (c2);                    \
804     *b++ = (unsigned char) (c3);                    \
805   } while (0)
806 
807 /* Store a jump with opcode OP at LOC to location TO.  We store a
808  * relative address offset by the three bytes the jump itself occupies.  */
809 #define STORE_JUMP(op, loc, to) \
810   store_op1 (op, loc, (to) - (loc) - 3)
811 
812 /* Likewise, for a two-argument jump.  */
813 #define STORE_JUMP2(op, loc, to, arg) \
814   store_op2 (op, loc, (to) - (loc) - 3, arg)
815 
816 /* Like `STORE_JUMP', but for inserting.  Assume `b' is the buffer end.  */
817 #define INSERT_JUMP(op, loc, to) \
818   insert_op1 (op, loc, (to) - (loc) - 3, b)
819 
820 /* Like `STORE_JUMP2', but for inserting.  Assume `b' is the buffer end.  */
821 #define INSERT_JUMP2(op, loc, to, arg) \
822   insert_op2 (op, loc, (to) - (loc) - 3, arg, b)
823 
824 /* This is not an arbitrary limit: the arguments which represent offsets
825  * into the pattern are two bytes long.  So if 2^16 bytes turns out to
826  * be too small, many things would have to change.  */
827 #define MAX_BUF_SIZE (1L << 16)
828 
829 /* Extend the buffer by twice its current size via realloc and
830  * reset the pointers that pointed into the old block to point to the
831  * correct places in the new one.  If extending the buffer results in it
832  * being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
833 #define EXTEND_BUFFER()                         \
834   do {                                  \
835     unsigned char *old_buffer = bufp->buffer;               \
836     if (bufp->allocated == MAX_BUF_SIZE)                \
837       return REG_ESIZE;                         \
838     bufp->allocated <<= 1;                      \
839     if (bufp->allocated > MAX_BUF_SIZE)                 \
840       bufp->allocated = MAX_BUF_SIZE;                   \
841     bufp->buffer = (unsigned char *) realloc (bufp->buffer, bufp->allocated);\
842     if (bufp->buffer == NULL)                       \
843       return REG_ESPACE;                        \
844     /* If the buffer moved, move all the pointers into it.  */      \
845     if (old_buffer != bufp->buffer)                 \
846       {                                 \
847         b = (b - old_buffer) + bufp->buffer;                \
848         begalt = (begalt - old_buffer) + bufp->buffer;          \
849         if (fixup_alt_jump)                     \
850           fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;\
851         if (laststart)                          \
852           laststart = (laststart - old_buffer) + bufp->buffer;      \
853         if (pending_exact)                      \
854           pending_exact = (pending_exact - old_buffer) + bufp->buffer;  \
855       }                                 \
856   } while (0)
857 
858 /* Since we have one byte reserved for the register number argument to
859  * {start,stop}_memory, the maximum number of groups we can report
860  * things about is what fits in that byte.  */
861 #define MAX_REGNUM 255
862 
863 /* But patterns can have more than `MAX_REGNUM' registers.  We just
864  * ignore the excess.  */
865 typedef unsigned regnum_t;
866 
867 /* Macros for the compile stack.  */
868 
869 /* Since offsets can go either forwards or backwards, this type needs to
870  * be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.  */
871 typedef int pattern_offset_t;
872 
873 typedef struct {
874     pattern_offset_t begalt_offset;
875     pattern_offset_t fixup_alt_jump;
876     pattern_offset_t inner_group_offset;
877     pattern_offset_t laststart_offset;
878     regnum_t regnum;
879 } compile_stack_elt_t;
880 
881 typedef struct {
882     compile_stack_elt_t *stack;
883     unsigned size;
884     unsigned avail;     /* Offset of next open position.  */
885 } compile_stack_type;
886 
887 static void store_op1(re_opcode_t op, unsigned char *loc, int arg);
888 static void store_op2( re_opcode_t op, unsigned char *loc, int arg1, int arg2);
889 static void insert_op1(re_opcode_t op, unsigned char *loc, int arg, unsigned char *end);
890 static void insert_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end);
891 static boolean at_begline_loc_p(const char * pattern, const char *p, reg_syntax_t syntax);
892 static boolean at_endline_loc_p(const char *p, const char *pend, int syntax);
893 static boolean group_in_compile_stack(compile_stack_type compile_stack, regnum_t regnum);
894 static reg_errcode_t compile_range(const char **p_ptr, const char *pend, char *translate, reg_syntax_t syntax, unsigned char *b);
895 
896 #define INIT_COMPILE_STACK_SIZE 32
897 
898 /* The next available element.  */
899 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
900 
901 /* Set the bit for character C in a list.  */
902 #define SET_LIST_BIT(c)                               \
903   (b[((unsigned char) (c)) / BYTEWIDTH]               \
904    |= 1 << (((unsigned char) c) % BYTEWIDTH))
905 
906 /* Get the next unsigned number in the uncompiled pattern.  */
907 #define GET_UNSIGNED_NUMBER(num)                    \
908   { if (p != pend)                          \
909      {                                  \
910        PATFETCH (c);                            \
911        while (ISDIGIT (c))                      \
912          {                              \
913            if (num < 0)                         \
914               num = 0;                          \
915            num = num * 10 + c - '0';                    \
916            if (p == pend)                       \
917               break;                            \
918            PATFETCH (c);                        \
919          }                              \
920        }                                \
921     }
922 
923 #define CHAR_CLASS_MAX_LENGTH  6    /* Namely, `xdigit'.  */
924 
925 #define IS_CHAR_CLASS(string)                       \
926    (STREQ (string, "alpha") || STREQ (string, "upper")          \
927     || STREQ (string, "lower") || STREQ (string, "digit")       \
928     || STREQ (string, "alnum") || STREQ (string, "xdigit")      \
929     || STREQ (string, "space") || STREQ (string, "print")       \
930     || STREQ (string, "punct") || STREQ (string, "graph")       \
931     || STREQ (string, "cntrl") || STREQ (string, "blank"))
932 
933 /* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
934  * Returns one of error codes defined in `regex.h', or zero for success.
935  *
936  * Assumes the `allocated' (and perhaps `buffer') and `translate'
937  * fields are set in BUFP on entry.
938  *
939  * If it succeeds, results are put in BUFP (if it returns an error, the
940  * contents of BUFP are undefined):
941  * `buffer' is the compiled pattern;
942  * `syntax' is set to SYNTAX;
943  * `used' is set to the length of the compiled pattern;
944  * `fastmap_accurate' is zero;
945  * `re_nsub' is the number of subexpressions in PATTERN;
946  * `not_bol' and `not_eol' are zero;
947  *
948  * The `fastmap' and `newline_anchor' fields are neither
949  * examined nor set.  */
950 
951 static reg_errcode_t
regex_compile(const char * pattern,int size,reg_syntax_t syntax,struct re_pattern_buffer * bufp)952 regex_compile(const char *pattern, int size, reg_syntax_t syntax, struct re_pattern_buffer *bufp)
953 {
954     /* We fetch characters from PATTERN here.  Even though PATTERN is
955      * `char *' (i.e., signed), we declare these variables as unsigned, so
956      * they can be reliably used as array indices.  */
957     register unsigned char c, c1;
958 
959     /* A random tempory spot in PATTERN.  */
960     const char *p1;
961 
962     /* Points to the end of the buffer, where we should append.  */
963     register unsigned char *b;
964 
965     /* Keeps track of unclosed groups.  */
966     compile_stack_type compile_stack;
967 
968     /* Points to the current (ending) position in the pattern.  */
969     const char *p = pattern;
970     const char *pend = pattern + size;
971 
972     /* How to translate the characters in the pattern.  */
973     char *translate = bufp->translate;
974 
975     /* Address of the count-byte of the most recently inserted `exactn'
976      * command.  This makes it possible to tell if a new exact-match
977      * character can be added to that command or if the character requires
978      * a new `exactn' command.  */
979     unsigned char *pending_exact = 0;
980 
981     /* Address of start of the most recently finished expression.
982      * This tells, e.g., postfix * where to find the start of its
983      * operand.  Reset at the beginning of groups and alternatives.  */
984     unsigned char *laststart = 0;
985 
986     /* Address of beginning of regexp, or inside of last group.  */
987     unsigned char *begalt;
988 
989     /* Place in the uncompiled pattern (i.e., the {) to
990      * which to go back if the interval is invalid.  */
991     const char *beg_interval;
992 
993     /* Address of the place where a forward jump should go to the end of
994      * the containing expression.  Each alternative of an `or' -- except the
995      * last -- ends with a forward jump of this sort.  */
996     unsigned char *fixup_alt_jump = 0;
997 
998     /* Counts open-groups as they are encountered.  Remembered for the
999      * matching close-group on the compile stack, so the same register
1000      * number is put in the stop_memory as the start_memory.  */
1001     regnum_t regnum = 0;
1002 
1003 #ifdef DEBUG
1004     DEBUG_PRINT1("\nCompiling pattern: ");
1005     if (debug) {
1006         unsigned debug_count;
1007 
1008         for (debug_count = 0; debug_count < size; debug_count++)
1009             printchar(pattern[debug_count]);
1010         putchar('\n');
1011     }
1012 #endif /* DEBUG */
1013 
1014     /* Initialize the compile stack.  */
1015     compile_stack.stack = TALLOC(INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
1016     if (compile_stack.stack == NULL)
1017         return REG_ESPACE;
1018 
1019     compile_stack.size = INIT_COMPILE_STACK_SIZE;
1020     compile_stack.avail = 0;
1021 
1022     /* Initialize the pattern buffer.  */
1023     bufp->syntax = syntax;
1024     bufp->fastmap_accurate = 0;
1025     bufp->not_bol = bufp->not_eol = 0;
1026 
1027     /* Set `used' to zero, so that if we return an error, the pattern
1028      * printer (for debugging) will think there's no pattern.  We reset it
1029      * at the end.  */
1030     bufp->used = 0;
1031 
1032     /* Always count groups, whether or not bufp->no_sub is set.  */
1033     bufp->re_nsub = 0;
1034 
1035 #if !defined (SYNTAX_TABLE)
1036     /* Initialize the syntax table.  */
1037     init_syntax_once();
1038 #endif
1039 
1040     if (bufp->allocated == 0) {
1041         if (bufp->buffer) {
1042             /* If zero allocated, but buffer is non-null, try to realloc
1043                      * enough space.  This loses if buffer's address is bogus, but
1044                      * that is the user's responsibility.  */
1045             RETALLOC(bufp->buffer, INIT_BUF_SIZE, unsigned char);
1046         } else {        /* Caller did not allocate a buffer.  Do it for them.  */
1047             bufp->buffer = TALLOC(INIT_BUF_SIZE, unsigned char);
1048         }
1049         if (!bufp->buffer)
1050             return REG_ESPACE;
1051 
1052         bufp->allocated = INIT_BUF_SIZE;
1053     }
1054     begalt = b = bufp->buffer;
1055 
1056     /* Loop through the uncompiled pattern until we're at the end.  */
1057     while (p != pend) {
1058         PATFETCH(c);
1059 
1060         switch (c) {
1061         case '^': {
1062             if (        /* If at start of pattern, it's an operator.  */
1063                 p == pattern + 1
1064                 /* If context independent, it's an operator.  */
1065                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1066                 /* Otherwise, depends on what's come before.  */
1067                 || at_begline_loc_p(pattern, p, syntax))
1068                 BUF_PUSH(begline);
1069             else
1070                 goto normal_char;
1071         }
1072         break;
1073 
1074         case '$': {
1075             if (        /* If at end of pattern, it's an operator.  */
1076                 p == pend
1077                 /* If context independent, it's an operator.  */
1078                 || syntax & RE_CONTEXT_INDEP_ANCHORS
1079                 /* Otherwise, depends on what's next.  */
1080                 || at_endline_loc_p(p, pend, syntax))
1081                 BUF_PUSH(endline);
1082             else
1083                 goto normal_char;
1084         }
1085         break;
1086 
1087         case '+':
1088         case '?':
1089             if ((syntax & RE_BK_PLUS_QM)
1090                     || (syntax & RE_LIMITED_OPS))
1091                 goto normal_char;
1092 handle_plus:
1093         case '*':
1094             /* If there is no previous pattern... */
1095             if (!laststart) {
1096                 if (syntax & RE_CONTEXT_INVALID_OPS)
1097                     return REG_BADRPT;
1098                 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
1099                     goto normal_char;
1100             } {
1101                 /* Are we optimizing this jump?  */
1102                 boolean keep_string_p = false;
1103 
1104                 /* 1 means zero (many) matches is allowed.  */
1105                 char zero_times_ok = 0, many_times_ok = 0;
1106 
1107                 /* If there is a sequence of repetition chars, collapse it
1108                  * down to just one (the right one).  We can't combine
1109                  * interval operators with these because of, e.g., `a{2}*',
1110                  * which should only match an even number of `a's.  */
1111 
1112                 for (;;) {
1113                     zero_times_ok |= c != '+';
1114                     many_times_ok |= c != '?';
1115 
1116                     if (p == pend)
1117                         break;
1118 
1119                     PATFETCH(c);
1120 
1121                     if (c == '*'
1122                             || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')));
1123 
1124                     else if (syntax & RE_BK_PLUS_QM && c == '\\') {
1125                         if (p == pend)
1126                             return REG_EESCAPE;
1127 
1128                         PATFETCH(c1);
1129                         if (!(c1 == '+' || c1 == '?')) {
1130                             PATUNFETCH;
1131                             PATUNFETCH;
1132                             break;
1133                         }
1134                         c = c1;
1135                     } else {
1136                         PATUNFETCH;
1137                         break;
1138                     }
1139 
1140                     /* If we get here, we found another repeat character.  */
1141                 }
1142 
1143                 /* Star, etc. applied to an empty pattern is equivalent
1144                  * to an empty pattern.  */
1145                 if (!laststart)
1146                     break;
1147 
1148                 /* Now we know whether or not zero matches is allowed
1149                  * and also whether or not two or more matches is allowed.  */
1150                 if (many_times_ok) {
1151                     /* More than one repetition is allowed, so put in at the
1152                          * end a backward relative jump from `b' to before the next
1153                          * jump we're going to put in below (which jumps from
1154                          * laststart to after this jump).
1155                          *
1156                          * But if we are at the `*' in the exact sequence `.*\n',
1157                          * insert an unconditional jump backwards to the .,
1158                          * instead of the beginning of the loop.  This way we only
1159                          * push a failure point once, instead of every time
1160                          * through the loop.  */
1161                     assert(p - 1 > pattern);
1162 
1163                     /* Allocate the space for the jump.  */
1164                     GET_BUFFER_SPACE(3);
1165 
1166                     /* We know we are not at the first character of the pattern,
1167                      * because laststart was nonzero.  And we've already
1168                      * incremented `p', by the way, to be the character after
1169                      * the `*'.  Do we have to do something analogous here
1170                      * for null bytes, because of RE_DOT_NOT_NULL?  */
1171                     if (TRANSLATE(*(p - 2)) == TRANSLATE('.')
1172                             && zero_times_ok
1173                             && p < pend && TRANSLATE(*p) == TRANSLATE('\n')
1174                             && !(syntax & RE_DOT_NEWLINE)) {    /* We have .*\n.  */
1175                         STORE_JUMP(jump, b, laststart);
1176                         keep_string_p = true;
1177                     } else
1178                         /* Anything else.  */
1179                         STORE_JUMP(maybe_pop_jump, b, laststart - 3);
1180 
1181                     /* We've added more stuff to the buffer.  */
1182                     b += 3;
1183                 }
1184                 /* On failure, jump from laststart to b + 3, which will be the
1185                  * end of the buffer after this jump is inserted.  */
1186                 GET_BUFFER_SPACE(3);
1187                 INSERT_JUMP(keep_string_p ? on_failure_keep_string_jump
1188                             : on_failure_jump,
1189                             laststart, b + 3);
1190                 pending_exact = 0;
1191                 b += 3;
1192 
1193                 if (!zero_times_ok) {
1194                     /* At least one repetition is required, so insert a
1195                      * `dummy_failure_jump' before the initial
1196                      * `on_failure_jump' instruction of the loop. This
1197                      * effects a skip over that instruction the first time
1198                      * we hit that loop.  */
1199                     GET_BUFFER_SPACE(3);
1200                     INSERT_JUMP(dummy_failure_jump, laststart, laststart + 6);
1201                     b += 3;
1202                 }
1203             }
1204             break;
1205 
1206         case '.':
1207             laststart = b;
1208             BUF_PUSH(anychar);
1209             break;
1210 
1211         case '[': {
1212             boolean had_char_class = false;
1213 
1214             if (p == pend)
1215                 return REG_EBRACK;
1216 
1217             /* Ensure that we have enough space to push a charset: the
1218              * opcode, the length count, and the bitset; 34 bytes in all.  */
1219             GET_BUFFER_SPACE(34);
1220 
1221             laststart = b;
1222 
1223             /* We test `*p == '^' twice, instead of using an if
1224              * statement, so we only need one BUF_PUSH.  */
1225             BUF_PUSH(*p == '^' ? charset_not : charset);
1226             if (*p == '^')
1227                 p++;
1228 
1229             /* Remember the first position in the bracket expression.  */
1230             p1 = p;
1231 
1232             /* Push the number of bytes in the bitmap.  */
1233             BUF_PUSH((1 << BYTEWIDTH) / BYTEWIDTH);
1234 
1235             /* Clear the whole map.  */
1236             memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH);
1237 
1238             /* charset_not matches newline according to a syntax bit.  */
1239             if ((re_opcode_t) b[-2] == charset_not
1240                     && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
1241                 SET_LIST_BIT('\n');
1242 
1243             /* Read in characters and ranges, setting map bits.  */
1244             for (;;) {
1245                 if (p == pend)
1246                     return REG_EBRACK;
1247 
1248                 PATFETCH(c);
1249 
1250                 /* \ might escape characters inside [...] and [^...].  */
1251                 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\') {
1252                     if (p == pend)
1253                         return REG_EESCAPE;
1254 
1255                     PATFETCH(c1);
1256                     SET_LIST_BIT(c1);
1257                     continue;
1258                 }
1259                 /* Could be the end of the bracket expression.  If it's
1260                  * not (i.e., when the bracket expression is `[]' so
1261                  * far), the ']' character bit gets set way below.  */
1262                 if (c == ']' && p != p1 + 1)
1263                     break;
1264 
1265                 /* Look ahead to see if it's a range when the last thing
1266                  * was a character class.  */
1267                 if (had_char_class && c == '-' && *p != ']')
1268                     return REG_ERANGE;
1269 
1270                 /* Look ahead to see if it's a range when the last thing
1271                  * was a character: if this is a hyphen not at the
1272                  * beginning or the end of a list, then it's the range
1273                  * operator.  */
1274                 if (c == '-'
1275                         && !(p - 2 >= pattern && p[-2] == '[')
1276                         && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
1277                         && *p != ']') {
1278                     reg_errcode_t ret
1279                         = compile_range(&p, pend, translate, syntax, b);
1280                     if (ret != REG_NOERROR)
1281                         return ret;
1282                 } else if (p[0] == '-' && p[1] != ']') {    /* This handles ranges made up of characters only.  */
1283                     reg_errcode_t ret;
1284 
1285                     /* Move past the `-'.  */
1286                     PATFETCH(c1);
1287 
1288                     ret = compile_range(&p, pend, translate, syntax, b);
1289                     if (ret != REG_NOERROR)
1290                         return ret;
1291                 }
1292                 /* See if we're at the beginning of a possible character
1293                  * class.  */
1294 
1295                 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':') {   /* Leave room for the null.  */
1296                     char str[CHAR_CLASS_MAX_LENGTH + 1];
1297 
1298                     PATFETCH(c);
1299                     c1 = 0;
1300 
1301                     /* If pattern is `[[:'.  */
1302                     if (p == pend)
1303                         return REG_EBRACK;
1304 
1305                     for (;;) {
1306                         PATFETCH(c);
1307                         if (c == ':' || c == ']' || p == pend
1308                                 || c1 == CHAR_CLASS_MAX_LENGTH)
1309                             break;
1310                         str[c1++] = c;
1311                     }
1312                     str[c1] = '\0';
1313 
1314                     /* If isn't a word bracketed by `[:' and:`]':
1315                      * undo the ending character, the letters, and leave
1316                      * the leading `:' and `[' (but set bits for them).  */
1317                     if (c == ':' && *p == ']') {
1318                         int ch;
1319                         boolean is_alnum = STREQ(str, "alnum");
1320                         boolean is_alpha = STREQ(str, "alpha");
1321                         boolean is_blank = STREQ(str, "blank");
1322                         boolean is_cntrl = STREQ(str, "cntrl");
1323                         boolean is_digit = STREQ(str, "digit");
1324                         boolean is_graph = STREQ(str, "graph");
1325                         boolean is_lower = STREQ(str, "lower");
1326                         boolean is_print = STREQ(str, "print");
1327                         boolean is_punct = STREQ(str, "punct");
1328                         boolean is_space = STREQ(str, "space");
1329                         boolean is_upper = STREQ(str, "upper");
1330                         boolean is_xdigit = STREQ(str, "xdigit");
1331 
1332                         if (!IS_CHAR_CLASS(str))
1333                             return REG_ECTYPE;
1334 
1335                         /* Throw away the ] at the end of the character
1336                          * class.  */
1337                         PATFETCH(c);
1338 
1339                         if (p == pend)
1340                             return REG_EBRACK;
1341 
1342                         for (ch = 0; ch < 1 << BYTEWIDTH; ch++) {
1343                             if ((is_alnum && ISALNUM(ch))
1344                                     || (is_alpha && ISALPHA(ch))
1345                                     || (is_blank && ISBLANK(ch))
1346                                     || (is_cntrl && ISCNTRL(ch))
1347                                     || (is_digit && ISDIGIT(ch))
1348                                     || (is_graph && ISGRAPH(ch))
1349                                     || (is_lower && ISLOWER(ch))
1350                                     || (is_print && ISPRINT(ch))
1351                                     || (is_punct && ISPUNCT(ch))
1352                                     || (is_space && ISSPACE(ch))
1353                                     || (is_upper && ISUPPER(ch))
1354                                     || (is_xdigit && ISXDIGIT(ch)))
1355                                 SET_LIST_BIT(ch);
1356                         }
1357                         had_char_class = true;
1358                     } else {
1359                         c1++;
1360                         while (c1--)
1361                             PATUNFETCH;
1362                         SET_LIST_BIT('[');
1363                         SET_LIST_BIT(':');
1364                         had_char_class = false;
1365                     }
1366                 } else {
1367                     had_char_class = false;
1368                     SET_LIST_BIT(c);
1369                 }
1370             }
1371 
1372             /* Discard any (non)matching list bytes that are all 0 at the
1373              * end of the map.  Decrease the map-length byte too.  */
1374             while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1375                 b[-1]--;
1376             b += b[-1];
1377         }
1378         break;
1379 
1380         case '(':
1381             if (syntax & RE_NO_BK_PARENS)
1382                 goto handle_open;
1383             else
1384                 goto normal_char;
1385 
1386         case ')':
1387             if (syntax & RE_NO_BK_PARENS)
1388                 goto handle_close;
1389             else
1390                 goto normal_char;
1391 
1392         case '\n':
1393             if (syntax & RE_NEWLINE_ALT)
1394                 goto handle_alt;
1395             else
1396                 goto normal_char;
1397 
1398         case '|':
1399             if (syntax & RE_NO_BK_VBAR)
1400                 goto handle_alt;
1401             else
1402                 goto normal_char;
1403 
1404         case '{':
1405             if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
1406                 goto handle_interval;
1407             else
1408                 goto normal_char;
1409 
1410         case '\\':
1411             if (p == pend)
1412                 return REG_EESCAPE;
1413 
1414             /* Do not translate the character after the \, so that we can
1415              * distinguish, e.g., \B from \b, even if we normally would
1416              * translate, e.g., B to b.  */
1417             PATFETCH_RAW(c);
1418 
1419             switch (c) {
1420             case '(':
1421                 if (syntax & RE_NO_BK_PARENS)
1422                     goto normal_backslash;
1423 
1424 handle_open:
1425                 bufp->re_nsub++;
1426                 regnum++;
1427 
1428                 if (compile_stack.avail == compile_stack.size) {
1429                     RETALLOC(compile_stack.stack, compile_stack.size << 1,
1430                              compile_stack_elt_t);
1431                     if (compile_stack.stack == NULL)
1432                         return REG_ESPACE;
1433 
1434                     compile_stack.size <<= 1;
1435                 }
1436                 /* These are the values to restore when we hit end of this
1437                  * group.  They are all relative offsets, so that if the
1438                  * whole pattern moves because of realloc, they will still
1439                  * be valid.  */
1440                 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
1441                 COMPILE_STACK_TOP.fixup_alt_jump
1442                     = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1443                 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
1444                 COMPILE_STACK_TOP.regnum = regnum;
1445 
1446                 /* We will eventually replace the 0 with the number of
1447                  * groups inner to this one.  But do not push a
1448                  * start_memory for groups beyond the last one we can
1449                  * represent in the compiled pattern.  */
1450                 if (regnum <= MAX_REGNUM) {
1451                     COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
1452                     BUF_PUSH_3(start_memory, regnum, 0);
1453                 }
1454                 compile_stack.avail++;
1455 
1456                 fixup_alt_jump = 0;
1457                 laststart = 0;
1458                 begalt = b;
1459                 /* If we've reached MAX_REGNUM groups, then this open
1460                  * won't actually generate any code, so we'll have to
1461                  * clear pending_exact explicitly.  */
1462                 pending_exact = 0;
1463                 break;
1464 
1465             case ')':
1466                 if (syntax & RE_NO_BK_PARENS)
1467                     goto normal_backslash;
1468 
1469                 if (compile_stack.avail == 0) {
1470                     if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1471                         goto normal_backslash;
1472                     else
1473                         return REG_ERPAREN;
1474                 }
1475 handle_close:
1476                 if (fixup_alt_jump) {
1477                     /* Push a dummy failure point at the end of the
1478                          * alternative for a possible future
1479                          * `pop_failure_jump' to pop.  See comments at
1480                          * `push_dummy_failure' in `re_match_2'.  */
1481                     BUF_PUSH(push_dummy_failure);
1482 
1483                     /* We allocated space for this jump when we assigned
1484                      * to `fixup_alt_jump', in the `handle_alt' case below.  */
1485                     STORE_JUMP(jump_past_alt, fixup_alt_jump, b - 1);
1486                 }
1487                 /* See similar code for backslashed left paren above.  */
1488                 if (compile_stack.avail == 0) {
1489                     if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
1490                         goto normal_char;
1491                     else
1492                         return REG_ERPAREN;
1493                 }
1494                 /* Since we just checked for an empty stack above, this
1495                  * ``can't happen''.  */
1496                 assert(compile_stack.avail != 0);
1497                 {
1498                     /* We don't just want to restore into `regnum', because
1499                      * later groups should continue to be numbered higher,
1500                      * as in `(ab)c(de)' -- the second group is #2.  */
1501                     regnum_t this_group_regnum;
1502 
1503                     compile_stack.avail--;
1504                     begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
1505                     fixup_alt_jump
1506                         = COMPILE_STACK_TOP.fixup_alt_jump
1507                           ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
1508                           : 0;
1509                     laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
1510                     this_group_regnum = COMPILE_STACK_TOP.regnum;
1511                     /* If we've reached MAX_REGNUM groups, then this open
1512                      * won't actually generate any code, so we'll have to
1513                      * clear pending_exact explicitly.  */
1514                     pending_exact = 0;
1515 
1516                     /* We're at the end of the group, so now we know how many
1517                      * groups were inside this one.  */
1518                     if (this_group_regnum <= MAX_REGNUM) {
1519                         unsigned char *inner_group_loc
1520                             = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
1521 
1522                         *inner_group_loc = regnum - this_group_regnum;
1523                         BUF_PUSH_3(stop_memory, this_group_regnum,
1524                                    regnum - this_group_regnum);
1525                     }
1526                 }
1527                 break;
1528 
1529             case '|':       /* `\|'.  */
1530                 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
1531                     goto normal_backslash;
1532 handle_alt:
1533                 if (syntax & RE_LIMITED_OPS)
1534                     goto normal_char;
1535 
1536                 /* Insert before the previous alternative a jump which
1537                  * jumps to this alternative if the former fails.  */
1538                 GET_BUFFER_SPACE(3);
1539                 INSERT_JUMP(on_failure_jump, begalt, b + 6);
1540                 pending_exact = 0;
1541                 b += 3;
1542 
1543                 /* The alternative before this one has a jump after it
1544                  * which gets executed if it gets matched.  Adjust that
1545                  * jump so it will jump to this alternative's analogous
1546                  * jump (put in below, which in turn will jump to the next
1547                  * (if any) alternative's such jump, etc.).  The last such
1548                  * jump jumps to the correct final destination.  A picture:
1549                  * _____ _____
1550                  * |   | |   |
1551                  * |   v |   v
1552                  * a | b   | c
1553                  *
1554                  * If we are at `b', then fixup_alt_jump right now points to a
1555                  * three-byte space after `a'.  We'll put in the jump, set
1556                  * fixup_alt_jump to right after `b', and leave behind three
1557                  * bytes which we'll fill in when we get to after `c'.  */
1558 
1559                 if (fixup_alt_jump)
1560                     STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
1561 
1562                 /* Mark and leave space for a jump after this alternative,
1563                  * to be filled in later either by next alternative or
1564                  * when know we're at the end of a series of alternatives.  */
1565                 fixup_alt_jump = b;
1566                 GET_BUFFER_SPACE(3);
1567                 b += 3;
1568 
1569                 laststart = 0;
1570                 begalt = b;
1571                 break;
1572 
1573             case '{':
1574                 /* If \{ is a literal.  */
1575                 if (!(syntax & RE_INTERVALS)
1576                         /* If we're at `\{' and it's not the open-interval
1577                          * operator.  */
1578                         || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
1579                         || (p - 2 == pattern && p == pend))
1580                     goto normal_backslash;
1581 
1582 handle_interval: {
1583                     /* If got here, then the syntax allows intervals.  */
1584 
1585                     /* At least (most) this many matches must be made.  */
1586                     int lower_bound = -1, upper_bound = -1;
1587 
1588                     beg_interval = p - 1;
1589 
1590                     if (p == pend) {
1591                         if (syntax & RE_NO_BK_BRACES)
1592                             goto unfetch_interval;
1593                         else
1594                             return REG_EBRACE;
1595                     }
1596                     GET_UNSIGNED_NUMBER(lower_bound);
1597 
1598                     if (c == ',') {
1599                         GET_UNSIGNED_NUMBER(upper_bound);
1600                         if (upper_bound < 0)
1601                             upper_bound = RE_DUP_MAX;
1602                     } else
1603                         /* Interval such as `{1}' => match exactly once. */
1604                         upper_bound = lower_bound;
1605 
1606                     if (lower_bound < 0 || upper_bound > RE_DUP_MAX
1607                             || lower_bound > upper_bound) {
1608                         if (syntax & RE_NO_BK_BRACES)
1609                             goto unfetch_interval;
1610                         else
1611                             return REG_BADBR;
1612                     }
1613                     if (!(syntax & RE_NO_BK_BRACES)) {
1614                         if (c != '\\')
1615                             return REG_EBRACE;
1616 
1617                         PATFETCH(c);
1618                     }
1619                     if (c != '}') {
1620                         if (syntax & RE_NO_BK_BRACES)
1621                             goto unfetch_interval;
1622                         else
1623                             return REG_BADBR;
1624                     }
1625                     /* We just parsed a valid interval.  */
1626 
1627                     /* If it's invalid to have no preceding re.  */
1628                     if (!laststart) {
1629                         if (syntax & RE_CONTEXT_INVALID_OPS)
1630                             return REG_BADRPT;
1631                         else if (syntax & RE_CONTEXT_INDEP_OPS)
1632                             laststart = b;
1633                         else
1634                             goto unfetch_interval;
1635                     }
1636                     /* If the upper bound is zero, don't want to succeed at
1637                      * all; jump from `laststart' to `b + 3', which will be
1638                      * the end of the buffer after we insert the jump.  */
1639                     if (upper_bound == 0) {
1640                         GET_BUFFER_SPACE(3);
1641                         INSERT_JUMP(jump, laststart, b + 3);
1642                         b += 3;
1643                     }
1644                     /* Otherwise, we have a nontrivial interval.  When
1645                      * we're all done, the pattern will look like:
1646                      * set_number_at <jump count> <upper bound>
1647                      * set_number_at <succeed_n count> <lower bound>
1648                      * succeed_n <after jump addr> <succed_n count>
1649                      * <body of loop>
1650                      * jump_n <succeed_n addr> <jump count>
1651                      * (The upper bound and `jump_n' are omitted if
1652                      * `upper_bound' is 1, though.)  */
1653                     else {
1654                         /* If the upper bound is > 1, we need to insert
1655                         * more at the end of the loop.  */
1656                         unsigned nbytes = 10 + (upper_bound > 1) * 10;
1657 
1658                         GET_BUFFER_SPACE(nbytes);
1659 
1660                         /* Initialize lower bound of the `succeed_n', even
1661                          * though it will be set during matching by its
1662                          * attendant `set_number_at' (inserted next),
1663                          * because `re_compile_fastmap' needs to know.
1664                          * Jump to the `jump_n' we might insert below.  */
1665                         INSERT_JUMP2(succeed_n, laststart,
1666                                      b + 5 + (upper_bound > 1) * 5,
1667                                      lower_bound);
1668                         b += 5;
1669 
1670                         /* Code to initialize the lower bound.  Insert
1671                          * before the `succeed_n'.  The `5' is the last two
1672                          * bytes of this `set_number_at', plus 3 bytes of
1673                          * the following `succeed_n'.  */
1674                         insert_op2(set_number_at, laststart, 5, lower_bound, b);
1675                         b += 5;
1676 
1677                         if (upper_bound > 1) {
1678                             /* More than one repetition is allowed, so
1679                              * append a backward jump to the `succeed_n'
1680                              * that starts this interval.
1681                              *
1682                              * When we've reached this during matching,
1683                              * we'll have matched the interval once, so
1684                              * jump back only `upper_bound - 1' times.  */
1685                             STORE_JUMP2(jump_n, b, laststart + 5,
1686                                         upper_bound - 1);
1687                             b += 5;
1688 
1689                             /* The location we want to set is the second
1690                              * parameter of the `jump_n'; that is `b-2' as
1691                              * an absolute address.  `laststart' will be
1692                              * the `set_number_at' we're about to insert;
1693                              * `laststart+3' the number to set, the source
1694                              * for the relative address.  But we are
1695                              * inserting into the middle of the pattern --
1696                              * so everything is getting moved up by 5.
1697                              * Conclusion: (b - 2) - (laststart + 3) + 5,
1698                              * i.e., b - laststart.
1699                              *
1700                              * We insert this at the beginning of the loop
1701                              * so that if we fail during matching, we'll
1702                              * reinitialize the bounds.  */
1703                             insert_op2(set_number_at, laststart, b - laststart,
1704                                        upper_bound - 1, b);
1705                             b += 5;
1706                         }
1707                     }
1708                     pending_exact = 0;
1709                     beg_interval = NULL;
1710                 }
1711                 break;
1712 
1713 unfetch_interval:
1714                 /* If an invalid interval, match the characters as literals.  */
1715                 assert(beg_interval);
1716                 p = beg_interval;
1717                 beg_interval = NULL;
1718 
1719                 /* normal_char and normal_backslash need `c'.  */
1720                 PATFETCH(c);
1721 
1722                 if (!(syntax & RE_NO_BK_BRACES)) {
1723                     if (p > pattern && p[-1] == '\\')
1724                         goto normal_backslash;
1725                 }
1726                 goto normal_char;
1727 
1728             case 'w':
1729                 laststart = b;
1730                 BUF_PUSH(wordchar);
1731                 break;
1732 
1733             case 'W':
1734                 laststart = b;
1735                 BUF_PUSH(notwordchar);
1736                 break;
1737 
1738             case '<':
1739                 BUF_PUSH(wordbeg);
1740                 break;
1741 
1742             case '>':
1743                 BUF_PUSH(wordend);
1744                 break;
1745 
1746             case 'b':
1747                 BUF_PUSH(wordbound);
1748                 break;
1749 
1750             case 'B':
1751                 BUF_PUSH(notwordbound);
1752                 break;
1753 
1754             case '`':
1755                 BUF_PUSH(begbuf);
1756                 break;
1757 
1758             case '\'':
1759                 BUF_PUSH(endbuf);
1760                 break;
1761 
1762             case '1':
1763             case '2':
1764             case '3':
1765             case '4':
1766             case '5':
1767             case '6':
1768             case '7':
1769             case '8':
1770             case '9':
1771                 if (syntax & RE_NO_BK_REFS)
1772                     goto normal_char;
1773 
1774                 c1 = c - '0';
1775 
1776                 if (c1 > regnum)
1777                     return REG_ESUBREG;
1778 
1779                 /* Can't back reference to a subexpression if inside of it.  */
1780                 if (group_in_compile_stack(compile_stack, c1))
1781                     goto normal_char;
1782 
1783                 laststart = b;
1784                 BUF_PUSH_2(duplicate, c1);
1785                 break;
1786 
1787             case '+':
1788             case '?':
1789                 if (syntax & RE_BK_PLUS_QM)
1790                     goto handle_plus;
1791                 else
1792                     goto normal_backslash;
1793 
1794             default:
1795 normal_backslash:
1796                 /* You might think it would be useful for \ to mean
1797                  * not to translate; but if we don't translate it
1798                  * it will never match anything.  */
1799                 c = TRANSLATE(c);
1800                 goto normal_char;
1801             }
1802             break;
1803 
1804         default:
1805             /* Expects the character in `c'.  */
1806 normal_char:
1807             /* If no exactn currently being built.  */
1808             if (!pending_exact
1809 
1810                     /* If last exactn not at current position.  */
1811                     || pending_exact + *pending_exact + 1 != b
1812 
1813                     /* We have only one byte following the exactn for the count.  */
1814                     || *pending_exact == (1 << BYTEWIDTH) - 1
1815 
1816                     /* If followed by a repetition operator.  */
1817                     || *p == '*' || *p == '^'
1818                     || ((syntax & RE_BK_PLUS_QM)
1819                         ? *p == '\\' && (p[1] == '+' || p[1] == '?')
1820                         : (*p == '+' || *p == '?'))
1821                     || ((syntax & RE_INTERVALS)
1822                         && ((syntax & RE_NO_BK_BRACES)
1823                             ? *p == '{'
1824                             : (p[0] == '\\' && p[1] == '{')))) {
1825                 /* Start building a new exactn.  */
1826 
1827                 laststart = b;
1828 
1829                 BUF_PUSH_2(exactn, 0);
1830                 pending_exact = b - 1;
1831             }
1832             BUF_PUSH(c);
1833             (*pending_exact)++;
1834             break;
1835         }           /* switch (c) */
1836     }               /* while p != pend */
1837 
1838     /* Through the pattern now.  */
1839 
1840     if (fixup_alt_jump)
1841         STORE_JUMP(jump_past_alt, fixup_alt_jump, b);
1842 
1843     if (compile_stack.avail != 0)
1844         return REG_EPAREN;
1845 
1846     free(compile_stack.stack);
1847 
1848     /* We have succeeded; set the length of the buffer.  */
1849     bufp->used = b - bufp->buffer;
1850 
1851 #ifdef DEBUG
1852     if (debug) {
1853         DEBUG_PRINT1("\nCompiled pattern: ");
1854         print_compiled_pattern(bufp);
1855     }
1856 #endif /* DEBUG */
1857 
1858     return REG_NOERROR;
1859 }               /* regex_compile */
1860 
1861 /* Subroutines for `regex_compile'.  */
1862 
1863 /* Store OP at LOC followed by two-byte integer parameter ARG.  */
1864 
store_op1(re_opcode_t op,unsigned char * loc,int arg)1865 void store_op1(re_opcode_t op, unsigned char *loc, int arg)
1866 {
1867     *loc = (unsigned char) op;
1868     STORE_NUMBER(loc + 1, arg);
1869 }
1870 
1871 /* Like `store_op1', but for two two-byte parameters ARG1 and ARG2.  */
1872 
1873 void
store_op2(re_opcode_t op,unsigned char * loc,int arg1,int arg2)1874 store_op2( re_opcode_t op, unsigned char *loc, int arg1, int arg2)
1875 {
1876     *loc = (unsigned char) op;
1877     STORE_NUMBER(loc + 1, arg1);
1878     STORE_NUMBER(loc + 3, arg2);
1879 }
1880 
1881 /* Copy the bytes from LOC to END to open up three bytes of space at LOC
1882  * for OP followed by two-byte integer parameter ARG.  */
1883 
1884 void
insert_op1(re_opcode_t op,unsigned char * loc,int arg,unsigned char * end)1885 insert_op1(re_opcode_t op, unsigned char *loc, int arg, unsigned char *end)
1886 {
1887     register unsigned char *pfrom = end;
1888     register unsigned char *pto = end + 3;
1889 
1890     while (pfrom != loc)
1891         *--pto = *--pfrom;
1892 
1893     store_op1(op, loc, arg);
1894 }
1895 
1896 /* Like `insert_op1', but for two two-byte parameters ARG1 and ARG2.  */
1897 
1898 void
insert_op2(re_opcode_t op,unsigned char * loc,int arg1,int arg2,unsigned char * end)1899 insert_op2(re_opcode_t op, unsigned char *loc, int arg1, int arg2, unsigned char *end)
1900 {
1901     register unsigned char *pfrom = end;
1902     register unsigned char *pto = end + 5;
1903 
1904     while (pfrom != loc)
1905         *--pto = *--pfrom;
1906 
1907     store_op2(op, loc, arg1, arg2);
1908 }
1909 
1910 /* P points to just after a ^ in PATTERN.  Return true if that ^ comes
1911  * after an alternative or a begin-subexpression.  We assume there is at
1912  * least one character before the ^.  */
1913 
1914 boolean
at_begline_loc_p(const char * pattern,const char * p,reg_syntax_t syntax)1915 at_begline_loc_p(const char * pattern, const char *p, reg_syntax_t syntax)
1916 {
1917     const char *prev = p - 2;
1918     boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
1919 
1920     return
1921         /* After a subexpression?  */
1922         (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
1923         /* After an alternative?  */
1924         || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
1925 }
1926 
1927 /* The dual of at_begline_loc_p.  This one is for $.  We assume there is
1928  * at least one character after the $, i.e., `P < PEND'.  */
1929 
1930 boolean
at_endline_loc_p(const char * p,const char * pend,int syntax)1931 at_endline_loc_p(const char *p, const char *pend, int syntax)
1932 {
1933     const char *next = p;
1934     boolean next_backslash = *next == '\\';
1935     const char *next_next = p + 1 < pend ? p + 1 : NULL;
1936 
1937     return
1938         /* Before a subexpression?  */
1939         (syntax & RE_NO_BK_PARENS ? *next == ')'
1940          : next_backslash && next_next && *next_next == ')')
1941         /* Before an alternative?  */
1942         || (syntax & RE_NO_BK_VBAR ? *next == '|'
1943             : next_backslash && next_next && *next_next == '|');
1944 }
1945 
1946 /* Returns true if REGNUM is in one of COMPILE_STACK's elements and
1947  * false if it's not.  */
1948 
1949 boolean
group_in_compile_stack(compile_stack_type compile_stack,regnum_t regnum)1950 group_in_compile_stack(compile_stack_type compile_stack, regnum_t regnum)
1951 {
1952     int this_element;
1953 
1954     for (this_element = compile_stack.avail - 1;
1955             this_element >= 0;
1956             this_element--)
1957         if (compile_stack.stack[this_element].regnum == regnum)
1958             return true;
1959 
1960     return false;
1961 }
1962 
1963 /* Read the ending character of a range (in a bracket expression) from the
1964  * uncompiled pattern *P_PTR (which ends at PEND).  We assume the
1965  * starting character is in `P[-2]'.  (`P[-1]' is the character `-'.)
1966  * Then we set the translation of all bits between the starting and
1967  * ending characters (inclusive) in the compiled pattern B.
1968  *
1969  * Return an error code.
1970  *
1971  * We use these short variable names so we can use the same macros as
1972  * `regex_compile' itself.  */
1973 
1974 reg_errcode_t
compile_range(const char ** p_ptr,const char * pend,char * translate,reg_syntax_t syntax,unsigned char * b)1975 compile_range(const char **p_ptr, const char *pend, char *translate, reg_syntax_t syntax, unsigned char *b)
1976 {
1977     unsigned this_char;
1978 
1979     const char *p = *p_ptr;
1980     int range_start, range_end;
1981 
1982     if (p == pend)
1983         return REG_ERANGE;
1984 
1985     /* Even though the pattern is a signed `char *', we need to fetch
1986      * with unsigned char *'s; if the high bit of the pattern character
1987      * is set, the range endpoints will be negative if we fetch using a
1988      * signed char *.
1989      *
1990      * We also want to fetch the endpoints without translating them; the
1991      * appropriate translation is done in the bit-setting loop below.  */
1992     range_start = ((unsigned char *) p)[-2];
1993     range_end = ((unsigned char *) p)[0];
1994 
1995     /* Have to increment the pointer into the pattern string, so the
1996      * caller isn't still at the ending character.  */
1997     (*p_ptr)++;
1998 
1999     /* If the start is after the end, the range is empty.  */
2000     if (range_start > range_end)
2001         return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
2002 
2003     /* Here we see why `this_char' has to be larger than an `unsigned
2004      * char' -- the range is inclusive, so if `range_end' == 0xff
2005      * (assuming 8-bit characters), we would otherwise go into an infinite
2006      * loop, since all characters <= 0xff.  */
2007     for (this_char = range_start; this_char <= range_end; this_char++) {
2008         SET_LIST_BIT(TRANSLATE(this_char));
2009     }
2010 
2011     return REG_NOERROR;
2012 }
2013 
2014 /* Failure stack declarations and macros; both re_compile_fastmap and
2015  * re_match_2 use a failure stack.  These have to be macros because of
2016  * REGEX_ALLOCATE.  */
2017 
2018 /* Number of failure points for which to initially allocate space
2019  * when matching.  If this number is exceeded, we allocate more
2020  * space, so it is not a hard limit.  */
2021 #ifndef INIT_FAILURE_ALLOC
2022 #define INIT_FAILURE_ALLOC 5
2023 #endif
2024 
2025 /* Roughly the maximum number of failure points on the stack.  Would be
2026  * exactly that if always used MAX_FAILURE_SPACE each time we failed.
2027  * This is a variable only so users of regex can assign to it; we never
2028  * change it ourselves.  */
2029 int re_max_failures = 2000;
2030 
2031 typedef const unsigned char *fail_stack_elt_t;
2032 
2033 typedef struct {
2034     fail_stack_elt_t *stack;
2035     unsigned size;
2036     unsigned avail;     /* Offset of next open position.  */
2037 } fail_stack_type;
2038 
2039 #define FAIL_STACK_EMPTY()     (fail_stack.avail == 0)
2040 #define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
2041 #define FAIL_STACK_FULL()      (fail_stack.avail == fail_stack.size)
2042 #define FAIL_STACK_TOP()       (fail_stack.stack[fail_stack.avail])
2043 
2044 /* Initialize `fail_stack'.  Do `return -2' if the alloc fails.  */
2045 
2046 #define INIT_FAIL_STACK()                       \
2047   do {                                  \
2048     fail_stack.stack = (fail_stack_elt_t *)             \
2049       REGEX_ALLOCATE (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t));  \
2050                                     \
2051     if (fail_stack.stack == NULL)                   \
2052       return -2;                            \
2053                                     \
2054     fail_stack.size = INIT_FAILURE_ALLOC;               \
2055     fail_stack.avail = 0;                       \
2056   } while (0)
2057 
2058 /* Double the size of FAIL_STACK, up to approximately `re_max_failures' items.
2059  *
2060  * Return 1 if succeeds, and 0 if either ran out of memory
2061  * allocating space for it or it was already too large.
2062  *
2063  * REGEX_REALLOCATE requires `destination' be declared.   */
2064 
2065 #define DOUBLE_FAIL_STACK(fail_stack)                   \
2066   ((fail_stack).size > re_max_failures * MAX_FAILURE_ITEMS      \
2067    ? 0                                  \
2068    : ((fail_stack).stack = (fail_stack_elt_t *)             \
2069         REGEX_REALLOCATE ((fail_stack).stack,               \
2070           (fail_stack).size * sizeof (fail_stack_elt_t),        \
2071           ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)),    \
2072                                     \
2073       (fail_stack).stack == NULL                    \
2074       ? 0                               \
2075       : ((fail_stack).size <<= 1,                   \
2076          1)))
2077 
2078 /* Push PATTERN_OP on FAIL_STACK.
2079  *
2080  * Return 1 if was able to do so and 0 if ran out of memory allocating
2081  * space to do so.  */
2082 #define PUSH_PATTERN_OP(pattern_op, fail_stack)             \
2083   ((FAIL_STACK_FULL ()                          \
2084     && !DOUBLE_FAIL_STACK (fail_stack))                 \
2085     ? 0                                 \
2086     : ((fail_stack).stack[(fail_stack).avail++] = pattern_op,       \
2087        1))
2088 
2089 /* This pushes an item onto the failure stack.  Must be a four-byte
2090  * value.  Assumes the variable `fail_stack'.  Probably should only
2091  * be called from within `PUSH_FAILURE_POINT'.  */
2092 #define PUSH_FAILURE_ITEM(item)                     \
2093   fail_stack.stack[fail_stack.avail++] = (fail_stack_elt_t) item
2094 
2095 /* The complement operation.  Assumes `fail_stack' is nonempty.  */
2096 #define POP_FAILURE_ITEM() fail_stack.stack[--fail_stack.avail]
2097 
2098 /* Used to omit pushing failure point id's when we're not debugging.  */
2099 #ifdef DEBUG
2100 #define DEBUG_PUSH PUSH_FAILURE_ITEM
2101 #define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_ITEM ()
2102 #else
2103 #define DEBUG_PUSH(item)
2104 #define DEBUG_POP(item_addr)
2105 #endif
2106 
2107 /* Push the information about the state we will need
2108  * if we ever fail back to it.
2109  *
2110  * Requires variables fail_stack, regstart, regend, reg_info, and
2111  * num_regs be declared.  DOUBLE_FAIL_STACK requires `destination' be
2112  * declared.
2113  *
2114  * Does `return FAILURE_CODE' if runs out of memory.  */
2115 
2116 #define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code)   \
2117   do {                                  \
2118     char *destination;                          \
2119     /* Must be int, so when we don't save any registers, the arithmetic \
2120        of 0 + -1 isn't done as unsigned.  */                \
2121     int this_reg;                           \
2122                                         \
2123     DEBUG_STATEMENT (failure_id++);                 \
2124     DEBUG_STATEMENT (nfailure_points_pushed++);             \
2125     DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id);       \
2126     DEBUG_PRINT2 ("  Before push, next avail: %d\n", (fail_stack).avail);\
2127     DEBUG_PRINT2 ("                     size: %d\n", (fail_stack).size);\
2128                                     \
2129     DEBUG_PRINT2 ("  slots needed: %d\n", NUM_FAILURE_ITEMS);       \
2130     DEBUG_PRINT2 ("     available: %d\n", REMAINING_AVAIL_SLOTS);   \
2131                                     \
2132     /* Ensure we have enough space allocated for what we will push.  */ \
2133     while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS)           \
2134       {                                 \
2135         if (!DOUBLE_FAIL_STACK (fail_stack))            \
2136           return failure_code;                      \
2137                                     \
2138         DEBUG_PRINT2 ("\n  Doubled stack; size now: %d\n",      \
2139                (fail_stack).size);              \
2140         DEBUG_PRINT2 ("  slots available: %d\n", REMAINING_AVAIL_SLOTS);\
2141       }                                 \
2142                                     \
2143     /* Push the info, starting with the registers.  */          \
2144     DEBUG_PRINT1 ("\n");                        \
2145                                     \
2146     for (this_reg = lowest_active_reg; this_reg <= highest_active_reg;  \
2147          this_reg++)                            \
2148       {                                 \
2149     DEBUG_PRINT2 ("  Pushing reg: %d\n", this_reg);         \
2150         DEBUG_STATEMENT (num_regs_pushed++);                \
2151                                     \
2152     DEBUG_PRINT2 ("    start: 0x%x\n", regstart[this_reg]);     \
2153         PUSH_FAILURE_ITEM (regstart[this_reg]);             \
2154                                                                         \
2155     DEBUG_PRINT2 ("    end: 0x%x\n", regend[this_reg]);     \
2156         PUSH_FAILURE_ITEM (regend[this_reg]);               \
2157                                     \
2158     DEBUG_PRINT2 ("    info: 0x%x\n      ", reg_info[this_reg]);    \
2159         DEBUG_PRINT2 (" match_null=%d",                 \
2160                       REG_MATCH_NULL_STRING_P (reg_info[this_reg]));    \
2161         DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg]));    \
2162         DEBUG_PRINT2 (" matched_something=%d",              \
2163                       MATCHED_SOMETHING (reg_info[this_reg]));      \
2164         DEBUG_PRINT2 (" ever_matched=%d",               \
2165                       EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
2166     DEBUG_PRINT1 ("\n");                        \
2167         PUSH_FAILURE_ITEM (reg_info[this_reg].word);            \
2168       }                                 \
2169                                     \
2170     DEBUG_PRINT2 ("  Pushing  low active reg: %d\n", lowest_active_reg);\
2171     PUSH_FAILURE_ITEM (lowest_active_reg);              \
2172                                     \
2173     DEBUG_PRINT2 ("  Pushing high active reg: %d\n", highest_active_reg);\
2174     PUSH_FAILURE_ITEM (highest_active_reg);             \
2175                                     \
2176     DEBUG_PRINT2 ("  Pushing pattern 0x%x: ", pattern_place);       \
2177     DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend);       \
2178     PUSH_FAILURE_ITEM (pattern_place);                  \
2179                                     \
2180     DEBUG_PRINT2 ("  Pushing string 0x%x: `", string_place);        \
2181     DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2,   \
2182                  size2);                \
2183     DEBUG_PRINT1 ("'\n");                       \
2184     PUSH_FAILURE_ITEM (string_place);                   \
2185                                     \
2186     DEBUG_PRINT2 ("  Pushing failure id: %u\n", failure_id);        \
2187     DEBUG_PUSH (failure_id);                        \
2188   } while (0)
2189 
2190 /* This is the number of items that are pushed and popped on the stack
2191  * for each register.  */
2192 #define NUM_REG_ITEMS  3
2193 
2194 /* Individual items aside from the registers.  */
2195 #ifdef DEBUG
2196 #define NUM_NONREG_ITEMS 5  /* Includes failure point id.  */
2197 #else
2198 #define NUM_NONREG_ITEMS 4
2199 #endif
2200 
2201 /* We push at most this many items on the stack.  */
2202 #define MAX_FAILURE_ITEMS ((num_regs - 1) * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2203 
2204 /* We actually push this many items.  */
2205 #define NUM_FAILURE_ITEMS                       \
2206   ((highest_active_reg - lowest_active_reg + 1) * NUM_REG_ITEMS     \
2207     + NUM_NONREG_ITEMS)
2208 
2209 /* How many items can still be added to the stack without overflowing it.  */
2210 #define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
2211 
2212 /* Pops what PUSH_FAIL_STACK pushes.
2213  *
2214  * We restore into the parameters, all of which should be lvalues:
2215  * STR -- the saved data position.
2216  * PAT -- the saved pattern position.
2217  * LOW_REG, HIGH_REG -- the highest and lowest active registers.
2218  * REGSTART, REGEND -- arrays of string positions.
2219  * REG_INFO -- array of information about each subexpression.
2220  *
2221  * Also assumes the variables `fail_stack' and (if debugging), `bufp',
2222  * `pend', `string1', `size1', `string2', and `size2'.  */
2223 
2224 #define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
2225 {                                   \
2226   DEBUG_STATEMENT (fail_stack_elt_t failure_id;)            \
2227   int this_reg;                             \
2228   const unsigned char *string_temp;                 \
2229                                     \
2230   assert (!FAIL_STACK_EMPTY ());                    \
2231                                     \
2232   /* Remove failure points and point to how many regs pushed.  */   \
2233   DEBUG_PRINT1 ("POP_FAILURE_POINT:\n");                \
2234   DEBUG_PRINT2 ("  Before pop, next avail: %d\n", fail_stack.avail);    \
2235   DEBUG_PRINT2 ("                    size: %d\n", fail_stack.size); \
2236                                     \
2237   assert (fail_stack.avail >= NUM_NONREG_ITEMS);            \
2238                                     \
2239   DEBUG_POP (&failure_id);                      \
2240   DEBUG_PRINT2 ("  Popping failure id: %u\n", failure_id);      \
2241                                     \
2242   /* If the saved string location is NULL, it came from an      \
2243      on_failure_keep_string_jump opcode, and we want to throw away the  \
2244      saved NULL, thus retaining our current position in the string.  */ \
2245   string_temp = POP_FAILURE_ITEM ();                    \
2246   if (string_temp != NULL)                      \
2247     str = (const char *) string_temp;                   \
2248                                     \
2249   DEBUG_PRINT2 ("  Popping string 0x%x: `", str);           \
2250   DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2);  \
2251   DEBUG_PRINT1 ("'\n");                         \
2252                                     \
2253   pat = (unsigned char *) POP_FAILURE_ITEM ();              \
2254   DEBUG_PRINT2 ("  Popping pattern 0x%x: ", pat);           \
2255   DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend);           \
2256                                     \
2257   /* Restore register info.  */                     \
2258   high_reg = (unsigned long) POP_FAILURE_ITEM ();           \
2259   DEBUG_PRINT2 ("  Popping high active reg: %d\n", high_reg);       \
2260                                     \
2261   low_reg = (unsigned long) POP_FAILURE_ITEM ();            \
2262   DEBUG_PRINT2 ("  Popping  low active reg: %d\n", low_reg);        \
2263                                     \
2264   for (this_reg = high_reg; this_reg >= low_reg; this_reg--)        \
2265     {                                   \
2266       DEBUG_PRINT2 ("    Popping reg: %d\n", this_reg);         \
2267                                     \
2268       reg_info[this_reg].word = POP_FAILURE_ITEM ();            \
2269       DEBUG_PRINT2 ("      info: 0x%x\n", reg_info[this_reg]);      \
2270                                     \
2271       regend[this_reg] = (const char *) POP_FAILURE_ITEM ();        \
2272       DEBUG_PRINT2 ("      end: 0x%x\n", regend[this_reg]);     \
2273                                     \
2274       regstart[this_reg] = (const char *) POP_FAILURE_ITEM ();      \
2275       DEBUG_PRINT2 ("      start: 0x%x\n", regstart[this_reg]);     \
2276     }                                   \
2277                                     \
2278   DEBUG_STATEMENT (nfailure_points_popped++);               \
2279 }               /* POP_FAILURE_POINT */
2280 
2281 /* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
2282  * BUFP.  A fastmap records which of the (1 << BYTEWIDTH) possible
2283  * characters can start a string that matches the pattern.  This fastmap
2284  * is used by re_search to skip quickly over impossible starting points.
2285  *
2286  * The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2287  * area as BUFP->fastmap.
2288  *
2289  * We set the `fastmap', `fastmap_accurate', and `can_be_null' fields in
2290  * the pattern buffer.
2291  *
2292  * Returns 0 if we succeed, -2 if an internal error.   */
2293 #ifdef STDC_HEADERS
2294 int
re_compile_fastmap(struct re_pattern_buffer * bufp)2295 re_compile_fastmap(struct re_pattern_buffer *bufp)
2296 #else
2297 int
2298 re_compile_fastmap(bufp)
2299 struct re_pattern_buffer *bufp;
2300 #endif
2301 {
2302     int j, k;
2303     fail_stack_type fail_stack;
2304 #ifndef REGEX_MALLOC
2305     char *destination;
2306 #endif
2307     /* We don't push any register information onto the failure stack.  */
2308     unsigned num_regs = 0;
2309 
2310     register char *fastmap = bufp->fastmap;
2311     unsigned char *pattern = bufp->buffer;
2312     unsigned long size = bufp->used;
2313     const unsigned char *p = pattern;
2314     register unsigned char *pend = pattern + size;
2315 
2316     /* Assume that each path through the pattern can be null until
2317      * proven otherwise.  We set this false at the bottom of switch
2318      * statement, to which we get only if a particular path doesn't
2319      * match the empty string.  */
2320     boolean path_can_be_null = true;
2321 
2322     /* We aren't doing a `succeed_n' to begin with.  */
2323     boolean succeed_n_p = false;
2324 
2325     assert(fastmap != NULL && p != NULL);
2326 
2327     INIT_FAIL_STACK();
2328     memset(fastmap, 0, 1 << BYTEWIDTH);     /* Assume nothing's valid.  */
2329     bufp->fastmap_accurate = 1; /* It will be when we're done.  */
2330     bufp->can_be_null = 0;
2331 
2332     while (p != pend || !FAIL_STACK_EMPTY()) {
2333         if (p == pend) {
2334             bufp->can_be_null |= path_can_be_null;
2335 
2336             /* Reset for next path.  */
2337             path_can_be_null = true;
2338 
2339             p = fail_stack.stack[--fail_stack.avail];
2340         }
2341         /* We should never be about to go beyond the end of the pattern.  */
2342         assert(p < pend);
2343 
2344 #ifdef SWITCH_ENUM_BUG
2345         switch ((int) ((re_opcode_t) * p++))
2346 #else
2347         switch ((re_opcode_t) * p++)
2348 #endif
2349         {
2350 
2351         /* I guess the idea here is to simply not bother with a fastmap
2352          * if a backreference is used, since it's too hard to figure out
2353          * the fastmap for the corresponding group.  Setting
2354          * `can_be_null' stops `re_search_2' from using the fastmap, so
2355          * that is all we do.  */
2356         case duplicate:
2357             bufp->can_be_null = 1;
2358             return 0;
2359 
2360         /* Following are the cases which match a character.  These end
2361          * with `break'.  */
2362 
2363         case exactn:
2364             fastmap[p[1]] = 1;
2365             break;
2366 
2367         case charset:
2368             for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2369                 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2370                     fastmap[j] = 1;
2371             break;
2372 
2373         case charset_not:
2374             /* Chars beyond end of map must be allowed.  */
2375             for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2376                 fastmap[j] = 1;
2377 
2378             for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2379                 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2380                     fastmap[j] = 1;
2381             break;
2382 
2383         case wordchar:
2384             for (j = 0; j < (1 << BYTEWIDTH); j++)
2385                 if (re_syntax_table[j] == Sword)
2386                     fastmap[j] = 1;
2387             break;
2388 
2389         case notwordchar:
2390             for (j = 0; j < (1 << BYTEWIDTH); j++)
2391                 if (re_syntax_table[j] != Sword)
2392                     fastmap[j] = 1;
2393             break;
2394 
2395         case anychar:
2396             /* `.' matches anything ...  */
2397             for (j = 0; j < (1 << BYTEWIDTH); j++)
2398                 fastmap[j] = 1;
2399 
2400             /* ... except perhaps newline.  */
2401             if (!(bufp->syntax & RE_DOT_NEWLINE))
2402                 fastmap['\n'] = 0;
2403 
2404             /* Return if we have already set `can_be_null'; if we have,
2405              * then the fastmap is irrelevant.  Something's wrong here.  */
2406             else if (bufp->can_be_null)
2407                 return 0;
2408 
2409             /* Otherwise, have to check alternative paths.  */
2410             break;
2411 
2412         case no_op:
2413         case begline:
2414         case endline:
2415         case begbuf:
2416         case endbuf:
2417         case wordbound:
2418         case notwordbound:
2419         case wordbeg:
2420         case wordend:
2421         case push_dummy_failure:
2422             continue;
2423 
2424         case jump_n:
2425         case pop_failure_jump:
2426         case maybe_pop_jump:
2427         case jump:
2428         case jump_past_alt:
2429         case dummy_failure_jump:
2430             EXTRACT_NUMBER_AND_INCR(j, p);
2431             p += j;
2432             if (j > 0)
2433                 continue;
2434 
2435             /* Jump backward implies we just went through the body of a
2436              * loop and matched nothing.  Opcode jumped to should be
2437              * `on_failure_jump' or `succeed_n'.  Just treat it like an
2438              * ordinary jump.  For a * loop, it has pushed its failure
2439              * point already; if so, discard that as redundant.  */
2440             if ((re_opcode_t) * p != on_failure_jump
2441                     && (re_opcode_t) * p != succeed_n)
2442                 continue;
2443 
2444             p++;
2445             EXTRACT_NUMBER_AND_INCR(j, p);
2446             p += j;
2447 
2448             /* If what's on the stack is where we are now, pop it.  */
2449             if (!FAIL_STACK_EMPTY()
2450                     && fail_stack.stack[fail_stack.avail - 1] == p)
2451                 fail_stack.avail--;
2452 
2453             continue;
2454 
2455         case on_failure_jump:
2456         case on_failure_keep_string_jump:
2457 handle_on_failure_jump:
2458             EXTRACT_NUMBER_AND_INCR(j, p);
2459 
2460             /* For some patterns, e.g., `(a?)?', `p+j' here points to the
2461              * end of the pattern.  We don't want to push such a point,
2462              * since when we restore it above, entering the switch will
2463              * increment `p' past the end of the pattern.  We don't need
2464              * to push such a point since we obviously won't find any more
2465              * fastmap entries beyond `pend'.  Such a pattern can match
2466              * the null string, though.  */
2467             if (p + j < pend) {
2468                 if (!PUSH_PATTERN_OP(p + j, fail_stack))
2469                     return -2;
2470             } else
2471                 bufp->can_be_null = 1;
2472 
2473             if (succeed_n_p) {
2474                 EXTRACT_NUMBER_AND_INCR(k, p);  /* Skip the n.  */
2475                 succeed_n_p = false;
2476             }
2477             continue;
2478 
2479         case succeed_n:
2480             /* Get to the number of times to succeed.  */
2481             p += 2;
2482 
2483             /* Increment p past the n for when k != 0.  */
2484             EXTRACT_NUMBER_AND_INCR(k, p);
2485             if (k == 0) {
2486                 p -= 4;
2487                 succeed_n_p = true; /* Spaghetti code alert.  */
2488                 goto handle_on_failure_jump;
2489             }
2490             continue;
2491 
2492         case set_number_at:
2493             p += 4;
2494             continue;
2495 
2496         case start_memory:
2497         case stop_memory:
2498             p += 2;
2499             continue;
2500 
2501         default:
2502             abort();        /* We have listed all the cases.  */
2503         }           /* switch *p++ */
2504 
2505         /* Getting here means we have found the possible starting
2506          * characters for one path of the pattern -- and that the empty
2507          * string does not match.  We need not follow this path further.
2508          * Instead, look at the next alternative (remembered on the
2509          * stack), or quit if no more.  The test at the top of the loop
2510          * does these things.  */
2511         path_can_be_null = false;
2512         p = pend;
2513     }               /* while p */
2514 
2515     /* Set `can_be_null' for the last path (also the first path, if the
2516      * pattern is empty).  */
2517     bufp->can_be_null |= path_can_be_null;
2518     return 0;
2519 }               /* re_compile_fastmap */
2520 
2521 /* Searching routines.  */
2522 
2523 /* Like re_search_2, below, but only one string is specified, and
2524  * doesn't let you say where to stop matching. */
2525 
2526 static int
re_search(bufp,string,size,startpos,range,regs)2527 re_search(bufp, string, size, startpos, range, regs)
2528 struct re_pattern_buffer *bufp;
2529 const char *string;
2530 int size, startpos, range;
2531 struct re_registers *regs;
2532 {
2533     return re_search_2(bufp, NULL, 0, string, size, startpos, range,
2534                        regs, size);
2535 }
2536 
2537 /* Using the compiled pattern in BUFP->buffer, first tries to match the
2538  * virtual concatenation of STRING1 and STRING2, starting first at index
2539  * STARTPOS, then at STARTPOS + 1, and so on.
2540  *
2541  * STRING1 and STRING2 have length SIZE1 and SIZE2, respectively.
2542  *
2543  * RANGE is how far to scan while trying to match.  RANGE = 0 means try
2544  * only at STARTPOS; in general, the last start tried is STARTPOS +
2545  * RANGE.
2546  *
2547  * In REGS, return the indices of the virtual concatenation of STRING1
2548  * and STRING2 that matched the entire BUFP->buffer and its contained
2549  * subexpressions.
2550  *
2551  * Do not consider matching one past the index STOP in the virtual
2552  * concatenation of STRING1 and STRING2.
2553  *
2554  * We return either the position in the strings at which the match was
2555  * found, -1 if no match, or -2 if error (such as failure
2556  * stack overflow).  */
2557 
2558 static int
re_search_2(bufp,string1,size1,string2,size2,startpos,range,regs,stop)2559 re_search_2(bufp, string1, size1, string2, size2, startpos, range, regs, stop)
2560 struct re_pattern_buffer *bufp;
2561 const char *string1, *string2;
2562 int size1, size2;
2563 int startpos;
2564 int range;
2565 struct re_registers *regs;
2566 int stop;
2567 {
2568     int val;
2569     register char *fastmap = bufp->fastmap;
2570     register char *translate = bufp->translate;
2571     int total_size = size1 + size2;
2572     int endpos = startpos + range;
2573 
2574     /* Check for out-of-range STARTPOS.  */
2575     if (startpos < 0 || startpos > total_size)
2576         return -1;
2577 
2578     /* Fix up RANGE if it might eventually take us outside
2579      * the virtual concatenation of STRING1 and STRING2.  */
2580     if (endpos < -1)
2581         range = -1 - startpos;
2582     else if (endpos > total_size)
2583         range = total_size - startpos;
2584 
2585     /* If the search isn't to be a backwards one, don't waste time in a
2586      * search for a pattern that must be anchored.  */
2587     if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0) {
2588         if (startpos > 0)
2589             return -1;
2590         else
2591             range = 1;
2592     }
2593     /* Update the fastmap now if not correct already.  */
2594     if (fastmap && !bufp->fastmap_accurate)
2595         if (re_compile_fastmap(bufp) == -2)
2596             return -2;
2597 
2598     /* Loop through the string, looking for a place to start matching.  */
2599     for (;;) {
2600         /* If a fastmap is supplied, skip quickly over characters that
2601          * cannot be the start of a match.  If the pattern can match the
2602          * null string, however, we don't need to skip characters; we want
2603          * the first null string.  */
2604         if (fastmap && startpos < total_size && !bufp->can_be_null) {
2605             if (range > 0) {    /* Searching forwards.  */
2606                 register const char *d;
2607                 register int lim = 0;
2608                 int irange = range;
2609 
2610                 if (startpos < size1 && startpos + range >= size1)
2611                     lim = range - (size1 - startpos);
2612 
2613                 d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
2614 
2615                 /* Written out as an if-else to avoid testing `translate'
2616                  * inside the loop.  */
2617                 if (translate)
2618                     while (range > lim
2619                             && !fastmap[(unsigned char)
2620                                         translate[(unsigned char) *d++]])
2621                         range--;
2622                 else
2623                     while (range > lim && !fastmap[(unsigned char) *d++])
2624                         range--;
2625 
2626                 startpos += irange - range;
2627             } else {        /* Searching backwards.  */
2628                 register char c = (size1 == 0 || startpos >= size1
2629                                    ? string2[startpos - size1]
2630                                    : string1[startpos]);
2631 
2632                 if (!fastmap[(unsigned char) TRANSLATE(c)])
2633                     goto advance;
2634             }
2635         }
2636         /* If can't match the null string, and that's all we have left, fail.  */
2637         if (range >= 0 && startpos == total_size && fastmap
2638                 && !bufp->can_be_null)
2639             return -1;
2640 
2641         val = re_match_2(bufp, string1, size1, string2, size2,
2642                          startpos, regs, stop);
2643         if (val >= 0)
2644             return startpos;
2645 
2646         if (val == -2)
2647             return -2;
2648 
2649 advance:
2650         if (!range)
2651             break;
2652         else if (range > 0) {
2653             range--;
2654             startpos++;
2655         } else {
2656             range++;
2657             startpos--;
2658         }
2659     }
2660     return -1;
2661 }               /* re_search_2 */
2662 
2663 /* Declarations and macros for re_match_2.  */
2664 
2665 /* Structure for per-register (a.k.a. per-group) information.
2666  * This must not be longer than one word, because we push this value
2667  * onto the failure stack.  Other register information, such as the
2668  * starting and ending positions (which are addresses), and the list of
2669  * inner groups (which is a bits list) are maintained in separate
2670  * variables.
2671  *
2672  * We are making a (strictly speaking) nonportable assumption here: that
2673  * the compiler will pack our bit fields into something that fits into
2674  * the type of `word', i.e., is something that fits into one item on the
2675  * failure stack.  */
2676 typedef union {
2677     fail_stack_elt_t word;
2678     struct {
2679         /* This field is one if this group can match the empty string,
2680          * zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
2681 #define MATCH_NULL_UNSET_VALUE 3
2682         unsigned match_null_string_p:2;
2683         unsigned is_active:1;
2684         unsigned matched_something:1;
2685         unsigned ever_matched_something:1;
2686     } bits;
2687 } register_info_type;
2688 static boolean alt_match_null_string_p(unsigned char *p, unsigned char *end, register_info_type *reg_info);
2689 static boolean common_op_match_null_string_p( unsigned char **p, unsigned char *end, register_info_type *reg_info);
2690 static int bcmp_translate(unsigned char const *s1, unsigned char const *s2, register int len, char *translate);
2691 static boolean group_match_null_string_p(unsigned char **p, unsigned char *end, register_info_type *reg_info);
2692 
2693 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
2694 #define IS_ACTIVE(R)  ((R).bits.is_active)
2695 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
2696 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
2697 
2698 /* Call this when have matched a real character; it sets `matched' flags
2699  * for the subexpressions which we are currently inside.  Also records
2700  * that those subexprs have matched.  */
2701 #define SET_REGS_MATCHED()                      \
2702   do                                    \
2703     {                                   \
2704       unsigned r;                           \
2705       for (r = lowest_active_reg; r <= highest_active_reg; r++)     \
2706         {                               \
2707           MATCHED_SOMETHING (reg_info[r])               \
2708             = EVER_MATCHED_SOMETHING (reg_info[r])          \
2709             = 1;                            \
2710         }                               \
2711     }                                   \
2712   while (0)
2713 
2714 /* This converts PTR, a pointer into one of the search strings `string1'
2715  * and `string2' into an offset from the beginning of that string.  */
2716 #define POINTER_TO_OFFSET(ptr)                      \
2717   (FIRST_STRING_P (ptr) ? (ptr) - string1 : (ptr) - string2 + size1)
2718 
2719 /* Registers are set to a sentinel when they haven't yet matched.  */
2720 #define REG_UNSET_VALUE ((char *) -1)
2721 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
2722 
2723 /* Macros for dealing with the split strings in re_match_2.  */
2724 
2725 #define MATCHING_IN_FIRST_STRING  (dend == end_match_1)
2726 
2727 /* Call before fetching a character with *d.  This switches over to
2728  * string2 if necessary.  */
2729 #define PREFETCH()                          \
2730   while (d == dend)                             \
2731     {                                   \
2732       /* End of string2 => fail.  */                    \
2733       if (dend == end_match_2)                      \
2734         goto fail;                          \
2735       /* End of string1 => advance to string2.  */          \
2736       d = string2;                              \
2737       dend = end_match_2;                       \
2738     }
2739 
2740 /* Test if at very beginning or at very end of the virtual concatenation
2741  * of `string1' and `string2'.  If only one string, it's `string2'.  */
2742 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
at_strings_end(const char * d,const char * end2)2743 static int at_strings_end(const char *d, const char *end2)
2744 {
2745     return d == end2;
2746 }
2747 
2748 /* Test if D points to a character which is word-constituent.  We have
2749  * two special cases to check for: if past the end of string1, look at
2750  * the first character in string2; and if before the beginning of
2751  * string2, look at the last character in string1.  */
2752 #define WORDCHAR_P(d)                           \
2753   (re_syntax_table[(d) == end1 ? *string2                   \
2754            : (d) == string2 - 1 ? *(end1 - 1) : *(d)]           \
2755    == Sword)
2756 static int
wordchar_p(const char * d,const char * end1,const char * string2)2757 wordchar_p(const char *d, const char *end1, const char *string2)
2758 {
2759     return re_syntax_table[(d) == end1 ? *string2
2760                            : (d) == string2 - 1 ? *(end1 - 1) : *(d)]
2761            == Sword;
2762 }
2763 
2764 /* Test if the character before D and the one at D differ with respect
2765  * to being word-constituent.  */
2766 #define AT_WORD_BOUNDARY(d)                     \
2767   (AT_STRINGS_BEG (d) || at_strings_end(d,end2)             \
2768    || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
2769 
2770 /* Free everything we malloc.  */
2771 #ifdef REGEX_MALLOC
2772 #define FREE_VAR(var) if (var) free (var); var = NULL
2773 #define FREE_VARIABLES()                        \
2774   do {                                  \
2775     FREE_VAR (fail_stack.stack);                    \
2776     FREE_VAR (regstart);                        \
2777     FREE_VAR (regend);                          \
2778     FREE_VAR (old_regstart);                        \
2779     FREE_VAR (old_regend);                      \
2780     FREE_VAR (best_regstart);                       \
2781     FREE_VAR (best_regend);                     \
2782     FREE_VAR (reg_info);                        \
2783     FREE_VAR (reg_dummy);                       \
2784     FREE_VAR (reg_info_dummy);                      \
2785   } while (0)
2786 #else /* not REGEX_MALLOC */
2787 /* Some MIPS systems (at least) want this to free alloca'd storage.  */
2788 #define FREE_VARIABLES() alloca (0)
2789 #endif /* not REGEX_MALLOC */
2790 
2791 /* These values must meet several constraints.  They must not be valid
2792  * register values; since we have a limit of 255 registers (because
2793  * we use only one byte in the pattern for the register number), we can
2794  * use numbers larger than 255.  They must differ by 1, because of
2795  * NUM_FAILURE_ITEMS above.  And the value for the lowest register must
2796  * be larger than the value for the highest register, so we do not try
2797  * to actually save any registers when none are active.  */
2798 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
2799 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
2800 
2801 /* Matching routines.  */
2802 
2803 /* re_match_2 matches the compiled pattern in BUFP against the
2804  * the (virtual) concatenation of STRING1 and STRING2 (of length SIZE1
2805  * and SIZE2, respectively).  We start matching at POS, and stop
2806  * matching at STOP.
2807  *
2808  * If REGS is non-null and the `no_sub' field of BUFP is nonzero, we
2809  * store offsets for the substring each group matched in REGS.  See the
2810  * documentation for exactly how many groups we fill.
2811  *
2812  * We return -1 if no match, -2 if an internal error (such as the
2813  * failure stack overflowing).  Otherwise, we return the length of the
2814  * matched substring.  */
2815 
2816 int
re_match_2(bufp,string1,size1,string2,size2,pos,regs,stop)2817 re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop)
2818 struct re_pattern_buffer *bufp;
2819 const char *string1, *string2;
2820 int size1, size2;
2821 int pos;
2822 struct re_registers *regs;
2823 int stop;
2824 {
2825     /* General temporaries.  */
2826     int mcnt;
2827     unsigned char *p1;
2828 
2829     /* Just past the end of the corresponding string.  */
2830     const char *end1, *end2;
2831 
2832     /* Pointers into string1 and string2, just past the last characters in
2833      * each to consider matching.  */
2834     const char *end_match_1, *end_match_2;
2835 
2836     /* Where we are in the data, and the end of the current string.  */
2837     const char *d, *dend;
2838 
2839     /* Where we are in the pattern, and the end of the pattern.  */
2840     unsigned char *p = bufp->buffer;
2841     register unsigned char *pend = p + bufp->used;
2842 
2843     /* We use this to map every character in the string.  */
2844     char *translate = bufp->translate;
2845 
2846     /* Failure point stack.  Each place that can handle a failure further
2847      * down the line pushes a failure point on this stack.  It consists of
2848      * restart, regend, and reg_info for all registers corresponding to
2849      * the subexpressions we're currently inside, plus the number of such
2850      * registers, and, finally, two char *'s.  The first char * is where
2851      * to resume scanning the pattern; the second one is where to resume
2852      * scanning the strings.  If the latter is zero, the failure point is
2853      * a ``dummy''; if a failure happens and the failure point is a dummy,
2854      * it gets discarded and the next next one is tried.  */
2855     fail_stack_type fail_stack;
2856 #ifdef DEBUG
2857     static unsigned failure_id = 0;
2858     unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
2859 #endif
2860 
2861     /* We fill all the registers internally, independent of what we
2862      * return, for use in backreferences.  The number here includes
2863      * an element for register zero.  */
2864     unsigned num_regs = bufp->re_nsub + 1;
2865 
2866     /* The currently active registers.  */
2867     unsigned long lowest_active_reg = NO_LOWEST_ACTIVE_REG;
2868     unsigned long highest_active_reg = NO_HIGHEST_ACTIVE_REG;
2869 
2870     /* Information on the contents of registers. These are pointers into
2871      * the input strings; they record just what was matched (on this
2872      * attempt) by a subexpression part of the pattern, that is, the
2873      * regnum-th regstart pointer points to where in the pattern we began
2874      * matching and the regnum-th regend points to right after where we
2875      * stopped matching the regnum-th subexpression.  (The zeroth register
2876      * keeps track of what the whole pattern matches.)  */
2877     const char **regstart = NULL, **regend = NULL;
2878 
2879     /* If a group that's operated upon by a repetition operator fails to
2880      * match anything, then the register for its start will need to be
2881      * restored because it will have been set to wherever in the string we
2882      * are when we last see its open-group operator.  Similarly for a
2883      * register's end.  */
2884     const char **old_regstart = NULL, **old_regend = NULL;
2885 
2886     /* The is_active field of reg_info helps us keep track of which (possibly
2887      * nested) subexpressions we are currently in. The matched_something
2888      * field of reg_info[reg_num] helps us tell whether or not we have
2889      * matched any of the pattern so far this time through the reg_num-th
2890      * subexpression.  These two fields get reset each time through any
2891      * loop their register is in.  */
2892     register_info_type *reg_info = NULL;
2893 
2894     /* The following record the register info as found in the above
2895      * variables when we find a match better than any we've seen before.
2896      * This happens as we backtrack through the failure points, which in
2897      * turn happens only if we have not yet matched the entire string. */
2898     unsigned best_regs_set = false;
2899     const char **best_regstart = NULL, **best_regend = NULL;
2900 
2901     /* Logically, this is `best_regend[0]'.  But we don't want to have to
2902      * allocate space for that if we're not allocating space for anything
2903      * else (see below).  Also, we never need info about register 0 for
2904      * any of the other register vectors, and it seems rather a kludge to
2905      * treat `best_regend' differently than the rest.  So we keep track of
2906      * the end of the best match so far in a separate variable.  We
2907      * initialize this to NULL so that when we backtrack the first time
2908      * and need to test it, it's not garbage.  */
2909     const char *match_end = NULL;
2910 
2911     /* Used when we pop values we don't care about.  */
2912     const char **reg_dummy = NULL;
2913     register_info_type *reg_info_dummy = NULL;
2914 
2915 #ifdef DEBUG
2916     /* Counts the total number of registers pushed.  */
2917     unsigned num_regs_pushed = 0;
2918 #endif
2919 
2920     DEBUG_PRINT1("\n\nEntering re_match_2.\n");
2921 
2922     INIT_FAIL_STACK();
2923 
2924     /* Do not bother to initialize all the register variables if there are
2925      * no groups in the pattern, as it takes a fair amount of time.  If
2926      * there are groups, we include space for register 0 (the whole
2927      * pattern), even though we never use it, since it simplifies the
2928      * array indexing.  We should fix this.  */
2929     if (bufp->re_nsub) {
2930         regstart = REGEX_TALLOC(num_regs, const char *);
2931         regend = REGEX_TALLOC(num_regs, const char *);
2932         old_regstart = REGEX_TALLOC(num_regs, const char *);
2933         old_regend = REGEX_TALLOC(num_regs, const char *);
2934         best_regstart = REGEX_TALLOC(num_regs, const char *);
2935         best_regend = REGEX_TALLOC(num_regs, const char *);
2936         reg_info = REGEX_TALLOC(num_regs, register_info_type);
2937         reg_dummy = REGEX_TALLOC(num_regs, const char *);
2938         reg_info_dummy = REGEX_TALLOC(num_regs, register_info_type);
2939 
2940         if (!(regstart && regend && old_regstart && old_regend && reg_info
2941                 && best_regstart && best_regend && reg_dummy && reg_info_dummy)) {
2942             FREE_VARIABLES();
2943             return -2;
2944         }
2945     }
2946 #ifdef REGEX_MALLOC
2947     else {
2948         /* We must initialize all our variables to NULL, so that
2949          * `FREE_VARIABLES' doesn't try to free them.  */
2950         regstart = regend = old_regstart = old_regend = best_regstart
2951                                            = best_regend = reg_dummy = NULL;
2952         reg_info = reg_info_dummy = (register_info_type *) NULL;
2953     }
2954 #endif /* REGEX_MALLOC */
2955 
2956     /* The starting position is bogus.  */
2957     if (pos < 0 || pos > size1 + size2) {
2958         FREE_VARIABLES();
2959         return -1;
2960     }
2961     /* Initialize subexpression text positions to -1 to mark ones that no
2962      * start_memory/stop_memory has been seen for. Also initialize the
2963      * register information struct.  */
2964     for (mcnt = 1; mcnt < num_regs; mcnt++) {
2965         regstart[mcnt] = regend[mcnt]
2966                          = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
2967 
2968         REG_MATCH_NULL_STRING_P(reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
2969         IS_ACTIVE(reg_info[mcnt]) = 0;
2970         MATCHED_SOMETHING(reg_info[mcnt]) = 0;
2971         EVER_MATCHED_SOMETHING(reg_info[mcnt]) = 0;
2972     }
2973 
2974     /* We move `string1' into `string2' if the latter's empty -- but not if
2975      * `string1' is null.  */
2976     if (size2 == 0 && string1 != NULL) {
2977         string2 = string1;
2978         size2 = size1;
2979         string1 = 0;
2980         size1 = 0;
2981     }
2982     end1 = string1 + size1;
2983     end2 = string2 + size2;
2984 
2985     /* Compute where to stop matching, within the two strings.  */
2986     if (stop <= size1) {
2987         end_match_1 = string1 + stop;
2988         end_match_2 = string2;
2989     } else {
2990         end_match_1 = end1;
2991         end_match_2 = string2 + stop - size1;
2992     }
2993 
2994     /* `p' scans through the pattern as `d' scans through the data.
2995      * `dend' is the end of the input string that `d' points within.  `d'
2996      * is advanced into the following input string whenever necessary, but
2997      * this happens before fetching; therefore, at the beginning of the
2998      * loop, `d' can be pointing at the end of a string, but it cannot
2999      * equal `string2'.  */
3000     if (size1 > 0 && pos <= size1) {
3001         d = string1 + pos;
3002         dend = end_match_1;
3003     } else {
3004         d = string2 + pos - size1;
3005         dend = end_match_2;
3006     }
3007 
3008     DEBUG_PRINT1("The compiled pattern is: ");
3009     DEBUG_PRINT_COMPILED_PATTERN(bufp, p, pend);
3010     DEBUG_PRINT1("The string to match is: `");
3011     DEBUG_PRINT_DOUBLE_STRING(d, string1, size1, string2, size2);
3012     DEBUG_PRINT1("'\n");
3013 
3014     /* This loops over pattern commands.  It exits by returning from the
3015      * function if the match is complete, or it drops through if the match
3016      * fails at this starting point in the input data.  */
3017     for (;;) {
3018         DEBUG_PRINT2("\n0x%x: ", p);
3019 
3020         if (p == pend) {    /* End of pattern means we might have succeeded.  */
3021             DEBUG_PRINT1("end of pattern ... ");
3022 
3023             /* If we haven't matched the entire string, and we want the
3024              * longest match, try backtracking.  */
3025             if (d != end_match_2) {
3026                 DEBUG_PRINT1("backtracking.\n");
3027 
3028                 if (!FAIL_STACK_EMPTY()) {  /* More failure points to try.  */
3029                     boolean same_str_p = (FIRST_STRING_P(match_end)
3030                                           == MATCHING_IN_FIRST_STRING);
3031 
3032                     /* If exceeds best match so far, save it.  */
3033                     if (!best_regs_set
3034                             || (same_str_p && d > match_end)
3035                             || (!same_str_p && !MATCHING_IN_FIRST_STRING)) {
3036                         best_regs_set = true;
3037                         match_end = d;
3038 
3039                         DEBUG_PRINT1("\nSAVING match as best so far.\n");
3040 
3041                         for (mcnt = 1; mcnt < num_regs; mcnt++) {
3042                             best_regstart[mcnt] = regstart[mcnt];
3043                             best_regend[mcnt] = regend[mcnt];
3044                         }
3045                     }
3046                     goto fail;
3047                 }
3048                 /* If no failure points, don't restore garbage.  */
3049                 else if (best_regs_set) {
3050 restore_best_regs:
3051                     /* Restore best match.  It may happen that `dend ==
3052                      * end_match_1' while the restored d is in string2.
3053                      * For example, the pattern `x.*y.*z' against the
3054                      * strings `x-' and `y-z-', if the two strings are
3055                      * not consecutive in memory.  */
3056                     DEBUG_PRINT1("Restoring best registers.\n");
3057 
3058                     d = match_end;
3059                     dend = ((d >= string1 && d <= end1)
3060                             ? end_match_1 : end_match_2);
3061 
3062                     for (mcnt = 1; mcnt < num_regs; mcnt++) {
3063                         regstart[mcnt] = best_regstart[mcnt];
3064                         regend[mcnt] = best_regend[mcnt];
3065                     }
3066                 }
3067             }           /* d != end_match_2 */
3068             DEBUG_PRINT1("Accepting match.\n");
3069 
3070             /* If caller wants register contents data back, do it.  */
3071             if (regs && !bufp->no_sub) {
3072                 /* Have the register data arrays been allocated?  */
3073                 if (bufp->regs_allocated == REGS_UNALLOCATED) {
3074                     /* No.  So allocate them with malloc.  We need one
3075                                          * extra element beyond `num_regs' for the `-1' marker
3076                                          * GNU code uses.  */
3077                     regs->num_regs = max(RE_NREGS, num_regs + 1);
3078                     regs->start = TALLOC(regs->num_regs, regoff_t);
3079                     regs->end = TALLOC(regs->num_regs, regoff_t);
3080                     if (regs->start == NULL || regs->end == NULL)
3081                         return -2;
3082                     bufp->regs_allocated = REGS_REALLOCATE;
3083                 } else if (bufp->regs_allocated == REGS_REALLOCATE) {
3084                     /* Yes.  If we need more elements than were already
3085                                          * allocated, reallocate them.  If we need fewer, just
3086                                          * leave it alone.  */
3087                     if (regs->num_regs < num_regs + 1) {
3088                         regs->num_regs = num_regs + 1;
3089                         RETALLOC(regs->start, regs->num_regs, regoff_t);
3090                         RETALLOC(regs->end, regs->num_regs, regoff_t);
3091                         if (regs->start == NULL || regs->end == NULL)
3092                             return -2;
3093                     }
3094                 } else
3095                     assert(bufp->regs_allocated == REGS_FIXED);
3096 
3097                 /* Convert the pointer data in `regstart' and `regend' to
3098                  * indices.  Register zero has to be set differently,
3099                  * since we haven't kept track of any info for it.  */
3100                 if (regs->num_regs > 0) {
3101                     regs->start[0] = pos;
3102                     regs->end[0] = (MATCHING_IN_FIRST_STRING ? d - string1
3103                                     : d - string2 + size1);
3104                 }
3105                 /* Go through the first `min (num_regs, regs->num_regs)'
3106                  * registers, since that is all we initialized.  */
3107                 for (mcnt = 1; mcnt < min(num_regs, regs->num_regs); mcnt++) {
3108                     if (REG_UNSET(regstart[mcnt]) || REG_UNSET(regend[mcnt]))
3109                         regs->start[mcnt] = regs->end[mcnt] = -1;
3110                     else {
3111                         regs->start[mcnt] = POINTER_TO_OFFSET(regstart[mcnt]);
3112                         regs->end[mcnt] = POINTER_TO_OFFSET(regend[mcnt]);
3113                     }
3114                 }
3115 
3116                 /* If the regs structure we return has more elements than
3117                  * were in the pattern, set the extra elements to -1.  If
3118                  * we (re)allocated the registers, this is the case,
3119                  * because we always allocate enough to have at least one
3120                  * -1 at the end.  */
3121                 for (mcnt = num_regs; mcnt < regs->num_regs; mcnt++)
3122                     regs->start[mcnt] = regs->end[mcnt] = -1;
3123             }           /* regs && !bufp->no_sub */
3124             FREE_VARIABLES();
3125             DEBUG_PRINT4("%u failure points pushed, %u popped (%u remain).\n",
3126                          nfailure_points_pushed, nfailure_points_popped,
3127                          nfailure_points_pushed - nfailure_points_popped);
3128             DEBUG_PRINT2("%u registers pushed.\n", num_regs_pushed);
3129 
3130             mcnt = d - pos - (MATCHING_IN_FIRST_STRING
3131                               ? string1
3132                               : string2 - size1);
3133 
3134             DEBUG_PRINT2("Returning %d from re_match_2.\n", mcnt);
3135 
3136             return mcnt;
3137         }
3138         /* Otherwise match next pattern command.  */
3139 #ifdef SWITCH_ENUM_BUG
3140         switch ((int) ((re_opcode_t) * p++))
3141 #else
3142         switch ((re_opcode_t) * p++)
3143 #endif
3144         {
3145         /* Ignore these.  Used to ignore the n of succeed_n's which
3146          * currently have n == 0.  */
3147         case no_op:
3148             DEBUG_PRINT1("EXECUTING no_op.\n");
3149             break;
3150 
3151         /* Match the next n pattern characters exactly.  The following
3152          * byte in the pattern defines n, and the n bytes after that
3153          * are the characters to match.  */
3154         case exactn:
3155             mcnt = *p++;
3156             DEBUG_PRINT2("EXECUTING exactn %d.\n", mcnt);
3157 
3158             /* This is written out as an if-else so we don't waste time
3159              * testing `translate' inside the loop.  */
3160             if (translate) {
3161                 do {
3162                     PREFETCH();
3163                     if (translate[(unsigned char) *d++] != (char) *p++)
3164                         goto fail;
3165                 } while (--mcnt);
3166             } else {
3167                 do {
3168                     PREFETCH();
3169                     if (*d++ != (char) *p++)
3170                         goto fail;
3171                 } while (--mcnt);
3172             }
3173             SET_REGS_MATCHED();
3174             break;
3175 
3176         /* Match any character except possibly a newline or a null.  */
3177         case anychar:
3178             DEBUG_PRINT1("EXECUTING anychar.\n");
3179 
3180             PREFETCH();
3181 
3182             if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE(*d) == '\n')
3183                     || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE(*d) == '\000'))
3184                 goto fail;
3185 
3186             SET_REGS_MATCHED();
3187             DEBUG_PRINT2("  Matched `%d'.\n", *d);
3188             d++;
3189             break;
3190 
3191         case charset:
3192         case charset_not: {
3193             register unsigned char c;
3194             boolean not = (re_opcode_t) * (p - 1) == charset_not;
3195 
3196             DEBUG_PRINT2("EXECUTING charset%s.\n", not ? "_not" : "");
3197 
3198             PREFETCH();
3199             c = TRANSLATE(*d);  /* The character to match.  */
3200 
3201             /* Cast to `unsigned' instead of `unsigned char' in case the
3202              * bit list is a full 32 bytes long.  */
3203             if (c < (unsigned) (*p * BYTEWIDTH)
3204                     && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3205                 not = !not;
3206 
3207             p += 1 + *p;
3208 
3209             if (!not)
3210                 goto fail;
3211 
3212             SET_REGS_MATCHED();
3213             d++;
3214             break;
3215         }
3216 
3217         /* The beginning of a group is represented by start_memory.
3218          * The arguments are the register number in the next byte, and the
3219          * number of groups inner to this one in the next.  The text
3220          * matched within the group is recorded (in the internal
3221          * registers data structure) under the register number.  */
3222         case start_memory:
3223             DEBUG_PRINT3("EXECUTING start_memory %d (%d):\n", *p, p[1]);
3224 
3225             /* Find out if this group can match the empty string.  */
3226             p1 = p;     /* To send to group_match_null_string_p.  */
3227 
3228             if (REG_MATCH_NULL_STRING_P(reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3229                 REG_MATCH_NULL_STRING_P(reg_info[*p])
3230                     = group_match_null_string_p(&p1, pend, reg_info);
3231 
3232             /* Save the position in the string where we were the last time
3233              * we were at this open-group operator in case the group is
3234              * operated upon by a repetition operator, e.g., with `(a*)*b'
3235              * against `ab'; then we want to ignore where we are now in
3236              * the string in case this attempt to match fails.  */
3237             old_regstart[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
3238                                ? REG_UNSET(regstart[*p]) ? d : regstart[*p]
3239                                : regstart[*p];
3240             DEBUG_PRINT2("  old_regstart: %d\n",
3241                          POINTER_TO_OFFSET(old_regstart[*p]));
3242 
3243             regstart[*p] = d;
3244             DEBUG_PRINT2("  regstart: %d\n", POINTER_TO_OFFSET(regstart[*p]));
3245 
3246             IS_ACTIVE(reg_info[*p]) = 1;
3247             MATCHED_SOMETHING(reg_info[*p]) = 0;
3248 
3249             /* This is the new highest active register.  */
3250             highest_active_reg = *p;
3251 
3252             /* If nothing was active before, this is the new lowest active
3253              * register.  */
3254             if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3255                 lowest_active_reg = *p;
3256 
3257             /* Move past the register number and inner group count.  */
3258             p += 2;
3259             break;
3260 
3261         /* The stop_memory opcode represents the end of a group.  Its
3262          * arguments are the same as start_memory's: the register
3263          * number, and the number of inner groups.  */
3264         case stop_memory:
3265             DEBUG_PRINT3("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
3266 
3267             /* We need to save the string position the last time we were at
3268              * this close-group operator in case the group is operated
3269              * upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3270              * against `aba'; then we want to ignore where we are now in
3271              * the string in case this attempt to match fails.  */
3272             old_regend[*p] = REG_MATCH_NULL_STRING_P(reg_info[*p])
3273                              ? REG_UNSET(regend[*p]) ? d : regend[*p]
3274                              : regend[*p];
3275             DEBUG_PRINT2("      old_regend: %d\n",
3276                          POINTER_TO_OFFSET(old_regend[*p]));
3277 
3278             regend[*p] = d;
3279             DEBUG_PRINT2("      regend: %d\n", POINTER_TO_OFFSET(regend[*p]));
3280 
3281             /* This register isn't active anymore.  */
3282             IS_ACTIVE(reg_info[*p]) = 0;
3283 
3284             /* If this was the only register active, nothing is active
3285              * anymore.  */
3286             if (lowest_active_reg == highest_active_reg) {
3287                 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3288                 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3289             } else {
3290                 /* We must scan for the new highest active register, since
3291                      * it isn't necessarily one less than now: consider
3292                      * (a(b)c(d(e)f)g).  When group 3 ends, after the f), the
3293                      * new highest active register is 1.  */
3294                 unsigned char r = *p - 1;
3295                 while (r > 0 && !IS_ACTIVE(reg_info[r]))
3296                     r--;
3297 
3298                 /* If we end up at register zero, that means that we saved
3299                  * the registers as the result of an `on_failure_jump', not
3300                  * a `start_memory', and we jumped to past the innermost
3301                  * `stop_memory'.  For example, in ((.)*) we save
3302                  * registers 1 and 2 as a result of the *, but when we pop
3303                  * back to the second ), we are at the stop_memory 1.
3304                  * Thus, nothing is active.  */
3305                 if (r == 0) {
3306                     lowest_active_reg = NO_LOWEST_ACTIVE_REG;
3307                     highest_active_reg = NO_HIGHEST_ACTIVE_REG;
3308                 } else
3309                     highest_active_reg = r;
3310             }
3311 
3312             /* If just failed to match something this time around with a
3313              * group that's operated on by a repetition operator, try to
3314              * force exit from the ``loop'', and restore the register
3315              * information for this group that we had before trying this
3316              * last match.  */
3317             if ((!MATCHED_SOMETHING(reg_info[*p])
3318                     || (re_opcode_t) p[-3] == start_memory)
3319                     && (p + 2) < pend) {
3320                 boolean is_a_jump_n = false;
3321 
3322                 p1 = p + 2;
3323                 mcnt = 0;
3324                 switch ((re_opcode_t) * p1++) {
3325                 case jump_n:
3326                     is_a_jump_n = true;
3327                 case pop_failure_jump:
3328                 case maybe_pop_jump:
3329                 case jump:
3330                 case dummy_failure_jump:
3331                     EXTRACT_NUMBER_AND_INCR(mcnt, p1);
3332                     if (is_a_jump_n)
3333                         p1 += 2;
3334                     break;
3335 
3336                 default:
3337                     /* do nothing */
3338                     ;
3339                 }
3340                 p1 += mcnt;
3341 
3342                 /* If the next operation is a jump backwards in the pattern
3343                  * to an on_failure_jump right before the start_memory
3344                  * corresponding to this stop_memory, exit from the loop
3345                  * by forcing a failure after pushing on the stack the
3346                  * on_failure_jump's jump in the pattern, and d.  */
3347                 if (mcnt < 0 && (re_opcode_t) * p1 == on_failure_jump
3348                         && (re_opcode_t) p1[3] == start_memory && p1[4] == *p) {
3349                     /* If this group ever matched anything, then restore
3350                      * what its registers were before trying this last
3351                      * failed match, e.g., with `(a*)*b' against `ab' for
3352                      * regstart[1], and, e.g., with `((a*)*(b*)*)*'
3353                      * against `aba' for regend[3].
3354                      *
3355                      * Also restore the registers for inner groups for,
3356                      * e.g., `((a*)(b*))*' against `aba' (register 3 would
3357                      * otherwise get trashed).  */
3358 
3359                     if (EVER_MATCHED_SOMETHING(reg_info[*p])) {
3360                         unsigned r;
3361 
3362                         EVER_MATCHED_SOMETHING(reg_info[*p]) = 0;
3363 
3364                         /* Restore this and inner groups' (if any) registers.  */
3365                         for (r = *p; r < *p + *(p + 1); r++) {
3366                             regstart[r] = old_regstart[r];
3367 
3368                             /* xx why this test?  */
3369                             if ((long) old_regend[r] >= (long) regstart[r])
3370                                 regend[r] = old_regend[r];
3371                         }
3372                     }
3373                     p1++;
3374                     EXTRACT_NUMBER_AND_INCR(mcnt, p1);
3375                     PUSH_FAILURE_POINT(p1 + mcnt, d, -2);
3376 
3377                     goto fail;
3378                 }
3379             }
3380             /* Move past the register number and the inner group count.  */
3381             p += 2;
3382             break;
3383 
3384         /* \<digit> has been turned into a `duplicate' command which is
3385          * followed by the numeric value of <digit> as the register number.  */
3386         case duplicate: {
3387             register const char *d2, *dend2;
3388             int regno = *p++;   /* Get which register to match against.  */
3389             DEBUG_PRINT2("EXECUTING duplicate %d.\n", regno);
3390 
3391             /* Can't back reference a group which we've never matched.  */
3392             if (REG_UNSET(regstart[regno]) || REG_UNSET(regend[regno]))
3393                 goto fail;
3394 
3395             /* Where in input to try to start matching.  */
3396             d2 = regstart[regno];
3397 
3398             /* Where to stop matching; if both the place to start and
3399              * the place to stop matching are in the same string, then
3400              * set to the place to stop, otherwise, for now have to use
3401              * the end of the first string.  */
3402 
3403             dend2 = ((FIRST_STRING_P(regstart[regno])
3404                       == FIRST_STRING_P(regend[regno]))
3405                      ? regend[regno] : end_match_1);
3406             for (;;) {
3407                 /* If necessary, advance to next segment in register
3408                  * contents.  */
3409                 while (d2 == dend2) {
3410                     if (dend2 == end_match_2)
3411                         break;
3412                     if (dend2 == regend[regno])
3413                         break;
3414 
3415                     /* End of string1 => advance to string2. */
3416                     d2 = string2;
3417                     dend2 = regend[regno];
3418                 }
3419                 /* At end of register contents => success */
3420                 if (d2 == dend2)
3421                     break;
3422 
3423                 /* If necessary, advance to next segment in data.  */
3424                 PREFETCH();
3425 
3426                 /* How many characters left in this segment to match.  */
3427                 mcnt = dend - d;
3428 
3429                 /* Want how many consecutive characters we can match in
3430                  * one shot, so, if necessary, adjust the count.  */
3431                 if (mcnt > dend2 - d2)
3432                     mcnt = dend2 - d2;
3433 
3434                 /* Compare that many; failure if mismatch, else move
3435                  * past them.  */
3436                 if (translate
3437                         ? bcmp_translate((unsigned char *)d, (unsigned char *)d2, mcnt, translate)
3438                         : memcmp(d, d2, mcnt))
3439                     goto fail;
3440                 d += mcnt, d2 += mcnt;
3441             }
3442         }
3443         break;
3444 
3445         /* begline matches the empty string at the beginning of the string
3446          * (unless `not_bol' is set in `bufp'), and, if
3447          * `newline_anchor' is set, after newlines.  */
3448         case begline:
3449             DEBUG_PRINT1("EXECUTING begline.\n");
3450 
3451             if (AT_STRINGS_BEG(d)) {
3452                 if (!bufp->not_bol)
3453                     break;
3454             } else if (d[-1] == '\n' && bufp->newline_anchor) {
3455                 break;
3456             }
3457             /* In all other cases, we fail.  */
3458             goto fail;
3459 
3460         /* endline is the dual of begline.  */
3461         case endline:
3462             DEBUG_PRINT1("EXECUTING endline.\n");
3463 
3464             if (at_strings_end(d,end2)) {
3465                 if (!bufp->not_eol)
3466                     break;
3467             }
3468             /* We have to ``prefetch'' the next character.  */
3469             else if ((d == end1 ? *string2 : *d) == '\n'
3470                      && bufp->newline_anchor) {
3471                 break;
3472             }
3473             goto fail;
3474 
3475         /* Match at the very beginning of the data.  */
3476         case begbuf:
3477             DEBUG_PRINT1("EXECUTING begbuf.\n");
3478             if (AT_STRINGS_BEG(d))
3479                 break;
3480             goto fail;
3481 
3482         /* Match at the very end of the data.  */
3483         case endbuf:
3484             DEBUG_PRINT1("EXECUTING endbuf.\n");
3485             if (at_strings_end(d,end2))
3486                 break;
3487             goto fail;
3488 
3489         /* on_failure_keep_string_jump is used to optimize `.*\n'.  It
3490          * pushes NULL as the value for the string on the stack.  Then
3491          * `pop_failure_point' will keep the current value for the
3492          * string, instead of restoring it.  To see why, consider
3493          * matching `foo\nbar' against `.*\n'.  The .* matches the foo;
3494          * then the . fails against the \n.  But the next thing we want
3495          * to do is match the \n against the \n; if we restored the
3496          * string value, we would be back at the foo.
3497          *
3498          * Because this is used only in specific cases, we don't need to
3499          * check all the things that `on_failure_jump' does, to make
3500          * sure the right things get saved on the stack.  Hence we don't
3501          * share its code.  The only reason to push anything on the
3502          * stack at all is that otherwise we would have to change
3503          * `anychar's code to do something besides goto fail in this
3504          * case; that seems worse than this.  */
3505         case on_failure_keep_string_jump:
3506             DEBUG_PRINT1("EXECUTING on_failure_keep_string_jump");
3507 
3508             EXTRACT_NUMBER_AND_INCR(mcnt, p);
3509             DEBUG_PRINT3(" %d (to 0x%x):\n", mcnt, p + mcnt);
3510 
3511             PUSH_FAILURE_POINT(p + mcnt, NULL, -2);
3512             break;
3513 
3514         /* Uses of on_failure_jump:
3515          *
3516          * Each alternative starts with an on_failure_jump that points
3517          * to the beginning of the next alternative.  Each alternative
3518          * except the last ends with a jump that in effect jumps past
3519          * the rest of the alternatives.  (They really jump to the
3520          * ending jump of the following alternative, because tensioning
3521          * these jumps is a hassle.)
3522          *
3523          * Repeats start with an on_failure_jump that points past both
3524          * the repetition text and either the following jump or
3525          * pop_failure_jump back to this on_failure_jump.  */
3526         case on_failure_jump:
3527 on_failure:
3528             DEBUG_PRINT1("EXECUTING on_failure_jump");
3529 
3530             EXTRACT_NUMBER_AND_INCR(mcnt, p);
3531             DEBUG_PRINT3(" %d (to 0x%x)", mcnt, p + mcnt);
3532 
3533             /* If this on_failure_jump comes right before a group (i.e.,
3534              * the original * applied to a group), save the information
3535              * for that group and all inner ones, so that if we fail back
3536              * to this point, the group's information will be correct.
3537              * For example, in \(a*\)*\1, we need the preceding group,
3538              * and in \(\(a*\)b*\)\2, we need the inner group.  */
3539 
3540             /* We can't use `p' to check ahead because we push
3541              * a failure point to `p + mcnt' after we do this.  */
3542             p1 = p;
3543 
3544             /* We need to skip no_op's before we look for the
3545              * start_memory in case this on_failure_jump is happening as
3546              * the result of a completed succeed_n, as in \(a\)\{1,3\}b\1
3547              * against aba.  */
3548             while (p1 < pend && (re_opcode_t) * p1 == no_op)
3549                 p1++;
3550 
3551             if (p1 < pend && (re_opcode_t) * p1 == start_memory) {
3552                 /* We have a new highest active register now.  This will
3553                  * get reset at the start_memory we are about to get to,
3554                  * but we will have saved all the registers relevant to
3555                  * this repetition op, as described above.  */
3556                 highest_active_reg = *(p1 + 1) + *(p1 + 2);
3557                 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
3558                     lowest_active_reg = *(p1 + 1);
3559             }
3560             DEBUG_PRINT1(":\n");
3561             PUSH_FAILURE_POINT(p + mcnt, d, -2);
3562             break;
3563 
3564         /* A smart repeat ends with `maybe_pop_jump'.
3565          * We change it to either `pop_failure_jump' or `jump'.  */
3566         case maybe_pop_jump:
3567             EXTRACT_NUMBER_AND_INCR(mcnt, p);
3568             DEBUG_PRINT2("EXECUTING maybe_pop_jump %d.\n", mcnt);
3569             {
3570                 register unsigned char *p2 = p;
3571 
3572                 /* Compare the beginning of the repeat with what in the
3573                  * pattern follows its end. If we can establish that there
3574                  * is nothing that they would both match, i.e., that we
3575                  * would have to backtrack because of (as in, e.g., `a*a')
3576                  * then we can change to pop_failure_jump, because we'll
3577                  * never have to backtrack.
3578                  *
3579                  * This is not true in the case of alternatives: in
3580                  * `(a|ab)*' we do need to backtrack to the `ab' alternative
3581                  * (e.g., if the string was `ab').  But instead of trying to
3582                  * detect that here, the alternative has put on a dummy
3583                  * failure point which is what we will end up popping.  */
3584 
3585                 /* Skip over open/close-group commands.  */
3586                 while (p2 + 2 < pend
3587                         && ((re_opcode_t) * p2 == stop_memory
3588                             || (re_opcode_t) * p2 == start_memory))
3589                     p2 += 3;    /* Skip over args, too.  */
3590 
3591                 /* If we're at the end of the pattern, we can change.  */
3592                 if (p2 == pend) {
3593                     /* Consider what happens when matching ":\(.*\)"
3594                      * against ":/".  I don't really understand this code
3595                      * yet.  */
3596                     p[-3] = (unsigned char) pop_failure_jump;
3597                     DEBUG_PRINT1
3598                     ("  End of pattern: change to `pop_failure_jump'.\n");
3599                 } else if ((re_opcode_t) * p2 == exactn
3600                            || (bufp->newline_anchor && (re_opcode_t) * p2 == endline)) {
3601                     register unsigned char c
3602                         = *p2 == (unsigned char) endline ? '\n' : p2[2];
3603                     p1 = p + mcnt;
3604 
3605                     /* p1[0] ... p1[2] are the `on_failure_jump' corresponding
3606                      * to the `maybe_finalize_jump' of this case.  Examine what
3607                      * follows.  */
3608                     if ((re_opcode_t) p1[3] == exactn && p1[5] != c) {
3609                         p[-3] = (unsigned char) pop_failure_jump;
3610                         DEBUG_PRINT3("  %c != %c => pop_failure_jump.\n",
3611                                      c, p1[5]);
3612                     } else if ((re_opcode_t) p1[3] == charset
3613                                || (re_opcode_t) p1[3] == charset_not) {
3614                         int not = (re_opcode_t) p1[3] == charset_not;
3615 
3616                         if (c < (unsigned char) (p1[4] * BYTEWIDTH)
3617                                 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
3618                             not = !not;
3619 
3620                         /* `not' is equal to 1 if c would match, which means
3621                          * that we can't change to pop_failure_jump.  */
3622                         if (!not) {
3623                             p[-3] = (unsigned char) pop_failure_jump;
3624                             DEBUG_PRINT1("  No match => pop_failure_jump.\n");
3625                         }
3626                     }
3627                 }
3628             }
3629             p -= 2;     /* Point at relative address again.  */
3630             if ((re_opcode_t) p[-1] != pop_failure_jump) {
3631                 p[-1] = (unsigned char) jump;
3632                 DEBUG_PRINT1("  Match => jump.\n");
3633                 goto unconditional_jump;
3634             }
3635         /* Note fall through.  */
3636 
3637         /* The end of a simple repeat has a pop_failure_jump back to
3638          * its matching on_failure_jump, where the latter will push a
3639          * failure point.  The pop_failure_jump takes off failure
3640          * points put on by this pop_failure_jump's matching
3641          * on_failure_jump; we got through the pattern to here from the
3642          * matching on_failure_jump, so didn't fail.  */
3643         case pop_failure_jump: {
3644             /* We need to pass separate storage for the lowest and
3645              * highest registers, even though we don't care about the
3646              * actual values.  Otherwise, we will restore only one
3647              * register from the stack, since lowest will == highest in
3648              * `pop_failure_point'.  */
3649             unsigned long dummy_low_reg, dummy_high_reg;
3650             unsigned char *pdummy;
3651             const char *sdummy;
3652 
3653             DEBUG_PRINT1("EXECUTING pop_failure_jump.\n");
3654             POP_FAILURE_POINT(sdummy, pdummy,
3655                               dummy_low_reg, dummy_high_reg,
3656                               reg_dummy, reg_dummy, reg_info_dummy);
3657             /* avoid GCC 4.6 set but unused variables warning. Does not matter here. */
3658             if (pdummy || sdummy)
3659                 (void)0;
3660         }
3661         /* Note fall through.  */
3662 
3663         /* Unconditionally jump (without popping any failure points).  */
3664         case jump:
3665 unconditional_jump:
3666             EXTRACT_NUMBER_AND_INCR(mcnt, p);   /* Get the amount to jump.  */
3667             DEBUG_PRINT2("EXECUTING jump %d ", mcnt);
3668             p += mcnt;      /* Do the jump.  */
3669             DEBUG_PRINT2("(to 0x%x).\n", p);
3670             break;
3671 
3672         /* We need this opcode so we can detect where alternatives end
3673          * in `group_match_null_string_p' et al.  */
3674         case jump_past_alt:
3675             DEBUG_PRINT1("EXECUTING jump_past_alt.\n");
3676             goto unconditional_jump;
3677 
3678         /* Normally, the on_failure_jump pushes a failure point, which
3679          * then gets popped at pop_failure_jump.  We will end up at
3680          * pop_failure_jump, also, and with a pattern of, say, `a+', we
3681          * are skipping over the on_failure_jump, so we have to push
3682          * something meaningless for pop_failure_jump to pop.  */
3683         case dummy_failure_jump:
3684             DEBUG_PRINT1("EXECUTING dummy_failure_jump.\n");
3685             /* It doesn't matter what we push for the string here.  What
3686              * the code at `fail' tests is the value for the pattern.  */
3687             PUSH_FAILURE_POINT(0, 0, -2);
3688             goto unconditional_jump;
3689 
3690         /* At the end of an alternative, we need to push a dummy failure
3691          * point in case we are followed by a `pop_failure_jump', because
3692          * we don't want the failure point for the alternative to be
3693          * popped.  For example, matching `(a|ab)*' against `aab'
3694          * requires that we match the `ab' alternative.  */
3695         case push_dummy_failure:
3696             DEBUG_PRINT1("EXECUTING push_dummy_failure.\n");
3697             /* See comments just above at `dummy_failure_jump' about the
3698              * two zeroes.  */
3699             PUSH_FAILURE_POINT(0, 0, -2);
3700             break;
3701 
3702         /* Have to succeed matching what follows at least n times.
3703          * After that, handle like `on_failure_jump'.  */
3704         case succeed_n:
3705             EXTRACT_NUMBER(mcnt, p + 2);
3706             DEBUG_PRINT2("EXECUTING succeed_n %d.\n", mcnt);
3707 
3708             assert(mcnt >= 0);
3709             /* Originally, this is how many times we HAVE to succeed.  */
3710             if (mcnt > 0) {
3711                 mcnt--;
3712                 p += 2;
3713                 STORE_NUMBER_AND_INCR(p, mcnt);
3714                 DEBUG_PRINT3("  Setting 0x%x to %d.\n", p, mcnt);
3715             } else if (mcnt == 0) {
3716                 DEBUG_PRINT2("  Setting two bytes from 0x%x to no_op.\n", p + 2);
3717                 p[2] = (unsigned char) no_op;
3718                 p[3] = (unsigned char) no_op;
3719                 goto on_failure;
3720             }
3721             break;
3722 
3723         case jump_n:
3724             EXTRACT_NUMBER(mcnt, p + 2);
3725             DEBUG_PRINT2("EXECUTING jump_n %d.\n", mcnt);
3726 
3727             /* Originally, this is how many times we CAN jump.  */
3728             if (mcnt) {
3729                 mcnt--;
3730                 STORE_NUMBER(p + 2, mcnt);
3731                 goto unconditional_jump;
3732             }
3733             /* If don't have to jump any more, skip over the rest of command.  */
3734             else
3735                 p += 4;
3736             break;
3737 
3738         case set_number_at: {
3739             DEBUG_PRINT1("EXECUTING set_number_at.\n");
3740 
3741             EXTRACT_NUMBER_AND_INCR(mcnt, p);
3742             p1 = p + mcnt;
3743             EXTRACT_NUMBER_AND_INCR(mcnt, p);
3744             DEBUG_PRINT3("  Setting 0x%x to %d.\n", p1, mcnt);
3745             STORE_NUMBER(p1, mcnt);
3746             break;
3747         }
3748 
3749         case wordbound:
3750             DEBUG_PRINT1("EXECUTING wordbound.\n");
3751             if (AT_WORD_BOUNDARY(d))
3752                 break;
3753             goto fail;
3754 
3755         case notwordbound:
3756             DEBUG_PRINT1("EXECUTING notwordbound.\n");
3757             if (AT_WORD_BOUNDARY(d))
3758                 goto fail;
3759             break;
3760 
3761         case wordbeg:
3762             DEBUG_PRINT1("EXECUTING wordbeg.\n");
3763             if (wordchar_p(d,end1,string2) && (AT_STRINGS_BEG(d) || !WORDCHAR_P(d - 1)))
3764                 break;
3765             goto fail;
3766 
3767         case wordend:
3768             DEBUG_PRINT1("EXECUTING wordend.\n");
3769             if (!AT_STRINGS_BEG(d) && WORDCHAR_P(d - 1)
3770                     && (!wordchar_p(d,end1,string2) || at_strings_end(d,end2)))
3771                 break;
3772             goto fail;
3773 
3774         case wordchar:
3775             DEBUG_PRINT1("EXECUTING non-Emacs wordchar.\n");
3776             PREFETCH();
3777             if (!wordchar_p(d,end1,string2))
3778                 goto fail;
3779             SET_REGS_MATCHED();
3780             d++;
3781             break;
3782 
3783         case notwordchar:
3784             DEBUG_PRINT1("EXECUTING non-Emacs notwordchar.\n");
3785             PREFETCH();
3786             if (wordchar_p(d,end1,string2))
3787                 goto fail;
3788             SET_REGS_MATCHED();
3789             d++;
3790             break;
3791 
3792         default:
3793             abort();
3794         }
3795         continue;       /* Successfully executed one pattern command; keep going.  */
3796 
3797         /* We goto here if a matching operation fails. */
3798 fail:
3799         if (!FAIL_STACK_EMPTY()) {  /* A restart point is known.  Restore to that state.  */
3800             DEBUG_PRINT1("\nFAIL:\n");
3801             POP_FAILURE_POINT(d, p,
3802                               lowest_active_reg, highest_active_reg,
3803                               regstart, regend, reg_info);
3804 
3805             /* If this failure point is a dummy, try the next one.  */
3806             if (!p)
3807                 goto fail;
3808 
3809             /* If we failed to the end of the pattern, don't examine *p.  */
3810             assert(p <= pend);
3811             if (p < pend) {
3812                 boolean is_a_jump_n = false;
3813 
3814                 /* If failed to a backwards jump that's part of a repetition
3815                  * loop, need to pop this failure point and use the next one.  */
3816                 switch ((re_opcode_t) * p) {
3817                 case jump_n:
3818                     is_a_jump_n = true;
3819                 case maybe_pop_jump:
3820                 case pop_failure_jump:
3821                 case jump:
3822                     p1 = p + 1;
3823                     EXTRACT_NUMBER_AND_INCR(mcnt, p1);
3824                     p1 += mcnt;
3825 
3826                     if ((is_a_jump_n && (re_opcode_t) * p1 == succeed_n)
3827                             || (!is_a_jump_n
3828                                 && (re_opcode_t) * p1 == on_failure_jump))
3829                         goto fail;
3830                     break;
3831                 default:
3832                     /* do nothing */
3833                     ;
3834                 }
3835             }
3836             if (d >= string1 && d <= end1)
3837                 dend = end_match_1;
3838         } else
3839             break;      /* Matching at this starting point really fails.  */
3840     }               /* for (;;) */
3841 
3842     if (best_regs_set)
3843         goto restore_best_regs;
3844 
3845     FREE_VARIABLES();
3846 
3847     return -1;          /* Failure to match.  */
3848 }               /* re_match_2 */
3849 
3850 /* Subroutine definitions for re_match_2.  */
3851 
3852 /* We are passed P pointing to a register number after a start_memory.
3853  *
3854  * Return true if the pattern up to the corresponding stop_memory can
3855  * match the empty string, and false otherwise.
3856  *
3857  * If we find the matching stop_memory, sets P to point to one past its number.
3858  * Otherwise, sets P to an undefined byte less than or equal to END.
3859  *
3860  * We don't handle duplicates properly (yet).  */
3861 
3862 boolean
group_match_null_string_p(unsigned char ** p,unsigned char * end,register_info_type * reg_info)3863 group_match_null_string_p(unsigned char **p, unsigned char *end, register_info_type *reg_info)
3864 {
3865     int mcnt;
3866     /* Point to after the args to the start_memory.  */
3867     unsigned char *p1 = *p + 2;
3868 
3869     while (p1 < end) {
3870         /* Skip over opcodes that can match nothing, and return true or
3871          * false, as appropriate, when we get to one that can't, or to the
3872          * matching stop_memory.  */
3873 
3874         switch ((re_opcode_t) * p1) {
3875         /* Could be either a loop or a series of alternatives.  */
3876         case on_failure_jump:
3877             p1++;
3878             EXTRACT_NUMBER_AND_INCR(mcnt, p1);
3879 
3880             /* If the next operation is not a jump backwards in the
3881              * pattern.  */
3882 
3883             if (mcnt >= 0) {
3884                 /* Go through the on_failure_jumps of the alternatives,
3885                  * seeing if any of the alternatives cannot match nothing.
3886                  * The last alternative starts with only a jump,
3887                  * whereas the rest start with on_failure_jump and end
3888                  * with a jump, e.g., here is the pattern for `a|b|c':
3889                  *
3890                  * /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
3891                  * /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
3892                  * /exactn/1/c
3893                  *
3894                  * So, we have to first go through the first (n-1)
3895                  * alternatives and then deal with the last one separately.  */
3896 
3897                 /* Deal with the first (n-1) alternatives, which start
3898                  * with an on_failure_jump (see above) that jumps to right
3899                  * past a jump_past_alt.  */
3900 
3901                 while ((re_opcode_t) p1[mcnt - 3] == jump_past_alt) {
3902                     /* `mcnt' holds how many bytes long the alternative
3903                      * is, including the ending `jump_past_alt' and
3904                      * its number.  */
3905 
3906                     if (!alt_match_null_string_p(p1, p1 + mcnt - 3,
3907                                                  reg_info))
3908                         return false;
3909 
3910                     /* Move to right after this alternative, including the
3911                      * jump_past_alt.  */
3912                     p1 += mcnt;
3913 
3914                     /* Break if it's the beginning of an n-th alternative
3915                      * that doesn't begin with an on_failure_jump.  */
3916                     if ((re_opcode_t) * p1 != on_failure_jump)
3917                         break;
3918 
3919                     /* Still have to check that it's not an n-th
3920                      * alternative that starts with an on_failure_jump.  */
3921                     p1++;
3922                     EXTRACT_NUMBER_AND_INCR(mcnt, p1);
3923                     if ((re_opcode_t) p1[mcnt - 3] != jump_past_alt) {
3924                         /* Get to the beginning of the n-th alternative.  */
3925                         p1 -= 3;
3926                         break;
3927                     }
3928                 }
3929 
3930                 /* Deal with the last alternative: go back and get number
3931                  * of the `jump_past_alt' just before it.  `mcnt' contains
3932                  * the length of the alternative.  */
3933                 EXTRACT_NUMBER(mcnt, p1 - 2);
3934 
3935                 if (!alt_match_null_string_p(p1, p1 + mcnt, reg_info))
3936                     return false;
3937 
3938                 p1 += mcnt; /* Get past the n-th alternative.  */
3939             }           /* if mcnt > 0 */
3940             break;
3941 
3942         case stop_memory:
3943             assert(p1[1] == **p);
3944             *p = p1 + 2;
3945             return true;
3946 
3947         default:
3948             if (!common_op_match_null_string_p(&p1, end, reg_info))
3949                 return false;
3950         }
3951     }               /* while p1 < end */
3952 
3953     return false;
3954 }               /* group_match_null_string_p */
3955 
3956 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
3957  * It expects P to be the first byte of a single alternative and END one
3958  * byte past the last. The alternative can contain groups.  */
3959 
3960 boolean
alt_match_null_string_p(unsigned char * p,unsigned char * end,register_info_type * reg_info)3961 alt_match_null_string_p(unsigned char *p, unsigned char *end, register_info_type *reg_info)
3962 {
3963     int mcnt;
3964     unsigned char *p1 = p;
3965 
3966     while (p1 < end) {
3967         /* Skip over opcodes that can match nothing, and break when we get
3968          * to one that can't.  */
3969 
3970         switch ((re_opcode_t) * p1) {
3971         /* It's a loop.  */
3972         case on_failure_jump:
3973             p1++;
3974             EXTRACT_NUMBER_AND_INCR(mcnt, p1);
3975             p1 += mcnt;
3976             break;
3977 
3978         default:
3979             if (!common_op_match_null_string_p(&p1, end, reg_info))
3980                 return false;
3981         }
3982     }               /* while p1 < end */
3983 
3984     return true;
3985 }               /* alt_match_null_string_p */
3986 
3987 /* Deals with the ops common to group_match_null_string_p and
3988  * alt_match_null_string_p.
3989  *
3990  * Sets P to one after the op and its arguments, if any.  */
3991 
3992 boolean
common_op_match_null_string_p(unsigned char ** p,unsigned char * end,register_info_type * reg_info)3993 common_op_match_null_string_p( unsigned char **p, unsigned char *end, register_info_type *reg_info)
3994 {
3995     int mcnt;
3996     boolean ret;
3997     int reg_no;
3998     unsigned char *p1 = *p;
3999 
4000     switch ((re_opcode_t) * p1++) {
4001     case no_op:
4002     case begline:
4003     case endline:
4004     case begbuf:
4005     case endbuf:
4006     case wordbeg:
4007     case wordend:
4008     case wordbound:
4009     case notwordbound:
4010         break;
4011 
4012     case start_memory:
4013         reg_no = *p1;
4014         assert(reg_no > 0 && reg_no <= MAX_REGNUM);
4015         ret = group_match_null_string_p(&p1, end, reg_info);
4016 
4017         /* Have to set this here in case we're checking a group which
4018          * contains a group and a back reference to it.  */
4019 
4020         if (REG_MATCH_NULL_STRING_P(reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4021             REG_MATCH_NULL_STRING_P(reg_info[reg_no]) = ret;
4022 
4023         if (!ret)
4024             return false;
4025         break;
4026 
4027     /* If this is an optimized succeed_n for zero times, make the jump.  */
4028     case jump:
4029         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
4030         if (mcnt >= 0)
4031             p1 += mcnt;
4032         else
4033             return false;
4034         break;
4035 
4036     case succeed_n:
4037         /* Get to the number of times to succeed.  */
4038         p1 += 2;
4039         EXTRACT_NUMBER_AND_INCR(mcnt, p1);
4040 
4041         if (mcnt == 0) {
4042             p1 -= 4;
4043             EXTRACT_NUMBER_AND_INCR(mcnt, p1);
4044             p1 += mcnt;
4045         } else
4046             return false;
4047         break;
4048 
4049     case duplicate:
4050         if (!REG_MATCH_NULL_STRING_P(reg_info[*p1]))
4051             return false;
4052         break;
4053 
4054     case set_number_at:
4055         p1 += 4;
4056 
4057     default:
4058         /* All other opcodes mean we cannot match the empty string.  */
4059         return false;
4060     }
4061 
4062     *p = p1;
4063     return true;
4064 }               /* common_op_match_null_string_p */
4065 
4066 /* Return zero if TRANSLATE[S1] and TRANSLATE[S2] are identical for LEN
4067  * bytes; nonzero otherwise.  */
4068 
4069 int
bcmp_translate(unsigned char const * s1,unsigned char const * s2,register int len,char * translate)4070 bcmp_translate(unsigned char const *s1, unsigned char const*s2, register int len, char *translate)
4071 {
4072     register unsigned char const *p1 = s1, *p2 = s2;
4073     while (len) {
4074         if (translate[*p1++] != translate[*p2++])
4075             return 1;
4076         len--;
4077     }
4078     return 0;
4079 }
4080 
4081 /* Entry points for GNU code.  */
4082 
4083 /* POSIX.2 functions */
4084 
4085 /* regcomp takes a regular expression as a string and compiles it.
4086  *
4087  * PREG is a regex_t *.  We do not expect any fields to be initialized,
4088  * since POSIX says we shouldn't.  Thus, we set
4089  *
4090  * `buffer' to the compiled pattern;
4091  * `used' to the length of the compiled pattern;
4092  * `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
4093  * REG_EXTENDED bit in CFLAGS is set; otherwise, to
4094  * RE_SYNTAX_POSIX_BASIC;
4095  * `newline_anchor' to REG_NEWLINE being set in CFLAGS;
4096  * `fastmap' and `fastmap_accurate' to zero;
4097  * `re_nsub' to the number of subexpressions in PATTERN.
4098  *
4099  * PATTERN is the address of the pattern string.
4100  *
4101  * CFLAGS is a series of bits which affect compilation.
4102  *
4103  * If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
4104  * use POSIX basic syntax.
4105  *
4106  * If REG_NEWLINE is set, then . and [^...] don't match newline.
4107  * Also, regexec will try a match beginning after every newline.
4108  *
4109  * If REG_ICASE is set, then we considers upper- and lowercase
4110  * versions of letters to be equivalent when matching.
4111  *
4112  * If REG_NOSUB is set, then when PREG is passed to regexec, that
4113  * routine will report only success or failure, and nothing about the
4114  * registers.
4115  *
4116  * It returns 0 if it succeeds, nonzero if it doesn't.  (See regex.h for
4117  * the return codes and their meanings.)  */
4118 
4119 int
regcomp(preg,pattern,cflags)4120 regcomp(preg, pattern, cflags)
4121 regex_t *preg;
4122 const char *pattern;
4123 int cflags;
4124 {
4125     reg_errcode_t ret;
4126     unsigned syntax
4127         = (cflags & REG_EXTENDED) ?
4128           RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
4129 
4130     /* regex_compile will allocate the space for the compiled pattern.  */
4131     preg->buffer = 0;
4132     preg->allocated = 0;
4133 
4134     /* Don't bother to use a fastmap when searching.  This simplifies the
4135      * REG_NEWLINE case: if we used a fastmap, we'd have to put all the
4136      * characters after newlines into the fastmap.  This way, we just try
4137      * every character.  */
4138     preg->fastmap = 0;
4139 
4140     if (cflags & REG_ICASE) {
4141         unsigned i;
4142 
4143         preg->translate = (char *) malloc(CHAR_SET_SIZE);
4144         if (preg->translate == NULL)
4145             return (int) REG_ESPACE;
4146 
4147         /* Map uppercase characters to corresponding lowercase ones.  */
4148         for (i = 0; i < CHAR_SET_SIZE; i++)
4149             preg->translate[i] = ISUPPER(i) ? tolower(i) : i;
4150     } else
4151         preg->translate = NULL;
4152 
4153     /* If REG_NEWLINE is set, newlines are treated differently.  */
4154     if (cflags & REG_NEWLINE) { /* REG_NEWLINE implies neither . nor [^...] match newline.  */
4155         syntax &= ~RE_DOT_NEWLINE;
4156         syntax |= RE_HAT_LISTS_NOT_NEWLINE;
4157         /* It also changes the matching behavior.  */
4158         preg->newline_anchor = 1;
4159     } else
4160         preg->newline_anchor = 0;
4161 
4162     preg->no_sub = !!(cflags & REG_NOSUB);
4163 
4164     /* POSIX says a null character in the pattern terminates it, so we
4165      * can use strlen here in compiling the pattern.  */
4166     ret = regex_compile(pattern, strlen(pattern), syntax, preg);
4167 
4168     /* POSIX doesn't distinguish between an unmatched open-group and an
4169      * unmatched close-group: both are REG_EPAREN.  */
4170     if (ret == REG_ERPAREN)
4171         ret = REG_EPAREN;
4172 
4173     return (int) ret;
4174 }
4175 
4176 /* regexec searches for a given pattern, specified by PREG, in the
4177  * string STRING.
4178  *
4179  * If NMATCH is zero or REG_NOSUB was set in the cflags argument to
4180  * `regcomp', we ignore PMATCH.  Otherwise, we assume PMATCH has at
4181  * least NMATCH elements, and we set them to the offsets of the
4182  * corresponding matched substrings.
4183  *
4184  * EFLAGS specifies `execution flags' which affect matching: if
4185  * REG_NOTBOL is set, then ^ does not match at the beginning of the
4186  * string; if REG_NOTEOL is set, then $ does not match at the end.
4187  *
4188  * We return 0 if we find a match and REG_NOMATCH if not.  */
4189 
4190 int
regexec(preg,string,nmatch,pmatch,eflags)4191 regexec(preg, string, nmatch, pmatch, eflags)
4192 const regex_t *preg;
4193 const char *string;
4194 size_t nmatch;
4195 regmatch_t pmatch[];
4196 int eflags;
4197 {
4198     int ret;
4199     struct re_registers regs;
4200     regex_t private_preg;
4201     int len = strlen(string);
4202     boolean want_reg_info = !preg->no_sub && nmatch > 0;
4203 
4204     private_preg = *preg;
4205 
4206     private_preg.not_bol = !!(eflags & REG_NOTBOL);
4207     private_preg.not_eol = !!(eflags & REG_NOTEOL);
4208 
4209     /* The user has told us exactly how many registers to return
4210      * information about, via `nmatch'.  We have to pass that on to the
4211      * matching routines.  */
4212     private_preg.regs_allocated = REGS_FIXED;
4213 
4214     if (want_reg_info) {
4215         regs.num_regs = nmatch;
4216         regs.start = TALLOC(nmatch, regoff_t);
4217         regs.end = TALLOC(nmatch, regoff_t);
4218         if (regs.start == NULL || regs.end == NULL)
4219             return (int) REG_NOMATCH;
4220     }
4221     /* Perform the searching operation.  */
4222     ret = re_search(&private_preg, string, len,
4223                     /* start: */ 0, /* range: */ len,
4224                     want_reg_info ? &regs : (struct re_registers *) 0);
4225 
4226     /* Copy the register information to the POSIX structure.  */
4227     if (want_reg_info) {
4228         if (ret >= 0) {
4229             unsigned r;
4230 
4231             for (r = 0; r < nmatch; r++) {
4232                 pmatch[r].rm_so = regs.start[r];
4233                 pmatch[r].rm_eo = regs.end[r];
4234             }
4235         }
4236         /* If we needed the temporary register info, free the space now.  */
4237         free(regs.start);
4238         free(regs.end);
4239     }
4240     /* We want zero return to mean success, unlike `re_search'.  */
4241     return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
4242 }
4243 
4244 /* Returns a message corresponding to an error code, ERRCODE, returned
4245  * from either regcomp or regexec.   We don't use PREG here.  */
4246 
4247 size_t
regerror(int errcode,const regex_t * preg,char * errbuf,size_t errbuf_size)4248 regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
4249 {
4250     const char *msg;
4251     size_t msg_size;
4252 
4253     if (errcode < 0
4254             || errcode >= (sizeof(re_error_msg) / sizeof(re_error_msg[0])))
4255         /* Only error codes returned by the rest of the code should be passed
4256          * to this routine.  If we are given anything else, or if other regex
4257          * code generates an invalid error code, then the program has a bug.
4258          * Dump core so we can fix it.  */
4259         abort();
4260 
4261     msg = re_error_msg[errcode];
4262 
4263     /* POSIX doesn't require that we do anything in this case, but why
4264      * not be nice.  */
4265     if (!msg)
4266         msg = "Success";
4267 
4268     msg_size = strlen(msg) + 1; /* Includes the null.  */
4269 
4270     if (errbuf_size != 0) {
4271         if (msg_size > errbuf_size) {
4272             strncpy(errbuf, msg, errbuf_size - 1);
4273             errbuf[errbuf_size - 1] = 0;
4274         } else
4275             strcpy(errbuf, msg);
4276     }
4277     return msg_size;
4278 }
4279 
4280 /* Free dynamically allocated space used by PREG.  */
4281 
4282 void
regfree(preg)4283 regfree(preg)
4284 regex_t *preg;
4285 {
4286     if (preg->buffer != NULL)
4287         free(preg->buffer);
4288     preg->buffer = NULL;
4289 
4290     preg->allocated = 0;
4291     preg->used = 0;
4292 
4293     if (preg->fastmap != NULL)
4294         free(preg->fastmap);
4295     preg->fastmap = NULL;
4296     preg->fastmap_accurate = 0;
4297 
4298     if (preg->translate != NULL)
4299         free(preg->translate);
4300     preg->translate = NULL;
4301 }
4302 #endif /* USE_GNUREGEX */
4303 
4304 /*
4305  * Local variables:
4306  * make-backup-files: t
4307  * version-control: t
4308  * trim-versions-without-asking: nil
4309  * End:
4310  */
4311 
4312