1 /* Extended regular expression matching and search library.
2    Copyright (C) 1985, 1989-90 Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 1, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.  */
17 /* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
18    Last change: May 21, 1993 by t^2  */
19 
20 
21 /* To test, compile with -Dtest.  This Dtestable feature turns this into
22    a self-contained program which reads a pattern, describes how it
23    compiles, then reads a string and searches for it.
24 
25    On the other hand, if you compile with both -Dtest and -Dcanned you
26    can run some tests we've already thought of.  */
27 
28 /*
29    Some small modifications for Namazu
30       by ccsatoru@vega.aichi-u.ac.jp [10/06/1998]
31 */
32 
33 /* We write fatal error messages on standard error.  */
34 #include <stdio.h>
35 
36 /* isalpha(3) etc. are used for the character classes.  */
37 #include <ctype.h>
38 #include <sys/types.h>
39 
40 #ifdef __STDC__
41 #define P(s)    s
42 #define MALLOC_ARG_T size_t
43 #else
44 #define P(s)    ()
45 #define MALLOC_ARG_T unsigned
46 #define volatile
47 #define const
48 #endif
49 
50 /* #include "config.h" */
51 
52 void *xmalloc P((unsigned long));
53 void *xcalloc P((unsigned long,unsigned long));
54 void *xrealloc P((void*,unsigned long));
55 void free P((void*));
56 
57 /* #define	NO_ALLOCA	* try it out for now */
58 #ifndef NO_ALLOCA
59 /* Make alloca work the best possible way.  */
60 #ifdef __GNUC__
61 # ifndef atarist
62 #  ifndef alloca
63 #   define alloca __builtin_alloca
64 #  endif
65 # endif /* atarist */
66 #else
67 # if defined(HAVE_ALLOCA_H)
68 #  include <alloca.h>
69 # else
70 char *alloca();
71 # endif
72 #endif /* __GNUC__ */
73 
74 #ifdef _AIX
75 #pragma alloca
76 #endif
77 
78 #ifdef HAVE_STRING_H
79 # include <string.h>
80 #else
81 # include <strings.h>
82 #endif
83 
84 #define RE_ALLOCATE alloca
85 #ifdef C_ALLOCA
86 #define FREE_VARIABLES() alloca(0)
87 #else
88 #define FREE_VARIABLES()
89 #endif
90 
91 #define FREE_AND_RETURN_VOID(stackb)	return
92 #define FREE_AND_RETURN(stackb,val)	return(val)
93 #define DOUBLE_STACK(stackx,stackb,len,type) \
94         (stackx = (type*) alloca(2 * len * sizeof(type)),		\
95 	/* Only copy what is in use.  */				\
96         (type*) memcpy(stackx, stackb, len * sizeof (type)))
97 #else  /* NO_ALLOCA defined */
98 
99 #define RE_ALLOCATE xmalloc
100 
101 #define FREE_VAR(var) if (var) free(var); var = NULL
102 #define FREE_VARIABLES()						\
103   do {									\
104     FREE_VAR(regstart);							\
105     FREE_VAR(regend);							\
106     FREE_VAR(old_regstart)						\
107     FREE_VAR(old_regend);						\
108     FREE_VAR(best_regstart);						\
109     FREE_VAR(best_regend);						\
110     FREE_VAR(reg_info);							\
111   } while (0)
112 
113 #define FREE_AND_RETURN_VOID(stackb)   free(stackb);return
114 #define FREE_AND_RETURN(stackb,val)    free(stackb);return(val)
115 #define DOUBLE_STACK(stackx,stackb,len,type) \
116         (type*)xrealloc(stackb, 2 * len * sizeof(type))
117 #endif /* NO_ALLOCA */
118 
119 #define RE_TALLOC(n,t)  ((t*)RE_ALLOCATE((n)*sizeof(t)))
120 #define TMALLOC(n,t)    ((t*)xmalloc((n)*sizeof(t)))
121 #define TREALLOC(s,n,t) (s=((t*)xrealloc(s,(n)*sizeof(t))))
122 
123 #define EXPAND_FAIL_STACK(stackx,stackb,len) \
124     do {\
125         /* Roughly double the size of the stack.  */			\
126         stackx = DOUBLE_STACK(stackx,stackb,len,unsigned char*);	\
127 	/* Rearrange the pointers. */					\
128 	stackp = stackx + (stackp - stackb);				\
129 	stackb = stackx;						\
130 	stacke = stackb + 2 * len;					\
131     } while (0)
132 
133 /* Get the interface, including the syntax bits.  */
134 #include "regex.h"
135 
136 /* Subroutines for re_compile_pattern.  */
137 static void store_jump P((char *, int, char *));
138 static void insert_jump P((int, char *, char *, char *));
139 static void store_jump_n P((char *, int, char *, unsigned));
140 static void insert_jump_n P((int, char *, char *, char *, unsigned));
141 /* static void insert_op P((int, char *, char *)); */
142 static void insert_op_2 P((int, char *, char *, int, int));
143 static int memcmp_translate P((unsigned char *, unsigned char *, int));
144 static int alt_match_null_string_p ();
145 static int common_op_match_null_string_p ();
146 static int group_match_null_string_p ();
147 
148 /* Define the syntax stuff, so we can do the \<, \>, etc.  */
149 
150 /* This must be nonzero for the wordchar and notwordchar pattern
151    commands in re_match.  */
152 #ifndef Sword
153 #define Sword 1
154 #endif
155 
156 #define SYNTAX(c) re_syntax_table[c]
157 
158 static char re_syntax_table[256];
159 static void init_syntax_once P((void));
160 static unsigned char *translate = 0;
161 static void init_regs P((struct re_registers*, unsigned int));
162 static void bm_init_skip P((int *, unsigned char*, int, char *));
163 
164 #undef P
165 
166 #include "util.h"
167 
168 static void
init_syntax_once()169 init_syntax_once()
170 {
171    register int c;
172    static int done = 0;
173 
174    if (done)
175      return;
176 
177    memset(re_syntax_table, 0, sizeof re_syntax_table);
178 
179    for (c = 'a'; c <= 'z'; c++)
180      re_syntax_table[c] = Sword;
181 
182    for (c = 'A'; c <= 'Z'; c++)
183      re_syntax_table[c] = Sword;
184 
185    for (c = '0'; c <= '9'; c++)
186      re_syntax_table[c] = Sword;
187 
188    re_syntax_table['_'] = Sword;
189 
190    /* Add specific syntax for ISO Latin-1.  */
191    for (c = 0300; c <= 0377; c++)
192      re_syntax_table[c] = Sword;
193    re_syntax_table[0327] = 0;
194    re_syntax_table[0367] = 0;
195 
196    done = 1;
197 }
198 
199 void
re_set_casetable(table)200 re_set_casetable(table)
201      char *table;
202 {
203   translate = (unsigned char*)table;
204 }
205 
206 /* Jim Meyering writes:
207 
208    "... Some ctype macros are valid only for character codes that
209    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
210    using /bin/cc or gcc but without giving an ansi option).  So, all
211    ctype uses should be through macros like ISPRINT...  If
212    STDC_HEADERS is defined, then autoconf has verified that the ctype
213    macros don't need to be guarded with references to isascii. ...
214    Defining isascii to 1 should let any compiler worth its salt
215    eliminate the && through constant folding."  */
216 #ifdef isblank
217 #define ISBLANK(c) isblank ((unsigned char)c)
218 #else
219 #define ISBLANK(c) ((c) == ' ' || (c) == '\t')
220 #endif
221 #ifdef isgraph
222 #define ISGRAPH(c) isgraph ((unsigned char)c)
223 #else
224 #define ISGRAPH(c) (isprint ((unsigned char)c) && !isspace ((unsigned char)c))
225 #endif
226 
227 #define ISPRINT(c) isprint ((unsigned char)c)
228 #define ISDIGIT(c) isdigit ((unsigned char)c)
229 #define ISALNUM(c) isalnum ((unsigned char)c)
230 #define ISALPHA(c) isalpha ((unsigned char)c)
231 #define ISCNTRL(c) iscntrl ((unsigned char)c)
232 #define ISLOWER(c) islower ((unsigned char)c)
233 #define ISPUNCT(c) ispunct ((unsigned char)c)
234 #define ISSPACE(c) isspace ((unsigned char)c)
235 #define ISUPPER(c) isupper ((unsigned char)c)
236 #define ISXDIGIT(c) isxdigit ((unsigned char)c)
237 
238 /* These are the command codes that appear in compiled regular
239    expressions, one per byte.  Some command codes are followed by
240    argument bytes.  A command code can specify any interpretation
241    whatsoever for its arguments.  Zero-bytes may appear in the compiled
242    regular expression.
243 
244    The value of `exactn' is needed in search.c (search_buffer) in emacs.
245    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
246    `exactn' we use here must also be 1.  */
247 
248 enum regexpcode
249   {
250     unused=0,
251     exactn=1, /* Followed by one byte giving n, then by n literal bytes.  */
252     begline,  /* Fail unless at beginning of line.  */
253     endline,  /* Fail unless at end of line.  */
254     begbuf,   /* Succeeds if at beginning of buffer (if emacs) or at beginning
255                  of string to be matched (if not).  */
256     endbuf,   /* Analogously, for end of buffer/string.  */
257     endbuf2,  /* End of buffer/string, or newline just before it.  */
258     jump,     /* Followed by two bytes giving relative address to jump to.  */
259     jump_past_alt,/* Same as jump, but marks the end of an alternative.  */
260     on_failure_jump,	 /* Followed by two bytes giving relative address of
261 			    place to resume at in case of failure.  */
262     finalize_jump,	 /* Throw away latest failure point and then jump to
263 			    address.  */
264     maybe_finalize_jump, /* Like jump but finalize if safe to do so.
265 			    This is used to jump back to the beginning
266 			    of a repeat.  If the command that follows
267 			    this jump is clearly incompatible with the
268 			    one at the beginning of the repeat, such that
269 			    we can be sure that there is no use backtracking
270 			    out of repetitions already completed,
271 			    then we finalize.  */
272     dummy_failure_jump,  /* Jump, and push a dummy failure point. This
273 			    failure point will be thrown away if an attempt
274                             is made to use it for a failure. A + construct
275                             makes this before the first repeat.  Also
276                             use it as an intermediary kind of jump when
277                             compiling an or construct.  */
278     push_dummy_failure, /* Push a dummy failure point and continue.  Used at the end of
279 			   alternatives.  */
280     succeed_n,	 /* Used like on_failure_jump except has to succeed n times;
281 		    then gets turned into an on_failure_jump. The relative
282                     address following it is useless until then.  The
283                     address is followed by two bytes containing n.  */
284     jump_n,	 /* Similar to jump, but jump n times only; also the relative
285 		    address following is in turn followed by yet two more bytes
286                     containing n.  */
287     try_next,    /* Jump to next pattern for the first time,
288 		    leaving this pattern on the failure stack. */
289     finalize_push,	/* Finalize stack and push the beginning of the pattern
290 			   on the stack to retry (used for non-greedy match) */
291     finalize_push_n,	/* Similar to finalize_push, buf finalize n time only */
292     set_number_at,	/* Set the following relative location to the
293 			   subsequent number.  */
294     anychar,	 /* Matches any (more or less) one character.  */
295     charset,     /* Matches any one char belonging to specified set.
296 		    First following byte is number of bitmap bytes.
297 		    Then come bytes for a bitmap saying which chars are in.
298 		    Bits in each byte are ordered low-bit-first.
299 		    A character is in the set if its bit is 1.
300 		    A character too large to have a bit in the map
301 		    is automatically not in the set.  */
302     charset_not, /* Same parameters as charset, but match any character
303                     that is not one of those specified.  */
304     start_memory, /* Start remembering the text that is matched, for
305 		    storing in a memory register.  Followed by one
306                     byte containing the register number.  Register numbers
307                     must be in the range 0 through RE_NREGS.  */
308     stop_memory, /* Stop remembering the text that is matched
309 		    and store it in a memory register.  Followed by
310                     one byte containing the register number. Register
311                     numbers must be in the range 0 through RE_NREGS.  */
312     casefold_on,   /* Turn on casefold flag. */
313     casefold_off,  /* Turn off casefold flag. */
314     start_nowidth, /* Save string point to the stack. */
315     stop_nowidth,  /* Restore string place at the point start_nowidth. */
316     pop_and_fail,  /* Fail after popping nowidth entry from stack. */
317     duplicate,   /* Match a duplicate of something remembered.
318 		    Followed by one byte containing the index of the memory
319                     register.  */
320     wordchar,    /* Matches any word-constituent character.  */
321     notwordchar, /* Matches any char that is not a word-constituent.  */
322     wordbeg,	 /* Succeeds if at word beginning.  */
323     wordend,	 /* Succeeds if at word end.  */
324     wordbound,   /* Succeeds if at a word boundary.  */
325     notwordbound,/* Succeeds if not at a word boundary.  */
326   };
327 
328 
329 /* Number of failure points to allocate space for initially,
330    when matching.  If this number is exceeded, more space is allocated,
331    so it is not a hard limit.  */
332 
333 #ifndef NFAILURES
334 #define NFAILURES 80
335 #endif
336 
337 #if defined(CHAR_UNSIGNED) || defined(__CHAR_UNSIGNED__)
338 #define SIGN_EXTEND_CHAR(c) ((c)>(char)127?(c)-256:(c)) /* for IBM RT */
339 #endif
340 #ifndef SIGN_EXTEND_CHAR
341 #define SIGN_EXTEND_CHAR(x) (x)
342 #endif
343 
344 
345 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
346 #define STORE_NUMBER(destination, number)				\
347   { (destination)[0] = (number) & 0377;					\
348     (destination)[1] = (number) >> 8; }
349 
350 /* Same as STORE_NUMBER, except increment the destination pointer to
351    the byte after where the number is stored.  Watch out that values for
352    DESTINATION such as p + 1 won't work, whereas p will.  */
353 #define STORE_NUMBER_AND_INCR(destination, number)			\
354   { STORE_NUMBER(destination, number);					\
355     (destination) += 2; }
356 
357 
358 /* Put into DESTINATION a number stored in two contingous bytes starting
359    at SOURCE.  */
360 #define EXTRACT_NUMBER(destination, source)				\
361   { (destination) = *(source) & 0377;					\
362     (destination) += SIGN_EXTEND_CHAR (*(char *)((source) + 1)) << 8; }
363 
364 /* Same as EXTRACT_NUMBER, except increment the pointer for source to
365    point to second byte of SOURCE.  Note that SOURCE has to be a value
366    such as p, not, e.g., p + 1. */
367 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
368   { EXTRACT_NUMBER(destination, source);				\
369     (source) += 2; }
370 
371 
372 /* Specify the precise syntax of regexps for compilation.  This provides
373    for compatibility for various utilities which historically have
374    different, incompatible syntaxes.
375 
376    The argument SYNTAX is a bit-mask comprised of the various bits
377    defined in regex.h.  */
378 
379 long
re_set_syntax(syntax)380 re_set_syntax(syntax)
381   long syntax;
382 {
383   long ret;
384 
385   ret = re_syntax_options;
386   re_syntax_options = syntax;
387   return ret;
388 }
389 
390 /* Set by re_set_syntax to the current regexp syntax to recognize.  */
391 long re_syntax_options = 0;
392 
393 
394 /* Macros for re_compile_pattern, which is found below these definitions.  */
395 
396 #define TRANSLATE_P() ((options&RE_OPTION_IGNORECASE) && translate)
397 #define MAY_TRANSLATE() ((bufp->options&(RE_OPTION_IGNORECASE|RE_MAY_IGNORECASE)) && translate)
398 /* Fetch the next character in the uncompiled pattern---translating it
399    if necessary.  Also cast from a signed character in the constant
400    string passed to us by the user to an unsigned char that we can use
401    as an array index (in, e.g., `translate').  */
402 #define PATFETCH(c)							\
403   do {if (p == pend) goto end_of_pattern;				\
404     c = (unsigned char) *p++; 						\
405     if (TRANSLATE_P()) c = (unsigned char)translate[c];	\
406   } while (0)
407 
408 /* Fetch the next character in the uncompiled pattern, with no
409    translation.  */
410 #define PATFETCH_RAW(c)							\
411   do {if (p == pend) goto end_of_pattern;				\
412     c = (unsigned char) *p++; 						\
413   } while (0)
414 
415 /* Go backwards one character in the pattern.  */
416 #define PATUNFETCH p--
417 
418 
419 /* If the buffer isn't allocated when it comes in, use this.  */
420 #define INIT_BUF_SIZE  28
421 
422 /* Make sure we have at least N more bytes of space in buffer.  */
423 #define GET_BUFFER_SPACE(n)						\
424   {								        \
425     while (b - bufp->buffer + (n) >= bufp->allocated)			\
426       EXTEND_BUFFER;							\
427   }
428 
429 /* Make sure we have one more byte of buffer space and then add CH to it.  */
430 #define BUFPUSH(ch)							\
431   {									\
432     GET_BUFFER_SPACE(1);						\
433     *b++ = (char)(ch);							\
434   }
435 
436 /* Extend the buffer by twice its current size via reallociation and
437    reset the pointers that pointed into the old allocation to point to
438    the correct places in the new allocation.  If extending the buffer
439    results in it being larger than 1 << 16, then flag memory exhausted.  */
440 #define EXTEND_BUFFER							\
441   { char *old_buffer = bufp->buffer;					\
442     if (bufp->allocated == (1L<<16)) goto too_big;			\
443     bufp->allocated *= 2;						\
444     if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16);		\
445     bufp->buffer = (char *) xrealloc (bufp->buffer, bufp->allocated);	\
446     if (bufp->buffer == 0)						\
447       goto memory_exhausted;						\
448     b = (b - old_buffer) + bufp->buffer;				\
449     if (fixup_alt_jump)							\
450       fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;	\
451     if (laststart)							\
452       laststart = (laststart - old_buffer) + bufp->buffer;		\
453     begalt = (begalt - old_buffer) + bufp->buffer;			\
454     if (pending_exact)							\
455       pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
456   }
457 
458 
459 /* Set the bit for character C in a character set list.  */
460 #define SET_LIST_BIT(c)							\
461   (b[(unsigned char)(c) / BYTEWIDTH]					\
462    |= 1 << ((unsigned char)(c) % BYTEWIDTH))
463 
464 /* Get the next unsigned number in the uncompiled pattern.  */
465 #define GET_UNSIGNED_NUMBER(num) 					\
466   { if (p != pend) 							\
467       { 								\
468         PATFETCH(c); 							\
469 	while (ISDIGIT(c)) 						\
470 	  { 								\
471 	    if (num < 0) 						\
472 	       num = 0; 						\
473             num = num * 10 + c - '0'; 					\
474 	    if (p == pend) 						\
475 	       break; 							\
476 	    PATFETCH(c); 						\
477 	  } 								\
478         } 								\
479   }
480 
481 #define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
482 
483 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
484 
485 #define IS_CHAR_CLASS(string)						\
486    (STREQ (string, "alpha") || STREQ (string, "upper")			\
487     || STREQ (string, "lower") || STREQ (string, "digit")		\
488     || STREQ (string, "alnum") || STREQ (string, "xdigit")		\
489     || STREQ (string, "space") || STREQ (string, "print")		\
490     || STREQ (string, "punct") || STREQ (string, "graph")		\
491     || STREQ (string, "cntrl") || STREQ (string, "blank"))
492 
493 #define STORE_MBC(p, c) \
494   ((p)[0] = (unsigned char)(c >> 8), (p)[1] = (unsigned char)(c))
495 #define STORE_MBC_AND_INCR(p, c) \
496   (*(p)++ = (unsigned char)(c >> 8), *(p)++ = (unsigned char)(c))
497 
498 #define EXTRACT_MBC(p) \
499   ((unsigned short)((unsigned char)(p)[0] << 8 | (unsigned char)(p)[1]))
500 #define EXTRACT_MBC_AND_INCR(p) \
501   ((unsigned short)((p) += 2, (unsigned char)(p)[-2] << 8 | (unsigned char)(p)[-1]))
502 
503 #define EXTRACT_UNSIGNED(p) \
504   ((unsigned char)(p)[0] | (unsigned char)(p)[1] << 8)
505 #define EXTRACT_UNSIGNED_AND_INCR(p) \
506   ((p) += 2, (unsigned char)(p)[-2] | (unsigned char)(p)[-1] << 8)
507 
508 /* Handle (mb)?charset(_not)?.
509 
510    Structure of mbcharset(_not)? in compiled pattern.
511 
512      struct {
513        unsinged char id;		mbcharset(_not)?
514        unsigned char sbc_size;
515        unsigned char sbc_map[sbc_size];	same as charset(_not)? up to here.
516        unsigned short mbc_size;		number of intervals.
517        struct {
518 	 unsigned short beg;		beginning of interval.
519 	 unsigned short end;		end of interval.
520        } intervals[mbc_size];
521      }; */
522 
523 static void
set_list_bits(c1,c2,b)524 set_list_bits(c1, c2, b)
525     unsigned short c1, c2;
526     unsigned char *b;
527 {
528   unsigned char sbc_size = b[-1];
529   unsigned short mbc_size = EXTRACT_UNSIGNED(&b[sbc_size]);
530   unsigned short beg, end, upb;
531 
532   if (c1 > c2)
533     return;
534   if ((int)c1 < 1 << BYTEWIDTH) {
535     upb = c2;
536     if (1 << BYTEWIDTH <= (int)upb)
537       upb = (1 << BYTEWIDTH) - 1;	/* The last single-byte char */
538     if (sbc_size <= (unsigned short)(upb / BYTEWIDTH)) {
539       /* Allocate maximum size so it never happens again.  */
540       /* NOTE: memcpy() would not work here.  */
541       memmove(&b[(1 << BYTEWIDTH) / BYTEWIDTH], &b[sbc_size], 2 + mbc_size*4);
542       memset(&b[sbc_size], 0, (1 << BYTEWIDTH) / BYTEWIDTH - sbc_size);
543       b[-1] = sbc_size = (1 << BYTEWIDTH) / BYTEWIDTH;
544     }
545     for (; c1 <= upb; c1++)
546 	if (!ismbchar(c1))
547 	    SET_LIST_BIT(c1);
548     if ((int)c2 < 1 << BYTEWIDTH)
549       return;
550     c1 = 0x8000;			/* The first wide char */
551   }
552   b = &b[sbc_size + 2];
553 
554   for (beg = 0, upb = mbc_size; beg < upb; ) {
555     unsigned short mid = (unsigned short)(beg + upb) >> 1;
556 
557     if ((int)c1 - 1 > (int)EXTRACT_MBC(&b[mid*4 + 2]))
558       beg = mid + 1;
559     else
560       upb = mid;
561   }
562 
563   for (end = beg, upb = mbc_size; end < upb; ) {
564     unsigned short mid = (unsigned short)(end + upb) >> 1;
565 
566     if ((int)c2 >= (int)EXTRACT_MBC(&b[mid*4]) - 1)
567       end = mid + 1;
568     else
569       upb = mid;
570   }
571 
572   if (beg != end) {
573     if (c1 > EXTRACT_MBC(&b[beg*4]))
574       c1 = EXTRACT_MBC(&b[beg*4]);
575     if (c2 < EXTRACT_MBC(&b[(end - 1)*4]))
576       c2 = EXTRACT_MBC(&b[(end - 1)*4]);
577   }
578   if (end < mbc_size && end != beg + 1)
579     /* NOTE: memcpy() would not work here.  */
580     memmove(&b[(beg + 1)*4], &b[end*4], (mbc_size - end)*4);
581   STORE_MBC(&b[beg*4 + 0], c1);
582   STORE_MBC(&b[beg*4 + 2], c2);
583   mbc_size += beg - end + 1;
584   STORE_NUMBER(&b[-2], mbc_size);
585 }
586 
587 static int
is_in_list(c,b)588 is_in_list(c, b)
589     unsigned short c;
590     const unsigned char *b;
591 {
592     unsigned short size;
593     unsigned short i, j;
594     int result = 0;
595 
596     size = *b++;
597     if ((int)c < 1<<BYTEWIDTH) {
598 	if ((int)c / BYTEWIDTH < (int)size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH) {
599 	    return 1;
600 	}
601     }
602     b += size + 2;
603     size = EXTRACT_UNSIGNED(&b[-2]);
604     if (size == 0) return 0;
605 
606     if (b[(size-1)*4] == 0xff) {
607 	i = c;
608 	if ((int)c >= 1<<BYTEWIDTH) {
609 	    i = i>>BYTEWIDTH;
610 	}
611 	while (size>0 && b[size*4-2] == 0xff) {
612 	    size--;
613 	    if (b[size*4+1] <= i && i <= b[size*4+3]) {
614 		result = 2;
615 		break;
616 	    }
617 	}
618     }
619     for (i = 0, j = size; i < j; ) {
620 	unsigned short k = (unsigned short)(i + j) >> 1;
621 
622 	if (c > EXTRACT_MBC(&b[k*4+2]))
623 	    i = k + 1;
624 	else
625 	    j = k;
626     }
627     if (i < size && EXTRACT_MBC(&b[i*4]) <= c
628 	&& ((unsigned char)c != '\n' && (unsigned char)c != '\0'))
629 	return 1;
630     return result;
631 }
632 
633 #if 0
634 static void
635 print_partial_compiled_pattern(start, end)
636     unsigned char *start;
637     unsigned char *end;
638 {
639   int mcnt, mcnt2;
640   unsigned char *p = start;
641   unsigned char *pend = end;
642 
643   if (start == NULL)
644     {
645       printf ("(null)\n");
646       return;
647     }
648 
649   /* Loop over pattern commands.  */
650   while (p < pend)
651     {
652       switch ((enum regexpcode) *p++)
653 	{
654 	case unused:
655 	  printf ("/unused");
656 	  break;
657 
658 	case exactn:
659 	  mcnt = *p++;
660           printf ("/exactn/%d", mcnt);
661           do
662 	    {
663               putchar('/');
664 	      printf("%c", *p++);
665             }
666           while (--mcnt);
667           break;
668 
669 	case start_memory:
670           mcnt = *p++;
671           printf ("/start_memory/%d/%d", mcnt, *p++);
672           break;
673 
674 	case stop_memory:
675           mcnt = *p++;
676 	  printf ("/stop_memory/%d/%d", mcnt, *p++);
677           break;
678 
679 	case casefold_on:
680 	  printf ("/casefold_on");
681 	  break;
682 
683 	case casefold_off:
684 	  printf ("/casefold_off");
685 	  break;
686 
687 	case start_nowidth:
688           EXTRACT_NUMBER_AND_INCR (mcnt, p);
689 	  printf ("/start_nowidth//%d", mcnt);
690 	  break;
691 
692 	case stop_nowidth:
693 	  printf ("/stop_nowidth//");
694 	  p += 2;
695 	  break;
696 
697 	case pop_and_fail:
698 	  printf ("/pop_and_fail");
699 	  break;
700 
701 	case duplicate:
702 	  printf ("/duplicate/%d", *p++);
703 	  break;
704 
705 	case anychar:
706 	  printf ("/anychar");
707 	  break;
708 
709 	case charset:
710         case charset_not:
711           {
712             register int c;
713 
714             printf ("/charset%s",
715 	            (enum regexpcode) *(p - 1) == charset_not ? "_not" : "");
716 
717             mcnt = *p++;
718 	    printf("/%d", mcnt);
719             for (c = 0; c < mcnt; c++)
720               {
721                 unsigned bit;
722                 unsigned char map_byte = p[c];
723 
724 		putchar ('/');
725 
726 		for (bit = 0; bit < BYTEWIDTH; bit++)
727                   if (map_byte & (1 << bit))
728 		    printf("%c", c * BYTEWIDTH + bit);
729               }
730 	    p += mcnt;
731 	    mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
732 	    while (mcnt--) {
733 		int beg, end;
734 		beg = EXTRACT_MBC_AND_INCR(p);
735 		end = EXTRACT_MBC_AND_INCR(p);
736 		printf("/%c%c-%c%c", beg>>BYTEWIDTH, beg&0xff, end>>BYTEWIDTH, end&0xff);
737 	    }
738 	    break;
739 	  }
740 
741 	case begline:
742 	  printf ("/begline");
743           break;
744 
745 	case endline:
746           printf ("/endline");
747           break;
748 
749 	case on_failure_jump:
750           EXTRACT_NUMBER_AND_INCR (mcnt, p);
751   	  printf ("/on_failure_jump//%d", mcnt);
752           break;
753 
754 	case dummy_failure_jump:
755           EXTRACT_NUMBER_AND_INCR (mcnt, p);
756   	  printf ("/dummy_failure_jump//%d", mcnt);
757           break;
758 
759 	case push_dummy_failure:
760           printf ("/push_dummy_failure");
761           break;
762 
763         case finalize_jump:
764           EXTRACT_NUMBER_AND_INCR (mcnt, p);
765   	  printf ("/finalize_jump//%d", mcnt);
766 	  break;
767 
768         case maybe_finalize_jump:
769           EXTRACT_NUMBER_AND_INCR (mcnt, p);
770   	  printf ("/maybe_finalize_jump//%d", mcnt);
771 	  break;
772 
773         case jump_past_alt:
774 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
775   	  printf ("/jump_past_alt//%d", mcnt);
776 	  break;
777 
778         case jump:
779 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
780   	  printf ("/jump//%d", mcnt);
781 	  break;
782 
783         case succeed_n:
784           EXTRACT_NUMBER_AND_INCR (mcnt, p);
785           EXTRACT_NUMBER_AND_INCR (mcnt2, p);
786  	  printf ("/succeed_n//%d//%d", mcnt, mcnt2);
787           break;
788 
789         case jump_n:
790           EXTRACT_NUMBER_AND_INCR (mcnt, p);
791           EXTRACT_NUMBER_AND_INCR (mcnt2, p);
792  	  printf ("/jump_n//%d//%d", mcnt, mcnt2);
793           break;
794 
795         case set_number_at:
796           EXTRACT_NUMBER_AND_INCR (mcnt, p);
797           EXTRACT_NUMBER_AND_INCR (mcnt2, p);
798  	  printf ("/set_number_at//%d//%d", mcnt, mcnt2);
799           break;
800 
801 	case try_next:
802 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
803   	  printf ("/try_next//%d", mcnt);
804           break;
805 
806 	case finalize_push:
807 	  EXTRACT_NUMBER_AND_INCR (mcnt, p);
808   	  printf ("/finalize_push//%d", mcnt);
809           break;
810 
811 	case finalize_push_n:
812           EXTRACT_NUMBER_AND_INCR (mcnt, p);
813           EXTRACT_NUMBER_AND_INCR (mcnt2, p);
814  	  printf ("/finalize_push_n//%d//%d", mcnt, mcnt2);
815           break;
816 
817         case wordbound:
818 	  printf ("/wordbound");
819 	  break;
820 
821 	case notwordbound:
822 	  printf ("/notwordbound");
823           break;
824 
825 	case wordbeg:
826 	  printf ("/wordbeg");
827 	  break;
828 
829 	case wordend:
830 	  printf ("/wordend");
831 
832 	case wordchar:
833 	  printf ("/wordchar");
834           break;
835 
836 	case notwordchar:
837 	  printf ("/notwordchar");
838           break;
839 
840 	case begbuf:
841 	  printf ("/begbuf");
842           break;
843 
844 	case endbuf:
845 	  printf ("/endbuf");
846           break;
847 
848 	case endbuf2:
849 	  printf ("/endbuf2");
850           break;
851 
852         default:
853           printf ("?%d", *(p-1));
854 	}
855     }
856   printf ("/\n");
857 }
858 #endif /* 0 */
859 
860 #if 0
861 static void
862 print_compiled_pattern(bufp)
863     struct re_pattern_buffer *bufp;
864 {
865   unsigned char *buffer = (unsigned char*)bufp->buffer;
866 
867   print_partial_compiled_pattern (buffer, buffer + bufp->used);
868 }
869 #endif /* 0 */
870 
871 static char*
calculate_must_string(start,end)872 calculate_must_string(start, end)
873     char *start;
874     char *end;
875 {
876   int mcnt;
877   int max = 0;
878   char *p = start;
879   char *pend = end;
880   char *must = 0;
881 
882   if (start == NULL) return 0;
883 
884   /* Loop over pattern commands.  */
885   while (p < pend)
886     {
887       switch ((enum regexpcode) *p++)
888 	{
889 	case unused:
890 	  break;
891 
892 	case exactn:
893 	  mcnt = *p;
894 	  if (mcnt > max) {
895 	    must = p;
896 	    max = mcnt;
897 	  }
898 	  p += mcnt+1;
899           break;
900 
901 	case start_memory:
902 	case stop_memory:
903 	  p += 2;
904 	  break;
905 
906 	case duplicate:
907           p++;
908           break;
909 
910 	case casefold_on:
911 	case casefold_off:
912 	  return 0;		/* should not check must_string */
913 
914 	case pop_and_fail:
915 	case anychar:
916 	case begline:
917 	case endline:
918         case wordbound:
919 	case notwordbound:
920 	case wordbeg:
921 	case wordend:
922 	case wordchar:
923 	case notwordchar:
924 	case begbuf:
925 	case endbuf:
926 	case endbuf2:
927         case push_dummy_failure:
928 	  break;
929 
930 	case charset:
931         case charset_not:
932 	  mcnt = *p++;
933 	  p += mcnt;
934 	  mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
935 	  while (mcnt--) {
936             /* commented out by ccsatoru
937 	    EXTRACT_MBC_AND_INCR(p);
938 	    EXTRACT_MBC_AND_INCR(p);
939             */
940 	  }
941 	  break;
942 
943 	case on_failure_jump:
944           EXTRACT_NUMBER_AND_INCR (mcnt, p);
945 	  if (mcnt > 0) p += mcnt;
946 	  if ((enum regexpcode)p[-3] == jump) {
947 	    p -= 3;
948 	    EXTRACT_NUMBER_AND_INCR (mcnt, p);
949 	    if (mcnt > 0) p += mcnt;
950 	  }
951           break;
952 
953 	case dummy_failure_jump:
954         case succeed_n:
955 	case try_next:
956 	case jump:
957           EXTRACT_NUMBER_AND_INCR (mcnt, p);
958 	  if (mcnt > 0) p += mcnt;
959           break;
960 
961 	case start_nowidth:
962 	case stop_nowidth:
963         case finalize_jump:
964         case maybe_finalize_jump:
965 	case finalize_push:
966 	  p += 2;
967 	  break;
968 
969         case jump_n:
970         case set_number_at:
971 	case finalize_push_n:
972 	  p += 4;
973           break;
974 
975         default:
976 	  break;
977 	}
978     }
979   return must;
980 }
981 
982 
983 /* re_compile_pattern takes a regular-expression string
984    and converts it into a buffer full of byte commands for matching.
985 
986    PATTERN   is the address of the pattern string
987    SIZE      is the length of it.
988    BUFP	    is a  struct re_pattern_buffer *  which points to the info
989 	     on where to store the byte commands.
990 	     This structure contains a  char *  which points to the
991 	     actual space, which should have been obtained with malloc.
992 	     re_compile_pattern may use realloc to grow the buffer space.
993 
994    The number of bytes of commands can be found out by looking in
995    the `struct re_pattern_buffer' that bufp pointed to, after
996    re_compile_pattern returns. */
997 
998 char *
re_compile_pattern(pattern,size,bufp)999 re_compile_pattern(pattern, size, bufp)
1000      char *pattern;
1001      size_t size;
1002      struct re_pattern_buffer *bufp;
1003 {
1004     register char *b = bufp->buffer;
1005     register char *p = pattern;
1006     char *pend = pattern + size;
1007     register unsigned c, c1;
1008     char *p0;
1009     int numlen;
1010 
1011     /* Address of the count-byte of the most recently inserted `exactn'
1012        command.  This makes it possible to tell whether a new exact-match
1013        character can be added to that command or requires a new `exactn'
1014        command.  */
1015 
1016     char *pending_exact = 0;
1017 
1018     /* Address of the place where a forward-jump should go to the end of
1019        the containing expression.  Each alternative of an `or', except the
1020        last, ends with a forward-jump of this sort.  */
1021 
1022     char *fixup_alt_jump = 0;
1023 
1024     /* Address of start of the most recently finished expression.
1025        This tells postfix * where to find the start of its operand.  */
1026 
1027     char *laststart = 0;
1028 
1029     /* In processing a repeat, 1 means zero matches is allowed.  */
1030 
1031     char zero_times_ok;
1032 
1033     /* In processing a repeat, 1 means many matches is allowed.  */
1034 
1035     char many_times_ok;
1036 
1037     /* In processing a repeat, 1 means non-greedy matches.  */
1038 
1039     char greedy;
1040 
1041     /* Address of beginning of regexp, or inside of last \(.  */
1042 
1043     char *begalt = b;
1044 
1045     /* Place in the uncompiled pattern (i.e., the {) to
1046        which to go back if the interval is invalid.  */
1047     char *beg_interval;
1048 
1049     /* In processing an interval, at least this many matches must be made.  */
1050     int lower_bound;
1051 
1052     /* In processing an interval, at most this many matches can be made.  */
1053     int upper_bound;
1054 
1055     /* Stack of information saved by \( and restored by \).
1056        Five stack elements are pushed by each \(:
1057        First, the value of b.
1058        Second, the value of fixup_alt_jump.
1059        Third, the value of begalt.
1060        Fourth, the value of regnum.
1061        Fifth, the type of the paren. */
1062 
1063     int *stackb = RE_TALLOC(40, int);
1064     int *stackp = stackb;
1065     int *stacke = stackb + 40;
1066     int *stackt;
1067 
1068     /* Counts \('s as they are encountered.  Remembered for the matching \),
1069        where it becomes the register number to put in the stop_memory
1070        command.  */
1071 
1072     int regnum = 1;
1073 
1074     int range = 0;
1075     int had_char_class = 0;
1076 
1077     int options = bufp->options;
1078 
1079     bufp->fastmap_accurate = 0;
1080     bufp->must = 0;
1081     bufp->must_skip = 0;
1082     bufp->stclass = 0;
1083 
1084     /* Initialize the syntax table.  */
1085     init_syntax_once();
1086 
1087     if (bufp->allocated == 0) {
1088       bufp->allocated = INIT_BUF_SIZE;
1089       if (bufp->buffer)
1090 	/* EXTEND_BUFFER loses when bufp->allocated is 0.  */
1091 	bufp->buffer = (char *) xrealloc (bufp->buffer, INIT_BUF_SIZE);
1092       else
1093 	/* Caller did not allocate a buffer.  Do it for them.  */
1094 	bufp->buffer = (char *) xmalloc(INIT_BUF_SIZE);
1095       if (!bufp->buffer) goto memory_exhausted;
1096       begalt = b = bufp->buffer;
1097     }
1098 
1099     while (p != pend) {
1100       PATFETCH(c);
1101 
1102       switch (c)
1103 	{
1104 	case '$':
1105 	  {
1106 	    p0 = p;
1107 	    /* When testing what follows the $,
1108 	       look past the \-constructs that don't consume anything.  */
1109 
1110 	    while (p0 != pend)
1111 	      {
1112 		if (*p0 == '\\' && p0 + 1 != pend
1113 		    && (p0[1] == 'b' || p0[1] == 'B'))
1114 		  p0 += 2;
1115 		else
1116 		  break;
1117 	      }
1118 	    /* $ means succeed if at end of line, but only in special contexts.
1119 	      If validly in the middle of a pattern, it is a normal character. */
1120 
1121 	    if (p0 == pend || *p0 == '\n'
1122 		|| *p0 == ')'
1123 		|| *p0 == '|')
1124 	      {
1125 		BUFPUSH(endline);
1126 		break;
1127 	      }
1128 	    goto normal_char;
1129           }
1130 	case '^':
1131 	  /* ^ means succeed if at beg of line, but only if no preceding
1132              pattern.  */
1133 
1134           if (laststart)
1135             goto invalid_pattern;
1136           if (laststart && p - 2 >= pattern && p[-2] != '\n')
1137 	    goto normal_char;
1138 	  BUFPUSH(begline);
1139 	  break;
1140 
1141 	case '+':
1142 	case '?':
1143 	case '*':
1144 	  /* If there is no previous pattern, char not special. */
1145 	  if (!laststart) {
1146 	    goto invalid_pattern;
1147 	  }
1148 	  /* If there is a sequence of repetition chars,
1149 	     collapse it down to just one.  */
1150 	  zero_times_ok = c != '+';
1151 	  many_times_ok = c != '?';
1152 	  greedy = 1;
1153 	  if (p != pend) {
1154 	    PATFETCH(c);
1155 	    switch (c) {
1156 	      case '?':
1157 		greedy = 0;
1158 		break;
1159 	      case '*':
1160 	      case '+':
1161 		goto nested_meta;
1162 	      default:
1163 		PATUNFETCH;
1164 		break;
1165 	    }
1166 	  }
1167 
1168 	repeat:
1169 	  /* Star, etc. applied to an empty pattern is equivalent
1170 	     to an empty pattern.  */
1171 	  if (!laststart)
1172 	    break;
1173 
1174 	  /* Now we know whether or not zero matches is allowed
1175 	     and also whether or not two or more matches is allowed.  */
1176 	  if (many_times_ok) {
1177 	    /* If more than one repetition is allowed, put in at the
1178 	       end a backward relative jump from b to before the next
1179 	       jump we're going to put in below (which jumps from
1180 	       laststart to after this jump).  */
1181 	    GET_BUFFER_SPACE(3);
1182 	    store_jump(b,greedy?maybe_finalize_jump:finalize_push,laststart-3);
1183 	    b += 3;  	/* Because store_jump put stuff here.  */
1184 	  }
1185 
1186 	  /* On failure, jump from laststart to next pattern, which will be the
1187 	     end of the buffer after this jump is inserted.  */
1188 	  GET_BUFFER_SPACE(3);
1189 	  insert_jump(on_failure_jump, laststart, b + 3, b);
1190 	  b += 3;
1191 
1192 	  if (zero_times_ok) {
1193 	    if (greedy == 0) {
1194 	      GET_BUFFER_SPACE(3);
1195 	      insert_jump(try_next, laststart, b + 3, b);
1196 	      b += 3;
1197 	    }
1198 	  }
1199 	  else {
1200 	    /* At least one repetition is required, so insert a
1201 	       `dummy_failure_jump' before the initial
1202 	       `on_failure_jump' instruction of the loop. This
1203 	       effects a skip over that instruction the first time
1204 	       we hit that loop.  */
1205 	    GET_BUFFER_SPACE(3);
1206 	    insert_jump(dummy_failure_jump, laststart, laststart + 6, b);
1207 	    b += 3;
1208 	  }
1209 	  break;
1210 
1211 	case '.':
1212 	  laststart = b;
1213 	  BUFPUSH(anychar);
1214 	  break;
1215 
1216         case '[':
1217           if (p == pend)
1218             goto invalid_pattern;
1219 	  while (b - bufp->buffer
1220 		 > bufp->allocated - 9 - (1 << BYTEWIDTH) / BYTEWIDTH)
1221 	    EXTEND_BUFFER;
1222 
1223 	  laststart = b;
1224 	  if (*p == '^')
1225 	    {
1226               BUFPUSH(charset_not);
1227               p++;
1228             }
1229 	  else
1230 	    BUFPUSH(charset);
1231 	  p0 = p;
1232 
1233 	  BUFPUSH((1 << BYTEWIDTH) / BYTEWIDTH);
1234 	  /* Clear the whole map */
1235 	  memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
1236 
1237 	  if ((re_syntax_options & RE_HAT_NOT_NEWLINE) && b[-2] == charset_not)
1238             SET_LIST_BIT('\n');
1239 
1240 	  had_char_class = 0;
1241 	  /* Read in characters and ranges, setting map bits.  */
1242 	  for (;;)
1243 	    {
1244 	      int size;
1245 	      unsigned last = (unsigned)-1;
1246 
1247 	      if ((size = EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH]))) {
1248 		/* Ensure the space is enough to hold another interval
1249 		   of multi-byte chars in charset(_not)?.  */
1250 		size = (1 << BYTEWIDTH) / BYTEWIDTH + 2 + size*4 + 4;
1251 		while (b + size + 1 > bufp->buffer + bufp->allocated)
1252 		  EXTEND_BUFFER;
1253 	      }
1254 	    range_retry:
1255 	      PATFETCH(c);
1256 
1257               if (c == ']') {
1258                   if (p == p0 + 1) {
1259                       if (p == pend)
1260 			  goto invalid_pattern;
1261 		  }
1262                   else
1263 		    /* Stop if this isn't merely a ] inside a bracket
1264                        expression, but rather the end of a bracket
1265                        expression.  */
1266 		      break;
1267 	      }
1268 	      /* Look ahead to see if it's a range when the last thing
1269 		 was a character class.  */
1270 	      if (had_char_class && c == '-' && *p != ']')
1271 		  goto invalid_pattern;
1272 	      if (ismbchar(c)) {
1273 		PATFETCH(c1);
1274 		c = c << BYTEWIDTH | c1;
1275 	      }
1276 
1277 	      /* \ escapes characters when inside [...].  */
1278 	      if (c == '\\') {
1279 	          PATFETCH(c);
1280 		  switch (c) {
1281 		    case 'w':
1282 		      for (c = 0; c < (1 << BYTEWIDTH); c++)
1283 		          if (SYNTAX(c) == Sword)
1284 			      SET_LIST_BIT(c);
1285 		      last = -1;
1286 		      continue;
1287 
1288 		    case 'W':
1289 		      for (c = 0; c < (1 << BYTEWIDTH); c++)
1290 		          if (SYNTAX(c) != Sword)
1291 			      SET_LIST_BIT(c);
1292 		      if (current_mbctype) {
1293 			  set_list_bits(0x8000, 0xffff, (unsigned char*)b);
1294 		      }
1295 		      last = -1;
1296 		      continue;
1297 
1298 		    case 's':
1299 		      for (c = 0; c < 256; c++)
1300 			  if (ISSPACE(c))
1301 			      SET_LIST_BIT(c);
1302 		      last = -1;
1303 		      continue;
1304 
1305 		    case 'S':
1306 		      for (c = 0; c < 256; c++)
1307 			  if (!ISSPACE(c))
1308 			      SET_LIST_BIT(c);
1309 		      if (current_mbctype) {
1310 			  set_list_bits(0x8000, 0xffff, (unsigned char*)b);
1311 		      }
1312 		      last = -1;
1313 		      continue;
1314 
1315 		    case 'd':
1316 		      for (c = '0'; c <= '9'; c++)
1317 			  SET_LIST_BIT(c);
1318 		      last = -1;
1319 		      continue;
1320 
1321 		    case 'D':
1322 		      for (c = 0; c < 256; c++)
1323 			  if (!ISDIGIT(c))
1324 			      SET_LIST_BIT(c);
1325 		      if (current_mbctype) {
1326 			  set_list_bits(0x8000, 0xffff, (unsigned char*)b);
1327 		      }
1328 		      last = -1;
1329 		      continue;
1330 
1331 		    case 'x':
1332 		      c = scan_hex(p, 2, &numlen);
1333 		      if (current_mbctype && c > 0x7f)
1334 			  c = 0xff00 | c;
1335 		      p += numlen;
1336 		      break;
1337 
1338 		    case '0': case '1': case '2': case '3': case '4':
1339 		    case '5': case '6': case '7': case '8': case '9':
1340 		      PATUNFETCH;
1341 		      c = scan_oct(p, 3, &numlen);
1342 		      if (ismbchar(c))
1343 			  c |= 0xff00;
1344 		      p += numlen;
1345 		      break;
1346 
1347 		    default:
1348 		      if (ismbchar(c)) {
1349 			  PATFETCH(c1);
1350 			  c = c << 8 | c1;
1351 		      }
1352 		      break;
1353 		  }
1354 	      }
1355 
1356               /* Get a range.  */
1357 	      if (range) {
1358 		  if (last > c)
1359                     goto invalid_pattern;
1360 
1361 		  range = 0;
1362 		  if (last < 1 << BYTEWIDTH && c < 1 << BYTEWIDTH) {
1363 		      for (;last<=c;last++)
1364 			  SET_LIST_BIT(last);
1365 		  }
1366 		  else {
1367 		      set_list_bits(last, c, (unsigned char*)b);
1368 		  }
1369 	      }
1370               else if (p[0] == '-' && p[1] != ']') {
1371 		  last = c;
1372 		  PATFETCH(c1);
1373 		  range = 1;
1374 		  goto range_retry;
1375 	      }
1376 	      else if ((re_syntax_options & RE_CHAR_CLASSES)
1377 		       && c == '[' && *p == ':') {
1378 		  /* Leave room for the null.  */
1379 		  char str[CHAR_CLASS_MAX_LENGTH + 1];
1380 
1381 		  PATFETCH_RAW (c);
1382 		  c1 = 0;
1383 
1384 		  /* If pattern is `[[:'.  */
1385 		  if (p == pend)
1386 		      goto invalid_pattern;
1387 
1388 		  for (;;) {
1389 		      PATFETCH (c);
1390 		      if (c == ':' || c == ']' || p == pend
1391 			  || c1 == CHAR_CLASS_MAX_LENGTH)
1392                           break;
1393 		      str[c1++] = c;
1394 		  }
1395 		  str[c1] = '\0';
1396 
1397 		  /* If isn't a word bracketed by `[:' and:`]':
1398 		     undo the ending character, the letters, and leave
1399 		     the leading `:' and `[' (but set bits for them).  */
1400 		  if (c == ':' && *p == ']') {
1401 		      int ch;
1402 		      char is_alnum = STREQ (str, "alnum");
1403 		      char is_alpha = STREQ (str, "alpha");
1404 		      char is_blank = STREQ (str, "blank");
1405 		      char is_cntrl = STREQ (str, "cntrl");
1406 		      char is_digit = STREQ (str, "digit");
1407 		      char is_graph = STREQ (str, "graph");
1408 		      char is_lower = STREQ (str, "lower");
1409 		      char is_print = STREQ (str, "print");
1410 		      char is_punct = STREQ (str, "punct");
1411 		      char is_space = STREQ (str, "space");
1412 		      char is_upper = STREQ (str, "upper");
1413 		      char is_xdigit = STREQ (str, "xdigit");
1414 
1415 		      if (!IS_CHAR_CLASS (str))
1416 			  goto invalid_pattern;
1417 
1418 		      /* Throw away the ] at the end of the character
1419 			 class.  */
1420 		      PATFETCH (c);
1421 
1422 		      if (p == pend)
1423 			  goto invalid_pattern;
1424 
1425 
1426 		      for (ch = 0; ch < 1 << BYTEWIDTH; ch++) {
1427 			  if (   (is_alnum  && ISALNUM (ch))
1428 			      || (is_alpha  && ISALPHA (ch))
1429 			      || (is_blank  && ISBLANK (ch))
1430 			      || (is_cntrl  && ISCNTRL (ch))
1431 			      || (is_digit  && ISDIGIT (ch))
1432 			      || (is_graph  && ISGRAPH (ch))
1433 			      || (is_lower  && ISLOWER (ch))
1434 			      || (is_print  && ISPRINT (ch))
1435 			      || (is_punct  && ISPUNCT (ch))
1436 			      || (is_space  && ISSPACE (ch))
1437 			      || (is_upper  && ISUPPER (ch))
1438 			      || (is_xdigit && ISXDIGIT (ch)))
1439 			      SET_LIST_BIT (ch);
1440 		      }
1441 		      had_char_class = 1;
1442 		  }
1443 		  else {
1444 		      c1++;
1445 		      while (c1--)
1446                           PATUNFETCH;
1447 		      SET_LIST_BIT(translate?translate['[']:'[');
1448 		      SET_LIST_BIT(translate?translate[':']:':');
1449 		      had_char_class = 0;
1450 		      last = ':';
1451                   }
1452 	      }
1453 	      else if (c < 1 << BYTEWIDTH)
1454 		SET_LIST_BIT(c);
1455 	      else
1456 		set_list_bits(c, c, (unsigned char*)b);
1457 	    }
1458 
1459           /* Discard any character set/class bitmap bytes that are all
1460              0 at the end of the map. Decrement the map-length byte too.  */
1461           while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1462             b[-1]--;
1463 	  if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
1464 	    memmove(&b[b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
1465 		    2 + EXTRACT_UNSIGNED (&b[(1 << BYTEWIDTH) / BYTEWIDTH])*4);
1466 	  b += b[-1] + 2 + EXTRACT_UNSIGNED (&b[b[-1]])*4;
1467           break;
1468 
1469 	case '(':
1470 	  PATFETCH(c);
1471 	  if (c == '?') {
1472 	      int negative = 0;
1473 	      PATFETCH_RAW(c);
1474 	      switch (c) {
1475 		case 'x': case 'i': case '-':
1476 		  for (;;) {
1477 		    switch (c) {
1478 		    case '-':
1479 		      negative = 1;
1480 		      break;
1481 
1482 		    case ')':
1483 		    case ':':
1484 		      break;
1485 
1486 		    case 'x':
1487 		      if (negative)
1488 			options &= ~RE_OPTION_EXTENDED;
1489 		      else
1490 			options |= RE_OPTION_EXTENDED;
1491 		      break;
1492 		    case 'i':
1493 		      if (negative) {
1494 			if (options&RE_OPTION_IGNORECASE) {
1495 			  options &= ~RE_OPTION_IGNORECASE;
1496 			  BUFPUSH(casefold_off);
1497 			}
1498 		      }
1499 		      else if (!(options&RE_OPTION_IGNORECASE)) {
1500 			options |= RE_OPTION_IGNORECASE;
1501 			BUFPUSH(casefold_on);
1502 		      }
1503 		      break;
1504 
1505 		    default:
1506 		      FREE_AND_RETURN(stackb, "undefined (?...) inline option");
1507 		    }
1508 		    if (c == ')') {
1509 		      c = '#';	/* read whole in-line options */
1510 		      break;
1511 		    }
1512 		    if (c == ':') break;
1513 		    PATFETCH_RAW(c);
1514 		  }
1515 		  break;
1516 
1517 		case '#':
1518 		  for (;;) {
1519 		      PATFETCH(c);
1520 		      if (c == ')') break;
1521 		  }
1522 		  c = '#';
1523 		  break;
1524 
1525 		case ':':
1526 		case '=':
1527 		case '!':
1528 		  break;
1529 
1530 		default:
1531 		  FREE_AND_RETURN(stackb, "undefined (?...) sequence");
1532 	      }
1533 	  }
1534 	  else {
1535 	    PATUNFETCH;
1536 	    c = '(';
1537 	  }
1538 	  if (c == '#') break;
1539 	  if (stackp+8 >= stacke) {
1540 	    int *stackx;
1541 	    unsigned int len = stacke - stackb;
1542 
1543 	    stackx = DOUBLE_STACK(stackx,stackb,len,int);
1544 	    /* Rearrange the pointers. */
1545 	    stackp = stackx + (stackp - stackb);
1546 	    stackb = stackx;
1547 	    stacke = stackb + 2 * len;
1548 	  }
1549 
1550 	  /* Laststart should point to the start_memory that we are about
1551 	     to push (unless the pattern has RE_NREGS or more ('s).  */
1552 	  /* obsolete: now RE_NREGS is just a default register size. */
1553 	  *stackp++ = b - bufp->buffer;
1554 	  *stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1555 	  *stackp++ = begalt - bufp->buffer;
1556 	  switch (c) {
1557 	    case '(':
1558 	      BUFPUSH(start_memory);
1559 	      BUFPUSH(regnum);
1560 	      *stackp++ = regnum++;
1561 	      *stackp++ = b - bufp->buffer;
1562 	      BUFPUSH(0);
1563 	      /* too many ()'s to fit in a byte. (max 254) */
1564 	      if (regnum >= RE_REG_MAX) goto too_big;
1565 	      break;
1566 
1567 	    case '=':
1568 	    case '!':
1569 	      BUFPUSH(start_nowidth);
1570 	      *stackp++ = b - bufp->buffer;
1571 	      BUFPUSH(0);	/* temporary value */
1572 	      BUFPUSH(0);
1573 	      if (c == '=') break;
1574 
1575 	      BUFPUSH(on_failure_jump);
1576 	      *stackp++ = b - bufp->buffer;
1577 	      BUFPUSH(0);	/* temporary value */
1578 	      BUFPUSH(0);
1579 	      break;
1580 
1581 	    case ':':
1582 	      pending_exact = 0;
1583 	    default:
1584 	      break;
1585 	  }
1586 	  *stackp++ = c;
1587 	  *stackp++ = options;
1588 	  fixup_alt_jump = 0;
1589 	  laststart = 0;
1590 	  begalt = b;
1591 	  break;
1592 
1593 	case ')':
1594 	  if (stackp == stackb)
1595 	    FREE_AND_RETURN(stackb, "unmatched )");
1596 	  if ((options ^ stackp[-1]) & RE_OPTION_IGNORECASE) {
1597 	    BUFPUSH((options&RE_OPTION_IGNORECASE)?casefold_off:casefold_on);
1598 	  }
1599 	  pending_exact = 0;
1600 	  if (fixup_alt_jump)
1601 	  { /* Push a dummy failure point at the end of the
1602 	       alternative for a possible future
1603 	       `finalize_jump' to pop.  See comments at
1604 	       `push_dummy_failure' in `re_match'.  */
1605 	      BUFPUSH(push_dummy_failure);
1606 
1607 	      /* We allocated space for this jump when we assigned
1608 		 to `fixup_alt_jump', in the `handle_alt' case below.  */
1609 	      store_jump(fixup_alt_jump, jump, b);
1610 	  }
1611           options = *--stackp;
1612           switch (c = *--stackp) {
1613             case '(':
1614               {
1615 		char *loc = bufp->buffer + *--stackp;
1616 		*loc = regnum - stackp[-1];
1617 		BUFPUSH(stop_memory);
1618 		BUFPUSH(stackp[-1]);
1619 		BUFPUSH(regnum - stackp[-1]);
1620 		stackp--;
1621 	      }
1622 	      break;
1623 
1624 	    case '!':
1625 	      BUFPUSH(pop_and_fail);
1626 	      /* back patch */
1627 	      STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
1628 	      stackp--;
1629 	      /* fall through */
1630 	    case '=':
1631 	      BUFPUSH(stop_nowidth);
1632 	      /* tell stack-pos place to start_nowidth */
1633 	      STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
1634 	      BUFPUSH(0);	/* space to hold stack pos */
1635 	      BUFPUSH(0);
1636 	      stackp--;
1637 	      break;
1638 
1639 	    default:
1640 	      break;
1641 	  }
1642 	  begalt = *--stackp + bufp->buffer;
1643 	  stackp--;
1644 	  fixup_alt_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
1645 	  laststart = *--stackp + bufp->buffer;
1646 	  if (c == '!' || c == '=') laststart = b;
1647 	  break;
1648 
1649 	case '|':
1650 	  /* Insert before the previous alternative a jump which
1651 	     jumps to this alternative if the former fails.  */
1652 	  GET_BUFFER_SPACE(3);
1653 	  insert_jump(on_failure_jump, begalt, b + 6, b);
1654 	  pending_exact = 0;
1655 	  b += 3;
1656 	  /* The alternative before this one has a jump after it
1657 	     which gets executed if it gets matched.  Adjust that
1658 	     jump so it will jump to this alternative's analogous
1659 	     jump (put in below, which in turn will jump to the next
1660 	     (if any) alternative's such jump, etc.).  The last such
1661 	     jump jumps to the correct final destination.  A picture:
1662 	     	_____ _____
1663 	     	|   | |   |
1664 	     	|   v |   v
1665 	     	a | b   | c
1666 
1667 	     If we are at `b', then fixup_alt_jump right now points to a
1668 	     three-byte space after `a'.  We'll put in the jump, set
1669 	     fixup_alt_jump to right after `b', and leave behind three
1670 	     bytes which we'll fill in when we get to after `c'.  */
1671 
1672 	  if (fixup_alt_jump)
1673 	    store_jump(fixup_alt_jump, jump_past_alt, b);
1674 
1675 	  /* Mark and leave space for a jump after this alternative,
1676 	     to be filled in later either by next alternative or
1677 	     when know we're at the end of a series of alternatives.  */
1678 	  fixup_alt_jump = b;
1679 	  GET_BUFFER_SPACE(3);
1680 	  b += 3;
1681 
1682 	  laststart = 0;
1683 	  begalt = b;
1684 	  break;
1685 
1686 	case '{':
1687 	  /* If there is no previous pattern, this isn't an interval.  */
1688 	  if (!laststart || p == pend)
1689 	    {
1690 		goto normal_backsl;
1691 	    }
1692 
1693 	  beg_interval = p - 1;
1694 
1695 	  lower_bound = -1;			/* So can see if are set.  */
1696 	  upper_bound = -1;
1697 	  GET_UNSIGNED_NUMBER(lower_bound);
1698 	  if (c == ',') {
1699 	    GET_UNSIGNED_NUMBER(upper_bound);
1700 	  }
1701 	  else
1702 	    /* Interval such as `{1}' => match exactly once. */
1703 	    upper_bound = lower_bound;
1704 
1705 	  if (lower_bound < 0 || c != '}')
1706 	    goto unfetch_interval;
1707 
1708 	  if (lower_bound >= RE_DUP_MAX || upper_bound >= RE_DUP_MAX)
1709 	    FREE_AND_RETURN(stackb, "too big quantifier in {,}");
1710 	  if (upper_bound < 0) upper_bound = RE_DUP_MAX;
1711 	  if (lower_bound > upper_bound)
1712 	    FREE_AND_RETURN(stackb, "can't do {n,m} with n > m");
1713 
1714 	  beg_interval = 0;
1715 	  pending_exact = 0;
1716 
1717 	  greedy = 1;
1718 	  if (p != pend) {
1719 	    PATFETCH(c);
1720 	    if (c == '?') greedy = 0;
1721 	    else PATUNFETCH;
1722 	  }
1723 
1724 	  if (lower_bound == 0) {
1725 	    zero_times_ok = 1;
1726 	    if (upper_bound == RE_DUP_MAX) {
1727 	      many_times_ok = 1;
1728 	      goto repeat;
1729 	    }
1730 	    if (upper_bound == 1) {
1731 	      many_times_ok = 0;
1732 	      goto repeat;
1733 	    }
1734 	  }
1735 	  if (lower_bound == 1) {
1736 	    if (upper_bound == 1) {
1737 	      /* No need to repeat */
1738 	      break;
1739 	    }
1740 	    if (upper_bound == RE_DUP_MAX) {
1741 	      many_times_ok = 1;
1742 	      zero_times_ok = 0;
1743 	      goto repeat;
1744 	    }
1745 	  }
1746 
1747 	  /* If upper_bound is zero, don't want to succeed at all;
1748 	     jump from laststart to b + 3, which will be the end of
1749 	     the buffer after this jump is inserted.  */
1750 
1751 	  if (upper_bound == 0) {
1752 	    GET_BUFFER_SPACE(3);
1753 	    insert_jump(jump, laststart, b + 3, b);
1754 	    b += 3;
1755 	    break;
1756 	  }
1757 
1758 	  /* Otherwise, we have a nontrivial interval.  When
1759 	     we're all done, the pattern will look like:
1760 	     set_number_at <jump count> <upper bound>
1761 	     set_number_at <succeed_n count> <lower bound>
1762 	     succeed_n <after jump addr> <succed_n count>
1763 	     <body of loop>
1764 	     jump_n <succeed_n addr> <jump count>
1765 	     (The upper bound and `jump_n' are omitted if
1766 	     `upper_bound' is 1, though.)  */
1767 	  { /* If the upper bound is > 1, we need to insert
1768 	       more at the end of the loop.  */
1769 	    unsigned nbytes = upper_bound == 1 ? 10 : 20;
1770 
1771 	    GET_BUFFER_SPACE(nbytes);
1772 	    /* Initialize lower bound of the `succeed_n', even
1773 	       though it will be set during matching by its
1774 	       attendant `set_number_at' (inserted next),
1775 	       because `re_compile_fastmap' needs to know.
1776 	       Jump to the `jump_n' we might insert below.  */
1777 	    insert_jump_n(succeed_n, laststart, b + (nbytes/2),
1778 			  b, lower_bound);
1779 	    b += 5; 	/* Just increment for the succeed_n here.  */
1780 
1781 	    /* Code to initialize the lower bound.  Insert
1782 	       before the `succeed_n'.  The `5' is the last two
1783 	       bytes of this `set_number_at', plus 3 bytes of
1784 	       the following `succeed_n'.  */
1785 	    insert_op_2(set_number_at, laststart, b, 5, lower_bound);
1786 	    b += 5;
1787 
1788 	    if (upper_bound > 1)
1789 	      { /* More than one repetition is allowed, so
1790 		   append a backward jump to the `succeed_n'
1791 		   that starts this interval.
1792 
1793 		   When we've reached this during matching,
1794 		   we'll have matched the interval once, so
1795 		   jump back only `upper_bound - 1' times.  */
1796 		GET_BUFFER_SPACE(5);
1797 		store_jump_n(b, greedy?jump_n:finalize_push_n, laststart + 5,
1798 			     upper_bound - 1);
1799 		b += 5;
1800 
1801 		/* The location we want to set is the second
1802 		   parameter of the `jump_n'; that is `b-2' as
1803 		   an absolute address.  `laststart' will be
1804 		   the `set_number_at' we're about to insert;
1805 		   `laststart+3' the number to set, the source
1806 		   for the relative address.  But we are
1807 		   inserting into the middle of the pattern --
1808 		   so everything is getting moved up by 5.
1809 		   Conclusion: (b - 2) - (laststart + 3) + 5,
1810 		   i.e., b - laststart.
1811 
1812 		   We insert this at the beginning of the loop
1813 		   so that if we fail during matching, we'll
1814 		   reinitialize the bounds.  */
1815 		insert_op_2(set_number_at, laststart, b, b - laststart,
1816 			    upper_bound - 1);
1817 		b += 5;
1818 	      }
1819 	  }
1820 	  break;
1821 
1822 	unfetch_interval:
1823 	  /* If an invalid interval, match the characters as literals.  */
1824 	  p = beg_interval;
1825 	  beg_interval = 0;
1826 
1827 	  /* normal_char and normal_backslash need `c'.  */
1828 	  PATFETCH (c);
1829 	  goto normal_char;
1830 
1831         case '\\':
1832 	  if (p == pend) goto invalid_pattern;
1833           /* Do not translate the character after the \, so that we can
1834              distinguish, e.g., \B from \b, even if we normally would
1835              translate, e.g., B to b.  */
1836 	  PATFETCH_RAW(c);
1837 	  switch (c)
1838 	    {
1839 	    case 's':
1840 	    case 'S':
1841 	    case 'd':
1842 	    case 'D':
1843 	      while (b - bufp->buffer
1844 		     > bufp->allocated - 9 - (1 << BYTEWIDTH) / BYTEWIDTH)
1845 		EXTEND_BUFFER;
1846 
1847 	      laststart = b;
1848 	      if (c == 's' || c == 'd') {
1849 		BUFPUSH(charset);
1850 	      }
1851 	      else {
1852 		BUFPUSH(charset_not);
1853 	      }
1854 
1855 	      BUFPUSH((1 << BYTEWIDTH) / BYTEWIDTH);
1856 	      memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
1857 	      if (c == 's' || c == 'S') {
1858 		SET_LIST_BIT(' ');
1859 		SET_LIST_BIT('\t');
1860 		SET_LIST_BIT('\n');
1861 		SET_LIST_BIT('\r');
1862 		SET_LIST_BIT('\f');
1863 	      }
1864 	      else {
1865 		char cc;
1866 
1867 		for (cc = '0'; cc <= '9'; cc++) {
1868 		  SET_LIST_BIT(cc);
1869 		}
1870 	      }
1871 
1872 	      while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
1873 		b[-1]--;
1874 	      if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
1875 		memmove(&b[b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
1876 		  2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*4);
1877 	      b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[b[-1]])*4;
1878 	      break;
1879 
1880 	    case 'w':
1881 	      laststart = b;
1882 	      BUFPUSH(wordchar);
1883 	      break;
1884 
1885 	    case 'W':
1886 	      laststart = b;
1887 	      BUFPUSH(notwordchar);
1888 	      break;
1889 
1890 	    case '<':
1891 	      BUFPUSH(wordbeg);
1892 	      break;
1893 
1894 	    case '>':
1895 	      BUFPUSH(wordend);
1896 	      break;
1897 
1898 	    case 'b':
1899 	      BUFPUSH(wordbound);
1900 	      break;
1901 
1902 	    case 'B':
1903 	      BUFPUSH(notwordbound);
1904 	      break;
1905 
1906 	    case 'A':
1907 	      BUFPUSH(begbuf);
1908 	      break;
1909 
1910 	    case 'Z':
1911 	      BUFPUSH(endbuf2);
1912 	      break;
1913 
1914 	    case 'z':
1915 	      BUFPUSH(endbuf);
1916 	      break;
1917 
1918 	      /* hex */
1919 	    case 'x':
1920 	      c1 = 0;
1921 	      c = scan_hex(p, 2, &numlen);
1922 	      p += numlen;
1923 	      if (c > 0x7f)
1924 		  c1 = 0xff;
1925 	      goto numeric_char;
1926 
1927 	      /* octal */
1928 	    case '0':
1929 	      c1 = 0;
1930 	      c = scan_oct(p, 3, &numlen);
1931 	      p += numlen;
1932 	      if (c > 0x7f)
1933 		  c1 = 0xff;
1934 	      goto numeric_char;
1935 
1936 	      /* back-ref or octal */
1937 	    case '1': case '2': case '3':
1938 	    case '4': case '5': case '6':
1939 	    case '7': case '8': case '9':
1940 	      {
1941 		  char *p_save;
1942 
1943 		  PATUNFETCH;
1944 		  p_save = p;
1945 
1946 		  c1 = 0;
1947 		  GET_UNSIGNED_NUMBER(c1);
1948 		  if (!ISDIGIT(c)) PATUNFETCH;
1949 
1950 		  if (c1 >= regnum) {
1951 		      /* need to get octal */
1952 		      p = p_save;
1953 		      c = scan_oct(p_save, 3, &numlen) & 0xff;
1954 		      p = p_save + numlen;
1955 		      c1 = 0;
1956 		      if (c > 0x7f)
1957 			  c1 = 0xff;
1958 		      goto numeric_char;
1959 		  }
1960 	      }
1961 
1962               /* Can't back reference to a subexpression if inside of it.  */
1963               for (stackt = stackp - 2;  stackt > stackb;  stackt -= 5)
1964  		if (*stackt == c1)
1965 		  goto normal_char;
1966 	      laststart = b;
1967 	      BUFPUSH(duplicate);
1968 	      BUFPUSH(c1);
1969 	      break;
1970 
1971             default:
1972 	    normal_backsl:
1973 	      goto normal_char;
1974 	    }
1975 	  break;
1976 
1977 	case '#':
1978 	  if (options & RE_OPTION_EXTENDED)
1979 	    {
1980 	      while (p != pend) {
1981 		PATFETCH(c);
1982 		if (c == '\n') break;
1983 	      }
1984 	      break;
1985 	    }
1986 	  goto normal_char;
1987 
1988 	case ' ':
1989 	case '\t':
1990 	case '\f':
1991 	case '\r':
1992 	case '\n':
1993 	  if (options & RE_OPTION_EXTENDED)
1994 	    break;
1995 
1996 	default:
1997 	normal_char:		/* Expects the character in `c'.  */
1998 	  c1 = 0;
1999 	  if (ismbchar(c)) {
2000 	    c1 = c;
2001 	    PATFETCH_RAW(c);
2002 	  }
2003 	  else if (c > 0x7f) {
2004 	    c1 = 0xff;
2005 	  }
2006 	numeric_char:
2007 	  if (!pending_exact || pending_exact + *pending_exact + 1 != b
2008 	      || *pending_exact >= (c1 ? 0176 : 0177)
2009 	      || *p == '+' || *p == '?'
2010 	      || *p == '*' || *p == '^'
2011 	      || *p == '{')
2012 	    {
2013 	      laststart = b;
2014 	      BUFPUSH(exactn);
2015 	      pending_exact = b;
2016 	      BUFPUSH(0);
2017 	    }
2018 	  if (c1) {
2019 	    BUFPUSH(c1);
2020 	    (*pending_exact)++;
2021 	  }
2022 	  BUFPUSH(c);
2023 	  (*pending_exact)++;
2024 	}
2025     }
2026 
2027   if (fixup_alt_jump)
2028     store_jump(fixup_alt_jump, jump, b);
2029 
2030   if (stackp != stackb)
2031     FREE_AND_RETURN(stackb, "unmatched (");
2032 
2033   /* set optimize flags */
2034   laststart = bufp->buffer;
2035   if (*laststart == start_memory) laststart += 3;
2036   if (*laststart == dummy_failure_jump) laststart += 3;
2037   else if (*laststart == try_next) laststart += 3;
2038   if (*laststart == on_failure_jump) {
2039     int mcnt;
2040 
2041     laststart++;
2042     EXTRACT_NUMBER_AND_INCR(mcnt, laststart);
2043     if (mcnt == 4 && *laststart == anychar) {
2044       bufp->options |= RE_OPTIMIZE_ANCHOR;
2045     }
2046     else if (*laststart == charset || *laststart == charset_not) {
2047       p0 = laststart;
2048       mcnt = *++p0 ;
2049       p0 += mcnt+1;
2050       mcnt = EXTRACT_UNSIGNED_AND_INCR(p0);
2051       p0 += 4*mcnt;
2052       if (*p0 == maybe_finalize_jump) {
2053 	bufp->stclass = laststart;
2054       }
2055     }
2056   }
2057 
2058   bufp->used = b - bufp->buffer;
2059   bufp->re_nsub = regnum;
2060   laststart = bufp->buffer;
2061   if (*laststart == start_memory) laststart += 3;
2062   if (*laststart == exactn) {
2063     bufp->options |= RE_OPTIMIZE_EXACTN;
2064     bufp->must = laststart+1;
2065   }
2066   else {
2067     bufp->must = calculate_must_string(bufp->buffer, b);
2068   }
2069   if (current_mbctype == MBCTYPE_SJIS) bufp->options |= RE_OPTIMIZE_NO_BM;
2070   else if (bufp->must) {
2071     int i;
2072     int len = (unsigned char)bufp->must[0];
2073 
2074     for (i=1; i<len; i++) {
2075       if ((unsigned char)bufp->must[i] == 0xff ||
2076 	  (current_mbctype == MBCTYPE_EUC && ismbchar(bufp->must[i]))) {
2077 	bufp->options |= RE_OPTIMIZE_NO_BM;
2078 	break;
2079       }
2080     }
2081     if (!(bufp->options & RE_OPTIMIZE_NO_BM)) {
2082       bufp->must_skip = (int *) xmalloc((1 << BYTEWIDTH)*sizeof(int));
2083       bm_init_skip(bufp->must_skip, bufp->must+1,
2084 		   (unsigned char)bufp->must[0],
2085 		   MAY_TRANSLATE()?translate:0);
2086     }
2087   }
2088 
2089   FREE_AND_RETURN(stackb, 0);
2090 
2091  invalid_pattern:
2092   FREE_AND_RETURN(stackb, "invalid regular expression");
2093 
2094  end_of_pattern:
2095   FREE_AND_RETURN(stackb, "premature end of regular expression");
2096 
2097  too_big:
2098   FREE_AND_RETURN(stackb, "regular expression too big");
2099 
2100  memory_exhausted:
2101   FREE_AND_RETURN(stackb, "memory exhausted");
2102 
2103  nested_meta:
2104   FREE_AND_RETURN(stackb, "nested *?+ in regexp");
2105 }
2106 
2107 void
re_free_pattern(bufp)2108 re_free_pattern(bufp)
2109     struct re_pattern_buffer *bufp;
2110 {
2111     free(bufp->buffer);
2112     free(bufp->fastmap);
2113     if (bufp->must_skip) free(bufp->must_skip);
2114     free(bufp);
2115 }
2116 
2117 /* Store a jump of the form <OPCODE> <relative address>.
2118    Store in the location FROM a jump operation to jump to relative
2119    address FROM - TO.  OPCODE is the opcode to store.  */
2120 
2121 static void
store_jump(from,opcode,to)2122 store_jump(from, opcode, to)
2123      char *from, *to;
2124      int opcode;
2125 {
2126   from[0] = (char)opcode;
2127   STORE_NUMBER(from + 1, to - (from + 3));
2128 }
2129 
2130 
2131 /* Open up space before char FROM, and insert there a jump to TO.
2132    CURRENT_END gives the end of the storage not in use, so we know
2133    how much data to copy up. OP is the opcode of the jump to insert.
2134 
2135    If you call this function, you must zero out pending_exact.  */
2136 
2137 static void
insert_jump(op,from,to,current_end)2138 insert_jump(op, from, to, current_end)
2139      int op;
2140      char *from, *to, *current_end;
2141 {
2142   register char *pfrom = current_end;		/* Copy from here...  */
2143   register char *pto = current_end + 3;		/* ...to here.  */
2144 
2145   while (pfrom != from)
2146     *--pto = *--pfrom;
2147   store_jump(from, op, to);
2148 }
2149 
2150 
2151 /* Store a jump of the form <opcode> <relative address> <n> .
2152 
2153    Store in the location FROM a jump operation to jump to relative
2154    address FROM - TO.  OPCODE is the opcode to store, N is a number the
2155    jump uses, say, to decide how many times to jump.
2156 
2157    If you call this function, you must zero out pending_exact.  */
2158 
2159 static void
store_jump_n(from,opcode,to,n)2160 store_jump_n(from, opcode, to, n)
2161      char *from, *to;
2162      int opcode;
2163      unsigned n;
2164 {
2165   from[0] = (char)opcode;
2166   STORE_NUMBER(from + 1, to - (from + 3));
2167   STORE_NUMBER(from + 3, n);
2168 }
2169 
2170 
2171 /* Similar to insert_jump, but handles a jump which needs an extra
2172    number to handle minimum and maximum cases.  Open up space at
2173    location FROM, and insert there a jump to TO.  CURRENT_END gives the
2174    end of the storage in use, so we know how much data to copy up. OP is
2175    the opcode of the jump to insert.
2176 
2177    If you call this function, you must zero out pending_exact.  */
2178 
2179 static void
insert_jump_n(op,from,to,current_end,n)2180 insert_jump_n(op, from, to, current_end, n)
2181      int op;
2182      char *from, *to, *current_end;
2183      unsigned n;
2184 {
2185   register char *pfrom = current_end;		/* Copy from here...  */
2186   register char *pto = current_end + 5;		/* ...to here.  */
2187 
2188   while (pfrom != from)
2189     *--pto = *--pfrom;
2190   store_jump_n(from, op, to, n);
2191 }
2192 
2193 
2194 /* Open up space at location THERE, and insert operation OP.
2195    CURRENT_END gives the end of the storage in use, so
2196    we know how much data to copy up.
2197 
2198    If you call this function, you must zero out pending_exact.  */
2199 
2200 #if 0
2201 static void
2202 insert_op(op, there, current_end)
2203      int op;
2204      char *there, *current_end;
2205 {
2206   register char *pfrom = current_end;		/* Copy from here...  */
2207   register char *pto = current_end + 1;		/* ...to here.  */
2208 
2209   while (pfrom != there)
2210     *--pto = *--pfrom;
2211 
2212   there[0] = (char)op;
2213 }
2214 #endif /* 0 */
2215 
2216 
2217 /* Open up space at location THERE, and insert operation OP followed by
2218    NUM_1 and NUM_2.  CURRENT_END gives the end of the storage in use, so
2219    we know how much data to copy up.
2220 
2221    If you call this function, you must zero out pending_exact.  */
2222 
2223 static void
insert_op_2(op,there,current_end,num_1,num_2)2224 insert_op_2(op, there, current_end, num_1, num_2)
2225      int op;
2226      char *there, *current_end;
2227      int num_1, num_2;
2228 {
2229   register char *pfrom = current_end;		/* Copy from here...  */
2230   register char *pto = current_end + 5;		/* ...to here.  */
2231 
2232   while (pfrom != there)
2233     *--pto = *--pfrom;
2234 
2235   there[0] = (char)op;
2236   STORE_NUMBER(there + 1, num_1);
2237   STORE_NUMBER(there + 3, num_2);
2238 }
2239 
2240 
2241 #define trans_eq(c1, c2, translate) (translate?(translate[c1]==translate[c2]):((c1)==(c2)))
2242 static int
slow_match(little,lend,big,bend,translate)2243 slow_match(little, lend, big, bend, translate)
2244      unsigned char *little, *lend;
2245      unsigned char *big, *bend;
2246      unsigned char *translate;
2247 {
2248   int c;
2249 
2250   while (little < lend && big < bend) {
2251     c = *little++;
2252     if (c == 0xff)
2253       c = *little++;
2254     if (!trans_eq(*big++, c, translate)) break;
2255   }
2256   if (little == lend) return 1;
2257   return 0;
2258 }
2259 
2260 static int
slow_search(little,llen,big,blen,translate)2261 slow_search(little, llen, big, blen, translate)
2262      unsigned char *little;
2263      int llen;
2264      unsigned char *big;
2265      int blen;
2266      char *translate;
2267 {
2268   unsigned char *bsave = big;
2269   unsigned char *bend = big + blen;
2270   register int c;
2271   int fescape = 0;
2272 
2273   c = *little;
2274   if (c == 0xff) {
2275     c = little[1];
2276     fescape = 1;
2277   }
2278   else if (translate && !ismbchar(c)) {
2279     c = translate[c];
2280   }
2281 
2282   while (big < bend) {
2283     /* look for first character */
2284     if (fescape) {
2285       while (big < bend) {
2286 	if (*big == c) break;
2287 	big++;
2288       }
2289     }
2290     else if (translate && !ismbchar(c)) {
2291       while (big < bend) {
2292 	if (ismbchar(*big)) big++;
2293 	else if (translate[*big] == c) break;
2294 	big++;
2295       }
2296     }
2297     else {
2298       while (big < bend) {
2299 	if (*big == c) break;
2300 	if (ismbchar(*big)) big++;
2301 	big++;
2302       }
2303     }
2304 
2305     if (slow_match(little, little+llen, big, bend, translate))
2306       return big - bsave;
2307 
2308     if (ismbchar(*big)) big++;
2309     big++;
2310   }
2311   return -1;
2312 }
2313 
2314 static void
bm_init_skip(skip,pat,m,translate)2315 bm_init_skip(skip, pat, m, translate)
2316     int *skip;
2317     unsigned char *pat;
2318     int m;
2319     char *translate;
2320 {
2321     int j, c;
2322 
2323     for (c=0; c<256; c++) {
2324 	skip[c] = m;
2325     }
2326     if (translate) {
2327       for (j=0; j<m-1; j++) {
2328 	skip[translate[pat[j]]] = m-1-j;
2329       }
2330     }
2331     else {
2332       for (j=0; j<m-1; j++) {
2333 	skip[pat[j]] = m-1-j;
2334       }
2335     }
2336 }
2337 
2338 static int
bm_search(little,llen,big,blen,skip,translate)2339 bm_search(little, llen, big, blen, skip, translate)
2340      unsigned char *little;
2341      int llen;
2342      unsigned char *big;
2343      int blen;
2344      int *skip;
2345      unsigned char *translate;
2346 {
2347 /*  int next[256]; */
2348   int i, j, k;
2349 /*  unsigned char c; */
2350 
2351   i = llen-1;
2352   if (translate) {
2353     while (i < blen) {
2354       k = i;
2355       j = llen-1;
2356       while (j >= 0 && translate[big[k]] == translate[little[j]]) {
2357 	k--;
2358 	j--;
2359       }
2360       if (j < 0) return k+1;
2361 
2362       i += skip[translate[big[i]]];
2363     }
2364     return -1;
2365   }
2366   while (i < blen) {
2367     k = i;
2368     j = llen-1;
2369     while (j >= 0 && big[k] == little[j]) {
2370       k--;
2371       j--;
2372     }
2373     if (j < 0) return k+1;
2374 
2375     i += skip[big[i]];
2376   }
2377   return -1;
2378 }
2379 
2380 /* Given a pattern, compute a fastmap from it.  The fastmap records
2381    which of the (1 << BYTEWIDTH) possible characters can start a string
2382    that matches the pattern.  This fastmap is used by re_search to skip
2383    quickly over totally implausible text.
2384 
2385    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2386    area as bufp->fastmap.
2387    The other components of bufp describe the pattern to be used.  */
2388 void
re_compile_fastmap(bufp)2389 re_compile_fastmap(bufp)
2390      struct re_pattern_buffer *bufp;
2391 {
2392   unsigned char *pattern = (unsigned char *) bufp->buffer;
2393   int size = bufp->used;
2394   register char *fastmap = bufp->fastmap;
2395   register unsigned char *p = pattern;
2396   register unsigned char *pend = pattern + size;
2397   register int j, k;
2398   unsigned is_a_succeed_n;
2399 
2400   unsigned char **stackb = RE_TALLOC(NFAILURES, unsigned char*);
2401   unsigned char **stackp = stackb;
2402   unsigned char **stacke = stackb + NFAILURES;
2403   int options = bufp->options;
2404 
2405   memset(fastmap, 0, (1 << BYTEWIDTH));
2406   bufp->fastmap_accurate = 1;
2407   bufp->can_be_null = 0;
2408 
2409   while (p)
2410     {
2411       is_a_succeed_n = 0;
2412       if (p == pend)
2413 	{
2414 	  bufp->can_be_null = 1;
2415 	  break;
2416 	}
2417 #ifdef SWITCH_ENUM_BUG
2418       switch ((int) ((enum regexpcode)*p++))
2419 #else
2420       switch ((enum regexpcode)*p++)
2421 #endif
2422 	{
2423 	case exactn:
2424 	  if (p[1] == 0xff) {
2425 	    if (TRANSLATE_P())
2426 	      fastmap[translate[p[2]]] = 2;
2427 	    else
2428 	      fastmap[p[2]] = 2;
2429 	  }
2430 	  else if (TRANSLATE_P())
2431 	    fastmap[translate[p[1]]] = 1;
2432 	  else
2433 	    fastmap[p[1]] = 1;
2434 	  break;
2435 
2436         case begline:
2437 	case begbuf:
2438 	case endbuf:
2439 	case endbuf2:
2440 	case wordbound:
2441 	case notwordbound:
2442 	case wordbeg:
2443 	case wordend:
2444 	case pop_and_fail:
2445         case push_dummy_failure:
2446 	  continue;
2447 
2448 	case casefold_on:
2449 	  bufp->options |= RE_MAY_IGNORECASE;
2450 	case casefold_off:
2451 	  options ^= RE_OPTION_IGNORECASE;
2452 	  continue;
2453 
2454 	case endline:
2455 	  if (TRANSLATE_P())
2456 	    fastmap[translate['\n']] = 1;
2457 	  else
2458 	    fastmap['\n'] = 1;
2459 
2460 	  if (bufp->can_be_null == 0)
2461 	    bufp->can_be_null = 2;
2462 	  break;
2463 
2464 	case jump_n:
2465         case finalize_jump:
2466 	case maybe_finalize_jump:
2467 	case jump:
2468         case jump_past_alt:
2469 	case dummy_failure_jump:
2470           EXTRACT_NUMBER_AND_INCR(j, p);
2471 	  p += j;
2472 	  if (j > 0)
2473 	    continue;
2474           /* Jump backward reached implies we just went through
2475 	     the body of a loop and matched nothing.
2476 	     Opcode jumped to should be an on_failure_jump.
2477 	     Just treat it like an ordinary jump.
2478 	     For a * loop, it has pushed its failure point already;
2479 	     If so, discard that as redundant.  */
2480 
2481           if ((enum regexpcode) *p != on_failure_jump
2482 	      && (enum regexpcode) *p != try_next
2483 	      && (enum regexpcode) *p != succeed_n
2484 	      && (enum regexpcode) *p != finalize_push
2485 	      && (enum regexpcode) *p != finalize_push_n)
2486 	    continue;
2487           p++;
2488           EXTRACT_NUMBER_AND_INCR(j, p);
2489           p += j;
2490           if (stackp != stackb && *stackp == p)
2491             stackp--;		/* pop */
2492           continue;
2493 
2494         case start_nowidth:
2495 	case stop_nowidth:
2496         case finalize_push:
2497 	  p += 2;
2498 	  continue;
2499 
2500         case finalize_push_n:
2501 	  p += 4;
2502 	  continue;
2503 
2504 	case try_next:
2505         case on_failure_jump:
2506 	handle_on_failure_jump:
2507           EXTRACT_NUMBER_AND_INCR(j, p);
2508           if (p + j < pend) {
2509 	    if (stackp == stacke) {
2510 	      unsigned char **stackx;
2511 	      unsigned int len = stacke - stackb;
2512 
2513 	      EXPAND_FAIL_STACK(stackx, stackb, len);
2514 	    }
2515 	    *++stackp = p + j;	/* push */
2516 	  }
2517 	  else {
2518             bufp->can_be_null = 1;
2519 	  }
2520 	  if (is_a_succeed_n)
2521             EXTRACT_NUMBER_AND_INCR(k, p);	/* Skip the n.  */
2522 	  continue;
2523 
2524 	case succeed_n:
2525 	  is_a_succeed_n = 1;
2526           /* Get to the number of times to succeed.  */
2527           EXTRACT_NUMBER(k, p + 2);
2528 	  /* Increment p past the n for when k != 0.  */
2529           if (k == 0) {
2530 	    p += 4;
2531 	  }
2532 	  else {
2533 	    goto handle_on_failure_jump;
2534 	  }
2535           continue;
2536 
2537 	case set_number_at:
2538           p += 4;
2539           continue;
2540 
2541         case start_memory:
2542 	case stop_memory:
2543 	  p += 2;
2544 	  continue;
2545 
2546 	case duplicate:
2547 	  bufp->can_be_null = 1;
2548 	  fastmap['\n'] = 1;
2549 	case anychar:
2550 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2551 	    if (j != '\n')
2552 	      fastmap[j] = 1;
2553 	  if (bufp->can_be_null)
2554 	    {
2555 	      FREE_AND_RETURN_VOID(stackb);
2556 	    }
2557 	  /* Don't return; check the alternative paths
2558 	     so we can set can_be_null if appropriate.  */
2559 	  break;
2560 
2561 	case wordchar:
2562 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
2563 	    if (SYNTAX(j) == Sword)
2564 	      fastmap[j] = 1;
2565 	  break;
2566 
2567 	case notwordchar:
2568 	  for (j = 0; j < 0x80; j++)
2569 	    if (SYNTAX(j) != Sword)
2570 	      fastmap[j] = 1;
2571 	  for (j = 0x80; j < (1 << BYTEWIDTH); j++)
2572 	      fastmap[j] = 1;
2573 	  break;
2574 
2575 	case charset:
2576 	  /* NOTE: Charset for single-byte chars never contain
2577 		   multi-byte char.  See set_list_bits().  */
2578 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2579 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
2580 	      {
2581 		if (TRANSLATE_P())
2582 		  fastmap[translate[j]] = 1;
2583 		else
2584 		  fastmap[j] = 1;
2585 	      }
2586 	  {
2587 	    unsigned short size;
2588 	    unsigned c, end;
2589 
2590 	    p += p[-1] + 2;
2591 	    size = EXTRACT_UNSIGNED(&p[-2]);
2592 	    for (j = 0; j < (int)size; j++) {
2593 	      if ((unsigned char)p[j*4] == 0xff) {
2594 		for (c = (unsigned char)p[j*4+1],
2595 		    end = (unsigned char)p[j*4+3];
2596 		     c <= end; c++) {
2597 		  fastmap[c] = 2;
2598 		}
2599 	      }
2600 	      else {
2601 		/* set bits for 1st bytes of multi-byte chars.  */
2602 		for (c = (unsigned char)p[j*4],
2603 		     end = (unsigned char)p[j*4 + 2];
2604 		     c <= end; c++) {
2605 		  /* NOTE: Charset for multi-byte chars might contain
2606 		     single-byte chars.  We must reject them. */
2607 		  if (ismbchar(c))
2608 		    fastmap[c] = 1;
2609 		}
2610 	      }
2611 	    }
2612 	  }
2613 	  break;
2614 
2615 	case charset_not:
2616 	  /* S: set of all single-byte chars.
2617 	     M: set of all first bytes that can start multi-byte chars.
2618 	     s: any set of single-byte chars.
2619 	     m: any set of first bytes that can start multi-byte chars.
2620 
2621 	     We assume S+M = U.
2622 	       ___      _   _
2623 	       s+m = (S*s+M*m).  */
2624 	  /* Chars beyond end of map must be allowed */
2625 	  /* NOTE: Charset_not for single-byte chars might contain
2626 		   multi-byte chars.  See set_list_bits(). */
2627 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
2628 	    if (!ismbchar(j))
2629 	      fastmap[j] = 1;
2630 
2631 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
2632 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
2633 	      {
2634 		if (!ismbchar(j))
2635 		  fastmap[j] = 1;
2636 	      }
2637 	  {
2638 	    unsigned short size;
2639 	    unsigned char c, beg;
2640 	    int byte_match = 0;
2641 
2642 	    p += p[-1] + 2;
2643 	    size = EXTRACT_UNSIGNED(&p[-2]);
2644 	    if (size == 0) {
2645 	      for (j = 0x80; j < (1 << BYTEWIDTH); j++)
2646 		if (ismbchar(j))
2647 		  fastmap[j] = 1;
2648 	    }
2649 	    for (j = 0,c = 0x80;j < (int)size; j++) {
2650 	      if ((unsigned char)p[j*4] == 0xff) {
2651 		byte_match = 1;
2652 	        for (beg = (unsigned char)p[j*4+1]; c < beg; c++)
2653 		  fastmap[c] = 1;
2654 	        c = (unsigned char)p[j*4+3] + 1;
2655 	      }
2656 	      else {
2657 	        for (beg = (unsigned char)p[j*4 + 0]; c < beg; c++)
2658 		  if (ismbchar(c))
2659 		    fastmap[c] = 1;
2660 	        c = (unsigned char)p[j*4 + 2] + 1;
2661 	      }
2662 	    }
2663 	    if (byte_match) {
2664 	      for (j = c; j < (1 << BYTEWIDTH); j++)
2665 		fastmap[j] = 1;
2666 	      for (j = 0; j < (1 << BYTEWIDTH); j++)
2667 		if (fastmap[j])
2668 		  fastmap[j] = 2;
2669 	    }
2670 	    else {
2671 	      for (j = c; j < (1 << BYTEWIDTH); j++)
2672 		if (ismbchar(j))
2673 		  fastmap[j] = 1;
2674 	    }
2675 	  }
2676 	  break;
2677 
2678 	case unused:	/* pacify gcc -Wall */
2679 	  break;
2680 	}
2681 
2682       /* Get here means we have successfully found the possible starting
2683          characters of one path of the pattern.  We need not follow this
2684          path any farther.  Instead, look at the next alternative
2685          remembered in the stack.  */
2686       if (stackp != stackb)
2687 	p = *stackp--;		/* pop */
2688       else
2689 	break;
2690     }
2691    FREE_AND_RETURN_VOID(stackb);
2692 }
2693 
2694 
2695 /* Using the compiled pattern in BUFP->buffer, first tries to match
2696    STRING, starting first at index STARTPOS, then at STARTPOS + 1, and
2697    so on.  RANGE is the number of places to try before giving up.  If
2698    RANGE is negative, it searches backwards, i.e., the starting
2699    positions tried are STARTPOS, STARTPOS - 1, etc.  STRING is of SIZE.
2700    In REGS, return the indices of STRING that matched the entire
2701    BUFP->buffer and its contained subexpressions.
2702 
2703    The value returned is the position in the strings at which the match
2704    was found, or -1 if no match was found, or -2 if error (such as
2705    failure stack overflow).  */
2706 
2707 int
re_search(bufp,string,size,startpos,range,regs)2708 re_search(bufp, string, size, startpos, range, regs)
2709      struct re_pattern_buffer *bufp;
2710      char *string;
2711      int size, startpos, range;
2712      struct re_registers *regs;
2713 {
2714   register char *fastmap = bufp->fastmap;
2715   int val, anchor = 0;
2716 
2717   /* Check for out-of-range starting position.  */
2718   if (startpos < 0  ||  startpos > size)
2719     return -1;
2720 
2721   /* Update the fastmap now if not correct already.  */
2722   if (fastmap && !bufp->fastmap_accurate) {
2723       re_compile_fastmap(bufp);
2724   }
2725 
2726   /* If the search isn't to be a backwards one, don't waste time in a
2727      search for a pattern that must be anchored.  */
2728   if (bufp->used>0) {
2729     switch ((enum regexpcode)bufp->buffer[0]) {
2730     case begbuf:
2731       if (range > 0) {
2732 	if (startpos > 0)
2733 	  return -1;
2734 	else if (re_match(bufp, string, size, 0, regs) >= 0)
2735 	  return 0;
2736 	return -1;
2737       }
2738       break;
2739 
2740     case begline:
2741       anchor = 1;
2742       break;
2743 
2744     default:
2745       break;
2746     }
2747   }
2748   if (bufp->options & RE_OPTIMIZE_ANCHOR) {
2749     anchor = 1;
2750   }
2751 
2752   if (bufp->must) {
2753     int len = ((unsigned char*)bufp->must)[0];
2754     int pos, pbeg, pend;
2755 
2756     pbeg = startpos;
2757     pend = startpos + range;
2758     if (pbeg > pend) {		/* swap pbeg,pend */
2759       pos = pend; pend = pbeg; pbeg = pos;
2760     }
2761     if (pend > size) pend = size;
2762     if (bufp->options & RE_OPTIMIZE_NO_BM) {
2763       pos = slow_search(bufp->must+1, len,
2764 			string+pbeg, pend-pbeg,
2765 			MAY_TRANSLATE()?translate:0);
2766     }
2767     else {
2768       pos = bm_search(bufp->must+1, len,
2769 		      string+pbeg, pend-pbeg,
2770 		      bufp->must_skip,
2771 		      MAY_TRANSLATE()?translate:0);
2772     }
2773     if (pos == -1) return -1;
2774     if (range > 0 && (bufp->options & RE_OPTIMIZE_EXACTN)) {
2775       startpos += pos;
2776       range -= pos;
2777     }
2778   }
2779 
2780   for (;;)
2781     {
2782       /* If a fastmap is supplied, skip quickly over characters that
2783          cannot possibly be the start of a match.  Note, however, that
2784          if the pattern can possibly match the null string, we must
2785          test it at each starting point so that we take the first null
2786          string we get.  */
2787 
2788       if (fastmap && startpos < size
2789 	  && bufp->can_be_null != 1 && !(anchor && startpos == 0))
2790 	{
2791 	  if (range > 0)	/* Searching forwards.  */
2792 	    {
2793 	      register unsigned char *p, c;
2794 	      int irange = range;
2795 
2796 	      p = (unsigned char *)string+startpos;
2797 
2798 	      while (range > 0) {
2799 		c = *p++;
2800 		if (ismbchar(c)) {
2801 		  if (fastmap[c])
2802 		    break;
2803 		  c = *p++;
2804 		  range--;
2805 		  if (fastmap[c] == 2)
2806 		    break;
2807 		}
2808 		else
2809 		  if (fastmap[MAY_TRANSLATE() ? translate[c] : c])
2810 		    break;
2811 		range--;
2812 	      }
2813 	      startpos += irange - range;
2814 	    }
2815 	  else			/* Searching backwards.  */
2816 	    {
2817 	      register unsigned char c;
2818 
2819 	      c = string[startpos];
2820               c &= 0xff;
2821 	      if (MAY_TRANSLATE() ? !fastmap[translate[c]] : !fastmap[c])
2822 		goto advance;
2823 	    }
2824 	}
2825 
2826       if (fastmap && startpos == size && range >= 0
2827 	  && (bufp->can_be_null == 0 ||
2828 	      (bufp->can_be_null && size > 0
2829 	       && string[startpos-1] == '\n')))
2830 	return -1;
2831 
2832       val = re_match(bufp, string, size, startpos, regs);
2833       if (val >= 0)
2834 	return startpos;
2835       if (val == -2)
2836 	return -2;
2837 
2838 #ifndef NO_ALLOCA
2839 #ifdef C_ALLOCA
2840       alloca(0);
2841 #endif /* C_ALLOCA */
2842 #endif /* NO_ALLOCA */
2843 
2844       if (range > 0) {
2845 	if (anchor && startpos < size && startpos > 0 && string[startpos-1] != '\n') {
2846 	  while (range > 0 && string[startpos] != '\n') {
2847 	    range--;
2848 	    startpos++;
2849 	  }
2850 	}
2851 	else if (fastmap && (bufp->stclass)) {
2852 	  register unsigned char *p;
2853 	  register unsigned short c;
2854 	  int irange = range;
2855 
2856 	  p = (unsigned char *)string+startpos;
2857 	  while (range > 0) {
2858 	    c = *p++;
2859 	    if (ismbchar(c) && fastmap[c] != 2) {
2860 	      c = c << 8 | *p++;
2861 	    }
2862 	    else if (MAY_TRANSLATE())
2863 	      c = translate[c];
2864 	    if (*bufp->stclass == charset) {
2865 	      if (!is_in_list(c, bufp->stclass+1)) break;
2866 	    }
2867 	    else {
2868 	      if (is_in_list(c, bufp->stclass+1)) break;
2869 	    }
2870 	    range--;
2871 	    if (c > 256) range--;
2872 	  }
2873 	  startpos += irange - range;
2874 	}
2875       }
2876 
2877     advance:
2878       if (!range)
2879         break;
2880       else if (range > 0) {
2881 	const char *d = string + startpos;
2882 
2883 	if (ismbchar(*d)) {
2884 	  range--, startpos++;
2885 	  if (!range)
2886 	    break;
2887 	}
2888 	range--, startpos++;
2889       }
2890       else {
2891 	range++, startpos--;
2892 	{
2893 	  const char *s, *d, *p;
2894 
2895 	  s = string; d = string + startpos;
2896 	  for (p = d; p-- > s && ismbchar(*p); )
2897 	    /* --p >= s would not work on 80[12]?86.
2898 	      (when the offset of s equals 0 other than huge model.)  */
2899 	    ;
2900 	  if (!((d - p) & 1)) {
2901 	    if (!range)
2902 	      break;
2903 	    range++, startpos--;
2904 	  }
2905 	}
2906       }
2907     }
2908   return -1;
2909 }
2910 
2911 
2912 
2913 
2914 /* The following are used for re_match, defined below:  */
2915 
2916 /* Roughly the maximum number of failure points on the stack.  Would be
2917    exactly that if always pushed MAX_NUM_FAILURE_ITEMS each time we failed.  */
2918 
2919 int re_max_failures = 2000;
2920 
2921 /* Routine used by re_match.  */
2922 /* static int memcmp_translate(); *//* already declared */
2923 
2924 
2925 /* Structure and accessing macros used in re_match:  */
2926 
2927 typedef union
2928 {
2929   unsigned char *word;
2930   struct
2931   {
2932       /* This field is one if this group can match the empty string,
2933          zero if not.  If not yet determined,  `MATCH_NULL_UNSET_VALUE'.  */
2934 #define MATCH_NULL_UNSET_VALUE 3
2935     unsigned match_null_string_p : 2;
2936     unsigned is_active : 1;
2937     unsigned matched_something : 1;
2938     unsigned ever_matched_something : 1;
2939   } bits;
2940 } register_info_type;
2941 
2942 #define REG_MATCH_NULL_STRING_P(R)  ((R).bits.match_null_string_p)
2943 #define IS_ACTIVE(R)  ((R).bits.is_active)
2944 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
2945 #define EVER_MATCHED_SOMETHING(R)  ((R).bits.ever_matched_something)
2946 
2947 
2948 /* Macros used by re_match:  */
2949 
2950 /* I.e., regstart, regend, and reg_info.  */
2951 #define NUM_REG_ITEMS  3
2952 
2953 /* Individual items aside from the registers.  */
2954 #define NUM_NONREG_ITEMS 3
2955 
2956 /* We push at most this many things on the stack whenever we
2957    fail.  The `+ 2' refers to PATTERN_PLACE and STRING_PLACE, which are
2958    arguments to the PUSH_FAILURE_POINT macro.  */
2959 #define MAX_NUM_FAILURE_ITEMS   (num_regs * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
2960 
2961 /* We push this many things on the stack whenever we fail.  */
2962 #define NUM_FAILURE_ITEMS  (last_used_reg * NUM_REG_ITEMS + NUM_REG_ITEMS)
2963 
2964 
2965 /* This pushes most of the information about the current state we will want
2966    if we ever fail back to it.  */
2967 
2968 #define PUSH_FAILURE_POINT(pattern_place, string_place)			\
2969   do {									\
2970     long last_used_reg, this_reg;					\
2971 									\
2972     /* Find out how many registers are active or have been matched.	\
2973        (Aside from register zero, which is only set at the end.) */	\
2974     for (last_used_reg = num_regs - 1; last_used_reg > 0; last_used_reg--)\
2975       if (!REG_UNSET(regstart[last_used_reg]))				\
2976         break;								\
2977 									\
2978     if (stacke - stackp <= NUM_FAILURE_ITEMS)				\
2979       {									\
2980 	unsigned char **stackx;						\
2981 	unsigned int len = stacke - stackb;				\
2982 	/* if (len > re_max_failures * MAX_NUM_FAILURE_ITEMS)		\
2983 	  {								\
2984 	    FREE_VARIABLES();						\
2985 	    FREE_AND_RETURN(stackb,(-2));				\
2986 	  }*/								\
2987 									\
2988         /* Roughly double the size of the stack.  */			\
2989         EXPAND_FAIL_STACK(stackx, stackb, len);				\
2990       }									\
2991 									\
2992     /* Now push the info for each of those registers.  */		\
2993     for (this_reg = 1; this_reg <= last_used_reg; this_reg++)		\
2994       {									\
2995         *stackp++ = regstart[this_reg];					\
2996         *stackp++ = regend[this_reg];					\
2997         *stackp++ = reg_info[this_reg].word;				\
2998       }									\
2999 									\
3000     /* Push how many registers we saved.  */				\
3001     *stackp++ = (unsigned char *)last_used_reg;				\
3002 									\
3003     *stackp++ = pattern_place;                                          \
3004     *stackp++ = string_place;                                           \
3005     *stackp++ = (unsigned char *)0; /* non-greedy flag */		\
3006   } while(0)
3007 
3008 
3009 /* This pops what PUSH_FAILURE_POINT pushes.  */
3010 
3011 #define POP_FAILURE_POINT()						\
3012   do {									\
3013     int temp;								\
3014     stackp -= NUM_NONREG_ITEMS;	/* Remove failure points (and flag). */	\
3015     temp = (int) *--stackp;	/* How many regs pushed.  */	        \
3016     temp *= NUM_REG_ITEMS;	/* How much to take off the stack.  */	\
3017     stackp -= temp; 		/* Remove the register info.  */	\
3018   } while(0)
3019 
3020 /* Registers are set to a sentinel when they haven't yet matched.  */
3021 #define REG_UNSET_VALUE ((unsigned char *) -1)
3022 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3023 
3024 #define PREFETCH if (d == dend) goto fail
3025 
3026 /* Call this when have matched something; it sets `matched' flags for the
3027    registers corresponding to the subexpressions of which we currently
3028    are inside.  */
3029 #define SET_REGS_MATCHED 						\
3030   do { unsigned this_reg;						\
3031     for (this_reg = 0; this_reg < num_regs; this_reg++) 		\
3032       { 								\
3033         if (IS_ACTIVE(reg_info[this_reg]))				\
3034           MATCHED_SOMETHING(reg_info[this_reg])				\
3035             = EVER_MATCHED_SOMETHING (reg_info[this_reg])		\
3036             = 1;							\
3037         else								\
3038           MATCHED_SOMETHING(reg_info[this_reg]) = 0;			\
3039       } 								\
3040   } while(0)
3041 
3042 #define AT_STRINGS_BEG(d)  (d == string)
3043 #define AT_STRINGS_END(d)  (d == dend)
3044 
3045 /* We have two special cases to check for:
3046      1) if we're past the end of string1, we have to look at the first
3047         character in string2;
3048      2) if we're before the beginning of string2, we have to look at the
3049         last character in string1; we assume there is a string1, so use
3050         this in conjunction with AT_STRINGS_BEG.  */
3051 #define IS_A_LETTER(d) (SYNTAX(*(d)) == Sword)
3052 
3053 static void
init_regs(regs,num_regs)3054 init_regs(regs, num_regs)
3055     struct re_registers *regs;
3056     unsigned int num_regs;
3057 {
3058     int i;
3059 
3060     regs->num_regs = num_regs;
3061     if (num_regs < RE_NREGS)
3062 	num_regs = RE_NREGS;
3063 
3064     if (regs->allocated == 0) {
3065 	regs->beg = TMALLOC(num_regs, int);
3066 	regs->end = TMALLOC(num_regs, int);
3067 	regs->allocated = num_regs;
3068     }
3069     else if (regs->allocated < num_regs) {
3070 	TREALLOC(regs->beg, num_regs, int);
3071 	TREALLOC(regs->end, num_regs, int);
3072     }
3073     for (i=0; i<num_regs; i++) {
3074 	regs->beg[i] = regs->end[i] = -1;
3075     }
3076 }
3077 
3078 /* Match the pattern described by BUFP against STRING, which is of
3079    SIZE.  Start the match at index POS in STRING.  In REGS, return the
3080    indices of STRING that matched the entire BUFP->buffer and its
3081    contained subexpressions.
3082 
3083    If bufp->fastmap is nonzero, then it had better be up to date.
3084 
3085    The reason that the data to match are specified as two components
3086    which are to be regarded as concatenated is so this function can be
3087    used directly on the contents of an Emacs buffer.
3088 
3089    -1 is returned if there is no match.  -2 is returned if there is an
3090    error (such as match stack overflow).  Otherwise the value is the
3091    length of the substring which was matched.  */
3092 
3093 int
re_match(bufp,string_arg,size,pos,regs)3094 re_match(bufp, string_arg, size, pos, regs)
3095      struct re_pattern_buffer *bufp;
3096      char *string_arg;
3097      int size, pos;
3098      struct re_registers *regs;
3099 {
3100   register unsigned char *p = (unsigned char *) bufp->buffer;
3101   unsigned char *p1;
3102 
3103   /* Pointer to beyond end of buffer.  */
3104   register unsigned char *pend = p + bufp->used;
3105 
3106   unsigned num_regs = bufp->re_nsub;
3107 
3108   unsigned char *string = (unsigned char *) string_arg;
3109 
3110   register unsigned char *d, *dend;
3111   register int mcnt;			/* Multipurpose.  */
3112   int options = bufp->options;
3113 
3114  /* Failure point stack.  Each place that can handle a failure further
3115     down the line pushes a failure point on this stack.  It consists of
3116     restart, regend, and reg_info for all registers corresponding to the
3117     subexpressions we're currently inside, plus the number of such
3118     registers, and, finally, two char *'s.  The first char * is where to
3119     resume scanning the pattern; the second one is where to resume
3120     scanning the strings.  If the latter is zero, the failure point is a
3121     ``dummy''; if a failure happens and the failure point is a dummy, it
3122     gets discarded and the next next one is tried.  */
3123 
3124   unsigned char **stackb;
3125   unsigned char **stackp;
3126   unsigned char **stacke;
3127 
3128 
3129   /* Information on the contents of registers. These are pointers into
3130      the input strings; they record just what was matched (on this
3131      attempt) by a subexpression part of the pattern, that is, the
3132      regnum-th regstart pointer points to where in the pattern we began
3133      matching and the regnum-th regend points to right after where we
3134      stopped matching the regnum-th subexpression.  (The zeroth register
3135      keeps track of what the whole pattern matches.)  */
3136 
3137   unsigned char **regstart = RE_TALLOC(num_regs, unsigned char*);
3138   unsigned char **regend = RE_TALLOC(num_regs, unsigned char*);
3139 
3140   /* If a group that's operated upon by a repetition operator fails to
3141      match anything, then the register for its start will need to be
3142      restored because it will have been set to wherever in the string we
3143      are when we last see its open-group operator.  Similarly for a
3144      register's end.  */
3145   unsigned char **old_regstart = RE_TALLOC(num_regs, unsigned char*);
3146   unsigned char **old_regend = RE_TALLOC(num_regs, unsigned char*);
3147 
3148   /* The is_active field of reg_info helps us keep track of which (possibly
3149      nested) subexpressions we are currently in. The matched_something
3150      field of reg_info[reg_num] helps us tell whether or not we have
3151      matched any of the pattern so far this time through the reg_num-th
3152      subexpression.  These two fields get reset each time through any
3153      loop their register is in.  */
3154 
3155   register_info_type *reg_info = RE_TALLOC(num_regs, register_info_type);
3156 
3157   /* The following record the register info as found in the above
3158      variables when we find a match better than any we've seen before.
3159      This happens as we backtrack through the failure points, which in
3160      turn happens only if we have not yet matched the entire string.  */
3161 
3162   unsigned best_regs_set = 0;
3163   unsigned char **best_regstart = RE_TALLOC(num_regs, unsigned char*);
3164   unsigned char **best_regend = RE_TALLOC(num_regs, unsigned char*);
3165 
3166   if (regs) {
3167     init_regs(regs, num_regs);
3168   }
3169 
3170   /* Initialize the stack. */
3171   stackb = RE_TALLOC(MAX_NUM_FAILURE_ITEMS * NFAILURES, unsigned char*);
3172   stackp = stackb;
3173   stacke = &stackb[MAX_NUM_FAILURE_ITEMS * NFAILURES];
3174 
3175 #ifdef DEBUG_REGEX
3176   fprintf (stderr, "Entering re_match(%s%s)\n", string1_arg, string2_arg);
3177 #endif
3178 
3179   /* Initialize subexpression text positions to -1 to mark ones that no
3180      \( or ( and \) or ) has been seen for. Also set all registers to
3181      inactive and mark them as not having matched anything or ever
3182      failed. */
3183   for (mcnt = 0; mcnt < num_regs; mcnt++) {
3184     regstart[mcnt] = regend[mcnt]
3185       = old_regstart[mcnt] = old_regend[mcnt]
3186       = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE;
3187 #ifdef __CHECKER__
3188     reg_info[mcnt].word = 0;
3189 #endif
3190     REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
3191     IS_ACTIVE (reg_info[mcnt]) = 0;
3192     MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3193     EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3194   }
3195 
3196   /* Set up pointers to ends of strings.
3197      Don't allow the second string to be empty unless both are empty.  */
3198 
3199 
3200   /* `p' scans through the pattern as `d' scans through the data. `dend'
3201      is the end of the input string that `d' points within. `d' is
3202      advanced into the following input string whenever necessary, but
3203      this happens before fetching; therefore, at the beginning of the
3204      loop, `d' can be pointing at the end of a string, but it cannot
3205      equal string2.  */
3206 
3207   d = string + pos, dend = string + size;
3208 
3209 
3210   /* This loops over pattern commands.  It exits by returning from the
3211      function if match is complete, or it drops through if match fails
3212      at this starting point in the input data.  */
3213 
3214   for (;;)
3215     {
3216 #ifdef DEBUG_REGEX
3217       fprintf(stderr,
3218 	      "regex loop(%d):  matching 0x%02d\n",
3219 	      p - (unsigned char *) bufp->buffer,
3220 	      *p);
3221 #endif
3222       /* End of pattern means we might have succeeded.  */
3223       if (p == pend)
3224 	{
3225 	  /* If not end of string, try backtracking.  Otherwise done.  */
3226           if (d != dend)
3227 	    {
3228 	      while (stackp != stackb && (int)stackp[-1] == 1)
3229 		POP_FAILURE_POINT();
3230               if (stackp != stackb)
3231                 {
3232 		  /* More failure points to try.  */
3233 
3234                   /* If exceeds best match so far, save it.  */
3235                   if (! best_regs_set || (d > best_regend[0]))
3236                     {
3237                       best_regs_set = 1;
3238                       best_regend[0] = d;	/* Never use regstart[0].  */
3239 
3240                       for (mcnt = 1; mcnt < num_regs; mcnt++)
3241                         {
3242                           best_regstart[mcnt] = regstart[mcnt];
3243                           best_regend[mcnt] = regend[mcnt];
3244                         }
3245                     }
3246                   goto fail;
3247                 }
3248               /* If no failure points, don't restore garbage.  */
3249               else if (best_regs_set)
3250                 {
3251 	      restore_best_regs:
3252                   /* Restore best match.  */
3253                   d = best_regend[0];
3254 
3255 		  for (mcnt = 0; mcnt < num_regs; mcnt++)
3256 		    {
3257 		      regstart[mcnt] = best_regstart[mcnt];
3258 		      regend[mcnt] = best_regend[mcnt];
3259 		    }
3260                 }
3261             }
3262 
3263 	  /* If caller wants register contents data back, convert it
3264 	     to indices.  */
3265 	  if (regs)
3266 	    {
3267 	      regs->beg[0] = pos;
3268 	      regs->end[0] = d - string;
3269 	      for (mcnt = 1; mcnt < num_regs; mcnt++)
3270 		{
3271 		  if (REG_UNSET(regend[mcnt]))
3272 		    {
3273 		      regs->beg[mcnt] = -1;
3274 		      regs->end[mcnt] = -1;
3275 		      continue;
3276 		    }
3277 		  regs->beg[mcnt] = regstart[mcnt] - string;
3278 		  regs->end[mcnt] = regend[mcnt] - string;
3279 		}
3280 	    }
3281 	  FREE_VARIABLES();
3282 	  FREE_AND_RETURN(stackb, (d - pos - string));
3283         }
3284 
3285       /* Otherwise match next pattern command.  */
3286 #ifdef SWITCH_ENUM_BUG
3287       switch ((int)((enum regexpcode)*p++))
3288 #else
3289       switch ((enum regexpcode)*p++)
3290 #endif
3291 	{
3292 
3293 	/* \( [or `(', as appropriate] is represented by start_memory,
3294            \) by stop_memory.  Both of those commands are followed by
3295            a register number in the next byte.  The text matched
3296            within the \( and \) is recorded under that number.  */
3297 	case start_memory:
3298           /* Find out if this group can match the empty string.  */
3299 	  p1 = p;		/* To send to group_match_null_string_p.  */
3300           if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
3301             REG_MATCH_NULL_STRING_P (reg_info[*p])
3302               = group_match_null_string_p (&p1, pend, reg_info);
3303 
3304           /* Save the position in the string where we were the last time
3305              we were at this open-group operator in case the group is
3306              operated upon by a repetition operator, e.g., with `(a*)*b'
3307              against `ab'; then we want to ignore where we are now in
3308              the string in case this attempt to match fails.  */
3309           old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3310                              ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
3311                              : regstart[*p];
3312           regstart[*p] = d;
3313           IS_ACTIVE(reg_info[*p]) = 1;
3314           MATCHED_SOMETHING(reg_info[*p]) = 0;
3315           p += 2;
3316 	  continue;
3317 
3318 	case stop_memory:
3319           /* We need to save the string position the last time we were at
3320              this close-group operator in case the group is operated
3321              upon by a repetition operator, e.g., with `((a*)*(b*)*)*'
3322              against `aba'; then we want to ignore where we are now in
3323              the string in case this attempt to match fails.  */
3324           old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
3325                            ? REG_UNSET (regend[*p]) ? d : regend[*p]
3326 			   : regend[*p];
3327 
3328           regend[*p] = d;
3329           IS_ACTIVE(reg_info[*p]) = 0;
3330 
3331           /* If just failed to match something this time around with a sub-
3332 	     expression that's in a loop, try to force exit from the loop.  */
3333           if ((! MATCHED_SOMETHING(reg_info[*p])
3334 	       || (enum regexpcode) p[-3] == start_memory)
3335 	      && (p + 1) != pend)
3336             {
3337 	      int is_a_jump_n = 0;
3338 	      register unsigned char *p2 = p + 2;
3339               mcnt = 0;
3340               switch (*p2++)
3341                 {
3342                   case jump_n:
3343 		  case finalize_push_n:
3344 		    is_a_jump_n = 1;
3345                   case finalize_jump:
3346 		  case maybe_finalize_jump:
3347 		  case jump:
3348 		  case dummy_failure_jump:
3349                     EXTRACT_NUMBER_AND_INCR(mcnt, p2);
3350 		    if (is_a_jump_n)
3351 		      p2 += 2;
3352                     break;
3353                 }
3354 	      p2 += mcnt;
3355 
3356               /* If the next operation is a jump backwards in the pattern
3357 	         to an on_failure_jump, exit from the loop by forcing a
3358                  failure after pushing on the stack the on_failure_jump's
3359                  jump in the pattern, and d.  */
3360 	      if (mcnt < 0 && (enum regexpcode) *p2 == on_failure_jump
3361                   && (enum regexpcode) p2[3] == start_memory && p2[4] == *p)
3362 		{
3363                   /* If this group ever matched anything, then restore
3364                      what its registers were before trying this last
3365                      failed match, e.g., with `(a*)*b' against `ab' for
3366                      regstart[1], and, e.g., with `((a*)*(b*)*)*'
3367                      against `aba' for regend[3].
3368 
3369                      Also restore the registers for inner groups for,
3370                      e.g., `((a*)(b*))*' against `aba' (register 3 would
3371                      otherwise get trashed).  */
3372 
3373                   if (EVER_MATCHED_SOMETHING (reg_info[*p]))
3374 		    {
3375 		      unsigned r;
3376 
3377                       EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
3378 
3379 		      /* Restore this and inner groups' (if any) registers.  */
3380                       for (r = *p; r < *p + *(p + 1); r++)
3381                         {
3382                           regstart[r] = old_regstart[r];
3383 
3384                           /* xx why this test?  */
3385                           if ((int) old_regend[r] >= (int) regstart[r])
3386                             regend[r] = old_regend[r];
3387                         }
3388                     }
3389 		  p2++;
3390                   EXTRACT_NUMBER_AND_INCR(mcnt, p2);
3391                   PUSH_FAILURE_POINT(p2 + mcnt, d);
3392                   goto fail;
3393                 }
3394             }
3395           p += 2;
3396 	  continue;
3397 
3398 	/* \<digit> has been turned into a `duplicate' command which is
3399            followed by the numeric value of <digit> as the register number.  */
3400         case duplicate:
3401 	  {
3402 	    int regno = *p++;   /* Get which register to match against */
3403 	    register unsigned char *d2, *dend2;
3404 
3405 	    if (IS_ACTIVE(reg_info[regno])) break;
3406 
3407 	    /* Where in input to try to start matching.  */
3408             d2 = regstart[regno];
3409 	    if (REG_UNSET(d2)) break;
3410 
3411             /* Where to stop matching; if both the place to start and
3412                the place to stop matching are in the same string, then
3413                set to the place to stop, otherwise, for now have to use
3414                the end of the first string.  */
3415 
3416             dend2 = regend[regno];
3417 	    if (REG_UNSET(dend2)) break;
3418 	    for (;;)
3419 	      {
3420 		/* At end of register contents => success */
3421 		if (d2 == dend2) break;
3422 
3423 		/* If necessary, advance to next segment in data.  */
3424 		PREFETCH;
3425 
3426 		/* How many characters left in this segment to match.  */
3427 		mcnt = dend - d;
3428 
3429 		/* Want how many consecutive characters we can match in
3430                    one shot, so, if necessary, adjust the count.  */
3431                 if (mcnt > dend2 - d2)
3432 		  mcnt = dend2 - d2;
3433 
3434 		/* Compare that many; failure if mismatch, else move
3435                    past them.  */
3436 		if ((options & RE_OPTION_IGNORECASE)
3437                     ? memcmp_translate(d, d2, mcnt)
3438                     : memcmp((char *)d, (char *)d2, mcnt))
3439 		  goto fail;
3440 		d += mcnt, d2 += mcnt;
3441 	      }
3442 	  }
3443 	  break;
3444 
3445 	case start_nowidth:
3446           PUSH_FAILURE_POINT(0, d);
3447 	  EXTRACT_NUMBER_AND_INCR(mcnt, p);
3448 	  STORE_NUMBER(p+mcnt, stackp - stackb);
3449 	  continue;
3450 
3451 	case stop_nowidth:
3452 	  EXTRACT_NUMBER_AND_INCR(mcnt, p);
3453 	  stackp = stackb + mcnt;
3454 	  d = stackp[-2];
3455 	  POP_FAILURE_POINT();
3456 	  continue;
3457 
3458 	case pop_and_fail:
3459 	  EXTRACT_NUMBER(mcnt, p+1);
3460 	  stackp = stackb + mcnt;
3461 	  POP_FAILURE_POINT();
3462 	  goto fail;
3463 
3464 	case anychar:
3465 	  PREFETCH;
3466 	  /* Match anything but a newline, maybe even a null.  */
3467 	  if (ismbchar(*d)) {
3468 	    if (d + 1 == dend || d[1] == '\n' || d[1] == '\0')
3469 	      goto fail;
3470 	    SET_REGS_MATCHED;
3471 	    d += 2;
3472 	    break;
3473 	  }
3474 	  if (((TRANSLATE_P()) ? translate[*d] : *d) == '\n')
3475 	    goto fail;
3476 	  SET_REGS_MATCHED;
3477           d++;
3478 	  break;
3479 
3480 	case charset:
3481 	case charset_not:
3482 	  {
3483 	    int not;	    /* Nonzero for charset_not.  */
3484 	    int half;	    /* 2 if need to match latter half of mbc */
3485 	    int c;
3486 
3487 	    PREFETCH;
3488 	    c = (unsigned char)*d;
3489 	    if (ismbchar(c)) {
3490 	      if (d + 1 != dend) {
3491 	        c <<= 8;
3492 		c |= (unsigned char)d[1];
3493 	      }
3494 	    }
3495 	    else if (TRANSLATE_P())
3496 	      c = (unsigned char)translate[c];
3497 
3498 	    half = not = is_in_list(c, p);
3499 	    if (*(p - 1) == (unsigned char)charset_not) {
3500 		not = !not;
3501 	    }
3502 
3503 	    p += 1 + *p + 2 + EXTRACT_UNSIGNED(&p[1 + *p])*4;
3504 
3505 	    if (!not) goto fail;
3506 	    SET_REGS_MATCHED;
3507 
3508             d++;
3509 	    if (half != 2 && d != dend && c >= 1 << BYTEWIDTH)
3510 		d++;
3511 	    break;
3512 	  }
3513 
3514 	case begline:
3515           if (size == 0
3516 	      || AT_STRINGS_BEG(d)
3517               || (d && d[-1] == '\n'))
3518             break;
3519           else
3520             goto fail;
3521 
3522 	case endline:
3523 	  if (AT_STRINGS_END(d) || *d == '\n')
3524 	    break;
3525 	  goto fail;
3526 
3527 	/* Match at the very beginning of the string. */
3528 	case begbuf:
3529           if (AT_STRINGS_BEG(d))
3530             break;
3531           goto fail;
3532 
3533 	/* Match at the very end of the data. */
3534         case endbuf:
3535 	  if (AT_STRINGS_END(d))
3536 	    break;
3537           goto fail;
3538 
3539 	/* Match at the very end of the data. */
3540         case endbuf2:
3541 	  if (AT_STRINGS_END(d))
3542 	    break;
3543 	  /* .. or newline just before the end of the data. */
3544 	  if (*d == '\n' && AT_STRINGS_END(d+1))
3545 	    break;
3546           goto fail;
3547 
3548 	/* `or' constructs are handled by starting each alternative with
3549            an on_failure_jump that points to the start of the next
3550            alternative.  Each alternative except the last ends with a
3551            jump to the joining point.  (Actually, each jump except for
3552            the last one really jumps to the following jump, because
3553            tensioning the jumps is a hassle.)  */
3554 
3555 	/* The start of a stupid repeat has an on_failure_jump that points
3556 	   past the end of the repeat text. This makes a failure point so
3557            that on failure to match a repetition, matching restarts past
3558            as many repetitions have been found with no way to fail and
3559            look for another one.  */
3560 
3561 	/* A smart repeat is similar but loops back to the on_failure_jump
3562 	   so that each repetition makes another failure point.  */
3563 
3564 	case on_failure_jump:
3565         on_failure:
3566           EXTRACT_NUMBER_AND_INCR(mcnt, p);
3567           PUSH_FAILURE_POINT(p + mcnt, d);
3568           continue;
3569 
3570 	/* The end of a smart repeat has a maybe_finalize_jump back.
3571 	   Change it either to a finalize_jump or an ordinary jump.  */
3572 	case maybe_finalize_jump:
3573           EXTRACT_NUMBER_AND_INCR(mcnt, p);
3574 	  {
3575 	    register unsigned char *p2 = p;
3576 
3577             /* Compare the beginning of the repeat with what in the
3578                pattern follows its end. If we can establish that there
3579                is nothing that they would both match, i.e., that we
3580                would have to backtrack because of (as in, e.g., `a*a')
3581                then we can change to pop_failure_jump, because we'll
3582                never have to backtrack.
3583 
3584                This is not true in the case of alternatives: in
3585                `(a|ab)*' we do need to backtrack to the `ab' alternative
3586                (e.g., if the string was `ab').  But instead of trying to
3587                detect that here, the alternative has put on a dummy
3588                failure point which is what we will end up popping.  */
3589 
3590 	    /* Skip over open/close-group commands.  */
3591 	    while (p2 + 2 < pend
3592 		   && ((enum regexpcode) *p2 == stop_memory
3593 		       || (enum regexpcode) *p2 == start_memory))
3594 	      p2 += 3;			/* Skip over args, too.  */
3595 
3596 	    if (p2 == pend)
3597 	      p[-3] = (unsigned char)finalize_jump;
3598 	    else if (*p2 == (unsigned char)exactn
3599 		     || *p2 == (unsigned char)endline)
3600 	      {
3601 		register int c = *p2 == (unsigned char)endline ? '\n' : p2[2];
3602 		register unsigned char *p1 = p + mcnt;
3603 		/* p1[0] ... p1[2] are an on_failure_jump.
3604 		   Examine what follows that.  */
3605 		if (p1[3] == (unsigned char)exactn && p1[5] != c)
3606 		  p[-3] = (unsigned char)finalize_jump;
3607 		else if (p1[3] == (unsigned char)charset
3608 			 || p1[3] == (unsigned char)charset_not) {
3609 		    int not;
3610 		    if (ismbchar(c))
3611 		      c = c << 8 | p2[3];
3612 		    /* `is_in_list()' is TRUE if c would match */
3613 		    /* That means it is not safe to finalize.  */
3614 		    not = is_in_list(c, p1 + 4);
3615 		    if (p1[3] == (unsigned char)charset_not)
3616 			not = !not;
3617 		    if (!not)
3618 			p[-3] = (unsigned char)finalize_jump;
3619 		  }
3620 	      }
3621 	  }
3622 	  p -= 2;		/* Point at relative address again.  */
3623 	  if (p[-1] != (unsigned char)finalize_jump)
3624 	    {
3625 	      p[-1] = (unsigned char)jump;
3626 	      goto nofinalize;
3627 	    }
3628         /* Note fall through.  */
3629 
3630 	/* The end of a stupid repeat has a finalize_jump back to the
3631            start, where another failure point will be made which will
3632            point to after all the repetitions found so far.  */
3633 
3634         /* Take off failure points put on by matching on_failure_jump
3635            because didn't fail.  Also remove the register information
3636            put on by the on_failure_jump.  */
3637         case finalize_jump:
3638           POP_FAILURE_POINT();
3639         /* Note fall through.  */
3640 
3641 	/* Jump without taking off any failure points.  */
3642         case jump:
3643 	nofinalize:
3644 	  EXTRACT_NUMBER_AND_INCR(mcnt, p);
3645 	  p += mcnt;
3646 	  continue;
3647 
3648         /* We need this opcode so we can detect where alternatives end
3649            in `group_match_null_string_p' et al.  */
3650         case jump_past_alt:
3651           goto nofinalize;
3652 
3653         case dummy_failure_jump:
3654           /* Normally, the on_failure_jump pushes a failure point, which
3655              then gets popped at finalize_jump.  We will end up at
3656              finalize_jump, also, and with a pattern of, say, `a+', we
3657              are skipping over the on_failure_jump, so we have to push
3658              something meaningless for finalize_jump to pop.  */
3659           PUSH_FAILURE_POINT(0, 0);
3660           goto nofinalize;
3661 
3662         /* At the end of an alternative, we need to push a dummy failure
3663            point in case we are followed by a `finalize_jump', because
3664            we don't want the failure point for the alternative to be
3665            popped.  For example, matching `(a|ab)*' against `aab'
3666            requires that we match the `ab' alternative.  */
3667         case push_dummy_failure:
3668           /* See comments just above at `dummy_failure_jump' about the
3669              two zeroes.  */
3670           PUSH_FAILURE_POINT(0, 0);
3671           break;
3672 
3673         /* Have to succeed matching what follows at least n times.  Then
3674           just handle like an on_failure_jump.  */
3675         case succeed_n:
3676           EXTRACT_NUMBER(mcnt, p + 2);
3677           /* Originally, this is how many times we HAVE to succeed.  */
3678           if (mcnt > 0)
3679             {
3680                mcnt--;
3681 	       p += 2;
3682                STORE_NUMBER_AND_INCR(p, mcnt);
3683 	       PUSH_FAILURE_POINT(0, 0);
3684             }
3685 	  else if (mcnt == 0)
3686             {
3687 	      p[2] = unused;
3688               p[3] = unused;
3689               goto on_failure;
3690             }
3691 	  continue;
3692 
3693         case jump_n:
3694           EXTRACT_NUMBER(mcnt, p + 2);
3695           /* Originally, this is how many times we CAN jump.  */
3696           if (mcnt)
3697             {
3698                mcnt--;
3699                STORE_NUMBER(p + 2, mcnt);
3700 	       goto nofinalize;	     /* Do the jump without taking off
3701 			                any failure points.  */
3702             }
3703           /* If don't have to jump any more, skip over the rest of command.  */
3704 	  else
3705 	    p += 4;
3706 	  continue;
3707 
3708 	case set_number_at:
3709 	  {
3710   	    register unsigned char *p1;
3711 
3712             EXTRACT_NUMBER_AND_INCR(mcnt, p);
3713             p1 = p + mcnt;
3714             EXTRACT_NUMBER_AND_INCR(mcnt, p);
3715 	    STORE_NUMBER(p1, mcnt);
3716             continue;
3717           }
3718 
3719 	case try_next:
3720 	  EXTRACT_NUMBER_AND_INCR(mcnt, p);
3721 	  if (p + mcnt < pend) {
3722 	    PUSH_FAILURE_POINT(p, d);
3723 	    stackp[-1] = (unsigned char*)1;
3724 	  }
3725 	  p += mcnt;
3726 	  continue;
3727 
3728 	case finalize_push:
3729           POP_FAILURE_POINT();
3730 	  EXTRACT_NUMBER_AND_INCR(mcnt, p);
3731           PUSH_FAILURE_POINT(p + mcnt, d);
3732 	  stackp[-1] = (unsigned char*)1;
3733 	  continue;
3734 
3735 	case finalize_push_n:
3736           EXTRACT_NUMBER(mcnt, p + 2);
3737          /* Originally, this is how many times we CAN jump.  */
3738           if (mcnt) {
3739 	    int pos, i;
3740 
3741 	    mcnt--;
3742 	    STORE_NUMBER(p + 2, mcnt);
3743 	    EXTRACT_NUMBER(pos, p);
3744 	    EXTRACT_NUMBER(i, p+pos+5);
3745 	    if (i > 0) goto nofinalize;
3746 	    POP_FAILURE_POINT();
3747 	    EXTRACT_NUMBER_AND_INCR(mcnt, p);
3748 	    PUSH_FAILURE_POINT(p + mcnt, d);
3749 	    stackp[-1] = (unsigned char*)1;
3750 	    p += 2;		/* skip n */
3751 	  }
3752           /* If don't have to push any more, skip over the rest of command.  */
3753 	  else
3754 	    p += 4;
3755 	  continue;
3756 
3757         /* Ignore these.  Used to ignore the n of succeed_n's which
3758            currently have n == 0.  */
3759         case unused:
3760 	  continue;
3761 
3762         case casefold_on:
3763 	  options |= RE_OPTION_IGNORECASE;
3764 	  continue;
3765 
3766         case casefold_off:
3767 	  options &= ~RE_OPTION_IGNORECASE;
3768 	  continue;
3769 
3770         case wordbound:
3771 	  if (AT_STRINGS_BEG(d)) {
3772 	    if (IS_A_LETTER(d)) break;
3773 	    else goto fail;
3774 	  }
3775 	  if (AT_STRINGS_BEG(d)) {
3776 	    if (IS_A_LETTER(d-1)) break;
3777 	    else goto fail;
3778 	  }
3779 	  if (IS_A_LETTER(d - 1) != IS_A_LETTER(d))
3780 	    break;
3781 	  goto fail;
3782 
3783 	case notwordbound:
3784 	  if (AT_STRINGS_BEG(d)) {
3785 	    if (IS_A_LETTER(d)) goto fail;
3786 	    else break;
3787 	  }
3788 	  if (AT_STRINGS_END(d)) {
3789 	    if (IS_A_LETTER(d-1)) goto fail;
3790 	    else break;
3791 	  }
3792 	  if (IS_A_LETTER(d - 1) != IS_A_LETTER(d))
3793 	    goto fail;
3794 	  break;
3795 
3796 	case wordbeg:
3797 	  if (IS_A_LETTER(d) && (AT_STRINGS_BEG(d) || !IS_A_LETTER(d - 1)))
3798 	    break;
3799           goto fail;
3800 
3801 	case wordend:
3802 	  if (!AT_STRINGS_BEG(d) && IS_A_LETTER(d - 1)
3803               && (!IS_A_LETTER(d) || AT_STRINGS_END(d)))
3804 	    break;
3805           goto fail;
3806 
3807 	case wordchar:
3808 	  PREFETCH;
3809           if (!IS_A_LETTER(d))
3810             goto fail;
3811 	  d++;
3812 	  SET_REGS_MATCHED;
3813 	  break;
3814 
3815 	case notwordchar:
3816 	  PREFETCH;
3817 	  if (IS_A_LETTER(d))
3818             goto fail;
3819 	  if (ismbchar(*d) && d + 1 != dend)
3820 	    d++;
3821 	  d++;
3822           SET_REGS_MATCHED;
3823 	  break;
3824 
3825 	case exactn:
3826 	  /* Match the next few pattern characters exactly.
3827 	     mcnt is how many characters to match.  */
3828 	  mcnt = *p++;
3829 	  /* This is written out as an if-else so we don't waste time
3830              testing `translate' inside the loop.  */
3831           if (TRANSLATE_P())
3832 	    {
3833 	      do
3834 		{
3835 		  unsigned char c;
3836 
3837 		  PREFETCH;
3838 		  c = *d++;
3839 		  if (*p == 0xff) {
3840 		    p++;
3841 		    if (!--mcnt
3842 			|| AT_STRINGS_END(d)
3843 			|| (unsigned char)*d++ != (unsigned char)*p++)
3844 		      goto fail;
3845 		    continue;
3846 		  }
3847 		  if (ismbchar(c)) {
3848 		    if (c != (unsigned char)*p++
3849 			|| !--mcnt	/* redundant check if pattern was
3850 					   compiled properly. */
3851 			|| AT_STRINGS_END(d)
3852 			|| (unsigned char)*d++ != (unsigned char)*p++)
3853 		      goto fail;
3854 		    continue;
3855 		  }
3856 		  /* compiled code translation needed for ruby */
3857 		  if ((unsigned char)translate[c]
3858 		      != (unsigned char)translate[*p++])
3859 		    goto fail;
3860 		}
3861 	      while (--mcnt);
3862 	    }
3863 	  else
3864 	    {
3865 	      do
3866 		{
3867 		  PREFETCH;
3868 		  if (*p == 0xff) {p++; mcnt--;}
3869 		  if (*d++ != *p++) goto fail;
3870 		}
3871 	      while (--mcnt);
3872 	    }
3873 	  SET_REGS_MATCHED;
3874           break;
3875 	}
3876       while (stackp != stackb && (int)stackp[-1] == 1)
3877 	POP_FAILURE_POINT();
3878       continue;  /* Successfully executed one pattern command; keep going.  */
3879 
3880     /* Jump here if any matching operation fails. */
3881     fail:
3882       if (stackp != stackb)
3883 	/* A restart point is known.  Restart there and pop it. */
3884 	{
3885           short last_used_reg, this_reg;
3886 
3887           /* If this failure point is from a dummy_failure_point, just
3888              skip it.  */
3889 	  if (stackp[-3] == 0) {
3890 	    POP_FAILURE_POINT();
3891 	    goto fail;
3892 	  }
3893 	  stackp--;		/* discard flag */
3894           d = *--stackp;
3895 	  p = *--stackp;
3896           /* Restore register info.  */
3897           last_used_reg = (long) *--stackp;
3898 
3899           /* Make the ones that weren't saved -1 or 0 again. */
3900           for (this_reg = num_regs - 1; this_reg > last_used_reg; this_reg--)
3901             {
3902               regend[this_reg] = REG_UNSET_VALUE;
3903               regstart[this_reg] = REG_UNSET_VALUE;
3904               IS_ACTIVE(reg_info[this_reg]) = 0;
3905               MATCHED_SOMETHING(reg_info[this_reg]) = 0;
3906             }
3907 
3908           /* And restore the rest from the stack.  */
3909           for ( ; this_reg > 0; this_reg--)
3910             {
3911               reg_info[this_reg].word = *--stackp;
3912               regend[this_reg] = *--stackp;
3913               regstart[this_reg] = *--stackp;
3914             }
3915           if (p < pend)
3916             {
3917               int is_a_jump_n = 0;
3918 	      unsigned char *p1 = p;
3919 
3920               /* If failed to a backwards jump that's part of a repetition
3921                  loop, need to pop this failure point and use the next one.  */
3922               switch ((enum regexpcode) *p1)
3923                 {
3924                 case jump_n:
3925                 case finalize_push_n:
3926                   is_a_jump_n = 1;
3927                 case maybe_finalize_jump:
3928                 case finalize_jump:
3929                 case finalize_push:
3930                 case jump:
3931                   p1++;
3932                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3933                   p1 += mcnt;
3934 
3935                   if ((is_a_jump_n && (enum regexpcode) *p1 == succeed_n)
3936                       || (!is_a_jump_n
3937                           && (enum regexpcode) *p1 == on_failure_jump))
3938                     goto fail;
3939                   break;
3940                 default:
3941                   /* do nothing */ ;
3942                 }
3943             }
3944         }
3945       else
3946         break;   /* Matching at this starting point really fails.  */
3947     }
3948 
3949   if (best_regs_set)
3950     goto restore_best_regs;
3951 
3952   FREE_AND_RETURN(stackb,(-1)); 	/* Failure to match.  */
3953 }
3954 
3955 
3956 /* We are passed P pointing to a register number after a start_memory.
3957 
3958    Return true if the pattern up to the corresponding stop_memory can
3959    match the empty string, and false otherwise.
3960 
3961    If we find the matching stop_memory, sets P to point to one past its number.
3962    Otherwise, sets P to an undefined byte less than or equal to END.
3963 
3964    We don't handle duplicates properly (yet).  */
3965 
3966 static int
group_match_null_string_p(p,end,reg_info)3967 group_match_null_string_p (p, end, reg_info)
3968     unsigned char **p, *end;
3969     register_info_type *reg_info;
3970 {
3971   int mcnt;
3972   /* Point to after the args to the start_memory.  */
3973   unsigned char *p1 = *p + 2;
3974 
3975   while (p1 < end)
3976     {
3977       /* Skip over opcodes that can match nothing, and return true or
3978 	 false, as appropriate, when we get to one that can't, or to the
3979          matching stop_memory.  */
3980 
3981       switch ((enum regexpcode) *p1)
3982         {
3983         /* Could be either a loop or a series of alternatives.  */
3984         case on_failure_jump:
3985           p1++;
3986           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
3987 
3988           /* If the next operation is not a jump backwards in the
3989 	     pattern.  */
3990 
3991 	  if (mcnt >= 0)
3992 	    {
3993               /* Go through the on_failure_jumps of the alternatives,
3994                  seeing if any of the alternatives cannot match nothing.
3995                  The last alternative starts with only a jump,
3996                  whereas the rest start with on_failure_jump and end
3997                  with a jump, e.g., here is the pattern for `a|b|c':
3998 
3999                  /on_failure_jump/0/6/exactn/1/a/jump_past_alt/0/6
4000                  /on_failure_jump/0/6/exactn/1/b/jump_past_alt/0/3
4001                  /exactn/1/c
4002 
4003                  So, we have to first go through the first (n-1)
4004                  alternatives and then deal with the last one separately.  */
4005 
4006 
4007               /* Deal with the first (n-1) alternatives, which start
4008                  with an on_failure_jump (see above) that jumps to right
4009                  past a jump_past_alt.  */
4010 
4011               while ((enum regexpcode) p1[mcnt-3] == jump_past_alt)
4012                 {
4013                   /* `mcnt' holds how many bytes long the alternative
4014                      is, including the ending `jump_past_alt' and
4015                      its number.  */
4016 
4017                   if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
4018 				                      reg_info))
4019                     return 0;
4020 
4021                   /* Move to right after this alternative, including the
4022 		     jump_past_alt.  */
4023                   p1 += mcnt;
4024 
4025                   /* Break if it's the beginning of an n-th alternative
4026                      that doesn't begin with an on_failure_jump.  */
4027                   if ((enum regexpcode) *p1 != on_failure_jump)
4028                     break;
4029 
4030 		  /* Still have to check that it's not an n-th
4031 		     alternative that starts with an on_failure_jump.  */
4032 		  p1++;
4033                   EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4034                   if ((enum regexpcode) p1[mcnt-3] != jump_past_alt)
4035                     {
4036 		      /* Get to the beginning of the n-th alternative.  */
4037                       p1 -= 3;
4038                       break;
4039                     }
4040                 }
4041 
4042               /* Deal with the last alternative: go back and get number
4043                  of the `jump_past_alt' just before it.  `mcnt' contains
4044                  the length of the alternative.  */
4045               EXTRACT_NUMBER (mcnt, p1 - 2);
4046 #if 0
4047               if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
4048                 return 0;
4049 #endif
4050               p1 += mcnt;	/* Get past the n-th alternative.  */
4051             } /* if mcnt > 0 */
4052           break;
4053 
4054 
4055         case stop_memory:
4056           *p = p1 + 2;
4057           return 1;
4058 
4059 
4060         default:
4061           if (!common_op_match_null_string_p (&p1, end, reg_info))
4062             return 0;
4063         }
4064     } /* while p1 < end */
4065 
4066   return 0;
4067 } /* group_match_null_string_p */
4068 
4069 
4070 /* Similar to group_match_null_string_p, but doesn't deal with alternatives:
4071    It expects P to be the first byte of a single alternative and END one
4072    byte past the last. The alternative can contain groups.  */
4073 
4074 static int
alt_match_null_string_p(p,end,reg_info)4075 alt_match_null_string_p (p, end, reg_info)
4076     unsigned char *p, *end;
4077     register_info_type *reg_info;
4078 {
4079   int mcnt;
4080   unsigned char *p1 = p;
4081 
4082   while (p1 < end)
4083     {
4084       /* Skip over opcodes that can match nothing, and break when we get
4085          to one that can't.  */
4086 
4087       switch ((enum regexpcode) *p1)
4088         {
4089 	/* It's a loop.  */
4090         case on_failure_jump:
4091           p1++;
4092           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4093           p1 += mcnt;
4094           break;
4095 
4096 	default:
4097           if (!common_op_match_null_string_p (&p1, end, reg_info))
4098             return 0;
4099         }
4100     }  /* while p1 < end */
4101 
4102   return 1;
4103 } /* alt_match_null_string_p */
4104 
4105 
4106 /* Deals with the ops common to group_match_null_string_p and
4107    alt_match_null_string_p.
4108 
4109    Sets P to one after the op and its arguments, if any.  */
4110 
4111 static int
common_op_match_null_string_p(p,end,reg_info)4112 common_op_match_null_string_p (p, end, reg_info)
4113     unsigned char **p, *end;
4114     register_info_type *reg_info;
4115 {
4116   int mcnt;
4117   int ret;
4118   int reg_no;
4119   unsigned char *p1 = *p;
4120 
4121   switch ((enum regexpcode) *p1++)
4122     {
4123     case unused:
4124     case begline:
4125     case endline:
4126     case begbuf:
4127     case endbuf:
4128     case endbuf2:
4129     case wordbeg:
4130     case wordend:
4131     case wordbound:
4132     case notwordbound:
4133 #ifdef emacs
4134     case before_dot:
4135     case at_dot:
4136     case after_dot:
4137 #endif
4138       break;
4139 
4140     case start_memory:
4141       reg_no = *p1;
4142       ret = group_match_null_string_p (&p1, end, reg_info);
4143 
4144       /* Have to set this here in case we're checking a group which
4145          contains a group and a back reference to it.  */
4146 
4147       if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
4148         REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
4149 
4150       if (!ret)
4151         return 0;
4152       break;
4153 
4154     /* If this is an optimized succeed_n for zero times, make the jump.  */
4155     case jump:
4156       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4157       if (mcnt >= 0)
4158         p1 += mcnt;
4159       else
4160         return 0;
4161       break;
4162 
4163     case succeed_n:
4164       /* Get to the number of times to succeed.  */
4165       p1 += 2;
4166       EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4167 
4168       if (mcnt == 0)
4169         {
4170           p1 -= 4;
4171           EXTRACT_NUMBER_AND_INCR (mcnt, p1);
4172           p1 += mcnt;
4173         }
4174       else
4175         return 0;
4176       break;
4177 
4178     case duplicate:
4179       if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
4180         return 0;
4181       break;
4182 
4183     case set_number_at:
4184       p1 += 4;
4185 
4186     default:
4187       /* All other opcodes mean we cannot match the empty string.  */
4188       return 0;
4189   }
4190 
4191   *p = p1;
4192   return 1;
4193 } /* common_op_match_null_string_p */
4194 
4195 
4196 static int
memcmp_translate(s1,s2,len)4197 memcmp_translate(s1, s2, len)
4198      unsigned char *s1, *s2;
4199      register int len;
4200 {
4201   register unsigned char *p1 = s1, *p2 = s2, c;
4202   while (len)
4203     {
4204       c = *p1++;
4205       if (ismbchar(c)) {
4206 	if (c != *p2++ || !--len || *p1++ != *p2++)
4207 	  return 1;
4208       }
4209       else
4210 	if (translate[c] != translate[*p2++])
4211 	  return 1;
4212       len--;
4213     }
4214   return 0;
4215 }
4216 
4217 void
re_copy_registers(regs1,regs2)4218 re_copy_registers(regs1, regs2)
4219      struct re_registers *regs1, *regs2;
4220 {
4221     int i;
4222 
4223     if (regs1 == regs2) return;
4224     if (regs1->allocated == 0) {
4225 	regs1->beg = TMALLOC(regs2->num_regs, int);
4226 	regs1->end = TMALLOC(regs2->num_regs, int);
4227 	regs1->allocated = regs2->num_regs;
4228     }
4229     else if (regs1->allocated < regs2->num_regs) {
4230 	TREALLOC(regs1->beg, regs2->num_regs, int);
4231 	TREALLOC(regs1->end, regs2->num_regs, int);
4232 	regs1->allocated = regs2->num_regs;
4233     }
4234     for (i=0; i<regs2->num_regs; i++) {
4235 	regs1->beg[i] = regs2->beg[i];
4236 	regs1->end[i] = regs2->end[i];
4237     }
4238     regs1->num_regs = regs2->num_regs;
4239 }
4240 
4241 void
re_free_registers(regs)4242 re_free_registers(regs)
4243      struct re_registers *regs;
4244 {
4245     if (regs->allocated == 0) return;
4246     if (regs->beg) free(regs->beg);
4247     if (regs->end) free(regs->end);
4248 }
4249 
4250 /* Functions for multi-byte support.
4251    Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto)
4252    Last change: Jul. 9, 1993 by t^2  */
4253 static const unsigned char mbctab_ascii[] = {
4254   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4255   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4256   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4257   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4258   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4259   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4260   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4261   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4262   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4263   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4264   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4265   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4266   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4267   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4268   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4269   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4270 };
4271 
4272 static const unsigned char mbctab_euc[] = { /* 0xA1-0xFE */
4273   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4274   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4275   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4276   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4277   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4278   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4279   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4280   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4281   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
4282   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4283   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4284   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4285   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4286   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4287   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4288   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
4289 };
4290 
4291 static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
4292   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4293   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4294   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4295   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4296   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4297   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4298   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4299   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4300   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4301   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4302   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4303   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4304   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4305   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4306   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4307   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
4308 };
4309 
4310 const unsigned char *mbctab = mbctab_ascii;
4311 int current_mbctype = MBCTYPE_ASCII;
4312 
4313 void
re_mbcinit(mbctype)4314 re_mbcinit(mbctype)
4315      int mbctype;
4316 {
4317   switch (mbctype) {
4318   case MBCTYPE_ASCII:
4319     mbctab = mbctab_ascii;
4320     current_mbctype = MBCTYPE_ASCII;
4321     break;
4322   case MBCTYPE_EUC:
4323     mbctab = mbctab_euc;
4324     current_mbctype = MBCTYPE_EUC;
4325     break;
4326   case MBCTYPE_SJIS:
4327     mbctab = mbctab_sjis;
4328     current_mbctype = MBCTYPE_SJIS;
4329     break;
4330   }
4331 }
4332