1 /* Add nmz prefix by Satoru Takabayashi */
2 /* Extended regular expression matching and search library.
3    Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
4 
5    The GNU C Library is free software; you can redistribute it and/or
6    modify it under the terms of the GNU Library General Public License as
7    published by the Free Software Foundation; either version 2 of the
8    License, or (at your option) any later version.
9 
10    The GNU C Library is distributed in the hope that it will be useful,
11    but WITHOUT ANY WARRANTY; without even the implied warranty of
12    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13    Library General Public License for more details.
14 
15    You should have received a copy of the GNU Library General Public
16    License along with the GNU C Library; see the file COPYING.LIB.  If not,
17    write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18    Boston, MA 02111-1307, USA.  */
19 /* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
20    Last change: May 21, 1993 by t^2  */
21 /* removed gapped buffer support, multiple syntax support by matz <matz@nts.co.jp> */
22 /* Perl5 extension added by matz <matz@caelum.co.jp> */
23 /* UTF-8 extension added Jan 16 1999 by Yoshida Masato  <yoshidam@tau.bekkoame.ne.jp> */
24 
25 #ifdef HAVE_CONFIG_H
26 #  include "config.h"
27 #endif
28 #ifdef HAVE_SUPPORT_H
29 #  include "support.h"
30 #endif
31 
32 #ifdef HAVE_STRING_H
33 #  include <string.h>
34 #else
35 #  include <strings.h>
36 #endif
37 
38 /* We write fatal error messages on standard error.  */
39 #include <stdio.h>
40 
41 /* isalpha(3) etc. are used for the character classes.  */
42 #include <ctype.h>
43 #include <sys/types.h>
44 
45 #ifndef PARAMS
46 # if defined __GNUC__ || (defined __STDC__ && __STDC__)
47 #  define PARAMS(args) args
48 # else
49 #  define PARAMS(args) ()
50 # endif  /* GCC.  */
51 #endif  /* Not PARAMS.  */
52 
53 #if defined(STDC_HEADERS)
54 # include <stddef.h>
55 #else
56 /* We need this for `regex.h', and perhaps for the Emacs include files.  */
57 # include <sys/types.h>
58 #endif
59 
60 #ifndef __STDC__
61 # define volatile
62 #endif
63 
64 #ifdef HAVE_PROTOTYPES
65 # define _(args) args
66 #else
67 # define _(args) ()
68 #endif
69 
70 #ifdef RUBY_PLATFORM
71 #include "defines.h"
72 
73 # define RUBY
74 extern int rb_prohibit_interrupt;
75 extern int rb_trap_pending;
76 void rb_trap_exec _((void));
77 
78 # define CHECK_INTS if (!rb_prohibit_interrupt) {\
79     if (rb_trap_pending) rb_trap_exec();\
80 }
81 
82 #define nmz_xmalloc ruby_xmalloc
83 #define xcalloc ruby_xcalloc
84 #define nmz_xrealloc ruby_xrealloc
85 #define xfree ruby_xfree
86 
87 void *nmz_xmalloc _((size_t));
88 void *xcalloc _((size_t,size_t));
89 void *nmz_xrealloc _((void*,size_t));
90 void xfree _((void*));
91 #endif
92 
93 /* for using libnamazu */
94 #ifdef HAVE_STDLIB_H
95 #  include <stdlib.h>
96 #endif
97 #include "util.h"
98 #define xfree free
99 
100 #ifndef NO_ALLOCA
101 /* Make alloca work the best possible way.  */
102 #ifdef __GNUC__
103 # ifndef atarist
104 #  ifndef alloca
105 #   define alloca __builtin_alloca
106 #  endif
107 # endif /* atarist */
108 #else
109 # ifdef _MSC_VER
110 #  include <malloc.h>
111 #  define alloca _alloca
112 # else
113 #  if HAVE_ALLOCA_H
114 #   include <alloca.h>
115 #  else
116 #   ifdef _AIX
117  #pragma alloca
118 #   else
119 #    ifndef alloca /* predefined by HP cc +Olibcalls */
120 char *alloca ();
121 #    endif
122 #   endif
123 #  endif
124 # endif
125 #endif
126 #endif /* NO_ALLOCA */
127 
128 #ifdef HAVE_STRING_H
129 # include <string.h>
130 #else
131 # include <strings.h>
132 #endif
133 
134 #ifndef NO_ALLOCA
135 #ifdef C_ALLOCA
136 #define FREE_VARIABLES() alloca(0)
137 #else
138 #define FREE_VARIABLES()
139 #endif
140 #else /* NO_ALLOCA */
141 #define FREE_VARIABLES()
142 #endif /* NO_ALLOCA */
143 
144 #define FREE_AND_RETURN_VOID(stackb)   do {				\
145   FREE_VARIABLES();							\
146   if (stackb != stacka) xfree(stackb);					\
147   return;								\
148 } while(0)
149 
150 #define FREE_AND_RETURN(stackb,val)    do {				\
151   FREE_VARIABLES();							\
152   if (stackb != stacka) xfree(stackb);					\
153   return(val);								\
154 } while(0)
155 
156 #define DOUBLE_STACK(type) do {						\
157   type *stackx;								\
158   unsigned int xlen = stacke - stackb; 					\
159   if (stackb == stacka) {						\
160     stackx = (type*)nmz_xmalloc(2 * xlen * sizeof(type));			\
161     memcpy(stackx, stackb, xlen * sizeof (type));			\
162   }									\
163   else {								\
164     stackx = (type*)nmz_xrealloc(stackb, 2 * xlen * sizeof(type));		\
165   }									\
166   /* Rearrange the pointers. */						\
167   stackp = stackx + (stackp - stackb);					\
168   stackb = stackx;							\
169   stacke = stackb + 2 * xlen;						\
170 } while (0)
171 
172 #define MALLOC_STACK(type, xlen) do {					\
173   type *stackx;								\
174   stackx = (type*)nmz_xmalloc((unsigned int)xlen * sizeof(type));	\
175   memset(stackx, 0, (unsigned int)xlen * sizeof (type));		\
176   stackb = stackx;							\
177   stackp = stackb;							\
178   stacke = stackb + (unsigned int)xlen;					\
179 } while (0)
180 
181 #define RE_TALLOC(n,t)  ((t*)nmz_xmalloc((n)*sizeof(t)))
182 #define TMALLOC(n,t)    ((t*)nmz_xmalloc((n)*sizeof(t)))
183 #define TREALLOC(s,n,t) (s=((t*)nmz_xrealloc(s,(n)*sizeof(t))))
184 
185 #define EXPAND_FAIL_STACK() DOUBLE_STACK(unsigned char*)
186 #define ENSURE_FAIL_STACK(n)						\
187   do {									\
188     if (stacke - stackp <= (n)) {					\
189 	/* if (len > re_max_failures * MAX_NUM_FAILURE_ITEMS)		\
190 	   {								\
191 	   FREE_AND_RETURN(stackb,(-2));				\
192 	   }*/								\
193 									\
194         /* Roughly double the size of the stack.  */			\
195         EXPAND_FAIL_STACK();						\
196       }									\
197   } while (0)
198 
199 /* Get the interface, including the syntax bits.  */
200 #include "regex.h"
201 
202 /* Subroutines for nmz_re_compile_pattern.  */
203 static void store_jump _((char*, int, char*));
204 static void insert_jump _((int, char*, char*, char*));
205 static void store_jump_n _((char*, int, char*, unsigned));
206 static void insert_jump_n _((int, char*, char*, char*, unsigned));
207 #if 0
208 static void insert_op _((int, char*, char*));
209 #endif
210 static void insert_op_2 _((int, char*, char*, int, int));
211 static int memcmp_translate _((unsigned char*, unsigned char*, int));
212 
213 /* Define the syntax stuff, so we can do the \<, \>, etc.  */
214 
215 /* This must be nonzero for the wordchar and notwordchar pattern
216    commands in nmz_re_match.  */
217 #define Sword  1
218 #define Sword2 2
219 
220 #define SYNTAX(c) re_syntax_table[c]
221 
222 static char re_syntax_table[256];
223 static void init_syntax_once _((void));
224 static const unsigned char *translate = 0;
225 static void init_regs _((struct re_registers*, unsigned int));
226 static void bm_init_skip _((int *, unsigned char*, int, const unsigned char*));
227 static int current_mbctype = MBCTYPE_ASCII;
228 
229 #undef P
230 
231 #ifdef RUBY
232 #include "util.h"
233 #endif
234 
235 static void
init_syntax_once()236 init_syntax_once()
237 {
238    register int c;
239    static int done = 0;
240 
241    if (done)
242      return;
243 
244    memset(re_syntax_table, 0, sizeof re_syntax_table);
245 
246    for (c=0; c<=0x7f; c++)
247      if (isalnum(c))
248        re_syntax_table[c] = Sword;
249    re_syntax_table['_'] = Sword;
250 
251    for (c=0x80; c<=0xff; c++)
252      if (isalnum(c))
253        re_syntax_table[c] = Sword2;
254    done = 1;
255 }
256 
257 void
nmz_re_set_casetable(table)258 nmz_re_set_casetable(table)
259      const char *table;
260 {
261   translate = (const unsigned char*)table;
262 }
263 
264 /* Jim Meyering writes:
265 
266    "... Some ctype macros are valid only for character codes that
267    isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
268    using /bin/cc or gcc but without giving an ansi option).  So, all
269    ctype uses should be through macros like ISPRINT...  If
270    STDC_HEADERS is defined, then autoconf has verified that the ctype
271    macros don't need to be guarded with references to isascii. ...
272    Defining isascii to 1 should let any compiler worth its salt
273    eliminate the && through constant folding."
274    Solaris defines some of these symbols so we must undefine them first.  */
275 
276 #undef ISASCII
277 #if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
278 # define ISASCII(c) 1
279 #else
280 # define ISASCII(c) isascii(c)
281 #endif
282 
283 #ifdef isblank
284 # define ISBLANK(c) (ISASCII(c) && isblank(c))
285 #else
286 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
287 #endif
288 #ifdef isgraph
289 # define ISGRAPH(c) (ISASCII(c) && isgraph(c))
290 #else
291 # define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
292 #endif
293 
294 #undef ISPRINT
295 #define ISPRINT(c) (ISASCII(c) && isprint(c))
296 #define ISDIGIT(c) (ISASCII(c) && isdigit(c))
297 #define ISALNUM(c) (ISASCII(c) && isalnum(c))
298 #define ISALPHA(c) (ISASCII(c) && isalpha(c))
299 #define ISCNTRL(c) (ISASCII(c) && iscntrl(c))
300 #define ISLOWER(c) (ISASCII(c) && islower(c))
301 #define ISPUNCT(c) (ISASCII(c) && ispunct(c))
302 #define ISSPACE(c) (ISASCII(c) && isspace(c))
303 #define ISUPPER(c) (ISASCII(c) && isupper(c))
304 #define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
305 
306 #ifndef NULL
307 # define NULL (void *)0
308 #endif
309 
310 /* We remove any previous definition of `SIGN_EXTEND_CHAR',
311    since ours (we hope) works properly with all combinations of
312    machines, compilers, `char' and `unsigned char' argument types.
313    (Per Bothner suggested the basic approach.)  */
314 #undef SIGN_EXTEND_CHAR
315 #if __STDC__
316 # define SIGN_EXTEND_CHAR(c) ((signed char)(c))
317 #else  /* not __STDC__ */
318 /* As in Harbison and Steele.  */
319 # define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
320 #endif
321 
322 /* These are the command codes that appear in compiled regular
323    expressions, one per byte.  Some command codes are followed by
324    argument bytes.  A command code can specify any interpretation
325    whatsoever for its arguments.  Zero-bytes may appear in the compiled
326    regular expression.
327 
328    The value of `exactn' is needed in search.c (search_buffer) in emacs.
329    So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
330    `exactn' we use here must also be 1.  */
331 
332 enum regexpcode
333   {
334     unused=0,
335     exactn=1, /* Followed by one byte giving n, then by n literal bytes.  */
336     begline,  /* Fail unless at beginning of line.  */
337     endline,  /* Fail unless at end of line.  */
338     begbuf,   /* Succeeds if at beginning of buffer (if emacs) or at beginning
339                  of string to be matched (if not).  */
340     endbuf,   /* Analogously, for end of buffer/string.  */
341     endbuf2,  /* End of buffer/string, or newline just before it.  */
342     begpos,   /* Matches where last scan//gsub left off.  */
343     jump,     /* Followed by two bytes giving relative address to jump to.  */
344     jump_past_alt,/* Same as jump, but marks the end of an alternative.  */
345     on_failure_jump,	 /* Followed by two bytes giving relative address of
346 			    place to resume at in case of failure.  */
347     finalize_jump,	 /* Throw away latest failure point and then jump to
348 			    address.  */
349     maybe_finalize_jump, /* Like jump but finalize if safe to do so.
350 			    This is used to jump back to the beginning
351 			    of a repeat.  If the command that follows
352 			    this jump is clearly incompatible with the
353 			    one at the beginning of the repeat, such that
354 			    we can be sure that there is no use backtracking
355 			    out of repetitions already completed,
356 			    then we finalize.  */
357     dummy_failure_jump,  /* Jump, and push a dummy failure point. This
358 			    failure point will be thrown away if an attempt
359                             is made to use it for a failure. A + construct
360                             makes this before the first repeat.  Also
361                             use it as an intermediary kind of jump when
362                             compiling an or construct.  */
363     push_dummy_failure, /* Push a dummy failure point and continue.  Used at the end of
364 			   alternatives.  */
365     succeed_n,	 /* Used like on_failure_jump except has to succeed n times;
366 		    then gets turned into an on_failure_jump. The relative
367                     address following it is useless until then.  The
368                     address is followed by two bytes containing n.  */
369     jump_n,	 /* Similar to jump, but jump n times only; also the relative
370 		    address following is in turn followed by yet two more bytes
371                     containing n.  */
372     try_next,    /* Jump to next pattern for the first time,
373 		    leaving this pattern on the failure stack. */
374     finalize_push,	/* Finalize stack and push the beginning of the pattern
375 			   on the stack to retry (used for non-greedy match) */
376     finalize_push_n,	/* Similar to finalize_push, buf finalize n time only */
377     set_number_at,	/* Set the following relative location to the
378 			   subsequent number.  */
379     anychar,	 /* Matches any (more or less) one character excluding newlines.  */
380     anychar_repeat,	 /* Matches sequence of characters excluding newlines.  */
381     charset,     /* Matches any one char belonging to specified set.
382 		    First following byte is number of bitmap bytes.
383 		    Then come bytes for a bitmap saying which chars are in.
384 		    Bits in each byte are ordered low-bit-first.
385 		    A character is in the set if its bit is 1.
386 		    A character too large to have a bit in the map
387 		    is automatically not in the set.  */
388     charset_not, /* Same parameters as charset, but match any character
389                     that is not one of those specified.  */
390     start_memory, /* Start remembering the text that is matched, for
391 		    storing in a memory register.  Followed by one
392                     byte containing the register number.  Register numbers
393                     must be in the range 0 through RE_NREGS.  */
394     stop_memory, /* Stop remembering the text that is matched
395 		    and store it in a memory register.  Followed by
396                     one byte containing the register number. Register
397                     numbers must be in the range 0 through RE_NREGS.  */
398     start_paren,    /* Place holder at the start of (?:..). */
399     stop_paren,    /* Place holder at the end of (?:..). */
400     casefold_on,   /* Turn on casefold flag. */
401     casefold_off,  /* Turn off casefold flag. */
402     option_set,	   /* Turn on multi line match (match with newlines). */
403     start_nowidth, /* Save string point to the stack. */
404     stop_nowidth,  /* Restore string place at the point start_nowidth. */
405     pop_and_fail,  /* Fail after popping nowidth entry from stack. */
406     stop_backtrack,  /* Restore backtrack stack at the point start_nowidth. */
407     duplicate,   /* Match a duplicate of something remembered.
408 		    Followed by one byte containing the index of the memory
409                     register.  */
410 #if 0
411     fail,        /* always fails. */
412 #endif
413     wordchar,    /* Matches any word-constituent character.  */
414     notwordchar, /* Matches any char that is not a word-constituent.  */
415     wordbeg,	 /* Succeeds if at word beginning.  */
416     wordend,	 /* Succeeds if at word end.  */
417     wordbound,   /* Succeeds if at a word boundary.  */
418     notwordbound /* Succeeds if not at a word boundary.  */
419   };
420 
421 
422 /* Number of failure points to allocate space for initially,
423    when matching.  If this number is exceeded, more space is allocated,
424    so it is not a hard limit.  */
425 
426 #ifndef NFAILURES
427 #define NFAILURES 160
428 #endif
429 
430 /* Store NUMBER in two contiguous bytes starting at DESTINATION.  */
431 #define STORE_NUMBER(destination, number)				\
432   do { (destination)[0] = (number) & 0377;				\
433     (destination)[1] = (number) >> 8; } while (0)
434 
435 /* Same as STORE_NUMBER, except increment the destination pointer to
436    the byte after where the number is stored.  Watch out that values for
437    DESTINATION such as p + 1 won't work, whereas p will.  */
438 #define STORE_NUMBER_AND_INCR(destination, number)			\
439   do { STORE_NUMBER(destination, number);				\
440     (destination) += 2; } while (0)
441 
442 
443 /* Put into DESTINATION a number stored in two contingous bytes starting
444    at SOURCE.  */
445 #define EXTRACT_NUMBER(destination, source)				\
446   do { (destination) = *(source) & 0377;				\
447     (destination) += SIGN_EXTEND_CHAR(*(char*)((source) + 1)) << 8; } while (0)
448 
449 /* Same as EXTRACT_NUMBER, except increment the pointer for source to
450    point to second byte of SOURCE.  Note that SOURCE has to be a value
451    such as p, not, e.g., p + 1. */
452 #define EXTRACT_NUMBER_AND_INCR(destination, source)			\
453   do { EXTRACT_NUMBER(destination, source);				\
454        (source) += 2; } while (0)
455 
456 
457 /* Specify the precise syntax of regexps for compilation.  This provides
458    for compatibility for various utilities which historically have
459    different, incompatible syntaxes.
460 
461    The argument SYNTAX is a bit-mask comprised of the various bits
462    defined in regex.h.  */
463 
464 long
nmz_re_set_syntax(syntax)465 nmz_re_set_syntax(syntax)
466   long syntax;
467 {
468     /* obsolete */
469     return 0;
470 }
471 
472 
473 /* Macros for nmz_re_compile_pattern, which is found below these definitions.  */
474 
475 #define TRANSLATE_P() ((options&RE_OPTION_IGNORECASE) && translate)
476 #define MAY_TRANSLATE() ((bufp->options&(RE_OPTION_IGNORECASE|RE_MAY_IGNORECASE)) && translate)
477 /* Fetch the next character in the uncompiled pattern---translating it
478    if necessary.  Also cast from a signed character in the constant
479    string passed to us by the user to an unsigned char that we can use
480    as an array index (in, e.g., `translate').  */
481 #define PATFETCH(c)							\
482   do {if (p == pend) goto end_of_pattern;				\
483     c = (unsigned char) *p++; 						\
484     if (TRANSLATE_P()) c = (unsigned char)translate[c];	\
485   } while (0)
486 
487 /* Fetch the next character in the uncompiled pattern, with no
488    translation.  */
489 #define PATFETCH_RAW(c)							\
490   do {if (p == pend) goto end_of_pattern;				\
491     c = (unsigned char)*p++; 						\
492   } while (0)
493 
494 /* Go backwards one character in the pattern.  */
495 #define PATUNFETCH p--
496 
497 #define MBC2WC(c, p)							\
498   do {									\
499     if (current_mbctype == MBCTYPE_UTF8) {				\
500       int n = mbclen(c) - 1;						\
501       c &= (1<<(BYTEWIDTH-2-n)) - 1;					\
502       while (n--) {							\
503 	c = c << 6 | (*p++ & ((1<<6)-1));				\
504       }									\
505     }									\
506     else {								\
507       c <<= 8;								\
508       c |= (unsigned char)*(p)++;					\
509     }									\
510   } while (0)
511 
512 #define PATFETCH_MBC(c)							\
513   do {									\
514     if (p + mbclen(c) - 1 >= pend) goto end_of_pattern;			\
515     MBC2WC(c, p);							\
516   } while(0)
517 
518 #define WC2MBC1ST(c)							\
519  ((current_mbctype != MBCTYPE_UTF8) ? ((c<0x100) ? (c) : (((c)>>8)&0xff)) : utf8_firstbyte(c))
520 
521 static unsigned int
utf8_firstbyte(c)522 utf8_firstbyte(c)
523      unsigned long c;
524 {
525   if (c < 0x80) return c;
526   if (c <= 0x7ff) return ((c>>6)&0xff)|0xc0;
527   if (c <= 0xffff) return ((c>>12)&0xff)|0xe0;
528   if (c <= 0x1fffff) return ((c>>18)&0xff)|0xf0;
529   if (c <= 0x3ffffff) return ((c>>24)&0xff)|0xf8;
530   if (c <= 0x7fffffff) return ((c>>30)&0xff)|0xfc;
531 #if SIZEOF_INT > 4
532   if (c <= 0xfffffffff) return 0xfe;
533 #else
534   return 0xfe;
535 #endif
536 }
537 
538 #if 0
539 static void
540 print_mbc(c)
541      unsigned int c;
542 {
543   if (current_mbctype == MBCTYPE_UTF8) {
544     if (c < 0x80)
545       printf("%c", (int)c);
546     else if (c <= 0x7ff)
547       printf("%c%c", (int)utf8_firstbyte(c), (int)(c & 0x3f));
548     else if (c <= 0xffff)
549       printf("%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 6) & 0x3f),
550 	     (int)(c & 0x3f));
551     else if (c <= 0x1fffff)
552       printf("%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 12) & 0x3f),
553 	     (int)((c >> 6) & 0x3f), (int)(c & 0x3f));
554     else if (c <= 0x3ffffff)
555       printf("%c%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 18) & 0x3f),
556 	     (int)((c >> 12) & 0x3f), (int)((c >> 6) & 0x3f), (int)(c & 0x3f));
557     else if (c <= 0x7fffffff)
558       printf("%c%c%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 24) & 0x3f),
559 	     (int)((c >> 18) & 0x3f), (int)((c >> 12) & 0x3f),
560 	     (int)((c >> 6) & 0x3f), (int)(c & 0x3f));
561   }
562   else if (c < 0xff) {
563     printf("\\%o", (int)c);
564   }
565   else {
566     printf("%c%c", (int)(c >> BYTEWIDTH), (int)(c &0xff));
567   }
568 }
569 #endif
570 
571 /* If the buffer isn't allocated when it comes in, use this.  */
572 #define INIT_BUF_SIZE  28
573 
574 /* Make sure we have at least N more bytes of space in buffer.  */
575 #define GET_BUFFER_SPACE(n)						\
576   do {								        \
577     while (b - bufp->buffer + (n) >= bufp->allocated)			\
578       EXTEND_BUFFER;							\
579   } while (0)
580 
581 /* Make sure we have one more byte of buffer space and then add CH to it.  */
582 #define BUFPUSH(ch)							\
583   do {									\
584     GET_BUFFER_SPACE(1);						\
585     *b++ = (char)(ch);							\
586   } while (0)
587 
588 /* Extend the buffer by twice its current size via reallociation and
589    reset the pointers that pointed into the old allocation to point to
590    the correct places in the new allocation.  If extending the buffer
591    results in it being larger than 1 << 16, then flag memory exhausted.  */
592 #define EXTEND_BUFFER							\
593   do { char *old_buffer = bufp->buffer;					\
594     if (bufp->allocated == (1L<<16)) goto too_big;			\
595     bufp->allocated *= 2;						\
596     if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16);		\
597     bufp->buffer = (char*)nmz_xrealloc(bufp->buffer, bufp->allocated);	\
598     if (bufp->buffer == 0)						\
599       goto memory_exhausted;						\
600     b = (b - old_buffer) + bufp->buffer;				\
601     if (fixup_alt_jump)							\
602       fixup_alt_jump = (fixup_alt_jump - old_buffer) + bufp->buffer;	\
603     if (laststart)							\
604       laststart = (laststart - old_buffer) + bufp->buffer;		\
605     begalt = (begalt - old_buffer) + bufp->buffer;			\
606     if (pending_exact)							\
607       pending_exact = (pending_exact - old_buffer) + bufp->buffer;	\
608   } while (0)
609 
610 
611 /* Set the bit for character C in a character set list.  */
612 #define SET_LIST_BIT(c)							\
613   (b[(unsigned char)(c) / BYTEWIDTH]					\
614    |= 1 << ((unsigned char)(c) % BYTEWIDTH))
615 
616 /* Get the next unsigned number in the uncompiled pattern.  */
617 #define GET_UNSIGNED_NUMBER(num) 					\
618   do { if (p != pend) { 						\
619         PATFETCH(c); 							\
620 	while (ISDIGIT(c)) { 						\
621 	  if (num < 0) 							\
622 	     num = 0; 							\
623 	  num = num * 10 + c - '0'; 					\
624 	  if (p == pend) 						\
625 	     break; 							\
626 	  PATFETCH(c); 							\
627 	} 								\
628      } 									\
629   } while (0)
630 
631 #define STREQ(s1, s2) ((strcmp(s1, s2) == 0))
632 
633 #define CHAR_CLASS_MAX_LENGTH  6 /* Namely, `xdigit'.  */
634 
635 #define IS_CHAR_CLASS(string)						\
636    (STREQ(string, "alpha") || STREQ(string, "upper")			\
637     || STREQ(string, "lower") || STREQ(string, "digit")			\
638     || STREQ(string, "alnum") || STREQ(string, "xdigit")		\
639     || STREQ(string, "space") || STREQ(string, "print")			\
640     || STREQ(string, "punct") || STREQ(string, "graph")			\
641     || STREQ(string, "cntrl") || STREQ(string, "blank"))
642 
643 #define STORE_MBC(p, c)							\
644   do {									\
645     (p)[0] = (unsigned char)(((c) >>24) & 0xff);			\
646     (p)[1] = (unsigned char)(((c) >>16) & 0xff);			\
647     (p)[2] = (unsigned char)(((c) >> 8) & 0xff);			\
648     (p)[3] = (unsigned char)(((c) >> 0) & 0xff);			\
649   } while (0)
650 
651 #define STORE_MBC_AND_INCR(p, c) 					\
652   do {									\
653     *(p)++ = (unsigned char)(((c) >>24) & 0xff);			\
654     *(p)++ = (unsigned char)(((c) >>16) & 0xff);			\
655     *(p)++ = (unsigned char)(((c) >> 8) & 0xff);			\
656     *(p)++ = (unsigned char)(((c) >> 0) & 0xff);			\
657   } while (0)
658 
659 #define EXTRACT_MBC(p) 							\
660   ((unsigned int)((unsigned char)(p)[0] << 24 |				\
661 		    (unsigned char)(p)[1] << 16 |			\
662                     (unsigned char)(p)[2] <<  8 |			\
663 		    (unsigned char)(p)[3]))
664 
665 #define EXTRACT_MBC_AND_INCR(p) 					\
666   ((unsigned int)((p) += 4, 						\
667 		    (unsigned char)(p)[-4] << 24 |			\
668 		    (unsigned char)(p)[-3] << 16 |			\
669                     (unsigned char)(p)[-2] <<  8 |			\
670 		    (unsigned char)(p)[-1]))
671 
672 #define EXTRACT_UNSIGNED(p) \
673   ((unsigned char)(p)[0] | (unsigned char)(p)[1] << 8)
674 #define EXTRACT_UNSIGNED_AND_INCR(p) \
675   ((p) += 2, (unsigned char)(p)[-2] | (unsigned char)(p)[-1] << 8)
676 
677 /* Handle (mb)?charset(_not)?.
678 
679    Structure of mbcharset(_not)? in compiled pattern.
680 
681      struct {
682        unsinged char id;		mbcharset(_not)?
683        unsigned char sbc_size;
684        unsigned char sbc_map[sbc_size];	same as charset(_not)? up to here.
685        unsigned short mbc_size;		number of intervals.
686        struct {
687 	 unsigned long beg;		beginning of interval.
688 	 unsigned long end;		end of interval.
689        } intervals[mbc_size];
690      }; */
691 
692 static void
set_list_bits(c1,c2,b)693 set_list_bits(c1, c2, b)
694     unsigned long c1, c2;
695     unsigned char *b;
696 {
697   unsigned char sbc_size = b[-1];
698   unsigned short mbc_size = EXTRACT_UNSIGNED(&b[sbc_size]);
699   unsigned short beg, end, upb;
700 
701   if (c1 > c2)
702     return;
703   b = &b[sbc_size + 2];
704 
705   for (beg = 0, upb = mbc_size; beg < upb; ) {
706     unsigned short mid = (unsigned short)(beg + upb) >> 1;
707 
708     if ((int)c1 - 1 > (int)EXTRACT_MBC(&b[mid*8+4]))
709       beg = mid + 1;
710     else
711       upb = mid;
712   }
713 
714   for (end = beg, upb = mbc_size; end < upb; ) {
715     unsigned short mid = (unsigned short)(end + upb) >> 1;
716 
717     if ((int)c2 >= (int)EXTRACT_MBC(&b[mid*8]) - 1)
718       end = mid + 1;
719     else
720       upb = mid;
721   }
722 
723   if (beg != end) {
724     if (c1 > EXTRACT_MBC(&b[beg*8]))
725       c1 = EXTRACT_MBC(&b[beg*8]);
726     if (c2 < EXTRACT_MBC(&b[(end - 1)*8+4]))
727       c2 = EXTRACT_MBC(&b[(end - 1)*8+4]);
728   }
729   if (end < mbc_size && end != beg + 1)
730     /* NOTE: memcpy() would not work here.  */
731     memmove(&b[(beg + 1)*8], &b[end*8], (mbc_size - end)*8);
732   STORE_MBC(&b[beg*8 + 0], c1);
733   STORE_MBC(&b[beg*8 + 4], c2);
734   mbc_size += beg - end + 1;
735   STORE_NUMBER(&b[-2], mbc_size);
736 }
737 
738 static int
is_in_list(c,b)739 is_in_list(c, b)
740     unsigned long c;
741     const unsigned char *b;
742 {
743   unsigned short size;
744   unsigned short i, j;
745 
746   size = *b++;
747   if ((int)c / BYTEWIDTH < (int)size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH) {
748     return 1;
749   }
750   b += size + 2;
751   size = EXTRACT_UNSIGNED(&b[-2]);
752   if (size == 0) return 0;
753 
754   for (i = 0, j = size; i < j; ) {
755     unsigned short k = (unsigned short)(i + j) >> 1;
756 
757     if (c > EXTRACT_MBC(&b[k*8+4]))
758       i = k + 1;
759     else
760       j = k;
761   }
762   if (i < size && EXTRACT_MBC(&b[i*8]) <= c
763       && ((unsigned char)c != '\n' && (unsigned char)c != '\0'))
764     return 1;
765   return 0;
766 }
767 
768 #if 0
769 static void
770 print_partial_compiled_pattern(start, end)
771     unsigned char *start;
772     unsigned char *end;
773 {
774   int mcnt, mcnt2;
775   unsigned char *p = start;
776   unsigned char *pend = end;
777 
778   if (start == NULL) {
779     printf("(null)\n");
780     return;
781   }
782 
783   /* Loop over pattern commands.  */
784   while (p < pend) {
785     switch ((enum regexpcode)*p++) {
786     case unused:
787       printf("/unused");
788       break;
789 
790     case exactn:
791       mcnt = *p++;
792       printf("/exactn/%d", mcnt);
793       do {
794 	putchar('/');
795 	printf("%c", *p++);
796       }
797       while (--mcnt);
798       break;
799 
800     case start_memory:
801       mcnt = *p++;
802       printf("/start_memory/%d/%d", mcnt, *p++);
803       break;
804 
805     case stop_memory:
806       mcnt = *p++;
807       printf("/stop_memory/%d/%d", mcnt, *p++);
808       break;
809 
810     case start_paren:
811       printf("/start_paren");
812       break;
813 
814     case stop_paren:
815       printf("/stop_paren");
816       break;
817 
818     case casefold_on:
819       printf("/casefold_on");
820       break;
821 
822     case casefold_off:
823       printf("/casefold_off");
824       break;
825 
826     case option_set:
827       printf("/option_set/%d", *p++);
828       break;
829 
830     case start_nowidth:
831       EXTRACT_NUMBER_AND_INCR(mcnt, p);
832       printf("/start_nowidth//%d", mcnt);
833       break;
834 
835     case stop_nowidth:
836       printf("/stop_nowidth//");
837       p += 2;
838       break;
839 
840     case pop_and_fail:
841       printf("/pop_and_fail");
842       break;
843 
844     case stop_backtrack:
845       printf("/stop_backtrack//");
846       p += 2;
847       break;
848 
849     case duplicate:
850       printf("/duplicate/%d", *p++);
851       break;
852 
853     case anychar:
854       printf("/anychar");
855       break;
856 
857     case anychar_repeat:
858       printf("/anychar_repeat");
859       break;
860 
861     case charset:
862     case charset_not:
863       {
864 	register int c;
865 
866 	printf("/charset%s",
867 	       (enum regexpcode)*(p - 1) == charset_not ? "_not" : "");
868 
869 	mcnt = *p++;
870 	printf("/%d", mcnt);
871 	for (c = 0; c < mcnt; c++) {
872 	  unsigned bit;
873 	  unsigned char map_byte = p[c];
874 
875 	  putchar ('/');
876 
877 	  for (bit = 0; bit < BYTEWIDTH; bit++)
878 	    if (map_byte & (1 << bit))
879 	      printf("%c", c * BYTEWIDTH + bit);
880 	}
881 	p += mcnt;
882 	mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
883 	printf("/");
884 	while (mcnt--) {
885 	  print_mbc(EXTRACT_MBC_AND_INCR(p));
886 	  printf("-");
887 	  print_mbc(EXTRACT_MBC_AND_INCR(p));
888 	}
889 	break;
890       }
891 
892     case begline:
893       printf("/begline");
894       break;
895 
896     case endline:
897       printf("/endline");
898       break;
899 
900     case on_failure_jump:
901       EXTRACT_NUMBER_AND_INCR(mcnt, p);
902       printf("/on_failure_jump//%d", mcnt);
903       break;
904 
905     case dummy_failure_jump:
906       EXTRACT_NUMBER_AND_INCR(mcnt, p);
907       printf("/dummy_failure_jump//%d", mcnt);
908       break;
909 
910     case push_dummy_failure:
911       printf("/push_dummy_failure");
912       break;
913 
914     case finalize_jump:
915       EXTRACT_NUMBER_AND_INCR(mcnt, p);
916       printf("/finalize_jump//%d", mcnt);
917       break;
918 
919     case maybe_finalize_jump:
920       EXTRACT_NUMBER_AND_INCR(mcnt, p);
921       printf("/maybe_finalize_jump//%d", mcnt);
922       break;
923 
924     case jump_past_alt:
925       EXTRACT_NUMBER_AND_INCR(mcnt, p);
926       printf("/jump_past_alt//%d", mcnt);
927       break;
928 
929     case jump:
930       EXTRACT_NUMBER_AND_INCR(mcnt, p);
931       printf("/jump//%d", mcnt);
932       break;
933 
934     case succeed_n:
935       EXTRACT_NUMBER_AND_INCR(mcnt, p);
936       EXTRACT_NUMBER_AND_INCR(mcnt2, p);
937       printf("/succeed_n//%d//%d", mcnt, mcnt2);
938       break;
939 
940     case jump_n:
941       EXTRACT_NUMBER_AND_INCR(mcnt, p);
942       EXTRACT_NUMBER_AND_INCR(mcnt2, p);
943       printf("/jump_n//%d//%d", mcnt, mcnt2);
944       break;
945 
946     case set_number_at:
947       EXTRACT_NUMBER_AND_INCR(mcnt, p);
948       EXTRACT_NUMBER_AND_INCR(mcnt2, p);
949       printf("/set_number_at//%d//%d", mcnt, mcnt2);
950       break;
951 
952     case try_next:
953       EXTRACT_NUMBER_AND_INCR(mcnt, p);
954       printf("/try_next//%d", mcnt);
955       break;
956 
957     case finalize_push:
958       EXTRACT_NUMBER_AND_INCR(mcnt, p);
959       printf("/finalize_push//%d", mcnt);
960       break;
961 
962     case finalize_push_n:
963       EXTRACT_NUMBER_AND_INCR(mcnt, p);
964       EXTRACT_NUMBER_AND_INCR(mcnt2, p);
965       printf("/finalize_push_n//%d//%d", mcnt, mcnt2);
966       break;
967 
968     case wordbound:
969       printf("/wordbound");
970       break;
971 
972     case notwordbound:
973       printf("/notwordbound");
974       break;
975 
976     case wordbeg:
977       printf("/wordbeg");
978       break;
979 
980     case wordend:
981       printf("/wordend");
982 
983     case wordchar:
984       printf("/wordchar");
985       break;
986 
987     case notwordchar:
988       printf("/notwordchar");
989       break;
990 
991     case begbuf:
992       printf("/begbuf");
993       break;
994 
995     case endbuf:
996       printf("/endbuf");
997       break;
998 
999     case endbuf2:
1000       printf("/endbuf2");
1001       break;
1002 
1003     case begpos:
1004       printf("/begpos");
1005       break;
1006 
1007     default:
1008       printf("?%d", *(p-1));
1009     }
1010   }
1011   printf("/\n");
1012 }
1013 
1014 
1015 static void
1016 print_compiled_pattern(bufp)
1017      struct re_pattern_buffer *bufp;
1018 {
1019   unsigned char *buffer = (unsigned char*)bufp->buffer;
1020 
1021   print_partial_compiled_pattern(buffer, buffer + bufp->used);
1022 }
1023 #endif
1024 
1025 static char*
calculate_must_string(start,end)1026 calculate_must_string(start, end)
1027      char *start;
1028      char *end;
1029 {
1030   int mcnt;
1031   int max = 0;
1032   char *p = start;
1033   char *pend = end;
1034   char *must = 0;
1035 
1036   if (start == NULL) return 0;
1037 
1038   /* Loop over pattern commands.  */
1039   while (p < pend) {
1040     switch ((enum regexpcode)*p++) {
1041     case unused:
1042       break;
1043 
1044     case exactn:
1045       mcnt = *p;
1046       if (mcnt > max) {
1047 	must = p;
1048 	max = mcnt;
1049       }
1050       p += mcnt+1;
1051       break;
1052 
1053     case start_memory:
1054     case stop_memory:
1055       p += 2;
1056       break;
1057 
1058     case duplicate:
1059       p++;
1060       break;
1061 
1062     case casefold_on:
1063     case casefold_off:
1064       return 0;		/* should not check must_string */
1065 
1066     case pop_and_fail:
1067     case anychar:
1068     case anychar_repeat:
1069     case begline:
1070     case endline:
1071     case wordbound:
1072     case notwordbound:
1073     case wordbeg:
1074     case wordend:
1075     case wordchar:
1076     case notwordchar:
1077     case begbuf:
1078     case endbuf:
1079     case endbuf2:
1080     case begpos:
1081     case push_dummy_failure:
1082     case start_paren:
1083     case stop_paren:
1084     case option_set:
1085       break;
1086 
1087     case charset:
1088     case charset_not:
1089       mcnt = *p++;
1090       p += mcnt;
1091       mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
1092       while (mcnt--) {
1093 	p += 4;
1094       }
1095       break;
1096 
1097     case on_failure_jump:
1098       EXTRACT_NUMBER_AND_INCR(mcnt, p);
1099       if (mcnt > 0) p += mcnt;
1100       if ((enum regexpcode)p[-3] == jump) {
1101        p -= 2;
1102 	EXTRACT_NUMBER_AND_INCR(mcnt, p);
1103 	if (mcnt > 0) p += mcnt;
1104       }
1105       break;
1106 
1107     case dummy_failure_jump:
1108     case succeed_n:
1109     case try_next:
1110     case jump:
1111       EXTRACT_NUMBER_AND_INCR(mcnt, p);
1112       if (mcnt > 0) p += mcnt;
1113       break;
1114 
1115     case start_nowidth:
1116     case stop_nowidth:
1117     case stop_backtrack:
1118     case finalize_jump:
1119     case maybe_finalize_jump:
1120     case finalize_push:
1121       p += 2;
1122       break;
1123 
1124     case jump_n:
1125     case set_number_at:
1126     case finalize_push_n:
1127       p += 4;
1128       break;
1129 
1130     default:
1131       break;
1132     }
1133   }
1134   return must;
1135 }
1136 
1137 static unsigned int
read_backslash(c)1138 read_backslash(c)
1139      int c;
1140 {
1141   switch (c) {
1142   case 'n':
1143     return '\n';
1144 
1145   case 't':
1146     return '\t';
1147 
1148   case 'r':
1149     return '\r';
1150 
1151   case 'f':
1152     return '\f';
1153 
1154   case 'v':
1155     return '\v';
1156 
1157   case 'a':
1158     return '\007';
1159 
1160   case 'b':
1161     return '\010';
1162 
1163   case 'e':
1164     return '\033';
1165   }
1166   return c;
1167 }
1168 
1169 static unsigned int
read_special(p,pend,pp)1170 read_special(p, pend, pp)
1171      const char *p, *pend, **pp;
1172 {
1173   int c;
1174 
1175   PATFETCH_RAW(c);
1176   switch (c) {
1177   case 'M':
1178     PATFETCH_RAW(c);
1179     if (c != '-') return -1;
1180     PATFETCH_RAW(c);
1181     *pp = p;
1182     if (c == '\\') {
1183       return read_special(p, pend, pp) | 0x80;
1184     }
1185     else if (c == -1) return ~0;
1186     else {
1187       return ((c & 0xff) | 0x80);
1188     }
1189 
1190   case 'C':
1191     PATFETCH_RAW(c);
1192     if (c != '-') return ~0;
1193   case 'c':
1194     PATFETCH_RAW(c);
1195     *pp = p;
1196     if (c == '\\') {
1197       c = read_special(p, pend, pp);
1198     }
1199     else if (c == '?') return 0177;
1200     else if (c == -1) return ~0;
1201     return c & 0x9f;
1202   default:
1203     return read_backslash(c);
1204   }
1205 
1206  end_of_pattern:
1207   return ~0;
1208 }
1209 
1210 /* nmz_re_compile_pattern takes a regular-expression string
1211    and converts it into a buffer full of byte commands for matching.
1212 
1213    PATTERN   is the address of the pattern string
1214    SIZE      is the length of it.
1215    BUFP	    is a  struct re_pattern_buffer *  which points to the info
1216 	     on where to store the byte commands.
1217 	     This structure contains a  char *  which points to the
1218 	     actual space, which should have been obtained with malloc.
1219 	     nmz_re_compile_pattern may use realloc to grow the buffer space.
1220 
1221    The number of bytes of commands can be found out by looking in
1222    the `struct re_pattern_buffer' that bufp pointed to, after
1223    nmz_re_compile_pattern returns. */
1224 
1225 char *
nmz_re_compile_pattern(pattern,size,bufp)1226 nmz_re_compile_pattern(pattern, size, bufp)
1227      const char *pattern;
1228      int size;
1229      struct re_pattern_buffer *bufp;
1230 {
1231   register char *b = bufp->buffer;
1232   register const char *p = pattern;
1233   const char *nextp;
1234   const char *pend = pattern + size;
1235   register unsigned int c, c1 = 0;
1236   const char *p0;
1237   int numlen;
1238 #define ERROR_MSG_MAX_SIZE 200
1239   static char error_msg[ERROR_MSG_MAX_SIZE+1];
1240 
1241   /* Address of the count-byte of the most recently inserted `exactn'
1242      command.  This makes it possible to tell whether a new exact-match
1243      character can be added to that command or requires a new `exactn'
1244      command.  */
1245 
1246   char *pending_exact = 0;
1247 
1248   /* Address of the place where a forward-jump should go to the end of
1249      the containing expression.  Each alternative of an `or', except the
1250      last, ends with a forward-jump of this sort.  */
1251 
1252   char *fixup_alt_jump = 0;
1253 
1254   /* Address of start of the most recently finished expression.
1255      This tells postfix * where to find the start of its operand.  */
1256 
1257   char *laststart = 0;
1258 
1259   /* In processing a repeat, 1 means zero matches is allowed.  */
1260 
1261   char zero_times_ok;
1262 
1263   /* In processing a repeat, 1 means many matches is allowed.  */
1264 
1265   char many_times_ok;
1266 
1267   /* In processing a repeat, 1 means non-greedy matches.  */
1268 
1269   char greedy;
1270 
1271   /* Address of beginning of regexp, or inside of last (.  */
1272 
1273   char *begalt = b;
1274 
1275   /* Place in the uncompiled pattern (i.e., the {) to
1276      which to go back if the interval is invalid.  */
1277   const char *beg_interval;
1278 
1279   /* In processing an interval, at least this many matches must be made.  */
1280   int lower_bound;
1281 
1282   /* In processing an interval, at most this many matches can be made.  */
1283   int upper_bound;
1284 
1285   /* Stack of information saved by ( and restored by ).
1286      Five stack elements are pushed by each (:
1287      First, the value of b.
1288      Second, the value of fixup_alt_jump.
1289      Third, the value of begalt.
1290      Fourth, the value of regnum.
1291      Fifth, the type of the paren. */
1292 
1293   int stacka[40];
1294   int *stackb = stacka;
1295   int *stackp = stackb;
1296   int *stacke = stackb + 40;
1297 #if 0
1298   int *stackt;
1299 #endif
1300 
1301   /* Counts ('s as they are encountered.  Remembered for the matching ),
1302      where it becomes the register number to put in the stop_memory
1303      command.  */
1304 
1305   int regnum = 1;
1306 
1307   int range = 0;
1308   int had_mbchar = 0;
1309   int had_num_literal = 0;
1310   int had_char_class = 0;
1311 
1312   int options = bufp->options;
1313 
1314   bufp->fastmap_accurate = 0;
1315   bufp->must = 0;
1316   bufp->must_skip = 0;
1317   bufp->stclass = 0;
1318 
1319   /* Initialize the syntax table.  */
1320   init_syntax_once();
1321 
1322   if (bufp->allocated == 0) {
1323     bufp->allocated = INIT_BUF_SIZE;
1324     if (bufp->buffer)
1325       /* EXTEND_BUFFER loses when bufp->allocated is 0.  */
1326       bufp->buffer = (char*)nmz_xrealloc(bufp->buffer, INIT_BUF_SIZE);
1327     else
1328       /* Caller did not allocate a buffer.  Do it for them.  */
1329       bufp->buffer = (char*)nmz_xmalloc(INIT_BUF_SIZE);
1330     if (!bufp->buffer) goto memory_exhausted;
1331     begalt = b = bufp->buffer;
1332   }
1333 
1334   while (p != pend) {
1335     PATFETCH(c);
1336 
1337     switch (c) {
1338     case '$':
1339       if (bufp->options & RE_OPTION_SINGLELINE) {
1340 	BUFPUSH(endbuf);
1341       }
1342       else {
1343 	p0 = p;
1344 	/* When testing what follows the $,
1345 	   look past the \-constructs that don't consume anything.  */
1346 
1347 	while (p0 != pend) {
1348 	  if (*p0 == '\\' && p0 + 1 != pend
1349 	      && (p0[1] == 'b' || p0[1] == 'B'))
1350 	    p0 += 2;
1351 	  else
1352 	    break;
1353 	}
1354 	BUFPUSH(endline);
1355       }
1356       break;
1357 
1358     case '^':
1359       if (bufp->options & RE_OPTION_SINGLELINE)
1360 	BUFPUSH(begbuf);
1361       else
1362 	BUFPUSH(begline);
1363       break;
1364 
1365     case '+':
1366     case '?':
1367     case '*':
1368       /* If there is no previous pattern, char not special. */
1369       if (!laststart) {
1370 	snprintf(error_msg, ERROR_MSG_MAX_SIZE,
1371 		 "invalid regular expression; there's no previous pattern, to which '%c' would define cardinality at %d",
1372 		 c, p-pattern);
1373 	FREE_AND_RETURN(stackb, error_msg);
1374       }
1375       /* If there is a sequence of repetition chars,
1376 	 collapse it down to just one.  */
1377       zero_times_ok = c != '+';
1378       many_times_ok = c != '?';
1379       greedy = 1;
1380       if (p != pend) {
1381 	PATFETCH(c);
1382 	switch (c) {
1383 	case '?':
1384 	  greedy = 0;
1385 	  break;
1386 	case '*':
1387 	case '+':
1388 	  goto nested_meta;
1389 	default:
1390 	  PATUNFETCH;
1391 	  break;
1392 	}
1393       }
1394 
1395     repeat:
1396       /* Star, etc. applied to an empty pattern is equivalent
1397 	 to an empty pattern.  */
1398       if (!laststart)
1399 	break;
1400 
1401       if (greedy && many_times_ok && *laststart == anychar && b - laststart <= 2) {
1402 	if (b[-1] == stop_paren)
1403 	  b--;
1404 	if (zero_times_ok)
1405 	  *laststart = anychar_repeat;
1406 	else {
1407 	  BUFPUSH(anychar_repeat);
1408 	}
1409 	break;
1410       }
1411       /* Now we know whether or not zero matches is allowed
1412 	 and also whether or not two or more matches is allowed.  */
1413       if (many_times_ok) {
1414 	/* If more than one repetition is allowed, put in at the
1415 	   end a backward relative jump from b to before the next
1416 	   jump we're going to put in below (which jumps from
1417 	   laststart to after this jump).  */
1418 	GET_BUFFER_SPACE(3);
1419 	store_jump(b,greedy?maybe_finalize_jump:finalize_push,laststart-3);
1420 	b += 3;  	/* Because store_jump put stuff here.  */
1421       }
1422 
1423       /* On failure, jump from laststart to next pattern, which will be the
1424 	 end of the buffer after this jump is inserted.  */
1425       GET_BUFFER_SPACE(3);
1426       insert_jump(on_failure_jump, laststart, b + 3, b);
1427       b += 3;
1428 
1429       if (zero_times_ok) {
1430 	if (greedy == 0) {
1431 	  GET_BUFFER_SPACE(3);
1432 	  insert_jump(try_next, laststart, b + 3, b);
1433 	  b += 3;
1434 	}
1435       }
1436       else {
1437 	/* At least one repetition is required, so insert a
1438 	   `dummy_failure_jump' before the initial
1439 	   `on_failure_jump' instruction of the loop. This
1440 	   effects a skip over that instruction the first time
1441 	   we hit that loop.  */
1442 	GET_BUFFER_SPACE(3);
1443 	insert_jump(dummy_failure_jump, laststart, laststart + 6, b);
1444 	b += 3;
1445       }
1446       break;
1447 
1448     case '.':
1449       laststart = b;
1450       BUFPUSH(anychar);
1451       break;
1452 
1453     case '[':
1454       if (p == pend)
1455 	FREE_AND_RETURN(stackb, "invalid regular expression; '[' can't be the last character ie. can't start range at the end of pattern");
1456       while ((b - bufp->buffer + 9 + (1 << BYTEWIDTH) / BYTEWIDTH)
1457 	     > bufp->allocated)
1458 	EXTEND_BUFFER;
1459 
1460       laststart = b;
1461       if (*p == '^') {
1462 	BUFPUSH(charset_not);
1463 	p++;
1464       }
1465       else
1466 	BUFPUSH(charset);
1467       p0 = p;
1468 
1469       BUFPUSH((1 << BYTEWIDTH) / BYTEWIDTH);
1470       /* Clear the whole map */
1471       memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
1472 
1473       had_mbchar = 0;
1474       had_num_literal = 0;
1475       had_char_class = 0;
1476 
1477       /* Read in characters and ranges, setting map bits.  */
1478       for (;;) {
1479 	int size;
1480 	unsigned last = (unsigned)-1;
1481 
1482 	if ((size = EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH]))
1483 	    || current_mbctype) {
1484 	  /* Ensure the space is enough to hold another interval
1485 	     of multi-byte chars in charset(_not)?.  */
1486 	  size = (1 << BYTEWIDTH) / BYTEWIDTH + 2 + size*8 + 8;
1487 	  while (b + size + 1 > bufp->buffer + bufp->allocated)
1488 	    EXTEND_BUFFER;
1489 	}
1490       range_retry:
1491 	if (range && had_char_class) {
1492 	  FREE_AND_RETURN(stackb, "invalid regular expression; can't use character class as an end value of range");
1493 	}
1494 	PATFETCH(c);
1495 
1496 	if (c == ']') {
1497 	  if (p == p0 + 1) {
1498 	    if (p == pend)
1499 	      FREE_AND_RETURN(stackb, "invalid regular expression; empty character class");
1500 	  }
1501 	  else
1502 	    /* Stop if this isn't merely a ] inside a bracket
1503 	       expression, but rather the end of a bracket
1504 	       expression.  */
1505 	    break;
1506 	}
1507 	/* Look ahead to see if it's a range when the last thing
1508 	   was a character class.  */
1509 	if (had_char_class && c == '-' && *p != ']')
1510 	  FREE_AND_RETURN(stackb, "invalid regular expression; can't use character class as a start value of range");
1511 	if (ismbchar(c)) {
1512 	  PATFETCH_MBC(c);
1513 	  had_mbchar++;
1514 	}
1515 	had_char_class = 0;
1516 
1517 	/* \ escapes characters when inside [...].  */
1518 	if (c == '\\') {
1519 	  PATFETCH_RAW(c);
1520 	  switch (c) {
1521 	  case 'w':
1522 	    for (c = 0; c < (1 << BYTEWIDTH); c++) {
1523 	      if (SYNTAX(c) == Sword ||
1524 		  (!current_mbctype && SYNTAX(c) == Sword2))
1525 		SET_LIST_BIT(c);
1526 	    }
1527 	    if (current_mbctype) {
1528 	      set_list_bits(0x80, 0xffffffff, b);
1529 	    }
1530 	    had_char_class = 1;
1531 	    last = -1;
1532 	    continue;
1533 
1534 	  case 'W':
1535 	    for (c = 0; c < (1 << BYTEWIDTH); c++) {
1536 	      if (SYNTAX(c) != Sword &&
1537 		  ((current_mbctype && !re_mbctab[c]) ||
1538 		  (!current_mbctype && SYNTAX(c) != Sword2)))
1539 		SET_LIST_BIT(c);
1540 	    }
1541 	    had_char_class = 1;
1542 	    last = -1;
1543 	    continue;
1544 
1545 	  case 's':
1546 	    for (c = 0; c < 256; c++)
1547 	      if (ISSPACE(c))
1548 		SET_LIST_BIT(c);
1549 	    had_char_class = 1;
1550 	    last = -1;
1551 	    continue;
1552 
1553 	  case 'S':
1554 	    for (c = 0; c < 256; c++)
1555 	      if (!ISSPACE(c))
1556 		SET_LIST_BIT(c);
1557 	    if (current_mbctype)
1558 	      set_list_bits(0x80, 0xffffffff, b);
1559 	    had_char_class = 1;
1560 	    last = -1;
1561 	    continue;
1562 
1563 	  case 'd':
1564 	    for (c = '0'; c <= '9'; c++)
1565 	      SET_LIST_BIT(c);
1566 	    had_char_class = 1;
1567 	    last = -1;
1568 	    continue;
1569 
1570 	  case 'D':
1571 	    for (c = 0; c < 256; c++)
1572 	      if (!ISDIGIT(c))
1573 		SET_LIST_BIT(c);
1574 	    if (current_mbctype)
1575 	      set_list_bits(0x80, 0xffffffff, b);
1576 	    had_char_class = 1;
1577 	    last = -1;
1578 	    continue;
1579 
1580 	  case 'x':
1581 	    c = nmz_scan_hex(p, 2, &numlen);
1582 	    p += numlen;
1583 	    had_num_literal = 1;
1584 	    break;
1585 
1586 	  case '0': case '1': case '2': case '3': case '4':
1587 	  case '5': case '6': case '7': case '8': case '9':
1588 	    PATUNFETCH;
1589 	    c = nmz_scan_oct(p, 3, &numlen);
1590 	    p += numlen;
1591 	    had_num_literal = 1;
1592 	    break;
1593 
1594 	  case 'M':
1595 	  case 'C':
1596 	  case 'c':
1597 	    {
1598 	      char *pp;
1599 
1600 	      --p;
1601 	      c = read_special(p, pend, &pp);
1602 	      if (c > 255) goto invalid_escape;
1603 	      p = pp;
1604 	      had_num_literal = 1;
1605 	    }
1606 	    break;
1607 
1608 	  default:
1609 	    c = read_backslash(c);
1610 	    if (ismbchar(c)) {
1611 	      PATFETCH_MBC(c);
1612 	      had_mbchar++;
1613 	    }
1614 	    break;
1615 	  }
1616 	}
1617 
1618 	/* Get a range.  */
1619 	if (range) {
1620 	  if (last > c)
1621 	    goto invalid_pattern;
1622 
1623 	  range = 0;
1624 	  if (had_mbchar == 0) {
1625 	    for (;last<=c;last++)
1626 	      SET_LIST_BIT(last);
1627 	  }
1628 	  else if (had_mbchar == 2) {
1629 	    set_list_bits(last, c, b);
1630 	  }
1631 	  else {
1632 	    /* restriction: range between sbc and mbc */
1633 	    goto invalid_pattern;
1634 	  }
1635 	}
1636 	else if (p[0] == '-' && p[1] != ']') {
1637 	  last = c;
1638 	  PATFETCH(c1);
1639 	  range = 1;
1640 	  goto range_retry;
1641 	}
1642 	else if (c == '[' && *p == ':') {
1643 	  /* Leave room for the null.  */
1644 	  char str[CHAR_CLASS_MAX_LENGTH + 1];
1645 
1646 	  PATFETCH_RAW(c);
1647 	  c1 = 0;
1648 
1649 	  /* If pattern is `[[:'.  */
1650 	  if (p == pend)
1651 	    FREE_AND_RETURN(stackb, "invalid regular expression; re can't end '[[:'");
1652 
1653 	  for (;;) {
1654 	    PATFETCH (c);
1655 	    if (c == ':' || c == ']' || p == pend
1656 		|| c1 == CHAR_CLASS_MAX_LENGTH)
1657 	      break;
1658 	    str[c1++] = c;
1659 	  }
1660 	  str[c1] = '\0';
1661 
1662 	  /* If isn't a word bracketed by `[:' and:`]':
1663 	     undo the ending character, the letters, and leave
1664 	     the leading `:' and `[' (but set bits for them).  */
1665 	  if (c == ':' && *p == ']') {
1666 	    int ch;
1667 	    char is_alnum = STREQ(str, "alnum");
1668 	    char is_alpha = STREQ(str, "alpha");
1669 	    char is_blank = STREQ(str, "blank");
1670 	    char is_cntrl = STREQ(str, "cntrl");
1671 	    char is_digit = STREQ(str, "digit");
1672 	    char is_graph = STREQ(str, "graph");
1673 	    char is_lower = STREQ(str, "lower");
1674 	    char is_print = STREQ(str, "print");
1675 	    char is_punct = STREQ(str, "punct");
1676 	    char is_space = STREQ(str, "space");
1677 	    char is_upper = STREQ(str, "upper");
1678 	    char is_xdigit = STREQ(str, "xdigit");
1679 
1680 	    if (!IS_CHAR_CLASS(str)){
1681 	      snprintf(error_msg, ERROR_MSG_MAX_SIZE,
1682 		       "invalid regular expression; [:%s:] is not a character class", str);
1683 	      FREE_AND_RETURN(stackb, error_msg);
1684 	    }
1685 
1686 	    /* Throw away the ] at the end of the character class.  */
1687 	    PATFETCH(c);
1688 
1689 	    if (p == pend)
1690 	      FREE_AND_RETURN(stackb, "invalid regular expression; range doesn't have ending ']' after a character class");
1691 
1692 	    for (ch = 0; ch < 1 << BYTEWIDTH; ch++) {
1693 	      if (   (is_alnum  && ISALNUM(ch))
1694 		  || (is_alpha  && ISALPHA(ch))
1695 		  || (is_blank  && ISBLANK(ch))
1696 		  || (is_cntrl  && ISCNTRL(ch))
1697 		  || (is_digit  && ISDIGIT(ch))
1698 		  || (is_graph  && ISGRAPH(ch))
1699 		  || (is_lower  && ISLOWER(ch))
1700 		  || (is_print  && ISPRINT(ch))
1701 		  || (is_punct  && ISPUNCT(ch))
1702 		  || (is_space  && ISSPACE(ch))
1703 		  || (is_upper  && ISUPPER(ch))
1704 		  || (is_xdigit && ISXDIGIT(ch)))
1705 		SET_LIST_BIT(ch);
1706 	    }
1707 	    had_char_class = 1;
1708 	  }
1709 	  else {
1710 	    c1++;
1711 	    while (c1--)
1712 	      PATUNFETCH;
1713 	    SET_LIST_BIT(TRANSLATE_P()?translate['[']:'[');
1714 	    SET_LIST_BIT(TRANSLATE_P()?translate[':']:':');
1715 	    had_char_class = 0;
1716 	    last = ':';
1717 	  }
1718 	}
1719 	else if (had_mbchar == 0 && (!current_mbctype || !had_num_literal)) {
1720 	  SET_LIST_BIT(c);
1721 	  had_num_literal = 0;
1722 	}
1723 	else
1724 	  set_list_bits(c, c, b);
1725 	had_mbchar = 0;
1726       }
1727 
1728       /* Discard any character set/class bitmap bytes that are all
1729 	 0 at the end of the map. Decrement the map-length byte too.  */
1730       while ((int)b[-1] > 0 && b[b[-1] - 1] == 0)
1731 	b[-1]--;
1732       if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
1733 	memmove(&b[(int)b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
1734 		2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
1735       b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[(int)b[-1]])*8;
1736       break;
1737 
1738     case '(':
1739       {
1740 	int old_options = options;
1741 	int push_option = 0;
1742 	int casefold = 0;
1743 
1744       PATFETCH(c);
1745       if (c == '?') {
1746 	int negative = 0;
1747 
1748 	PATFETCH_RAW(c);
1749 	switch (c) {
1750 	case 'x': case 'p': case 'm': case 'i': case '-':
1751 	  for (;;) {
1752 	    switch (c) {
1753 	    case '-':
1754 	      negative = 1;
1755 	      break;
1756 
1757 	    case ':':
1758 	    case ')':
1759 	      break;
1760 
1761 	    case 'x':
1762 	      if (negative)
1763 		options &= ~RE_OPTION_EXTENDED;
1764 	      else
1765 		options |= RE_OPTION_EXTENDED;
1766 	      break;
1767 
1768 	    case 'p':
1769 	      if (negative) {
1770 		if ((options&RE_OPTION_POSIXLINE) == RE_OPTION_POSIXLINE) {
1771 		  options &= ~RE_OPTION_POSIXLINE;
1772 		}
1773 	      }
1774 	      else if ((options&RE_OPTION_POSIXLINE) != RE_OPTION_POSIXLINE) {
1775 		options |= RE_OPTION_POSIXLINE;
1776 	      }
1777 	      push_option = 1;
1778 	      break;
1779 
1780 	    case 'm':
1781 	      if (negative) {
1782 		if (options&RE_OPTION_MULTILINE) {
1783 		  options &= ~RE_OPTION_MULTILINE;
1784 		}
1785 	      }
1786 	      else if (!(options&RE_OPTION_MULTILINE)) {
1787 		options |= RE_OPTION_MULTILINE;
1788 	      }
1789 	      push_option = 1;
1790 	      break;
1791 
1792 	    case 'i':
1793 	      if (negative) {
1794 		if (options&RE_OPTION_IGNORECASE) {
1795 		  options &= ~RE_OPTION_IGNORECASE;
1796 		}
1797 	      }
1798 	      else if (!(options&RE_OPTION_IGNORECASE)) {
1799 		options |= RE_OPTION_IGNORECASE;
1800 	      }
1801 		casefold = 1;
1802 	      break;
1803 
1804 	    default:
1805 	      FREE_AND_RETURN(stackb, "undefined (?...) inline option");
1806 	    }
1807 	    if (c == ')') {
1808 	      c = '#';	/* read whole in-line options */
1809 	      break;
1810 	    }
1811 	    if (c == ':') break;
1812 	    PATFETCH_RAW(c);
1813 	  }
1814 	  break;
1815 
1816 	case '#':
1817 	  for (;;) {
1818 	    PATFETCH(c);
1819 	    if (c == ')') break;
1820 	  }
1821 	  c = '#';
1822 	  break;
1823 
1824 	case ':':
1825 	case '=':
1826 	case '!':
1827 	case '>':
1828 	  break;
1829 
1830 	default:
1831 	  FREE_AND_RETURN(stackb, "undefined (?...) sequence");
1832 	}
1833 	}
1834 	else {
1835 	  PATUNFETCH;
1836 	  c = '(';
1837 	}
1838 	if (c == '#') {
1839 	if (push_option) {
1840 	  BUFPUSH(option_set);
1841 	  BUFPUSH(options);
1842 	}
1843 	  if (casefold) {
1844 	    if (options & RE_OPTION_IGNORECASE)
1845 	      BUFPUSH(casefold_on);
1846 	    else
1847 	      BUFPUSH(casefold_off);
1848       }
1849 	  break;
1850       }
1851       if (stackp+8 >= stacke) {
1852 	DOUBLE_STACK(int);
1853       }
1854 
1855       /* Laststart should point to the start_memory that we are about
1856 	 to push (unless the pattern has RE_NREGS or more ('s).  */
1857       /* obsolete: now RE_NREGS is just a default register size. */
1858       *stackp++ = b - bufp->buffer;
1859       *stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
1860       *stackp++ = begalt - bufp->buffer;
1861       switch (c) {
1862       case '(':
1863 	BUFPUSH(start_memory);
1864 	BUFPUSH(regnum);
1865 	*stackp++ = regnum++;
1866 	*stackp++ = b - bufp->buffer;
1867 	BUFPUSH(0);
1868 	/* too many ()'s to fit in a byte. (max 254) */
1869 	if (regnum >= RE_REG_MAX) goto too_big;
1870 	break;
1871 
1872       case '=':
1873       case '!':
1874       case '>':
1875 	BUFPUSH(start_nowidth);
1876 	*stackp++ = b - bufp->buffer;
1877 	BUFPUSH(0);	/* temporary value */
1878 	BUFPUSH(0);
1879 	if (c != '!') break;
1880 
1881 	BUFPUSH(on_failure_jump);
1882 	*stackp++ = b - bufp->buffer;
1883 	BUFPUSH(0);	/* temporary value */
1884 	BUFPUSH(0);
1885 	break;
1886 
1887       case ':':
1888 	BUFPUSH(start_paren);
1889 	pending_exact = 0;
1890       default:
1891 	break;
1892       }
1893 	if (push_option) {
1894 	  BUFPUSH(option_set);
1895 	  BUFPUSH(options);
1896 	}
1897 	if (casefold) {
1898 	  if (options & RE_OPTION_IGNORECASE)
1899 	    BUFPUSH(casefold_on);
1900 	  else
1901 	    BUFPUSH(casefold_off);
1902 	}
1903       *stackp++ = c;
1904       *stackp++ = old_options;
1905       fixup_alt_jump = 0;
1906       laststart = 0;
1907       begalt = b;
1908       }
1909       break;
1910 
1911     case ')':
1912       if (stackp == stackb)
1913 	FREE_AND_RETURN(stackb, "unmatched )");
1914 
1915       pending_exact = 0;
1916       if (fixup_alt_jump) {
1917 	/* Push a dummy failure point at the end of the
1918 	   alternative for a possible future
1919 	   `finalize_jump' to pop.  See comments at
1920 	   `push_dummy_failure' in `nmz_re_match'.  */
1921 	BUFPUSH(push_dummy_failure);
1922 
1923 	/* We allocated space for this jump when we assigned
1924 	   to `fixup_alt_jump', in the `handle_alt' case below.  */
1925 	store_jump(fixup_alt_jump, jump, b);
1926       }
1927       if (options != stackp[-1]) {
1928 	if ((options ^ stackp[-1]) & RE_OPTION_IGNORECASE) {
1929 	  BUFPUSH((options&RE_OPTION_IGNORECASE)?casefold_off:casefold_on);
1930 	}
1931 	if ((options ^ stackp[-1]) != RE_OPTION_IGNORECASE) {
1932 	  BUFPUSH(option_set);
1933 	  BUFPUSH(stackp[-1]);
1934 	}
1935       }
1936       p0 = b;
1937       options = *--stackp;
1938       switch (c = *--stackp) {
1939       case '(':
1940 	{
1941 	  char *loc = bufp->buffer + *--stackp;
1942 	  *loc = regnum - stackp[-1];
1943 	  BUFPUSH(stop_memory);
1944 	  BUFPUSH(stackp[-1]);
1945 	  BUFPUSH(regnum - stackp[-1]);
1946 	  stackp--;
1947 	}
1948 	break;
1949 
1950       case '!':
1951 	BUFPUSH(pop_and_fail);
1952 	/* back patch */
1953 	STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
1954 	stackp--;
1955 	/* fall through */
1956       case '=':
1957 	BUFPUSH(stop_nowidth);
1958 	/* tell stack-pos place to start_nowidth */
1959 	STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
1960 	BUFPUSH(0);	/* space to hold stack pos */
1961 	BUFPUSH(0);
1962 	stackp--;
1963 	break;
1964 
1965       case '>':
1966 	BUFPUSH(stop_backtrack);
1967 	/* tell stack-pos place to start_nowidth */
1968 	STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
1969 	BUFPUSH(0);	/* space to hold stack pos */
1970 	BUFPUSH(0);
1971 	stackp--;
1972 	break;
1973 
1974       case ':':
1975 	BUFPUSH(stop_paren);
1976 	break;
1977 
1978       default:
1979 	break;
1980       }
1981       begalt = *--stackp + bufp->buffer;
1982       stackp--;
1983       fixup_alt_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
1984       laststart = *--stackp + bufp->buffer;
1985       if (c == '!' || c == '=') laststart = b;
1986       break;
1987 
1988     case '|':
1989       /* Insert before the previous alternative a jump which
1990 	 jumps to this alternative if the former fails.  */
1991       GET_BUFFER_SPACE(3);
1992       insert_jump(on_failure_jump, begalt, b + 6, b);
1993       pending_exact = 0;
1994       b += 3;
1995       /* The alternative before this one has a jump after it
1996 	 which gets executed if it gets matched.  Adjust that
1997 	 jump so it will jump to this alternative's analogous
1998 	 jump (put in below, which in turn will jump to the next
1999 	 (if any) alternative's such jump, etc.).  The last such
2000 	 jump jumps to the correct final destination.  A picture:
2001 	 _____ _____
2002 	 |   | |   |
2003 	 |   v |   v
2004 	 a | b   | c
2005 
2006 	 If we are at `b', then fixup_alt_jump right now points to a
2007 	 three-byte space after `a'.  We'll put in the jump, set
2008 	 fixup_alt_jump to right after `b', and leave behind three
2009 	 bytes which we'll fill in when we get to after `c'.  */
2010 
2011       if (fixup_alt_jump)
2012 	store_jump(fixup_alt_jump, jump_past_alt, b);
2013 
2014       /* Mark and leave space for a jump after this alternative,
2015 	 to be filled in later either by next alternative or
2016 	 when know we're at the end of a series of alternatives.  */
2017       fixup_alt_jump = b;
2018       GET_BUFFER_SPACE(3);
2019       b += 3;
2020 
2021       laststart = 0;
2022       begalt = b;
2023       break;
2024 
2025     case '{':
2026       /* If there is no previous pattern, this is an invalid pattern.  */
2027       if (!laststart) {
2028 	snprintf(error_msg, ERROR_MSG_MAX_SIZE,
2029 		 "invalid regular expression; there's no previous pattern, to which '{' would define cardinality at %d",
2030 		 p-pattern);
2031 	FREE_AND_RETURN(stackb, error_msg);
2032       }
2033       if( p == pend)
2034 	FREE_AND_RETURN(stackb, "invalid regular expression; '{' can't be last character" );
2035 
2036       beg_interval = p - 1;
2037 
2038       lower_bound = -1;			/* So can see if are set.  */
2039       upper_bound = -1;
2040       GET_UNSIGNED_NUMBER(lower_bound);
2041       if (c == ',') {
2042 	GET_UNSIGNED_NUMBER(upper_bound);
2043       }
2044       else
2045 	/* Interval such as `{1}' => match exactly once. */
2046 	upper_bound = lower_bound;
2047 
2048       if (lower_bound < 0 || c != '}')
2049 	goto unfetch_interval;
2050 
2051       if (lower_bound >= RE_DUP_MAX || upper_bound >= RE_DUP_MAX)
2052 	FREE_AND_RETURN(stackb, "too big quantifier in {,}");
2053       if (upper_bound < 0) upper_bound = RE_DUP_MAX;
2054       if (lower_bound > upper_bound)
2055 	FREE_AND_RETURN(stackb, "can't do {n,m} with n > m");
2056 
2057       beg_interval = 0;
2058       pending_exact = 0;
2059 
2060       greedy = 1;
2061       if (p != pend) {
2062 	PATFETCH(c);
2063 	if (c == '?') greedy = 0;
2064 	else PATUNFETCH;
2065       }
2066 
2067       if (lower_bound == 0) {
2068 	zero_times_ok = 1;
2069 	if (upper_bound == RE_DUP_MAX) {
2070 	  many_times_ok = 1;
2071 	  goto repeat;
2072 	}
2073 	if (upper_bound == 1) {
2074 	  many_times_ok = 0;
2075 	  goto repeat;
2076 	}
2077       }
2078       if (lower_bound == 1) {
2079 	if (upper_bound == 1) {
2080 	  /* No need to repeat */
2081 	  break;
2082 	}
2083 	if (upper_bound == RE_DUP_MAX) {
2084 	  many_times_ok = 1;
2085 	  zero_times_ok = 0;
2086 	  goto repeat;
2087 	}
2088       }
2089 
2090       /* If upper_bound is zero, don't want to succeed at all;
2091 	 jump from laststart to b + 3, which will be the end of
2092 	 the buffer after this jump is inserted.  */
2093 
2094       if (upper_bound == 0) {
2095 	GET_BUFFER_SPACE(3);
2096 	insert_jump(jump, laststart, b + 3, b);
2097 	b += 3;
2098 	break;
2099       }
2100 
2101       /* If lower_bound == upper_bound, repeat count can be removed */
2102       if (lower_bound == upper_bound) {
2103 	int mcnt;
2104 	int skip_stop_paren = 0;
2105 
2106 	if (b[-1] == stop_paren) {
2107 	  skip_stop_paren = 1;
2108 	  b--;
2109 	}
2110 
2111 	if (*laststart == exactn && laststart[1]+2 == b - laststart
2112 	    && laststart[1]*lower_bound < 256) {
2113 	  mcnt = laststart[1];
2114 	  GET_BUFFER_SPACE((lower_bound-1)*mcnt);
2115 	  laststart[1] = lower_bound*mcnt;
2116 	  while (--lower_bound) {
2117 	    memcpy(b, laststart+2, mcnt);
2118 	    b += mcnt;
2119 	  }
2120 	  if (skip_stop_paren) BUFPUSH(stop_paren);
2121 	  break;
2122 	}
2123 
2124 	if (lower_bound < 5 && b - laststart < 10) {
2125 	  /* 5 and 10 are the magic numbers */
2126 
2127 	  mcnt = b - laststart;
2128 	  GET_BUFFER_SPACE((lower_bound-1)*mcnt);
2129 	  while (--lower_bound) {
2130 	    memcpy(b, laststart, mcnt);
2131 	    b += mcnt;
2132 	  }
2133 	  if (skip_stop_paren) BUFPUSH(stop_paren);
2134 	  break;
2135 	}
2136 	if (skip_stop_paren) b++; /* push back stop_paren */
2137       }
2138 
2139       /* Otherwise, we have a nontrivial interval.  When
2140 	 we're all done, the pattern will look like:
2141 	 set_number_at <jump count> <upper bound>
2142 	 set_number_at <succeed_n count> <lower bound>
2143 	 succeed_n <after jump addr> <succed_n count>
2144 	 <body of loop>
2145 	 jump_n <succeed_n addr> <jump count>
2146 	 (The upper bound and `jump_n' are omitted if
2147 	 `upper_bound' is 1, though.)  */
2148       { /* If the upper bound is > 1, we need to insert
2149 	   more at the end of the loop.  */
2150 	unsigned nbytes = upper_bound == 1 ? 10 : 20;
2151 
2152 	GET_BUFFER_SPACE(nbytes);
2153 	/* Initialize lower bound of the `succeed_n', even
2154 	   though it will be set during matching by its
2155 	   attendant `set_number_at' (inserted next),
2156 	   because `nmz_re_compile_fastmap' needs to know.
2157 	   Jump to the `jump_n' we might insert below.  */
2158 	insert_jump_n(succeed_n, laststart, b + (nbytes/2),
2159 		      b, lower_bound);
2160 	b += 5; 	/* Just increment for the succeed_n here.  */
2161 
2162 	/* Code to initialize the lower bound.  Insert
2163 	   before the `succeed_n'.  The `5' is the last two
2164 	   bytes of this `set_number_at', plus 3 bytes of
2165 	   the following `succeed_n'.  */
2166 	insert_op_2(set_number_at, laststart, b, 5, lower_bound);
2167 	b += 5;
2168 
2169 	if (upper_bound > 1) {
2170 	  /* More than one repetition is allowed, so
2171 	     append a backward jump to the `succeed_n'
2172 	     that starts this interval.
2173 
2174 	     When we've reached this during matching,
2175 	     we'll have matched the interval once, so
2176 	     jump back only `upper_bound - 1' times.  */
2177 	  GET_BUFFER_SPACE(5);
2178 	  store_jump_n(b, greedy?jump_n:finalize_push_n, laststart + 5,
2179 		       upper_bound - 1);
2180 	  b += 5;
2181 
2182 	  /* The location we want to set is the second
2183 	     parameter of the `jump_n'; that is `b-2' as
2184 	     an absolute address.  `laststart' will be
2185 	     the `set_number_at' we're about to insert;
2186 	     `laststart+3' the number to set, the source
2187 	     for the relative address.  But we are
2188 	     inserting into the middle of the pattern --
2189 	     so everything is getting moved up by 5.
2190 	     Conclusion: (b - 2) - (laststart + 3) + 5,
2191 	     i.e., b - laststart.
2192 
2193 	     We insert this at the beginning of the loop
2194 	     so that if we fail during matching, we'll
2195 	     reinitialize the bounds.  */
2196 	  insert_op_2(set_number_at, laststart, b, b - laststart,
2197 		      upper_bound - 1);
2198 	  b += 5;
2199 	}
2200       }
2201       break;
2202 
2203     unfetch_interval:
2204       /* If an invalid interval, match the characters as literals.  */
2205       p = beg_interval;
2206       beg_interval = 0;
2207 
2208       /* normal_char and normal_backslash need `c'.  */
2209       PATFETCH(c);
2210       goto normal_char;
2211 
2212     case '\\':
2213       if (p == pend)
2214 	FREE_AND_RETURN(stackb, "invalid regular expression; '\\' can't be last character");
2215       /* Do not translate the character after the \, so that we can
2216 	 distinguish, e.g., \B from \b, even if we normally would
2217 	 translate, e.g., B to b.  */
2218       PATFETCH_RAW(c);
2219       switch (c) {
2220       case 's':
2221       case 'S':
2222       case 'd':
2223       case 'D':
2224 	while (b - bufp->buffer + 9 + (1 << BYTEWIDTH) / BYTEWIDTH
2225 	       > bufp->allocated)
2226 	  EXTEND_BUFFER;
2227 
2228 	laststart = b;
2229 	if (c == 's' || c == 'd') {
2230 	  BUFPUSH(charset);
2231 	}
2232 	else {
2233 	  BUFPUSH(charset_not);
2234 	}
2235 
2236 	BUFPUSH((1 << BYTEWIDTH) / BYTEWIDTH);
2237 	memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
2238 	if (c == 's' || c == 'S') {
2239 	  SET_LIST_BIT(' ');
2240 	  SET_LIST_BIT('\t');
2241 	  SET_LIST_BIT('\n');
2242 	  SET_LIST_BIT('\r');
2243 	  SET_LIST_BIT('\f');
2244 	}
2245 	else {
2246 	  char cc;
2247 
2248 	  for (cc = '0'; cc <= '9'; cc++) {
2249 	    SET_LIST_BIT(cc);
2250 	  }
2251 	}
2252 
2253 	while ((int)b[-1] > 0 && b[b[-1] - 1] == 0)
2254 	  b[-1]--;
2255 	if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
2256 	  memmove(&b[(int)b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
2257 		  2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
2258 	b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[(int)b[-1]])*8;
2259 	break;
2260 
2261       case 'w':
2262 	laststart = b;
2263 	BUFPUSH(wordchar);
2264 	break;
2265 
2266       case 'W':
2267 	laststart = b;
2268 	BUFPUSH(notwordchar);
2269 	break;
2270 
2271 #ifndef RUBY
2272       case '<':
2273 	BUFPUSH(wordbeg);
2274 	break;
2275 
2276       case '>':
2277 	BUFPUSH(wordend);
2278 	break;
2279 #endif
2280 
2281       case 'b':
2282 	BUFPUSH(wordbound);
2283 	break;
2284 
2285       case 'B':
2286 	BUFPUSH(notwordbound);
2287 	break;
2288 
2289       case 'A':
2290 	BUFPUSH(begbuf);
2291 	break;
2292 
2293       case 'Z':
2294 	if ((bufp->options & RE_OPTION_SINGLELINE) == 0) {
2295 	  BUFPUSH(endbuf2);
2296 	  break;
2297 	}
2298 	/* fall through */
2299       case 'z':
2300 	BUFPUSH(endbuf);
2301 	break;
2302 
2303       case 'G':
2304 	BUFPUSH(begpos);
2305 	break;
2306 
2307 	/* hex */
2308       case 'x':
2309 	had_mbchar = 0;
2310 	c = nmz_scan_hex(p, 2, &numlen);
2311 	p += numlen;
2312 	had_num_literal = 1;
2313 	goto numeric_char;
2314 
2315 	/* octal */
2316       case '0':
2317 	had_mbchar = 0;
2318 	c = nmz_scan_oct(p, 3, &numlen);
2319 	p += numlen;
2320 	had_num_literal = 1;
2321 	goto numeric_char;
2322 
2323 	/* back-ref or octal */
2324       case '1': case '2': case '3':
2325       case '4': case '5': case '6':
2326       case '7': case '8': case '9':
2327 	  PATUNFETCH;
2328 	p0 = p;
2329 
2330 	  had_mbchar = 0;
2331 	  c1 = 0;
2332 	  GET_UNSIGNED_NUMBER(c1);
2333 	  if (!ISDIGIT(c)) PATUNFETCH;
2334 
2335 	if (9 < c1 && c1 >= regnum) {
2336 	    /* need to get octal */
2337 	  c = nmz_scan_oct(p0, 3, &numlen) & 0xff;
2338 	  p = p0 + numlen;
2339 	    c1 = 0;
2340 	    had_num_literal = 1;
2341 	    goto numeric_char;
2342 	  }
2343 
2344 	laststart = b;
2345 	BUFPUSH(duplicate);
2346 	BUFPUSH(c1);
2347 	break;
2348 
2349       case 'M':
2350       case 'C':
2351       case 'c':
2352 	p0 = --p;
2353 	c = read_special(p, pend, &p0);
2354 	if (c > 255) goto invalid_escape;
2355 	p = p0;
2356 	had_num_literal = 1;
2357 	goto numeric_char;
2358 
2359       default:
2360 	c = read_backslash(c);
2361 	goto normal_char;
2362       }
2363       break;
2364 
2365     case '#':
2366       if (options & RE_OPTION_EXTENDED) {
2367 	while (p != pend) {
2368 	  PATFETCH(c);
2369 	  if (c == '\n') break;
2370 	}
2371 	break;
2372       }
2373       goto normal_char;
2374 
2375     case ' ':
2376     case '\t':
2377     case '\f':
2378     case '\r':
2379     case '\n':
2380       if (options & RE_OPTION_EXTENDED)
2381 	break;
2382 
2383     default:
2384     normal_char:		/* Expects the character in `c'.  */
2385       had_mbchar = 0;
2386       if (ismbchar(c)) {
2387 	had_mbchar = 1;
2388 	c1 = p - pattern;
2389       }
2390     numeric_char:
2391       nextp = p + mbclen(c) - 1;
2392       if (!pending_exact || pending_exact + *pending_exact + 1 != b
2393 	  || *pending_exact >= (c1 ? 0176 : 0177)
2394 	  || *nextp == '+' || *nextp == '?'
2395 	  || *nextp == '*' || *nextp == '^'
2396 	  || *nextp == '{') {
2397 	laststart = b;
2398 	BUFPUSH(exactn);
2399 	pending_exact = b;
2400 	BUFPUSH(0);
2401       }
2402       if (had_num_literal || c == 0xff) {
2403 	BUFPUSH(0xff);
2404 	(*pending_exact)++;
2405 	had_num_literal = 0;
2406       }
2407       BUFPUSH(c);
2408       (*pending_exact)++;
2409       if (had_mbchar) {
2410 	int len = mbclen(c) - 1;
2411 	while (len--) {
2412 	  PATFETCH_RAW(c);
2413 	  BUFPUSH(c);
2414 	  (*pending_exact)++;
2415 	}
2416       }
2417     }
2418   }
2419 
2420   if (fixup_alt_jump)
2421     store_jump(fixup_alt_jump, jump, b);
2422 
2423   if (stackp != stackb)
2424     FREE_AND_RETURN(stackb, "unmatched (");
2425 
2426   /* set optimize flags */
2427   laststart = bufp->buffer;
2428   if (laststart != b) {
2429     if (*laststart == start_memory) laststart += 3;
2430     if (*laststart == dummy_failure_jump) laststart += 3;
2431     else if (*laststart == try_next) laststart += 3;
2432     if (*laststart == anychar_repeat) {
2433       bufp->options |= RE_OPTIMIZE_ANCHOR;
2434     }
2435     else if (*laststart == on_failure_jump) {
2436       int mcnt;
2437 
2438       laststart++;
2439       EXTRACT_NUMBER_AND_INCR(mcnt, laststart);
2440       if (*laststart == charset || *laststart == charset_not) {
2441 	p0 = laststart;
2442 	mcnt = *++p0;
2443 	p0 += mcnt+1;
2444 	mcnt = EXTRACT_UNSIGNED_AND_INCR(p0);
2445 	p0 += 8*mcnt;
2446 	if (*p0 == maybe_finalize_jump) {
2447 	  bufp->stclass = laststart;
2448 	}
2449       }
2450     }
2451   }
2452 
2453   bufp->used = b - bufp->buffer;
2454   bufp->re_nsub = regnum;
2455   laststart = bufp->buffer;
2456   if (laststart != b) {
2457     if (*laststart == start_memory) laststart += 3;
2458     if (*laststart == exactn) {
2459       bufp->options |= RE_OPTIMIZE_EXACTN;
2460       bufp->must = laststart+1;
2461     }
2462   }
2463   if (!bufp->must) {
2464     bufp->must = calculate_must_string(bufp->buffer, b);
2465   }
2466   if (current_mbctype == MBCTYPE_SJIS) bufp->options |= RE_OPTIMIZE_NO_BM;
2467   else if (bufp->must) {
2468     int i;
2469     int len = (unsigned char)bufp->must[0];
2470 
2471     for (i=1; i<len; i++) {
2472       if ((unsigned char)bufp->must[i] == 0xff ||
2473 	  (current_mbctype && ismbchar(bufp->must[i]))) {
2474 	bufp->options |= RE_OPTIMIZE_NO_BM;
2475 	break;
2476       }
2477     }
2478     if (!(bufp->options & RE_OPTIMIZE_NO_BM)) {
2479       bufp->must_skip = (int *) nmz_xmalloc((1 << BYTEWIDTH)*sizeof(int));
2480       bm_init_skip(bufp->must_skip, (unsigned char*)bufp->must+1,
2481 		   (unsigned char)bufp->must[0],
2482 		   (unsigned char*)(MAY_TRANSLATE()?translate:0));
2483     }
2484   }
2485 
2486   bufp->regstart = TMALLOC(regnum, unsigned char*);
2487   bufp->regend = TMALLOC(regnum, unsigned char*);
2488   bufp->old_regstart = TMALLOC(regnum, unsigned char*);
2489   bufp->old_regend = TMALLOC(regnum, unsigned char*);
2490   bufp->reg_info = TMALLOC(regnum, register_info_type);
2491   bufp->best_regstart = TMALLOC(regnum, unsigned char*);
2492   bufp->best_regend = TMALLOC(regnum, unsigned char*);
2493   FREE_AND_RETURN(stackb, 0);
2494 
2495  invalid_pattern:
2496   FREE_AND_RETURN(stackb, "invalid regular expression");
2497 
2498  end_of_pattern:
2499   FREE_AND_RETURN(stackb, "premature end of regular expression");
2500 
2501  too_big:
2502   FREE_AND_RETURN(stackb, "regular expression too big");
2503 
2504  memory_exhausted:
2505   FREE_AND_RETURN(stackb, "memory exhausted");
2506 
2507  nested_meta:
2508   FREE_AND_RETURN(stackb, "nested *?+ in regexp");
2509 
2510  invalid_escape:
2511   FREE_AND_RETURN(stackb, "Invalid escape character syntax");
2512 }
2513 
2514 void
nmz_re_free_pattern(bufp)2515 nmz_re_free_pattern(bufp)
2516      struct re_pattern_buffer *bufp;
2517 {
2518   xfree(bufp->buffer);
2519   xfree(bufp->fastmap);
2520   if (bufp->must_skip) xfree(bufp->must_skip);
2521 
2522   xfree(bufp->regstart);
2523   xfree(bufp->regend);
2524   xfree(bufp->old_regstart);
2525   xfree(bufp->old_regend);
2526   xfree(bufp->best_regstart);
2527   xfree(bufp->best_regend);
2528   xfree(bufp->reg_info);
2529   xfree(bufp);
2530 }
2531 
2532 /* Store a jump of the form <OPCODE> <relative address>.
2533    Store in the location FROM a jump operation to jump to relative
2534    address FROM - TO.  OPCODE is the opcode to store.  */
2535 
2536 static void
store_jump(from,opcode,to)2537 store_jump(from, opcode, to)
2538      char *from, *to;
2539      int opcode;
2540 {
2541   from[0] = (char)opcode;
2542   STORE_NUMBER(from + 1, to - (from + 3));
2543 }
2544 
2545 
2546 /* Open up space before char FROM, and insert there a jump to TO.
2547    CURRENT_END gives the end of the storage not in use, so we know
2548    how much data to copy up. OP is the opcode of the jump to insert.
2549 
2550    If you call this function, you must zero out pending_exact.  */
2551 
2552 static void
insert_jump(op,from,to,current_end)2553 insert_jump(op, from, to, current_end)
2554      int op;
2555      char *from, *to, *current_end;
2556 {
2557   register char *pfrom = current_end;		/* Copy from here...  */
2558   register char *pto = current_end + 3;		/* ...to here.  */
2559 
2560   while (pfrom != from)
2561     *--pto = *--pfrom;
2562   store_jump(from, op, to);
2563 }
2564 
2565 
2566 /* Store a jump of the form <opcode> <relative address> <n> .
2567 
2568    Store in the location FROM a jump operation to jump to relative
2569    address FROM - TO.  OPCODE is the opcode to store, N is a number the
2570    jump uses, say, to decide how many times to jump.
2571 
2572    If you call this function, you must zero out pending_exact.  */
2573 
2574 static void
store_jump_n(from,opcode,to,n)2575 store_jump_n(from, opcode, to, n)
2576      char *from, *to;
2577      int opcode;
2578      unsigned n;
2579 {
2580   from[0] = (char)opcode;
2581   STORE_NUMBER(from + 1, to - (from + 3));
2582   STORE_NUMBER(from + 3, n);
2583 }
2584 
2585 
2586 /* Similar to insert_jump, but handles a jump which needs an extra
2587    number to handle minimum and maximum cases.  Open up space at
2588    location FROM, and insert there a jump to TO.  CURRENT_END gives the
2589    end of the storage in use, so we know how much data to copy up. OP is
2590    the opcode of the jump to insert.
2591 
2592    If you call this function, you must zero out pending_exact.  */
2593 
2594 static void
insert_jump_n(op,from,to,current_end,n)2595 insert_jump_n(op, from, to, current_end, n)
2596      int op;
2597      char *from, *to, *current_end;
2598      unsigned n;
2599 {
2600   register char *pfrom = current_end;		/* Copy from here...  */
2601   register char *pto = current_end + 5;		/* ...to here.  */
2602 
2603   while (pfrom != from)
2604     *--pto = *--pfrom;
2605   store_jump_n(from, op, to, n);
2606 }
2607 
2608 
2609 /* Open up space at location THERE, and insert operation OP.
2610    CURRENT_END gives the end of the storage in use, so
2611    we know how much data to copy up.
2612 
2613    If you call this function, you must zero out pending_exact.  */
2614 
2615 #if 0
2616 static void
2617 insert_op(op, there, current_end)
2618      int op;
2619      char *there, *current_end;
2620 {
2621   register char *pfrom = current_end;		/* Copy from here...  */
2622   register char *pto = current_end + 1;		/* ...to here.  */
2623 
2624   while (pfrom != there)
2625     *--pto = *--pfrom;
2626 
2627   there[0] = (char)op;
2628 }
2629 #endif
2630 
2631 
2632 /* Open up space at location THERE, and insert operation OP followed by
2633    NUM_1 and NUM_2.  CURRENT_END gives the end of the storage in use, so
2634    we know how much data to copy up.
2635 
2636    If you call this function, you must zero out pending_exact.  */
2637 
2638 static void
insert_op_2(op,there,current_end,num_1,num_2)2639 insert_op_2(op, there, current_end, num_1, num_2)
2640      int op;
2641      char *there, *current_end;
2642      int num_1, num_2;
2643 {
2644   register char *pfrom = current_end;		/* Copy from here...  */
2645   register char *pto = current_end + 5;		/* ...to here.  */
2646 
2647   while (pfrom != there)
2648     *--pto = *--pfrom;
2649 
2650   there[0] = (char)op;
2651   STORE_NUMBER(there + 1, num_1);
2652   STORE_NUMBER(there + 3, num_2);
2653 }
2654 
2655 
2656 #define trans_eq(c1, c2, translate) (translate?(translate[c1]==translate[c2]):((c1)==(c2)))
2657 static int
slow_match(little,lend,big,bend,translate)2658 slow_match(little, lend, big, bend, translate)
2659      unsigned char *little, *lend;
2660      unsigned char *big, *bend;
2661      unsigned char *translate;
2662 {
2663   int c;
2664 
2665   while (little < lend && big < bend) {
2666     c = *little++;
2667     if (c == 0xff)
2668       c = *little++;
2669     if (!trans_eq(*big++, c, translate)) break;
2670   }
2671   if (little == lend) return 1;
2672   return 0;
2673 }
2674 
2675 static int
slow_search(little,llen,big,blen,translate)2676 slow_search(little, llen, big, blen, translate)
2677      unsigned char *little;
2678      int llen;
2679      unsigned char *big;
2680      int blen;
2681      char *translate;
2682 {
2683   unsigned char *bsave = big;
2684   unsigned char *bend = big + blen;
2685   register int c;
2686   int fescape = 0;
2687 
2688   c = *little;
2689   if (c == 0xff) {
2690     c = little[1];
2691     fescape = 1;
2692   }
2693   else if (translate && !ismbchar(c)) {
2694     c = translate[c];
2695   }
2696 
2697   while (big < bend) {
2698     /* look for first character */
2699     if (fescape) {
2700       while (big < bend) {
2701 	if (*big == c) break;
2702 	big++;
2703       }
2704     }
2705     else if (translate && !ismbchar(c)) {
2706       while (big < bend) {
2707 	if (ismbchar(*big)) big+=mbclen(*big)-1;
2708 	else if (translate[*big] == c) break;
2709 	big++;
2710       }
2711     }
2712     else {
2713       while (big < bend) {
2714 	if (*big == c) break;
2715 	if (ismbchar(*big)) big+=mbclen(*big)-1;
2716 	big++;
2717       }
2718     }
2719 
2720     if (slow_match(little, little+llen, big, bend, translate))
2721       return big - bsave;
2722 
2723     big+=mbclen(*big);
2724   }
2725   return -1;
2726 }
2727 
2728 static void
bm_init_skip(skip,pat,m,translate)2729 bm_init_skip(skip, pat, m, translate)
2730      int *skip;
2731      unsigned char *pat;
2732      int m;
2733      const unsigned char *translate;
2734 {
2735   int j, c;
2736 
2737   for (c=0; c<256; c++) {
2738     skip[c] = m;
2739   }
2740   if (translate) {
2741     for (j=0; j<m-1; j++) {
2742       skip[translate[pat[j]]] = m-1-j;
2743     }
2744   }
2745   else {
2746     for (j=0; j<m-1; j++) {
2747       skip[pat[j]] = m-1-j;
2748     }
2749   }
2750 }
2751 
2752 static int
bm_search(little,llen,big,blen,skip,translate)2753 bm_search(little, llen, big, blen, skip, translate)
2754      unsigned char *little;
2755      int llen;
2756      unsigned char *big;
2757      int blen;
2758      int *skip;
2759      unsigned char *translate;
2760 {
2761   int i, j, k;
2762 
2763   i = llen-1;
2764   if (translate) {
2765     while (i < blen) {
2766       k = i;
2767       j = llen-1;
2768       while (j >= 0 && translate[big[k]] == translate[little[j]]) {
2769 	k--;
2770 	j--;
2771       }
2772       if (j < 0) return k+1;
2773 
2774       i += skip[translate[big[i]]];
2775     }
2776     return -1;
2777   }
2778   while (i < blen) {
2779     k = i;
2780     j = llen-1;
2781     while (j >= 0 && big[k] == little[j]) {
2782       k--;
2783       j--;
2784     }
2785     if (j < 0) return k+1;
2786 
2787     i += skip[big[i]];
2788   }
2789   return -1;
2790 }
2791 
2792 /* Given a pattern, compute a fastmap from it.  The fastmap records
2793    which of the (1 << BYTEWIDTH) possible characters can start a string
2794    that matches the pattern.  This fastmap is used by nmz_re_search to skip
2795    quickly over totally implausible text.
2796 
2797    The caller must supply the address of a (1 << BYTEWIDTH)-byte data
2798    area as bufp->fastmap.
2799    The other components of bufp describe the pattern to be used.  */
2800 void
nmz_re_compile_fastmap(bufp)2801 nmz_re_compile_fastmap(bufp)
2802      struct re_pattern_buffer *bufp;
2803 {
2804   unsigned char *pattern = (unsigned char*)bufp->buffer;
2805   int size = bufp->used;
2806   register char *fastmap = bufp->fastmap;
2807   register unsigned char *p = pattern;
2808   register unsigned char *pend = pattern + size;
2809   register int j, k;
2810   unsigned is_a_succeed_n;
2811 
2812 
2813   unsigned char *stacka[NFAILURES];
2814   unsigned char **stackb = stacka;
2815   unsigned char **stackp = stackb;
2816   unsigned char **stacke = stackb + NFAILURES;
2817   int options = bufp->options;
2818 
2819   memset(fastmap, 0, (1 << BYTEWIDTH));
2820   bufp->fastmap_accurate = 1;
2821   bufp->can_be_null = 0;
2822 
2823   while (p) {
2824     is_a_succeed_n = 0;
2825     if (p == pend) {
2826       bufp->can_be_null = 1;
2827       break;
2828     }
2829 #ifdef SWITCH_ENUM_BUG
2830     switch ((int)((enum regexpcode)*p++))
2831 #else
2832     switch ((enum regexpcode)*p++)
2833 #endif
2834       {
2835       case exactn:
2836 	if (p[1] == 0xff) {
2837 	  if (TRANSLATE_P())
2838 	    fastmap[translate[p[2]]] = 2;
2839 	  else
2840 	    fastmap[p[2]] = 2;
2841 	  bufp->options |= RE_OPTIMIZE_BMATCH;
2842 	}
2843 	else if (TRANSLATE_P())
2844 	  fastmap[translate[p[1]]] = 1;
2845 	else
2846 	  fastmap[p[1]] = 1;
2847 	break;
2848 
2849       case begline:
2850       case begbuf:
2851       case endbuf:
2852       case endbuf2:
2853       case wordbound:
2854       case notwordbound:
2855       case wordbeg:
2856       case wordend:
2857       case pop_and_fail:
2858       case push_dummy_failure:
2859       case start_paren:
2860       case stop_paren:
2861 	continue;
2862 
2863       case casefold_on:
2864 	bufp->options |= RE_MAY_IGNORECASE;
2865       case casefold_off:
2866 	options ^= RE_OPTION_IGNORECASE;
2867 	continue;
2868 
2869       case option_set:
2870 	options = *p++;
2871 	continue;
2872 
2873       case endline:
2874 	if (TRANSLATE_P())
2875 	  fastmap[translate['\n']] = 1;
2876 	else
2877 	  fastmap['\n'] = 1;
2878 	if ((options & RE_OPTION_SINGLELINE) == 0 && bufp->can_be_null == 0)
2879 	  bufp->can_be_null = 2;
2880 	break;
2881 
2882       case jump_n:
2883       case finalize_jump:
2884       case maybe_finalize_jump:
2885       case jump:
2886       case jump_past_alt:
2887       case dummy_failure_jump:
2888       case finalize_push:
2889       case finalize_push_n:
2890 	EXTRACT_NUMBER_AND_INCR(j, p);
2891 	p += j;
2892 	if (j > 0)
2893 	  continue;
2894 	/* Jump backward reached implies we just went through
2895 	   the body of a loop and matched nothing.
2896 	   Opcode jumped to should be an on_failure_jump.
2897 	   Just treat it like an ordinary jump.
2898 	   For a * loop, it has pushed its failure point already;
2899 	   If so, discard that as redundant.  */
2900 
2901 	if ((enum regexpcode)*p != on_failure_jump
2902 	    && (enum regexpcode)*p != try_next
2903 	    && (enum regexpcode)*p != succeed_n)
2904 	  continue;
2905 	p++;
2906 	EXTRACT_NUMBER_AND_INCR(j, p);
2907 	p += j;
2908 	if (stackp != stackb && *stackp == p)
2909 	  stackp--;		/* pop */
2910 	continue;
2911 
2912       case try_next:
2913       case start_nowidth:
2914       case stop_nowidth:
2915       case stop_backtrack:
2916 	p += 2;
2917 	continue;
2918 
2919       case succeed_n:
2920 	is_a_succeed_n = 1;
2921 	/* Get to the number of times to succeed.  */
2922 	EXTRACT_NUMBER(k, p + 2);
2923 	/* Increment p past the n for when k != 0.  */
2924 	if (k != 0) {
2925 	  p += 4;
2926 	  continue;
2927 	}
2928 	/* fall through */
2929 
2930       case on_failure_jump:
2931       EXTRACT_NUMBER_AND_INCR(j, p);
2932       if (p + j < pend) {
2933 	if (stackp == stacke) {
2934 	  EXPAND_FAIL_STACK();
2935 	}
2936 	*++stackp = p + j;	/* push */
2937       }
2938       else {
2939 	bufp->can_be_null = 1;
2940       }
2941       if (is_a_succeed_n)
2942 	EXTRACT_NUMBER_AND_INCR(k, p);	/* Skip the n.  */
2943       continue;
2944 
2945       case set_number_at:
2946 	p += 4;
2947 	continue;
2948 
2949       case start_memory:
2950       case stop_memory:
2951 	p += 2;
2952 	continue;
2953 
2954       case duplicate:
2955 	bufp->can_be_null = 1;
2956 	fastmap['\n'] = 1;
2957       case anychar_repeat:
2958       case anychar:
2959 	for (j = 0; j < (1 << BYTEWIDTH); j++) {
2960 	  if (j != '\n' || (options & RE_OPTION_MULTILINE))
2961 	    fastmap[j] = 1;
2962 	}
2963 	if (bufp->can_be_null) {
2964 	  FREE_AND_RETURN_VOID(stackb);
2965 	}
2966 	/* Don't return; check the alternative paths
2967 	   so we can set can_be_null if appropriate.  */
2968 	if ((enum regexpcode)p[-1] == anychar_repeat) {
2969 	    continue;
2970 	}
2971 	break;
2972 
2973       case wordchar:
2974 	for (j = 0; j < 0x80; j++) {
2975 	  if (SYNTAX(j) == Sword)
2976 	    fastmap[j] = 1;
2977 	}
2978 	switch (current_mbctype) {
2979 	case MBCTYPE_ASCII:
2980 	  for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
2981 	    if (SYNTAX(j) == Sword2)
2982 	      fastmap[j] = 1;
2983 	  }
2984 	  break;
2985 	case MBCTYPE_EUC:
2986 	case MBCTYPE_SJIS:
2987 	case MBCTYPE_UTF8:
2988 	  for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
2989 	    if (re_mbctab[j])
2990 	      fastmap[j] = 1;
2991 	  }
2992 	  break;
2993 	}
2994 	break;
2995 
2996       case notwordchar:
2997 	for (j = 0; j < 0x80; j++)
2998 	  if (SYNTAX(j) != Sword)
2999 	    fastmap[j] = 1;
3000 	switch (current_mbctype) {
3001 	case MBCTYPE_ASCII:
3002 	  for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
3003 	    if (SYNTAX(j) != Sword2)
3004 	      fastmap[j] = 1;
3005 	  }
3006 	  break;
3007 	case MBCTYPE_EUC:
3008 	case MBCTYPE_SJIS:
3009 	case MBCTYPE_UTF8:
3010 	  for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
3011 	    if (!re_mbctab[j])
3012 	      fastmap[j] = 1;
3013 	  }
3014 	  break;
3015 	}
3016 	break;
3017 
3018       case charset:
3019 	/* NOTE: Charset for single-byte chars never contain
3020 	   multi-byte char.  See set_list_bits().  */
3021 	for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3022 	  if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) {
3023 	    int tmp = TRANSLATE_P()?translate[j]:j;
3024 	    fastmap[tmp] = 1;
3025 	  }
3026 	{
3027 	  unsigned short size;
3028 	  unsigned long c, beg, end;
3029 
3030 	  p += p[-1] + 2;
3031 	  size = EXTRACT_UNSIGNED(&p[-2]);
3032 	  for (j = 0; j < (int)size; j++) {
3033 	    c = EXTRACT_MBC(&p[j*8]);
3034 	    beg = WC2MBC1ST(c);
3035 	    c = EXTRACT_MBC(&p[j*8+4]);
3036 	    end = WC2MBC1ST(c);
3037 	    /* set bits for 1st bytes of multi-byte chars.  */
3038 	    while (beg <= end) {
3039 	      /* NOTE: Charset for multi-byte chars might contain
3040 		 single-byte chars.  We must reject them. */
3041 	      if (c < 0x100) {
3042 		fastmap[beg] = 2;
3043 		bufp->options |= RE_OPTIMIZE_BMATCH;
3044 	      }
3045 	      else if (ismbchar(beg))
3046 		fastmap[beg] = 1;
3047 	      beg++;
3048 	    }
3049 	  }
3050 	}
3051 	break;
3052 
3053       case charset_not:
3054 	/* S: set of all single-byte chars.
3055 	   M: set of all first bytes that can start multi-byte chars.
3056 	   s: any set of single-byte chars.
3057 	   m: any set of first bytes that can start multi-byte chars.
3058 
3059 	   We assume S+M = U.
3060 	   ___      _   _
3061 	   s+m = (S*s+M*m).  */
3062 	/* Chars beyond end of map must be allowed */
3063 	/* NOTE: Charset_not for single-byte chars might contain
3064 	   multi-byte chars.  See set_list_bits(). */
3065 	for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
3066 	  if (!ismbchar(j))
3067 	    fastmap[j] = 1;
3068 
3069 	for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
3070 	  if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) {
3071 	    if (!ismbchar(j))
3072 	      fastmap[j] = 1;
3073 	  }
3074 	{
3075 	  unsigned short size;
3076 	  unsigned long c, beg;
3077 	  int num_literal = 0;
3078 
3079 	  p += p[-1] + 2;
3080 	  size = EXTRACT_UNSIGNED(&p[-2]);
3081 	  if (size == 0) {
3082 	    for (j = 0x80; j < (1 << BYTEWIDTH); j++)
3083 	      if (ismbchar(j))
3084 		fastmap[j] = 1;
3085 	    break;
3086 	  }
3087 	  for (j = 0,c = 0;j < (int)size; j++) {
3088 	    unsigned int cc = EXTRACT_MBC(&p[j*8]);
3089 	    beg = WC2MBC1ST(cc);
3090 	    while (c <= beg) {
3091 	      if (ismbchar(c))
3092 		fastmap[c] = 1;
3093 	      c++;
3094 	    }
3095 
3096 	    cc = EXTRACT_MBC(&p[j*8+4]);
3097 	    if (cc < 0xff) {
3098 	      num_literal = 1;
3099 	      while (c <= cc) {
3100 		if (ismbchar(c))
3101 		  fastmap[c] = 1;
3102 		c++;
3103 	      }
3104 	    }
3105 	    c = WC2MBC1ST(cc);
3106 	  }
3107 
3108 	  for (j = c; j < (1 << BYTEWIDTH); j++) {
3109 	    if (num_literal)
3110 	      fastmap[j] = 1;
3111 	    if (ismbchar(j))
3112 	      fastmap[j] = 1;
3113 	  }
3114 	}
3115 	break;
3116 
3117       case begpos:
3118       case unused:	/* pacify gcc -Wall */
3119 	break;
3120       }
3121 
3122     /* Get here means we have successfully found the possible starting
3123        characters of one path of the pattern.  We need not follow this
3124        path any farther.  Instead, look at the next alternative
3125        remembered in the stack.  */
3126     if (stackp != stackb)
3127       p = *stackp--;		/* pop */
3128     else
3129       break;
3130   }
3131   FREE_AND_RETURN_VOID(stackb);
3132 }
3133 
3134 /* adjust startpos value to the position between characters. */
3135 int
nmz_re_adjust_startpos(bufp,string,size,startpos,range)3136 nmz_re_adjust_startpos(bufp, string, size, startpos, range)
3137      struct re_pattern_buffer *bufp;
3138      const char *string;
3139      int size, startpos, range;
3140 {
3141   /* Update the fastmap now if not correct already.  */
3142   if (!bufp->fastmap_accurate) {
3143     nmz_re_compile_fastmap(bufp);
3144   }
3145 
3146   /* Adjust startpos for mbc string */
3147   if (current_mbctype && startpos>0 && !(bufp->options&RE_OPTIMIZE_BMATCH)) {
3148     int i = 0;
3149 
3150     if (range > 0) {
3151       while (i<size) {
3152 	i += mbclen(string[i]);
3153 	if (startpos <= i) {
3154 	  startpos = i;
3155 	  break;
3156 	}
3157       }
3158     }
3159     else {
3160       int w;
3161 
3162       while (i<size) {
3163 	w = mbclen(string[i]);
3164 	if (startpos < i + w) {
3165 	  startpos = i;
3166 	  break;
3167 	}
3168 	i += w;
3169       }
3170     }
3171   }
3172   return startpos;
3173 }
3174 
3175 
3176 /* Using the compiled pattern in BUFP->buffer, first tries to match
3177    STRING, starting first at index STARTPOS, then at STARTPOS + 1, and
3178    so on.  RANGE is the number of places to try before giving up.  If
3179    RANGE is negative, it searches backwards, i.e., the starting
3180    positions tried are STARTPOS, STARTPOS - 1, etc.  STRING is of SIZE.
3181    In REGS, return the indices of STRING that matched the entire
3182    BUFP->buffer and its contained subexpressions.
3183 
3184    The value returned is the position in the strings at which the match
3185    was found, or -1 if no match was found, or -2 if error (such as
3186    failure stack overflow).  */
3187 
3188 int
nmz_re_search(bufp,string,size,startpos,range,regs)3189 nmz_re_search(bufp, string, size, startpos, range, regs)
3190      struct re_pattern_buffer *bufp;
3191      const char *string;
3192      int size, startpos, range;
3193      struct re_registers *regs;
3194 {
3195   register char *fastmap = bufp->fastmap;
3196   int val, anchor = 0;
3197 
3198   /* Check for out-of-range starting position.  */
3199   if (startpos < 0  ||  startpos > size)
3200     return -1;
3201 
3202   /* Update the fastmap now if not correct already.  */
3203   if (fastmap && !bufp->fastmap_accurate) {
3204     nmz_re_compile_fastmap(bufp);
3205   }
3206 
3207 
3208   /* If the search isn't to be a backwards one, don't waste time in a
3209      search for a pattern that must be anchored.  */
3210   if (bufp->used > 0) {
3211     switch ((enum regexpcode)bufp->buffer[0]) {
3212     case begbuf:
3213     begbuf_match:
3214       if (range > 0) {
3215 	if (startpos > 0) return -1;
3216 	else {
3217 	  val = nmz_re_match(bufp, string, size, 0, regs);
3218 	  if (val >= 0) return 0;
3219 	  return val;
3220 	}
3221       }
3222       break;
3223 
3224     case begline:
3225       anchor = 1;
3226       break;
3227 
3228     case begpos:
3229       val = nmz_re_match(bufp, string, size, startpos, regs);
3230       if (val >= 0) return startpos;
3231       return val;
3232 
3233     default:
3234       break;
3235     }
3236   }
3237   if (bufp->options & RE_OPTIMIZE_ANCHOR) {
3238     if (bufp->options&RE_OPTION_SINGLELINE) {
3239       goto begbuf_match;
3240     }
3241     anchor = 1;
3242   }
3243 
3244   if (bufp->must) {
3245     int len = ((unsigned char*)bufp->must)[0];
3246     int pos, pbeg, pend;
3247 
3248     pbeg = startpos;
3249     pend = startpos + range;
3250     if (pbeg > pend) {		/* swap pbeg,pend */
3251       pos = pend; pend = pbeg; pbeg = pos;
3252     }
3253     pend = size;
3254     if (bufp->options & RE_OPTIMIZE_NO_BM) {
3255       pos = slow_search(bufp->must+1, len,
3256 			string+pbeg, pend-pbeg,
3257 			MAY_TRANSLATE()?translate:0);
3258     }
3259     else {
3260       pos = bm_search(bufp->must+1, len,
3261 		      string+pbeg, pend-pbeg,
3262 		      bufp->must_skip,
3263 		      MAY_TRANSLATE()?translate:0);
3264     }
3265     if (pos == -1) return -1;
3266     if (range > 0 && (bufp->options & RE_OPTIMIZE_EXACTN)) {
3267       startpos += pos;
3268       range -= pos;
3269       if (range < 0) return -1;
3270     }
3271   }
3272 
3273   for (;;) {
3274     /* If a fastmap is supplied, skip quickly over characters that
3275        cannot possibly be the start of a match.  Note, however, that
3276        if the pattern can possibly match the null string, we must
3277        test it at each starting point so that we take the first null
3278        string we get.  */
3279 
3280     if (fastmap && startpos < size
3281 	&& bufp->can_be_null != 1 && !(anchor && startpos == 0)) {
3282       if (range > 0) {	/* Searching forwards.  */
3283 	register unsigned char *p, c;
3284 	int irange = range;
3285 
3286 	p = (unsigned char*)string+startpos;
3287 
3288 	while (range > 0) {
3289 	  c = *p++;
3290 	  if (ismbchar(c)) {
3291 	    int len;
3292 
3293 	    if (fastmap[c])
3294 	      break;
3295 	    len = mbclen(c) - 1;
3296 	    while (len--) {
3297 	      c = *p++;
3298 	      range--;
3299 	      if (fastmap[c] == 2)
3300 		goto startpos_adjust;
3301 	    }
3302 	  }
3303 	  else {
3304 	    if (fastmap[MAY_TRANSLATE() ? translate[c] : c])
3305 	      break;
3306 	  }
3307 	  range--;
3308 	}
3309       startpos_adjust:
3310 	startpos += irange - range;
3311       }
3312       else {			/* Searching backwards.  */
3313 	register unsigned char c;
3314 
3315 	c = string[startpos];
3316 	c &= 0xff;
3317 	if (MAY_TRANSLATE() ? !fastmap[translate[c]] : !fastmap[c])
3318 	  goto advance;
3319       }
3320     }
3321 
3322     if (startpos > size) return -1;
3323     if ((anchor || !bufp->can_be_null) && range > 0 && size > 0 && startpos == size)
3324       return -1;
3325     val = nmz_re_match(bufp, string, size, startpos, regs);
3326     if (val >= 0) return startpos;
3327     if (val == -2) return -2;
3328 
3329 #ifndef NO_ALLOCA
3330 #ifdef C_ALLOCA
3331     alloca(0);
3332 #endif /* C_ALLOCA */
3333 #endif /* NO_ALLOCA */
3334 
3335     if (range > 0) {
3336       if (anchor && startpos < size &&
3337 	  (startpos < 1 || string[startpos-1] != '\n')) {
3338 	while (range > 0 && string[startpos] != '\n') {
3339 	  range--;
3340 	  startpos++;
3341 	}
3342       }
3343       else if (fastmap && (bufp->stclass)) {
3344 	register unsigned char *p;
3345 	unsigned long c;
3346 	int irange = range;
3347 
3348 	p = (unsigned char*)string+startpos;
3349 	while (range > 0) {
3350 	  c = *p++;
3351 	  if (ismbchar(c) && fastmap[c] != 2) {
3352 	    MBC2WC(c, p);
3353 	  }
3354 	  else if (MAY_TRANSLATE())
3355 	    c = translate[c];
3356 	  if (*bufp->stclass == charset) {
3357 	    if (!is_in_list(c, bufp->stclass+1)) break;
3358 	  }
3359 	  else {
3360 	    if (is_in_list(c, bufp->stclass+1)) break;
3361 	  }
3362 	  range--;
3363 	  if (c > 256) range--;
3364 	}
3365 	startpos += irange - range;
3366       }
3367     }
3368 
3369   advance:
3370     if (!range)
3371       break;
3372     else if (range > 0) {
3373       const char *d = string + startpos;
3374 
3375       if (ismbchar(*d)) {
3376 	int len = mbclen(*d) - 1;
3377 	range-=len, startpos+=len;
3378 	if (!range)
3379 	  break;
3380       }
3381       range--, startpos++;
3382     }
3383     else {
3384       range++, startpos--;
3385       {
3386 	const char *s, *d, *p;
3387 
3388 	s = string; d = string + startpos;
3389 	for (p = d; p-- > s && ismbchar(*p); )
3390 	  /* --p >= s would not work on 80[12]?86.
3391 	     (when the offset of s equals 0 other than huge model.)  */
3392 	  ;
3393 	if (!((d - p) & 1)) {
3394 	  if (!range)
3395 	    break;
3396 	  range++, startpos--;
3397 	}
3398       }
3399     }
3400   }
3401   return -1;
3402 }
3403 
3404 
3405 
3406 
3407 /* The following are used for nmz_re_match, defined below:  */
3408 
3409 /* Accessing macros used in nmz_re_match: */
3410 
3411 #define IS_ACTIVE(R)  ((R).bits.is_active)
3412 #define MATCHED_SOMETHING(R)  ((R).bits.matched_something)
3413 
3414 
3415 /* Macros used by nmz_re_match:  */
3416 
3417 /* I.e., regstart, regend, and reg_info.  */
3418 #define NUM_REG_ITEMS  3
3419 
3420 /* I.e., ptr and count.  */
3421 #define NUM_COUNT_ITEMS 2
3422 
3423 /* Individual items aside from the registers.  */
3424 #define NUM_NONREG_ITEMS 4
3425 
3426 /* We push at most this many things on the stack whenever we
3427    fail.  The `+ 2' refers to PATTERN_PLACE and STRING_PLACE, which are
3428    arguments to the PUSH_FAILURE_POINT macro.  */
3429 #define MAX_NUM_FAILURE_ITEMS   (num_regs * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
3430 
3431 /* We push this many things on the stack whenever we fail.  */
3432 #define NUM_FAILURE_ITEMS  (last_used_reg * NUM_REG_ITEMS + NUM_NONREG_ITEMS + 1)
3433 
3434 /* This pushes counter information for succeed_n and jump_n */
3435 #define PUSH_FAILURE_COUNT(ptr)						\
3436   do {									\
3437     int c;								\
3438     EXTRACT_NUMBER(c, ptr);						\
3439     ENSURE_FAIL_STACK(NUM_COUNT_ITEMS);					\
3440     *stackp++ = (unsigned char*)(long)c;				\
3441     *stackp++ = (ptr);							\
3442     num_failure_counts++;						\
3443   } while (0)
3444 
3445 /* This pushes most of the information about the current state we will want
3446    if we ever fail back to it.  */
3447 
3448 #define PUSH_FAILURE_POINT(pattern_place, string_place)			\
3449   do {									\
3450     long last_used_reg, this_reg;					\
3451 									\
3452     /* Find out how many registers are active or have been matched.	\
3453        (Aside from register zero, which is only set at the end.) */	\
3454     for (last_used_reg = num_regs-1; last_used_reg > 0; last_used_reg--)\
3455       if (!REG_UNSET(regstart[last_used_reg]))				\
3456         break;								\
3457 									\
3458     ENSURE_FAIL_STACK(NUM_FAILURE_ITEMS);				\
3459     *stackp++ = (unsigned char*)(long)num_failure_counts;		\
3460     num_failure_counts = 0;						\
3461 									\
3462     /* Now push the info for each of those registers.  */		\
3463     for (this_reg = 1; this_reg <= last_used_reg; this_reg++) {		\
3464       *stackp++ = regstart[this_reg];					\
3465       *stackp++ = regend[this_reg];					\
3466       *stackp++ = reg_info[this_reg].word;				\
3467     }									\
3468 									\
3469     /* Push how many registers we saved.  */				\
3470     *stackp++ = (unsigned char*)last_used_reg;				\
3471 									\
3472     *stackp++ = pattern_place;                                          \
3473     *stackp++ = string_place;                                           \
3474     *stackp++ = (unsigned char*)(long)options; /* current option status */	\
3475     *stackp++ = (unsigned char*)0; /* non-greedy flag */		\
3476   } while(0)
3477 
3478 #define NON_GREEDY ((unsigned char*)1)
3479 
3480 #define POP_FAILURE_COUNT()						\
3481   do {									\
3482     unsigned char *ptr = *--stackp;					\
3483     int count = (long)*--stackp;					\
3484     STORE_NUMBER(ptr, count);						\
3485   } while (0)
3486 
3487 /* This pops what PUSH_FAILURE_POINT pushes.  */
3488 
3489 #define POP_FAILURE_POINT()						\
3490   do {									\
3491     long temp;								\
3492     stackp -= NUM_NONREG_ITEMS;	/* Remove failure points (and flag). */	\
3493     temp = (long)*--stackp;	/* How many regs pushed.  */	        \
3494     temp *= NUM_REG_ITEMS;	/* How much to take off the stack.  */	\
3495     stackp -= temp; 		/* Remove the register info.  */	\
3496     temp = (long)*--stackp;	/* How many counters pushed.  */	\
3497     while (temp--) {							\
3498       POP_FAILURE_COUNT();      /* Remove the counter info.  */		\
3499     }									\
3500     num_failure_counts = 0;	/* Reset num_failure_counts.  */	\
3501   } while(0)
3502 
3503      /* Registers are set to a sentinel when they haven't yet matched.  */
3504 #define REG_UNSET_VALUE ((unsigned char*)-1)
3505 #define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
3506 
3507 #define PREFETCH if (d == dend) goto fail
3508 
3509      /* Call this when have matched something; it sets `matched' flags for the
3510    registers corresponding to the subexpressions of which we currently
3511    are inside.  */
3512 #define SET_REGS_MATCHED 						\
3513   do { unsigned this_reg;						\
3514     for (this_reg = 0; this_reg < num_regs; this_reg++) { 		\
3515         if (IS_ACTIVE(reg_info[this_reg]))				\
3516           MATCHED_SOMETHING(reg_info[this_reg]) = 1;			\
3517         else								\
3518           MATCHED_SOMETHING(reg_info[this_reg]) = 0;			\
3519       } 								\
3520   } while(0)
3521 
3522 #define AT_STRINGS_BEG(d)  ((d) == string)
3523 #define AT_STRINGS_END(d)  ((d) == dend)
3524 
3525 #define IS_A_LETTER(d) (SYNTAX(*(d)) == Sword ||			\
3526 			(current_mbctype ?				\
3527 			 (re_mbctab[*(d)] && ((d)+mbclen(*(d)))<=dend):	\
3528 			 SYNTAX(*(d)) == Sword2))
3529 
3530 #define PREV_IS_A_LETTER(d) ((current_mbctype == MBCTYPE_SJIS)?		\
3531 			     IS_A_LETTER((d)-(!AT_STRINGS_BEG((d)-1)&&	\
3532 					      ismbchar((d)[-2])?2:1)):	\
3533                              ((current_mbctype && ((d)[-1] >= 0x80)) ||	\
3534 			      IS_A_LETTER((d)-1)))
3535 
3536 static void
init_regs(regs,num_regs)3537 init_regs(regs, num_regs)
3538      struct re_registers *regs;
3539      unsigned int num_regs;
3540 {
3541   int i;
3542 
3543   regs->num_regs = num_regs;
3544   if (num_regs < RE_NREGS)
3545     num_regs = RE_NREGS;
3546 
3547   if (regs->allocated == 0) {
3548     regs->beg = TMALLOC(num_regs, int);
3549     regs->end = TMALLOC(num_regs, int);
3550     regs->allocated = num_regs;
3551   }
3552   else if (regs->allocated < num_regs) {
3553     TREALLOC(regs->beg, num_regs, int);
3554     TREALLOC(regs->end, num_regs, int);
3555     regs->allocated = num_regs;
3556   }
3557   for (i=0; i<num_regs; i++) {
3558     regs->beg[i] = regs->end[i] = -1;
3559   }
3560 }
3561 
3562 /* Match the pattern described by BUFP against STRING, which is of
3563    SIZE.  Start the match at index POS in STRING.  In REGS, return the
3564    indices of STRING that matched the entire BUFP->buffer and its
3565    contained subexpressions.
3566 
3567    If bufp->fastmap is nonzero, then it had better be up to date.
3568 
3569    The reason that the data to match are specified as two components
3570    which are to be regarded as concatenated is so this function can be
3571    used directly on the contents of an Emacs buffer.
3572 
3573    -1 is returned if there is no match.  -2 is returned if there is an
3574    error (such as match stack overflow).  Otherwise the value is the
3575    length of the substring which was matched.  */
3576 
3577 int
nmz_re_match(bufp,string_arg,size,pos,regs)3578 nmz_re_match(bufp, string_arg, size, pos, regs)
3579      struct re_pattern_buffer *bufp;
3580      const char *string_arg;
3581      int size, pos;
3582      struct re_registers *regs;
3583 {
3584   register unsigned char *p = (unsigned char*)bufp->buffer;
3585   unsigned char *p1;
3586 
3587   /* Pointer to beyond end of buffer.  */
3588   register unsigned char *pend = p + bufp->used;
3589 
3590   unsigned num_regs = bufp->re_nsub;
3591 
3592   unsigned char *string = (unsigned char*)string_arg;
3593 
3594   register unsigned char *d, *dend;
3595   register int mcnt;			/* Multipurpose.  */
3596   int options = bufp->options;
3597 
3598   /* Failure point stack.  Each place that can handle a failure further
3599      down the line pushes a failure point on this stack.  It consists of
3600      restart, regend, and reg_info for all registers corresponding to the
3601      subexpressions we're currently inside, plus the number of such
3602      registers, and, finally, two char *'s.  The first char * is where to
3603      resume scanning the pattern; the second one is where to resume
3604      scanning the strings.  If the latter is zero, the failure point is a
3605      ``dummy''; if a failure happens and the failure point is a dummy, it
3606      gets discarded and the next next one is tried.  */
3607 
3608   unsigned char *stacka[1];
3609   unsigned char **stackb = NULL;
3610   unsigned char **stackp = NULL;
3611   unsigned char **stacke = NULL;
3612 
3613   /* Information on the contents of registers. These are pointers into
3614      the input strings; they record just what was matched (on this
3615      attempt) by a subexpression part of the pattern, that is, the
3616      regnum-th regstart pointer points to where in the pattern we began
3617      matching and the regnum-th regend points to right after where we
3618      stopped matching the regnum-th subexpression.  (The zeroth register
3619      keeps track of what the whole pattern matches.)  */
3620 
3621   unsigned char **regstart = bufp->regstart;
3622   unsigned char **regend = bufp->regend;
3623 
3624   /* If a group that's operated upon by a repetition operator fails to
3625      match anything, then the register for its start will need to be
3626      restored because it will have been set to wherever in the string we
3627      are when we last see its open-group operator.  Similarly for a
3628      register's end.  */
3629   unsigned char **old_regstart = bufp->old_regstart;
3630   unsigned char **old_regend = bufp->old_regend;
3631 
3632   /* The is_active field of reg_info helps us keep track of which (possibly
3633      nested) subexpressions we are currently in. The matched_something
3634      field of reg_info[reg_num] helps us tell whether or not we have
3635      matched any of the pattern so far this time through the reg_num-th
3636      subexpression.  These two fields get reset each time through any
3637      loop their register is in.  */
3638 
3639   register_info_type *reg_info = bufp->reg_info;
3640 
3641   /* The following record the register info as found in the above
3642      variables when we find a match better than any we've seen before.
3643      This happens as we backtrack through the failure points, which in
3644      turn happens only if we have not yet matched the entire string.  */
3645 
3646   unsigned best_regs_set = 0;
3647   unsigned char **best_regstart = bufp->best_regstart;
3648   unsigned char **best_regend = bufp->best_regend;
3649 
3650   int num_failure_counts = 0;
3651 
3652   if (regs) {
3653     init_regs(regs, num_regs);
3654   }
3655 
3656   /* Initialize the stack. */
3657   MALLOC_STACK(unsigned char *, MAX_NUM_FAILURE_ITEMS * NFAILURES);
3658 
3659 #ifdef DEBUG_REGEX
3660   fprintf(stderr, "Entering nmz_re_match(%s)\n", string_arg);
3661 #endif
3662 
3663   /* Initialize subexpression text positions to -1 to mark ones that no
3664      ( or ( and ) or ) has been seen for. Also set all registers to
3665      inactive and mark them as not having matched anything or ever
3666      failed. */
3667   for (mcnt = 0; mcnt < num_regs; mcnt++) {
3668     regstart[mcnt] = regend[mcnt]
3669       = old_regstart[mcnt] = old_regend[mcnt]
3670       = best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE;
3671 #ifdef __CHECKER__
3672     reg_info[mcnt].word = 0;
3673 #endif
3674     IS_ACTIVE (reg_info[mcnt]) = 0;
3675     MATCHED_SOMETHING (reg_info[mcnt]) = 0;
3676   }
3677 
3678   /* Set up pointers to ends of strings.
3679      Don't allow the second string to be empty unless both are empty.  */
3680 
3681 
3682   /* `p' scans through the pattern as `d' scans through the data. `dend'
3683      is the end of the input string that `d' points within. `d' is
3684      advanced into the following input string whenever necessary, but
3685      this happens before fetching; therefore, at the beginning of the
3686      loop, `d' can be pointing at the end of a string, but it cannot
3687      equal string2.  */
3688 
3689   d = string + pos, dend = string + size;
3690 
3691   /* This loops over pattern commands.  It exits by returning from the
3692      function if match is complete, or it drops through if match fails
3693      at this starting point in the input data.  */
3694 
3695   for (;;) {
3696 #ifdef DEBUG_REGEX
3697     fprintf(stderr,
3698 	    "regex loop(%d):  matching 0x%02d\n",
3699 	    p - (unsigned char*)bufp->buffer,
3700 	    *p);
3701 #endif
3702     /* End of pattern means we might have succeeded.  */
3703     if (p == pend) {
3704       /* If not end of string, try backtracking.  Otherwise done.  */
3705       if ((bufp->options & RE_OPTION_LONGEST) && d != dend) {
3706 	if (best_regs_set) /* non-greedy, no need to backtrack */
3707 	  goto restore_best_regs;
3708 	while (stackp != stackb && stackp[-1] == NON_GREEDY) {
3709 	  if (best_regs_set) /* non-greedy, no need to backtrack */
3710 	    goto restore_best_regs;
3711 	  POP_FAILURE_POINT();
3712 	}
3713 	if (stackp != stackb) {
3714 	  /* More failure points to try.  */
3715 
3716 	  /* If exceeds best match so far, save it.  */
3717 	  if (! best_regs_set || (d > best_regend[0])) {
3718 	    best_regs_set = 1;
3719 	    best_regend[0] = d;	/* Never use regstart[0].  */
3720 
3721 	    for (mcnt = 1; mcnt < num_regs; mcnt++) {
3722 	      best_regstart[mcnt] = regstart[mcnt];
3723 	      best_regend[mcnt] = regend[mcnt];
3724 	    }
3725 	  }
3726 	  goto fail;
3727 	}
3728 	/* If no failure points, don't restore garbage.  */
3729 	else if (best_regs_set) {
3730 	restore_best_regs:
3731 	  /* Restore best match.  */
3732 	  d = best_regend[0];
3733 
3734 	  for (mcnt = 0; mcnt < num_regs; mcnt++) {
3735 	    regstart[mcnt] = best_regstart[mcnt];
3736 	    regend[mcnt] = best_regend[mcnt];
3737 	  }
3738 	}
3739       }
3740 
3741       /* If caller wants register contents data back, convert it
3742 	 to indices.  */
3743       if (regs) {
3744 	regs->beg[0] = pos;
3745 	regs->end[0] = d - string;
3746 	for (mcnt = 1; mcnt < num_regs; mcnt++) {
3747 	  if (REG_UNSET(regend[mcnt])) {
3748 	    regs->beg[mcnt] = -1;
3749 	    regs->end[mcnt] = -1;
3750 	    continue;
3751 	  }
3752 	  regs->beg[mcnt] = regstart[mcnt] - string;
3753 	  regs->end[mcnt] = regend[mcnt] - string;
3754 	}
3755       }
3756       FREE_AND_RETURN(stackb, (d - pos - string));
3757     }
3758 
3759     /* Otherwise match next pattern command.  */
3760 #ifdef SWITCH_ENUM_BUG
3761     switch ((int)((enum regexpcode)*p++))
3762 #else
3763     switch ((enum regexpcode)*p++)
3764 #endif
3765       {
3766 	/* ( [or `(', as appropriate] is represented by start_memory,
3767 	   ) by stop_memory.  Both of those commands are followed by
3768 	   a register number in the next byte.  The text matched
3769 	   within the ( and ) is recorded under that number.  */
3770       case start_memory:
3771 	old_regstart[*p] = regstart[*p];
3772 	regstart[*p] = d;
3773 	IS_ACTIVE(reg_info[*p]) = 1;
3774 	MATCHED_SOMETHING(reg_info[*p]) = 0;
3775 	p += 2;
3776 	continue;
3777 
3778       case stop_memory:
3779 	old_regend[*p] = regend[*p];
3780 	regend[*p] = d;
3781 	IS_ACTIVE(reg_info[*p]) = 0;
3782 	p += 2;
3783 	continue;
3784 
3785       case start_paren:
3786       case stop_paren:
3787 	break;
3788 
3789 	/* \<digit> has been turned into a `duplicate' command which is
3790 	   followed by the numeric value of <digit> as the register number.  */
3791       case duplicate:
3792 	{
3793 	  int regno = *p++;   /* Get which register to match against */
3794 	  register unsigned char *d2, *dend2;
3795 
3796 	  /* Check if there's corresponding group */
3797 	  if (regno >= num_regs) goto fail;
3798 	  /* Check if corresponding group is still open */
3799 	  if (IS_ACTIVE(reg_info[regno])) goto fail;
3800 
3801 	  /* Where in input to try to start matching.  */
3802 	  d2 = regstart[regno];
3803 	  if (REG_UNSET(d2)) goto fail;
3804 
3805 	  /* Where to stop matching; if both the place to start and
3806 	     the place to stop matching are in the same string, then
3807 	     set to the place to stop, otherwise, for now have to use
3808 	     the end of the first string.  */
3809 
3810 	  dend2 = regend[regno];
3811 	  if (REG_UNSET(dend2)) goto fail;
3812 	  for (;;) {
3813 	    /* At end of register contents => success */
3814 	    if (d2 == dend2) break;
3815 
3816 	    /* If necessary, advance to next segment in data.  */
3817 	    PREFETCH;
3818 
3819 	    /* How many characters left in this segment to match.  */
3820 	    mcnt = dend - d;
3821 
3822 	    /* Want how many consecutive characters we can match in
3823 	       one shot, so, if necessary, adjust the count.  */
3824 	    if (mcnt > dend2 - d2)
3825 	      mcnt = dend2 - d2;
3826 
3827 	    /* Compare that many; failure if mismatch, else move
3828 	       past them.  */
3829 	    if ((options & RE_OPTION_IGNORECASE)
3830 		? memcmp_translate(d, d2, mcnt)
3831 		: memcmp((char*)d, (char*)d2, mcnt))
3832 	      goto fail;
3833 	    d += mcnt, d2 += mcnt;
3834 	  }
3835 	}
3836 	break;
3837 
3838       case start_nowidth:
3839 	PUSH_FAILURE_POINT(0, d);
3840 	if (stackp - stackb > RE_DUP_MAX) {
3841 	   FREE_AND_RETURN(stackb,(-2));
3842 	}
3843 	EXTRACT_NUMBER_AND_INCR(mcnt, p);
3844 	STORE_NUMBER(p+mcnt, stackp - stackb);
3845 	continue;
3846 
3847       case stop_nowidth:
3848 	EXTRACT_NUMBER_AND_INCR(mcnt, p);
3849 	stackp = stackb + mcnt;
3850 	d = stackp[-3];
3851 	POP_FAILURE_POINT();
3852 	continue;
3853 
3854       case stop_backtrack:
3855 	EXTRACT_NUMBER_AND_INCR(mcnt, p);
3856 	stackp = stackb + mcnt;
3857 	POP_FAILURE_POINT();
3858 	continue;
3859 
3860       case pop_and_fail:
3861 	EXTRACT_NUMBER(mcnt, p+1);
3862 	stackp = stackb + mcnt;
3863 	POP_FAILURE_POINT();
3864 	goto fail;
3865 
3866       case anychar:
3867 	PREFETCH;
3868 	if (ismbchar(*d)) {
3869 	  if (d + mbclen(*d) > dend)
3870 	    goto fail;
3871 	  SET_REGS_MATCHED;
3872 	  d += mbclen(*d);
3873 	  break;
3874 	}
3875 	if (!(options&RE_OPTION_MULTILINE)
3876 	    && (TRANSLATE_P() ? translate[*d] : *d) == '\n')
3877 	  goto fail;
3878 	SET_REGS_MATCHED;
3879 	d++;
3880 	break;
3881 
3882       case anychar_repeat:
3883 	for (;;) {
3884 	  PUSH_FAILURE_POINT(p, d);
3885 	  PREFETCH;
3886 	  if (ismbchar(*d)) {
3887 	    if (d + mbclen(*d) > dend)
3888 	      goto fail;
3889 	    SET_REGS_MATCHED;
3890 	    d += mbclen(*d);
3891 	    continue;
3892 	  }
3893 	  if (!(options&RE_OPTION_MULTILINE) &&
3894 	      (TRANSLATE_P() ? translate[*d] : *d) == '\n')
3895 	    goto fail;
3896 	  SET_REGS_MATCHED;
3897 	  d++;
3898 	}
3899 	break;
3900 
3901       case charset:
3902       case charset_not:
3903 	{
3904 	  int not;	    /* Nonzero for charset_not.  */
3905 	  int part = 0;	    /* true if matched part of mbc */
3906 	  unsigned char *dsave = d + 1;
3907 	  int cc, c;
3908 
3909 	  PREFETCH;
3910 	  cc = c = (unsigned char)*d++;
3911 	  if (ismbchar(c)) {
3912 	    if (d + mbclen(c) - 1 <= dend) {
3913 	      MBC2WC(c, d);
3914 	    }
3915 	  }
3916 	  else if (TRANSLATE_P())
3917 	    cc = c = (unsigned char)translate[c];
3918 
3919 	  not = is_in_list(c, p);
3920 	  if (!not && cc != c) {
3921 	      part = not = is_in_list(cc, p);
3922 	  }
3923 	  if (*(p - 1) == (unsigned char)charset_not) {
3924 	    not = !not;
3925 	  }
3926 	  if (!not) goto fail;
3927 
3928 	  p += 1 + *p + 2 + EXTRACT_UNSIGNED(&p[1 + *p])*8;
3929 	  SET_REGS_MATCHED;
3930 
3931 	  if (part) d = dsave;
3932 	  break;
3933 	}
3934 
3935       case begline:
3936 	if (size == 0 || AT_STRINGS_BEG(d))
3937 	  break;
3938 	if (d[-1] == '\n' && !AT_STRINGS_END(d))
3939 	  break;
3940 	goto fail;
3941 
3942       case endline:
3943 	if (AT_STRINGS_END(d)) {
3944 	  if (size == 0 || d[-1] != '\n')
3945 	    break;
3946 	}
3947 	else if (*d == '\n')
3948 	  break;
3949 	goto fail;
3950 
3951 	/* Match at the very beginning of the string. */
3952       case begbuf:
3953 	if (AT_STRINGS_BEG(d))
3954 	  break;
3955 	goto fail;
3956 
3957 	/* Match at the very end of the data. */
3958       case endbuf:
3959 	if (AT_STRINGS_END(d))
3960 	  break;
3961 	goto fail;
3962 
3963 	/* Match at the very end of the data. */
3964       case endbuf2:
3965 	if (AT_STRINGS_END(d)) {
3966 	  if (size == 0 || d[-1] != '\n')
3967 	    break;
3968 	}
3969 	/* .. or newline just before the end of the data. */
3970 	if (*d == '\n' && AT_STRINGS_END(d+1))
3971 	  break;
3972 	goto fail;
3973 
3974 	/* `or' constructs are handled by starting each alternative with
3975 	   an on_failure_jump that points to the start of the next
3976 	   alternative.  Each alternative except the last ends with a
3977 	   jump to the joining point.  (Actually, each jump except for
3978 	   the last one really jumps to the following jump, because
3979 	   tensioning the jumps is a hassle.)  */
3980 
3981 	/* The start of a stupid repeat has an on_failure_jump that points
3982 	   past the end of the repeat text. This makes a failure point so
3983 	   that on failure to match a repetition, matching restarts past
3984 	   as many repetitions have been found with no way to fail and
3985 	   look for another one.  */
3986 
3987 	/* A smart repeat is similar but loops back to the on_failure_jump
3988 	   so that each repetition makes another failure point.  */
3989 
3990 	/* Match at the starting position. */
3991       case begpos:
3992 	if (d - string == pos)
3993 	  break;
3994 	goto fail;
3995 
3996       case on_failure_jump:
3997       on_failure:
3998       EXTRACT_NUMBER_AND_INCR(mcnt, p);
3999       PUSH_FAILURE_POINT(p + mcnt, d);
4000       continue;
4001 
4002       /* The end of a smart repeat has a maybe_finalize_jump back.
4003 	 Change it either to a finalize_jump or an ordinary jump.  */
4004       case maybe_finalize_jump:
4005 	EXTRACT_NUMBER_AND_INCR(mcnt, p);
4006 	p1 = p;
4007 
4008 	/* Compare the beginning of the repeat with what in the
4009 	   pattern follows its end. If we can establish that there
4010 	   is nothing that they would both match, i.e., that we
4011 	   would have to backtrack because of (as in, e.g., `a*a')
4012 	   then we can change to finalize_jump, because we'll
4013 	   never have to backtrack.
4014 
4015 	   This is not true in the case of alternatives: in
4016 	   `(a|ab)*' we do need to backtrack to the `ab' alternative
4017 	   (e.g., if the string was `ab').  But instead of trying to
4018 	   detect that here, the alternative has put on a dummy
4019 	   failure point which is what we will end up popping.  */
4020 
4021 	/* Skip over open/close-group commands.  */
4022 	while (p1 + 2 < pend) {
4023 	  if ((enum regexpcode)*p1 == stop_memory ||
4024 	      (enum regexpcode)*p1 == start_memory)
4025 	    p1 += 3;	/* Skip over args, too.  */
4026 	  else if (/*(enum regexpcode)*p1 == start_paren ||*/
4027 		   (enum regexpcode)*p1 == stop_paren)
4028 	      p1 += 1;
4029 	  else
4030 	    break;
4031 	}
4032 
4033 	if (p1 == pend)
4034 	  p[-3] = (unsigned char)finalize_jump;
4035 	else if (*p1 == (unsigned char)exactn ||
4036 		 *p1 == (unsigned char)endline) {
4037 	  register int c = *p1 == (unsigned char)endline ? '\n' : p1[2];
4038 	  register unsigned char *p2 = p + mcnt;
4039 	    /* p2[0] ... p2[2] are an on_failure_jump.
4040 	       Examine what follows that.  */
4041 	  if (p2[3] == (unsigned char)exactn && p2[5] != c)
4042 	    p[-3] = (unsigned char)finalize_jump;
4043 	  else if (p2[3] == (unsigned char)charset ||
4044 		   p2[3] == (unsigned char)charset_not) {
4045 	    int not;
4046 	    if (ismbchar(c)) {
4047 	      unsigned char *pp = p1+3;
4048 	      MBC2WC(c, pp);
4049 	    }
4050 	    /* `is_in_list()' is TRUE if c would match */
4051 	    /* That means it is not safe to finalize.  */
4052 	    not = is_in_list(c, p2 + 4);
4053 	    if (p2[3] == (unsigned char)charset_not)
4054 	      not = !not;
4055 	    if (!not)
4056 	      p[-3] = (unsigned char)finalize_jump;
4057 	  }
4058 	}
4059 	p -= 2;		/* Point at relative address again.  */
4060 	if (p[-1] != (unsigned char)finalize_jump) {
4061 	  p[-1] = (unsigned char)jump;
4062 	  goto nofinalize;
4063 	}
4064 	/* Note fall through.  */
4065 
4066 	/* The end of a stupid repeat has a finalize_jump back to the
4067 	   start, where another failure point will be made which will
4068 	   point to after all the repetitions found so far.  */
4069 
4070 	/* Take off failure points put on by matching on_failure_jump
4071 	   because didn't fail.  Also remove the register information
4072 	   put on by the on_failure_jump.  */
4073       case finalize_jump:
4074 	if (stackp > stackb && stackp[-3] == d) {
4075 	  p = stackp[-4];
4076 	  POP_FAILURE_POINT();
4077 	  continue;
4078 	}
4079 	POP_FAILURE_POINT();
4080 	/* Note fall through.  */
4081 
4082       /* We need this opcode so we can detect where alternatives end
4083 	 in `group_match_null_string_p' et al.  */
4084       case jump_past_alt:
4085 	/* fall through */
4086 
4087 	/* Jump without taking off any failure points.  */
4088       case jump:
4089       nofinalize:
4090         EXTRACT_NUMBER_AND_INCR(mcnt, p);
4091         if (mcnt < 0 && stackp > stackb && stackp[-3] == d) /* avoid infinite loop */
4092 	   goto fail;
4093         p += mcnt;
4094         continue;
4095 
4096       case dummy_failure_jump:
4097 	/* Normally, the on_failure_jump pushes a failure point, which
4098 	   then gets popped at finalize_jump.  We will end up at
4099 	   finalize_jump, also, and with a pattern of, say, `a+', we
4100 	   are skipping over the on_failure_jump, so we have to push
4101 	   something meaningless for finalize_jump to pop.  */
4102 	PUSH_FAILURE_POINT(0, 0);
4103 	goto nofinalize;
4104 
4105 	/* At the end of an alternative, we need to push a dummy failure
4106 	   point in case we are followed by a `finalize_jump', because
4107 	   we don't want the failure point for the alternative to be
4108 	   popped.  For example, matching `(a|ab)*' against `aab'
4109 	   requires that we match the `ab' alternative.  */
4110       case push_dummy_failure:
4111 	/* See comments just above at `dummy_failure_jump' about the
4112 	   two zeroes.  */
4113 	p1 = p;
4114 	/* Skip over open/close-group commands.  */
4115 	while (p1 + 2 < pend) {
4116 	  if ((enum regexpcode)*p1 == stop_memory ||
4117 	      (enum regexpcode)*p1 == start_memory)
4118 	    p1 += 3;	/* Skip over args, too.  */
4119 	  else if (/*(enum regexpcode)*p1 == start_paren ||*/
4120 		   (enum regexpcode)*p1 == stop_paren)
4121 	      p1 += 1;
4122 	  else
4123 	    break;
4124 	}
4125 	if ((enum regexpcode)*p1 == jump)
4126 	  p[-1] = unused;
4127 	else
4128 	  PUSH_FAILURE_POINT(0, 0);
4129 	break;
4130 
4131 	/* Have to succeed matching what follows at least n times.  Then
4132 	   just handle like an on_failure_jump.  */
4133       case succeed_n:
4134 	EXTRACT_NUMBER(mcnt, p + 2);
4135 	/* Originally, this is how many times we HAVE to succeed.  */
4136 	if (mcnt != 0) {
4137 	  mcnt--;
4138 	  p += 2;
4139 	  PUSH_FAILURE_COUNT(p);
4140 	  STORE_NUMBER_AND_INCR(p, mcnt);
4141 	  PUSH_FAILURE_POINT(0, 0);
4142 	}
4143 	else  {
4144 	  goto on_failure;
4145 	}
4146 	continue;
4147 
4148       case jump_n:
4149 	EXTRACT_NUMBER(mcnt, p + 2);
4150 	/* Originally, this is how many times we CAN jump.  */
4151 	if (mcnt) {
4152 	  mcnt--;
4153 	  PUSH_FAILURE_COUNT(p + 2);
4154 	  STORE_NUMBER(p + 2, mcnt);
4155 	  goto nofinalize;	     /* Do the jump without taking off
4156 					any failure points.  */
4157 	}
4158 	/* If don't have to jump any more, skip over the rest of command.  */
4159 	else
4160 	  p += 4;
4161 	continue;
4162 
4163       case set_number_at:
4164 	EXTRACT_NUMBER_AND_INCR(mcnt, p);
4165 	p1 = p + mcnt;
4166 	EXTRACT_NUMBER_AND_INCR(mcnt, p);
4167 	STORE_NUMBER(p1, mcnt);
4168 	continue;
4169 
4170       case try_next:
4171 	EXTRACT_NUMBER_AND_INCR(mcnt, p);
4172 	if (p + mcnt < pend) {
4173 	  PUSH_FAILURE_POINT(p, d);
4174 	  stackp[-1] = NON_GREEDY;
4175 	}
4176 	p += mcnt;
4177 	continue;
4178 
4179       case finalize_push:
4180 	POP_FAILURE_POINT();
4181 	EXTRACT_NUMBER_AND_INCR(mcnt, p);
4182         if (mcnt < 0 && stackp > stackb  && stackp[-3] == d) /* avoid infinite loop */
4183 	   goto fail;
4184 	PUSH_FAILURE_POINT(p + mcnt, d);
4185 	stackp[-1] = NON_GREEDY;
4186 	continue;
4187 
4188       case finalize_push_n:
4189 	EXTRACT_NUMBER(mcnt, p + 2);
4190 	/* Originally, this is how many times we CAN jump.  */
4191 	if (mcnt) {
4192 	  int pos, i;
4193 
4194 	  mcnt--;
4195 	  STORE_NUMBER(p + 2, mcnt);
4196 	  EXTRACT_NUMBER(pos, p);
4197 	  EXTRACT_NUMBER(i, p+pos+5);
4198 	  if (i > 0) goto nofinalize;
4199 	  POP_FAILURE_POINT();
4200 	  EXTRACT_NUMBER_AND_INCR(mcnt, p);
4201 	  PUSH_FAILURE_POINT(p + mcnt, d);
4202 	  stackp[-1] = NON_GREEDY;
4203 	  p += 2;		/* skip n */
4204 	}
4205 	/* If don't have to push any more, skip over the rest of command.  */
4206 	else
4207 	  p += 4;
4208 	continue;
4209 
4210 	/* Ignore these.  Used to ignore the n of succeed_n's which
4211 	   currently have n == 0.  */
4212       case unused:
4213 	continue;
4214 
4215       case casefold_on:
4216 	options |= RE_OPTION_IGNORECASE;
4217 	continue;
4218 
4219       case casefold_off:
4220 	options &= ~RE_OPTION_IGNORECASE;
4221 	continue;
4222 
4223       case option_set:
4224 	options = *p++;
4225 	continue;
4226 
4227       case wordbound:
4228 	if (AT_STRINGS_BEG(d)) {
4229 	  if (IS_A_LETTER(d)) break;
4230 	  else goto fail;
4231 	}
4232 	if (AT_STRINGS_END(d)) {
4233 	  if (PREV_IS_A_LETTER(d)) break;
4234 	  else goto fail;
4235 	}
4236 	if (PREV_IS_A_LETTER(d) != IS_A_LETTER(d))
4237 	  break;
4238 	goto fail;
4239 
4240       case notwordbound:
4241 	if (AT_STRINGS_BEG(d)) {
4242 	  if (IS_A_LETTER(d)) goto fail;
4243 	  else break;
4244 	}
4245 	if (AT_STRINGS_END(d)) {
4246 	  if (PREV_IS_A_LETTER(d)) goto fail;
4247 	  else break;
4248 	}
4249 	if (PREV_IS_A_LETTER(d) != IS_A_LETTER(d))
4250 	  goto fail;
4251 	break;
4252 
4253       case wordbeg:
4254 	if (IS_A_LETTER(d) && (AT_STRINGS_BEG(d) || !PREV_IS_A_LETTER(d)))
4255 	  break;
4256 	goto fail;
4257 
4258       case wordend:
4259 	if (!AT_STRINGS_BEG(d) && PREV_IS_A_LETTER(d)
4260 	    && (!IS_A_LETTER(d) || AT_STRINGS_END(d)))
4261 	  break;
4262 	goto fail;
4263 
4264       case wordchar:
4265 	PREFETCH;
4266 	if (!IS_A_LETTER(d))
4267 	  goto fail;
4268 	if (ismbchar(*d) && d + mbclen(*d) - 1 < dend)
4269 	  d += mbclen(*d) - 1;
4270 	d++;
4271 	SET_REGS_MATCHED;
4272 	break;
4273 
4274       case notwordchar:
4275 	PREFETCH;
4276 	if (IS_A_LETTER(d))
4277 	  goto fail;
4278 	if (ismbchar(*d) && d + mbclen(*d) - 1 < dend)
4279 	  d += mbclen(*d) - 1;
4280 	d++;
4281 	SET_REGS_MATCHED;
4282 	break;
4283 
4284       case exactn:
4285 	/* Match the next few pattern characters exactly.
4286 	   mcnt is how many characters to match.  */
4287 	mcnt = *p++;
4288 	/* This is written out as an if-else so we don't waste time
4289 	   testing `translate' inside the loop.  */
4290 	if (TRANSLATE_P()) {
4291 	  do {
4292 	    unsigned char c;
4293 
4294 	    PREFETCH;
4295 	    if (*p == 0xff) {
4296 	      p++;
4297 	      if (!--mcnt
4298 		  || AT_STRINGS_END(d)
4299 		  || (unsigned char)*d++ != (unsigned char)*p++)
4300 		goto fail;
4301 	      continue;
4302 	    }
4303 	    c = *d++;
4304 	    if (ismbchar(c)) {
4305 	      int n;
4306 
4307 	      if (c != (unsigned char)*p++)
4308 		goto fail;
4309 	      for (n = mbclen(c) - 1; n > 0; n--)
4310 		if (!--mcnt	/* redundant check if pattern was
4311 				   compiled properly. */
4312 		    || AT_STRINGS_END(d)
4313 		    || (unsigned char)*d++ != (unsigned char)*p++)
4314 		  goto fail;
4315 	      continue;
4316 	    }
4317 	    /* compiled code translation needed for ruby */
4318 	    if ((unsigned char)translate[c] != (unsigned char)translate[*p++])
4319 	      goto fail;
4320 	  }
4321 	  while (--mcnt);
4322 	}
4323 	else {
4324 	  do {
4325 	    PREFETCH;
4326 	    if (*p == 0xff) {p++; mcnt--;}
4327 	    if (*d++ != *p++) goto fail;
4328 	  }
4329 	  while (--mcnt);
4330 	}
4331 	SET_REGS_MATCHED;
4332 	break;
4333       }
4334 #ifdef RUBY
4335     CHECK_INTS;
4336 #endif
4337     continue;  /* Successfully executed one pattern command; keep going.  */
4338 
4339     /* Jump here if any matching operation fails. */
4340   fail:
4341     if (stackp != stackb) {
4342       /* A restart point is known.  Restart there and pop it. */
4343       short last_used_reg, this_reg;
4344 
4345       /* If this failure point is from a dummy_failure_point, just
4346 	 skip it.  */
4347       if (stackp[-4] == 0 || (best_regs_set && stackp[-1] == NON_GREEDY)) {
4348 	POP_FAILURE_POINT();
4349 	goto fail;
4350       }
4351       stackp--;		/* discard greedy flag */
4352       options = (long)*--stackp;
4353       d = *--stackp;
4354       p = *--stackp;
4355       /* Restore register info.  */
4356       last_used_reg = (long)*--stackp;
4357 
4358       /* Make the ones that weren't saved -1 or 0 again. */
4359       for (this_reg = num_regs - 1; this_reg > last_used_reg; this_reg--) {
4360 	regend[this_reg] = REG_UNSET_VALUE;
4361 	regstart[this_reg] = REG_UNSET_VALUE;
4362 	IS_ACTIVE(reg_info[this_reg]) = 0;
4363 	MATCHED_SOMETHING(reg_info[this_reg]) = 0;
4364       }
4365 
4366       /* And restore the rest from the stack.  */
4367       for ( ; this_reg > 0; this_reg--) {
4368 	reg_info[this_reg].word = *--stackp;
4369 	regend[this_reg] = *--stackp;
4370 	regstart[this_reg] = *--stackp;
4371       }
4372       mcnt = (long)*--stackp;
4373       while (mcnt--) {
4374 	POP_FAILURE_COUNT();
4375       }
4376       if (p < pend) {
4377 	int is_a_jump_n = 0;
4378 	int failed_paren = 0;
4379 
4380 	p1 = p;
4381 	/* If failed to a backwards jump that's part of a repetition
4382 	   loop, need to pop this failure point and use the next one.  */
4383 	switch ((enum regexpcode)*p1) {
4384 	case jump_n:
4385 	case finalize_push_n:
4386 	  is_a_jump_n = 1;
4387 	case maybe_finalize_jump:
4388 	case finalize_jump:
4389 	case finalize_push:
4390 	case jump:
4391 	  p1++;
4392 	  EXTRACT_NUMBER_AND_INCR(mcnt, p1);
4393 
4394 	  if (mcnt >= 0) break;	/* should be backward jump */
4395 	  p1 += mcnt;
4396 
4397 	  if (( is_a_jump_n && (enum regexpcode)*p1 == succeed_n) ||
4398 	      (!is_a_jump_n && (enum regexpcode)*p1 == on_failure_jump)) {
4399 	    if (failed_paren) {
4400 	      p1++;
4401 	      EXTRACT_NUMBER_AND_INCR(mcnt, p1);
4402 	      PUSH_FAILURE_POINT(p1 + mcnt, d);
4403 	    }
4404 	    goto fail;
4405 	  }
4406 	  break;
4407 	default:
4408 	  /* do nothing */;
4409 	}
4410       }
4411     }
4412     else
4413       break;   /* Matching at this starting point really fails.  */
4414   }
4415 
4416   if (best_regs_set)
4417     goto restore_best_regs;
4418 
4419   FREE_AND_RETURN(stackb,(-1)); 	/* Failure to match.  */
4420 }
4421 
4422 
4423 static int
memcmp_translate(s1,s2,len)4424 memcmp_translate(s1, s2, len)
4425      unsigned char *s1, *s2;
4426      register int len;
4427 {
4428   register unsigned char *p1 = s1, *p2 = s2, c;
4429   while (len) {
4430     c = *p1++;
4431     if (ismbchar(c)) {
4432       int n;
4433 
4434       if (c != *p2++) return 1;
4435       for (n = mbclen(c) - 1; n > 0; n--)
4436 	if (!--len || *p1++ != *p2++)
4437 	  return 1;
4438     }
4439     else
4440       if (translate[c] != translate[*p2++])
4441 	return 1;
4442     len--;
4443   }
4444   return 0;
4445 }
4446 
4447 void
nmz_re_copy_registers(regs1,regs2)4448 nmz_re_copy_registers(regs1, regs2)
4449      struct re_registers *regs1, *regs2;
4450 {
4451   int i;
4452 
4453   if (regs1 == regs2) return;
4454   if (regs1->allocated == 0) {
4455     regs1->beg = TMALLOC(regs2->num_regs, int);
4456     regs1->end = TMALLOC(regs2->num_regs, int);
4457     regs1->allocated = regs2->num_regs;
4458   }
4459   else if (regs1->allocated < regs2->num_regs) {
4460     TREALLOC(regs1->beg, regs2->num_regs, int);
4461     TREALLOC(regs1->end, regs2->num_regs, int);
4462     regs1->allocated = regs2->num_regs;
4463   }
4464   for (i=0; i<regs2->num_regs; i++) {
4465     regs1->beg[i] = regs2->beg[i];
4466     regs1->end[i] = regs2->end[i];
4467   }
4468   regs1->num_regs = regs2->num_regs;
4469 }
4470 
4471 void
nmz_re_free_registers(regs)4472 nmz_re_free_registers(regs)
4473      struct re_registers *regs;
4474 {
4475   if (regs->allocated == 0) return;
4476   if (regs->beg) xfree(regs->beg);
4477   if (regs->end) xfree(regs->end);
4478 }
4479 
4480 /* Functions for multi-byte support.
4481    Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto)
4482    Last change: Jul. 9, 1993 by t^2  */
4483 static const unsigned char mbctab_ascii[] = {
4484   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4485   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4486   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4487   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4488   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4489   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4490   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4491   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4492   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4493   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4494   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4495   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4496   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4497   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4498   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4499   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
4500 };
4501 
4502 static const unsigned char mbctab_euc[] = { /* 0xA1-0xFE */
4503   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4504   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4505   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4506   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4507   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4508   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4509   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4510   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4511   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0,
4512   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4513   0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4514   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4515   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4516   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4517   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4518   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
4519 };
4520 
4521 static const unsigned char mbctab_sjis[] = { /* 0x80-0x9f,0xE0-0xFF */
4522   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4523   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4524   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4525   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4526   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4527   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4528   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4529   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4530   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4531   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4532   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4533   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4534   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4535   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4536   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4537   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1
4538 };
4539 
4540 static const unsigned char mbctab_utf8[] = {
4541   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4542   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4543   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4544   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4545   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4546   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4547   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4548   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4549   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4550   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4551   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4552   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
4553   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4554   1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
4555   2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
4556   3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0
4557 };
4558 
4559 const unsigned char *re_mbctab = mbctab_ascii;
4560 
4561 void
nmz_re_mbcinit(mbctype)4562 nmz_re_mbcinit(mbctype)
4563      int mbctype;
4564 {
4565   switch (mbctype) {
4566   case MBCTYPE_ASCII:
4567     re_mbctab = mbctab_ascii;
4568     current_mbctype = MBCTYPE_ASCII;
4569     break;
4570   case MBCTYPE_EUC:
4571     re_mbctab = mbctab_euc;
4572     current_mbctype = MBCTYPE_EUC;
4573     break;
4574   case MBCTYPE_SJIS:
4575     re_mbctab = mbctab_sjis;
4576     current_mbctype = MBCTYPE_SJIS;
4577     break;
4578   case MBCTYPE_UTF8:
4579     re_mbctab = mbctab_utf8;
4580     current_mbctype = MBCTYPE_UTF8;
4581     break;
4582   }
4583 }
4584