xref: /386bsd/usr/src/usr.bin/gdb/regex.c (revision a2142627)
1 /* Extended regular expression matching and search library.
2    Copyright (C) 1985, 1989 Free Software Foundation, Inc.
3 
4    This program is free software; you can redistribute it and/or modify
5    it under the terms of the GNU General Public License as published by
6    the Free Software Foundation; either version 1, or (at your option)
7    any later version.
8 
9    This program is distributed in the hope that it will be useful,
10    but WITHOUT ANY WARRANTY; without even the implied warranty of
11    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
12    GNU General Public License for more details.
13 
14    You should have received a copy of the GNU General Public License
15    along with this program; if not, write to the Free Software
16    Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 
18 
19    In other words, you are welcome to use, share and improve this program.
20    You are forbidden to forbid anyone else to use, share and improve
21    what you give them.   Help stamp out software-hoarding!  */
22 
23 
24 /* To test, compile with -Dtest.
25  This Dtestable feature turns this into a self-contained program
26  which reads a pattern, describes how it compiles,
27  then reads a string and searches for it.  */
28 
29 #ifdef emacs
30 
31 /* The `emacs' switch turns on certain special matching commands
32  that make sense only in emacs. */
33 
34 #include "config.h"
35 #include "lisp.h"
36 #include "buffer.h"
37 #include "syntax.h"
38 
39 #else  /* not emacs */
40 
41 #ifdef USG
42 #ifndef BSTRING
43 #define bcopy(s,d,n)	memcpy((d),(s),(n))
44 #define bcmp(s1,s2,n)	memcmp((s1),(s2),(n))
45 #define bzero(s,n)	memset((s),0,(n))
46 #endif
47 #endif
48 
49 /* Make alloca work the best possible way.  */
50 #ifdef __GNUC__
51 #define alloca __builtin_alloca
52 #else
53 #ifdef sparc
54 #include <alloca.h>
55 #endif
56 #endif
57 
58 /*
59  * Define the syntax stuff, so we can do the \<...\> things.
60  */
61 
62 #ifndef Sword /* must be non-zero in some of the tests below... */
63 #define Sword 1
64 #endif
65 
66 #define SYNTAX(c) re_syntax_table[c]
67 
68 #ifdef SYNTAX_TABLE
69 
70 char *re_syntax_table;
71 
72 #else
73 
74 static char re_syntax_table[256];
75 
76 static void
init_syntax_once()77 init_syntax_once ()
78 {
79    register int c;
80    static int done = 0;
81 
82    if (done)
83      return;
84 
85    bzero (re_syntax_table, sizeof re_syntax_table);
86 
87    for (c = 'a'; c <= 'z'; c++)
88      re_syntax_table[c] = Sword;
89 
90    for (c = 'A'; c <= 'Z'; c++)
91      re_syntax_table[c] = Sword;
92 
93    for (c = '0'; c <= '9'; c++)
94      re_syntax_table[c] = Sword;
95 
96    done = 1;
97 }
98 
99 #endif /* SYNTAX_TABLE */
100 #endif /* not emacs */
101 
102 #include "regex.h"
103 
104 /* Number of failure points to allocate space for initially,
105  when matching.  If this number is exceeded, more space is allocated,
106  so it is not a hard limit.  */
107 
108 #ifndef NFAILURES
109 #define NFAILURES 80
110 #endif /* NFAILURES */
111 
112 /* width of a byte in bits */
113 
114 #define BYTEWIDTH 8
115 
116 #ifndef SIGN_EXTEND_CHAR
117 #define SIGN_EXTEND_CHAR(x) (x)
118 #endif
119 
120 static int obscure_syntax = 0;
121 
122 /* Specify the precise syntax of regexp for compilation.
123    This provides for compatibility for various utilities
124    which historically have different, incompatible syntaxes.
125 
126    The argument SYNTAX is a bit-mask containing the two bits
127    RE_NO_BK_PARENS and RE_NO_BK_VBAR.  */
128 
129 int
re_set_syntax(syntax)130 re_set_syntax (syntax)
131 {
132   int ret;
133 
134   ret = obscure_syntax;
135   obscure_syntax = syntax;
136   return ret;
137 }
138 
139 /* re_compile_pattern takes a regular-expression string
140    and converts it into a buffer full of byte commands for matching.
141 
142   PATTERN   is the address of the pattern string
143   SIZE      is the length of it.
144   BUFP	    is a  struct re_pattern_buffer *  which points to the info
145 	    on where to store the byte commands.
146 	    This structure contains a  char *  which points to the
147 	    actual space, which should have been obtained with malloc.
148 	    re_compile_pattern may use  realloc  to grow the buffer space.
149 
150   The number of bytes of commands can be found out by looking in
151   the  struct re_pattern_buffer  that bufp pointed to,
152   after re_compile_pattern returns.
153 */
154 
155 #define PATPUSH(ch) (*b++ = (char) (ch))
156 
157 #define PATFETCH(c) \
158  {if (p == pend) goto end_of_pattern; \
159   c = * (unsigned char *) p++; \
160   if (translate) c = translate[c]; }
161 
162 #define PATFETCH_RAW(c) \
163  {if (p == pend) goto end_of_pattern; \
164   c = * (unsigned char *) p++; }
165 
166 #define PATUNFETCH p--
167 
168 #define EXTEND_BUFFER \
169   { char *old_buffer = bufp->buffer; \
170     if (bufp->allocated == (1<<16)) goto too_big; \
171     bufp->allocated *= 2; \
172     if (bufp->allocated > (1<<16)) bufp->allocated = (1<<16); \
173     if (!(bufp->buffer = (char *) realloc (bufp->buffer, bufp->allocated))) \
174       goto memory_exhausted; \
175     c = bufp->buffer - old_buffer; \
176     b += c; \
177     if (fixup_jump) \
178       fixup_jump += c; \
179     if (laststart) \
180       laststart += c; \
181     begalt += c; \
182     if (pending_exact) \
183       pending_exact += c; \
184   }
185 
186 static int store_jump (), insert_jump ();
187 
188 char *
re_compile_pattern(pattern,size,bufp)189 re_compile_pattern (pattern, size, bufp)
190      char *pattern;
191      int size;
192      struct re_pattern_buffer *bufp;
193 {
194   register char *b = bufp->buffer;
195   register char *p = pattern;
196   char *pend = pattern + size;
197   register unsigned c, c1;
198   char *p1;
199   unsigned char *translate = (unsigned char *) bufp->translate;
200 
201   /* address of the count-byte of the most recently inserted "exactn" command.
202     This makes it possible to tell whether a new exact-match character
203     can be added to that command or requires a new "exactn" command. */
204 
205   char *pending_exact = 0;
206 
207   /* address of the place where a forward-jump should go
208     to the end of the containing expression.
209     Each alternative of an "or", except the last, ends with a forward-jump
210     of this sort. */
211 
212   char *fixup_jump = 0;
213 
214   /* address of start of the most recently finished expression.
215     This tells postfix * where to find the start of its operand. */
216 
217   char *laststart = 0;
218 
219   /* In processing a repeat, 1 means zero matches is allowed */
220 
221   char zero_times_ok;
222 
223   /* In processing a repeat, 1 means many matches is allowed */
224 
225   char many_times_ok;
226 
227   /* address of beginning of regexp, or inside of last \( */
228 
229   char *begalt = b;
230 
231   /* Stack of information saved by \( and restored by \).
232      Four stack elements are pushed by each \(:
233        First, the value of b.
234        Second, the value of fixup_jump.
235        Third, the value of regnum.
236        Fourth, the value of begalt.  */
237 
238   int stackb[40];
239   int *stackp = stackb;
240   int *stacke = stackb + 40;
241   int *stackt;
242 
243   /* Counts \('s as they are encountered.  Remembered for the matching \),
244      where it becomes the "register number" to put in the stop_memory command */
245 
246   int regnum = 1;
247 
248   bufp->fastmap_accurate = 0;
249 
250 #ifndef emacs
251 #ifndef SYNTAX_TABLE
252   /*
253    * Initialize the syntax table.
254    */
255    init_syntax_once();
256 #endif
257 #endif
258 
259   if (bufp->allocated == 0)
260     {
261       bufp->allocated = 28;
262       if (bufp->buffer)
263 	/* EXTEND_BUFFER loses when bufp->allocated is 0 */
264 	bufp->buffer = (char *) realloc (bufp->buffer, 28);
265       else
266 	/* Caller did not allocate a buffer.  Do it for him */
267 	bufp->buffer = (char *) malloc (28);
268       if (!bufp->buffer) goto memory_exhausted;
269       begalt = b = bufp->buffer;
270     }
271 
272   while (p != pend)
273     {
274       if (b - bufp->buffer > bufp->allocated - 10)
275 	/* Note that EXTEND_BUFFER clobbers c */
276 	EXTEND_BUFFER;
277 
278       PATFETCH (c);
279 
280       switch (c)
281 	{
282 	case '$':
283 	  if (obscure_syntax & RE_TIGHT_VBAR)
284 	    {
285 	      if (! (obscure_syntax & RE_CONTEXT_INDEP_OPS) && p != pend)
286 		goto normal_char;
287 	      /* Make operand of last vbar end before this `$'.  */
288 	      if (fixup_jump)
289 		store_jump (fixup_jump, jump, b);
290 	      fixup_jump = 0;
291 	      PATPUSH (endline);
292 	      break;
293 	    }
294 
295 	  /* $ means succeed if at end of line, but only in special contexts.
296 	    If randomly in the middle of a pattern, it is a normal character. */
297 	  if (p == pend || *p == '\n'
298 	      || (obscure_syntax & RE_CONTEXT_INDEP_OPS)
299 	      || (obscure_syntax & RE_NO_BK_PARENS
300 		  ? *p == ')'
301 		  : *p == '\\' && p[1] == ')')
302 	      || (obscure_syntax & RE_NO_BK_VBAR
303 		  ? *p == '|'
304 		  : *p == '\\' && p[1] == '|'))
305 	    {
306 	      PATPUSH (endline);
307 	      break;
308 	    }
309 	  goto normal_char;
310 
311 	case '^':
312 	  /* ^ means succeed if at beg of line, but only if no preceding pattern. */
313 
314 	  if (laststart && p[-2] != '\n'
315 	      && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
316 	    goto normal_char;
317 	  if (obscure_syntax & RE_TIGHT_VBAR)
318 	    {
319 	      if (p != pattern + 1
320 		  && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
321 		goto normal_char;
322 	      PATPUSH (begline);
323 	      begalt = b;
324 	    }
325 	  else
326 	    PATPUSH (begline);
327 	  break;
328 
329 	case '+':
330 	case '?':
331 	  if (obscure_syntax & RE_BK_PLUS_QM)
332 	    goto normal_char;
333 	handle_plus:
334 	case '*':
335 	  /* If there is no previous pattern, char not special. */
336 	  if (!laststart && ! (obscure_syntax & RE_CONTEXT_INDEP_OPS))
337 	    goto normal_char;
338 	  /* If there is a sequence of repetition chars,
339 	     collapse it down to equivalent to just one.  */
340 	  zero_times_ok = 0;
341 	  many_times_ok = 0;
342 	  while (1)
343 	    {
344 	      zero_times_ok |= c != '+';
345 	      many_times_ok |= c != '?';
346 	      if (p == pend)
347 		break;
348 	      PATFETCH (c);
349 	      if (c == '*')
350 		;
351 	      else if (!(obscure_syntax & RE_BK_PLUS_QM)
352 		       && (c == '+' || c == '?'))
353 		;
354 	      else if ((obscure_syntax & RE_BK_PLUS_QM)
355 		       && c == '\\')
356 		{
357 		  int c1;
358 		  PATFETCH (c1);
359 		  if (!(c1 == '+' || c1 == '?'))
360 		    {
361 		      PATUNFETCH;
362 		      PATUNFETCH;
363 		      break;
364 		    }
365 		  c = c1;
366 		}
367 	      else
368 		{
369 		  PATUNFETCH;
370 		  break;
371 		}
372 	    }
373 
374 	  /* Star, etc. applied to an empty pattern is equivalent
375 	     to an empty pattern.  */
376 	  if (!laststart)
377 	    break;
378 
379 	  /* Now we know whether 0 matches is allowed,
380 	     and whether 2 or more matches is allowed.  */
381 	  if (many_times_ok)
382 	    {
383 	      /* If more than one repetition is allowed,
384 		 put in a backward jump at the end.  */
385 	      store_jump (b, maybe_finalize_jump, laststart - 3);
386 	      b += 3;
387 	    }
388 	  insert_jump (on_failure_jump, laststart, b + 3, b);
389 	  pending_exact = 0;
390 	  b += 3;
391 	  if (!zero_times_ok)
392 	    {
393 	      /* At least one repetition required: insert before the loop
394 		 a skip over the initial on-failure-jump instruction */
395 	      insert_jump (dummy_failure_jump, laststart, laststart + 6, b);
396 	      b += 3;
397 	    }
398 	  break;
399 
400 	case '.':
401 	  laststart = b;
402 	  PATPUSH (anychar);
403 	  break;
404 
405 	case '[':
406 	  while (b - bufp->buffer
407 		 > bufp->allocated - 3 - (1 << BYTEWIDTH) / BYTEWIDTH)
408 	    /* Note that EXTEND_BUFFER clobbers c */
409 	    EXTEND_BUFFER;
410 
411 	  laststart = b;
412 	  if (*p == '^')
413 	    PATPUSH (charset_not), p++;
414 	  else
415 	    PATPUSH (charset);
416 	  p1 = p;
417 
418 	  PATPUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
419 	  /* Clear the whole map */
420 	  bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
421 	  /* Read in characters and ranges, setting map bits */
422 	  while (1)
423 	    {
424 	      PATFETCH (c);
425 	      if (c == ']' && p != p1 + 1) break;
426 	      if (*p == '-' && p[1] != ']')
427 		{
428 		  PATFETCH (c1);
429 		  PATFETCH (c1);
430 		  while (c <= c1)
431 		    b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH), c++;
432 		}
433 	      else
434 		{
435 		  b[c / BYTEWIDTH] |= 1 << (c % BYTEWIDTH);
436 		}
437 	    }
438 	  /* Discard any bitmap bytes that are all 0 at the end of the map.
439 	     Decrement the map-length byte too. */
440 	  while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
441 	    b[-1]--;
442 	  b += b[-1];
443 	  break;
444 
445 	case '(':
446 	  if (! (obscure_syntax & RE_NO_BK_PARENS))
447 	    goto normal_char;
448 	  else
449 	    goto handle_open;
450 
451 	case ')':
452 	  if (! (obscure_syntax & RE_NO_BK_PARENS))
453 	    goto normal_char;
454 	  else
455 	    goto handle_close;
456 
457 	case '\n':
458 	  if (! (obscure_syntax & RE_NEWLINE_OR))
459 	    goto normal_char;
460 	  else
461 	    goto handle_bar;
462 
463 	case '|':
464 	  if (! (obscure_syntax & RE_NO_BK_VBAR))
465 	    goto normal_char;
466 	  else
467 	    goto handle_bar;
468 
469         case '\\':
470 	  if (p == pend) goto invalid_pattern;
471 	  PATFETCH_RAW (c);
472 	  switch (c)
473 	    {
474 	    case '(':
475 	      if (obscure_syntax & RE_NO_BK_PARENS)
476 		goto normal_backsl;
477 	    handle_open:
478 	      if (stackp == stacke) goto nesting_too_deep;
479 	      if (regnum < RE_NREGS)
480 	        {
481 		  PATPUSH (start_memory);
482 		  PATPUSH (regnum);
483 	        }
484 	      *stackp++ = b - bufp->buffer;
485 	      *stackp++ = fixup_jump ? fixup_jump - bufp->buffer + 1 : 0;
486 	      *stackp++ = regnum++;
487 	      *stackp++ = begalt - bufp->buffer;
488 	      fixup_jump = 0;
489 	      laststart = 0;
490 	      begalt = b;
491 	      break;
492 
493 	    case ')':
494 	      if (obscure_syntax & RE_NO_BK_PARENS)
495 		goto normal_backsl;
496 	    handle_close:
497 	      if (stackp == stackb) goto unmatched_close;
498 	      begalt = *--stackp + bufp->buffer;
499 	      if (fixup_jump)
500 		store_jump (fixup_jump, jump, b);
501 	      if (stackp[-1] < RE_NREGS)
502 		{
503 		  PATPUSH (stop_memory);
504 		  PATPUSH (stackp[-1]);
505 		}
506 	      stackp -= 2;
507 	      fixup_jump = 0;
508 	      if (*stackp)
509 		fixup_jump = *stackp + bufp->buffer - 1;
510 	      laststart = *--stackp + bufp->buffer;
511 	      break;
512 
513 	    case '|':
514 	      if (obscure_syntax & RE_NO_BK_VBAR)
515 		goto normal_backsl;
516 	    handle_bar:
517 	      insert_jump (on_failure_jump, begalt, b + 6, b);
518 	      pending_exact = 0;
519 	      b += 3;
520 	      if (fixup_jump)
521 		store_jump (fixup_jump, jump, b);
522 	      fixup_jump = b;
523 	      b += 3;
524 	      laststart = 0;
525 	      begalt = b;
526 	      break;
527 
528 #ifdef emacs
529 	    case '=':
530 	      PATPUSH (at_dot);
531 	      break;
532 
533 	    case 's':
534 	      laststart = b;
535 	      PATPUSH (syntaxspec);
536 	      PATFETCH (c);
537 	      PATPUSH (syntax_spec_code[c]);
538 	      break;
539 
540 	    case 'S':
541 	      laststart = b;
542 	      PATPUSH (notsyntaxspec);
543 	      PATFETCH (c);
544 	      PATPUSH (syntax_spec_code[c]);
545 	      break;
546 #endif /* emacs */
547 
548 	    case 'w':
549 	      laststart = b;
550 	      PATPUSH (wordchar);
551 	      break;
552 
553 	    case 'W':
554 	      laststart = b;
555 	      PATPUSH (notwordchar);
556 	      break;
557 
558 	    case '<':
559 	      PATPUSH (wordbeg);
560 	      break;
561 
562 	    case '>':
563 	      PATPUSH (wordend);
564 	      break;
565 
566 	    case 'b':
567 	      PATPUSH (wordbound);
568 	      break;
569 
570 	    case 'B':
571 	      PATPUSH (notwordbound);
572 	      break;
573 
574 	    case '`':
575 	      PATPUSH (begbuf);
576 	      break;
577 
578 	    case '\'':
579 	      PATPUSH (endbuf);
580 	      break;
581 
582 	    case '1':
583 	    case '2':
584 	    case '3':
585 	    case '4':
586 	    case '5':
587 	    case '6':
588 	    case '7':
589 	    case '8':
590 	    case '9':
591 	      c1 = c - '0';
592 	      if (c1 >= regnum)
593 		goto normal_char;
594 	      for (stackt = stackp - 2;  stackt > stackb;  stackt -= 4)
595  		if (*stackt == c1)
596 		  goto normal_char;
597 	      laststart = b;
598 	      PATPUSH (duplicate);
599 	      PATPUSH (c1);
600 	      break;
601 
602 	    case '+':
603 	    case '?':
604 	      if (obscure_syntax & RE_BK_PLUS_QM)
605 		goto handle_plus;
606 
607 	    default:
608 	    normal_backsl:
609 	      /* You might think it would be useful for \ to mean
610 		 not to translate; but if we don't translate it
611 		 it will never match anything.  */
612 	      if (translate) c = translate[c];
613 	      goto normal_char;
614 	    }
615 	  break;
616 
617 	default:
618 	normal_char:
619 	  if (!pending_exact || pending_exact + *pending_exact + 1 != b
620 	      || *pending_exact == 0177 || *p == '*' || *p == '^'
621 	      || ((obscure_syntax & RE_BK_PLUS_QM)
622 		  ? *p == '\\' && (p[1] == '+' || p[1] == '?')
623 		  : (*p == '+' || *p == '?')))
624 	    {
625 	      laststart = b;
626 	      PATPUSH (exactn);
627 	      pending_exact = b;
628 	      PATPUSH (0);
629 	    }
630 	  PATPUSH (c);
631 	  (*pending_exact)++;
632 	}
633     }
634 
635   if (fixup_jump)
636     store_jump (fixup_jump, jump, b);
637 
638   if (stackp != stackb) goto unmatched_open;
639 
640   bufp->used = b - bufp->buffer;
641   return 0;
642 
643  invalid_pattern:
644   return "Invalid regular expression";
645 
646  unmatched_open:
647   return "Unmatched \\(";
648 
649  unmatched_close:
650   return "Unmatched \\)";
651 
652  end_of_pattern:
653   return "Premature end of regular expression";
654 
655  nesting_too_deep:
656   return "Nesting too deep";
657 
658  too_big:
659   return "Regular expression too big";
660 
661  memory_exhausted:
662   return "Memory exhausted";
663 }
664 
665 /* Store where `from' points a jump operation to jump to where `to' points.
666   `opcode' is the opcode to store. */
667 
668 static int
store_jump(from,opcode,to)669 store_jump (from, opcode, to)
670      char *from, *to;
671      char opcode;
672 {
673   from[0] = opcode;
674   from[1] = (to - (from + 3)) & 0377;
675   from[2] = (to - (from + 3)) >> 8;
676 }
677 
678 /* Open up space at char FROM, and insert there a jump to TO.
679    CURRENT_END gives te end of the storage no in use,
680    so we know how much data to copy up.
681    OP is the opcode of the jump to insert.
682 
683    If you call this function, you must zero out pending_exact.  */
684 
685 static int
insert_jump(op,from,to,current_end)686 insert_jump (op, from, to, current_end)
687      char op;
688      char *from, *to, *current_end;
689 {
690   register char *pto = current_end + 3;
691   register char *pfrom = current_end;
692   while (pfrom != from)
693     *--pto = *--pfrom;
694   store_jump (from, op, to);
695 }
696 
697 /* Given a pattern, compute a fastmap from it.
698  The fastmap records which of the (1 << BYTEWIDTH) possible characters
699  can start a string that matches the pattern.
700  This fastmap is used by re_search to skip quickly over totally implausible text.
701 
702  The caller must supply the address of a (1 << BYTEWIDTH)-byte data area
703  as bufp->fastmap.
704  The other components of bufp describe the pattern to be used.  */
705 
706 void
re_compile_fastmap(bufp)707 re_compile_fastmap (bufp)
708      struct re_pattern_buffer *bufp;
709 {
710   unsigned char *pattern = (unsigned char *) bufp->buffer;
711   int size = bufp->used;
712   register char *fastmap = bufp->fastmap;
713   register unsigned char *p = pattern;
714   register unsigned char *pend = pattern + size;
715   register int j, k;
716   unsigned char *translate = (unsigned char *) bufp->translate;
717 
718   unsigned char *stackb[NFAILURES];
719   unsigned char **stackp = stackb;
720 
721   bzero (fastmap, (1 << BYTEWIDTH));
722   bufp->fastmap_accurate = 1;
723   bufp->can_be_null = 0;
724 
725   while (p)
726     {
727       if (p == pend)
728 	{
729 	  bufp->can_be_null = 1;
730 	  break;
731 	}
732 #ifdef SWITCH_ENUM_BUG
733       switch ((int) ((enum regexpcode) *p++))
734 #else
735       switch ((enum regexpcode) *p++)
736 #endif
737 	{
738 	case exactn:
739 	  if (translate)
740 	    fastmap[translate[p[1]]] = 1;
741 	  else
742 	    fastmap[p[1]] = 1;
743 	  break;
744 
745         case begline:
746         case before_dot:
747 	case at_dot:
748 	case after_dot:
749 	case begbuf:
750 	case endbuf:
751 	case wordbound:
752 	case notwordbound:
753 	case wordbeg:
754 	case wordend:
755 	  continue;
756 
757 	case endline:
758 	  if (translate)
759 	    fastmap[translate['\n']] = 1;
760 	  else
761 	    fastmap['\n'] = 1;
762 	  if (bufp->can_be_null != 1)
763 	    bufp->can_be_null = 2;
764 	  break;
765 
766 	case finalize_jump:
767 	case maybe_finalize_jump:
768 	case jump:
769 	case dummy_failure_jump:
770 	  bufp->can_be_null = 1;
771 	  j = *p++ & 0377;
772 	  j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
773 	  p += j + 1;		/* The 1 compensates for missing ++ above */
774 	  if (j > 0)
775 	    continue;
776 	  /* Jump backward reached implies we just went through
777 	     the body of a loop and matched nothing.
778 	     Opcode jumped to should be an on_failure_jump.
779 	     Just treat it like an ordinary jump.
780 	     For a * loop, it has pushed its failure point already;
781 	     if so, discard that as redundant.  */
782 	  if ((enum regexpcode) *p != on_failure_jump)
783 	    continue;
784 	  p++;
785 	  j = *p++ & 0377;
786 	  j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
787 	  p += j + 1;		/* The 1 compensates for missing ++ above */
788 	  if (stackp != stackb && *stackp == p)
789 	    stackp--;
790 	  continue;
791 
792 	case on_failure_jump:
793 	  j = *p++ & 0377;
794 	  j += SIGN_EXTEND_CHAR (*(char *)p) << 8;
795 	  p++;
796 	  *++stackp = p + j;
797 	  continue;
798 
799 	case start_memory:
800 	case stop_memory:
801 	  p++;
802 	  continue;
803 
804 	case duplicate:
805 	  bufp->can_be_null = 1;
806 	  fastmap['\n'] = 1;
807 	case anychar:
808 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
809 	    if (j != '\n')
810 	      fastmap[j] = 1;
811 	  if (bufp->can_be_null)
812 	    return;
813 	  /* Don't return; check the alternative paths
814 	     so we can set can_be_null if appropriate.  */
815 	  break;
816 
817 	case wordchar:
818 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
819 	    if (SYNTAX (j) == Sword)
820 	      fastmap[j] = 1;
821 	  break;
822 
823 	case notwordchar:
824 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
825 	    if (SYNTAX (j) != Sword)
826 	      fastmap[j] = 1;
827 	  break;
828 
829 #ifdef emacs
830 	case syntaxspec:
831 	  k = *p++;
832 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
833 	    if (SYNTAX (j) == (enum syntaxcode) k)
834 	      fastmap[j] = 1;
835 	  break;
836 
837 	case notsyntaxspec:
838 	  k = *p++;
839 	  for (j = 0; j < (1 << BYTEWIDTH); j++)
840 	    if (SYNTAX (j) != (enum syntaxcode) k)
841 	      fastmap[j] = 1;
842 	  break;
843 #endif /* emacs */
844 
845 	case charset:
846 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
847 	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
848 	      {
849 		if (translate)
850 		  fastmap[translate[j]] = 1;
851 		else
852 		  fastmap[j] = 1;
853 	      }
854 	  break;
855 
856 	case charset_not:
857 	  /* Chars beyond end of map must be allowed */
858 	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
859 	    if (translate)
860 	      fastmap[translate[j]] = 1;
861 	    else
862 	      fastmap[j] = 1;
863 
864 	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
865 	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
866 	      {
867 		if (translate)
868 		  fastmap[translate[j]] = 1;
869 		else
870 		  fastmap[j] = 1;
871 	      }
872 	  break;
873 	}
874 
875       /* Get here means we have successfully found the possible starting characters
876 	 of one path of the pattern.  We need not follow this path any farther.
877 	 Instead, look at the next alternative remembered in the stack. */
878       if (stackp != stackb)
879 	p = *stackp--;
880       else
881 	break;
882     }
883 }
884 
885 /* Like re_search_2, below, but only one string is specified. */
886 
887 int
re_search(pbufp,string,size,startpos,range,regs)888 re_search (pbufp, string, size, startpos, range, regs)
889      struct re_pattern_buffer *pbufp;
890      char *string;
891      int size, startpos, range;
892      struct re_registers *regs;
893 {
894   return re_search_2 (pbufp, 0, 0, string, size, startpos, range, regs, size);
895 }
896 
897 /* Like re_match_2 but tries first a match starting at index STARTPOS,
898    then at STARTPOS + 1, and so on.
899    RANGE is the number of places to try before giving up.
900    If RANGE is negative, the starting positions tried are
901     STARTPOS, STARTPOS - 1, etc.
902    It is up to the caller to make sure that range is not so large
903    as to take the starting position outside of the input strings.
904 
905 The value returned is the position at which the match was found,
906  or -1 if no match was found,
907  or -2 if error (such as failure stack overflow).  */
908 
909 int
re_search_2(pbufp,string1,size1,string2,size2,startpos,range,regs,mstop)910 re_search_2 (pbufp, string1, size1, string2, size2, startpos, range, regs, mstop)
911      struct re_pattern_buffer *pbufp;
912      char *string1, *string2;
913      int size1, size2;
914      int startpos;
915      register int range;
916      struct re_registers *regs;
917      int mstop;
918 {
919   register char *fastmap = pbufp->fastmap;
920   register unsigned char *translate = (unsigned char *) pbufp->translate;
921   int total = size1 + size2;
922   int val;
923 
924   /* Update the fastmap now if not correct already */
925   if (fastmap && !pbufp->fastmap_accurate)
926     re_compile_fastmap (pbufp);
927 
928   /* Don't waste time in a long search for a pattern
929      that says it is anchored.  */
930   if (pbufp->used > 0 && (enum regexpcode) pbufp->buffer[0] == begbuf
931       && range > 0)
932     {
933       if (startpos > 0)
934 	return -1;
935       else
936 	range = 1;
937     }
938 
939   while (1)
940     {
941       /* If a fastmap is supplied, skip quickly over characters
942 	 that cannot possibly be the start of a match.
943 	 Note, however, that if the pattern can possibly match
944 	 the null string, we must test it at each starting point
945 	 so that we take the first null string we get.  */
946 
947       if (fastmap && startpos < total && pbufp->can_be_null != 1)
948 	{
949 	  if (range > 0)
950 	    {
951 	      register int lim = 0;
952 	      register unsigned char *p;
953 	      int irange = range;
954 	      if (startpos < size1 && startpos + range >= size1)
955 		lim = range - (size1 - startpos);
956 
957 	      p = ((unsigned char *)
958 		   &(startpos >= size1 ? string2 - size1 : string1)[startpos]);
959 
960 	      if (translate)
961 		{
962 		  while (range > lim && !fastmap[translate[*p++]])
963 		    range--;
964 		}
965 	      else
966 		{
967 		  while (range > lim && !fastmap[*p++])
968 		    range--;
969 		}
970 	      startpos += irange - range;
971 	    }
972 	  else
973 	    {
974 	      register unsigned char c;
975 	      if (startpos >= size1)
976 		c = string2[startpos - size1];
977 	      else
978 		c = string1[startpos];
979 	      c &= 0xff;
980 	      if (translate ? !fastmap[translate[c]] : !fastmap[c])
981 		goto advance;
982 	    }
983 	}
984 
985       if (range >= 0 && startpos == total
986 	  && fastmap && pbufp->can_be_null == 0)
987 	return -1;
988 
989       val = re_match_2 (pbufp, string1, size1, string2, size2, startpos, regs, mstop);
990       if (0 <= val)
991 	{
992 	  if (val == -2)
993 	    return -2;
994 	  return startpos;
995 	}
996 
997 #ifdef C_ALLOCA
998       alloca (0);
999 #endif /* C_ALLOCA */
1000 
1001     advance:
1002       if (!range) break;
1003       if (range > 0) range--, startpos++; else range++, startpos--;
1004     }
1005   return -1;
1006 }
1007 
1008 #ifndef emacs   /* emacs never uses this */
1009 int
re_match(pbufp,string,size,pos,regs)1010 re_match (pbufp, string, size, pos, regs)
1011      struct re_pattern_buffer *pbufp;
1012      char *string;
1013      int size, pos;
1014      struct re_registers *regs;
1015 {
1016   return re_match_2 (pbufp, 0, 0, string, size, pos, regs, size);
1017 }
1018 #endif /* emacs */
1019 
1020 /* Maximum size of failure stack.  Beyond this, overflow is an error.  */
1021 
1022 int re_max_failures = 2000;
1023 
1024 static int bcmp_translate();
1025 /* Match the pattern described by PBUFP
1026    against data which is the virtual concatenation of STRING1 and STRING2.
1027    SIZE1 and SIZE2 are the sizes of the two data strings.
1028    Start the match at position POS.
1029    Do not consider matching past the position MSTOP.
1030 
1031    If pbufp->fastmap is nonzero, then it had better be up to date.
1032 
1033    The reason that the data to match are specified as two components
1034    which are to be regarded as concatenated
1035    is so this function can be used directly on the contents of an Emacs buffer.
1036 
1037    -1 is returned if there is no match.  -2 is returned if there is
1038    an error (such as match stack overflow).  Otherwise the value is the length
1039    of the substring which was matched.  */
1040 
1041 int
re_match_2(pbufp,string1,size1,string2,size2,pos,regs,mstop)1042 re_match_2 (pbufp, string1, size1, string2, size2, pos, regs, mstop)
1043      struct re_pattern_buffer *pbufp;
1044      unsigned char *string1, *string2;
1045      int size1, size2;
1046      int pos;
1047      struct re_registers *regs;
1048      int mstop;
1049 {
1050   register unsigned char *p = (unsigned char *) pbufp->buffer;
1051   register unsigned char *pend = p + pbufp->used;
1052   /* End of first string */
1053   unsigned char *end1;
1054   /* End of second string */
1055   unsigned char *end2;
1056   /* Pointer just past last char to consider matching */
1057   unsigned char *end_match_1, *end_match_2;
1058   register unsigned char *d, *dend;
1059   register int mcnt;
1060   unsigned char *translate = (unsigned char *) pbufp->translate;
1061 
1062  /* Failure point stack.  Each place that can handle a failure further down the line
1063     pushes a failure point on this stack.  It consists of two char *'s.
1064     The first one pushed is where to resume scanning the pattern;
1065     the second pushed is where to resume scanning the strings.
1066     If the latter is zero, the failure point is a "dummy".
1067     If a failure happens and the innermost failure point is dormant,
1068     it discards that failure point and tries the next one. */
1069 
1070   unsigned char *initial_stack[2 * NFAILURES];
1071   unsigned char **stackb = initial_stack;
1072   unsigned char **stackp = stackb, **stacke = &stackb[2 * NFAILURES];
1073 
1074   /* Information on the "contents" of registers.
1075      These are pointers into the input strings; they record
1076      just what was matched (on this attempt) by some part of the pattern.
1077      The start_memory command stores the start of a register's contents
1078      and the stop_memory command stores the end.
1079 
1080      At that point, regstart[regnum] points to the first character in the register,
1081      regend[regnum] points to the first character beyond the end of the register,
1082      regstart_seg1[regnum] is true iff regstart[regnum] points into string1,
1083      and regend_seg1[regnum] is true iff regend[regnum] points into string1.  */
1084 
1085   unsigned char *regstart[RE_NREGS];
1086   unsigned char *regend[RE_NREGS];
1087   unsigned char regstart_seg1[RE_NREGS], regend_seg1[RE_NREGS];
1088 
1089   /* Set up pointers to ends of strings.
1090      Don't allow the second string to be empty unless both are empty.  */
1091   if (!size2)
1092     {
1093       string2 = string1;
1094       size2 = size1;
1095       string1 = 0;
1096       size1 = 0;
1097     }
1098   end1 = string1 + size1;
1099   end2 = string2 + size2;
1100 
1101   /* Compute where to stop matching, within the two strings */
1102   if (mstop <= size1)
1103     {
1104       end_match_1 = string1 + mstop;
1105       end_match_2 = string2;
1106     }
1107   else
1108     {
1109       end_match_1 = end1;
1110       end_match_2 = string2 + mstop - size1;
1111     }
1112 
1113   /* Initialize \) text positions to -1
1114      to mark ones that no \( or \) has been seen for.  */
1115 
1116   for (mcnt = 0; mcnt < sizeof (regend) / sizeof (*regend); mcnt++)
1117     regend[mcnt] = (unsigned char *) -1;
1118 
1119   /* `p' scans through the pattern as `d' scans through the data.
1120      `dend' is the end of the input string that `d' points within.
1121      `d' is advanced into the following input string whenever necessary,
1122      but this happens before fetching;
1123      therefore, at the beginning of the loop,
1124      `d' can be pointing at the end of a string,
1125      but it cannot equal string2.  */
1126 
1127   if (pos <= size1)
1128     d = string1 + pos, dend = end_match_1;
1129   else
1130     d = string2 + pos - size1, dend = end_match_2;
1131 
1132 /* Write PREFETCH; just before fetching a character with *d.  */
1133 #define PREFETCH \
1134  while (d == dend)						    \
1135   { if (dend == end_match_2) goto fail;  /* end of string2 => failure */   \
1136     d = string2;  /* end of string1 => advance to string2. */       \
1137     dend = end_match_2; }
1138 
1139   /* This loop loops over pattern commands.
1140      It exits by returning from the function if match is complete,
1141      or it drops through if match fails at this starting point in the input data. */
1142 
1143   while (1)
1144     {
1145       if (p == pend)
1146 	/* End of pattern means we have succeeded! */
1147 	{
1148 	  /* If caller wants register contents data back, convert it to indices */
1149 	  if (regs)
1150 	    {
1151  	      regs->start[0] = pos;
1152  	      if (dend == end_match_1)
1153  		regs->end[0] = d - string1;
1154  	      else
1155  		regs->end[0] = d - string2 + size1;
1156  	      for (mcnt = 1; mcnt < RE_NREGS; mcnt++)
1157 		{
1158 		  if (regend[mcnt] == (unsigned char *) -1)
1159 		    {
1160 		      regs->start[mcnt] = -1;
1161 		      regs->end[mcnt] = -1;
1162 		      continue;
1163 		    }
1164  		  if (regstart_seg1[mcnt])
1165 		    regs->start[mcnt] = regstart[mcnt] - string1;
1166 		  else
1167 		    regs->start[mcnt] = regstart[mcnt] - string2 + size1;
1168  		  if (regend_seg1[mcnt])
1169 		    regs->end[mcnt] = regend[mcnt] - string1;
1170 		  else
1171 		    regs->end[mcnt] = regend[mcnt] - string2 + size1;
1172 		}
1173 	    }
1174  	  if (dend == end_match_1)
1175 	    return (d - string1 - pos);
1176 	  else
1177 	    return d - string2 + size1 - pos;
1178 	}
1179 
1180       /* Otherwise match next pattern command */
1181 #ifdef SWITCH_ENUM_BUG
1182       switch ((int) ((enum regexpcode) *p++))
1183 #else
1184       switch ((enum regexpcode) *p++)
1185 #endif
1186 	{
1187 
1188 	/* \( is represented by a start_memory, \) by a stop_memory.
1189 	    Both of those commands contain a "register number" argument.
1190 	    The text matched within the \( and \) is recorded under that number.
1191 	    Then, \<digit> turns into a `duplicate' command which
1192 	    is followed by the numeric value of <digit> as the register number. */
1193 
1194 	case start_memory:
1195 	  regstart[*p] = d;
1196  	  regstart_seg1[*p++] = (dend == end_match_1);
1197 	  break;
1198 
1199 	case stop_memory:
1200 	  regend[*p] = d;
1201  	  regend_seg1[*p++] = (dend == end_match_1);
1202 	  break;
1203 
1204 	case duplicate:
1205 	  {
1206 	    int regno = *p++;   /* Get which register to match against */
1207 	    register unsigned char *d2, *dend2;
1208 
1209 	    d2 = regstart[regno];
1210  	    dend2 = ((regstart_seg1[regno] == regend_seg1[regno])
1211 		     ? regend[regno] : end_match_1);
1212 	    while (1)
1213 	      {
1214 		/* Advance to next segment in register contents, if necessary */
1215 		while (d2 == dend2)
1216 		  {
1217 		    if (dend2 == end_match_2) break;
1218 		    if (dend2 == regend[regno]) break;
1219 		    d2 = string2, dend2 = regend[regno];  /* end of string1 => advance to string2. */
1220 		  }
1221 		/* At end of register contents => success */
1222 		if (d2 == dend2) break;
1223 
1224 		/* Advance to next segment in data being matched, if necessary */
1225 		PREFETCH;
1226 
1227 		/* mcnt gets # consecutive chars to compare */
1228 		mcnt = dend - d;
1229 		if (mcnt > dend2 - d2)
1230 		  mcnt = dend2 - d2;
1231 		/* Compare that many; failure if mismatch, else skip them. */
1232 		if (translate ? bcmp_translate (d, d2, mcnt, translate) : bcmp (d, d2, mcnt))
1233 		  goto fail;
1234 		d += mcnt, d2 += mcnt;
1235 	      }
1236 	  }
1237 	  break;
1238 
1239 	case anychar:
1240 	  /* fetch a data character */
1241 	  PREFETCH;
1242 	  /* Match anything but a newline.  */
1243 	  if ((translate ? translate[*d++] : *d++) == '\n')
1244 	    goto fail;
1245 	  break;
1246 
1247 	case charset:
1248 	case charset_not:
1249 	  {
1250 	    /* Nonzero for charset_not */
1251 	    int not = 0;
1252 	    register int c;
1253 	    if (*(p - 1) == (unsigned char) charset_not)
1254 	      not = 1;
1255 
1256 	    /* fetch a data character */
1257 	    PREFETCH;
1258 
1259 	    if (translate)
1260 	      c = translate [*d];
1261 	    else
1262 	      c = *d;
1263 
1264 	    if (c < *p * BYTEWIDTH
1265 		&& p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1266 	      not = !not;
1267 
1268 	    p += 1 + *p;
1269 
1270 	    if (!not) goto fail;
1271 	    d++;
1272 	    break;
1273 	  }
1274 
1275 	case begline:
1276 	  if (d == string1 || d[-1] == '\n')
1277 	    break;
1278 	  goto fail;
1279 
1280 	case endline:
1281 	  if (d == end2
1282 	      || (d == end1 ? (size2 == 0 || *string2 == '\n') : *d == '\n'))
1283 	    break;
1284 	  goto fail;
1285 
1286 	/* "or" constructs ("|") are handled by starting each alternative
1287 	    with an on_failure_jump that points to the start of the next alternative.
1288 	    Each alternative except the last ends with a jump to the joining point.
1289 	    (Actually, each jump except for the last one really jumps
1290 	     to the following jump, because tensioning the jumps is a hassle.) */
1291 
1292 	/* The start of a stupid repeat has an on_failure_jump that points
1293 	   past the end of the repeat text.
1294 	   This makes a failure point so that, on failure to match a repetition,
1295 	   matching restarts past as many repetitions have been found
1296 	   with no way to fail and look for another one.  */
1297 
1298 	/* A smart repeat is similar but loops back to the on_failure_jump
1299 	   so that each repetition makes another failure point. */
1300 
1301 	case on_failure_jump:
1302 	  if (stackp == stacke)
1303 	    {
1304 	      unsigned char **stackx;
1305 	      if (stacke - stackb > re_max_failures * 2)
1306 		return -2;
1307 	      stackx = (unsigned char **) alloca (2 * (stacke - stackb)
1308 					 * sizeof (char *));
1309 	      bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1310 	      stackp = stackx + (stackp - stackb);
1311 	      stacke = stackx + 2 * (stacke - stackb);
1312 	      stackb = stackx;
1313 	    }
1314 	  mcnt = *p++ & 0377;
1315 	  mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1316 	  p++;
1317 	  *stackp++ = mcnt + p;
1318 	  *stackp++ = d;
1319 	  break;
1320 
1321 	/* The end of a smart repeat has an maybe_finalize_jump back.
1322 	   Change it either to a finalize_jump or an ordinary jump. */
1323 
1324 	case maybe_finalize_jump:
1325 	  mcnt = *p++ & 0377;
1326 	  mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1327 	  p++;
1328 	  {
1329 	    register unsigned char *p2 = p;
1330 	    /* Compare what follows with the begining of the repeat.
1331 	       If we can establish that there is nothing that they would
1332 	       both match, we can change to finalize_jump */
1333 	    while (p2 != pend
1334 		   && (*p2 == (unsigned char) stop_memory
1335 		       || *p2 == (unsigned char) start_memory))
1336 	      p2++;
1337 	    if (p2 == pend)
1338 	      p[-3] = (unsigned char) finalize_jump;
1339 	    else if (*p2 == (unsigned char) exactn
1340 		     || *p2 == (unsigned char) endline)
1341 	      {
1342 		register int c = *p2 == (unsigned char) endline ? '\n' : p2[2];
1343 		register unsigned char *p1 = p + mcnt;
1344 		/* p1[0] ... p1[2] are an on_failure_jump.
1345 		   Examine what follows that */
1346 		if (p1[3] == (unsigned char) exactn && p1[5] != c)
1347 		  p[-3] = (unsigned char) finalize_jump;
1348 		else if (p1[3] == (unsigned char) charset
1349 			 || p1[3] == (unsigned char) charset_not)
1350 		  {
1351 		    int not = p1[3] == (unsigned char) charset_not;
1352 		    if (c < p1[4] * BYTEWIDTH
1353 			&& p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
1354 		      not = !not;
1355 		    /* not is 1 if c would match */
1356 		    /* That means it is not safe to finalize */
1357 		    if (!not)
1358 		      p[-3] = (unsigned char) finalize_jump;
1359 		  }
1360 	      }
1361 	  }
1362 	  p -= 2;
1363 	  if (p[-1] != (unsigned char) finalize_jump)
1364 	    {
1365 	      p[-1] = (unsigned char) jump;
1366 	      goto nofinalize;
1367 	    }
1368 
1369 	/* The end of a stupid repeat has a finalize-jump
1370 	   back to the start, where another failure point will be made
1371 	   which will point after all the repetitions found so far. */
1372 
1373 	case finalize_jump:
1374 	  stackp -= 2;
1375 
1376 	case jump:
1377 	nofinalize:
1378 	  mcnt = *p++ & 0377;
1379 	  mcnt += SIGN_EXTEND_CHAR (*(char *)p) << 8;
1380 	  p += mcnt + 1;	/* The 1 compensates for missing ++ above */
1381 	  break;
1382 
1383 	case dummy_failure_jump:
1384 	  if (stackp == stacke)
1385 	    {
1386 	      unsigned char **stackx
1387 		= (unsigned char **) alloca (2 * (stacke - stackb)
1388 					     * sizeof (char *));
1389 	      bcopy (stackb, stackx, (stacke - stackb) * sizeof (char *));
1390 	      stackp = stackx + (stackp - stackb);
1391 	      stacke = stackx + 2 * (stacke - stackb);
1392 	      stackb = stackx;
1393 	    }
1394 	  *stackp++ = 0;
1395 	  *stackp++ = 0;
1396 	  goto nofinalize;
1397 
1398 	case wordbound:
1399 	  if (d == string1  /* Points to first char */
1400 	      || d == end2  /* Points to end */
1401 	      || (d == end1 && size2 == 0)) /* Points to end */
1402 	    break;
1403 	  if ((SYNTAX (d[-1]) == Sword)
1404 	      != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1405 	    break;
1406 	  goto fail;
1407 
1408 	case notwordbound:
1409 	  if (d == string1  /* Points to first char */
1410 	      || d == end2  /* Points to end */
1411 	      || (d == end1 && size2 == 0)) /* Points to end */
1412 	    goto fail;
1413 	  if ((SYNTAX (d[-1]) == Sword)
1414 	      != (SYNTAX (d == end1 ? *string2 : *d) == Sword))
1415 	    goto fail;
1416 	  break;
1417 
1418 	case wordbeg:
1419 	  if (d == end2  /* Points to end */
1420 	      || (d == end1 && size2 == 0) /* Points to end */
1421 	      || SYNTAX (* (d == end1 ? string2 : d)) != Sword) /* Next char not a letter */
1422 	    goto fail;
1423 	  if (d == string1  /* Points to first char */
1424 	      || SYNTAX (d[-1]) != Sword)  /* prev char not letter */
1425 	    break;
1426 	  goto fail;
1427 
1428 	case wordend:
1429 	  if (d == string1  /* Points to first char */
1430 	      || SYNTAX (d[-1]) != Sword)  /* prev char not letter */
1431 	    goto fail;
1432 	  if (d == end2  /* Points to end */
1433 	      || (d == end1 && size2 == 0) /* Points to end */
1434 	      || SYNTAX (d == end1 ? *string2 : *d) != Sword) /* Next char not a letter */
1435 	    break;
1436 	  goto fail;
1437 
1438 #ifdef emacs
1439 	case before_dot:
1440 	  if (((d - string2 <= (unsigned) size2)
1441 	       ? d - bf_p2 : d - bf_p1)
1442 	      <= point)
1443 	    goto fail;
1444 	  break;
1445 
1446 	case at_dot:
1447 	  if (((d - string2 <= (unsigned) size2)
1448 	       ? d - bf_p2 : d - bf_p1)
1449 	      == point)
1450 	    goto fail;
1451 	  break;
1452 
1453 	case after_dot:
1454 	  if (((d - string2 <= (unsigned) size2)
1455 	       ? d - bf_p2 : d - bf_p1)
1456 	      >= point)
1457 	    goto fail;
1458 	  break;
1459 
1460 	case wordchar:
1461 	  mcnt = (int) Sword;
1462 	  goto matchsyntax;
1463 
1464 	case syntaxspec:
1465 	  mcnt = *p++;
1466 	matchsyntax:
1467 	  PREFETCH;
1468 	  if (SYNTAX (*d++) != (enum syntaxcode) mcnt) goto fail;
1469 	  break;
1470 
1471 	case notwordchar:
1472 	  mcnt = (int) Sword;
1473 	  goto matchnotsyntax;
1474 
1475 	case notsyntaxspec:
1476 	  mcnt = *p++;
1477 	matchnotsyntax:
1478 	  PREFETCH;
1479 	  if (SYNTAX (*d++) == (enum syntaxcode) mcnt) goto fail;
1480 	  break;
1481 #else
1482 	case wordchar:
1483 	  PREFETCH;
1484 	  if (SYNTAX (*d++) == 0) goto fail;
1485 	  break;
1486 
1487 	case notwordchar:
1488 	  PREFETCH;
1489 	  if (SYNTAX (*d++) != 0) goto fail;
1490 	  break;
1491 #endif /* not emacs */
1492 
1493 	case begbuf:
1494 	  if (d == string1)	/* Note, d cannot equal string2 */
1495 	    break;		/* unless string1 == string2.  */
1496 	  goto fail;
1497 
1498 	case endbuf:
1499 	  if (d == end2 || (d == end1 && size2 == 0))
1500 	    break;
1501 	  goto fail;
1502 
1503 	case exactn:
1504 	  /* Match the next few pattern characters exactly.
1505 	     mcnt is how many characters to match. */
1506 	  mcnt = *p++;
1507 	  if (translate)
1508 	    {
1509 	      do
1510 		{
1511 		  PREFETCH;
1512 		  if (translate[*d++] != *p++) goto fail;
1513 		}
1514 	      while (--mcnt);
1515 	    }
1516 	  else
1517 	    {
1518 	      do
1519 		{
1520 		  PREFETCH;
1521 		  if (*d++ != *p++) goto fail;
1522 		}
1523 	      while (--mcnt);
1524 	    }
1525 	  break;
1526 	}
1527       continue;    /* Successfully matched one pattern command; keep matching */
1528 
1529       /* Jump here if any matching operation fails. */
1530     fail:
1531       if (stackp != stackb)
1532 	/* A restart point is known.  Restart there and pop it. */
1533 	{
1534 	  if (!stackp[-2])
1535 	    {   /* If innermost failure point is dormant, flush it and keep looking */
1536 	      stackp -= 2;
1537 	      goto fail;
1538 	    }
1539 	  d = *--stackp;
1540 	  p = *--stackp;
1541 	  if (d >= string1 && d <= end1)
1542 	    dend = end_match_1;
1543 	}
1544       else break;   /* Matching at this starting point really fails! */
1545     }
1546   return -1;         /* Failure to match */
1547 }
1548 
1549 static int
bcmp_translate(s1,s2,len,translate)1550 bcmp_translate (s1, s2, len, translate)
1551      unsigned char *s1, *s2;
1552      register int len;
1553      unsigned char *translate;
1554 {
1555   register unsigned char *p1 = s1, *p2 = s2;
1556   while (len)
1557     {
1558       if (translate [*p1++] != translate [*p2++]) return 1;
1559       len--;
1560     }
1561   return 0;
1562 }
1563 
1564 /* Entry points compatible with bsd4.2 regex library */
1565 
1566 #ifndef emacs
1567 
1568 static struct re_pattern_buffer re_comp_buf;
1569 
1570 char *
re_comp(s)1571 re_comp (s)
1572      char *s;
1573 {
1574   if (!s)
1575     {
1576       if (!re_comp_buf.buffer)
1577 	return "No previous regular expression";
1578       return 0;
1579     }
1580 
1581   if (!re_comp_buf.buffer)
1582     {
1583       if (!(re_comp_buf.buffer = (char *) malloc (200)))
1584 	return "Memory exhausted";
1585       re_comp_buf.allocated = 200;
1586       if (!(re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH)))
1587 	return "Memory exhausted";
1588     }
1589   return re_compile_pattern (s, strlen (s), &re_comp_buf);
1590 }
1591 
1592 int
re_exec(s)1593 re_exec (s)
1594      char *s;
1595 {
1596   int len = strlen (s);
1597   return 0 <= re_search (&re_comp_buf, s, len, 0, len, 0);
1598 }
1599 
1600 #endif /* emacs */
1601 
1602 #ifdef test
1603 
1604 #include <stdio.h>
1605 
1606 /* Indexed by a character, gives the upper case equivalent of the character */
1607 
1608 static char upcase[0400] =
1609   { 000, 001, 002, 003, 004, 005, 006, 007,
1610     010, 011, 012, 013, 014, 015, 016, 017,
1611     020, 021, 022, 023, 024, 025, 026, 027,
1612     030, 031, 032, 033, 034, 035, 036, 037,
1613     040, 041, 042, 043, 044, 045, 046, 047,
1614     050, 051, 052, 053, 054, 055, 056, 057,
1615     060, 061, 062, 063, 064, 065, 066, 067,
1616     070, 071, 072, 073, 074, 075, 076, 077,
1617     0100, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1618     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1619     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1620     0130, 0131, 0132, 0133, 0134, 0135, 0136, 0137,
1621     0140, 0101, 0102, 0103, 0104, 0105, 0106, 0107,
1622     0110, 0111, 0112, 0113, 0114, 0115, 0116, 0117,
1623     0120, 0121, 0122, 0123, 0124, 0125, 0126, 0127,
1624     0130, 0131, 0132, 0173, 0174, 0175, 0176, 0177,
1625     0200, 0201, 0202, 0203, 0204, 0205, 0206, 0207,
1626     0210, 0211, 0212, 0213, 0214, 0215, 0216, 0217,
1627     0220, 0221, 0222, 0223, 0224, 0225, 0226, 0227,
1628     0230, 0231, 0232, 0233, 0234, 0235, 0236, 0237,
1629     0240, 0241, 0242, 0243, 0244, 0245, 0246, 0247,
1630     0250, 0251, 0252, 0253, 0254, 0255, 0256, 0257,
1631     0260, 0261, 0262, 0263, 0264, 0265, 0266, 0267,
1632     0270, 0271, 0272, 0273, 0274, 0275, 0276, 0277,
1633     0300, 0301, 0302, 0303, 0304, 0305, 0306, 0307,
1634     0310, 0311, 0312, 0313, 0314, 0315, 0316, 0317,
1635     0320, 0321, 0322, 0323, 0324, 0325, 0326, 0327,
1636     0330, 0331, 0332, 0333, 0334, 0335, 0336, 0337,
1637     0340, 0341, 0342, 0343, 0344, 0345, 0346, 0347,
1638     0350, 0351, 0352, 0353, 0354, 0355, 0356, 0357,
1639     0360, 0361, 0362, 0363, 0364, 0365, 0366, 0367,
1640     0370, 0371, 0372, 0373, 0374, 0375, 0376, 0377
1641   };
1642 
main(argc,argv)1643 main (argc, argv)
1644      int argc;
1645      char **argv;
1646 {
1647   char pat[80];
1648   struct re_pattern_buffer buf;
1649   int i;
1650   char c;
1651   char fastmap[(1 << BYTEWIDTH)];
1652 
1653   /* Allow a command argument to specify the style of syntax.  */
1654   if (argc > 1)
1655     obscure_syntax = atoi (argv[1]);
1656 
1657   buf.allocated = 40;
1658   buf.buffer = (char *) malloc (buf.allocated);
1659   buf.fastmap = fastmap;
1660   buf.translate = upcase;
1661 
1662   while (1)
1663     {
1664       gets (pat);
1665 
1666       if (*pat)
1667 	{
1668           re_compile_pattern (pat, strlen(pat), &buf);
1669 
1670 	  for (i = 0; i < buf.used; i++)
1671 	    printchar (buf.buffer[i]);
1672 
1673 	  putchar ('\n');
1674 
1675 	  printf ("%d allocated, %d used.\n", buf.allocated, buf.used);
1676 
1677 	  re_compile_fastmap (&buf);
1678 	  printf ("Allowed by fastmap: ");
1679 	  for (i = 0; i < (1 << BYTEWIDTH); i++)
1680 	    if (fastmap[i]) printchar (i);
1681 	  putchar ('\n');
1682 	}
1683 
1684       gets (pat);	/* Now read the string to match against */
1685 
1686       i = re_match (&buf, pat, strlen (pat), 0, 0);
1687       printf ("Match value %d.\n", i);
1688     }
1689 }
1690 
1691 #ifdef NOTDEF
1692 print_buf (bufp)
1693      struct re_pattern_buffer *bufp;
1694 {
1695   int i;
1696 
1697   printf ("buf is :\n----------------\n");
1698   for (i = 0; i < bufp->used; i++)
1699     printchar (bufp->buffer[i]);
1700 
1701   printf ("\n%d allocated, %d used.\n", bufp->allocated, bufp->used);
1702 
1703   printf ("Allowed by fastmap: ");
1704   for (i = 0; i < (1 << BYTEWIDTH); i++)
1705     if (bufp->fastmap[i])
1706       printchar (i);
1707   printf ("\nAllowed by translate: ");
1708   if (bufp->translate)
1709     for (i = 0; i < (1 << BYTEWIDTH); i++)
1710       if (bufp->translate[i])
1711 	printchar (i);
1712   printf ("\nfastmap is%s accurate\n", bufp->fastmap_accurate ? "" : "n't");
1713   printf ("can %s be null\n----------", bufp->can_be_null ? "" : "not");
1714 }
1715 #endif
1716 
printchar(c)1717 printchar (c)
1718      char c;
1719 {
1720   if (c < 041 || c >= 0177)
1721     {
1722       putchar ('\\');
1723       putchar (((c >> 6) & 3) + '0');
1724       putchar (((c >> 3) & 7) + '0');
1725       putchar ((c & 7) + '0');
1726     }
1727   else
1728     putchar (c);
1729 }
1730 
error(string)1731 error (string)
1732      char *string;
1733 {
1734   puts (string);
1735   exit (1);
1736 }
1737 
1738 #endif /* test */
1739