1 /* -*-C-*-
2 
3 Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
4     1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
5     2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Massachusetts
6     Institute of Technology
7 
8 This file is part of MIT/GNU Scheme.
9 
10 MIT/GNU Scheme is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or (at
13 your option) any later version.
14 
15 MIT/GNU Scheme is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with MIT/GNU Scheme; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
23 USA.
24 
25 */
26 
27 /* Regular expression matching and search */
28 
29 /* NOTE: This program was created by translation from the regular
30    expression code of GNU Emacs; it was translated from the original C
31    to 68000 assembly language (in 1986), and then translated back from
32    68000 assembly language to C (in 1987).  */
33 
34 #include "scheme.h"
35 #include "syntax.h"
36 #include "regex.h"
37 
38 #if defined(__IRIX__) || defined(_AIX)
39 #define SIGN_EXTEND_CHAR(x)						\
40   ((((int) (x)) >= 0x80) ? (((int) (x)) - 0x100) : ((int) (x)))
41 #endif
42 
43 #ifndef SIGN_EXTEND_CHAR
44 #define SIGN_EXTEND_CHAR(x) (x)
45 #endif /* not SIGN_EXTEND_CHAR */
46 
47 #ifndef SWITCH_ENUM
48 #define SWITCH_ENUM(enum_type, expression)				\
49   switch ((enum enum_type) (expression))
50 #endif /* not SWITCH_ENUM */
51 
52 #ifndef RE_NFAILURES
53 #define RE_NFAILURES 512
54 #endif
55 
56 #define FOR_INDEX_RANGE(index, start, end)				\
57   for (index = (start); (index < (end)); index += 1)
58 
59 #define FOR_INDEX_BELOW(index, limit)					\
60   FOR_INDEX_RANGE (index, 0, (limit))
61 
62 #define FOR_ALL_ASCII(index)						\
63   FOR_INDEX_BELOW (index, MAX_ASCII)
64 
65 #define FOR_ALL_ASCII_SUCH_THAT(index, expression)			\
66   FOR_ALL_ASCII (index)							\
67     if (expression)
68 
69 #define TRANSLATE_CHAR(ascii)						\
70   ((translation == NULL) ? (ascii) : (translation [(ascii)]))
71 
72 #define WORD_CONSTITUENT_P(ascii)					\
73   (SYNTAX_CONSTITUENT_P (syntaxcode_word, (ascii)))
74 
75 #define SYNTAX_CONSTITUENT_P(code, ascii)				\
76   ((SYNTAX_ENTRY_CODE (SYNTAX_TABLE_REF (syntax_table, (ascii)))) == (code))
77 
78 #define CHAR_SET_MEMBER_P(length, char_set, ascii)			\
79   (((ascii) < ((length) * ASCII_LENGTH)) &&				\
80    (CHAR_SET_MEMBER_P_INTERNAL (char_set, ascii)))
81 
82 #define CHAR_SET_MEMBER_P_INTERNAL(char_set, ascii)			\
83   ((((char_set) [((ascii) / ASCII_LENGTH)]) &				\
84     (1 << ((ascii) % ASCII_LENGTH)))					\
85    != 0)
86 
87 #define READ_PATTERN_CHAR(target) do					\
88 {									\
89   if (pattern_pc >= pattern_end)					\
90     BAD_PATTERN ();							\
91   (target) = (*pattern_pc++);						\
92 } while (0)
93 
94 #define READ_PATTERN_OFFSET(target) do					\
95 {									\
96   signed char _fetched;							\
97   if ((pattern_pc + 1) >= pattern_end)					\
98     BAD_PATTERN ();							\
99   (target) = (*pattern_pc++);						\
100   _fetched = (* ((signed char *) (pattern_pc++)));			\
101   (target) += ((SIGN_EXTEND_CHAR (_fetched)) << ASCII_LENGTH);		\
102   if (((pattern_pc + (target)) < pattern_start) ||			\
103       ((pattern_pc + (target)) > pattern_end))				\
104     BAD_PATTERN ();							\
105 } while (0)
106 
107 #define READ_PATTERN_LENGTH(target) do					\
108 {									\
109   int _len;								\
110 									\
111   if (pattern_pc >= pattern_end)					\
112     BAD_PATTERN ();							\
113   _len = ((int) (*pattern_pc++));					\
114   if ((pattern_pc + _len) > pattern_end)				\
115     BAD_PATTERN ();							\
116   (target) = _len;							\
117 } while (0)
118 
119 #define READ_PATTERN_REGISTER(target) do				\
120 {									\
121   if ((pattern_pc >= pattern_end) ||					\
122       (((target) = (*pattern_pc++)) >= RE_NREGS))			\
123     BAD_PATTERN ();							\
124 } while (0)
125 
126 #define READ_PATTERN_SYNTAXCODE(target) do				\
127 {									\
128   if ((pattern_pc >= pattern_end) ||					\
129       (((int) ((target) = ((enum syntaxcode) (*pattern_pc++))))		\
130        >= ((int) syntaxcode_max)))					\
131     BAD_PATTERN ();							\
132 } while (0)
133 
134 #define BAD_PATTERN() RE_RETURN (-2)
135 
136 #define PUSH_FAILURE_POINT(pattern_pc, match_pc) do			\
137 {									\
138   if (stack_pointer == stack_end)					\
139     {									\
140       long stack_length;						\
141       unsigned char **stack_temporary;					\
142 									\
143       stack_length = ((stack_end - stack_start) * 2);			\
144       if (stack_length > (re_max_failures * 2))				\
145 	RE_RETURN (-4);							\
146       stack_temporary =							\
147 	((unsigned char **)						\
148 	 (realloc							\
149 	  (stack_start, (stack_length * (sizeof (unsigned char *))))));	\
150       if (stack_temporary == NULL)					\
151 	RE_RETURN (-3);							\
152       stack_end = (& (stack_temporary [stack_length]));			\
153       stack_pointer =							\
154 	(& (stack_temporary [(stack_pointer - stack_start)]));		\
155       stack_start = stack_temporary;					\
156     }									\
157   (*stack_pointer++) = (pattern_pc);					\
158   (*stack_pointer++) = (match_pc);					\
159 } while (0)
160 
161 #define RE_RETURN(value)						\
162 {									\
163   return_value = (value);						\
164   goto return_point;							\
165 }
166 
167 void
re_buffer_initialize(struct re_buffer * buffer,unsigned char * translation,SYNTAX_TABLE_TYPE syntax_table,unsigned char * text,unsigned long text_start_index,unsigned long text_end_index,unsigned long gap_start_index,unsigned long gap_end_index)168 re_buffer_initialize (struct re_buffer * buffer,
169        unsigned char * translation,
170        SYNTAX_TABLE_TYPE syntax_table,
171        unsigned char * text,
172        unsigned long text_start_index,
173        unsigned long text_end_index,
174        unsigned long gap_start_index,
175        unsigned long gap_end_index)
176 {
177   unsigned char *text_start, *text_end, *gap_start, *gap_end;
178 
179   /* Assumes that
180      ((text_start_index <= gap_start_index) &&
181       (gap_start_index <= gap_end_index) &&
182       (gap_end_index <= text_end_index)) */
183 
184   text_start = (text + text_start_index);
185   text_end = (text + text_end_index);
186   gap_start = (text + gap_start_index);
187   gap_end = (text + gap_end_index);
188 
189   (buffer -> translation) = translation;
190   (buffer -> syntax_table) = syntax_table;
191   (buffer -> text) = text;
192   (buffer -> text_start) = ((text_start == gap_start) ? gap_end : text_start);
193   (buffer -> text_end) = ((text_end == gap_end) ? gap_start : text_end);
194   (buffer -> gap_start) = gap_start;
195   (buffer -> gap_end) = gap_end;
196   return;
197 }
198 
199 /* Given a compiled pattern between `pattern_start' and `pattern_end',
200    generate a character set which is true of all characters which can
201    be the first character of a match.
202 
203    See the documentation of `struct re_buffer' for a description of
204    `translation' and `syntax_table'.
205 
206    `fastmap' is the resulting character set.  It is a character array
207    whose elements are either `FASTMAP_FALSE' or `FASTMAP_TRUE'.
208 
209    Return values:
210    0 => pattern cannot match the null string.
211    1 => pattern can match the null string.
212    2 => pattern can match the null string, but only at end of match
213      text or to left of a character in `fastmap'.
214    -2 => the pattern is improperly formed.
215    else => undefined. */
216 
217 #define FASTMAP_FALSE '\0'
218 #define FASTMAP_TRUE '\1'
219 
220 int
re_compile_fastmap(unsigned char * pattern_start,unsigned char * pattern_end,unsigned char * translation,SYNTAX_TABLE_TYPE syntax_table,unsigned char * fastmap)221 re_compile_fastmap (unsigned char * pattern_start,
222        unsigned char * pattern_end,
223        unsigned char * translation,
224        SYNTAX_TABLE_TYPE syntax_table,
225        unsigned char * fastmap)
226 {
227   unsigned char *pattern_pc;
228   unsigned char *stack_start[RE_NFAILURES];
229   unsigned char **stack_pointer;
230   int return_value;
231 
232   pattern_pc = pattern_start;
233   return_value = 0;
234   stack_pointer = stack_start;
235 
236   {
237     int i;
238 
239     FOR_ALL_ASCII (i)
240       (fastmap [i]) = FASTMAP_FALSE;
241   }
242 
243  loop:
244   if (pattern_pc >= pattern_end)
245     RE_RETURN (1);
246 
247   SWITCH_ENUM (regexpcode, (*pattern_pc++))
248     {
249     case regexpcode_unused:
250     case regexpcode_line_start:
251     case regexpcode_buffer_start:
252     case regexpcode_buffer_end:
253     case regexpcode_word_start:
254     case regexpcode_word_end:
255     case regexpcode_word_bound:
256     case regexpcode_not_word_bound:
257       goto loop;
258 
259     case regexpcode_line_end:
260       {
261 	(fastmap [(TRANSLATE_CHAR ('\n'))]) = FASTMAP_TRUE;
262 	if (return_value == 0)
263 	  return_value = 2;
264 	goto next;
265       }
266 
267     case regexpcode_exact_1:
268       {
269 	int ascii;
270 
271 	READ_PATTERN_CHAR (ascii);
272 	(fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
273 	goto next;
274       }
275 
276     case regexpcode_exact_n:
277       {
278 	int length;
279 
280 	READ_PATTERN_LENGTH (length);
281 	if (length == 0)
282 	  goto loop;
283 	(fastmap [(TRANSLATE_CHAR (pattern_pc [0]))]) = FASTMAP_TRUE;
284 	goto next;
285       }
286 
287     case regexpcode_any_char:
288       {
289 	int ascii;
290 
291 	FOR_ALL_ASCII_SUCH_THAT (ascii, (ascii != '\n'))
292 	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
293 	if (return_value != 0)
294 	  goto return_point;
295 	goto next;
296       }
297 
298     case regexpcode_char_set:
299       {
300 	int length;
301 	int ascii;
302 
303 	READ_PATTERN_LENGTH (length);
304 	length = (length * ASCII_LENGTH);
305 	FOR_INDEX_BELOW (ascii, length)
306 	  if (CHAR_SET_MEMBER_P_INTERNAL (pattern_pc, ascii))
307 	    (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
308 	goto next;
309       }
310 
311     case regexpcode_not_char_set:
312       {
313 	int length;
314 	int ascii;
315 
316 	READ_PATTERN_LENGTH (length);
317 	length = (length * ASCII_LENGTH);
318 	FOR_INDEX_BELOW (ascii, length)
319 	  if (! (CHAR_SET_MEMBER_P_INTERNAL (pattern_pc, ascii)))
320 	    (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
321 	FOR_INDEX_RANGE (ascii, length, MAX_ASCII)
322 	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
323 	goto next;
324       }
325 
326     case regexpcode_word_char:
327       {
328 	int ascii;
329 
330 	FOR_ALL_ASCII_SUCH_THAT (ascii, (WORD_CONSTITUENT_P (ascii)))
331 	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
332 	goto next;
333       }
334 
335     case regexpcode_not_word_char:
336       {
337 	int ascii;
338 
339 	FOR_ALL_ASCII_SUCH_THAT (ascii, (! (WORD_CONSTITUENT_P (ascii))))
340 	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
341 	goto next;
342       }
343 
344     case regexpcode_syntax_spec:
345       {
346 	enum syntaxcode code;
347 	int ascii;
348 
349 	READ_PATTERN_SYNTAXCODE (code);
350 	FOR_ALL_ASCII_SUCH_THAT (ascii, (SYNTAX_CONSTITUENT_P (code, ascii)))
351 	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
352 	goto next;
353       }
354 
355     case regexpcode_not_syntax_spec:
356       {
357 	enum syntaxcode code;
358 	int ascii;
359 
360 	READ_PATTERN_SYNTAXCODE (code);
361 	FOR_ALL_ASCII_SUCH_THAT (ascii,
362 				 (! (SYNTAX_CONSTITUENT_P (code, ascii))))
363 	  (fastmap [(TRANSLATE_CHAR (ascii))]) = FASTMAP_TRUE;
364 	goto next;
365       }
366 
367     case regexpcode_start_memory:
368     case regexpcode_stop_memory:
369       {
370 	int register_number;
371 
372 	READ_PATTERN_REGISTER (register_number);
373 	goto loop;
374       }
375 
376     case regexpcode_duplicate:
377       {
378 	int register_number;
379 	int ascii;
380 
381 	READ_PATTERN_REGISTER (register_number);
382 	FOR_ALL_ASCII (ascii)
383 	  (fastmap [ascii]) = FASTMAP_TRUE;
384 	RE_RETURN (1);
385       }
386 
387     case regexpcode_jump:
388     case regexpcode_finalize_jump:
389     case regexpcode_maybe_finalize_jump:
390     case regexpcode_dummy_failure_jump:
391       {
392 	int offset;
393 
394 	return_value = 1;
395 	READ_PATTERN_OFFSET (offset);
396 	pattern_pc += offset;
397 	if (offset > 0)
398 	  goto loop;
399 
400 	/* Jump backward reached implies we just went through the
401 	   body of a loop and matched nothing.  Opcode jumped to
402 	   should be an on_failure_jump.  Just treat it like an
403 	   ordinary jump.  For a * loop, it has pushed its failure
404 	   point already; if so, discard that as redundant. */
405 	if (pattern_pc >= pattern_end)
406 	  BAD_PATTERN ();
407 	if (((enum regexpcode) (pattern_pc [0])) !=
408 	    regexpcode_on_failure_jump)
409 	  goto loop;
410 	READ_PATTERN_OFFSET (offset);
411 	pattern_pc += offset;
412 	if ((stack_pointer != stack_start) &&
413 	    ((stack_pointer [-1]) == pattern_pc))
414 	  stack_pointer -= 1;
415 	goto loop;
416       }
417 
418     case regexpcode_on_failure_jump:
419       {
420 	int offset;
421 
422 	READ_PATTERN_OFFSET (offset);
423 	(*stack_pointer++) = (pattern_pc + offset);
424 	goto loop;
425       }
426 
427     default:
428       BAD_PATTERN ();
429     }
430 
431  next:
432   if (stack_pointer != stack_start)
433     {
434       pattern_pc = (*--stack_pointer);
435       goto loop;
436     }
437 
438  return_point:
439   return (return_value);
440 }
441 
442 /* Match the compiled pattern described by `pattern_start' and
443    `pattern_end' against the characters in `buffer' between
444    `match_start' and `match_end'.
445 
446    `registers', if not NULL, will be filled with the start and end
447    indices of the match registers if the match succeeds.
448 
449    It is assumed that the following is true:
450 
451    (! ((gap_start < gap_end) &&
452        (match_start < match_end) &&
453        ((match_start == gap_start) || (match_end == gap_end))))
454 
455    Return values:
456 
457    non-negative => the end index (exclusive) of the match.
458    -1 => no match.
459    -2 => the pattern is badly formed.
460    -3 => memory allocation error.
461    -4 => match stack overflow.
462    other => undefined. */
463 
464 #define RE_MATCH_FAILED (-1)
465 
466 /* This macro is used by search procedures to decide when a match at a
467    particular place has failed.  If true, the search continues by
468    advancing to the next match point.  */
469 #define RE_MATCH_FAILURE_RESULT(result)					\
470   (((result) == RE_MATCH_FAILED) || ((result) == (-4)))
471 
472 #define ADDRESS_TO_INDEX(address)					\
473   ((((address) > gap_start) ? ((address) - gap_length) : (address))	\
474    - (buffer -> text))
475 
476 #define READ_MATCH_CHAR(target) do					\
477 {									\
478   if (match_pc >= match_end)						\
479     goto re_match_fail;							\
480   (target) = (TRANSLATE_CHAR (*match_pc++));				\
481   if (match_pc == gap_start)						\
482     match_pc = gap_end;							\
483 } while (0)
484 
485 static bool
beq_translate(unsigned char * scan1,unsigned char * scan2,long length,unsigned char * translation)486 beq_translate (unsigned char * scan1,
487        unsigned char * scan2,
488        long length,
489        unsigned char * translation)
490 {
491   while ((length--) > 0)
492     if ((TRANSLATE_CHAR (*scan1++)) != (TRANSLATE_CHAR (*scan2++)))
493       return (false);
494   return (true);
495 }
496 
497 int re_max_failures = 1000;
498 
499 int
re_match(unsigned char * pattern_start,unsigned char * pattern_end,struct re_buffer * buffer,struct re_registers * registers,unsigned char * match_start,unsigned char * match_end)500 re_match (unsigned char * pattern_start,
501        unsigned char * pattern_end,
502        struct re_buffer * buffer,
503        struct re_registers * registers,
504        unsigned char * match_start,
505        unsigned char * match_end)
506 {
507   unsigned char *pattern_pc, *match_pc;
508   unsigned char *gap_start, *gap_end;
509   unsigned char *translation;
510   SYNTAX_TABLE_TYPE syntax_table;
511   long gap_length;
512   int return_value;
513 
514   /* Failure point stack.  Each place that can handle a failure
515      further down the line pushes a failure point on this stack.  It
516      consists of two char *'s.  The first one pushed is where to
517      resume scanning the pattern; the second pushed is where to resume
518      scanning the match text.  If the latter is NULL, the failure
519      point is a "dummy".  If a failure happens and the innermost
520      failure point is dormant, it discards that failure point and
521      tries the next one. */
522 
523   unsigned char **stack_start, **stack_end, **stack_pointer;
524 
525   /* Information on the "contents" of registers.  These are pointers
526      into the match text; they record just what was matched (on this
527      attempt) by some part of the pattern.  The start_memory command
528      stores the start of a register's contents and the stop_memory
529      command stores the end.
530 
531      At that point, (register_start [regnum]) points to the first
532      character in the register, and (register_end [regnum]) points to
533      the first character beyond the end of the register. */
534 
535   unsigned char *register_start[RE_NREGS];
536   unsigned char *register_end[RE_NREGS];
537 
538   pattern_pc = pattern_start;
539   match_pc = match_start;
540   gap_start = (buffer -> gap_start);
541   gap_end = (buffer -> gap_end);
542   gap_length = (gap_end - gap_start);
543   translation = (buffer -> translation);
544   syntax_table = (buffer -> syntax_table);
545 
546   stack_start = (malloc ((2 * RE_NFAILURES) * (sizeof (*stack_start))));
547   if (stack_start == NULL)
548     RE_RETURN (-3);
549 
550   stack_end = (& (stack_start [(2 * RE_NFAILURES)]));
551   stack_pointer = stack_start;
552 
553   {
554     int i;
555 
556     FOR_INDEX_BELOW (i, RE_NREGS)
557       {
558 	(register_start [i]) = NULL;
559 	(register_end [i]) = NULL;
560       }
561   }
562 
563  re_match_loop:
564   if (pattern_pc >= pattern_end)
565     {
566       /* Reaching here indicates that match was successful. */
567       if (registers != NULL)
568 	{
569 	  int i;
570 
571 	  (register_start [0]) = match_start;
572 	  (register_end [0]) = match_pc;
573 	  FOR_INDEX_BELOW (i, RE_NREGS)
574 	    {
575 	      ((registers -> start) [i]) =
576 		(((register_start [i]) == NULL)
577 		 ? -1
578 		 : (ADDRESS_TO_INDEX (register_start [i])));
579 	      ((registers -> end) [i]) =
580 		(((register_end [i]) == NULL)
581 		 ? -1
582 		 : (ADDRESS_TO_INDEX (register_end [i])));
583 	    }
584 	}
585       RE_RETURN (ADDRESS_TO_INDEX (match_pc));
586     }
587 
588   SWITCH_ENUM (regexpcode, (*pattern_pc++))
589     {
590     case regexpcode_unused:
591       goto re_match_loop;
592 
593     case regexpcode_exact_1:
594       {
595 	int ascii;
596 	int ascii_p;
597 
598 	READ_MATCH_CHAR (ascii);
599 	READ_PATTERN_CHAR (ascii_p);
600 	if (ascii == ascii_p)
601 	  goto re_match_loop;
602 	goto re_match_fail;
603       }
604 
605     case regexpcode_exact_n:
606       {
607 	int length;
608 	int ascii;
609 
610 	READ_PATTERN_LENGTH (length);
611 	while ((length--) > 0)
612 	  {
613 	    READ_MATCH_CHAR (ascii);
614 	    if (ascii != (*pattern_pc++))
615 	      goto re_match_fail;
616 	  }
617 	goto re_match_loop;
618       }
619 
620     case regexpcode_any_char:
621       {
622 	int ascii;
623 
624 	READ_MATCH_CHAR (ascii);
625 	if (ascii == '\n')
626 	  goto re_match_fail;
627 	goto re_match_loop;
628       }
629 
630 #define RE_MATCH_CHAR_SET(winning_label, losing_label)			\
631       {									\
632 	int ascii;							\
633 	int length;							\
634 									\
635 	READ_MATCH_CHAR (ascii);					\
636 	READ_PATTERN_LENGTH (length);					\
637 	if (CHAR_SET_MEMBER_P (length, pattern_pc, ascii))		\
638 	  {								\
639 	    pattern_pc += length;					\
640 	    goto winning_label;						\
641 	  }								\
642 	else								\
643 	  {								\
644 	    pattern_pc += length;					\
645 	    goto losing_label;						\
646 	  }								\
647       }
648 
649     case regexpcode_char_set:
650       RE_MATCH_CHAR_SET (re_match_loop, re_match_fail);
651 
652     case regexpcode_not_char_set:
653       RE_MATCH_CHAR_SET (re_match_fail, re_match_loop);
654 
655 #undef RE_MATCH_CHAR_SET
656 
657     /* \( is represented by a start_memory, \) by a stop_memory.  Both
658        of those commands contain a "register number" argument.  The
659        text matched within the \( and \) is recorded under that
660        number.  Then, \<digit> turns into a `duplicate' command which
661        is followed by the numeric value of <digit> as the register
662        number. */
663 
664     case regexpcode_start_memory:
665       {
666 	int register_number;
667 
668 	READ_PATTERN_REGISTER (register_number);
669 	(register_start [register_number]) = match_pc;
670 	goto re_match_loop;
671       }
672 
673     case regexpcode_stop_memory:
674       {
675 	int register_number;
676 
677 	READ_PATTERN_REGISTER (register_number);
678 	(register_end [register_number]) =
679 	  ((match_pc == gap_end) ? gap_start : match_pc);
680 	goto re_match_loop;
681       }
682 
683     case regexpcode_duplicate:
684       {
685 	int register_number;
686 	unsigned char *start, *end, *new_end;
687 	long length;
688 
689 	READ_PATTERN_REGISTER (register_number);
690 	start = (register_start [register_number]);
691 	end = (register_end [register_number]);
692 	length = (end - start);
693 	if (length <= 0)
694 	  goto re_match_loop;
695 	new_end = (match_pc + length);
696 	if (new_end > match_end)
697 	  goto re_match_fail;
698 	if ((match_pc <= gap_start) && (new_end > gap_start))
699 	  {
700 	    long length1, length2;
701 
702 	    new_end += gap_length;
703 	    if (new_end > match_end)
704 	      goto re_match_fail;
705 	    length1 = (gap_start - match_pc);
706 	    length2 = (length - length1);
707 	    if (!
708 		((beq_translate (match_pc, start, length1, translation)) &&
709 		 (beq_translate (gap_end, (start + length1), length2,
710 				 translation))))
711 	      goto re_match_fail;
712 	  }
713 	else if ((start <= gap_start) && (end > gap_start))
714 	  {
715 	    long length1, length2;
716 
717 	    length1 = (gap_start - start);
718 	    length2 = (end - gap_end);
719 	    if (!
720 		((beq_translate (match_pc, start, length1, translation)) &&
721 		 (beq_translate ((match_pc + length1), gap_end, length2,
722 				 translation))))
723 	      goto re_match_fail;
724 	  }
725 	else
726 	  {
727 	    if (! (beq_translate (match_pc, start, length, translation)))
728 	      goto re_match_fail;
729 	  }
730 	match_pc = ((new_end == gap_start) ? gap_end : new_end);
731 	goto re_match_loop;
732       }
733 
734     case regexpcode_buffer_start:
735       {
736 	if ((ADDRESS_TO_INDEX (match_pc))
737 	    == (ADDRESS_TO_INDEX (buffer -> text_start)))
738 	  goto re_match_loop;
739 	goto re_match_fail;
740       }
741 
742     case regexpcode_buffer_end:
743       {
744 	if ((ADDRESS_TO_INDEX (match_pc))
745 	    == (ADDRESS_TO_INDEX (buffer -> text_end)))
746 	  goto re_match_loop;
747 	goto re_match_fail;
748       }
749 
750     case regexpcode_line_start:
751       {
752 	if ((ADDRESS_TO_INDEX (match_pc))
753 	    == (ADDRESS_TO_INDEX (buffer -> text_start)))
754 	  goto re_match_loop;
755 	if ((TRANSLATE_CHAR
756 	     (((match_pc == gap_end) ? gap_start : match_pc) [-1]))
757 	    == '\n')
758 	  goto re_match_loop;
759 	goto re_match_fail;
760       }
761 
762     case regexpcode_line_end:
763       {
764 	if (((ADDRESS_TO_INDEX (match_pc))
765 	     == (ADDRESS_TO_INDEX (buffer -> text_end)))
766 	    || ((TRANSLATE_CHAR (match_pc [0])) == '\n'))
767 	  goto re_match_loop;
768 	goto re_match_fail;
769       }
770 
771 #define RE_MATCH_WORD_BOUND(word_bound_p)				\
772   if ((match_pc == gap_end)						\
773       ? (word_bound_p							\
774 	 (((gap_start != (buffer -> text_start))			\
775 	   && (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_start[-1])))),	\
776 	  ((gap_end != (buffer -> text_end))				\
777 	   && (WORD_CONSTITUENT_P (TRANSLATE_CHAR (gap_end[0]))))))	\
778       : (word_bound_p							\
779 	 (((match_pc != (buffer -> text_start))				\
780 	   && (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc[-1])))),	\
781 	  ((match_pc != (buffer -> text_end))				\
782 	   && (WORD_CONSTITUENT_P (TRANSLATE_CHAR (match_pc[0])))))))	\
783       goto re_match_loop;						\
784     goto re_match_fail
785 
786     case regexpcode_word_bound:
787 #define WORD_BOUND_P(left_p, right_p) ((left_p) != (right_p))
788       RE_MATCH_WORD_BOUND (WORD_BOUND_P);
789 #undef WORD_BOUND_P
790 
791     case regexpcode_not_word_bound:
792 #define NOT_WORD_BOUND_P(left_p, right_p) ((left_p) == (right_p))
793       RE_MATCH_WORD_BOUND (NOT_WORD_BOUND_P);
794 #undef NOT_WORD_BOUND_P
795 
796     case regexpcode_word_start:
797 #define WORD_START_P(left_p, right_p) ((! (left_p)) && (right_p))
798       RE_MATCH_WORD_BOUND (WORD_START_P);
799 #undef WORD_START_P
800 
801     case regexpcode_word_end:
802 #define WORD_END_P(left_p, right_p) ((left_p) && (! (right_p)))
803       RE_MATCH_WORD_BOUND (WORD_END_P);
804 #undef WORD_END_P
805 
806 #undef RE_MATCH_WORD_BOUND
807 
808     case regexpcode_syntax_spec:
809       {
810 	int ascii;
811 	enum syntaxcode code;
812 
813 	READ_MATCH_CHAR (ascii);
814 	READ_PATTERN_SYNTAXCODE (code);
815 	if (SYNTAX_CONSTITUENT_P (code, ascii))
816 	  goto re_match_loop;
817 	goto re_match_fail;
818       }
819 
820     case regexpcode_not_syntax_spec:
821       {
822 	int ascii;
823 	enum syntaxcode code;
824 
825 	READ_MATCH_CHAR (ascii);
826 	READ_PATTERN_SYNTAXCODE (code);
827 	if (! (SYNTAX_CONSTITUENT_P (code, ascii)))
828 	  goto re_match_loop;
829 	goto re_match_fail;
830       }
831 
832     case regexpcode_word_char:
833       {
834 	int ascii;
835 
836 	READ_MATCH_CHAR (ascii);
837 	if (WORD_CONSTITUENT_P (ascii))
838 	  goto re_match_loop;
839 	goto re_match_fail;
840       }
841 
842     case regexpcode_not_word_char:
843       {
844 	int ascii;
845 
846 	READ_MATCH_CHAR (ascii);
847 	if (! (WORD_CONSTITUENT_P (ascii)))
848 	  goto re_match_loop;
849 	goto re_match_fail;
850       }
851 
852     /* "or" constructs ("|") are handled by starting each alternative
853        with an on_failure_jump that points to the start of the next
854        alternative.  Each alternative except the last ends with a jump
855        to the joining point.  (Actually, each jump except for the last
856        one really jumps to the following jump, because tensioning the
857        jumps is a hassle.)
858 
859        The start of a stupid repeat has an on_failure_jump that points
860        past the end of the repeat text.  This makes a failure point so
861        that, on failure to match a repetition, matching restarts past
862        as many repetitions have been found with no way to fail and
863        look for another one.
864 
865        A smart repeat is similar but loops back to the on_failure_jump
866        so that each repetition makes another failure point. */
867 
868     case regexpcode_on_failure_jump:
869       {
870 	long offset;
871 
872 	READ_PATTERN_OFFSET (offset);
873 	PUSH_FAILURE_POINT ((pattern_pc + offset), match_pc);
874 	goto re_match_loop;
875       }
876 
877     /* The end of a smart repeat has a maybe_finalize_jump back.
878        Change it either to a finalize_jump or an ordinary jump. */
879 
880     case regexpcode_maybe_finalize_jump:
881       {
882 	long offset;
883 	long ascii;
884 
885 	READ_PATTERN_OFFSET (offset);
886 	if (pattern_pc == pattern_end)
887 	  goto finalize_jump;
888 
889 	/* Compare what follows with the beginning of the repeat.
890 	   If we can establish that there is nothing that they
891 	   would both match, we can change to `finalize_jump'. */
892 
893 	SWITCH_ENUM (regexpcode, (pattern_pc [0]))
894 	  {
895 	  case regexpcode_exact_1:
896 	    ascii = (pattern_pc [1]);
897 	    break;
898 
899 	  case regexpcode_exact_n:
900 	    ascii = (pattern_pc [2]);
901 	    break;
902 
903 	  case regexpcode_line_end:
904 	    ascii = ('\n');
905 	    break;
906 
907 	  default:
908 	    goto dont_finalize_jump;
909 	  }
910 
911 	/* (pattern_pc [(offset - 3)]) is an `on_failure_jump'.
912 	   Examine what follows that. */
913 	SWITCH_ENUM (regexpcode, (pattern_pc [offset]))
914 	  {
915 	  case regexpcode_exact_1:
916 	    {
917 	      if (ascii != (pattern_pc [(offset + 1)]))
918 		goto finalize_jump;
919 	      goto dont_finalize_jump;
920 	    }
921 
922 	  case regexpcode_exact_n:
923 	    {
924 	      if (ascii != (pattern_pc [(offset + 2)]))
925 		goto finalize_jump;
926 	      goto dont_finalize_jump;
927 	    }
928 
929 	  case regexpcode_char_set:
930 	    {
931 	      if (CHAR_SET_MEMBER_P ((pattern_pc [(offset + 1)]),
932 				     (& (pattern_pc [(offset + 2)])),
933 				     ascii))
934 		goto dont_finalize_jump;
935 	      goto finalize_jump;
936 	    }
937 
938 	  case regexpcode_not_char_set:
939 	    {
940 	      if (CHAR_SET_MEMBER_P ((pattern_pc [(offset + 1)]),
941 				     (& (pattern_pc [(offset + 2)])),
942 				     ascii))
943 		goto finalize_jump;
944 	      goto dont_finalize_jump;
945 	    }
946 
947 	  default:
948 	    goto dont_finalize_jump;
949 	  }
950 
951       finalize_jump:
952 	pattern_pc -= 2;
953 	(pattern_pc [-1]) = ((unsigned char) regexpcode_finalize_jump);
954 	goto re_match_finalize_jump;
955 
956       dont_finalize_jump:
957 	pattern_pc -= 2;
958 	(pattern_pc [-1]) = ((unsigned char) regexpcode_jump);
959 	goto re_match_jump;
960       }
961 
962     case regexpcode_finalize_jump:
963     re_match_finalize_jump:
964       {
965 	stack_pointer -= 2;
966 	goto re_match_jump;
967       }
968 
969     case regexpcode_jump:
970     re_match_jump:
971       {
972 	long offset;
973 
974 	READ_PATTERN_OFFSET (offset);
975 	pattern_pc += offset;
976 	goto re_match_loop;
977       }
978 
979     case regexpcode_dummy_failure_jump:
980       {
981 	PUSH_FAILURE_POINT (NULL, NULL);
982 	goto re_match_jump;
983       }
984 
985     default:
986       {
987 	BAD_PATTERN ();
988       }
989     }
990 
991  re_match_fail:
992   if (stack_pointer == stack_start)
993     RE_RETURN (RE_MATCH_FAILED);
994   match_pc = (*--stack_pointer);
995   pattern_pc = (*--stack_pointer);
996   if (pattern_pc != NULL)
997     goto re_match_loop;
998   goto re_match_fail;
999 
1000  return_point:
1001   if (stack_start != NULL)
1002     free (stack_start);
1003   return (return_value);
1004 }
1005 
1006 #define DEFINE_RE_SEARCH(name)						\
1007 int									\
1008 name (unsigned char * pattern_start,					\
1009       unsigned char * pattern_end,					\
1010       struct re_buffer * buffer,					\
1011       struct re_registers * registers,					\
1012       unsigned char * match_start,					\
1013       unsigned char * match_end)
1014 
1015 #define INITIALIZE_RE_SEARCH(pc, limit, gap_limit)			\
1016   int can_be_null;							\
1017   unsigned char *translation;						\
1018   int match_result;							\
1019 									\
1020   unsigned char *match_pc;						\
1021   unsigned char *match_limit;						\
1022   unsigned char *gap_limit;						\
1023   unsigned char *fastmap;						\
1024   unsigned char fastmap_array[MAX_ASCII];				\
1025 									\
1026   fastmap = &fastmap_array[0];						\
1027   translation = (buffer -> translation);				\
1028   can_be_null =								\
1029     (re_compile_fastmap							\
1030      (pattern_start, pattern_end, translation,				\
1031       (buffer -> syntax_table), fastmap));				\
1032   if (can_be_null < 0)							\
1033     return (can_be_null);						\
1034 									\
1035   match_pc = (pc);							\
1036   match_limit = (limit);						\
1037   gap_limit = (buffer -> gap_limit)
1038 
1039 #define RE_SEARCH_TEST(start)						\
1040   (re_match								\
1041    (pattern_start, pattern_end, buffer, registers, (start), match_end))
1042 
1043 #define RE_SEARCH_FORWARD_FAST(limit) do				\
1044 {									\
1045   while (true)								\
1046     {									\
1047       if (match_pc >= (limit))						\
1048 	break;								\
1049 									\
1050       if ((fastmap [(TRANSLATE_CHAR (*match_pc++))]) == FASTMAP_FALSE)	\
1051 	continue;							\
1052 									\
1053       match_result = (RE_SEARCH_TEST (match_pc - 1));			\
1054       if (RE_MATCH_FAILURE_RESULT (match_result))			\
1055 	continue;							\
1056 									\
1057       return (match_result);						\
1058     }									\
1059 } while (0)
1060 
DEFINE_RE_SEARCH(re_search_forward)1061 DEFINE_RE_SEARCH (re_search_forward)
1062 {
1063   INITIALIZE_RE_SEARCH (match_start, match_end, gap_start);
1064 
1065   if (can_be_null != 1)
1066     {
1067       if ((match_pc < gap_start) && (gap_start < match_limit))
1068 	RE_SEARCH_FORWARD_FAST (gap_start);
1069       if (match_pc == gap_start)
1070 	match_pc = (buffer -> gap_end);
1071       RE_SEARCH_FORWARD_FAST (match_limit);
1072       return
1073 	((can_be_null == 0)
1074 	 ? RE_MATCH_FAILED
1075 	 : (RE_SEARCH_TEST (match_limit)));
1076     }
1077   else
1078     {
1079       while (true)
1080 	{
1081 	  match_result = (RE_SEARCH_TEST (match_pc));
1082 	  if (! (RE_MATCH_FAILURE_RESULT (match_result)))
1083 	    return (match_result);
1084 	  match_pc += 1;
1085 	  if (match_pc == gap_start)
1086 	    match_pc = (buffer -> gap_end);
1087 	  if (match_pc > match_limit)
1088 	    return (match_result);
1089 	}
1090     }
1091 }
1092 
1093 #define RE_SEARCH_BACKWARD_FAST(limit) do				\
1094 {									\
1095   while (true)								\
1096     {									\
1097       if (match_pc <= (limit))						\
1098 	break;								\
1099 									\
1100       if ((fastmap [(TRANSLATE_CHAR (*--match_pc))]) == FASTMAP_FALSE)	\
1101 	continue;							\
1102 									\
1103       match_result = (RE_SEARCH_TEST (match_pc));			\
1104       if (RE_MATCH_FAILURE_RESULT (match_result))			\
1105 	continue;							\
1106 									\
1107       RE_SEARCH_BACKWARD_RETURN (match_pc);				\
1108     }									\
1109 } while (0)
1110 
1111 #define RE_SEARCH_BACKWARD_RETURN(start)				\
1112   return								\
1113     ((match_result < 0)							\
1114      ? match_result							\
1115      : ((((start) > (buffer -> gap_start))				\
1116 	 ? ((start) - (gap_end - (buffer -> gap_start)))		\
1117 	 : (start))							\
1118 	- (buffer -> text)))
1119 
DEFINE_RE_SEARCH(re_search_backward)1120 DEFINE_RE_SEARCH (re_search_backward)
1121 {
1122   INITIALIZE_RE_SEARCH (match_end, match_start, gap_end);
1123 
1124   if (can_be_null != 1)
1125     {
1126       if ((match_pc > gap_end) && (gap_end > match_limit))
1127 	RE_SEARCH_BACKWARD_FAST (gap_end);
1128       if (match_pc == gap_end)
1129 	match_pc = (buffer -> gap_start);
1130       RE_SEARCH_BACKWARD_FAST (match_limit);
1131       if (can_be_null == 0)
1132 	return (RE_MATCH_FAILED);
1133       match_result = (RE_SEARCH_TEST (match_limit));
1134       RE_SEARCH_BACKWARD_RETURN (match_limit);
1135     }
1136   else
1137     {
1138       while (true)
1139 	{
1140 	  match_result = (RE_SEARCH_TEST (match_pc));
1141 	  if (! (RE_MATCH_FAILURE_RESULT (match_result)))
1142 	    RE_SEARCH_BACKWARD_RETURN (match_pc);
1143 	  if (match_pc == gap_end)
1144 	    match_pc = (buffer -> gap_start);
1145 	  match_pc -= 1;
1146 	  if (match_pc < match_limit)
1147 	    RE_SEARCH_BACKWARD_RETURN (match_pc);
1148 	}
1149     }
1150 }
1151