1 /* -*-C-*-
2 
3 Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994,
4     1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005,
5     2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Massachusetts
6     Institute of Technology
7 
8 This file is part of MIT/GNU Scheme.
9 
10 MIT/GNU Scheme is free software; you can redistribute it and/or modify
11 it under the terms of the GNU General Public License as published by
12 the Free Software Foundation; either version 2 of the License, or (at
13 your option) any later version.
14 
15 MIT/GNU Scheme is distributed in the hope that it will be useful, but
16 WITHOUT ANY WARRANTY; without even the implied warranty of
17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
18 General Public License for more details.
19 
20 You should have received a copy of the GNU General Public License
21 along with MIT/GNU Scheme; if not, write to the Free Software
22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301,
23 USA.
24 
25 */
26 
27 /* NOTE: This program was created by translation from the regular
28    expression code of GNU Emacs; it was translated from the original C
29    to 68000 assembly language (in 1986), and then translated back from
30    68000 assembly language to C (in 1987).  */
31 
32 /* Structure to represent a buffer of text to match against.
33    This contains the information that an editor buffer would have
34    to supply for the matching process to be executed.
35 
36    `translation' is an array of MAX_ASCII characters which is used to
37    map each character before matching.  Both the pattern and the match
38    text are mapped.  This is normally used to implement case
39    insensitive searches.
40 
41    `syntax_table' describes the syntax of the match text.  See the
42    syntax table primitives for more information.
43 
44    `text' points to the beginning of the match text.  It is used only
45    for translating match text pointers into indices.
46 
47    `text_start' and `text_end' delimit the match text.  They define
48    the buffer-start and buffer-end for those matching commands that
49    refer to them.  Also, all matching must take place within these
50    limits.
51 
52    `gap_start' and `gap_end' delimit a gap in the match text.  Editor
53    buffers normally have such a gap.  For applications without a gap,
54    it is recommended that these be set to the same value as
55    `text_end'.
56 
57    Both `text_start' and `gap_start' are inclusive indices, while
58    `text_end' and `gap_end' are exclusive.
59 
60    The following conditions must be true:
61 
62    (text <= text_start)
63    (text_start <= text_end)
64    (gap_start <= gap_end)
65    (! ((text_start < text_end) &&
66        (gap_start < gap_end) &&
67        ((text_start == gap_start) || (text_end == gap_end))))
68 
69    */
70 
71 struct re_buffer
72   {
73     unsigned char *translation;
74     SYNTAX_TABLE_TYPE syntax_table;
75     unsigned char *text;
76     unsigned char *text_start;
77     unsigned char *text_end;
78     unsigned char *gap_start;
79     unsigned char *gap_end;
80   };
81 
82 /* Structure to store "register" contents data in.
83 
84    Pass the address of such a structure as an argument to re_match,
85    etc., if you want this information back.
86 
87    start[i] and end[i] record the string matched by \( ... \) grouping
88    i, for i from 1 to RE_NREGS - 1.
89 
90    start[0] and end[0] record the entire string matched. */
91 
92 #define RE_NREGS 10
93 
94 struct re_registers
95   {
96     long start[RE_NREGS];
97     long end[RE_NREGS];
98   };
99 
100 /* These are the command codes that appear in compiled regular
101    expressions, one per byte.  Some command codes are followed by
102    argument bytes.  A command code can specify any interpretation
103    whatever for its arguments.  Zero-bytes may appear in the compiled
104    regular expression. */
105 
106 enum regexpcode
107   {
108     regexpcode_unused,
109     regexpcode_exact_1,		/* Followed by 1 literal byte */
110 
111     /* Followed by one byte giving n, and then by n literal bytes. */
112     regexpcode_exact_n,
113 
114     regexpcode_line_start,	/* Fails unless at beginning of line */
115     regexpcode_line_end,	/* Fails unless at end of line */
116 
117     /* Followed by two bytes giving relative address to jump to. */
118     regexpcode_jump,
119 
120     /* Followed by two bytes giving relative address of place to
121        resume at in case of failure. */
122     regexpcode_on_failure_jump,
123 
124     /* Throw away latest failure point and then jump to address. */
125     regexpcode_finalize_jump,
126 
127     /* Like jump but finalize if safe to do so.  This is used to jump
128        back to the beginning of a repeat.  If the command that follows
129        this jump is clearly incompatible with the one at the beginning
130        of the repeat, such that we can be sure that there is no use
131        backtracking out of repetitions already completed, then we
132        finalize. */
133     regexpcode_maybe_finalize_jump,
134 
135     /* jump, and push a dummy failure point.  This failure point will
136        be thrown away if an attempt is made to use it for a failure.
137        A + construct makes this before the first repeat. */
138     regexpcode_dummy_failure_jump,
139 
140     regexpcode_any_char,	/* Matches any one character */
141 
142     /* Matches any one char belonging to specified set.  First
143        following byte is # bitmap bytes.  Then come bytes for a
144        bit-map saying which chars are in.  Bits in each byte are
145        ordered low-bit-first.  A character is in the set if its bit is
146        1.  A character too large to have a bit in the map is
147        automatically not in the set. */
148     regexpcode_char_set,
149 
150     /* Similar but match any character that is NOT one of those
151        specified. */
152     regexpcode_not_char_set,
153 
154     /* Starts remembering the text that is matched and stores it in a
155        memory register.  Followed by one byte containing the register
156        number.  Register numbers must be in the range 0 through
157        (RE_NREGS - 1) inclusive.  */
158     regexpcode_start_memory,
159 
160     /* Stops remembering the text that is matched and stores it in a
161        memory register.  Followed by one byte containing the register
162        number.  Register numbers must be in the range 0 through
163        (RE_NREGS - 1) inclusive.  */
164     regexpcode_stop_memory,
165 
166     /* Match a duplicate of something remembered.  Followed by one
167        byte containing the index of the memory register. */
168     regexpcode_duplicate,
169 
170     regexpcode_buffer_start,	/* Succeeds if at beginning of buffer */
171     regexpcode_buffer_end,	/* Succeeds if at end of buffer */
172     regexpcode_word_char,	/* Matches any word-constituent character */
173 
174     /* Matches any char that is not a word-constituent. */
175     regexpcode_not_word_char,
176 
177     regexpcode_word_start,	/* Succeeds if at word beginning */
178     regexpcode_word_end,	/* Succeeds if at word end */
179     regexpcode_word_bound,	/* Succeeds if at a word boundary */
180     regexpcode_not_word_bound,	/* Succeeds if not at a word boundary */
181 
182     /* Matches any character whose syntax is specified.  Followed by a
183        byte which contains a syntax code, Sword or such like. */
184     regexpcode_syntax_spec,
185 
186     /* Matches any character whose syntax differs from the specified. */
187     regexpcode_not_syntax_spec
188   };
189 
190 extern void
191   re_buffer_initialize (struct re_buffer *, unsigned char *, SYNTAX_TABLE_TYPE,
192 	  unsigned char *, unsigned long, unsigned long,
193 	  unsigned long, unsigned long);
194 
195 extern int
196   re_compile_fastmap (unsigned char *, unsigned char *, unsigned char *,
197 	  SYNTAX_TABLE_TYPE, unsigned char *);
198 
199 extern int
200   re_match (unsigned char *, unsigned char *, struct re_buffer *,
201 	  struct re_registers *, unsigned char *, unsigned char *);
202 
203 extern int
204   re_search_forward (unsigned char *, unsigned char *, struct re_buffer *,
205 	  struct re_registers *, unsigned char *, unsigned char *);
206 
207 extern int
208   re_search_backward (unsigned char *, unsigned char *, struct re_buffer *,
209 	  struct re_registers *, unsigned char *, unsigned char *);
210