1 /* -*-C-*- 2 3 Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 4 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 5 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014 Massachusetts 6 Institute of Technology 7 8 This file is part of MIT/GNU Scheme. 9 10 MIT/GNU Scheme is free software; you can redistribute it and/or modify 11 it under the terms of the GNU General Public License as published by 12 the Free Software Foundation; either version 2 of the License, or (at 13 your option) any later version. 14 15 MIT/GNU Scheme is distributed in the hope that it will be useful, but 16 WITHOUT ANY WARRANTY; without even the implied warranty of 17 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 18 General Public License for more details. 19 20 You should have received a copy of the GNU General Public License 21 along with MIT/GNU Scheme; if not, write to the Free Software 22 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, 23 USA. 24 25 */ 26 27 /* NOTE: This program was created by translation from the regular 28 expression code of GNU Emacs; it was translated from the original C 29 to 68000 assembly language (in 1986), and then translated back from 30 68000 assembly language to C (in 1987). */ 31 32 /* Structure to represent a buffer of text to match against. 33 This contains the information that an editor buffer would have 34 to supply for the matching process to be executed. 35 36 `translation' is an array of MAX_ASCII characters which is used to 37 map each character before matching. Both the pattern and the match 38 text are mapped. This is normally used to implement case 39 insensitive searches. 40 41 `syntax_table' describes the syntax of the match text. See the 42 syntax table primitives for more information. 43 44 `text' points to the beginning of the match text. It is used only 45 for translating match text pointers into indices. 46 47 `text_start' and `text_end' delimit the match text. They define 48 the buffer-start and buffer-end for those matching commands that 49 refer to them. Also, all matching must take place within these 50 limits. 51 52 `gap_start' and `gap_end' delimit a gap in the match text. Editor 53 buffers normally have such a gap. For applications without a gap, 54 it is recommended that these be set to the same value as 55 `text_end'. 56 57 Both `text_start' and `gap_start' are inclusive indices, while 58 `text_end' and `gap_end' are exclusive. 59 60 The following conditions must be true: 61 62 (text <= text_start) 63 (text_start <= text_end) 64 (gap_start <= gap_end) 65 (! ((text_start < text_end) && 66 (gap_start < gap_end) && 67 ((text_start == gap_start) || (text_end == gap_end)))) 68 69 */ 70 71 struct re_buffer 72 { 73 unsigned char *translation; 74 SYNTAX_TABLE_TYPE syntax_table; 75 unsigned char *text; 76 unsigned char *text_start; 77 unsigned char *text_end; 78 unsigned char *gap_start; 79 unsigned char *gap_end; 80 }; 81 82 /* Structure to store "register" contents data in. 83 84 Pass the address of such a structure as an argument to re_match, 85 etc., if you want this information back. 86 87 start[i] and end[i] record the string matched by \( ... \) grouping 88 i, for i from 1 to RE_NREGS - 1. 89 90 start[0] and end[0] record the entire string matched. */ 91 92 #define RE_NREGS 10 93 94 struct re_registers 95 { 96 long start[RE_NREGS]; 97 long end[RE_NREGS]; 98 }; 99 100 /* These are the command codes that appear in compiled regular 101 expressions, one per byte. Some command codes are followed by 102 argument bytes. A command code can specify any interpretation 103 whatever for its arguments. Zero-bytes may appear in the compiled 104 regular expression. */ 105 106 enum regexpcode 107 { 108 regexpcode_unused, 109 regexpcode_exact_1, /* Followed by 1 literal byte */ 110 111 /* Followed by one byte giving n, and then by n literal bytes. */ 112 regexpcode_exact_n, 113 114 regexpcode_line_start, /* Fails unless at beginning of line */ 115 regexpcode_line_end, /* Fails unless at end of line */ 116 117 /* Followed by two bytes giving relative address to jump to. */ 118 regexpcode_jump, 119 120 /* Followed by two bytes giving relative address of place to 121 resume at in case of failure. */ 122 regexpcode_on_failure_jump, 123 124 /* Throw away latest failure point and then jump to address. */ 125 regexpcode_finalize_jump, 126 127 /* Like jump but finalize if safe to do so. This is used to jump 128 back to the beginning of a repeat. If the command that follows 129 this jump is clearly incompatible with the one at the beginning 130 of the repeat, such that we can be sure that there is no use 131 backtracking out of repetitions already completed, then we 132 finalize. */ 133 regexpcode_maybe_finalize_jump, 134 135 /* jump, and push a dummy failure point. This failure point will 136 be thrown away if an attempt is made to use it for a failure. 137 A + construct makes this before the first repeat. */ 138 regexpcode_dummy_failure_jump, 139 140 regexpcode_any_char, /* Matches any one character */ 141 142 /* Matches any one char belonging to specified set. First 143 following byte is # bitmap bytes. Then come bytes for a 144 bit-map saying which chars are in. Bits in each byte are 145 ordered low-bit-first. A character is in the set if its bit is 146 1. A character too large to have a bit in the map is 147 automatically not in the set. */ 148 regexpcode_char_set, 149 150 /* Similar but match any character that is NOT one of those 151 specified. */ 152 regexpcode_not_char_set, 153 154 /* Starts remembering the text that is matched and stores it in a 155 memory register. Followed by one byte containing the register 156 number. Register numbers must be in the range 0 through 157 (RE_NREGS - 1) inclusive. */ 158 regexpcode_start_memory, 159 160 /* Stops remembering the text that is matched and stores it in a 161 memory register. Followed by one byte containing the register 162 number. Register numbers must be in the range 0 through 163 (RE_NREGS - 1) inclusive. */ 164 regexpcode_stop_memory, 165 166 /* Match a duplicate of something remembered. Followed by one 167 byte containing the index of the memory register. */ 168 regexpcode_duplicate, 169 170 regexpcode_buffer_start, /* Succeeds if at beginning of buffer */ 171 regexpcode_buffer_end, /* Succeeds if at end of buffer */ 172 regexpcode_word_char, /* Matches any word-constituent character */ 173 174 /* Matches any char that is not a word-constituent. */ 175 regexpcode_not_word_char, 176 177 regexpcode_word_start, /* Succeeds if at word beginning */ 178 regexpcode_word_end, /* Succeeds if at word end */ 179 regexpcode_word_bound, /* Succeeds if at a word boundary */ 180 regexpcode_not_word_bound, /* Succeeds if not at a word boundary */ 181 182 /* Matches any character whose syntax is specified. Followed by a 183 byte which contains a syntax code, Sword or such like. */ 184 regexpcode_syntax_spec, 185 186 /* Matches any character whose syntax differs from the specified. */ 187 regexpcode_not_syntax_spec 188 }; 189 190 extern void 191 re_buffer_initialize (struct re_buffer *, unsigned char *, SYNTAX_TABLE_TYPE, 192 unsigned char *, unsigned long, unsigned long, 193 unsigned long, unsigned long); 194 195 extern int 196 re_compile_fastmap (unsigned char *, unsigned char *, unsigned char *, 197 SYNTAX_TABLE_TYPE, unsigned char *); 198 199 extern int 200 re_match (unsigned char *, unsigned char *, struct re_buffer *, 201 struct re_registers *, unsigned char *, unsigned char *); 202 203 extern int 204 re_search_forward (unsigned char *, unsigned char *, struct re_buffer *, 205 struct re_registers *, unsigned char *, unsigned char *); 206 207 extern int 208 re_search_backward (unsigned char *, unsigned char *, struct re_buffer *, 209 struct re_registers *, unsigned char *, unsigned char *); 210