1 /*------------------------------------------------------------*
2 | rexp.c |
3 | copyright 1999, Andrew Sumner (andrewsumner@yahoo.com) |
4 | |
5 | This is a source file for the awka package, a translator |
6 | of the AWK programming language to ANSI C. |
7 | |
8 | This library is free software; you can redistribute it |
9 | and/or modify it under the terms of the GNU General |
10 | Public License (GPL). |
11 | |
12 | This library is distributed in the hope that it will be |
13 | useful, but WITHOUT ANY WARRANTY; without even the implied |
14 | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR |
15 | PURPOSE. |
16 *------------------------------------------------------------*/
17
18 /*
19 * The functions in this module act as a wrapper for calling
20 * awka_regcomp(), and as a static storage for all compiled
21 * regular expressions, preventing the same expression from
22 * having to be compiled more than once. The regexps are stored
23 * in a fixed-size hash table.
24 */
25
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <signal.h>
29 #include <string.h>
30
31 #include "libawka.h"
32
33 typedef struct regexp_list_struct regexp_list;
34
35 struct regexp_list_struct {
36 regexp_list *next;
37 awka_regexp *re_nofs;
38 awka_regexp *re_fs;
39 awka_regexp *re_gsub;
40 char *str;
41 unsigned int hval;
42 };
43
44 regexp_list **re_list = NULL;
45 #define RE_LIST_SIZE 17
46
47 static char *
_awka_fixescapes(char * str,unsigned int len)48 _awka_fixescapes(char *str, unsigned int len)
49 {
50 static char *dest = NULL;
51 static unsigned int alloc = 0;
52 register char *p, *r;
53
54
55 if (!dest)
56 alloc = malloc(&dest, len+1);
57 else if (alloc <= len)
58 alloc = realloc(&dest, len+1);
59
60 p = str; r = dest;
61
62 do {
63 *(r++) = *p;
64 /*
65 if (*p == '\\' && *(p+1) == '\\')
66 p++;
67 */
68 } while (*(++p));
69 *r = '\0';
70
71 return dest;
72 }
73
74 awka_regexp *
awka_re_isexactstr(char * str,int len,unsigned can_be_null)75 awka_re_isexactstr(char *str, int len, unsigned can_be_null)
76 {
77 register int i;
78 static char meta[] = ".*+(){}[]|?\\";
79 int found_meta = 0;
80 int bol = 0, eol = 0;
81 int advance = 0, end_adv = 0;
82 awka_regexp *re = NULL;
83
84 for (i=0; i<len; i++)
85 if (strchr(meta, str[i]) != NULL)
86 return NULL;
87
88 if (str[0] == '/' && str[len-1] == '/')
89 {
90 advance = 1;
91 end_adv = 2;
92 if (len == 2)
93 return NULL;
94 }
95
96 if (str[advance] == '^' && len - end_adv > 1)
97 bol = REG_ISBOL;
98 else if (strchr(str, '^') != NULL)
99 return NULL;
100
101 if (str[len-(1+advance)] == '$' && len - end_adv > 1)
102 eol = REG_ISEOL;
103 else if (strchr(str, '$') != NULL)
104 return NULL;
105
106 /* its an exact string, so we can handle as such */
107 malloc( &re, sizeof(awka_regexp) );
108 memset( re, 0, sizeof(awka_regexp) );
109
110 re->strlen = len;
111 re->isexact = 1;
112 re->reganch |= bol | eol;
113 re->can_be_null = can_be_null;
114 malloc( &re->origstr, len+1 );
115 strcpy( re->origstr, str );
116
117 malloc( &re->buffer, len+1 );
118 memset( re->buffer, 0, len+1 );
119
120 switch (re->reganch)
121 {
122 case 0:
123 strncpy( (char *) re->buffer, str+advance, len-end_adv ); break;
124 case REG_ISBOL:
125 strncpy( (char *) re->buffer, str+1+advance, len-(end_adv+1) ); break;
126 case REG_ISEOL:
127 strncpy( (char *) re->buffer, str+advance, len-(end_adv+1) ); break;
128 case (REG_ISBOL | REG_ISEOL):
129 strncpy( (char *) re->buffer, str+1+advance, len-(end_adv+2) ); break;
130 }
131
132 return re;
133 }
134
/*
 * _return_re_SPLIT
 * Common tail of _awka_compile_regexp_SPLIT for a node whose re_fs form
 * still has to be built.  It relies on these names in the enclosing
 * function: list (the node), idx (its bucket in re_list) and len (the
 * length of list->str).
 *
 * In order: moves the node to the head of its bucket unless it already
 * is the head; builds re_fs, trying awka_re_isexactstr() before
 * awka_regcomp(); reports a failed compile through awka_error();
 * attaches a DFA from dfacomp(); sets cant_be_null; and returns re_fs
 * from the enclosing function.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement and stays correct under an unbraced if/else.
 *
 * NOTE(review): re_fs is dereferenced right after the awka_error()
 * call, so this presumes awka_error() does not return - confirm.
 */
#define _return_re_SPLIT \
  do { \
    if (list != re_list[idx]) \
    { \
      list->next = re_list[idx]; \
      re_list[idx] = list; \
    } \
    if (!(list->re_fs = awka_re_isexactstr(list->str, len, FALSE))) \
      list->re_fs = awka_regcomp(list->str, FALSE); \
    if (!list->re_fs) \
      awka_error("fail to compile regular expression '%s'\n",list->str); \
    list->re_fs->dfa = (void *) dfacomp(list->str, strlen(list->str), TRUE); \
    list->re_fs->cant_be_null = 1; \
    return list->re_fs; \
  } while (0)
148
/*
 * _return_re_MATCH
 * Common tail of _awka_compile_regexp_MATCH for a node whose re_nofs
 * form still has to be built.  It relies on these names in the
 * enclosing function: list (the node), toplist (the bucket head as it
 * was on entry), idx (the bucket in re_list) and len (the length of
 * list->str).
 *
 * In order: links the node in front of toplist and makes it the bucket
 * head unless it is toplist itself; builds re_nofs, trying
 * awka_re_isexactstr() before awka_regcomp(); reports a failed compile
 * through awka_error(); attaches a DFA from dfacomp(); and returns
 * re_nofs from the enclosing function.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement and stays correct under an unbraced if/else.
 *
 * NOTE(review): re_nofs is dereferenced right after the awka_error()
 * call, so this presumes awka_error() does not return - confirm.
 */
#define _return_re_MATCH \
  do { \
    if (list != toplist) \
    { \
      list->next = toplist; \
      re_list[idx] = list; \
    } \
    if (!(list->re_nofs = awka_re_isexactstr(list->str, len, FALSE))) \
      list->re_nofs = awka_regcomp(list->str, FALSE); \
    if (!list->re_nofs) \
      awka_error("fail to compile regular expression '%s'\n",list->str); \
    list->re_nofs->dfa = (void *) dfacomp(list->str, strlen(list->str), TRUE); \
    return list->re_nofs; \
  } while (0)
161
/*
 * _return_re_GSUB
 * Common tail of _awka_compile_regexp_GSUB for a node whose re_gsub
 * form still has to be built.  It relies on these names in the
 * enclosing function: list (the node), toplist (the bucket head as it
 * was on entry), idx (the bucket in re_list) and len (the length of
 * list->str).
 *
 * In order: links the node in front of toplist and makes it the bucket
 * head unless it is toplist itself; builds re_gsub, trying
 * awka_re_isexactstr() before awka_regcomp(), both with TRUE as their
 * last argument; reports a failed compile through awka_error();
 * attaches a DFA from dfacomp(); and returns re_gsub from the enclosing
 * function.
 *
 * The body is wrapped in do { } while (0) so that the expansion is a
 * single statement and stays correct under an unbraced if/else.
 *
 * NOTE(review): re_gsub is dereferenced right after the awka_error()
 * call, so this presumes awka_error() does not return - confirm.
 */
#define _return_re_GSUB \
  do { \
    if (list != toplist) \
    { \
      list->next = toplist; \
      re_list[idx] = list; \
    } \
    if (!(list->re_gsub = awka_re_isexactstr(list->str, len, TRUE))) \
      list->re_gsub = awka_regcomp(list->str, TRUE); \
    if (!list->re_gsub) \
      awka_error("fail to compile regular expression '%s'\n",list->str); \
    list->re_gsub->dfa = (void *) dfacomp(list->str, strlen(list->str), TRUE); \
    return list->re_gsub; \
  } while (0)
174
175
176 awka_regexp *
_awka_compile_regexp_SPLIT(char * str,unsigned int len)177 _awka_compile_regexp_SPLIT(char *str, unsigned int len)
178 {
179 register unsigned int idx, hval;
180 regexp_list *list = NULL, *prevlist = NULL;
181
182 if (!str)
183 return NULL;
184
185 if (!re_list)
186 {
187 malloc(&re_list, RE_LIST_SIZE * sizeof(regexp_list *));
188 memset(re_list, 0, RE_LIST_SIZE * sizeof(regexp_list *));
189 }
190
191 idx = (hval = _awka_hashstr(str, len)) % RE_LIST_SIZE;
192 list = re_list[idx];
193
194 while (list)
195 {
196 if (list->hval == hval)
197 {
198 if (!strncmp(str, list->str, len))
199 {
200 /* we have a match */
201 if (list->re_fs)
202 {
203 if (list != re_list[idx])
204 {
205 prevlist->next = list->next;
206 list->next = re_list[idx];
207 re_list[idx] = list;
208 }
209 return list->re_fs;
210 }
211 if (prevlist)
212 prevlist->next = list->next;
213
214 _return_re_SPLIT;
215 }
216 }
217 prevlist = list;
218 list = list->next;
219 }
220
221 /* this expression not yet created */
222 malloc( &list, sizeof(regexp_list) );
223 malloc( &list->str, len+1 );
224 strcpy(list->str, str);
225 list->re_fs = list->re_nofs = list->re_gsub = NULL;
226 list->hval = hval;
227 re_list[idx] = list;
228
229 _return_re_SPLIT;
230 }
231
232
233 awka_regexp *
_awka_compile_regexp_MATCH(char * str,unsigned int len)234 _awka_compile_regexp_MATCH(char *str, unsigned int len)
235 {
236 register unsigned int idx, hval;
237 regexp_list *list = NULL, *prevlist = NULL, *toplist;
238
239 if (!str)
240 return NULL;
241
242 if (!re_list)
243 {
244 malloc(&re_list, RE_LIST_SIZE * sizeof(regexp_list *));
245 memset(re_list, 0, RE_LIST_SIZE * sizeof(regexp_list *));
246 }
247
248 idx = (hval = _awka_hashstr(str, len)) % RE_LIST_SIZE;
249 list = toplist = re_list[idx];
250
251 while (list)
252 {
253 if (list->hval == hval)
254 {
255 if (!strncmp(str, list->str, len))
256 {
257 /* we have a match */
258 if (list->re_nofs)
259 {
260 if (list != toplist)
261 {
262 prevlist->next = list->next;
263 list->next = toplist;
264 re_list[idx] = list;
265 }
266 return list->re_nofs;
267 }
268 if (prevlist)
269 prevlist->next = list->next;
270
271 _return_re_MATCH;
272 }
273 }
274 prevlist = list;
275 list = list->next;
276 }
277
278 /* this expression not yet created */
279 malloc( &list, sizeof(regexp_list) );
280 malloc( &list->str, len+1 );
281 strcpy(list->str, str);
282 list->re_fs = list->re_nofs = list->re_gsub = NULL;
283 list->hval = hval;
284 re_list[idx] = list;
285
286 _return_re_MATCH;
287 }
288
289
290 awka_regexp *
_awka_compile_regexp_GSUB(char * str,unsigned int len)291 _awka_compile_regexp_GSUB(char *str, unsigned int len)
292 {
293 register unsigned int idx, hval;
294 regexp_list *list = NULL, *prevlist = NULL, *toplist;
295
296 if (!str)
297 return NULL;
298
299 if (!re_list)
300 {
301 malloc(&re_list, RE_LIST_SIZE * sizeof(regexp_list *));
302 memset(re_list, 0, RE_LIST_SIZE * sizeof(regexp_list *));
303 }
304
305 idx = (hval = _awka_hashstr(str, len)) % RE_LIST_SIZE;
306 list = toplist = re_list[idx];
307
308 while (list)
309 {
310 if (list->hval == hval)
311 {
312 if (!strncmp(str, list->str, len))
313 {
314 /* we have a match */
315 if (list->re_gsub)
316 {
317 if (list != toplist)
318 {
319 prevlist->next = list->next;
320 list->next = toplist;
321 re_list[idx] = list;
322 }
323 return list->re_gsub;
324 }
325 if (prevlist)
326 prevlist->next = list->next;
327
328 _return_re_GSUB;
329 }
330 }
331 prevlist = list;
332 list = list->next;
333 }
334
335 /* this expression not yet created */
336 malloc( &list, sizeof(regexp_list) );
337 malloc( &list->str, len+1 );
338 strcpy(list->str, str);
339 list->re_fs = list->re_nofs = list->re_gsub = NULL;
340 list->hval = hval;
341 re_list[idx] = list;
342
343 _return_re_GSUB;
344 }
345
346