1 /*------------------------------------------------------------*
2  | rexp.c                                                     |
3  | copyright 1999,  Andrew Sumner (andrewsumner@yahoo.com)    |
4  |                                                            |
5  | This is a source file for the awka package, a translator   |
6  | of the AWK programming language to ANSI C.                 |
7  |                                                            |
8  | This library is free software; you can redistribute it     |
9  | and/or modify it under the terms of the GNU General        |
10  | Public License (GPL).                                      |
11  |                                                            |
12  | This library is distributed in the hope that it will be    |
13  | useful, but WITHOUT ANY WARRANTY; without even the implied |
14  | warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR    |
15  | PURPOSE.                                                   |
16  *------------------------------------------------------------*/
17 
18 /*
19  * The functions in this module act as a wrapper for calling
20  * awka_regcomp(), and as a static storage for all compiled
21  * regular expressions, preventing the same expression from
22  * having to be compiled more than once.  The regexps are stored
23  * in a fixed-size hash table.
24  */
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <signal.h>
29 #include <string.h>
30 
31 #include "libawka.h"
32 
33 typedef struct regexp_list_struct regexp_list;
34 
35 struct regexp_list_struct {
36   regexp_list *next;
37   awka_regexp *re_nofs;
38   awka_regexp *re_fs;
39   awka_regexp *re_gsub;
40   char *str;
41   unsigned int hval;
42 };
43 
44 regexp_list **re_list = NULL;
45 #define RE_LIST_SIZE 17
46 
47 static char *
_awka_fixescapes(char * str,unsigned int len)48 _awka_fixescapes(char *str, unsigned int len)
49 {
50   static char *dest = NULL;
51   static unsigned int alloc = 0;
52   register char *p, *r;
53 
54 
55   if (!dest)
56     alloc = malloc(&dest, len+1);
57   else if (alloc <= len)
58     alloc = realloc(&dest, len+1);
59 
60   p = str; r = dest;
61 
62   do {
63     *(r++) = *p;
64     /*
65     if (*p == '\\' && *(p+1) == '\\')
66       p++;
67       */
68   } while (*(++p));
69   *r = '\0';
70 
71   return dest;
72 }
73 
74 awka_regexp *
awka_re_isexactstr(char * str,int len,unsigned can_be_null)75 awka_re_isexactstr(char *str, int len, unsigned can_be_null)
76 {
77   register int i;
78   static char meta[] = ".*+(){}[]|?\\";
79   int found_meta = 0;
80   int bol = 0, eol = 0;
81   int advance = 0, end_adv = 0;
82   awka_regexp *re = NULL;
83 
84   for (i=0; i<len; i++)
85     if (strchr(meta, str[i]) != NULL)
86       return NULL;
87 
88   if (str[0] == '/' && str[len-1] == '/')
89   {
90     advance = 1;
91     end_adv = 2;
92     if (len == 2)
93       return NULL;
94   }
95 
96   if (str[advance] == '^' && len - end_adv > 1)
97     bol = REG_ISBOL;
98   else if (strchr(str, '^') != NULL)
99     return NULL;
100 
101   if (str[len-(1+advance)] == '$' && len - end_adv > 1)
102     eol = REG_ISEOL;
103   else if (strchr(str, '$') != NULL)
104     return NULL;
105 
106   /* its an exact string, so we can handle as such */
107   malloc( &re, sizeof(awka_regexp) );
108   memset( re, 0, sizeof(awka_regexp) );
109 
110   re->strlen = len;
111   re->isexact = 1;
112   re->reganch |= bol | eol;
113   re->can_be_null = can_be_null;
114   malloc( &re->origstr, len+1 );
115   strcpy( re->origstr, str );
116 
117   malloc( &re->buffer, len+1 );
118   memset( re->buffer, 0, len+1 );
119 
120   switch (re->reganch)
121   {
122     case 0:
123       strncpy( (char *) re->buffer, str+advance, len-end_adv ); break;
124     case REG_ISBOL:
125       strncpy( (char *) re->buffer, str+1+advance, len-(end_adv+1) ); break;
126     case REG_ISEOL:
127       strncpy( (char *) re->buffer, str+advance, len-(end_adv+1) ); break;
128     case (REG_ISBOL | REG_ISEOL):
129       strncpy( (char *) re->buffer, str+1+advance, len-(end_adv+2) ); break;
130   }
131 
132   return re;
133 }
134 
/*
 * Common exit sequence of _awka_compile_regexp_SPLIT().  It expands to
 * plain statements ending in a 'return', and needs 'list', 'idx' and
 * 'len' in the enclosing scope.
 *
 *  1. put 'list' at the head of bucket 'idx' unless it is there already;
 *  2. build list->re_fs, trying the exact-string shortcut first and
 *     awka_regcomp() second;
 *  3. attach the dfacomp() result, set cant_be_null and return re_fs.
 *
 * NOTE(review): the statements after awka_error() dereference re_fs, so
 * awka_error() is presumably expected not to return - confirm.
 */
#define _return_re_SPLIT \
  if (re_list[idx] != list) \
  { \
    list->next = re_list[idx]; \
    re_list[idx] = list; \
  } \
  list->re_fs = awka_re_isexactstr(list->str, len, FALSE); \
  if (list->re_fs == NULL) \
    list->re_fs = awka_regcomp(list->str, FALSE); \
  if (list->re_fs == NULL) \
    awka_error("fail to compile regular expression '%s'\n",list->str); \
  list->re_fs->dfa = (void *) dfacomp(list->str, strlen(list->str), TRUE); \
  list->re_fs->cant_be_null = 1; \
  return list->re_fs;
148 
/*
 * Common exit sequence of _awka_compile_regexp_MATCH().  It expands to
 * plain statements ending in a 'return', and needs 'list', 'toplist',
 * 'idx' and 'len' in the enclosing scope.
 *
 *  1. unless 'list' is the bucket head seen on entry ('toplist'), link
 *     it in front of that head;
 *  2. build list->re_nofs, trying the exact-string shortcut first and
 *     awka_regcomp() second;
 *  3. attach the dfacomp() result and return re_nofs.
 *
 * NOTE(review): the statement after awka_error() dereferences re_nofs,
 * so awka_error() is presumably expected not to return - confirm.
 */
#define _return_re_MATCH \
  if (toplist != list) \
  { \
    list->next = toplist; \
    re_list[idx] = list; \
  } \
  list->re_nofs = awka_re_isexactstr(list->str, len, FALSE); \
  if (list->re_nofs == NULL) \
    list->re_nofs = awka_regcomp(list->str, FALSE); \
  if (list->re_nofs == NULL) \
    awka_error("fail to compile regular expression '%s'\n",list->str); \
  list->re_nofs->dfa = (void *) dfacomp(list->str, strlen(list->str), TRUE); \
  return list->re_nofs;
161 
/*
 * Common exit sequence of _awka_compile_regexp_GSUB().  It expands to
 * plain statements ending in a 'return', and needs 'list', 'toplist',
 * 'idx' and 'len' in the enclosing scope.
 *
 *  1. unless 'list' is the bucket head seen on entry ('toplist'), link
 *     it in front of that head;
 *  2. build list->re_gsub, trying the exact-string shortcut first and
 *     awka_regcomp() second - both are passed TRUE here, where the
 *     SPLIT and MATCH variants pass FALSE;
 *  3. attach the dfacomp() result and return re_gsub.
 *
 * NOTE(review): the statement after awka_error() dereferences re_gsub,
 * so awka_error() is presumably expected not to return - confirm.
 */
#define _return_re_GSUB \
  if (toplist != list) \
  { \
    list->next = toplist; \
    re_list[idx] = list; \
  } \
  list->re_gsub = awka_re_isexactstr(list->str, len, TRUE); \
  if (list->re_gsub == NULL) \
    list->re_gsub = awka_regcomp(list->str, TRUE); \
  if (list->re_gsub == NULL) \
    awka_error("fail to compile regular expression '%s'\n",list->str); \
  list->re_gsub->dfa = (void *) dfacomp(list->str, strlen(list->str), TRUE); \
  return list->re_gsub;
174 
175 
176 awka_regexp *
_awka_compile_regexp_SPLIT(char * str,unsigned int len)177 _awka_compile_regexp_SPLIT(char *str, unsigned int len)
178 {
179   register unsigned int idx, hval;
180   regexp_list *list = NULL, *prevlist = NULL;
181 
182   if (!str)
183     return NULL;
184 
185   if (!re_list)
186   {
187     malloc(&re_list, RE_LIST_SIZE * sizeof(regexp_list *));
188     memset(re_list, 0, RE_LIST_SIZE * sizeof(regexp_list *));
189   }
190 
191   idx = (hval = _awka_hashstr(str, len)) % RE_LIST_SIZE;
192   list = re_list[idx];
193 
194   while (list)
195   {
196     if (list->hval == hval)
197     {
198       if (!strncmp(str, list->str, len))
199       {
200         /* we have a match */
201         if (list->re_fs)
202         {
203           if (list != re_list[idx])
204           {
205             prevlist->next = list->next;
206             list->next = re_list[idx];
207             re_list[idx] = list;
208           }
209           return list->re_fs;
210         }
211         if (prevlist)
212           prevlist->next = list->next;
213 
214         _return_re_SPLIT;
215       }
216     }
217     prevlist = list;
218     list  = list->next;
219   }
220 
221   /* this expression not yet created */
222   malloc( &list, sizeof(regexp_list) );
223   malloc( &list->str, len+1 );
224   strcpy(list->str, str);
225   list->re_fs = list->re_nofs = list->re_gsub = NULL;
226   list->hval = hval;
227   re_list[idx] = list;
228 
229   _return_re_SPLIT;
230 }
231 
232 
233 awka_regexp *
_awka_compile_regexp_MATCH(char * str,unsigned int len)234 _awka_compile_regexp_MATCH(char *str, unsigned int len)
235 {
236   register unsigned int idx, hval;
237   regexp_list *list = NULL, *prevlist = NULL, *toplist;
238 
239   if (!str)
240     return NULL;
241 
242   if (!re_list)
243   {
244     malloc(&re_list, RE_LIST_SIZE * sizeof(regexp_list *));
245     memset(re_list, 0, RE_LIST_SIZE * sizeof(regexp_list *));
246   }
247 
248   idx = (hval = _awka_hashstr(str, len)) % RE_LIST_SIZE;
249   list = toplist = re_list[idx];
250 
251   while (list)
252   {
253     if (list->hval == hval)
254     {
255       if (!strncmp(str, list->str, len))
256       {
257         /* we have a match */
258         if (list->re_nofs)
259         {
260           if (list != toplist)
261           {
262             prevlist->next = list->next;
263             list->next = toplist;
264             re_list[idx] = list;
265           }
266           return list->re_nofs;
267         }
268         if (prevlist)
269           prevlist->next = list->next;
270 
271         _return_re_MATCH;
272       }
273     }
274     prevlist = list;
275     list  = list->next;
276   }
277 
278   /* this expression not yet created */
279   malloc( &list, sizeof(regexp_list) );
280   malloc( &list->str, len+1 );
281   strcpy(list->str, str);
282   list->re_fs = list->re_nofs = list->re_gsub = NULL;
283   list->hval = hval;
284   re_list[idx] = list;
285 
286   _return_re_MATCH;
287 }
288 
289 
290 awka_regexp *
_awka_compile_regexp_GSUB(char * str,unsigned int len)291 _awka_compile_regexp_GSUB(char *str, unsigned int len)
292 {
293   register unsigned int idx, hval;
294   regexp_list *list = NULL, *prevlist = NULL, *toplist;
295 
296   if (!str)
297     return NULL;
298 
299   if (!re_list)
300   {
301     malloc(&re_list, RE_LIST_SIZE * sizeof(regexp_list *));
302     memset(re_list, 0, RE_LIST_SIZE * sizeof(regexp_list *));
303   }
304 
305   idx = (hval = _awka_hashstr(str, len)) % RE_LIST_SIZE;
306   list = toplist = re_list[idx];
307 
308   while (list)
309   {
310     if (list->hval == hval)
311     {
312       if (!strncmp(str, list->str, len))
313       {
314         /* we have a match */
315         if (list->re_gsub)
316         {
317           if (list != toplist)
318           {
319             prevlist->next = list->next;
320             list->next = toplist;
321             re_list[idx] = list;
322           }
323           return list->re_gsub;
324         }
325         if (prevlist)
326           prevlist->next = list->next;
327 
328         _return_re_GSUB;
329       }
330     }
331     prevlist = list;
332     list  = list->next;
333   }
334 
335   /* this expression not yet created */
336   malloc( &list, sizeof(regexp_list) );
337   malloc( &list->str, len+1 );
338   strcpy(list->str, str);
339   list->re_fs = list->re_nofs = list->re_gsub = NULL;
340   list->hval = hval;
341   re_list[idx] = list;
342 
343   _return_re_GSUB;
344 }
345 
346