1 #include <stdlib.h>
2 #include "plan9.h"
3 #include "regexp9.h"
4 #include "regcomp.h"
5
6
7 /*
8 * return 0 if no match
9 * >0 if a match
10 * <0 if we ran out of _relist space
11 */
12 static int
regexec1(Reprog * progp,char * bol,Resub * mp,int ms,Reljunk * j)13 regexec1(Reprog *progp, /* program to run */
14 char *bol, /* string to run machine on */
15 Resub *mp, /* subexpression elements */
16 int ms, /* number of elements at mp */
17 Reljunk *j
18 )
19 {
20 int flag=0;
21 Reinst *inst;
22 Relist *tlp;
23 char *s;
24 int i, checkstart;
25 Rune r, *rp, *ep;
26 int n;
27 Relist* tl; /* This list, next list */
28 Relist* nl;
29 Relist* tle; /* ends of this and next list */
30 Relist* nle;
31 int match;
32 char *p;
33
34 match = 0;
35 checkstart = j->starttype;
36 if(mp)
37 for(i=0; i<ms; i++) {
38 mp[i].s.sp = 0;
39 mp[i].e.ep = 0;
40 }
41 j->relist[0][0].inst = 0;
42 j->relist[1][0].inst = 0;
43
44 /* Execute machine once for each character, including terminal NUL */
45 s = j->starts;
46 do{
47 /* fast check for first char */
48 if(checkstart) {
49 switch(j->starttype) {
50 case RUNE:
51 p = utfrune(s, j->startchar);
52 if(p == 0 || s == j->eol)
53 return match;
54 s = p;
55 break;
56 case BOL:
57 if(s == bol)
58 break;
59 p = utfrune(s, '\n');
60 if(p == 0 || s == j->eol)
61 return match;
62 s = p;
63 break;
64 }
65 }
66 r = *(uchar*)s;
67 if(r < Runeself)
68 n = 1;
69 else
70 n = chartorune(&r, s);
71
72 /* switch run lists */
73 tl = j->relist[flag];
74 tle = j->reliste[flag];
75 nl = j->relist[flag^=1];
76 nle = j->reliste[flag];
77 nl->inst = 0;
78
79 /* Add first instruction to current list */
80 if(match == 0)
81 _renewemptythread(tl, progp->startinst, ms, s);
82
83 /* Execute machine until current list is empty */
84 for(tlp=tl; tlp->inst; tlp++){ /* assignment = */
85 for(inst = tlp->inst; ; inst = inst->u2.next){
86 switch(inst->type){
87 case RUNE: /* regular character */
88 if(inst->u1.r == r){
89 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
90 return -1;
91 }
92 break;
93 case LBRA:
94 tlp->se.m[inst->u1.subid].s.sp = s;
95 continue;
96 case RBRA:
97 tlp->se.m[inst->u1.subid].e.ep = s;
98 continue;
99 case ANY:
100 if(r != '\n')
101 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
102 return -1;
103 break;
104 case ANYNL:
105 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
106 return -1;
107 break;
108 case BOL:
109 if(s == bol || *(s-1) == '\n')
110 continue;
111 break;
112 case EOL:
113 if(s == j->eol || r == 0 || r == '\n')
114 continue;
115 break;
116 case CCLASS:
117 ep = inst->u1.cp->end;
118 for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
119 if(r >= rp[0] && r <= rp[1]){
120 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
121 return -1;
122 break;
123 }
124 break;
125 case NCCLASS:
126 ep = inst->u1.cp->end;
127 for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
128 if(r >= rp[0] && r <= rp[1])
129 break;
130 if(rp == ep)
131 if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
132 return -1;
133 break;
134 case OR:
135 /* evaluate right choice later */
136 if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
137 return -1;
138 /* efficiency: advance and re-evaluate */
139 continue;
140 case END: /* Match! */
141 match = 1;
142 tlp->se.m[0].e.ep = s;
143 if(mp != 0)
144 _renewmatch(mp, ms, &tlp->se);
145 break;
146 }
147 break;
148 }
149 }
150 if(s == j->eol)
151 break;
152 checkstart = j->starttype && nl->inst==0;
153 s += n;
154 }while(r);
155 return match;
156 }
157
158 static int
regexec2(Reprog * progp,char * bol,Resub * mp,int ms,Reljunk * j)159 regexec2(Reprog *progp, /* program to run */
160 char *bol, /* string to run machine on */
161 Resub *mp, /* subexpression elements */
162 int ms, /* number of elements at mp */
163 Reljunk *j
164 )
165 {
166 int rv;
167 Relist *relist0, *relist1;
168
169 /* mark space */
170 relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
171 if(relist0 == nil)
172 return -1;
173 relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
174 if(relist1 == nil){
175 free(relist1);
176 return -1;
177 }
178 j->relist[0] = relist0;
179 j->relist[1] = relist1;
180 j->reliste[0] = relist0 + BIGLISTSIZE - 2;
181 j->reliste[1] = relist1 + BIGLISTSIZE - 2;
182
183 rv = regexec1(progp, bol, mp, ms, j);
184 free(relist0);
185 free(relist1);
186 return rv;
187 }
188
189 extern int
regexec(Reprog * progp,char * bol,Resub * mp,int ms)190 regexec(Reprog *progp, /* program to run */
191 char *bol, /* string to run machine on */
192 Resub *mp, /* subexpression elements */
193 int ms) /* number of elements at mp */
194 {
195 Reljunk j;
196 Relist relist0[LISTSIZE], relist1[LISTSIZE];
197 int rv;
198
199 /*
200 * use user-specified starting/ending location if specified
201 */
202 j.starts = bol;
203 j.eol = 0;
204 if(mp && ms>0){
205 if(mp->s.sp)
206 j.starts = mp->s.sp;
207 if(mp->e.ep)
208 j.eol = mp->e.ep;
209 }
210 j.starttype = 0;
211 j.startchar = 0;
212 if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
213 j.starttype = RUNE;
214 j.startchar = progp->startinst->u1.r;
215 }
216 if(progp->startinst->type == BOL)
217 j.starttype = BOL;
218
219 /* mark space */
220 j.relist[0] = relist0;
221 j.relist[1] = relist1;
222 j.reliste[0] = relist0 + nelem(relist0) - 2;
223 j.reliste[1] = relist1 + nelem(relist1) - 2;
224
225 rv = regexec1(progp, bol, mp, ms, &j);
226 if(rv >= 0)
227 return rv;
228 rv = regexec2(progp, bol, mp, ms, &j);
229 if(rv >= 0)
230 return rv;
231 return -1;
232 }
233