1 #include <stdlib.h>
2 #include "plan9.h"
3 #include "regexp9.h"
4 #include "regcomp.h"
5 
6 
7 /*
8  *  return	0 if no match
9  *		>0 if a match
10  *		<0 if we ran out of _relist space
11  */
12 static int
regexec1(Reprog * progp,char * bol,Resub * mp,int ms,Reljunk * j)13 regexec1(Reprog *progp,	/* program to run */
14 	char *bol,	/* string to run machine on */
15 	Resub *mp,	/* subexpression elements */
16 	int ms,		/* number of elements at mp */
17 	Reljunk *j
18 )
19 {
20 	int flag=0;
21 	Reinst *inst;
22 	Relist *tlp;
23 	char *s;
24 	int i, checkstart;
25 	Rune r, *rp, *ep;
26 	int n;
27 	Relist* tl;		/* This list, next list */
28 	Relist* nl;
29 	Relist* tle;		/* ends of this and next list */
30 	Relist* nle;
31 	int match;
32 	char *p;
33 
34 	match = 0;
35 	checkstart = j->starttype;
36 	if(mp)
37 		for(i=0; i<ms; i++) {
38 			mp[i].s.sp = 0;
39 			mp[i].e.ep = 0;
40 		}
41 	j->relist[0][0].inst = 0;
42 	j->relist[1][0].inst = 0;
43 
44 	/* Execute machine once for each character, including terminal NUL */
45 	s = j->starts;
46 	do{
47 		/* fast check for first char */
48 		if(checkstart) {
49 			switch(j->starttype) {
50 			case RUNE:
51 				p = utfrune(s, j->startchar);
52 				if(p == 0 || s == j->eol)
53 					return match;
54 				s = p;
55 				break;
56 			case BOL:
57 				if(s == bol)
58 					break;
59 				p = utfrune(s, '\n');
60 				if(p == 0 || s == j->eol)
61 					return match;
62 				s = p;
63 				break;
64 			}
65 		}
66 		r = *(uchar*)s;
67 		if(r < Runeself)
68 			n = 1;
69 		else
70 			n = chartorune(&r, s);
71 
72 		/* switch run lists */
73 		tl = j->relist[flag];
74 		tle = j->reliste[flag];
75 		nl = j->relist[flag^=1];
76 		nle = j->reliste[flag];
77 		nl->inst = 0;
78 
79 		/* Add first instruction to current list */
80 		if(match == 0)
81 			_renewemptythread(tl, progp->startinst, ms, s);
82 
83 		/* Execute machine until current list is empty */
84 		for(tlp=tl; tlp->inst; tlp++){	/* assignment = */
85 			for(inst = tlp->inst; ; inst = inst->u2.next){
86 				switch(inst->type){
87 				case RUNE:	/* regular character */
88 					if(inst->u1.r == r){
89 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
90 							return -1;
91 					}
92 					break;
93 				case LBRA:
94 					tlp->se.m[inst->u1.subid].s.sp = s;
95 					continue;
96 				case RBRA:
97 					tlp->se.m[inst->u1.subid].e.ep = s;
98 					continue;
99 				case ANY:
100 					if(r != '\n')
101 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
102 							return -1;
103 					break;
104 				case ANYNL:
105 					if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
106 							return -1;
107 					break;
108 				case BOL:
109 					if(s == bol || *(s-1) == '\n')
110 						continue;
111 					break;
112 				case EOL:
113 					if(s == j->eol || r == 0 || r == '\n')
114 						continue;
115 					break;
116 				case CCLASS:
117 					ep = inst->u1.cp->end;
118 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
119 						if(r >= rp[0] && r <= rp[1]){
120 							if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
121 								return -1;
122 							break;
123 						}
124 					break;
125 				case NCCLASS:
126 					ep = inst->u1.cp->end;
127 					for(rp = inst->u1.cp->spans; rp < ep; rp += 2)
128 						if(r >= rp[0] && r <= rp[1])
129 							break;
130 					if(rp == ep)
131 						if(_renewthread(nl, inst->u2.next, ms, &tlp->se)==nle)
132 							return -1;
133 					break;
134 				case OR:
135 					/* evaluate right choice later */
136 					if(_renewthread(tlp, inst->u1.right, ms, &tlp->se) == tle)
137 						return -1;
138 					/* efficiency: advance and re-evaluate */
139 					continue;
140 				case END:	/* Match! */
141 					match = 1;
142 					tlp->se.m[0].e.ep = s;
143 					if(mp != 0)
144 						_renewmatch(mp, ms, &tlp->se);
145 					break;
146 				}
147 				break;
148 			}
149 		}
150 		if(s == j->eol)
151 			break;
152 		checkstart = j->starttype && nl->inst==0;
153 		s += n;
154 	}while(r);
155 	return match;
156 }
157 
158 static int
regexec2(Reprog * progp,char * bol,Resub * mp,int ms,Reljunk * j)159 regexec2(Reprog *progp,	/* program to run */
160 	char *bol,	/* string to run machine on */
161 	Resub *mp,	/* subexpression elements */
162 	int ms,		/* number of elements at mp */
163 	Reljunk *j
164 )
165 {
166 	int rv;
167 	Relist *relist0, *relist1;
168 
169 	/* mark space */
170 	relist0 = malloc(BIGLISTSIZE*sizeof(Relist));
171 	if(relist0 == nil)
172 		return -1;
173 	relist1 = malloc(BIGLISTSIZE*sizeof(Relist));
174 	if(relist1 == nil){
175 		free(relist1);
176 		return -1;
177 	}
178 	j->relist[0] = relist0;
179 	j->relist[1] = relist1;
180 	j->reliste[0] = relist0 + BIGLISTSIZE - 2;
181 	j->reliste[1] = relist1 + BIGLISTSIZE - 2;
182 
183 	rv = regexec1(progp, bol, mp, ms, j);
184 	free(relist0);
185 	free(relist1);
186 	return rv;
187 }
188 
189 extern int
regexec(Reprog * progp,char * bol,Resub * mp,int ms)190 regexec(Reprog *progp,	/* program to run */
191 	char *bol,	/* string to run machine on */
192 	Resub *mp,	/* subexpression elements */
193 	int ms)		/* number of elements at mp */
194 {
195 	Reljunk j;
196 	Relist relist0[LISTSIZE], relist1[LISTSIZE];
197 	int rv;
198 
199 	/*
200  	 *  use user-specified starting/ending location if specified
201 	 */
202 	j.starts = bol;
203 	j.eol = 0;
204 	if(mp && ms>0){
205 		if(mp->s.sp)
206 			j.starts = mp->s.sp;
207 		if(mp->e.ep)
208 			j.eol = mp->e.ep;
209 	}
210 	j.starttype = 0;
211 	j.startchar = 0;
212 	if(progp->startinst->type == RUNE && progp->startinst->u1.r < Runeself) {
213 		j.starttype = RUNE;
214 		j.startchar = progp->startinst->u1.r;
215 	}
216 	if(progp->startinst->type == BOL)
217 		j.starttype = BOL;
218 
219 	/* mark space */
220 	j.relist[0] = relist0;
221 	j.relist[1] = relist1;
222 	j.reliste[0] = relist0 + nelem(relist0) - 2;
223 	j.reliste[1] = relist1 + nelem(relist1) - 2;
224 
225 	rv = regexec1(progp, bol, mp, ms, &j);
226 	if(rv >= 0)
227 		return rv;
228 	rv = regexec2(progp, bol, mp, ms, &j);
229 	if(rv >= 0)
230 		return rv;
231 	return -1;
232 }
233