1*b30d1939SAndy Fiddaman /***********************************************************************
2*b30d1939SAndy Fiddaman * *
3*b30d1939SAndy Fiddaman * This software is part of the ast package *
4*b30d1939SAndy Fiddaman * Copyright (c) 1985-2012 AT&T Intellectual Property *
5*b30d1939SAndy Fiddaman * and is licensed under the *
6*b30d1939SAndy Fiddaman * Eclipse Public License, Version 1.0 *
7*b30d1939SAndy Fiddaman * by AT&T Intellectual Property *
8*b30d1939SAndy Fiddaman * *
9*b30d1939SAndy Fiddaman * A copy of the License is available at *
10*b30d1939SAndy Fiddaman * http://www.eclipse.org/org/documents/epl-v10.html *
11*b30d1939SAndy Fiddaman * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12*b30d1939SAndy Fiddaman * *
13*b30d1939SAndy Fiddaman * Information and Software Systems Research *
14*b30d1939SAndy Fiddaman * AT&T Research *
15*b30d1939SAndy Fiddaman * Florham Park NJ *
16*b30d1939SAndy Fiddaman * *
17*b30d1939SAndy Fiddaman * Glenn Fowler <gsf@research.att.com> *
18*b30d1939SAndy Fiddaman * David Korn <dgk@research.att.com> *
19*b30d1939SAndy Fiddaman * Phong Vo <kpv@research.att.com> *
20*b30d1939SAndy Fiddaman * *
21*b30d1939SAndy Fiddaman ***********************************************************************/
22*b30d1939SAndy Fiddaman #pragma prototyped
23*b30d1939SAndy Fiddaman
24*b30d1939SAndy Fiddaman /*
25*b30d1939SAndy Fiddaman * posix regex executor
26*b30d1939SAndy Fiddaman * single sized-record interface
27*b30d1939SAndy Fiddaman */
28*b30d1939SAndy Fiddaman
29*b30d1939SAndy Fiddaman #include "reglib.h"
30*b30d1939SAndy Fiddaman
31*b30d1939SAndy Fiddaman #if _AST_REGEX_DEBUG
32*b30d1939SAndy Fiddaman
33*b30d1939SAndy Fiddaman #define DEBUG_TEST(f,y,n) ((debug&(debug_flag=f))?(y):(n))
34*b30d1939SAndy Fiddaman #define DEBUG_CODE(f,y,n) do if(debug&(f)){y}else{n} while(0)
35*b30d1939SAndy Fiddaman #define DEBUG_INIT() do { char* t; if (!debug) { debug = 0x80000000; if (t = getenv("_AST_regex_exec_debug")) debug |= strtoul(t, NiL, 0); } } while (0)
36*b30d1939SAndy Fiddaman
37*b30d1939SAndy Fiddaman static unsigned long debug;
38*b30d1939SAndy Fiddaman static unsigned long debug_flag;
39*b30d1939SAndy Fiddaman
40*b30d1939SAndy Fiddaman static const char* rexnames[] =
41*b30d1939SAndy Fiddaman {
42*b30d1939SAndy Fiddaman "REX_NULL",
43*b30d1939SAndy Fiddaman "REX_ALT",
44*b30d1939SAndy Fiddaman "REX_ALT_CATCH",
45*b30d1939SAndy Fiddaman "REX_BACK",
46*b30d1939SAndy Fiddaman "REX_BEG",
47*b30d1939SAndy Fiddaman "REX_BEG_STR",
48*b30d1939SAndy Fiddaman "REX_BM",
49*b30d1939SAndy Fiddaman "REX_CAT",
50*b30d1939SAndy Fiddaman "REX_CLASS",
51*b30d1939SAndy Fiddaman "REX_COLL_CLASS",
52*b30d1939SAndy Fiddaman "REX_CONJ",
53*b30d1939SAndy Fiddaman "REX_CONJ_LEFT",
54*b30d1939SAndy Fiddaman "REX_CONJ_RIGHT",
55*b30d1939SAndy Fiddaman "REX_DONE",
56*b30d1939SAndy Fiddaman "REX_DOT",
57*b30d1939SAndy Fiddaman "REX_END",
58*b30d1939SAndy Fiddaman "REX_END_STR",
59*b30d1939SAndy Fiddaman "REX_EXEC",
60*b30d1939SAndy Fiddaman "REX_FIN_STR",
61*b30d1939SAndy Fiddaman "REX_GROUP",
62*b30d1939SAndy Fiddaman "REX_GROUP_CATCH",
63*b30d1939SAndy Fiddaman "REX_GROUP_AHEAD",
64*b30d1939SAndy Fiddaman "REX_GROUP_AHEAD_CATCH",
65*b30d1939SAndy Fiddaman "REX_GROUP_AHEAD_NOT",
66*b30d1939SAndy Fiddaman "REX_GROUP_BEHIND",
67*b30d1939SAndy Fiddaman "REX_GROUP_BEHIND_CATCH",
68*b30d1939SAndy Fiddaman "REX_GROUP_BEHIND_NOT",
69*b30d1939SAndy Fiddaman "REX_GROUP_BEHIND_NOT_CATCH",
70*b30d1939SAndy Fiddaman "REX_GROUP_COND",
71*b30d1939SAndy Fiddaman "REX_GROUP_COND_CATCH",
72*b30d1939SAndy Fiddaman "REX_GROUP_CUT",
73*b30d1939SAndy Fiddaman "REX_GROUP_CUT_CATCH",
74*b30d1939SAndy Fiddaman "REX_KMP",
75*b30d1939SAndy Fiddaman "REX_NEG",
76*b30d1939SAndy Fiddaman "REX_NEG_CATCH",
77*b30d1939SAndy Fiddaman "REX_NEST",
78*b30d1939SAndy Fiddaman "REX_ONECHAR",
79*b30d1939SAndy Fiddaman "REX_REP",
80*b30d1939SAndy Fiddaman "REX_REP_CATCH",
81*b30d1939SAndy Fiddaman "REX_STRING",
82*b30d1939SAndy Fiddaman "REX_TRIE",
83*b30d1939SAndy Fiddaman "REX_WBEG",
84*b30d1939SAndy Fiddaman "REX_WEND",
85*b30d1939SAndy Fiddaman "REX_WORD",
86*b30d1939SAndy Fiddaman "REX_WORD_NOT"
87*b30d1939SAndy Fiddaman };
88*b30d1939SAndy Fiddaman
rexname(Rex_t * rex)89*b30d1939SAndy Fiddaman static const char* rexname(Rex_t* rex)
90*b30d1939SAndy Fiddaman {
91*b30d1939SAndy Fiddaman if (!rex)
92*b30d1939SAndy Fiddaman return "NIL";
93*b30d1939SAndy Fiddaman if (rex->type >= elementsof(rexnames))
94*b30d1939SAndy Fiddaman return "ERROR";
95*b30d1939SAndy Fiddaman return rexnames[rex->type];
96*b30d1939SAndy Fiddaman }
97*b30d1939SAndy Fiddaman
98*b30d1939SAndy Fiddaman #else
99*b30d1939SAndy Fiddaman
100*b30d1939SAndy Fiddaman #define DEBUG_INIT()
101*b30d1939SAndy Fiddaman #define DEBUG_TEST(f,y,n) (n)
102*b30d1939SAndy Fiddaman #define DEBUG_CODE(f,y,n) do {n} while(0)
103*b30d1939SAndy Fiddaman
104*b30d1939SAndy Fiddaman #endif
105*b30d1939SAndy Fiddaman
106*b30d1939SAndy Fiddaman #define BEG_ALT 1 /* beginning of an alt */
107*b30d1939SAndy Fiddaman #define BEG_ONE 2 /* beginning of one iteration of a rep */
108*b30d1939SAndy Fiddaman #define BEG_REP 3 /* beginning of a repetition */
109*b30d1939SAndy Fiddaman #define BEG_SUB 4 /* beginning of a subexpression */
110*b30d1939SAndy Fiddaman #define END_ANY 5 /* end of any of above */
111*b30d1939SAndy Fiddaman
112*b30d1939SAndy Fiddaman /*
113*b30d1939SAndy Fiddaman * returns from parse()
114*b30d1939SAndy Fiddaman */
115*b30d1939SAndy Fiddaman
116*b30d1939SAndy Fiddaman #define NONE 0 /* no parse found */
117*b30d1939SAndy Fiddaman #define GOOD 1 /* some parse was found */
118*b30d1939SAndy Fiddaman #define CUT 2 /* no match and no backtrack */
119*b30d1939SAndy Fiddaman #define BEST 3 /* an unbeatable parse was found */
120*b30d1939SAndy Fiddaman #define BAD 4 /* error ocurred */
121*b30d1939SAndy Fiddaman
122*b30d1939SAndy Fiddaman /*
123*b30d1939SAndy Fiddaman * REG_SHELL_DOT test
124*b30d1939SAndy Fiddaman */
125*b30d1939SAndy Fiddaman
126*b30d1939SAndy Fiddaman #define LEADING(e,r,s) (*(s)==(e)->leading&&((s)==(e)->beg||*((s)-1)==(r)->explicit))
127*b30d1939SAndy Fiddaman
128*b30d1939SAndy Fiddaman /*
129*b30d1939SAndy Fiddaman * Pos_t is for comparing parses. An entry is made in the
130*b30d1939SAndy Fiddaman * array at the beginning and at the end of each Group_t,
131*b30d1939SAndy Fiddaman * each iteration in a Group_t, and each Binary_t.
132*b30d1939SAndy Fiddaman */
133*b30d1939SAndy Fiddaman
134*b30d1939SAndy Fiddaman typedef struct
135*b30d1939SAndy Fiddaman {
136*b30d1939SAndy Fiddaman unsigned char* p; /* where in string */
137*b30d1939SAndy Fiddaman size_t length; /* length in string */
138*b30d1939SAndy Fiddaman short serial; /* preorder subpattern number */
139*b30d1939SAndy Fiddaman short be; /* which end of pair */
140*b30d1939SAndy Fiddaman } Pos_t;
141*b30d1939SAndy Fiddaman
142*b30d1939SAndy Fiddaman /* ===== begin library support ===== */
143*b30d1939SAndy Fiddaman
144*b30d1939SAndy Fiddaman #define vector(t,v,i) (((i)<(v)->max)?(t*)((v)->vec+(i)*(v)->siz):(t*)vecseek(&(v),i))
145*b30d1939SAndy Fiddaman
146*b30d1939SAndy Fiddaman static Vector_t*
vecopen(int inc,int siz)147*b30d1939SAndy Fiddaman vecopen(int inc, int siz)
148*b30d1939SAndy Fiddaman {
149*b30d1939SAndy Fiddaman Vector_t* v;
150*b30d1939SAndy Fiddaman Stk_t* sp;
151*b30d1939SAndy Fiddaman
152*b30d1939SAndy Fiddaman if (inc <= 0)
153*b30d1939SAndy Fiddaman inc = 16;
154*b30d1939SAndy Fiddaman if (!(sp = stkopen(STK_SMALL|STK_NULL)))
155*b30d1939SAndy Fiddaman return 0;
156*b30d1939SAndy Fiddaman if (!(v = (Vector_t*)stkseek(sp, sizeof(Vector_t) + inc * siz)))
157*b30d1939SAndy Fiddaman {
158*b30d1939SAndy Fiddaman stkclose(sp);
159*b30d1939SAndy Fiddaman return 0;
160*b30d1939SAndy Fiddaman }
161*b30d1939SAndy Fiddaman v->stk = sp;
162*b30d1939SAndy Fiddaman v->vec = (char*)v + sizeof(Vector_t);
163*b30d1939SAndy Fiddaman v->max = v->inc = inc;
164*b30d1939SAndy Fiddaman v->siz = siz;
165*b30d1939SAndy Fiddaman v->cur = 0;
166*b30d1939SAndy Fiddaman return v;
167*b30d1939SAndy Fiddaman }
168*b30d1939SAndy Fiddaman
169*b30d1939SAndy Fiddaman static void*
vecseek(Vector_t ** p,int index)170*b30d1939SAndy Fiddaman vecseek(Vector_t** p, int index)
171*b30d1939SAndy Fiddaman {
172*b30d1939SAndy Fiddaman Vector_t* v = *p;
173*b30d1939SAndy Fiddaman
174*b30d1939SAndy Fiddaman if (index >= v->max)
175*b30d1939SAndy Fiddaman {
176*b30d1939SAndy Fiddaman while ((v->max += v->inc) <= index);
177*b30d1939SAndy Fiddaman if (!(v = (Vector_t*)stkseek(v->stk, sizeof(Vector_t) + v->max * v->siz)))
178*b30d1939SAndy Fiddaman return 0;
179*b30d1939SAndy Fiddaman *p = v;
180*b30d1939SAndy Fiddaman v->vec = (char*)v + sizeof(Vector_t);
181*b30d1939SAndy Fiddaman }
182*b30d1939SAndy Fiddaman return v->vec + index * v->siz;
183*b30d1939SAndy Fiddaman }
184*b30d1939SAndy Fiddaman
185*b30d1939SAndy Fiddaman static void
vecclose(Vector_t * v)186*b30d1939SAndy Fiddaman vecclose(Vector_t* v)
187*b30d1939SAndy Fiddaman {
188*b30d1939SAndy Fiddaman if (v)
189*b30d1939SAndy Fiddaman stkclose(v->stk);
190*b30d1939SAndy Fiddaman }
191*b30d1939SAndy Fiddaman
192*b30d1939SAndy Fiddaman typedef struct
193*b30d1939SAndy Fiddaman {
194*b30d1939SAndy Fiddaman Stk_pos_t pos;
195*b30d1939SAndy Fiddaman char data[1];
196*b30d1939SAndy Fiddaman } Stk_frame_t;
197*b30d1939SAndy Fiddaman
198*b30d1939SAndy Fiddaman #define stknew(s,p) ((p)->offset=stktell(s),(p)->base=stkfreeze(s,0))
199*b30d1939SAndy Fiddaman #define stkold(s,p) stkset(s,(p)->base,(p)->offset)
200*b30d1939SAndy Fiddaman
201*b30d1939SAndy Fiddaman #define stkframe(s) (*((Stk_frame_t**)stktop(s)-1))
202*b30d1939SAndy Fiddaman #define stkdata(s,t) ((t*)stkframe(s)->data)
203*b30d1939SAndy Fiddaman #define stkpop(s) stkold(s,&(stkframe(s)->pos))
204*b30d1939SAndy Fiddaman
205*b30d1939SAndy Fiddaman static void*
stkpush(Stk_t * sp,size_t size)206*b30d1939SAndy Fiddaman stkpush(Stk_t* sp, size_t size)
207*b30d1939SAndy Fiddaman {
208*b30d1939SAndy Fiddaman Stk_frame_t* f;
209*b30d1939SAndy Fiddaman Stk_pos_t p;
210*b30d1939SAndy Fiddaman
211*b30d1939SAndy Fiddaman stknew(sp, &p);
212*b30d1939SAndy Fiddaman size = sizeof(Stk_frame_t) + sizeof(size_t) + size - 1;
213*b30d1939SAndy Fiddaman if (!(f = (Stk_frame_t*)stkalloc(sp, sizeof(Stk_frame_t) + sizeof(Stk_frame_t*) + size - 1)))
214*b30d1939SAndy Fiddaman return 0;
215*b30d1939SAndy Fiddaman f->pos = p;
216*b30d1939SAndy Fiddaman stkframe(sp) = f;
217*b30d1939SAndy Fiddaman return f->data;
218*b30d1939SAndy Fiddaman }
219*b30d1939SAndy Fiddaman
220*b30d1939SAndy Fiddaman /* ===== end library support ===== */
221*b30d1939SAndy Fiddaman
222*b30d1939SAndy Fiddaman /*
223*b30d1939SAndy Fiddaman * Match_frame_t is for saving and restoring match records
224*b30d1939SAndy Fiddaman * around alternate attempts, so that fossils will not be
225*b30d1939SAndy Fiddaman * left in the match array. These are the only entries in
226*b30d1939SAndy Fiddaman * the match array that are not otherwise guaranteed to
227*b30d1939SAndy Fiddaman * have current data in them when they get used.
228*b30d1939SAndy Fiddaman */
229*b30d1939SAndy Fiddaman
230*b30d1939SAndy Fiddaman typedef struct
231*b30d1939SAndy Fiddaman {
232*b30d1939SAndy Fiddaman size_t size;
233*b30d1939SAndy Fiddaman regmatch_t* match;
234*b30d1939SAndy Fiddaman regmatch_t save[1];
235*b30d1939SAndy Fiddaman } Match_frame_t;
236*b30d1939SAndy Fiddaman
237*b30d1939SAndy Fiddaman #define matchpush(e,x) ((x)->re.group.number?_matchpush(e,x):0)
238*b30d1939SAndy Fiddaman #define matchcopy(e,x) do if ((x)->re.group.number) { Match_frame_t* fp = (void*)stkframe(stkstd)->data; memcpy(fp->match, fp->save, fp->size); } while (0)
239*b30d1939SAndy Fiddaman #define matchpop(e,x) do if ((x)->re.group.number) { Match_frame_t* fp = (void*)stkframe(stkstd)->data; memcpy(fp->match, fp->save, fp->size); stkpop(stkstd); } while (0)
240*b30d1939SAndy Fiddaman
241*b30d1939SAndy Fiddaman #define pospop(e) (--(e)->pos->cur)
242*b30d1939SAndy Fiddaman
243*b30d1939SAndy Fiddaman /*
244*b30d1939SAndy Fiddaman * allocate a frame and push a match onto the stack
245*b30d1939SAndy Fiddaman */
246*b30d1939SAndy Fiddaman
247*b30d1939SAndy Fiddaman static int
_matchpush(Env_t * env,Rex_t * rex)248*b30d1939SAndy Fiddaman _matchpush(Env_t* env, Rex_t* rex)
249*b30d1939SAndy Fiddaman {
250*b30d1939SAndy Fiddaman Match_frame_t* f;
251*b30d1939SAndy Fiddaman regmatch_t* m;
252*b30d1939SAndy Fiddaman regmatch_t* e;
253*b30d1939SAndy Fiddaman regmatch_t* s;
254*b30d1939SAndy Fiddaman int num;
255*b30d1939SAndy Fiddaman
256*b30d1939SAndy Fiddaman if (rex->re.group.number <= 0 || (num = rex->re.group.last - rex->re.group.number + 1) <= 0)
257*b30d1939SAndy Fiddaman num = 0;
258*b30d1939SAndy Fiddaman if (!(f = (Match_frame_t*)stkpush(stkstd, sizeof(Match_frame_t) + (num - 1) * sizeof(regmatch_t))))
259*b30d1939SAndy Fiddaman {
260*b30d1939SAndy Fiddaman env->error = REG_ESPACE;
261*b30d1939SAndy Fiddaman return 1;
262*b30d1939SAndy Fiddaman }
263*b30d1939SAndy Fiddaman f->size = num * sizeof(regmatch_t);
264*b30d1939SAndy Fiddaman f->match = m = env->match + rex->re.group.number;
265*b30d1939SAndy Fiddaman e = m + num;
266*b30d1939SAndy Fiddaman s = f->save;
267*b30d1939SAndy Fiddaman while (m < e)
268*b30d1939SAndy Fiddaman {
269*b30d1939SAndy Fiddaman *s++ = *m;
270*b30d1939SAndy Fiddaman *m++ = state.nomatch;
271*b30d1939SAndy Fiddaman }
272*b30d1939SAndy Fiddaman return 0;
273*b30d1939SAndy Fiddaman }
274*b30d1939SAndy Fiddaman
275*b30d1939SAndy Fiddaman /*
276*b30d1939SAndy Fiddaman * allocate a frame and push a pos onto the stack
277*b30d1939SAndy Fiddaman */
278*b30d1939SAndy Fiddaman
279*b30d1939SAndy Fiddaman static int
pospush(Env_t * env,Rex_t * rex,unsigned char * p,int be)280*b30d1939SAndy Fiddaman pospush(Env_t* env, Rex_t* rex, unsigned char* p, int be)
281*b30d1939SAndy Fiddaman {
282*b30d1939SAndy Fiddaman Pos_t* pos;
283*b30d1939SAndy Fiddaman
284*b30d1939SAndy Fiddaman if (!(pos = vector(Pos_t, env->pos, env->pos->cur)))
285*b30d1939SAndy Fiddaman {
286*b30d1939SAndy Fiddaman env->error = REG_ESPACE;
287*b30d1939SAndy Fiddaman return 1;
288*b30d1939SAndy Fiddaman }
289*b30d1939SAndy Fiddaman pos->serial = rex->serial;
290*b30d1939SAndy Fiddaman pos->p = p;
291*b30d1939SAndy Fiddaman pos->be = be;
292*b30d1939SAndy Fiddaman env->pos->cur++;
293*b30d1939SAndy Fiddaman return 0;
294*b30d1939SAndy Fiddaman }
295*b30d1939SAndy Fiddaman
296*b30d1939SAndy Fiddaman /*
297*b30d1939SAndy Fiddaman * two matches are known to have the same length
298*b30d1939SAndy Fiddaman * os is start of old pos array, ns is start of new,
299*b30d1939SAndy Fiddaman * oend and nend are end+1 pointers to ends of arrays.
300*b30d1939SAndy Fiddaman * oe and ne are ends (not end+1) of subarrays.
301*b30d1939SAndy Fiddaman * returns 1 if new is better, -1 if old, else 0.
302*b30d1939SAndy Fiddaman */
303*b30d1939SAndy Fiddaman
304*b30d1939SAndy Fiddaman static int
better(Env_t * env,Pos_t * os,Pos_t * ns,Pos_t * oend,Pos_t * nend,int level)305*b30d1939SAndy Fiddaman better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
306*b30d1939SAndy Fiddaman {
307*b30d1939SAndy Fiddaman Pos_t* oe;
308*b30d1939SAndy Fiddaman Pos_t* ne;
309*b30d1939SAndy Fiddaman int k;
310*b30d1939SAndy Fiddaman int n;
311*b30d1939SAndy Fiddaman
312*b30d1939SAndy Fiddaman if (env->error)
313*b30d1939SAndy Fiddaman return -1;
314*b30d1939SAndy Fiddaman for (;;)
315*b30d1939SAndy Fiddaman {
316*b30d1939SAndy Fiddaman DEBUG_CODE(0x0080,{sfprintf(sfstdout, " %-*.*sold ", (level + 3) * 4, (level + 3) * 4, "");for (oe = os; oe < oend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n %-*.*snew ", (level + 3) * 4, (level + 3) * 4, "");for (oe = ns; oe < nend; oe++)sfprintf(sfstdout, "<%d,%d,%d>", oe->p - env->beg, oe->serial, oe->be);sfprintf(sfstdout, "\n");},{0;});
317*b30d1939SAndy Fiddaman if (ns >= nend)
318*b30d1939SAndy Fiddaman return DEBUG_TEST(0x8000,(os < oend),(0));
319*b30d1939SAndy Fiddaman if (os >= oend)
320*b30d1939SAndy Fiddaman return DEBUG_TEST(0x8000,(-1),(1));
321*b30d1939SAndy Fiddaman n = os->serial;
322*b30d1939SAndy Fiddaman if (ns->serial > n)
323*b30d1939SAndy Fiddaman return -1;
324*b30d1939SAndy Fiddaman if (n > ns->serial)
325*b30d1939SAndy Fiddaman {
326*b30d1939SAndy Fiddaman env->error = REG_PANIC;
327*b30d1939SAndy Fiddaman return -1;
328*b30d1939SAndy Fiddaman }
329*b30d1939SAndy Fiddaman if (ns->p > os->p)
330*b30d1939SAndy Fiddaman return 1;
331*b30d1939SAndy Fiddaman if (os->p > ns->p)
332*b30d1939SAndy Fiddaman return -1;
333*b30d1939SAndy Fiddaman oe = os;
334*b30d1939SAndy Fiddaman k = 0;
335*b30d1939SAndy Fiddaman for (;;)
336*b30d1939SAndy Fiddaman if ((++oe)->serial == n)
337*b30d1939SAndy Fiddaman {
338*b30d1939SAndy Fiddaman if (oe->be != END_ANY)
339*b30d1939SAndy Fiddaman k++;
340*b30d1939SAndy Fiddaman else if (k-- <= 0)
341*b30d1939SAndy Fiddaman break;
342*b30d1939SAndy Fiddaman }
343*b30d1939SAndy Fiddaman ne = ns;
344*b30d1939SAndy Fiddaman k = 0;
345*b30d1939SAndy Fiddaman for (;;)
346*b30d1939SAndy Fiddaman if ((++ne)->serial == n)
347*b30d1939SAndy Fiddaman {
348*b30d1939SAndy Fiddaman if (ne->be != END_ANY)
349*b30d1939SAndy Fiddaman k++;
350*b30d1939SAndy Fiddaman else if (k-- <= 0)
351*b30d1939SAndy Fiddaman break;
352*b30d1939SAndy Fiddaman }
353*b30d1939SAndy Fiddaman if (ne->p > oe->p)
354*b30d1939SAndy Fiddaman return 1;
355*b30d1939SAndy Fiddaman if (oe->p > ne->p)
356*b30d1939SAndy Fiddaman return -1;
357*b30d1939SAndy Fiddaman if (k = better(env, os + 1, ns + 1, oe, ne, level + 1))
358*b30d1939SAndy Fiddaman return k;
359*b30d1939SAndy Fiddaman os = oe + 1;
360*b30d1939SAndy Fiddaman ns = ne + 1;
361*b30d1939SAndy Fiddaman }
362*b30d1939SAndy Fiddaman }
363*b30d1939SAndy Fiddaman
364*b30d1939SAndy Fiddaman #if _AST_REGEX_DEBUG
365*b30d1939SAndy Fiddaman
366*b30d1939SAndy Fiddaman static void
showmatch(regmatch_t * p)367*b30d1939SAndy Fiddaman showmatch(regmatch_t* p)
368*b30d1939SAndy Fiddaman {
369*b30d1939SAndy Fiddaman sfputc(sfstdout, '(');
370*b30d1939SAndy Fiddaman if (p->rm_so < 0)
371*b30d1939SAndy Fiddaman sfputc(sfstdout, '?');
372*b30d1939SAndy Fiddaman else
373*b30d1939SAndy Fiddaman sfprintf(sfstdout, "%z", p->rm_so);
374*b30d1939SAndy Fiddaman sfputc(sfstdout, ',');
375*b30d1939SAndy Fiddaman if (p->rm_eo < 0)
376*b30d1939SAndy Fiddaman sfputc(sfstdout, '?');
377*b30d1939SAndy Fiddaman else
378*b30d1939SAndy Fiddaman sfprintf(sfstdout, "%z", p->rm_eo);
379*b30d1939SAndy Fiddaman sfputc(sfstdout, ')');
380*b30d1939SAndy Fiddaman }
381*b30d1939SAndy Fiddaman
382*b30d1939SAndy Fiddaman static int
_better(Env_t * env,Pos_t * os,Pos_t * ns,Pos_t * oend,Pos_t * nend,int level)383*b30d1939SAndy Fiddaman _better(Env_t* env, Pos_t* os, Pos_t* ns, Pos_t* oend, Pos_t* nend, int level)
384*b30d1939SAndy Fiddaman {
385*b30d1939SAndy Fiddaman int i;
386*b30d1939SAndy Fiddaman
387*b30d1939SAndy Fiddaman DEBUG_CODE(0x0040,{sfprintf(sfstdout, "AHA better old ");for (i = 0; i <= env->nsub; i++)showmatch(&env->best[i]);sfprintf(sfstdout, "\n new ");for (i = 0; i <= env->nsub; i++)showmatch(&env->match[i]);sfprintf(sfstdout, "\n");},{0;});
388*b30d1939SAndy Fiddaman i = better(env, os, ns, oend, nend, 0);
389*b30d1939SAndy Fiddaman DEBUG_TEST(0x0040,(sfprintf(sfstdout, " %s\n", i <= 0 ? "OLD" : "NEW")),(0));
390*b30d1939SAndy Fiddaman return i;
391*b30d1939SAndy Fiddaman }
392*b30d1939SAndy Fiddaman
393*b30d1939SAndy Fiddaman #define better _better
394*b30d1939SAndy Fiddaman
395*b30d1939SAndy Fiddaman #endif
396*b30d1939SAndy Fiddaman
397*b30d1939SAndy Fiddaman #define follow(e,r,c,s) ((r)->next?parse(e,(r)->next,c,s):(c)?parse(e,c,0,s):BEST)
398*b30d1939SAndy Fiddaman
399*b30d1939SAndy Fiddaman static int parse(Env_t*, Rex_t*, Rex_t*, unsigned char*);
400*b30d1939SAndy Fiddaman
401*b30d1939SAndy Fiddaman static int
parserep(Env_t * env,Rex_t * rex,Rex_t * cont,unsigned char * s,int n)402*b30d1939SAndy Fiddaman parserep(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s, int n)
403*b30d1939SAndy Fiddaman {
404*b30d1939SAndy Fiddaman int i;
405*b30d1939SAndy Fiddaman int r = NONE;
406*b30d1939SAndy Fiddaman Rex_t catcher;
407*b30d1939SAndy Fiddaman
408*b30d1939SAndy Fiddaman DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep %s %d %d %d %d `%-.*s'\n", __LINE__, debug_flag, rexname(rex->re.group.expr.rex), rex->re.group.number, rex->lo, n, rex->hi, env->end - s, s)),(0));
409*b30d1939SAndy Fiddaman if ((rex->flags & REG_MINIMAL) && n >= rex->lo && n < rex->hi)
410*b30d1939SAndy Fiddaman {
411*b30d1939SAndy Fiddaman if (env->stack && pospush(env, rex, s, END_ANY))
412*b30d1939SAndy Fiddaman return BAD;
413*b30d1939SAndy Fiddaman i = follow(env, rex, cont, s);
414*b30d1939SAndy Fiddaman if (env->stack)
415*b30d1939SAndy Fiddaman pospop(env);
416*b30d1939SAndy Fiddaman switch (i)
417*b30d1939SAndy Fiddaman {
418*b30d1939SAndy Fiddaman case BAD:
419*b30d1939SAndy Fiddaman return BAD;
420*b30d1939SAndy Fiddaman case CUT:
421*b30d1939SAndy Fiddaman return CUT;
422*b30d1939SAndy Fiddaman case BEST:
423*b30d1939SAndy Fiddaman case GOOD:
424*b30d1939SAndy Fiddaman return BEST;
425*b30d1939SAndy Fiddaman }
426*b30d1939SAndy Fiddaman }
427*b30d1939SAndy Fiddaman if (n < rex->hi)
428*b30d1939SAndy Fiddaman {
429*b30d1939SAndy Fiddaman catcher.type = REX_REP_CATCH;
430*b30d1939SAndy Fiddaman catcher.serial = rex->serial;
431*b30d1939SAndy Fiddaman catcher.re.rep_catch.ref = rex;
432*b30d1939SAndy Fiddaman catcher.re.rep_catch.cont = cont;
433*b30d1939SAndy Fiddaman catcher.re.rep_catch.beg = s;
434*b30d1939SAndy Fiddaman catcher.re.rep_catch.n = n + 1;
435*b30d1939SAndy Fiddaman catcher.next = rex->next;
436*b30d1939SAndy Fiddaman if (n == 0)
437*b30d1939SAndy Fiddaman rex->re.rep_catch.beg = s;
438*b30d1939SAndy Fiddaman if (env->stack)
439*b30d1939SAndy Fiddaman {
440*b30d1939SAndy Fiddaman if (matchpush(env, rex))
441*b30d1939SAndy Fiddaman return BAD;
442*b30d1939SAndy Fiddaman if (pospush(env, rex, s, BEG_ONE))
443*b30d1939SAndy Fiddaman return BAD;
444*b30d1939SAndy Fiddaman DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x PUSH %d (%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)(%z,%z)\n", __LINE__, debug_flag, rex->re.group.number, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
445*b30d1939SAndy Fiddaman }
446*b30d1939SAndy Fiddaman r = parse(env, rex->re.group.expr.rex, &catcher, s);
447*b30d1939SAndy Fiddaman DEBUG_TEST(0x0010,(sfprintf(sfstdout, "AHA#%04d 0x%04x parserep parse %d %d `%-.*s'\n", __LINE__, debug_flag, rex->re.group.number, r, env->end - s, s)),(0));
448*b30d1939SAndy Fiddaman if (env->stack)
449*b30d1939SAndy Fiddaman {
450*b30d1939SAndy Fiddaman pospop(env);
451*b30d1939SAndy Fiddaman matchpop(env, rex);
452*b30d1939SAndy Fiddaman DEBUG_TEST(0x0004,(sfprintf(sfstdout,"AHA#%04d 0x%04x POP %d %d (%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)(%z,%z)\n", __LINE__, debug_flag, rex->re.group.number, r, env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo, env->match[2].rm_so, env->match[2].rm_eo)),(0));
453*b30d1939SAndy Fiddaman }
454*b30d1939SAndy Fiddaman switch (r)
455*b30d1939SAndy Fiddaman {
456*b30d1939SAndy Fiddaman case BAD:
457*b30d1939SAndy Fiddaman return BAD;
458*b30d1939SAndy Fiddaman case BEST:
459*b30d1939SAndy Fiddaman return BEST;
460*b30d1939SAndy Fiddaman case CUT:
461*b30d1939SAndy Fiddaman r = NONE;
462*b30d1939SAndy Fiddaman break;
463*b30d1939SAndy Fiddaman case GOOD:
464*b30d1939SAndy Fiddaman if (rex->flags & REG_MINIMAL)
465*b30d1939SAndy Fiddaman return BEST;
466*b30d1939SAndy Fiddaman r = GOOD;
467*b30d1939SAndy Fiddaman break;
468*b30d1939SAndy Fiddaman }
469*b30d1939SAndy Fiddaman }
470*b30d1939SAndy Fiddaman if (n < rex->lo)
471*b30d1939SAndy Fiddaman return r;
472*b30d1939SAndy Fiddaman if (!(rex->flags & REG_MINIMAL) || n >= rex->hi)
473*b30d1939SAndy Fiddaman {
474*b30d1939SAndy Fiddaman if (env->stack && pospush(env, rex, s, END_ANY))
475*b30d1939SAndy Fiddaman return BAD;
476*b30d1939SAndy Fiddaman i = follow(env, rex, cont, s);
477*b30d1939SAndy Fiddaman if (env->stack)
478*b30d1939SAndy Fiddaman pospop(env);
479*b30d1939SAndy Fiddaman switch (i)
480*b30d1939SAndy Fiddaman {
481*b30d1939SAndy Fiddaman case BAD:
482*b30d1939SAndy Fiddaman r = BAD;
483*b30d1939SAndy Fiddaman break;
484*b30d1939SAndy Fiddaman case CUT:
485*b30d1939SAndy Fiddaman r = CUT;
486*b30d1939SAndy Fiddaman break;
487*b30d1939SAndy Fiddaman case BEST:
488*b30d1939SAndy Fiddaman r = BEST;
489*b30d1939SAndy Fiddaman break;
490*b30d1939SAndy Fiddaman case GOOD:
491*b30d1939SAndy Fiddaman r = (rex->flags & REG_MINIMAL) ? BEST : GOOD;
492*b30d1939SAndy Fiddaman break;
493*b30d1939SAndy Fiddaman }
494*b30d1939SAndy Fiddaman }
495*b30d1939SAndy Fiddaman return r;
496*b30d1939SAndy Fiddaman }
497*b30d1939SAndy Fiddaman
498*b30d1939SAndy Fiddaman static int
parsetrie(Env_t * env,Trie_node_t * x,Rex_t * rex,Rex_t * cont,unsigned char * s)499*b30d1939SAndy Fiddaman parsetrie(Env_t* env, Trie_node_t* x, Rex_t* rex, Rex_t* cont, unsigned char* s)
500*b30d1939SAndy Fiddaman {
501*b30d1939SAndy Fiddaman unsigned char* p;
502*b30d1939SAndy Fiddaman int r;
503*b30d1939SAndy Fiddaman
504*b30d1939SAndy Fiddaman if (p = rex->map)
505*b30d1939SAndy Fiddaman {
506*b30d1939SAndy Fiddaman for (;;)
507*b30d1939SAndy Fiddaman {
508*b30d1939SAndy Fiddaman if (s >= env->end)
509*b30d1939SAndy Fiddaman return NONE;
510*b30d1939SAndy Fiddaman while (x->c != p[*s])
511*b30d1939SAndy Fiddaman if (!(x = x->sib))
512*b30d1939SAndy Fiddaman return NONE;
513*b30d1939SAndy Fiddaman if (x->end)
514*b30d1939SAndy Fiddaman break;
515*b30d1939SAndy Fiddaman x = x->son;
516*b30d1939SAndy Fiddaman s++;
517*b30d1939SAndy Fiddaman }
518*b30d1939SAndy Fiddaman }
519*b30d1939SAndy Fiddaman else
520*b30d1939SAndy Fiddaman {
521*b30d1939SAndy Fiddaman for (;;)
522*b30d1939SAndy Fiddaman {
523*b30d1939SAndy Fiddaman if (s >= env->end)
524*b30d1939SAndy Fiddaman return NONE;
525*b30d1939SAndy Fiddaman while (x->c != *s)
526*b30d1939SAndy Fiddaman if (!(x = x->sib))
527*b30d1939SAndy Fiddaman return NONE;
528*b30d1939SAndy Fiddaman if (x->end)
529*b30d1939SAndy Fiddaman break;
530*b30d1939SAndy Fiddaman x = x->son;
531*b30d1939SAndy Fiddaman s++;
532*b30d1939SAndy Fiddaman }
533*b30d1939SAndy Fiddaman }
534*b30d1939SAndy Fiddaman s++;
535*b30d1939SAndy Fiddaman if (rex->flags & REG_MINIMAL)
536*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
537*b30d1939SAndy Fiddaman {
538*b30d1939SAndy Fiddaman case BAD:
539*b30d1939SAndy Fiddaman return BAD;
540*b30d1939SAndy Fiddaman case CUT:
541*b30d1939SAndy Fiddaman return CUT;
542*b30d1939SAndy Fiddaman case BEST:
543*b30d1939SAndy Fiddaman case GOOD:
544*b30d1939SAndy Fiddaman return BEST;
545*b30d1939SAndy Fiddaman }
546*b30d1939SAndy Fiddaman if (x->son)
547*b30d1939SAndy Fiddaman switch (parsetrie(env, x->son, rex, cont, s))
548*b30d1939SAndy Fiddaman {
549*b30d1939SAndy Fiddaman case BAD:
550*b30d1939SAndy Fiddaman return BAD;
551*b30d1939SAndy Fiddaman case CUT:
552*b30d1939SAndy Fiddaman return CUT;
553*b30d1939SAndy Fiddaman case BEST:
554*b30d1939SAndy Fiddaman return BEST;
555*b30d1939SAndy Fiddaman case GOOD:
556*b30d1939SAndy Fiddaman if (rex->flags & REG_MINIMAL)
557*b30d1939SAndy Fiddaman return BEST;
558*b30d1939SAndy Fiddaman r = GOOD;
559*b30d1939SAndy Fiddaman break;
560*b30d1939SAndy Fiddaman default:
561*b30d1939SAndy Fiddaman r = NONE;
562*b30d1939SAndy Fiddaman break;
563*b30d1939SAndy Fiddaman }
564*b30d1939SAndy Fiddaman else
565*b30d1939SAndy Fiddaman r = NONE;
566*b30d1939SAndy Fiddaman if (!(rex->flags & REG_MINIMAL))
567*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
568*b30d1939SAndy Fiddaman {
569*b30d1939SAndy Fiddaman case BAD:
570*b30d1939SAndy Fiddaman return BAD;
571*b30d1939SAndy Fiddaman case CUT:
572*b30d1939SAndy Fiddaman return CUT;
573*b30d1939SAndy Fiddaman case BEST:
574*b30d1939SAndy Fiddaman return BEST;
575*b30d1939SAndy Fiddaman case GOOD:
576*b30d1939SAndy Fiddaman return GOOD;
577*b30d1939SAndy Fiddaman }
578*b30d1939SAndy Fiddaman return r;
579*b30d1939SAndy Fiddaman }
580*b30d1939SAndy Fiddaman
581*b30d1939SAndy Fiddaman static int
collelt(register Celt_t * ce,char * key,int c,int x)582*b30d1939SAndy Fiddaman collelt(register Celt_t* ce, char* key, int c, int x)
583*b30d1939SAndy Fiddaman {
584*b30d1939SAndy Fiddaman Ckey_t elt;
585*b30d1939SAndy Fiddaman
586*b30d1939SAndy Fiddaman mbxfrm(elt, key, COLL_KEY_MAX);
587*b30d1939SAndy Fiddaman for (;; ce++)
588*b30d1939SAndy Fiddaman {
589*b30d1939SAndy Fiddaman switch (ce->typ)
590*b30d1939SAndy Fiddaman {
591*b30d1939SAndy Fiddaman case COLL_call:
592*b30d1939SAndy Fiddaman if (!x && (*ce->fun)(c))
593*b30d1939SAndy Fiddaman return 1;
594*b30d1939SAndy Fiddaman continue;
595*b30d1939SAndy Fiddaman case COLL_char:
596*b30d1939SAndy Fiddaman if (!strcmp((char*)ce->beg, (char*)elt))
597*b30d1939SAndy Fiddaman return 1;
598*b30d1939SAndy Fiddaman continue;
599*b30d1939SAndy Fiddaman case COLL_range:
600*b30d1939SAndy Fiddaman if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max)
601*b30d1939SAndy Fiddaman return 1;
602*b30d1939SAndy Fiddaman continue;
603*b30d1939SAndy Fiddaman case COLL_range_lc:
604*b30d1939SAndy Fiddaman if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswlower(c) || !iswupper(c)))
605*b30d1939SAndy Fiddaman return 1;
606*b30d1939SAndy Fiddaman continue;
607*b30d1939SAndy Fiddaman case COLL_range_uc:
608*b30d1939SAndy Fiddaman if (strcmp((char*)ce->beg, (char*)elt) <= ce->min && strcmp((char*)elt, (char*)ce->end) <= ce->max && (iswupper(c) || !iswlower(c)))
609*b30d1939SAndy Fiddaman return 1;
610*b30d1939SAndy Fiddaman continue;
611*b30d1939SAndy Fiddaman }
612*b30d1939SAndy Fiddaman break;
613*b30d1939SAndy Fiddaman }
614*b30d1939SAndy Fiddaman return 0;
615*b30d1939SAndy Fiddaman }
616*b30d1939SAndy Fiddaman
617*b30d1939SAndy Fiddaman static int
collic(register Celt_t * ce,char * key,register char * nxt,int c,int x)618*b30d1939SAndy Fiddaman collic(register Celt_t* ce, char* key, register char* nxt, int c, int x)
619*b30d1939SAndy Fiddaman {
620*b30d1939SAndy Fiddaman if (!x)
621*b30d1939SAndy Fiddaman {
622*b30d1939SAndy Fiddaman if (collelt(ce, key, c, x))
623*b30d1939SAndy Fiddaman return 1;
624*b30d1939SAndy Fiddaman if (iswlower(c))
625*b30d1939SAndy Fiddaman c = towupper(c);
626*b30d1939SAndy Fiddaman else if (iswupper(c))
627*b30d1939SAndy Fiddaman c = towlower(c);
628*b30d1939SAndy Fiddaman else
629*b30d1939SAndy Fiddaman return 0;
630*b30d1939SAndy Fiddaman x = mbconv(key, c);
631*b30d1939SAndy Fiddaman key[x] = 0;
632*b30d1939SAndy Fiddaman return collelt(ce, key, c, 0);
633*b30d1939SAndy Fiddaman }
634*b30d1939SAndy Fiddaman while (*nxt)
635*b30d1939SAndy Fiddaman {
636*b30d1939SAndy Fiddaman if (collic(ce, key, nxt + 1, c, x))
637*b30d1939SAndy Fiddaman return 1;
638*b30d1939SAndy Fiddaman if (islower(*nxt))
639*b30d1939SAndy Fiddaman *nxt = toupper(*nxt);
640*b30d1939SAndy Fiddaman else if (isupper(*nxt))
641*b30d1939SAndy Fiddaman *nxt = tolower(*nxt);
642*b30d1939SAndy Fiddaman else
643*b30d1939SAndy Fiddaman return 0;
644*b30d1939SAndy Fiddaman nxt++;
645*b30d1939SAndy Fiddaman }
646*b30d1939SAndy Fiddaman return collelt(ce, key, c, x);
647*b30d1939SAndy Fiddaman }
648*b30d1939SAndy Fiddaman
649*b30d1939SAndy Fiddaman static int
collmatch(Rex_t * rex,unsigned char * s,unsigned char * e,unsigned char ** p)650*b30d1939SAndy Fiddaman collmatch(Rex_t* rex, unsigned char* s, unsigned char* e, unsigned char** p)
651*b30d1939SAndy Fiddaman {
652*b30d1939SAndy Fiddaman unsigned char* t;
653*b30d1939SAndy Fiddaman wchar_t c;
654*b30d1939SAndy Fiddaman int w;
655*b30d1939SAndy Fiddaman int r;
656*b30d1939SAndy Fiddaman int x;
657*b30d1939SAndy Fiddaman int ic;
658*b30d1939SAndy Fiddaman Ckey_t key;
659*b30d1939SAndy Fiddaman Ckey_t elt;
660*b30d1939SAndy Fiddaman
661*b30d1939SAndy Fiddaman ic = (rex->flags & REG_ICASE);
662*b30d1939SAndy Fiddaman if ((w = MBSIZE(s)) > 1)
663*b30d1939SAndy Fiddaman {
664*b30d1939SAndy Fiddaman memcpy((char*)key, (char*)s, w);
665*b30d1939SAndy Fiddaman key[w] = 0;
666*b30d1939SAndy Fiddaman t = s;
667*b30d1939SAndy Fiddaman c = mbchar(t);
668*b30d1939SAndy Fiddaman #if !_lib_wctype
669*b30d1939SAndy Fiddaman c &= 0xff;
670*b30d1939SAndy Fiddaman #endif
671*b30d1939SAndy Fiddaman x = 0;
672*b30d1939SAndy Fiddaman }
673*b30d1939SAndy Fiddaman else
674*b30d1939SAndy Fiddaman {
675*b30d1939SAndy Fiddaman c = s[0];
676*b30d1939SAndy Fiddaman if (ic && isupper(c))
677*b30d1939SAndy Fiddaman c = tolower(c);
678*b30d1939SAndy Fiddaman key[0] = c;
679*b30d1939SAndy Fiddaman key[1] = 0;
680*b30d1939SAndy Fiddaman if (isalpha(c))
681*b30d1939SAndy Fiddaman {
682*b30d1939SAndy Fiddaman x = e - s;
683*b30d1939SAndy Fiddaman if (x > COLL_KEY_MAX)
684*b30d1939SAndy Fiddaman x = COLL_KEY_MAX;
685*b30d1939SAndy Fiddaman while (w < x)
686*b30d1939SAndy Fiddaman {
687*b30d1939SAndy Fiddaman c = s[w];
688*b30d1939SAndy Fiddaman if (!isalpha(c))
689*b30d1939SAndy Fiddaman break;
690*b30d1939SAndy Fiddaman r = mbxfrm(elt, key, COLL_KEY_MAX);
691*b30d1939SAndy Fiddaman if (ic && isupper(c))
692*b30d1939SAndy Fiddaman c = tolower(c);
693*b30d1939SAndy Fiddaman key[w] = c;
694*b30d1939SAndy Fiddaman key[w + 1] = 0;
695*b30d1939SAndy Fiddaman if (mbxfrm(elt, key, COLL_KEY_MAX) != r)
696*b30d1939SAndy Fiddaman break;
697*b30d1939SAndy Fiddaman w++;
698*b30d1939SAndy Fiddaman }
699*b30d1939SAndy Fiddaman }
700*b30d1939SAndy Fiddaman key[w] = 0;
701*b30d1939SAndy Fiddaman c = key[0];
702*b30d1939SAndy Fiddaman x = w - 1;
703*b30d1939SAndy Fiddaman }
704*b30d1939SAndy Fiddaman r = 1;
705*b30d1939SAndy Fiddaman for (;;)
706*b30d1939SAndy Fiddaman {
707*b30d1939SAndy Fiddaman if (ic ? collic(rex->re.collate.elements, (char*)key, (char*)key, c, x) : collelt(rex->re.collate.elements, (char*)key, c, x))
708*b30d1939SAndy Fiddaman break;
709*b30d1939SAndy Fiddaman if (!x)
710*b30d1939SAndy Fiddaman {
711*b30d1939SAndy Fiddaman r = 0;
712*b30d1939SAndy Fiddaman break;
713*b30d1939SAndy Fiddaman }
714*b30d1939SAndy Fiddaman w = x--;
715*b30d1939SAndy Fiddaman key[w] = 0;
716*b30d1939SAndy Fiddaman }
717*b30d1939SAndy Fiddaman *p = s + w;
718*b30d1939SAndy Fiddaman return rex->re.collate.invert ? !r : r;
719*b30d1939SAndy Fiddaman }
720*b30d1939SAndy Fiddaman
721*b30d1939SAndy Fiddaman static unsigned char*
nestmatch(register unsigned char * s,register unsigned char * e,const unsigned short * type,register int co)722*b30d1939SAndy Fiddaman nestmatch(register unsigned char* s, register unsigned char* e, const unsigned short* type, register int co)
723*b30d1939SAndy Fiddaman {
724*b30d1939SAndy Fiddaman register int c;
725*b30d1939SAndy Fiddaman register int cc;
726*b30d1939SAndy Fiddaman unsigned int n;
727*b30d1939SAndy Fiddaman int oc;
728*b30d1939SAndy Fiddaman
729*b30d1939SAndy Fiddaman if (type[co] & (REX_NEST_literal|REX_NEST_quote))
730*b30d1939SAndy Fiddaman {
731*b30d1939SAndy Fiddaman n = (type[co] & REX_NEST_literal) ? REX_NEST_terminator : (REX_NEST_escape|REX_NEST_terminator);
732*b30d1939SAndy Fiddaman while (s < e)
733*b30d1939SAndy Fiddaman {
734*b30d1939SAndy Fiddaman c = *s++;
735*b30d1939SAndy Fiddaman if (c == co)
736*b30d1939SAndy Fiddaman return s;
737*b30d1939SAndy Fiddaman else if (type[c] & n)
738*b30d1939SAndy Fiddaman {
739*b30d1939SAndy Fiddaman if (s >= e || (type[c] & REX_NEST_terminator))
740*b30d1939SAndy Fiddaman break;
741*b30d1939SAndy Fiddaman s++;
742*b30d1939SAndy Fiddaman }
743*b30d1939SAndy Fiddaman }
744*b30d1939SAndy Fiddaman }
745*b30d1939SAndy Fiddaman else
746*b30d1939SAndy Fiddaman {
747*b30d1939SAndy Fiddaman cc = type[co] >> REX_NEST_SHIFT;
748*b30d1939SAndy Fiddaman oc = type[co] & (REX_NEST_open|REX_NEST_close);
749*b30d1939SAndy Fiddaman n = 1;
750*b30d1939SAndy Fiddaman while (s < e)
751*b30d1939SAndy Fiddaman {
752*b30d1939SAndy Fiddaman c = *s++;
753*b30d1939SAndy Fiddaman switch (type[c] & (REX_NEST_escape|REX_NEST_open|REX_NEST_close|REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
754*b30d1939SAndy Fiddaman {
755*b30d1939SAndy Fiddaman case REX_NEST_delimiter:
756*b30d1939SAndy Fiddaman case REX_NEST_terminator:
757*b30d1939SAndy Fiddaman return oc ? 0 : s;
758*b30d1939SAndy Fiddaman case REX_NEST_separator:
759*b30d1939SAndy Fiddaman if (!oc)
760*b30d1939SAndy Fiddaman return s;
761*b30d1939SAndy Fiddaman break;
762*b30d1939SAndy Fiddaman case REX_NEST_escape:
763*b30d1939SAndy Fiddaman if (s >= e)
764*b30d1939SAndy Fiddaman return 0;
765*b30d1939SAndy Fiddaman s++;
766*b30d1939SAndy Fiddaman break;
767*b30d1939SAndy Fiddaman case REX_NEST_open|REX_NEST_close:
768*b30d1939SAndy Fiddaman if (c == cc)
769*b30d1939SAndy Fiddaman {
770*b30d1939SAndy Fiddaman if (!--n)
771*b30d1939SAndy Fiddaman return s;
772*b30d1939SAndy Fiddaman }
773*b30d1939SAndy Fiddaman /*FALLTHROUGH*/
774*b30d1939SAndy Fiddaman case REX_NEST_open:
775*b30d1939SAndy Fiddaman if (c == co)
776*b30d1939SAndy Fiddaman {
777*b30d1939SAndy Fiddaman if (!++n)
778*b30d1939SAndy Fiddaman return 0;
779*b30d1939SAndy Fiddaman }
780*b30d1939SAndy Fiddaman else if (!(s = nestmatch(s, e, type, c)))
781*b30d1939SAndy Fiddaman return 0;
782*b30d1939SAndy Fiddaman break;
783*b30d1939SAndy Fiddaman case REX_NEST_close:
784*b30d1939SAndy Fiddaman if (c != cc)
785*b30d1939SAndy Fiddaman return 0;
786*b30d1939SAndy Fiddaman if (!--n)
787*b30d1939SAndy Fiddaman return s;
788*b30d1939SAndy Fiddaman break;
789*b30d1939SAndy Fiddaman }
790*b30d1939SAndy Fiddaman }
791*b30d1939SAndy Fiddaman return (oc || !(type[UCHAR_MAX+1] & REX_NEST_terminator)) ? 0 : s;
792*b30d1939SAndy Fiddaman }
793*b30d1939SAndy Fiddaman return 0;
794*b30d1939SAndy Fiddaman }
795*b30d1939SAndy Fiddaman
796*b30d1939SAndy Fiddaman static int
parse(Env_t * env,Rex_t * rex,Rex_t * cont,unsigned char * s)797*b30d1939SAndy Fiddaman parse(Env_t* env, Rex_t* rex, Rex_t* cont, unsigned char* s)
798*b30d1939SAndy Fiddaman {
799*b30d1939SAndy Fiddaman int c;
800*b30d1939SAndy Fiddaman int d;
801*b30d1939SAndy Fiddaman int m;
802*b30d1939SAndy Fiddaman int r;
803*b30d1939SAndy Fiddaman ssize_t i;
804*b30d1939SAndy Fiddaman ssize_t n;
805*b30d1939SAndy Fiddaman int* f;
806*b30d1939SAndy Fiddaman unsigned char* p;
807*b30d1939SAndy Fiddaman unsigned char* t;
808*b30d1939SAndy Fiddaman unsigned char* b;
809*b30d1939SAndy Fiddaman unsigned char* e;
810*b30d1939SAndy Fiddaman char* u;
811*b30d1939SAndy Fiddaman regmatch_t* o;
812*b30d1939SAndy Fiddaman Trie_node_t* x;
813*b30d1939SAndy Fiddaman Rex_t* q;
814*b30d1939SAndy Fiddaman Rex_t catcher;
815*b30d1939SAndy Fiddaman Rex_t next;
816*b30d1939SAndy Fiddaman
817*b30d1939SAndy Fiddaman for (;;)
818*b30d1939SAndy Fiddaman {
819*b30d1939SAndy Fiddaman DEBUG_TEST(0x0008,(sfprintf(sfstdout, "AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
820*b30d1939SAndy Fiddaman switch (rex->type)
821*b30d1939SAndy Fiddaman {
822*b30d1939SAndy Fiddaman case REX_ALT:
823*b30d1939SAndy Fiddaman if (env->stack)
824*b30d1939SAndy Fiddaman {
825*b30d1939SAndy Fiddaman if (matchpush(env, rex))
826*b30d1939SAndy Fiddaman return BAD;
827*b30d1939SAndy Fiddaman if (pospush(env, rex, s, BEG_ALT))
828*b30d1939SAndy Fiddaman return BAD;
829*b30d1939SAndy Fiddaman catcher.type = REX_ALT_CATCH;
830*b30d1939SAndy Fiddaman catcher.serial = rex->serial;
831*b30d1939SAndy Fiddaman catcher.re.alt_catch.cont = cont;
832*b30d1939SAndy Fiddaman catcher.next = rex->next;
833*b30d1939SAndy Fiddaman r = parse(env, rex->re.group.expr.binary.left, &catcher, s);
834*b30d1939SAndy Fiddaman if (r < BEST || (rex->flags & REG_MINIMAL))
835*b30d1939SAndy Fiddaman {
836*b30d1939SAndy Fiddaman matchcopy(env, rex);
837*b30d1939SAndy Fiddaman ((Pos_t*)env->pos->vec + env->pos->cur - 1)->serial = catcher.serial = rex->re.group.expr.binary.serial;
838*b30d1939SAndy Fiddaman n = parse(env, rex->re.group.expr.binary.right, &catcher, s);
839*b30d1939SAndy Fiddaman if (n != NONE)
840*b30d1939SAndy Fiddaman r = n;
841*b30d1939SAndy Fiddaman }
842*b30d1939SAndy Fiddaman pospop(env);
843*b30d1939SAndy Fiddaman matchpop(env, rex);
844*b30d1939SAndy Fiddaman }
845*b30d1939SAndy Fiddaman else
846*b30d1939SAndy Fiddaman {
847*b30d1939SAndy Fiddaman if ((r = parse(env, rex->re.group.expr.binary.left, cont, s)) == NONE)
848*b30d1939SAndy Fiddaman r = parse(env, rex->re.group.expr.binary.right, cont, s);
849*b30d1939SAndy Fiddaman if (r == GOOD)
850*b30d1939SAndy Fiddaman r = BEST;
851*b30d1939SAndy Fiddaman }
852*b30d1939SAndy Fiddaman return r;
853*b30d1939SAndy Fiddaman case REX_ALT_CATCH:
854*b30d1939SAndy Fiddaman if (pospush(env, rex, s, END_ANY))
855*b30d1939SAndy Fiddaman return BAD;
856*b30d1939SAndy Fiddaman r = follow(env, rex, rex->re.alt_catch.cont, s);
857*b30d1939SAndy Fiddaman pospop(env);
858*b30d1939SAndy Fiddaman return r;
859*b30d1939SAndy Fiddaman case REX_BACK:
860*b30d1939SAndy Fiddaman o = &env->match[rex->lo];
861*b30d1939SAndy Fiddaman if (o->rm_so < 0)
862*b30d1939SAndy Fiddaman return NONE;
863*b30d1939SAndy Fiddaman i = o->rm_eo - o->rm_so;
864*b30d1939SAndy Fiddaman e = s + i;
865*b30d1939SAndy Fiddaman if (e > env->end)
866*b30d1939SAndy Fiddaman return NONE;
867*b30d1939SAndy Fiddaman t = env->beg + o->rm_so;
868*b30d1939SAndy Fiddaman if (!(p = rex->map))
869*b30d1939SAndy Fiddaman {
870*b30d1939SAndy Fiddaman while (s < e)
871*b30d1939SAndy Fiddaman if (*s++ != *t++)
872*b30d1939SAndy Fiddaman return NONE;
873*b30d1939SAndy Fiddaman }
874*b30d1939SAndy Fiddaman else if (!mbwide())
875*b30d1939SAndy Fiddaman {
876*b30d1939SAndy Fiddaman while (s < e)
877*b30d1939SAndy Fiddaman if (p[*s++] != p[*t++])
878*b30d1939SAndy Fiddaman return NONE;
879*b30d1939SAndy Fiddaman }
880*b30d1939SAndy Fiddaman else
881*b30d1939SAndy Fiddaman {
882*b30d1939SAndy Fiddaman while (s < e)
883*b30d1939SAndy Fiddaman {
884*b30d1939SAndy Fiddaman c = mbchar(s);
885*b30d1939SAndy Fiddaman d = mbchar(t);
886*b30d1939SAndy Fiddaman if (towupper(c) != towupper(d))
887*b30d1939SAndy Fiddaman return NONE;
888*b30d1939SAndy Fiddaman }
889*b30d1939SAndy Fiddaman }
890*b30d1939SAndy Fiddaman break;
891*b30d1939SAndy Fiddaman case REX_BEG:
892*b30d1939SAndy Fiddaman if ((!(rex->flags & REG_NEWLINE) || s <= env->beg || *(s - 1) != '\n') && ((env->flags & REG_NOTBOL) || s != env->beg))
893*b30d1939SAndy Fiddaman return NONE;
894*b30d1939SAndy Fiddaman break;
895*b30d1939SAndy Fiddaman case REX_CLASS:
896*b30d1939SAndy Fiddaman if (LEADING(env, rex, s))
897*b30d1939SAndy Fiddaman return NONE;
898*b30d1939SAndy Fiddaman n = rex->hi;
899*b30d1939SAndy Fiddaman if (n > env->end - s)
900*b30d1939SAndy Fiddaman n = env->end - s;
901*b30d1939SAndy Fiddaman m = rex->lo;
902*b30d1939SAndy Fiddaman if (m > n)
903*b30d1939SAndy Fiddaman return NONE;
904*b30d1939SAndy Fiddaman r = NONE;
905*b30d1939SAndy Fiddaman if (!(rex->flags & REG_MINIMAL))
906*b30d1939SAndy Fiddaman {
907*b30d1939SAndy Fiddaman for (i = 0; i < n; i++)
908*b30d1939SAndy Fiddaman if (!settst(rex->re.charclass, s[i]))
909*b30d1939SAndy Fiddaman {
910*b30d1939SAndy Fiddaman n = i;
911*b30d1939SAndy Fiddaman break;
912*b30d1939SAndy Fiddaman }
913*b30d1939SAndy Fiddaman for (s += n; n-- >= m; s--)
914*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
915*b30d1939SAndy Fiddaman {
916*b30d1939SAndy Fiddaman case BAD:
917*b30d1939SAndy Fiddaman return BAD;
918*b30d1939SAndy Fiddaman case CUT:
919*b30d1939SAndy Fiddaman return CUT;
920*b30d1939SAndy Fiddaman case BEST:
921*b30d1939SAndy Fiddaman return BEST;
922*b30d1939SAndy Fiddaman case GOOD:
923*b30d1939SAndy Fiddaman r = GOOD;
924*b30d1939SAndy Fiddaman break;
925*b30d1939SAndy Fiddaman }
926*b30d1939SAndy Fiddaman }
927*b30d1939SAndy Fiddaman else
928*b30d1939SAndy Fiddaman {
929*b30d1939SAndy Fiddaman for (e = s + m; s < e; s++)
930*b30d1939SAndy Fiddaman if (!settst(rex->re.charclass, *s))
931*b30d1939SAndy Fiddaman return r;
932*b30d1939SAndy Fiddaman e += n - m;
933*b30d1939SAndy Fiddaman for (;;)
934*b30d1939SAndy Fiddaman {
935*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
936*b30d1939SAndy Fiddaman {
937*b30d1939SAndy Fiddaman case BAD:
938*b30d1939SAndy Fiddaman return BAD;
939*b30d1939SAndy Fiddaman case CUT:
940*b30d1939SAndy Fiddaman return CUT;
941*b30d1939SAndy Fiddaman case BEST:
942*b30d1939SAndy Fiddaman case GOOD:
943*b30d1939SAndy Fiddaman return BEST;
944*b30d1939SAndy Fiddaman }
945*b30d1939SAndy Fiddaman if (s >= e || !settst(rex->re.charclass, *s))
946*b30d1939SAndy Fiddaman break;
947*b30d1939SAndy Fiddaman s++;
948*b30d1939SAndy Fiddaman }
949*b30d1939SAndy Fiddaman }
950*b30d1939SAndy Fiddaman return r;
951*b30d1939SAndy Fiddaman case REX_COLL_CLASS:
952*b30d1939SAndy Fiddaman if (LEADING(env, rex, s))
953*b30d1939SAndy Fiddaman return NONE;
954*b30d1939SAndy Fiddaman n = rex->hi;
955*b30d1939SAndy Fiddaman if (n > env->end - s)
956*b30d1939SAndy Fiddaman n = env->end - s;
957*b30d1939SAndy Fiddaman m = rex->lo;
958*b30d1939SAndy Fiddaman if (m > n)
959*b30d1939SAndy Fiddaman return NONE;
960*b30d1939SAndy Fiddaman r = NONE;
961*b30d1939SAndy Fiddaman e = env->end;
962*b30d1939SAndy Fiddaman if (!(rex->flags & REG_MINIMAL))
963*b30d1939SAndy Fiddaman {
964*b30d1939SAndy Fiddaman if (!(b = (unsigned char*)stkpush(stkstd, n)))
965*b30d1939SAndy Fiddaman {
966*b30d1939SAndy Fiddaman env->error = REG_ESPACE;
967*b30d1939SAndy Fiddaman return BAD;
968*b30d1939SAndy Fiddaman }
969*b30d1939SAndy Fiddaman for (i = 0; s < e && i < n && collmatch(rex, s, e, &t); i++)
970*b30d1939SAndy Fiddaman {
971*b30d1939SAndy Fiddaman b[i] = t - s;
972*b30d1939SAndy Fiddaman s = t;
973*b30d1939SAndy Fiddaman }
974*b30d1939SAndy Fiddaman for (; i-- >= rex->lo; s -= b[i])
975*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
976*b30d1939SAndy Fiddaman {
977*b30d1939SAndy Fiddaman case BAD:
978*b30d1939SAndy Fiddaman stkpop(stkstd);
979*b30d1939SAndy Fiddaman return BAD;
980*b30d1939SAndy Fiddaman case CUT:
981*b30d1939SAndy Fiddaman stkpop(stkstd);
982*b30d1939SAndy Fiddaman return CUT;
983*b30d1939SAndy Fiddaman case BEST:
984*b30d1939SAndy Fiddaman stkpop(stkstd);
985*b30d1939SAndy Fiddaman return BEST;
986*b30d1939SAndy Fiddaman case GOOD:
987*b30d1939SAndy Fiddaman r = GOOD;
988*b30d1939SAndy Fiddaman break;
989*b30d1939SAndy Fiddaman }
990*b30d1939SAndy Fiddaman stkpop(stkstd);
991*b30d1939SAndy Fiddaman }
992*b30d1939SAndy Fiddaman else
993*b30d1939SAndy Fiddaman {
994*b30d1939SAndy Fiddaman for (i = 0; i < m && s < e; i++, s = t)
995*b30d1939SAndy Fiddaman if (!collmatch(rex, s, e, &t))
996*b30d1939SAndy Fiddaman return r;
997*b30d1939SAndy Fiddaman while (i++ <= n)
998*b30d1939SAndy Fiddaman {
999*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
1000*b30d1939SAndy Fiddaman {
1001*b30d1939SAndy Fiddaman case BAD:
1002*b30d1939SAndy Fiddaman return BAD;
1003*b30d1939SAndy Fiddaman case CUT:
1004*b30d1939SAndy Fiddaman return CUT;
1005*b30d1939SAndy Fiddaman case BEST:
1006*b30d1939SAndy Fiddaman case GOOD:
1007*b30d1939SAndy Fiddaman return BEST;
1008*b30d1939SAndy Fiddaman }
1009*b30d1939SAndy Fiddaman if (s >= e || !collmatch(rex, s, e, &s))
1010*b30d1939SAndy Fiddaman break;
1011*b30d1939SAndy Fiddaman }
1012*b30d1939SAndy Fiddaman }
1013*b30d1939SAndy Fiddaman return r;
1014*b30d1939SAndy Fiddaman case REX_CONJ:
1015*b30d1939SAndy Fiddaman next.type = REX_CONJ_RIGHT;
1016*b30d1939SAndy Fiddaman next.re.conj_right.cont = cont;
1017*b30d1939SAndy Fiddaman next.next = rex->next;
1018*b30d1939SAndy Fiddaman catcher.type = REX_CONJ_LEFT;
1019*b30d1939SAndy Fiddaman catcher.re.conj_left.right = rex->re.group.expr.binary.right;
1020*b30d1939SAndy Fiddaman catcher.re.conj_left.cont = &next;
1021*b30d1939SAndy Fiddaman catcher.re.conj_left.beg = s;
1022*b30d1939SAndy Fiddaman catcher.next = 0;
1023*b30d1939SAndy Fiddaman return parse(env, rex->re.group.expr.binary.left, &catcher, s);
1024*b30d1939SAndy Fiddaman case REX_CONJ_LEFT:
1025*b30d1939SAndy Fiddaman rex->re.conj_left.cont->re.conj_right.end = s;
1026*b30d1939SAndy Fiddaman cont = rex->re.conj_left.cont;
1027*b30d1939SAndy Fiddaman s = rex->re.conj_left.beg;
1028*b30d1939SAndy Fiddaman rex = rex->re.conj_left.right;
1029*b30d1939SAndy Fiddaman continue;
1030*b30d1939SAndy Fiddaman case REX_CONJ_RIGHT:
1031*b30d1939SAndy Fiddaman if (rex->re.conj_right.end != s)
1032*b30d1939SAndy Fiddaman return NONE;
1033*b30d1939SAndy Fiddaman cont = rex->re.conj_right.cont;
1034*b30d1939SAndy Fiddaman break;
1035*b30d1939SAndy Fiddaman case REX_DONE:
1036*b30d1939SAndy Fiddaman if (!env->stack)
1037*b30d1939SAndy Fiddaman return BEST;
1038*b30d1939SAndy Fiddaman n = s - env->beg;
1039*b30d1939SAndy Fiddaman r = env->nsub;
1040*b30d1939SAndy Fiddaman DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%z,%z)(%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1041*b30d1939SAndy Fiddaman if ((i = env->best[0].rm_eo) >= 0)
1042*b30d1939SAndy Fiddaman {
1043*b30d1939SAndy Fiddaman if (rex->flags & REG_MINIMAL)
1044*b30d1939SAndy Fiddaman {
1045*b30d1939SAndy Fiddaman if (n > i)
1046*b30d1939SAndy Fiddaman return GOOD;
1047*b30d1939SAndy Fiddaman }
1048*b30d1939SAndy Fiddaman else
1049*b30d1939SAndy Fiddaman {
1050*b30d1939SAndy Fiddaman if (n < i)
1051*b30d1939SAndy Fiddaman return GOOD;
1052*b30d1939SAndy Fiddaman }
1053*b30d1939SAndy Fiddaman if (n == i && better(env,
1054*b30d1939SAndy Fiddaman (Pos_t*)env->bestpos->vec,
1055*b30d1939SAndy Fiddaman (Pos_t*)env->pos->vec,
1056*b30d1939SAndy Fiddaman (Pos_t*)env->bestpos->vec+env->bestpos->cur,
1057*b30d1939SAndy Fiddaman (Pos_t*)env->pos->vec+env->pos->cur,
1058*b30d1939SAndy Fiddaman 0) <= 0)
1059*b30d1939SAndy Fiddaman return GOOD;
1060*b30d1939SAndy Fiddaman }
1061*b30d1939SAndy Fiddaman env->best[0].rm_eo = n;
1062*b30d1939SAndy Fiddaman memcpy(&env->best[1], &env->match[1], r * sizeof(regmatch_t));
1063*b30d1939SAndy Fiddaman n = env->pos->cur;
1064*b30d1939SAndy Fiddaman if (!vector(Pos_t, env->bestpos, n))
1065*b30d1939SAndy Fiddaman {
1066*b30d1939SAndy Fiddaman env->error = REG_ESPACE;
1067*b30d1939SAndy Fiddaman return BAD;
1068*b30d1939SAndy Fiddaman }
1069*b30d1939SAndy Fiddaman env->bestpos->cur = n;
1070*b30d1939SAndy Fiddaman memcpy(env->bestpos->vec, env->pos->vec, n * sizeof(Pos_t));
1071*b30d1939SAndy Fiddaman DEBUG_TEST(0x0100,(sfprintf(sfstdout,"AHA#%04d 0x%04x %s (%z,%z)(%z,%z)(%z,%z)(%z,%z) (%z,%z)(%z,%z)\n", __LINE__, debug_flag, rexname(rex), env->best[0].rm_so, env->best[0].rm_eo, env->best[1].rm_so, env->best[1].rm_eo, env->best[2].rm_so, env->best[2].rm_eo, env->best[3].rm_so, env->best[3].rm_eo, env->match[0].rm_so, env->match[0].rm_eo, env->match[1].rm_so, env->match[1].rm_eo)),(0));
1072*b30d1939SAndy Fiddaman return GOOD;
1073*b30d1939SAndy Fiddaman case REX_DOT:
1074*b30d1939SAndy Fiddaman if (LEADING(env, rex, s))
1075*b30d1939SAndy Fiddaman return NONE;
1076*b30d1939SAndy Fiddaman n = rex->hi;
1077*b30d1939SAndy Fiddaman if (n > env->end - s)
1078*b30d1939SAndy Fiddaman n = env->end - s;
1079*b30d1939SAndy Fiddaman m = rex->lo;
1080*b30d1939SAndy Fiddaman if (m > n)
1081*b30d1939SAndy Fiddaman return NONE;
1082*b30d1939SAndy Fiddaman if ((c = rex->explicit) >= 0 && !mbwide())
1083*b30d1939SAndy Fiddaman for (i = 0; i < n; i++)
1084*b30d1939SAndy Fiddaman if (s[i] == c)
1085*b30d1939SAndy Fiddaman {
1086*b30d1939SAndy Fiddaman n = i;
1087*b30d1939SAndy Fiddaman break;
1088*b30d1939SAndy Fiddaman }
1089*b30d1939SAndy Fiddaman r = NONE;
1090*b30d1939SAndy Fiddaman if (!(rex->flags & REG_MINIMAL))
1091*b30d1939SAndy Fiddaman {
1092*b30d1939SAndy Fiddaman if (!mbwide())
1093*b30d1939SAndy Fiddaman {
1094*b30d1939SAndy Fiddaman for (s += n; n-- >= m; s--)
1095*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
1096*b30d1939SAndy Fiddaman {
1097*b30d1939SAndy Fiddaman case BAD:
1098*b30d1939SAndy Fiddaman return BAD;
1099*b30d1939SAndy Fiddaman case CUT:
1100*b30d1939SAndy Fiddaman return CUT;
1101*b30d1939SAndy Fiddaman case BEST:
1102*b30d1939SAndy Fiddaman return BEST;
1103*b30d1939SAndy Fiddaman case GOOD:
1104*b30d1939SAndy Fiddaman r = GOOD;
1105*b30d1939SAndy Fiddaman break;
1106*b30d1939SAndy Fiddaman }
1107*b30d1939SAndy Fiddaman }
1108*b30d1939SAndy Fiddaman else
1109*b30d1939SAndy Fiddaman {
1110*b30d1939SAndy Fiddaman if (!(b = (unsigned char*)stkpush(stkstd, n)))
1111*b30d1939SAndy Fiddaman {
1112*b30d1939SAndy Fiddaman env->error = REG_ESPACE;
1113*b30d1939SAndy Fiddaman return BAD;
1114*b30d1939SAndy Fiddaman }
1115*b30d1939SAndy Fiddaman e = env->end;
1116*b30d1939SAndy Fiddaman for (i = 0; s < e && i < n && *s != c; i++)
1117*b30d1939SAndy Fiddaman s += b[i] = MBSIZE(s);
1118*b30d1939SAndy Fiddaman for (; i-- >= m; s -= b[i])
1119*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
1120*b30d1939SAndy Fiddaman {
1121*b30d1939SAndy Fiddaman case BAD:
1122*b30d1939SAndy Fiddaman stkpop(stkstd);
1123*b30d1939SAndy Fiddaman return BAD;
1124*b30d1939SAndy Fiddaman case CUT:
1125*b30d1939SAndy Fiddaman stkpop(stkstd);
1126*b30d1939SAndy Fiddaman return CUT;
1127*b30d1939SAndy Fiddaman case BEST:
1128*b30d1939SAndy Fiddaman stkpop(stkstd);
1129*b30d1939SAndy Fiddaman return BEST;
1130*b30d1939SAndy Fiddaman case GOOD:
1131*b30d1939SAndy Fiddaman r = GOOD;
1132*b30d1939SAndy Fiddaman break;
1133*b30d1939SAndy Fiddaman }
1134*b30d1939SAndy Fiddaman stkpop(stkstd);
1135*b30d1939SAndy Fiddaman }
1136*b30d1939SAndy Fiddaman }
1137*b30d1939SAndy Fiddaman else
1138*b30d1939SAndy Fiddaman {
1139*b30d1939SAndy Fiddaman if (!mbwide())
1140*b30d1939SAndy Fiddaman {
1141*b30d1939SAndy Fiddaman e = s + n;
1142*b30d1939SAndy Fiddaman for (s += m; s <= e; s++)
1143*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
1144*b30d1939SAndy Fiddaman {
1145*b30d1939SAndy Fiddaman case BAD:
1146*b30d1939SAndy Fiddaman return BAD;
1147*b30d1939SAndy Fiddaman case CUT:
1148*b30d1939SAndy Fiddaman return CUT;
1149*b30d1939SAndy Fiddaman case BEST:
1150*b30d1939SAndy Fiddaman case GOOD:
1151*b30d1939SAndy Fiddaman return BEST;
1152*b30d1939SAndy Fiddaman }
1153*b30d1939SAndy Fiddaman }
1154*b30d1939SAndy Fiddaman else
1155*b30d1939SAndy Fiddaman {
1156*b30d1939SAndy Fiddaman e = env->end;
1157*b30d1939SAndy Fiddaman for (i = 0; s < e && i < m && *s != c; i++)
1158*b30d1939SAndy Fiddaman s += MBSIZE(s);
1159*b30d1939SAndy Fiddaman if (i >= m)
1160*b30d1939SAndy Fiddaman for (; s <= e && i <= n; s += MBSIZE(s), i++)
1161*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
1162*b30d1939SAndy Fiddaman {
1163*b30d1939SAndy Fiddaman case BAD:
1164*b30d1939SAndy Fiddaman return BAD;
1165*b30d1939SAndy Fiddaman case CUT:
1166*b30d1939SAndy Fiddaman return CUT;
1167*b30d1939SAndy Fiddaman case BEST:
1168*b30d1939SAndy Fiddaman case GOOD:
1169*b30d1939SAndy Fiddaman return BEST;
1170*b30d1939SAndy Fiddaman }
1171*b30d1939SAndy Fiddaman }
1172*b30d1939SAndy Fiddaman }
1173*b30d1939SAndy Fiddaman return r;
1174*b30d1939SAndy Fiddaman case REX_END:
1175*b30d1939SAndy Fiddaman if ((!(rex->flags & REG_NEWLINE) || *s != '\n') && ((env->flags & REG_NOTEOL) || s < env->end))
1176*b30d1939SAndy Fiddaman return NONE;
1177*b30d1939SAndy Fiddaman break;
1178*b30d1939SAndy Fiddaman case REX_GROUP:
1179*b30d1939SAndy Fiddaman DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), env->end - s, s)),(0));
1180*b30d1939SAndy Fiddaman if (env->stack)
1181*b30d1939SAndy Fiddaman {
1182*b30d1939SAndy Fiddaman if (rex->re.group.number)
1183*b30d1939SAndy Fiddaman env->match[rex->re.group.number].rm_so = s - env->beg;
1184*b30d1939SAndy Fiddaman if (pospush(env, rex, s, BEG_SUB))
1185*b30d1939SAndy Fiddaman return BAD;
1186*b30d1939SAndy Fiddaman catcher.re.group_catch.eo = rex->re.group.number ? &env->match[rex->re.group.number].rm_eo : (regoff_t*)0;
1187*b30d1939SAndy Fiddaman }
1188*b30d1939SAndy Fiddaman catcher.type = REX_GROUP_CATCH;
1189*b30d1939SAndy Fiddaman catcher.serial = rex->serial;
1190*b30d1939SAndy Fiddaman catcher.re.group_catch.cont = cont;
1191*b30d1939SAndy Fiddaman catcher.next = rex->next;
1192*b30d1939SAndy Fiddaman r = parse(env, rex->re.group.expr.rex, &catcher, s);
1193*b30d1939SAndy Fiddaman if (env->stack)
1194*b30d1939SAndy Fiddaman {
1195*b30d1939SAndy Fiddaman pospop(env);
1196*b30d1939SAndy Fiddaman if (rex->re.group.number)
1197*b30d1939SAndy Fiddaman env->match[rex->re.group.number].rm_so = -1;
1198*b30d1939SAndy Fiddaman }
1199*b30d1939SAndy Fiddaman return r;
1200*b30d1939SAndy Fiddaman case REX_GROUP_CATCH:
1201*b30d1939SAndy Fiddaman DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s=>%s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rexname(rex->re.group_catch.cont), env->end - s, s)),(0));
1202*b30d1939SAndy Fiddaman if (env->stack)
1203*b30d1939SAndy Fiddaman {
1204*b30d1939SAndy Fiddaman if (rex->re.group_catch.eo)
1205*b30d1939SAndy Fiddaman *rex->re.group_catch.eo = s - env->beg;
1206*b30d1939SAndy Fiddaman if (pospush(env, rex, s, END_ANY))
1207*b30d1939SAndy Fiddaman return BAD;
1208*b30d1939SAndy Fiddaman }
1209*b30d1939SAndy Fiddaman r = follow(env, rex, rex->re.group_catch.cont, s);
1210*b30d1939SAndy Fiddaman if (env->stack)
1211*b30d1939SAndy Fiddaman {
1212*b30d1939SAndy Fiddaman pospop(env);
1213*b30d1939SAndy Fiddaman if (rex->re.group_catch.eo)
1214*b30d1939SAndy Fiddaman *rex->re.group_catch.eo = -1;
1215*b30d1939SAndy Fiddaman }
1216*b30d1939SAndy Fiddaman return r;
1217*b30d1939SAndy Fiddaman case REX_GROUP_AHEAD:
1218*b30d1939SAndy Fiddaman catcher.type = REX_GROUP_AHEAD_CATCH;
1219*b30d1939SAndy Fiddaman catcher.flags = rex->flags;
1220*b30d1939SAndy Fiddaman catcher.serial = rex->serial;
1221*b30d1939SAndy Fiddaman catcher.re.rep_catch.beg = s;
1222*b30d1939SAndy Fiddaman catcher.re.rep_catch.cont = cont;
1223*b30d1939SAndy Fiddaman catcher.next = rex->next;
1224*b30d1939SAndy Fiddaman return parse(env, rex->re.group.expr.rex, &catcher, s);
1225*b30d1939SAndy Fiddaman case REX_GROUP_AHEAD_CATCH:
1226*b30d1939SAndy Fiddaman return follow(env, rex, rex->re.rep_catch.cont, rex->re.rep_catch.beg);
1227*b30d1939SAndy Fiddaman case REX_GROUP_AHEAD_NOT:
1228*b30d1939SAndy Fiddaman r = parse(env, rex->re.group.expr.rex, NiL, s);
1229*b30d1939SAndy Fiddaman if (r == NONE)
1230*b30d1939SAndy Fiddaman r = follow(env, rex, cont, s);
1231*b30d1939SAndy Fiddaman else if (r != BAD)
1232*b30d1939SAndy Fiddaman r = NONE;
1233*b30d1939SAndy Fiddaman return r;
1234*b30d1939SAndy Fiddaman case REX_GROUP_BEHIND:
1235*b30d1939SAndy Fiddaman if ((s - env->beg) < rex->re.group.size)
1236*b30d1939SAndy Fiddaman return NONE;
1237*b30d1939SAndy Fiddaman catcher.type = REX_GROUP_BEHIND_CATCH;
1238*b30d1939SAndy Fiddaman catcher.flags = rex->flags;
1239*b30d1939SAndy Fiddaman catcher.serial = rex->serial;
1240*b30d1939SAndy Fiddaman catcher.re.behind_catch.beg = s;
1241*b30d1939SAndy Fiddaman catcher.re.behind_catch.end = e = env->end;
1242*b30d1939SAndy Fiddaman catcher.re.behind_catch.cont = cont;
1243*b30d1939SAndy Fiddaman catcher.next = rex->next;
1244*b30d1939SAndy Fiddaman for (t = s - rex->re.group.size; t >= env->beg; t--)
1245*b30d1939SAndy Fiddaman {
1246*b30d1939SAndy Fiddaman env->end = s;
1247*b30d1939SAndy Fiddaman r = parse(env, rex->re.group.expr.rex, &catcher, t);
1248*b30d1939SAndy Fiddaman env->end = e;
1249*b30d1939SAndy Fiddaman if (r != NONE)
1250*b30d1939SAndy Fiddaman return r;
1251*b30d1939SAndy Fiddaman }
1252*b30d1939SAndy Fiddaman return NONE;
1253*b30d1939SAndy Fiddaman case REX_GROUP_BEHIND_CATCH:
1254*b30d1939SAndy Fiddaman if (s != rex->re.behind_catch.beg)
1255*b30d1939SAndy Fiddaman return NONE;
1256*b30d1939SAndy Fiddaman env->end = rex->re.behind_catch.end;
1257*b30d1939SAndy Fiddaman return follow(env, rex, rex->re.behind_catch.cont, rex->re.behind_catch.beg);
1258*b30d1939SAndy Fiddaman case REX_GROUP_BEHIND_NOT:
1259*b30d1939SAndy Fiddaman if ((s - env->beg) < rex->re.group.size)
1260*b30d1939SAndy Fiddaman r = NONE;
1261*b30d1939SAndy Fiddaman else
1262*b30d1939SAndy Fiddaman {
1263*b30d1939SAndy Fiddaman catcher.type = REX_GROUP_BEHIND_NOT_CATCH;
1264*b30d1939SAndy Fiddaman catcher.re.neg_catch.beg = s;
1265*b30d1939SAndy Fiddaman catcher.next = 0;
1266*b30d1939SAndy Fiddaman e = env->end;
1267*b30d1939SAndy Fiddaman env->end = s;
1268*b30d1939SAndy Fiddaman for (t = s - rex->re.group.size; t >= env->beg; t--)
1269*b30d1939SAndy Fiddaman {
1270*b30d1939SAndy Fiddaman r = parse(env, rex->re.group.expr.rex, &catcher, t);
1271*b30d1939SAndy Fiddaman if (r != NONE)
1272*b30d1939SAndy Fiddaman break;
1273*b30d1939SAndy Fiddaman }
1274*b30d1939SAndy Fiddaman env->end = e;
1275*b30d1939SAndy Fiddaman }
1276*b30d1939SAndy Fiddaman if (r == NONE)
1277*b30d1939SAndy Fiddaman r = follow(env, rex, cont, s);
1278*b30d1939SAndy Fiddaman else if (r != BAD)
1279*b30d1939SAndy Fiddaman r = NONE;
1280*b30d1939SAndy Fiddaman return r;
1281*b30d1939SAndy Fiddaman case REX_GROUP_BEHIND_NOT_CATCH:
1282*b30d1939SAndy Fiddaman return s == rex->re.neg_catch.beg ? GOOD : NONE;
1283*b30d1939SAndy Fiddaman case REX_GROUP_COND:
1284*b30d1939SAndy Fiddaman if (q = rex->re.group.expr.binary.right)
1285*b30d1939SAndy Fiddaman {
1286*b30d1939SAndy Fiddaman catcher.re.cond_catch.next[0] = q->re.group.expr.binary.right;
1287*b30d1939SAndy Fiddaman catcher.re.cond_catch.next[1] = q->re.group.expr.binary.left;
1288*b30d1939SAndy Fiddaman }
1289*b30d1939SAndy Fiddaman else
1290*b30d1939SAndy Fiddaman catcher.re.cond_catch.next[0] = catcher.re.cond_catch.next[1] = 0;
1291*b30d1939SAndy Fiddaman if (q = rex->re.group.expr.binary.left)
1292*b30d1939SAndy Fiddaman {
1293*b30d1939SAndy Fiddaman catcher.type = REX_GROUP_COND_CATCH;
1294*b30d1939SAndy Fiddaman catcher.flags = rex->flags;
1295*b30d1939SAndy Fiddaman catcher.serial = rex->serial;
1296*b30d1939SAndy Fiddaman catcher.re.cond_catch.yes = 0;
1297*b30d1939SAndy Fiddaman catcher.re.cond_catch.beg = s;
1298*b30d1939SAndy Fiddaman catcher.re.cond_catch.cont = cont;
1299*b30d1939SAndy Fiddaman catcher.next = rex->next;
1300*b30d1939SAndy Fiddaman r = parse(env, q, &catcher, s);
1301*b30d1939SAndy Fiddaman if (r == BAD || catcher.re.cond_catch.yes)
1302*b30d1939SAndy Fiddaman return r;
1303*b30d1939SAndy Fiddaman }
1304*b30d1939SAndy Fiddaman else if (!rex->re.group.size || rex->re.group.size > 0 && env->match[rex->re.group.size].rm_so >= 0)
1305*b30d1939SAndy Fiddaman r = GOOD;
1306*b30d1939SAndy Fiddaman else
1307*b30d1939SAndy Fiddaman r = NONE;
1308*b30d1939SAndy Fiddaman if (q = catcher.re.cond_catch.next[r != NONE])
1309*b30d1939SAndy Fiddaman {
1310*b30d1939SAndy Fiddaman catcher.type = REX_CAT;
1311*b30d1939SAndy Fiddaman catcher.flags = q->flags;
1312*b30d1939SAndy Fiddaman catcher.serial = q->serial;
1313*b30d1939SAndy Fiddaman catcher.re.group_catch.cont = cont;
1314*b30d1939SAndy Fiddaman catcher.next = rex->next;
1315*b30d1939SAndy Fiddaman return parse(env, q, &catcher, s);
1316*b30d1939SAndy Fiddaman }
1317*b30d1939SAndy Fiddaman return follow(env, rex, cont, s);
1318*b30d1939SAndy Fiddaman case REX_GROUP_COND_CATCH:
1319*b30d1939SAndy Fiddaman rex->re.cond_catch.yes = 1;
1320*b30d1939SAndy Fiddaman catcher.type = REX_CAT;
1321*b30d1939SAndy Fiddaman catcher.flags = rex->flags;
1322*b30d1939SAndy Fiddaman catcher.serial = rex->serial;
1323*b30d1939SAndy Fiddaman catcher.re.group_catch.cont = rex->re.cond_catch.cont;
1324*b30d1939SAndy Fiddaman catcher.next = rex->next;
1325*b30d1939SAndy Fiddaman return parse(env, rex->re.cond_catch.next[1], &catcher, rex->re.cond_catch.beg);
1326*b30d1939SAndy Fiddaman case REX_CAT:
1327*b30d1939SAndy Fiddaman return follow(env, rex, rex->re.group_catch.cont, s);
1328*b30d1939SAndy Fiddaman case REX_GROUP_CUT:
1329*b30d1939SAndy Fiddaman catcher.type = REX_GROUP_CUT_CATCH;
1330*b30d1939SAndy Fiddaman catcher.flags = rex->flags;
1331*b30d1939SAndy Fiddaman catcher.serial = rex->serial;
1332*b30d1939SAndy Fiddaman catcher.re.group_catch.cont = cont;
1333*b30d1939SAndy Fiddaman catcher.next = rex->next;
1334*b30d1939SAndy Fiddaman return parse(env, rex->re.group.expr.rex, &catcher, s);
1335*b30d1939SAndy Fiddaman case REX_GROUP_CUT_CATCH:
1336*b30d1939SAndy Fiddaman switch (r = follow(env, rex, rex->re.group_catch.cont, s))
1337*b30d1939SAndy Fiddaman {
1338*b30d1939SAndy Fiddaman case GOOD:
1339*b30d1939SAndy Fiddaman r = BEST;
1340*b30d1939SAndy Fiddaman break;
1341*b30d1939SAndy Fiddaman case NONE:
1342*b30d1939SAndy Fiddaman r = CUT;
1343*b30d1939SAndy Fiddaman break;
1344*b30d1939SAndy Fiddaman }
1345*b30d1939SAndy Fiddaman return r;
1346*b30d1939SAndy Fiddaman case REX_KMP:
1347*b30d1939SAndy Fiddaman f = rex->re.string.fail;
1348*b30d1939SAndy Fiddaman b = rex->re.string.base;
1349*b30d1939SAndy Fiddaman n = rex->re.string.size;
1350*b30d1939SAndy Fiddaman t = s;
1351*b30d1939SAndy Fiddaman e = env->end;
1352*b30d1939SAndy Fiddaman if (p = rex->map)
1353*b30d1939SAndy Fiddaman {
1354*b30d1939SAndy Fiddaman while (t + n <= e)
1355*b30d1939SAndy Fiddaman {
1356*b30d1939SAndy Fiddaman for (i = -1; t < e; t++)
1357*b30d1939SAndy Fiddaman {
1358*b30d1939SAndy Fiddaman while (i >= 0 && b[i+1] != p[*t])
1359*b30d1939SAndy Fiddaman i = f[i];
1360*b30d1939SAndy Fiddaman if (b[i+1] == p[*t])
1361*b30d1939SAndy Fiddaman i++;
1362*b30d1939SAndy Fiddaman if (i + 1 == n)
1363*b30d1939SAndy Fiddaman {
1364*b30d1939SAndy Fiddaman t++;
1365*b30d1939SAndy Fiddaman if (env->stack)
1366*b30d1939SAndy Fiddaman env->best[0].rm_so = t - s - n;
1367*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, t))
1368*b30d1939SAndy Fiddaman {
1369*b30d1939SAndy Fiddaman case BAD:
1370*b30d1939SAndy Fiddaman return BAD;
1371*b30d1939SAndy Fiddaman case CUT:
1372*b30d1939SAndy Fiddaman return CUT;
1373*b30d1939SAndy Fiddaman case BEST:
1374*b30d1939SAndy Fiddaman case GOOD:
1375*b30d1939SAndy Fiddaman return BEST;
1376*b30d1939SAndy Fiddaman }
1377*b30d1939SAndy Fiddaman t -= n - 1;
1378*b30d1939SAndy Fiddaman break;
1379*b30d1939SAndy Fiddaman }
1380*b30d1939SAndy Fiddaman }
1381*b30d1939SAndy Fiddaman }
1382*b30d1939SAndy Fiddaman }
1383*b30d1939SAndy Fiddaman else
1384*b30d1939SAndy Fiddaman {
1385*b30d1939SAndy Fiddaman while (t + n <= e)
1386*b30d1939SAndy Fiddaman {
1387*b30d1939SAndy Fiddaman for (i = -1; t < e; t++)
1388*b30d1939SAndy Fiddaman {
1389*b30d1939SAndy Fiddaman while (i >= 0 && b[i+1] != *t)
1390*b30d1939SAndy Fiddaman i = f[i];
1391*b30d1939SAndy Fiddaman if (b[i+1] == *t)
1392*b30d1939SAndy Fiddaman i++;
1393*b30d1939SAndy Fiddaman if (i + 1 == n)
1394*b30d1939SAndy Fiddaman {
1395*b30d1939SAndy Fiddaman t++;
1396*b30d1939SAndy Fiddaman if (env->stack)
1397*b30d1939SAndy Fiddaman env->best[0].rm_so = t - s - n;
1398*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, t))
1399*b30d1939SAndy Fiddaman {
1400*b30d1939SAndy Fiddaman case BAD:
1401*b30d1939SAndy Fiddaman return BAD;
1402*b30d1939SAndy Fiddaman case CUT:
1403*b30d1939SAndy Fiddaman return CUT;
1404*b30d1939SAndy Fiddaman case BEST:
1405*b30d1939SAndy Fiddaman case GOOD:
1406*b30d1939SAndy Fiddaman return BEST;
1407*b30d1939SAndy Fiddaman }
1408*b30d1939SAndy Fiddaman t -= n - 1;
1409*b30d1939SAndy Fiddaman break;
1410*b30d1939SAndy Fiddaman }
1411*b30d1939SAndy Fiddaman }
1412*b30d1939SAndy Fiddaman }
1413*b30d1939SAndy Fiddaman }
1414*b30d1939SAndy Fiddaman return NONE;
1415*b30d1939SAndy Fiddaman case REX_NEG:
1416*b30d1939SAndy Fiddaman if (LEADING(env, rex, s))
1417*b30d1939SAndy Fiddaman return NONE;
1418*b30d1939SAndy Fiddaman i = env->end - s;
1419*b30d1939SAndy Fiddaman n = ((i + 7) >> 3) + 1;
1420*b30d1939SAndy Fiddaman catcher.type = REX_NEG_CATCH;
1421*b30d1939SAndy Fiddaman catcher.re.neg_catch.beg = s;
1422*b30d1939SAndy Fiddaman if (!(p = (unsigned char*)stkpush(stkstd, n)))
1423*b30d1939SAndy Fiddaman return BAD;
1424*b30d1939SAndy Fiddaman memset(catcher.re.neg_catch.index = p, 0, n);
1425*b30d1939SAndy Fiddaman catcher.next = rex->next;
1426*b30d1939SAndy Fiddaman if (parse(env, rex->re.group.expr.rex, &catcher, s) == BAD)
1427*b30d1939SAndy Fiddaman r = BAD;
1428*b30d1939SAndy Fiddaman else
1429*b30d1939SAndy Fiddaman {
1430*b30d1939SAndy Fiddaman r = NONE;
1431*b30d1939SAndy Fiddaman for (; i >= 0; i--)
1432*b30d1939SAndy Fiddaman if (!bittst(p, i))
1433*b30d1939SAndy Fiddaman {
1434*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s + i))
1435*b30d1939SAndy Fiddaman {
1436*b30d1939SAndy Fiddaman case BAD:
1437*b30d1939SAndy Fiddaman r = BAD;
1438*b30d1939SAndy Fiddaman break;
1439*b30d1939SAndy Fiddaman case BEST:
1440*b30d1939SAndy Fiddaman r = BEST;
1441*b30d1939SAndy Fiddaman break;
1442*b30d1939SAndy Fiddaman case CUT:
1443*b30d1939SAndy Fiddaman r = CUT;
1444*b30d1939SAndy Fiddaman break;
1445*b30d1939SAndy Fiddaman case GOOD:
1446*b30d1939SAndy Fiddaman r = GOOD;
1447*b30d1939SAndy Fiddaman /*FALLTHROUGH*/
1448*b30d1939SAndy Fiddaman default:
1449*b30d1939SAndy Fiddaman continue;
1450*b30d1939SAndy Fiddaman }
1451*b30d1939SAndy Fiddaman break;
1452*b30d1939SAndy Fiddaman }
1453*b30d1939SAndy Fiddaman }
1454*b30d1939SAndy Fiddaman stkpop(stkstd);
1455*b30d1939SAndy Fiddaman return r;
1456*b30d1939SAndy Fiddaman case REX_NEG_CATCH:
1457*b30d1939SAndy Fiddaman bitset(rex->re.neg_catch.index, s - rex->re.neg_catch.beg);
1458*b30d1939SAndy Fiddaman return NONE;
1459*b30d1939SAndy Fiddaman case REX_NEST:
1460*b30d1939SAndy Fiddaman if (s >= env->end)
1461*b30d1939SAndy Fiddaman return NONE;
1462*b30d1939SAndy Fiddaman do
1463*b30d1939SAndy Fiddaman {
1464*b30d1939SAndy Fiddaman if ((c = *s++) == rex->re.nest.primary)
1465*b30d1939SAndy Fiddaman {
1466*b30d1939SAndy Fiddaman if (s >= env->end || !(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1467*b30d1939SAndy Fiddaman return NONE;
1468*b30d1939SAndy Fiddaman break;
1469*b30d1939SAndy Fiddaman }
1470*b30d1939SAndy Fiddaman if (rex->re.nest.primary >= 0)
1471*b30d1939SAndy Fiddaman return NONE;
1472*b30d1939SAndy Fiddaman if (rex->re.nest.type[c] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator))
1473*b30d1939SAndy Fiddaman break;
1474*b30d1939SAndy Fiddaman if (!(s = nestmatch(s, env->end, rex->re.nest.type, c)))
1475*b30d1939SAndy Fiddaman return NONE;
1476*b30d1939SAndy Fiddaman } while (s < env->end && !(rex->re.nest.type[*(s-1)] & (REX_NEST_delimiter|REX_NEST_separator|REX_NEST_terminator)));
1477*b30d1939SAndy Fiddaman break;
1478*b30d1939SAndy Fiddaman case REX_NULL:
1479*b30d1939SAndy Fiddaman break;
1480*b30d1939SAndy Fiddaman case REX_ONECHAR:
1481*b30d1939SAndy Fiddaman n = rex->hi;
1482*b30d1939SAndy Fiddaman if (n > env->end - s)
1483*b30d1939SAndy Fiddaman n = env->end - s;
1484*b30d1939SAndy Fiddaman m = rex->lo;
1485*b30d1939SAndy Fiddaman if (m > n)
1486*b30d1939SAndy Fiddaman return NONE;
1487*b30d1939SAndy Fiddaman r = NONE;
1488*b30d1939SAndy Fiddaman c = rex->re.onechar;
1489*b30d1939SAndy Fiddaman if (!(rex->flags & REG_MINIMAL))
1490*b30d1939SAndy Fiddaman {
1491*b30d1939SAndy Fiddaman if (!mbwide())
1492*b30d1939SAndy Fiddaman {
1493*b30d1939SAndy Fiddaman if (p = rex->map)
1494*b30d1939SAndy Fiddaman {
1495*b30d1939SAndy Fiddaman for (i = 0; i < n; i++, s++)
1496*b30d1939SAndy Fiddaman if (p[*s] != c)
1497*b30d1939SAndy Fiddaman break;
1498*b30d1939SAndy Fiddaman }
1499*b30d1939SAndy Fiddaman else
1500*b30d1939SAndy Fiddaman {
1501*b30d1939SAndy Fiddaman for (i = 0; i < n; i++, s++)
1502*b30d1939SAndy Fiddaman if (*s != c)
1503*b30d1939SAndy Fiddaman break;
1504*b30d1939SAndy Fiddaman }
1505*b30d1939SAndy Fiddaman for (; i-- >= m; s--)
1506*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
1507*b30d1939SAndy Fiddaman {
1508*b30d1939SAndy Fiddaman case BAD:
1509*b30d1939SAndy Fiddaman return BAD;
1510*b30d1939SAndy Fiddaman case BEST:
1511*b30d1939SAndy Fiddaman return BEST;
1512*b30d1939SAndy Fiddaman case CUT:
1513*b30d1939SAndy Fiddaman return CUT;
1514*b30d1939SAndy Fiddaman case GOOD:
1515*b30d1939SAndy Fiddaman r = GOOD;
1516*b30d1939SAndy Fiddaman break;
1517*b30d1939SAndy Fiddaman }
1518*b30d1939SAndy Fiddaman }
1519*b30d1939SAndy Fiddaman else
1520*b30d1939SAndy Fiddaman {
1521*b30d1939SAndy Fiddaman if (!(b = (unsigned char*)stkpush(stkstd, n)))
1522*b30d1939SAndy Fiddaman {
1523*b30d1939SAndy Fiddaman env->error = REG_ESPACE;
1524*b30d1939SAndy Fiddaman return BAD;
1525*b30d1939SAndy Fiddaman }
1526*b30d1939SAndy Fiddaman e = env->end;
1527*b30d1939SAndy Fiddaman if (!(rex->flags & REG_ICASE))
1528*b30d1939SAndy Fiddaman {
1529*b30d1939SAndy Fiddaman for (i = 0; s < e && i < n; i++, s = t)
1530*b30d1939SAndy Fiddaman {
1531*b30d1939SAndy Fiddaman t = s;
1532*b30d1939SAndy Fiddaman if (mbchar(t) != c)
1533*b30d1939SAndy Fiddaman break;
1534*b30d1939SAndy Fiddaman b[i] = t - s;
1535*b30d1939SAndy Fiddaman }
1536*b30d1939SAndy Fiddaman }
1537*b30d1939SAndy Fiddaman else
1538*b30d1939SAndy Fiddaman {
1539*b30d1939SAndy Fiddaman for (i = 0; s < e && i < n; i++, s = t)
1540*b30d1939SAndy Fiddaman {
1541*b30d1939SAndy Fiddaman t = s;
1542*b30d1939SAndy Fiddaman if (towupper(mbchar(t)) != c)
1543*b30d1939SAndy Fiddaman break;
1544*b30d1939SAndy Fiddaman b[i] = t - s;
1545*b30d1939SAndy Fiddaman }
1546*b30d1939SAndy Fiddaman }
1547*b30d1939SAndy Fiddaman for (; i-- >= m; s -= b[i])
1548*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
1549*b30d1939SAndy Fiddaman {
1550*b30d1939SAndy Fiddaman case BAD:
1551*b30d1939SAndy Fiddaman stkpop(stkstd);
1552*b30d1939SAndy Fiddaman return BAD;
1553*b30d1939SAndy Fiddaman case BEST:
1554*b30d1939SAndy Fiddaman stkpop(stkstd);
1555*b30d1939SAndy Fiddaman return BEST;
1556*b30d1939SAndy Fiddaman case CUT:
1557*b30d1939SAndy Fiddaman stkpop(stkstd);
1558*b30d1939SAndy Fiddaman return CUT;
1559*b30d1939SAndy Fiddaman case GOOD:
1560*b30d1939SAndy Fiddaman r = GOOD;
1561*b30d1939SAndy Fiddaman break;
1562*b30d1939SAndy Fiddaman }
1563*b30d1939SAndy Fiddaman stkpop(stkstd);
1564*b30d1939SAndy Fiddaman }
1565*b30d1939SAndy Fiddaman }
1566*b30d1939SAndy Fiddaman else
1567*b30d1939SAndy Fiddaman {
1568*b30d1939SAndy Fiddaman if (!mbwide())
1569*b30d1939SAndy Fiddaman {
1570*b30d1939SAndy Fiddaman e = s + m;
1571*b30d1939SAndy Fiddaman if (p = rex->map)
1572*b30d1939SAndy Fiddaman {
1573*b30d1939SAndy Fiddaman for (; s < e; s++)
1574*b30d1939SAndy Fiddaman if (p[*s] != c)
1575*b30d1939SAndy Fiddaman return r;
1576*b30d1939SAndy Fiddaman e += n - m;
1577*b30d1939SAndy Fiddaman for (;;)
1578*b30d1939SAndy Fiddaman {
1579*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
1580*b30d1939SAndy Fiddaman {
1581*b30d1939SAndy Fiddaman case BAD:
1582*b30d1939SAndy Fiddaman return BAD;
1583*b30d1939SAndy Fiddaman case CUT:
1584*b30d1939SAndy Fiddaman return CUT;
1585*b30d1939SAndy Fiddaman case BEST:
1586*b30d1939SAndy Fiddaman case GOOD:
1587*b30d1939SAndy Fiddaman return BEST;
1588*b30d1939SAndy Fiddaman }
1589*b30d1939SAndy Fiddaman if (s >= e || p[*s++] != c)
1590*b30d1939SAndy Fiddaman break;
1591*b30d1939SAndy Fiddaman }
1592*b30d1939SAndy Fiddaman }
1593*b30d1939SAndy Fiddaman else
1594*b30d1939SAndy Fiddaman {
1595*b30d1939SAndy Fiddaman for (; s < e; s++)
1596*b30d1939SAndy Fiddaman if (*s != c)
1597*b30d1939SAndy Fiddaman return r;
1598*b30d1939SAndy Fiddaman e += n - m;
1599*b30d1939SAndy Fiddaman for (;;)
1600*b30d1939SAndy Fiddaman {
1601*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
1602*b30d1939SAndy Fiddaman {
1603*b30d1939SAndy Fiddaman case BAD:
1604*b30d1939SAndy Fiddaman return BAD;
1605*b30d1939SAndy Fiddaman case CUT:
1606*b30d1939SAndy Fiddaman return CUT;
1607*b30d1939SAndy Fiddaman case BEST:
1608*b30d1939SAndy Fiddaman case GOOD:
1609*b30d1939SAndy Fiddaman return BEST;
1610*b30d1939SAndy Fiddaman }
1611*b30d1939SAndy Fiddaman if (s >= e || *s++ != c)
1612*b30d1939SAndy Fiddaman break;
1613*b30d1939SAndy Fiddaman }
1614*b30d1939SAndy Fiddaman }
1615*b30d1939SAndy Fiddaman }
1616*b30d1939SAndy Fiddaman else
1617*b30d1939SAndy Fiddaman {
1618*b30d1939SAndy Fiddaman e = env->end;
1619*b30d1939SAndy Fiddaman if (!(rex->flags & REG_ICASE))
1620*b30d1939SAndy Fiddaman {
1621*b30d1939SAndy Fiddaman for (i = 0; i < m && s < e; i++, s = t)
1622*b30d1939SAndy Fiddaman {
1623*b30d1939SAndy Fiddaman t = s;
1624*b30d1939SAndy Fiddaman if (mbchar(t) != c)
1625*b30d1939SAndy Fiddaman return r;
1626*b30d1939SAndy Fiddaman }
1627*b30d1939SAndy Fiddaman while (i++ <= n)
1628*b30d1939SAndy Fiddaman {
1629*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
1630*b30d1939SAndy Fiddaman {
1631*b30d1939SAndy Fiddaman case BAD:
1632*b30d1939SAndy Fiddaman return BAD;
1633*b30d1939SAndy Fiddaman case CUT:
1634*b30d1939SAndy Fiddaman return CUT;
1635*b30d1939SAndy Fiddaman case BEST:
1636*b30d1939SAndy Fiddaman case GOOD:
1637*b30d1939SAndy Fiddaman return BEST;
1638*b30d1939SAndy Fiddaman }
1639*b30d1939SAndy Fiddaman if (s >= e || mbchar(s) != c)
1640*b30d1939SAndy Fiddaman break;
1641*b30d1939SAndy Fiddaman }
1642*b30d1939SAndy Fiddaman }
1643*b30d1939SAndy Fiddaman else
1644*b30d1939SAndy Fiddaman {
1645*b30d1939SAndy Fiddaman for (i = 0; i < m && s < e; i++, s = t)
1646*b30d1939SAndy Fiddaman {
1647*b30d1939SAndy Fiddaman t = s;
1648*b30d1939SAndy Fiddaman if (towupper(mbchar(t)) != c)
1649*b30d1939SAndy Fiddaman return r;
1650*b30d1939SAndy Fiddaman }
1651*b30d1939SAndy Fiddaman while (i++ <= n)
1652*b30d1939SAndy Fiddaman {
1653*b30d1939SAndy Fiddaman switch (follow(env, rex, cont, s))
1654*b30d1939SAndy Fiddaman {
1655*b30d1939SAndy Fiddaman case BAD:
1656*b30d1939SAndy Fiddaman return BAD;
1657*b30d1939SAndy Fiddaman case CUT:
1658*b30d1939SAndy Fiddaman return CUT;
1659*b30d1939SAndy Fiddaman case BEST:
1660*b30d1939SAndy Fiddaman case GOOD:
1661*b30d1939SAndy Fiddaman return BEST;
1662*b30d1939SAndy Fiddaman }
1663*b30d1939SAndy Fiddaman if (s >= e || towupper(mbchar(s)) != c)
1664*b30d1939SAndy Fiddaman break;
1665*b30d1939SAndy Fiddaman }
1666*b30d1939SAndy Fiddaman }
1667*b30d1939SAndy Fiddaman }
1668*b30d1939SAndy Fiddaman }
1669*b30d1939SAndy Fiddaman return r;
1670*b30d1939SAndy Fiddaman case REX_REP:
1671*b30d1939SAndy Fiddaman if (env->stack && pospush(env, rex, s, BEG_REP))
1672*b30d1939SAndy Fiddaman return BAD;
1673*b30d1939SAndy Fiddaman r = parserep(env, rex, cont, s, 0);
1674*b30d1939SAndy Fiddaman if (env->stack)
1675*b30d1939SAndy Fiddaman pospop(env);
1676*b30d1939SAndy Fiddaman return r;
1677*b30d1939SAndy Fiddaman case REX_REP_CATCH:
1678*b30d1939SAndy Fiddaman DEBUG_TEST(0x0020,(sfprintf(sfstdout, "AHA#%04d 0x%04x %s n %d len %d s `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.rep_catch.n, s - rex->re.rep_catch.beg, env->end - s, s)),(0));
1679*b30d1939SAndy Fiddaman if (env->stack && pospush(env, rex, s, END_ANY))
1680*b30d1939SAndy Fiddaman return BAD;
1681*b30d1939SAndy Fiddaman if (s == rex->re.rep_catch.beg && rex->re.rep_catch.n > rex->re.rep_catch.ref->lo)
1682*b30d1939SAndy Fiddaman {
1683*b30d1939SAndy Fiddaman /*
1684*b30d1939SAndy Fiddaman * optional empty iteration
1685*b30d1939SAndy Fiddaman */
1686*b30d1939SAndy Fiddaman
1687*b30d1939SAndy Fiddaman DEBUG_TEST(0x0002,(sfprintf(sfstdout, "AHA#%04d %p re.group.back=%d re.group.expr.rex=%s\n", __LINE__, rex->re.rep_catch.ref->re.group.expr.rex, rex->re.rep_catch.ref->re.group.expr.rex->re.group.back, rexname(rex->re.rep_catch.ref->re.group.expr.rex))),(0));
1688*b30d1939SAndy Fiddaman if (!env->stack || s != rex->re.rep_catch.ref->re.rep_catch.beg && !rex->re.rep_catch.ref->re.group.expr.rex->re.group.back)
1689*b30d1939SAndy Fiddaman r = NONE;
1690*b30d1939SAndy Fiddaman else if (pospush(env, rex, s, END_ANY))
1691*b30d1939SAndy Fiddaman r = BAD;
1692*b30d1939SAndy Fiddaman else
1693*b30d1939SAndy Fiddaman {
1694*b30d1939SAndy Fiddaman r = follow(env, rex, rex->re.rep_catch.cont, s);
1695*b30d1939SAndy Fiddaman pospop(env);
1696*b30d1939SAndy Fiddaman }
1697*b30d1939SAndy Fiddaman }
1698*b30d1939SAndy Fiddaman else
1699*b30d1939SAndy Fiddaman r = parserep(env, rex->re.rep_catch.ref, rex->re.rep_catch.cont, s, rex->re.rep_catch.n);
1700*b30d1939SAndy Fiddaman if (env->stack)
1701*b30d1939SAndy Fiddaman pospop(env);
1702*b30d1939SAndy Fiddaman return r;
1703*b30d1939SAndy Fiddaman case REX_STRING:
1704*b30d1939SAndy Fiddaman DEBUG_TEST(0x0200,(sfprintf(sfstdout,"AHA#%04d 0x%04x parse %s \"%-.*s\" `%-.*s'\n", __LINE__, debug_flag, rexname(rex), rex->re.string.size, rex->re.string.base, env->end - s, s)),(0));
1705*b30d1939SAndy Fiddaman if (rex->re.string.size > (env->end - s))
1706*b30d1939SAndy Fiddaman return NONE;
1707*b30d1939SAndy Fiddaman t = rex->re.string.base;
1708*b30d1939SAndy Fiddaman e = t + rex->re.string.size;
1709*b30d1939SAndy Fiddaman if (!(p = rex->map))
1710*b30d1939SAndy Fiddaman {
1711*b30d1939SAndy Fiddaman while (t < e)
1712*b30d1939SAndy Fiddaman if (*s++ != *t++)
1713*b30d1939SAndy Fiddaman return NONE;
1714*b30d1939SAndy Fiddaman }
1715*b30d1939SAndy Fiddaman else if (!mbwide())
1716*b30d1939SAndy Fiddaman {
1717*b30d1939SAndy Fiddaman while (t < e)
1718*b30d1939SAndy Fiddaman if (p[*s++] != *t++)
1719*b30d1939SAndy Fiddaman return NONE;
1720*b30d1939SAndy Fiddaman }
1721*b30d1939SAndy Fiddaman else
1722*b30d1939SAndy Fiddaman {
1723*b30d1939SAndy Fiddaman while (t < e)
1724*b30d1939SAndy Fiddaman {
1725*b30d1939SAndy Fiddaman c = mbchar(s);
1726*b30d1939SAndy Fiddaman d = mbchar(t);
1727*b30d1939SAndy Fiddaman if (towupper(c) != d)
1728*b30d1939SAndy Fiddaman return NONE;
1729*b30d1939SAndy Fiddaman }
1730*b30d1939SAndy Fiddaman }
1731*b30d1939SAndy Fiddaman break;
1732*b30d1939SAndy Fiddaman case REX_TRIE:
1733*b30d1939SAndy Fiddaman if (((s + rex->re.trie.min) > env->end) || !(x = rex->re.trie.root[rex->map ? rex->map[*s] : *s]))
1734*b30d1939SAndy Fiddaman return NONE;
1735*b30d1939SAndy Fiddaman return parsetrie(env, x, rex, cont, s);
1736*b30d1939SAndy Fiddaman case REX_EXEC:
1737*b30d1939SAndy Fiddaman u = 0;
1738*b30d1939SAndy Fiddaman r = (*env->disc->re_execf)(env->regex, rex->re.exec.data, rex->re.exec.text, rex->re.exec.size, (const char*)s, env->end - s, &u, env->disc);
1739*b30d1939SAndy Fiddaman e = (unsigned char*)u;
1740*b30d1939SAndy Fiddaman if (e >= s && e <= env->end)
1741*b30d1939SAndy Fiddaman s = e;
1742*b30d1939SAndy Fiddaman switch (r)
1743*b30d1939SAndy Fiddaman {
1744*b30d1939SAndy Fiddaman case 0:
1745*b30d1939SAndy Fiddaman break;
1746*b30d1939SAndy Fiddaman case REG_NOMATCH:
1747*b30d1939SAndy Fiddaman return NONE;
1748*b30d1939SAndy Fiddaman default:
1749*b30d1939SAndy Fiddaman env->error = r;
1750*b30d1939SAndy Fiddaman return BAD;
1751*b30d1939SAndy Fiddaman }
1752*b30d1939SAndy Fiddaman break;
1753*b30d1939SAndy Fiddaman case REX_WBEG:
1754*b30d1939SAndy Fiddaman if (!isword(*s) || s > env->beg && isword(*(s - 1)))
1755*b30d1939SAndy Fiddaman return NONE;
1756*b30d1939SAndy Fiddaman break;
1757*b30d1939SAndy Fiddaman case REX_WEND:
1758*b30d1939SAndy Fiddaman if (isword(*s) || s > env->beg && !isword(*(s - 1)))
1759*b30d1939SAndy Fiddaman return NONE;
1760*b30d1939SAndy Fiddaman break;
1761*b30d1939SAndy Fiddaman case REX_WORD:
1762*b30d1939SAndy Fiddaman if (s > env->beg && isword(*(s - 1)) == isword(*s))
1763*b30d1939SAndy Fiddaman return NONE;
1764*b30d1939SAndy Fiddaman break;
1765*b30d1939SAndy Fiddaman case REX_WORD_NOT:
1766*b30d1939SAndy Fiddaman if (s == env->beg || isword(*(s - 1)) != isword(*s))
1767*b30d1939SAndy Fiddaman return NONE;
1768*b30d1939SAndy Fiddaman break;
1769*b30d1939SAndy Fiddaman case REX_BEG_STR:
1770*b30d1939SAndy Fiddaman if (s != env->beg)
1771*b30d1939SAndy Fiddaman return NONE;
1772*b30d1939SAndy Fiddaman break;
1773*b30d1939SAndy Fiddaman case REX_END_STR:
1774*b30d1939SAndy Fiddaman for (t = s; t < env->end && *t == '\n'; t++);
1775*b30d1939SAndy Fiddaman if (t < env->end)
1776*b30d1939SAndy Fiddaman return NONE;
1777*b30d1939SAndy Fiddaman break;
1778*b30d1939SAndy Fiddaman case REX_FIN_STR:
1779*b30d1939SAndy Fiddaman if (s < env->end)
1780*b30d1939SAndy Fiddaman return NONE;
1781*b30d1939SAndy Fiddaman break;
1782*b30d1939SAndy Fiddaman }
1783*b30d1939SAndy Fiddaman if (!(rex = rex->next))
1784*b30d1939SAndy Fiddaman {
1785*b30d1939SAndy Fiddaman if (!(rex = cont))
1786*b30d1939SAndy Fiddaman break;
1787*b30d1939SAndy Fiddaman cont = 0;
1788*b30d1939SAndy Fiddaman }
1789*b30d1939SAndy Fiddaman }
1790*b30d1939SAndy Fiddaman return GOOD;
1791*b30d1939SAndy Fiddaman }
1792*b30d1939SAndy Fiddaman
1793*b30d1939SAndy Fiddaman #if _AST_REGEX_DEBUG
1794*b30d1939SAndy Fiddaman
1795*b30d1939SAndy Fiddaman static void
listnode(Rex_t * e,int level)1796*b30d1939SAndy Fiddaman listnode(Rex_t* e, int level)
1797*b30d1939SAndy Fiddaman {
1798*b30d1939SAndy Fiddaman int i;
1799*b30d1939SAndy Fiddaman
1800*b30d1939SAndy Fiddaman if (e)
1801*b30d1939SAndy Fiddaman {
1802*b30d1939SAndy Fiddaman do
1803*b30d1939SAndy Fiddaman {
1804*b30d1939SAndy Fiddaman for (i = 0; i < level; i++)
1805*b30d1939SAndy Fiddaman sfprintf(sfstderr, " ");
1806*b30d1939SAndy Fiddaman sfprintf(sfstderr, "%s\n", rexname(e));
1807*b30d1939SAndy Fiddaman switch (e->type)
1808*b30d1939SAndy Fiddaman {
1809*b30d1939SAndy Fiddaman case REX_ALT:
1810*b30d1939SAndy Fiddaman case REX_CONJ:
1811*b30d1939SAndy Fiddaman listnode(e->re.group.expr.binary.left, level + 1);
1812*b30d1939SAndy Fiddaman listnode(e->re.group.expr.binary.right, level + 1);
1813*b30d1939SAndy Fiddaman break;
1814*b30d1939SAndy Fiddaman case REX_GROUP:
1815*b30d1939SAndy Fiddaman case REX_GROUP_AHEAD:
1816*b30d1939SAndy Fiddaman case REX_GROUP_AHEAD_NOT:
1817*b30d1939SAndy Fiddaman case REX_GROUP_BEHIND:
1818*b30d1939SAndy Fiddaman case REX_GROUP_BEHIND_NOT:
1819*b30d1939SAndy Fiddaman case REX_GROUP_CUT:
1820*b30d1939SAndy Fiddaman case REX_NEG:
1821*b30d1939SAndy Fiddaman case REX_REP:
1822*b30d1939SAndy Fiddaman listnode(e->re.group.expr.rex, level + 1);
1823*b30d1939SAndy Fiddaman break;
1824*b30d1939SAndy Fiddaman }
1825*b30d1939SAndy Fiddaman } while (e = e->next);
1826*b30d1939SAndy Fiddaman }
1827*b30d1939SAndy Fiddaman }
1828*b30d1939SAndy Fiddaman
1829*b30d1939SAndy Fiddaman static int
list(Env_t * env,Rex_t * rex)1830*b30d1939SAndy Fiddaman list(Env_t* env, Rex_t* rex)
1831*b30d1939SAndy Fiddaman {
1832*b30d1939SAndy Fiddaman sfprintf(sfstderr, "AHA regex hard=%d stack=%p\n", env->hard, env->stack);
1833*b30d1939SAndy Fiddaman if (rex)
1834*b30d1939SAndy Fiddaman listnode(rex, 1);
1835*b30d1939SAndy Fiddaman return 0;
1836*b30d1939SAndy Fiddaman }
1837*b30d1939SAndy Fiddaman
1838*b30d1939SAndy Fiddaman #endif
1839*b30d1939SAndy Fiddaman
1840*b30d1939SAndy Fiddaman /*
1841*b30d1939SAndy Fiddaman * returning REG_BADPAT or REG_ESPACE is not explicitly
1842*b30d1939SAndy Fiddaman * countenanced by the standard
1843*b30d1939SAndy Fiddaman */
1844*b30d1939SAndy Fiddaman
1845*b30d1939SAndy Fiddaman int
regnexec(const regex_t * p,const char * s,size_t len,size_t nmatch,regmatch_t * match,regflags_t flags)1846*b30d1939SAndy Fiddaman regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, regmatch_t* match, regflags_t flags)
1847*b30d1939SAndy Fiddaman {
1848*b30d1939SAndy Fiddaman register ssize_t n;
1849*b30d1939SAndy Fiddaman register int i;
1850*b30d1939SAndy Fiddaman int j;
1851*b30d1939SAndy Fiddaman int k;
1852*b30d1939SAndy Fiddaman int m;
1853*b30d1939SAndy Fiddaman int advance;
1854*b30d1939SAndy Fiddaman Env_t* env;
1855*b30d1939SAndy Fiddaman Rex_t* e;
1856*b30d1939SAndy Fiddaman
1857*b30d1939SAndy Fiddaman DEBUG_INIT();
1858*b30d1939SAndy Fiddaman DEBUG_TEST(0x0001,(sfprintf(sfstdout, "AHA#%04d 0x%04x regnexec %d 0x%08x `%-.*s'\n", __LINE__, debug_flag, nmatch, flags, len, s)),(0));
1859*b30d1939SAndy Fiddaman if (!p || !(env = p->env))
1860*b30d1939SAndy Fiddaman return REG_BADPAT;
1861*b30d1939SAndy Fiddaman if (!s)
1862*b30d1939SAndy Fiddaman return fatal(env->disc, REG_BADPAT, NiL);
1863*b30d1939SAndy Fiddaman if (len < env->min)
1864*b30d1939SAndy Fiddaman {
1865*b30d1939SAndy Fiddaman DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, env->min)),(0));
1866*b30d1939SAndy Fiddaman return REG_NOMATCH;
1867*b30d1939SAndy Fiddaman }
1868*b30d1939SAndy Fiddaman env->regex = p;
1869*b30d1939SAndy Fiddaman env->beg = (unsigned char*)s;
1870*b30d1939SAndy Fiddaman env->end = env->beg + len;
1871*b30d1939SAndy Fiddaman stknew(stkstd, &env->stk);
1872*b30d1939SAndy Fiddaman env->flags &= ~REG_EXEC;
1873*b30d1939SAndy Fiddaman env->flags |= (flags & REG_EXEC);
1874*b30d1939SAndy Fiddaman advance = 0;
1875*b30d1939SAndy Fiddaman if (env->stack = env->hard || !(env->flags & REG_NOSUB) && nmatch)
1876*b30d1939SAndy Fiddaman {
1877*b30d1939SAndy Fiddaman n = env->nsub;
1878*b30d1939SAndy Fiddaman if (!(env->match = (regmatch_t*)stkpush(stkstd, 2 * (n + 1) * sizeof(regmatch_t))) ||
1879*b30d1939SAndy Fiddaman !env->pos && !(env->pos = vecopen(16, sizeof(Pos_t))) ||
1880*b30d1939SAndy Fiddaman !env->bestpos && !(env->bestpos = vecopen(16, sizeof(Pos_t))))
1881*b30d1939SAndy Fiddaman {
1882*b30d1939SAndy Fiddaman k = REG_ESPACE;
1883*b30d1939SAndy Fiddaman goto done;
1884*b30d1939SAndy Fiddaman }
1885*b30d1939SAndy Fiddaman env->pos->cur = env->bestpos->cur = 0;
1886*b30d1939SAndy Fiddaman env->best = &env->match[n + 1];
1887*b30d1939SAndy Fiddaman env->best[0].rm_so = 0;
1888*b30d1939SAndy Fiddaman env->best[0].rm_eo = -1;
1889*b30d1939SAndy Fiddaman for (i = 0; i <= n; i++)
1890*b30d1939SAndy Fiddaman env->match[i] = state.nomatch;
1891*b30d1939SAndy Fiddaman if (flags & REG_ADVANCE)
1892*b30d1939SAndy Fiddaman advance = 1;
1893*b30d1939SAndy Fiddaman }
1894*b30d1939SAndy Fiddaman DEBUG_TEST(0x1000,(list(env,env->rex)),(0));
1895*b30d1939SAndy Fiddaman k = REG_NOMATCH;
1896*b30d1939SAndy Fiddaman if ((e = env->rex)->type == REX_BM)
1897*b30d1939SAndy Fiddaman {
1898*b30d1939SAndy Fiddaman DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM\n", __LINE__)),(0));
1899*b30d1939SAndy Fiddaman if (len < e->re.bm.right)
1900*b30d1939SAndy Fiddaman {
1901*b30d1939SAndy Fiddaman DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, len, e->re.bm.right)),(0));
1902*b30d1939SAndy Fiddaman goto done;
1903*b30d1939SAndy Fiddaman }
1904*b30d1939SAndy Fiddaman else if (!(flags & REG_LEFT))
1905*b30d1939SAndy Fiddaman {
1906*b30d1939SAndy Fiddaman register unsigned char* buf = (unsigned char*)s;
1907*b30d1939SAndy Fiddaman register size_t index = e->re.bm.left + e->re.bm.size;
1908*b30d1939SAndy Fiddaman register size_t mid = len - e->re.bm.right;
1909*b30d1939SAndy Fiddaman register size_t* skip = e->re.bm.skip;
1910*b30d1939SAndy Fiddaman register size_t* fail = e->re.bm.fail;
1911*b30d1939SAndy Fiddaman register Bm_mask_t** mask = e->re.bm.mask;
1912*b30d1939SAndy Fiddaman Bm_mask_t m;
1913*b30d1939SAndy Fiddaman size_t x;
1914*b30d1939SAndy Fiddaman
1915*b30d1939SAndy Fiddaman DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REX_BM len=%d right=%d left=%d size=%d %d %d\n", __LINE__, len, e->re.bm.right, e->re.bm.left, e->re.bm.size, index, mid)),(0));
1916*b30d1939SAndy Fiddaman for (;;)
1917*b30d1939SAndy Fiddaman {
1918*b30d1939SAndy Fiddaman while ((index += skip[buf[index]]) < mid);
1919*b30d1939SAndy Fiddaman if (index < HIT)
1920*b30d1939SAndy Fiddaman {
1921*b30d1939SAndy Fiddaman DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d REG_NOMATCH %d %d\n", __LINE__, index, HIT)),(0));
1922*b30d1939SAndy Fiddaman goto done;
1923*b30d1939SAndy Fiddaman }
1924*b30d1939SAndy Fiddaman index -= HIT;
1925*b30d1939SAndy Fiddaman m = mask[n = e->re.bm.size - 1][buf[index]];
1926*b30d1939SAndy Fiddaman do
1927*b30d1939SAndy Fiddaman {
1928*b30d1939SAndy Fiddaman if (!n--)
1929*b30d1939SAndy Fiddaman {
1930*b30d1939SAndy Fiddaman if (e->re.bm.back < 0)
1931*b30d1939SAndy Fiddaman goto possible;
1932*b30d1939SAndy Fiddaman if (advance)
1933*b30d1939SAndy Fiddaman {
1934*b30d1939SAndy Fiddaman i = index - e->re.bm.back;
1935*b30d1939SAndy Fiddaman s += i;
1936*b30d1939SAndy Fiddaman if (env->stack)
1937*b30d1939SAndy Fiddaman env->best[0].rm_so += i;
1938*b30d1939SAndy Fiddaman goto possible;
1939*b30d1939SAndy Fiddaman }
1940*b30d1939SAndy Fiddaman x = index;
1941*b30d1939SAndy Fiddaman if (index < e->re.bm.back)
1942*b30d1939SAndy Fiddaman index = 0;
1943*b30d1939SAndy Fiddaman else
1944*b30d1939SAndy Fiddaman index -= e->re.bm.back;
1945*b30d1939SAndy Fiddaman while (index <= x)
1946*b30d1939SAndy Fiddaman {
1947*b30d1939SAndy Fiddaman if ((i = parse(env, e->next, &env->done, buf + index)) != NONE)
1948*b30d1939SAndy Fiddaman {
1949*b30d1939SAndy Fiddaman if (env->stack)
1950*b30d1939SAndy Fiddaman env->best[0].rm_so = index;
1951*b30d1939SAndy Fiddaman n = env->nsub;
1952*b30d1939SAndy Fiddaman goto hit;
1953*b30d1939SAndy Fiddaman }
1954*b30d1939SAndy Fiddaman index++;
1955*b30d1939SAndy Fiddaman }
1956*b30d1939SAndy Fiddaman index += e->re.bm.size;
1957*b30d1939SAndy Fiddaman break;
1958*b30d1939SAndy Fiddaman }
1959*b30d1939SAndy Fiddaman } while (m &= mask[n][buf[--index]]);
1960*b30d1939SAndy Fiddaman if ((index += fail[n + 1]) >= len)
1961*b30d1939SAndy Fiddaman goto done;
1962*b30d1939SAndy Fiddaman }
1963*b30d1939SAndy Fiddaman }
1964*b30d1939SAndy Fiddaman possible:
1965*b30d1939SAndy Fiddaman n = env->nsub;
1966*b30d1939SAndy Fiddaman e = e->next;
1967*b30d1939SAndy Fiddaman }
1968*b30d1939SAndy Fiddaman j = env->once || (flags & REG_LEFT);
1969*b30d1939SAndy Fiddaman DEBUG_TEST(0x0080,(sfprintf(sfstdout, "AHA#%04d parse once=%d\n", __LINE__, j)),(0));
1970*b30d1939SAndy Fiddaman while ((i = parse(env, e, &env->done, (unsigned char*)s)) == NONE || advance && !env->best[0].rm_eo && !(advance = 0))
1971*b30d1939SAndy Fiddaman {
1972*b30d1939SAndy Fiddaman if (j)
1973*b30d1939SAndy Fiddaman goto done;
1974*b30d1939SAndy Fiddaman i = MBSIZE(s);
1975*b30d1939SAndy Fiddaman s += i;
1976*b30d1939SAndy Fiddaman if ((unsigned char*)s > env->end - env->min)
1977*b30d1939SAndy Fiddaman goto done;
1978*b30d1939SAndy Fiddaman if (env->stack)
1979*b30d1939SAndy Fiddaman env->best[0].rm_so += i;
1980*b30d1939SAndy Fiddaman }
1981*b30d1939SAndy Fiddaman if ((flags & REG_LEFT) && env->stack && env->best[0].rm_so)
1982*b30d1939SAndy Fiddaman goto done;
1983*b30d1939SAndy Fiddaman hit:
1984*b30d1939SAndy Fiddaman if (k = env->error)
1985*b30d1939SAndy Fiddaman goto done;
1986*b30d1939SAndy Fiddaman if (i == CUT)
1987*b30d1939SAndy Fiddaman {
1988*b30d1939SAndy Fiddaman k = env->error = REG_NOMATCH;
1989*b30d1939SAndy Fiddaman goto done;
1990*b30d1939SAndy Fiddaman }
1991*b30d1939SAndy Fiddaman if (!(env->flags & REG_NOSUB))
1992*b30d1939SAndy Fiddaman {
1993*b30d1939SAndy Fiddaman k = (env->flags & (REG_SHELL|REG_AUGMENTED)) == (REG_SHELL|REG_AUGMENTED);
1994*b30d1939SAndy Fiddaman for (i = j = m = 0; j < nmatch; i++)
1995*b30d1939SAndy Fiddaman if (!i || !k || (i & 1))
1996*b30d1939SAndy Fiddaman {
1997*b30d1939SAndy Fiddaman if (i > n)
1998*b30d1939SAndy Fiddaman match[j] = state.nomatch;
1999*b30d1939SAndy Fiddaman else
2000*b30d1939SAndy Fiddaman match[m = j] = env->best[i];
2001*b30d1939SAndy Fiddaman j++;
2002*b30d1939SAndy Fiddaman }
2003*b30d1939SAndy Fiddaman if (k)
2004*b30d1939SAndy Fiddaman {
2005*b30d1939SAndy Fiddaman while (m > 0 && match[m].rm_so == -1 && match[m].rm_eo == -1)
2006*b30d1939SAndy Fiddaman m--;
2007*b30d1939SAndy Fiddaman ((regex_t*)p)->re_nsub = m;
2008*b30d1939SAndy Fiddaman }
2009*b30d1939SAndy Fiddaman }
2010*b30d1939SAndy Fiddaman k = 0;
2011*b30d1939SAndy Fiddaman done:
2012*b30d1939SAndy Fiddaman stkold(stkstd, &env->stk);
2013*b30d1939SAndy Fiddaman env->stk.base = 0;
2014*b30d1939SAndy Fiddaman if (k > REG_NOMATCH)
2015*b30d1939SAndy Fiddaman fatal(p->env->disc, k, NiL);
2016*b30d1939SAndy Fiddaman return k;
2017*b30d1939SAndy Fiddaman }
2018*b30d1939SAndy Fiddaman
2019*b30d1939SAndy Fiddaman void
regfree(regex_t * p)2020*b30d1939SAndy Fiddaman regfree(regex_t* p)
2021*b30d1939SAndy Fiddaman {
2022*b30d1939SAndy Fiddaman Env_t* env;
2023*b30d1939SAndy Fiddaman
2024*b30d1939SAndy Fiddaman if (p && (env = p->env))
2025*b30d1939SAndy Fiddaman {
2026*b30d1939SAndy Fiddaman #if _REG_subcomp
2027*b30d1939SAndy Fiddaman if (env->sub)
2028*b30d1939SAndy Fiddaman {
2029*b30d1939SAndy Fiddaman regsubfree(p);
2030*b30d1939SAndy Fiddaman p->re_sub = 0;
2031*b30d1939SAndy Fiddaman }
2032*b30d1939SAndy Fiddaman #endif
2033*b30d1939SAndy Fiddaman p->env = 0;
2034*b30d1939SAndy Fiddaman if (--env->refs <= 0 && !(env->disc->re_flags & REG_NOFREE))
2035*b30d1939SAndy Fiddaman {
2036*b30d1939SAndy Fiddaman drop(env->disc, env->rex);
2037*b30d1939SAndy Fiddaman if (env->pos)
2038*b30d1939SAndy Fiddaman vecclose(env->pos);
2039*b30d1939SAndy Fiddaman if (env->bestpos)
2040*b30d1939SAndy Fiddaman vecclose(env->bestpos);
2041*b30d1939SAndy Fiddaman if (env->stk.base)
2042*b30d1939SAndy Fiddaman stkold(stkstd, &env->stk);
2043*b30d1939SAndy Fiddaman alloc(env->disc, env, 0);
2044*b30d1939SAndy Fiddaman }
2045*b30d1939SAndy Fiddaman }
2046*b30d1939SAndy Fiddaman }
2047*b30d1939SAndy Fiddaman
2048*b30d1939SAndy Fiddaman /*
2049*b30d1939SAndy Fiddaman * 20120528: regoff_t changed from int to ssize_t
2050*b30d1939SAndy Fiddaman */
2051*b30d1939SAndy Fiddaman
2052*b30d1939SAndy Fiddaman #if defined(__EXPORT__)
2053*b30d1939SAndy Fiddaman #define extern __EXPORT__
2054*b30d1939SAndy Fiddaman #endif
2055*b30d1939SAndy Fiddaman
2056*b30d1939SAndy Fiddaman #undef regnexec
2057*b30d1939SAndy Fiddaman #if _map_libc
2058*b30d1939SAndy Fiddaman #define regnexec _ast_regnexec
2059*b30d1939SAndy Fiddaman #endif
2060*b30d1939SAndy Fiddaman
2061*b30d1939SAndy Fiddaman extern int
regnexec(const regex_t * p,const char * s,size_t len,size_t nmatch,oldregmatch_t * oldmatch,regflags_t flags)2062*b30d1939SAndy Fiddaman regnexec(const regex_t* p, const char* s, size_t len, size_t nmatch, oldregmatch_t* oldmatch, regflags_t flags)
2063*b30d1939SAndy Fiddaman {
2064*b30d1939SAndy Fiddaman if (oldmatch)
2065*b30d1939SAndy Fiddaman {
2066*b30d1939SAndy Fiddaman regmatch_t* match;
2067*b30d1939SAndy Fiddaman ssize_t i;
2068*b30d1939SAndy Fiddaman int r;
2069*b30d1939SAndy Fiddaman
2070*b30d1939SAndy Fiddaman if (!(match = oldof(0, regmatch_t, nmatch, 0)))
2071*b30d1939SAndy Fiddaman return -1;
2072*b30d1939SAndy Fiddaman if (!(r = regnexec_20120528(p, s, len, nmatch, match, flags)))
2073*b30d1939SAndy Fiddaman for (i = 0; i < nmatch; i++)
2074*b30d1939SAndy Fiddaman {
2075*b30d1939SAndy Fiddaman oldmatch[i].rm_so = match[i].rm_so;
2076*b30d1939SAndy Fiddaman oldmatch[i].rm_eo = match[i].rm_eo;
2077*b30d1939SAndy Fiddaman }
2078*b30d1939SAndy Fiddaman free(match);
2079*b30d1939SAndy Fiddaman return r;
2080*b30d1939SAndy Fiddaman }
2081*b30d1939SAndy Fiddaman return regnexec_20120528(p, s, len, 0, NiL, flags);
2082*b30d1939SAndy Fiddaman }
2083