1 /* see copyright notice in squirrel.h */
2 #include "../squirrel.h"
3 #include <string.h>
4 #include <ctype.h>
5 #include <setjmp.h>
6 #include "../sqstdstring.h"
7 
8 #ifdef _DEBUG
9 #include <stdio.h>
10 
11 static const SQChar *g_nnames[] =
12 {
13 	_SC("NONE"),_SC("OP_GREEDY"),   _SC("OP_OR"),
14 	_SC("OP_EXPR"),_SC("OP_NOCAPEXPR"),_SC("OP_DOT"),   _SC("OP_CLASS"),
15 	_SC("OP_CCLASS"),_SC("OP_NCLASS"),_SC("OP_RANGE"),_SC("OP_CHAR"),
16 	_SC("OP_EOL"),_SC("OP_BOL"),_SC("OP_WB"),_SC("OP_MB")
17 };
18 
19 #endif
20 
21 #define OP_GREEDY	   (MAX_CHAR+1) // * + ? {n}
22 #define OP_OR		   (MAX_CHAR+2)
23 #define OP_EXPR		 (MAX_CHAR+3) //parentesis ()
24 #define OP_NOCAPEXPR	(MAX_CHAR+4) //parentesis (?:)
25 #define OP_DOT		  (MAX_CHAR+5)
26 #define OP_CLASS		(MAX_CHAR+6)
27 #define OP_CCLASS	   (MAX_CHAR+7)
28 #define OP_NCLASS	   (MAX_CHAR+8) //negates class the [^
29 #define OP_RANGE		(MAX_CHAR+9)
30 #define OP_CHAR		 (MAX_CHAR+10)
31 #define OP_EOL		  (MAX_CHAR+11)
32 #define OP_BOL		  (MAX_CHAR+12)
33 #define OP_WB		   (MAX_CHAR+13)
34 #define OP_MB		   (MAX_CHAR+14) //match balanced
35 
/* Pattern metacharacters recognised by the parser. */
#define SQREX_SYMBOL_ANY_CHAR             ('.')   /* parsed into OP_DOT */
#define SQREX_SYMBOL_GREEDY_ONE_OR_MORE   ('+')   /* repeat 1..unbounded */
#define SQREX_SYMBOL_GREEDY_ZERO_OR_MORE  ('*')   /* repeat 0..unbounded */
#define SQREX_SYMBOL_GREEDY_ZERO_OR_ONE   ('?')   /* repeat 0..1 */
#define SQREX_SYMBOL_BRANCH               ('|')   /* parsed into OP_OR */
#define SQREX_SYMBOL_END_OF_STRING        ('$')   /* parsed into OP_EOL */
#define SQREX_SYMBOL_BEGINNING_OF_STRING  ('^')   /* OP_BOL, or negation right after '[' */
#define SQREX_SYMBOL_ESCAPE_CHAR          ('\\')  /* introduces an escape sequence */
44 
45 
46 typedef int SQRexNodeType;
47 
48 typedef struct tagSQRexNode{
49 	SQRexNodeType type;
50 	SQInteger left;
51 	SQInteger right;
52 	SQInteger next;
53 }SQRexNode;
54 
55 struct SQRex{
56 	const SQChar *_eol;
57 	const SQChar *_bol;
58 	const SQChar *_p;
59 	SQInteger _first;
60 	SQInteger _op;
61 	SQRexNode *_nodes;
62 	SQInteger _nallocated;
63 	SQInteger _nsize;
64 	SQInteger _nsubexpr;
65 	SQRexMatch *_matches;
66 	SQInteger _currsubexp;
67 	void *_jmpbuf;
68 	const SQChar **_error;
69 };
70 
/* Forward declaration: sqstd_rex_element() and sqstd_rex_list() call each other. */
static SQInteger sqstd_rex_list(SQRex *exp);
72 
sqstd_rex_newnode(SQRex * exp,SQRexNodeType type)73 static SQInteger sqstd_rex_newnode(SQRex *exp, SQRexNodeType type)
74 {
75 	SQRexNode n;
76 	n.type = type;
77 	n.next = n.right = n.left = -1;
78 	if(type == OP_EXPR)
79 		n.right = exp->_nsubexpr++;
80 	if(exp->_nallocated < (exp->_nsize + 1)) {
81 		SQInteger oldsize = exp->_nallocated;
82 		exp->_nallocated *= 2;
83 		exp->_nodes = (SQRexNode *)sq_realloc(exp->_nodes, oldsize * sizeof(SQRexNode) ,exp->_nallocated * sizeof(SQRexNode));
84 	}
85 	exp->_nodes[exp->_nsize++] = n;
86 	SQInteger newid = exp->_nsize - 1;
87 	return (SQInteger)newid;
88 }
89 
sqstd_rex_error(SQRex * exp,const SQChar * error)90 static void sqstd_rex_error(SQRex *exp,const SQChar *error)
91 {
92 	if(exp->_error) *exp->_error = error;
93 	longjmp(*((jmp_buf*)exp->_jmpbuf),-1);
94 }
95 
sqstd_rex_expect(SQRex * exp,SQInteger n)96 static void sqstd_rex_expect(SQRex *exp, SQInteger n){
97 	if((*exp->_p) != n)
98 		sqstd_rex_error(exp, _SC("expected paren"));
99 	exp->_p++;
100 }
101 
sqstd_rex_escapechar(SQRex * exp)102 static SQChar sqstd_rex_escapechar(SQRex *exp)
103 {
104 	if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR){
105 		exp->_p++;
106 		switch(*exp->_p) {
107 		case 'v': exp->_p++; return '\v';
108 		case 'n': exp->_p++; return '\n';
109 		case 't': exp->_p++; return '\t';
110 		case 'r': exp->_p++; return '\r';
111 		case 'f': exp->_p++; return '\f';
112 		default: return (*exp->_p++);
113 		}
114 	} else if(!scisprint(*exp->_p)) sqstd_rex_error(exp,_SC("letter expected"));
115 	return (*exp->_p++);
116 }
117 
sqstd_rex_charclass(SQRex * exp,SQInteger classid)118 static SQInteger sqstd_rex_charclass(SQRex *exp,SQInteger classid)
119 {
120 	SQInteger n = sqstd_rex_newnode(exp,OP_CCLASS);
121 	exp->_nodes[n].left = classid;
122 	return n;
123 }
124 
sqstd_rex_charnode(SQRex * exp,SQBool isclass)125 static SQInteger sqstd_rex_charnode(SQRex *exp,SQBool isclass)
126 {
127 	SQChar t;
128 	if(*exp->_p == SQREX_SYMBOL_ESCAPE_CHAR) {
129 		exp->_p++;
130 		switch(*exp->_p) {
131 			case 'n': exp->_p++; return sqstd_rex_newnode(exp,'\n');
132 			case 't': exp->_p++; return sqstd_rex_newnode(exp,'\t');
133 			case 'r': exp->_p++; return sqstd_rex_newnode(exp,'\r');
134 			case 'f': exp->_p++; return sqstd_rex_newnode(exp,'\f');
135 			case 'v': exp->_p++; return sqstd_rex_newnode(exp,'\v');
136 			case 'a': case 'A': case 'w': case 'W': case 's': case 'S':
137 			case 'd': case 'D': case 'x': case 'X': case 'c': case 'C':
138 			case 'p': case 'P': case 'l': case 'u':
139 				{
140 				t = *exp->_p; exp->_p++;
141 				return sqstd_rex_charclass(exp,t);
142 				}
143 			case 'm':
144 				{
145 					 SQChar cb, ce; //cb = character begin match ce = character end match
146 					 cb = *++exp->_p; //skip 'm'
147 					 ce = *++exp->_p;
148 					 exp->_p++; //points to the next char to be parsed
149 					 if ((!cb) || (!ce)) sqstd_rex_error(exp,_SC("balanced chars expected"));
150 					 if ( cb == ce ) sqstd_rex_error(exp,_SC("open/close char can't be the same"));
151 					 SQInteger node =  sqstd_rex_newnode(exp,OP_MB);
152 					 exp->_nodes[node].left = cb;
153 					 exp->_nodes[node].right = ce;
154 					 return node;
155 				}
156 			case 0:
157 				sqstd_rex_error(exp,_SC("letter expected for argument of escape sequence"));
158 				break;
159 			case 'b':
160 			case 'B':
161 				if(!isclass) {
162 					SQInteger node = sqstd_rex_newnode(exp,OP_WB);
163 					exp->_nodes[node].left = *exp->_p;
164 					exp->_p++;
165 					return node;
166 				} //else default
167 			default:
168 				t = *exp->_p; exp->_p++;
169 				return sqstd_rex_newnode(exp,t);
170 		}
171 	}
172 	else if(!scisprint(*exp->_p)) {
173 
174 		sqstd_rex_error(exp,_SC("letter expected"));
175 	}
176 	t = *exp->_p; exp->_p++;
177 	return sqstd_rex_newnode(exp,t);
178 }
sqstd_rex_class(SQRex * exp)179 static SQInteger sqstd_rex_class(SQRex *exp)
180 {
181 	SQInteger ret = -1;
182 	SQInteger first = -1,chain;
183 	if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING){
184 		ret = sqstd_rex_newnode(exp,OP_NCLASS);
185 		exp->_p++;
186 	}else ret = sqstd_rex_newnode(exp,OP_CLASS);
187 
188 	if(*exp->_p == ']') sqstd_rex_error(exp,_SC("empty class"));
189 	chain = ret;
190 	while(*exp->_p != ']' && exp->_p != exp->_eol) {
191 		if(*exp->_p == '-' && first != -1){
192 			SQInteger r;
193 			if(*exp->_p++ == ']') sqstd_rex_error(exp,_SC("unfinished range"));
194 			r = sqstd_rex_newnode(exp,OP_RANGE);
195 			if(exp->_nodes[first].type>*exp->_p) sqstd_rex_error(exp,_SC("invalid range"));
196 			if(exp->_nodes[first].type == OP_CCLASS) sqstd_rex_error(exp,_SC("cannot use character classes in ranges"));
197 			exp->_nodes[r].left = exp->_nodes[first].type;
198 			SQInteger t = sqstd_rex_escapechar(exp);
199 			exp->_nodes[r].right = t;
200 			exp->_nodes[chain].next = r;
201 			chain = r;
202 			first = -1;
203 		}
204 		else{
205 			if(first!=-1){
206 				SQInteger c = first;
207 				exp->_nodes[chain].next = c;
208 				chain = c;
209 				first = sqstd_rex_charnode(exp,SQTrue);
210 			}
211 			else{
212 				first = sqstd_rex_charnode(exp,SQTrue);
213 			}
214 		}
215 	}
216 	if(first!=-1){
217 		SQInteger c = first;
218 		exp->_nodes[chain].next = c;
219 	}
220 	/* hack? */
221 	exp->_nodes[ret].left = exp->_nodes[ret].next;
222 	exp->_nodes[ret].next = -1;
223 	return ret;
224 }
225 
sqstd_rex_parsenumber(SQRex * exp)226 static SQInteger sqstd_rex_parsenumber(SQRex *exp)
227 {
228 	SQInteger ret = *exp->_p-'0';
229 	SQInteger positions = 10;
230 	exp->_p++;
231 	while(isdigit(*exp->_p)) {
232 		ret = ret*10+(*exp->_p++-'0');
233 		if(positions==1000000000) sqstd_rex_error(exp,_SC("overflow in numeric constant"));
234 		positions *= 10;
235 	};
236 	return ret;
237 }
238 
sqstd_rex_element(SQRex * exp)239 static SQInteger sqstd_rex_element(SQRex *exp)
240 {
241 	SQInteger ret = -1;
242 	switch(*exp->_p)
243 	{
244 	case '(': {
245 		SQInteger expr;
246 		exp->_p++;
247 
248 
249 		if(*exp->_p =='?') {
250 			exp->_p++;
251 			sqstd_rex_expect(exp,':');
252 			expr = sqstd_rex_newnode(exp,OP_NOCAPEXPR);
253 		}
254 		else
255 			expr = sqstd_rex_newnode(exp,OP_EXPR);
256 		SQInteger newn = sqstd_rex_list(exp);
257 		exp->_nodes[expr].left = newn;
258 		ret = expr;
259 		sqstd_rex_expect(exp,')');
260 			  }
261 			  break;
262 	case '[':
263 		exp->_p++;
264 		ret = sqstd_rex_class(exp);
265 		sqstd_rex_expect(exp,']');
266 		break;
267 	case SQREX_SYMBOL_END_OF_STRING: exp->_p++; ret = sqstd_rex_newnode(exp,OP_EOL);break;
268 	case SQREX_SYMBOL_ANY_CHAR: exp->_p++; ret = sqstd_rex_newnode(exp,OP_DOT);break;
269 	default:
270 		ret = sqstd_rex_charnode(exp,SQFalse);
271 		break;
272 	}
273 
274 
275 	SQBool isgreedy = SQFalse;
276 	unsigned short p0 = 0, p1 = 0;
277 	switch(*exp->_p){
278 		case SQREX_SYMBOL_GREEDY_ZERO_OR_MORE: p0 = 0; p1 = 0xFFFF; exp->_p++; isgreedy = SQTrue; break;
279 		case SQREX_SYMBOL_GREEDY_ONE_OR_MORE: p0 = 1; p1 = 0xFFFF; exp->_p++; isgreedy = SQTrue; break;
280 		case SQREX_SYMBOL_GREEDY_ZERO_OR_ONE: p0 = 0; p1 = 1; exp->_p++; isgreedy = SQTrue; break;
281 		case '{':
282 			exp->_p++;
283 			if(!isdigit(*exp->_p)) sqstd_rex_error(exp,_SC("number expected"));
284 			p0 = (unsigned short)sqstd_rex_parsenumber(exp);
285 			/*******************************/
286 			switch(*exp->_p) {
287 		case '}':
288 			p1 = p0; exp->_p++;
289 			break;
290 		case ',':
291 			exp->_p++;
292 			p1 = 0xFFFF;
293 			if(isdigit(*exp->_p)){
294 				p1 = (unsigned short)sqstd_rex_parsenumber(exp);
295 			}
296 			sqstd_rex_expect(exp,'}');
297 			break;
298 		default:
299 			sqstd_rex_error(exp,_SC(", or } expected"));
300 			}
301 			/*******************************/
302 			isgreedy = SQTrue;
303 			break;
304 
305 	}
306 	if(isgreedy) {
307 		SQInteger nnode = sqstd_rex_newnode(exp,OP_GREEDY);
308 		exp->_nodes[nnode].left = ret;
309 		exp->_nodes[nnode].right = ((p0)<<16)|p1;
310 		ret = nnode;
311 	}
312 
313 	if((*exp->_p != SQREX_SYMBOL_BRANCH) && (*exp->_p != ')') && (*exp->_p != SQREX_SYMBOL_GREEDY_ZERO_OR_MORE) && (*exp->_p != SQREX_SYMBOL_GREEDY_ONE_OR_MORE) && (*exp->_p != '\0')) {
314 		SQInteger nnode = sqstd_rex_element(exp);
315 		exp->_nodes[ret].next = nnode;
316 	}
317 
318 	return ret;
319 }
320 
sqstd_rex_list(SQRex * exp)321 static SQInteger sqstd_rex_list(SQRex *exp)
322 {
323 	SQInteger ret=-1,e;
324 	if(*exp->_p == SQREX_SYMBOL_BEGINNING_OF_STRING) {
325 		exp->_p++;
326 		ret = sqstd_rex_newnode(exp,OP_BOL);
327 	}
328 	e = sqstd_rex_element(exp);
329 	if(ret != -1) {
330 		exp->_nodes[ret].next = e;
331 	}
332 	else ret = e;
333 
334 	if(*exp->_p == SQREX_SYMBOL_BRANCH) {
335 		SQInteger temp,tright;
336 		exp->_p++;
337 		temp = sqstd_rex_newnode(exp,OP_OR);
338 		exp->_nodes[temp].left = ret;
339 		tright = sqstd_rex_list(exp);
340 		exp->_nodes[temp].right = tright;
341 		ret = temp;
342 	}
343 	return ret;
344 }
345 
sqstd_rex_matchcclass(SQInteger cclass,SQChar c)346 static SQBool sqstd_rex_matchcclass(SQInteger cclass,SQChar c)
347 {
348 	switch(cclass) {
349 	case 'a': return isalpha(c)?SQTrue:SQFalse;
350 	case 'A': return !isalpha(c)?SQTrue:SQFalse;
351 	case 'w': return (isalnum(c) || c == '_')?SQTrue:SQFalse;
352 	case 'W': return (!isalnum(c) && c != '_')?SQTrue:SQFalse;
353 	case 's': return isspace(c)?SQTrue:SQFalse;
354 	case 'S': return !isspace(c)?SQTrue:SQFalse;
355 	case 'd': return isdigit(c)?SQTrue:SQFalse;
356 	case 'D': return !isdigit(c)?SQTrue:SQFalse;
357 	case 'x': return isxdigit(c)?SQTrue:SQFalse;
358 	case 'X': return !isxdigit(c)?SQTrue:SQFalse;
359 	case 'c': return iscntrl(c)?SQTrue:SQFalse;
360 	case 'C': return !iscntrl(c)?SQTrue:SQFalse;
361 	case 'p': return ispunct(c)?SQTrue:SQFalse;
362 	case 'P': return !ispunct(c)?SQTrue:SQFalse;
363 	case 'l': return islower(c)?SQTrue:SQFalse;
364 	case 'u': return isupper(c)?SQTrue:SQFalse;
365 	}
366 	return SQFalse; /*cannot happen*/
367 }
368 
sqstd_rex_matchclass(SQRex * exp,SQRexNode * node,SQChar c)369 static SQBool sqstd_rex_matchclass(SQRex* exp,SQRexNode *node,SQChar c)
370 {
371 	do {
372 		switch(node->type) {
373 			case OP_RANGE:
374 				if(c >= node->left && c <= node->right) return SQTrue;
375 				break;
376 			case OP_CCLASS:
377 				if(sqstd_rex_matchcclass(node->left,c)) return SQTrue;
378 				break;
379 			default:
380 				if(c == node->type)return SQTrue;
381 		}
382 	} while((node->next != -1) && (node = &exp->_nodes[node->next]));
383 	return SQFalse;
384 }
385 
sqstd_rex_matchnode(SQRex * exp,SQRexNode * node,const SQChar * str,SQRexNode * next)386 static const SQChar *sqstd_rex_matchnode(SQRex* exp,SQRexNode *node,const SQChar *str,SQRexNode *next)
387 {
388 
389 	SQRexNodeType type = node->type;
390 	switch(type) {
391 	case OP_GREEDY: {
392 		//SQRexNode *greedystop = (node->next != -1) ? &exp->_nodes[node->next] : NULL;
393 		SQRexNode *greedystop = NULL;
394 		SQInteger p0 = (node->right >> 16)&0x0000FFFF, p1 = node->right&0x0000FFFF, nmaches = 0;
395 		const SQChar *s=str, *good = str;
396 
397 		if(node->next != -1) {
398 			greedystop = &exp->_nodes[node->next];
399 		}
400 		else {
401 			greedystop = next;
402 		}
403 
404 		while((nmaches == 0xFFFF || nmaches < p1)) {
405 
406 			const SQChar *stop;
407 			if(!(s = sqstd_rex_matchnode(exp,&exp->_nodes[node->left],s,greedystop)))
408 				break;
409 			nmaches++;
410 			good=s;
411 			if(greedystop) {
412 				//checks that 0 matches satisfy the expression(if so skips)
413 				//if not would always stop(for instance if is a '?')
414 				if(greedystop->type != OP_GREEDY ||
415 				(greedystop->type == OP_GREEDY && ((greedystop->right >> 16)&0x0000FFFF) != 0))
416 				{
417 					SQRexNode *gnext = NULL;
418 					if(greedystop->next != -1) {
419 						gnext = &exp->_nodes[greedystop->next];
420 					}else if(next && next->next != -1){
421 						gnext = &exp->_nodes[next->next];
422 					}
423 					stop = sqstd_rex_matchnode(exp,greedystop,s,gnext);
424 					if(stop) {
425 						//if satisfied stop it
426 						if(p0 == p1 && p0 == nmaches) break;
427 						else if(nmaches >= p0 && p1 == 0xFFFF) break;
428 						else if(nmaches >= p0 && nmaches <= p1) break;
429 					}
430 				}
431 			}
432 
433 			if(s >= exp->_eol)
434 				break;
435 		}
436 		if(p0 == p1 && p0 == nmaches) return good;
437 		else if(nmaches >= p0 && p1 == 0xFFFF) return good;
438 		else if(nmaches >= p0 && nmaches <= p1) return good;
439 		return NULL;
440 	}
441 	case OP_OR: {
442 			const SQChar *asd = str;
443 			SQRexNode *temp=&exp->_nodes[node->left];
444 			while( (asd = sqstd_rex_matchnode(exp,temp,asd,NULL)) ) {
445 				if(temp->next != -1)
446 					temp = &exp->_nodes[temp->next];
447 				else
448 					return asd;
449 			}
450 			asd = str;
451 			temp = &exp->_nodes[node->right];
452 			while( (asd = sqstd_rex_matchnode(exp,temp,asd,NULL)) ) {
453 				if(temp->next != -1)
454 					temp = &exp->_nodes[temp->next];
455 				else
456 					return asd;
457 			}
458 			return NULL;
459 			break;
460 	}
461 	case OP_EXPR:
462 	case OP_NOCAPEXPR:{
463 			SQRexNode *n = &exp->_nodes[node->left];
464 			const SQChar *cur = str;
465 			SQInteger capture = -1;
466 			if(node->type != OP_NOCAPEXPR && node->right == exp->_currsubexp) {
467 				capture = exp->_currsubexp;
468 				exp->_matches[capture].begin = cur;
469 				exp->_currsubexp++;
470 			}
471 			SQInteger tempcap = exp->_currsubexp;
472 			do {
473 				SQRexNode *subnext = NULL;
474 				if(n->next != -1) {
475 					subnext = &exp->_nodes[n->next];
476 				}else {
477 					subnext = next;
478 				}
479 				if(!(cur = sqstd_rex_matchnode(exp,n,cur,subnext))) {
480 					if(capture != -1){
481 						exp->_matches[capture].begin = 0;
482 						exp->_matches[capture].len = 0;
483 					}
484 					return NULL;
485 				}
486 			} while((n->next != -1) && (n = &exp->_nodes[n->next]));
487 
488 			exp->_currsubexp = tempcap;
489 			if(capture != -1)
490 				exp->_matches[capture].len = cur - exp->_matches[capture].begin;
491 			return cur;
492 	}
493 	case OP_WB:
494 		if((str == exp->_bol && !isspace(*str))
495 		 || (str == exp->_eol && !isspace(*(str-1)))
496 		 || (!isspace(*str) && isspace(*(str+1)))
497 		 || (isspace(*str) && !isspace(*(str+1))) ) {
498 			return (node->left == 'b')?str:NULL;
499 		}
500 		return (node->left == 'b')?NULL:str;
501 	case OP_BOL:
502 		if(str == exp->_bol) return str;
503 		return NULL;
504 	case OP_EOL:
505 		if(str == exp->_eol) return str;
506 		return NULL;
507 	case OP_DOT:{
508 		if (str == exp->_eol) return NULL;
509 		str++;
510 				}
511 		return str;
512 	case OP_NCLASS:
513 	case OP_CLASS:
514 		if (str == exp->_eol) return NULL;
515 		if(sqstd_rex_matchclass(exp,&exp->_nodes[node->left],*str)?(type == OP_CLASS?SQTrue:SQFalse):(type == OP_NCLASS?SQTrue:SQFalse)) {
516 			str++;
517 			return str;
518 		}
519 		return NULL;
520 	case OP_CCLASS:
521 		if (str == exp->_eol) return NULL;
522 		if(sqstd_rex_matchcclass(node->left,*str)) {
523 			str++;
524 			return str;
525 		}
526 		return NULL;
527 	case OP_MB:
528 		{
529 			SQInteger cb = node->left; //char that opens a balanced expression
530 			if(*str != cb) return NULL; // string doesnt start with open char
531 			SQInteger ce = node->right; //char that closes a balanced expression
532 			SQInteger cont = 1;
533 			const SQChar *streol = exp->_eol;
534 			while (++str < streol) {
535 			  if (*str == ce) {
536 				if (--cont == 0) {
537 					return ++str;
538 				}
539 			  }
540 			  else if (*str == cb) cont++;
541 			}
542 		}
543 		return NULL; // string ends out of balance
544 	default: /* char */
545 		if (str == exp->_eol) return NULL;
546 		if(*str != node->type) return NULL;
547 		str++;
548 		return str;
549 	}
550 	return NULL;
551 }
552 
553 /* public api */
sqstd_rex_compile(const SQChar * pattern,const SQChar ** error)554 SQRex *sqstd_rex_compile(const SQChar *pattern,const SQChar **error)
555 {
556 	SQRex * volatile exp = (SQRex *)sq_malloc(sizeof(SQRex)); // "volatile" is needed for setjmp()
557 	exp->_eol = exp->_bol = NULL;
558 	exp->_p = pattern;
559 	exp->_nallocated = (SQInteger)scstrlen(pattern) * sizeof(SQChar);
560 	exp->_nodes = (SQRexNode *)sq_malloc(exp->_nallocated * sizeof(SQRexNode));
561 	exp->_nsize = 0;
562 	exp->_matches = 0;
563 	exp->_nsubexpr = 0;
564 	exp->_first = sqstd_rex_newnode(exp,OP_EXPR);
565 	exp->_error = error;
566 	exp->_jmpbuf = sq_malloc(sizeof(jmp_buf));
567 	if(setjmp(*((jmp_buf*)exp->_jmpbuf)) == 0) {
568 		SQInteger res = sqstd_rex_list(exp);
569 		exp->_nodes[exp->_first].left = res;
570 		if(*exp->_p!='\0')
571 			sqstd_rex_error(exp,_SC("unexpected character"));
572 #ifdef _DEBUG
573 		{
574 			SQInteger nsize,i;
575 			SQRexNode *t;
576 			nsize = exp->_nsize;
577 			t = &exp->_nodes[0];
578 			scprintf(_SC("\n"));
579 			for(i = 0;i < nsize; i++) {
580 				if(exp->_nodes[i].type>MAX_CHAR)
581 					scprintf(_SC("[%02d] %10s "), (SQInt32)i,g_nnames[exp->_nodes[i].type-MAX_CHAR]);
582 				else
583 					scprintf(_SC("[%02d] %10c "), (SQInt32)i,exp->_nodes[i].type);
584 				scprintf(_SC("left %02d right %02d next %02d\n"), (SQInt32)exp->_nodes[i].left, (SQInt32)exp->_nodes[i].right, (SQInt32)exp->_nodes[i].next);
585 			}
586 			scprintf(_SC("\n"));
587 		}
588 #endif
589 		exp->_matches = (SQRexMatch *) sq_malloc(exp->_nsubexpr * sizeof(SQRexMatch));
590 		memset(exp->_matches,0,exp->_nsubexpr * sizeof(SQRexMatch));
591 	}
592 	else{
593 		sqstd_rex_free(exp);
594 		return NULL;
595 	}
596 	return exp;
597 }
598 
sqstd_rex_free(SQRex * exp)599 void sqstd_rex_free(SQRex *exp)
600 {
601 	if(exp) {
602 		if(exp->_nodes) sq_free(exp->_nodes,exp->_nallocated * sizeof(SQRexNode));
603 		if(exp->_jmpbuf) sq_free(exp->_jmpbuf,sizeof(jmp_buf));
604 		if(exp->_matches) sq_free(exp->_matches,exp->_nsubexpr * sizeof(SQRexMatch));
605 		sq_free(exp,sizeof(SQRex));
606 	}
607 }
608 
sqstd_rex_match(SQRex * exp,const SQChar * text)609 SQBool sqstd_rex_match(SQRex* exp,const SQChar* text)
610 {
611 	const SQChar* res = NULL;
612 	exp->_bol = text;
613 	exp->_eol = text + scstrlen(text);
614 	exp->_currsubexp = 0;
615 	res = sqstd_rex_matchnode(exp,exp->_nodes,text,NULL);
616 	if(res == NULL || res != exp->_eol)
617 		return SQFalse;
618 	return SQTrue;
619 }
620 
sqstd_rex_searchrange(SQRex * exp,const SQChar * text_begin,const SQChar * text_end,const SQChar ** out_begin,const SQChar ** out_end)621 SQBool sqstd_rex_searchrange(SQRex* exp,const SQChar* text_begin,const SQChar* text_end,const SQChar** out_begin, const SQChar** out_end)
622 {
623 	const SQChar *cur = NULL;
624 	SQInteger node = exp->_first;
625 	if(text_begin >= text_end) return SQFalse;
626 	exp->_bol = text_begin;
627 	exp->_eol = text_end;
628 	do {
629 		cur = text_begin;
630 		while(node != -1) {
631 			exp->_currsubexp = 0;
632 			cur = sqstd_rex_matchnode(exp,&exp->_nodes[node],cur,NULL);
633 			if(!cur)
634 				break;
635 			node = exp->_nodes[node].next;
636 		}
637 		text_begin++;
638 	} while(cur == NULL && text_begin != text_end);
639 
640 	if(cur == NULL)
641 		return SQFalse;
642 
643 	--text_begin;
644 
645 	if(out_begin) *out_begin = text_begin;
646 	if(out_end) *out_end = cur;
647 	return SQTrue;
648 }
649 
sqstd_rex_search(SQRex * exp,const SQChar * text,const SQChar ** out_begin,const SQChar ** out_end)650 SQBool sqstd_rex_search(SQRex* exp,const SQChar* text, const SQChar** out_begin, const SQChar** out_end)
651 {
652 	return sqstd_rex_searchrange(exp,text,text + scstrlen(text),out_begin,out_end);
653 }
654 
sqstd_rex_getsubexpcount(SQRex * exp)655 SQInteger sqstd_rex_getsubexpcount(SQRex* exp)
656 {
657 	return exp->_nsubexpr;
658 }
659 
sqstd_rex_getsubexp(SQRex * exp,SQInteger n,SQRexMatch * subexp)660 SQBool sqstd_rex_getsubexp(SQRex* exp, SQInteger n, SQRexMatch *subexp)
661 {
662 	if( n<0 || n >= exp->_nsubexpr) return SQFalse;
663 	*subexp = exp->_matches[n];
664 	return SQTrue;
665 }
666