1 /*
2  *  Copyright 2001-2006 Adrian Thurston <thurston@complang.org>
3  */
4 
5 /*  This file is part of Ragel.
6  *
7  *  Ragel is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  Ragel is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with Ragel; if not, write to the Free Software
19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  */
21 
22 #include <iostream>
23 #include <iomanip>
24 #include <errno.h>
25 #include <limits.h>
26 #include <stdlib.h>
27 
28 /* Parsing. */
29 #include "ragel.h"
30 #include "rlparse.h"
31 #include "parsetree.h"
32 
33 using namespace std;
34 ostream &operator<<( ostream &out, const NameRef &nameRef );
35 ostream &operator<<( ostream &out, const NameInst &nameInst );
36 
37 /* Convert the literal string which comes in from the scanner into an array of
38  * characters with escapes and options interpreted. Also null terminates the
39  * string. Though this null termination should not be relied on for
40  * interpreting literals in the parser because the string may contain \0 */
prepareLitString(const InputLoc & loc,const char * data,long length,long & resLen,bool & caseInsensitive)41 char *prepareLitString( const InputLoc &loc, const char *data, long length,
42 		long &resLen, bool &caseInsensitive )
43 {
44 	char *resData = new char[length+1];
45 	caseInsensitive = false;
46 
47 	const char *src = data + 1;
48 	const char *end = data + length - 1;
49 
50 	while ( *end != '\'' && *end != '\"' ) {
51 		if ( *end == 'i' )
52 			caseInsensitive = true;
53 		else {
54 			error( loc ) << "literal string '" << *end <<
55 					"' option not supported" << endl;
56 		}
57 		end -= 1;
58 	}
59 
60 	char *dest = resData;
61 	long len = 0;
62 	while ( src != end ) {
63 		if ( *src == '\\' ) {
64 			switch ( src[1] ) {
65 			case '0': dest[len++] = '\0'; break;
66 			case 'a': dest[len++] = '\a'; break;
67 			case 'b': dest[len++] = '\b'; break;
68 			case 't': dest[len++] = '\t'; break;
69 			case 'n': dest[len++] = '\n'; break;
70 			case 'v': dest[len++] = '\v'; break;
71 			case 'f': dest[len++] = '\f'; break;
72 			case 'r': dest[len++] = '\r'; break;
73 			case '\n':  break;
74 			default: dest[len++] = src[1]; break;
75 			}
76 			src += 2;
77 		}
78 		else {
79 			dest[len++] = *src++;
80 		}
81 	}
82 
83 	resLen = len;
84 	resData[resLen] = 0;
85 	return resData;
86 }
87 
walk(ParseData * pd)88 FsmAp *VarDef::walk( ParseData *pd )
89 {
90 	/* We enter into a new name scope. */
91 	NameFrame nameFrame = pd->enterNameScope( true, 1 );
92 
93 	/* Recurse on the expression. */
94 	FsmAp *rtnVal = machineDef->walk( pd );
95 
96 	/* Do the tranfer of local error actions. */
97 	LocalErrDictEl *localErrDictEl = pd->localErrDict.find( name );
98 	if ( localErrDictEl != 0 ) {
99 		for ( StateList::Iter state = rtnVal->stateList; state.lte(); state++ )
100 			rtnVal->transferErrorActions( state, localErrDictEl->value );
101 	}
102 
103 	/* If the expression below is a join operation with multiple expressions
104 	 * then it just had epsilon transisions resolved. If it is a join
105 	 * with only a single expression then run the epsilon op now. */
106 	if ( machineDef->type == MachineDef::JoinType && machineDef->join->exprList.length() == 1 )
107 		rtnVal->epsilonOp();
108 
109 	/* We can now unset entry points that are not longer used. */
110 	pd->unsetObsoleteEntries( rtnVal );
111 
112 	/* If the name of the variable is referenced then add the entry point to
113 	 * the graph. */
114 	if ( pd->curNameInst->numRefs > 0 )
115 		rtnVal->setEntry( pd->curNameInst->id, rtnVal->startState );
116 
117 	/* Pop the name scope. */
118 	pd->popNameScope( nameFrame );
119 	return rtnVal;
120 }
121 
makeNameTree(const InputLoc & loc,ParseData * pd)122 void VarDef::makeNameTree( const InputLoc &loc, ParseData *pd )
123 {
124 	/* The variable definition enters a new scope. */
125 	NameInst *prevNameInst = pd->curNameInst;
126 	pd->curNameInst = pd->addNameInst( loc, name, false );
127 
128 	if ( machineDef->type == MachineDef::LongestMatchType )
129 		pd->curNameInst->isLongestMatch = true;
130 
131 	/* Recurse. */
132 	machineDef->makeNameTree( pd );
133 
134 	/* The name scope ends, pop the name instantiation. */
135 	pd->curNameInst = prevNameInst;
136 }
137 
resolveNameRefs(ParseData * pd)138 void VarDef::resolveNameRefs( ParseData *pd )
139 {
140 	/* Entering into a new scope. */
141 	NameFrame nameFrame = pd->enterNameScope( true, 1 );
142 
143 	/* Recurse. */
144 	machineDef->resolveNameRefs( pd );
145 
146 	/* The name scope ends, pop the name instantiation. */
147 	pd->popNameScope( nameFrame );
148 }
149 
getLoc()150 InputLoc LongestMatchPart::getLoc()
151 {
152 	return action != 0 ? action->loc : semiLoc;
153 }
154 
155 /*
156  * If there are any LMs then all of the following entry points must reset
157  * tokstart:
158  *
159  *  1. fentry(StateRef)
160  *  2. ftoto(StateRef), fcall(StateRef), fnext(StateRef)
161  *  3. targt of any transition that has an fcall (the return loc).
162  *  4. start state of all longest match routines.
163  */
164 
newAction(ParseData * pd,const InputLoc & loc,const char * name,InlineList * inlineList)165 Action *LongestMatch::newAction( ParseData *pd, const InputLoc &loc,
166 		const char *name, InlineList *inlineList )
167 {
168 	Action *action = new Action( loc, name, inlineList, pd->nextCondId++ );
169 	action->actionRefs.append( pd->curNameInst );
170 	pd->actionList.append( action );
171 	action->isLmAction = true;
172 	return action;
173 }
174 
makeActions(ParseData * pd)175 void LongestMatch::makeActions( ParseData *pd )
176 {
177 	/* Make actions that set the action id. */
178 	for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
179 		/* For each part create actions for setting the match type.  We need
180 		 * to do this so that the actions will go into the actionIndex. */
181 		InlineList *inlineList = new InlineList;
182 		inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
183 				InlineItem::LmSetActId ) );
184 		char *actName = new char[50];
185 		sprintf( actName, "store%i", lmi->longestMatchId );
186 		lmi->setActId = newAction( pd, lmi->getLoc(), actName, inlineList );
187 	}
188 
189 	/* Make actions that execute the user action and restart on the last
190 	 * character. */
191 	for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
192 		/* For each part create actions for setting the match type.  We need
193 		 * to do this so that the actions will go into the actionIndex. */
194 		InlineList *inlineList = new InlineList;
195 		inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
196 				InlineItem::LmOnLast ) );
197 		char *actName = new char[50];
198 		sprintf( actName, "last%i", lmi->longestMatchId );
199 		lmi->actOnLast = newAction( pd, lmi->getLoc(), actName, inlineList );
200 	}
201 
202 	/* Make actions that execute the user action and restart on the next
203 	 * character.  These actions will set tokend themselves (it is the current
204 	 * char). */
205 	for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
206 		/* For each part create actions for setting the match type.  We need
207 		 * to do this so that the actions will go into the actionIndex. */
208 		InlineList *inlineList = new InlineList;
209 		inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
210 				InlineItem::LmOnNext ) );
211 		char *actName = new char[50];
212 		sprintf( actName, "next%i", lmi->longestMatchId );
213 		lmi->actOnNext = newAction( pd, lmi->getLoc(), actName, inlineList );
214 	}
215 
216 	/* Make actions that execute the user action and restart at tokend. These
217 	 * actions execute some time after matching the last char. */
218 	for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
219 		/* For each part create actions for setting the match type.  We need
220 		 * to do this so that the actions will go into the actionIndex. */
221 		InlineList *inlineList = new InlineList;
222 		inlineList->append( new InlineItem( lmi->getLoc(), this, lmi,
223 				InlineItem::LmOnLagBehind ) );
224 		char *actName = new char[50];
225 		sprintf( actName, "lag%i", lmi->longestMatchId );
226 		lmi->actLagBehind = newAction( pd, lmi->getLoc(), actName, inlineList );
227 	}
228 
229 	InputLoc loc;
230 	loc.line = 1;
231 	loc.col = 1;
232 	loc.fileName = "NONE";
233 
234 	/* Create the error action. */
235 	InlineList *il6 = new InlineList;
236 	il6->append( new InlineItem( loc, this, 0, InlineItem::LmSwitch ) );
237 	lmActSelect = newAction( pd, loc, "switch", il6 );
238 }
239 
findName(ParseData * pd)240 void LongestMatch::findName( ParseData *pd )
241 {
242 	NameInst *nameInst = pd->curNameInst;
243 	while ( nameInst->name == 0 ) {
244 		nameInst = nameInst->parent;
245 		/* Since every machine must must have a name, we should always find a
246 		 * name for the longest match. */
247 		assert( nameInst != 0 );
248 	}
249 	name = nameInst->name;
250 }
251 
makeNameTree(ParseData * pd)252 void LongestMatch::makeNameTree( ParseData *pd )
253 {
254 	/* Create an anonymous scope for the longest match. Will be used for
255 	 * restarting machine after matching a token. */
256 	NameInst *prevNameInst = pd->curNameInst;
257 	pd->curNameInst = pd->addNameInst( loc, 0, false );
258 
259 	/* Recurse into all parts of the longest match operator. */
260 	for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ )
261 		lmi->join->makeNameTree( pd );
262 
263 	/* Traverse the name tree upwards to find a name for this lm. */
264 	findName( pd );
265 
266 	/* Also make the longest match's actions at this point. */
267 	makeActions( pd );
268 
269 	/* The name scope ends, pop the name instantiation. */
270 	pd->curNameInst = prevNameInst;
271 }
272 
resolveNameRefs(ParseData * pd)273 void LongestMatch::resolveNameRefs( ParseData *pd )
274 {
275 	/* The longest match gets its own name scope. */
276 	NameFrame nameFrame = pd->enterNameScope( true, 1 );
277 
278 	/* Take an action reference for each longest match item and recurse. */
279 	for ( LmPartList::Iter lmi = *longestMatchList; lmi.lte(); lmi++ ) {
280 		/* Record the reference if the item has an action. */
281 		if ( lmi->action != 0 )
282 			lmi->action->actionRefs.append( pd->localNameScope );
283 
284 		/* Recurse down the join. */
285 		lmi->join->resolveNameRefs( pd );
286 	}
287 
288 	/* The name scope ends, pop the name instantiation. */
289 	pd->popNameScope( nameFrame );
290 }
291 
restart(FsmAp * graph,TransAp * trans)292 void LongestMatch::restart( FsmAp *graph, TransAp *trans )
293 {
294 	StateAp *fromState = trans->fromState;
295 	graph->detachTrans( fromState, trans->toState, trans );
296 	graph->attachTrans( fromState, graph->startState, trans );
297 }
298 
runLongestMatch(ParseData * pd,FsmAp * graph)299 void LongestMatch::runLongestMatch( ParseData *pd, FsmAp *graph )
300 {
301 	graph->markReachableFromHereStopFinal( graph->startState );
302 	for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
303 		if ( ms->stateBits & STB_ISMARKED ) {
304 			ms->lmItemSet.insert( 0 );
305 			ms->stateBits &= ~ STB_ISMARKED;
306 		}
307 	}
308 
309 	/* Transfer the first item of non-empty lmAction tables to the item sets
310 	 * of the states that follow. Exclude states that have no transitions out.
311 	 * This must happen on a separate pass so that on each iteration of the
312 	 * next pass we have the item set entries from all lmAction tables. */
313 	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
314 		for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
315 			if ( trans->lmActionTable.length() > 0 ) {
316 				LmActionTableEl *lmAct = trans->lmActionTable.data;
317 				StateAp *toState = trans->toState;
318 				assert( toState );
319 
320 				/* Can only optimize this if there are no transitions out.
321 				 * Note there can be out transitions going nowhere with
322 				 * actions and they too must inhibit this optimization. */
323 				if ( toState->outList.length() > 0 ) {
324 					/* Fill the item sets. */
325 					graph->markReachableFromHereStopFinal( toState );
326 					for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
327 						if ( ms->stateBits & STB_ISMARKED ) {
328 							ms->lmItemSet.insert( lmAct->value );
329 							ms->stateBits &= ~ STB_ISMARKED;
330 						}
331 					}
332 				}
333 			}
334 		}
335 	}
336 
337 	/* The lmItem sets are now filled, telling us which longest match rules
338 	 * can succeed in which states. First determine if we need to make sure
339 	 * act is defaulted to zero. We need to do this if there are any states
340 	 * with lmItemSet.length() > 1 and NULL is included. That is, that the
341 	 * switch may get called when in fact nothing has been matched. */
342 	int maxItemSetLength = 0;
343 	graph->markReachableFromHereStopFinal( graph->startState );
344 	for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
345 		if ( ms->stateBits & STB_ISMARKED ) {
346 			if ( ms->lmItemSet.length() > maxItemSetLength )
347 				maxItemSetLength = ms->lmItemSet.length();
348 			ms->stateBits &= ~ STB_ISMARKED;
349 		}
350 	}
351 
352 	/* The actions executed on starting to match a token. */
353 	graph->isolateStartState();
354 	graph->startState->toStateActionTable.setAction( pd->initTokStartOrd, pd->initTokStart );
355 	graph->startState->fromStateActionTable.setAction( pd->setTokStartOrd, pd->setTokStart );
356 	if ( maxItemSetLength > 1 ) {
357 		/* The longest match action switch may be called when tokens are
358 		 * matched, in which case act must be initialized, there must be a
359 		 * case to handle the error, and the generated machine will require an
360 		 * error state. */
361 		lmSwitchHandlesError = true;
362 		pd->lmRequiresErrorState = true;
363 		graph->startState->toStateActionTable.setAction( pd->initActIdOrd, pd->initActId );
364 	}
365 
366 	/* The place to store transitions to restart. It maybe possible for the
367 	 * restarting to affect the searching through the graph that follows. For
368 	 * now take the safe route and save the list of transitions to restart
369 	 * until after all searching is done. */
370 	Vector<TransAp*> restartTrans;
371 
372 	/* Set actions that do immediate token recognition, set the longest match part
373 	 * id and set the token ending. */
374 	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
375 		for ( TransList::Iter trans = st->outList; trans.lte(); trans++ ) {
376 			if ( trans->lmActionTable.length() > 0 ) {
377 				LmActionTableEl *lmAct = trans->lmActionTable.data;
378 				StateAp *toState = trans->toState;
379 				assert( toState );
380 
381 				/* Can only optimize this if there are no transitions out.
382 				 * Note there can be out transitions going nowhere with
383 				 * actions and they too must inhibit this optimization. */
384 				if ( toState->outList.length() == 0 ) {
385 					/* Can execute the immediate action for the longest match
386 					 * part. Redirect the action to the start state.
387 					 *
388 					 * NOTE: When we need to inhibit on_last due to leaving
389 					 * actions the above test suffices. If the state has out
390 					 * actions then it will fail because the out action will
391 					 * have been transferred to an error transition, which
392 					 * makes the outlist non-empty. */
393 					trans->actionTable.setAction( lmAct->key,
394 							lmAct->value->actOnLast );
395 					restartTrans.append( trans );
396 				}
397 				else {
398 					/* Look for non final states that have a non-empty item
399 					 * set. If these are present then we need to record the
400 					 * end of the token.  Also Find the highest item set
401 					 * length reachable from here (excluding at transtions to
402 					 * final states). */
403 					bool nonFinalNonEmptyItemSet = false;
404 					maxItemSetLength = 0;
405 					graph->markReachableFromHereStopFinal( toState );
406 					for ( StateList::Iter ms = graph->stateList; ms.lte(); ms++ ) {
407 						if ( ms->stateBits & STB_ISMARKED ) {
408 							if ( ms->lmItemSet.length() > 0 && !ms->isFinState() )
409 								nonFinalNonEmptyItemSet = true;
410 							if ( ms->lmItemSet.length() > maxItemSetLength )
411 								maxItemSetLength = ms->lmItemSet.length();
412 							ms->stateBits &= ~ STB_ISMARKED;
413 						}
414 					}
415 
416 					/* If there are reachable states that are not final and
417 					 * have non empty item sets or that have an item set
418 					 * length greater than one then we need to set tokend
419 					 * because the error action that matches the token will
420 					 * require it. */
421 					if ( nonFinalNonEmptyItemSet || maxItemSetLength > 1 )
422 						trans->actionTable.setAction( pd->setTokEndOrd, pd->setTokEnd );
423 
424 					/* Some states may not know which longest match item to
425 					 * execute, must set it. */
426 					if ( maxItemSetLength > 1 ) {
427 						/* There are transitions out, another match may come. */
428 						trans->actionTable.setAction( lmAct->key,
429 								lmAct->value->setActId );
430 					}
431 				}
432 			}
433 		}
434 	}
435 
436 	/* Now that all graph searching is done it certainly safe set the
437 	 * restarting. It may be safe above, however this must be verified. */
438 	for ( Vector<TransAp*>::Iter pt = restartTrans; pt.lte(); pt++ )
439 		restart( graph, *pt );
440 
441 	int lmErrActionOrd = pd->curActionOrd++;
442 
443 	/* Embed the error for recognizing a char. */
444 	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
445 		if ( st->lmItemSet.length() == 1 && st->lmItemSet[0] != 0 ) {
446 			if ( st->isFinState() ) {
447 				/* On error execute the onActNext action, which knows that
448 				 * the last character of the token was one back and restart. */
449 				graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
450 						&st->lmItemSet[0]->actOnNext, 1 );
451 				st->eofActionTable.setAction( lmErrActionOrd,
452 						st->lmItemSet[0]->actOnNext );
453 				st->eofTarget = graph->startState;
454 			}
455 			else {
456 				graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
457 						&st->lmItemSet[0]->actLagBehind, 1 );
458 				st->eofActionTable.setAction( lmErrActionOrd,
459 						st->lmItemSet[0]->actLagBehind );
460 				st->eofTarget = graph->startState;
461 			}
462 		}
463 		else if ( st->lmItemSet.length() > 1 ) {
464 			/* Need to use the select. Take note of which items the select
465 			 * is needed for so only the necessary actions are included. */
466 			for ( LmItemSet::Iter plmi = st->lmItemSet; plmi.lte(); plmi++ ) {
467 				if ( *plmi != 0 )
468 					(*plmi)->inLmSelect = true;
469 			}
470 			/* On error, execute the action select and go to the start state. */
471 			graph->setErrorTarget( st, graph->startState, &lmErrActionOrd,
472 					&lmActSelect, 1 );
473 			st->eofActionTable.setAction( lmErrActionOrd, lmActSelect );
474 			st->eofTarget = graph->startState;
475 		}
476 	}
477 
478 	/* Finally, the start state should be made final. */
479 	graph->setFinState( graph->startState );
480 }
481 
transferScannerLeavingActions(FsmAp * graph)482 void LongestMatch::transferScannerLeavingActions( FsmAp *graph )
483 {
484 	for ( StateList::Iter st = graph->stateList; st.lte(); st++ ) {
485 		if ( st->outActionTable.length() > 0 )
486 			graph->setErrorActions( st, st->outActionTable );
487 	}
488 }
489 
walk(ParseData * pd)490 FsmAp *LongestMatch::walk( ParseData *pd )
491 {
492 	/* The longest match has it's own name scope. */
493 	NameFrame nameFrame = pd->enterNameScope( true, 1 );
494 
495 	/* Make each part of the longest match. */
496 	FsmAp **parts = new FsmAp*[longestMatchList->length()];
497 	LmPartList::Iter lmi = *longestMatchList;
498 	for ( int i = 0; lmi.lte(); lmi++, i++ ) {
499 		/* Create the machine and embed the setting of the longest match id. */
500 		parts[i] = lmi->join->walk( pd );
501 		parts[i]->longMatchAction( pd->curActionOrd++, lmi );
502 	}
503 
504 	/* Before we union the patterns we need to deal with leaving actions. They
505 	 * are transfered to error transitions out of the final states (like local
506 	 * error actions) and to eof actions. In the scanner we need to forbid
507 	 * on_last for any final state that has an leaving action. */
508 	for ( int i = 0; i < longestMatchList->length(); i++ )
509 		transferScannerLeavingActions( parts[i] );
510 
511 	/* Union machines one and up with machine zero. The grammar dictates that
512 	 * there will always be at least one part. */
513 	FsmAp *rtnVal = parts[0];
514 	for ( int i = 1; i < longestMatchList->length(); i++ ) {
515 		rtnVal->unionOp( parts[i] );
516 		afterOpMinimize( rtnVal );
517 	}
518 
519 	runLongestMatch( pd, rtnVal );
520 
521 	/* Pop the name scope. */
522 	pd->popNameScope( nameFrame );
523 
524 	delete[] parts;
525 	return rtnVal;
526 }
527 
walk(ParseData * pd)528 FsmAp *MachineDef::walk( ParseData *pd )
529 {
530 	FsmAp *rtnVal = 0;
531 	switch ( type ) {
532 	case JoinType:
533 		rtnVal = join->walk( pd );
534 		break;
535 	case LongestMatchType:
536 		rtnVal = longestMatch->walk( pd );
537 		break;
538 	case LengthDefType:
539 		condData->lastCondKey.increment();
540 		rtnVal = new FsmAp();
541 		rtnVal->concatFsm( condData->lastCondKey );
542 		break;
543 	}
544 	return rtnVal;
545 }
546 
makeNameTree(ParseData * pd)547 void MachineDef::makeNameTree( ParseData *pd )
548 {
549 	switch ( type ) {
550 	case JoinType:
551 		join->makeNameTree( pd );
552 		break;
553 	case LongestMatchType:
554 		longestMatch->makeNameTree( pd );
555 		break;
556 	case LengthDefType:
557 		break;
558 	}
559 }
560 
resolveNameRefs(ParseData * pd)561 void MachineDef::resolveNameRefs( ParseData *pd )
562 {
563 	switch ( type ) {
564 	case JoinType:
565 		join->resolveNameRefs( pd );
566 		break;
567 	case LongestMatchType:
568 		longestMatch->resolveNameRefs( pd );
569 		break;
570 	case LengthDefType:
571 		break;
572 	}
573 }
574 
575 
576 /* Construct with a location and the first expression. */
Join(const InputLoc & loc,Expression * expr)577 Join::Join( const InputLoc &loc, Expression *expr )
578 :
579 	loc(loc)
580 {
581 	exprList.append( expr );
582 }
583 
584 /* Construct with a location and the first expression. */
Join(Expression * expr)585 Join::Join( Expression *expr )
586 {
587 	exprList.append( expr );
588 }
589 
590 /* Walk an expression node. */
walk(ParseData * pd)591 FsmAp *Join::walk( ParseData *pd )
592 {
593 	if ( exprList.length() > 1 )
594 		return walkJoin( pd );
595 	else
596 		return exprList.head->walk( pd );
597 }
598 
599 /* There is a list of expressions to join. */
walkJoin(ParseData * pd)600 FsmAp *Join::walkJoin( ParseData *pd )
601 {
602 	/* We enter into a new name scope. */
603 	NameFrame nameFrame = pd->enterNameScope( true, 1 );
604 
605 	/* Evaluate the machines. */
606 	FsmAp **fsms = new FsmAp*[exprList.length()];
607 	ExprList::Iter expr = exprList;
608 	for ( int e = 0; e < exprList.length(); e++, expr++ )
609 		fsms[e] = expr->walk( pd );
610 
611 	/* Get the start and final names. Final is
612 	 * guaranteed to exist, start is not. */
613 	NameInst *startName = pd->curNameInst->start;
614 	NameInst *finalName = pd->curNameInst->final;
615 
616 	int startId = -1;
617 	if ( startName != 0 ) {
618 		/* Take note that there was an implicit link to the start machine. */
619 		pd->localNameScope->referencedNames.append( startName );
620 		startId = startName->id;
621 	}
622 
623 	/* A final id of -1 indicates there is no epsilon that references the
624 	 * final state, therefor do not create one or set an entry point to it. */
625 	int finalId = -1;
626 	if ( finalName->numRefs > 0 )
627 		finalId = finalName->id;
628 
629 	/* Join machines 1 and up onto machine 0. */
630 	FsmAp *retFsm = fsms[0];
631 	retFsm->joinOp( startId, finalId, fsms+1, exprList.length()-1 );
632 
633 	/* We can now unset entry points that are not longer used. */
634 	pd->unsetObsoleteEntries( retFsm );
635 
636 	/* Pop the name scope. */
637 	pd->popNameScope( nameFrame );
638 
639 	delete[] fsms;
640 	return retFsm;
641 }
642 
makeNameTree(ParseData * pd)643 void Join::makeNameTree( ParseData *pd )
644 {
645 	if ( exprList.length() > 1 ) {
646 		/* Create the new anonymous scope. */
647 		NameInst *prevNameInst = pd->curNameInst;
648 		pd->curNameInst = pd->addNameInst( loc, 0, false );
649 
650 		/* Join scopes need an implicit "final" target. */
651 		pd->curNameInst->final = new NameInst( InputLoc(), pd->curNameInst, "final",
652 				pd->nextNameId++, false );
653 
654 		/* Recurse into all expressions in the list. */
655 		for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
656 			expr->makeNameTree( pd );
657 
658 		/* The name scope ends, pop the name instantiation. */
659 		pd->curNameInst = prevNameInst;
660 	}
661 	else {
662 		/* Recurse into the single expression. */
663 		exprList.head->makeNameTree( pd );
664 	}
665 }
666 
667 
resolveNameRefs(ParseData * pd)668 void Join::resolveNameRefs( ParseData *pd )
669 {
670 	/* Branch on whether or not there is to be a join. */
671 	if ( exprList.length() > 1 ) {
672 		/* The variable definition enters a new scope. */
673 		NameFrame nameFrame = pd->enterNameScope( true, 1 );
674 
675 		/* The join scope must contain a start label. */
676 		NameSet resolved = pd->resolvePart( pd->localNameScope, "start", true );
677 		if ( resolved.length() > 0 ) {
678 			/* Take the first. */
679 			pd->curNameInst->start = resolved[0];
680 			if ( resolved.length() > 1 ) {
681 				/* Complain about the multiple references. */
682 				error(loc) << "join operation has multiple start labels" << endl;
683 				errorStateLabels( resolved );
684 			}
685 		}
686 
687 		/* Make sure there is a start label. */
688 		if ( pd->curNameInst->start != 0 ) {
689 			/* There is an implicit reference to start name. */
690 			pd->curNameInst->start->numRefs += 1;
691 		}
692 		else {
693 			/* No start label. */
694 			error(loc) << "join operation has no start label" << endl;
695 		}
696 
697 		/* Recurse into all expressions in the list. */
698 		for ( ExprList::Iter expr = exprList; expr.lte(); expr++ )
699 			expr->resolveNameRefs( pd );
700 
701 		/* The name scope ends, pop the name instantiation. */
702 		pd->popNameScope( nameFrame );
703 	}
704 	else {
705 		/* Recurse into the single expression. */
706 		exprList.head->resolveNameRefs( pd );
707 	}
708 }
709 
710 /* Clean up after an expression node. */
~Expression()711 Expression::~Expression()
712 {
713 	switch ( type ) {
714 		case OrType: case IntersectType: case SubtractType:
715 		case StrongSubtractType:
716 			delete expression;
717 			delete term;
718 			break;
719 		case TermType:
720 			delete term;
721 			break;
722 		case BuiltinType:
723 			break;
724 	}
725 }
726 
727 /* Evaluate a single expression node. */
walk(ParseData * pd,bool lastInSeq)728 FsmAp *Expression::walk( ParseData *pd, bool lastInSeq )
729 {
730 	FsmAp *rtnVal = 0;
731 	switch ( type ) {
732 		case OrType: {
733 			/* Evaluate the expression. */
734 			rtnVal = expression->walk( pd, false );
735 			/* Evaluate the term. */
736 			FsmAp *rhs = term->walk( pd );
737 			/* Perform union. */
738 			rtnVal->unionOp( rhs );
739 			afterOpMinimize( rtnVal, lastInSeq );
740 			break;
741 		}
742 		case IntersectType: {
743 			/* Evaluate the expression. */
744 			rtnVal = expression->walk( pd );
745 			/* Evaluate the term. */
746 			FsmAp *rhs = term->walk( pd );
747 			/* Perform intersection. */
748 			rtnVal->intersectOp( rhs );
749 			afterOpMinimize( rtnVal, lastInSeq );
750 			break;
751 		}
752 		case SubtractType: {
753 			/* Evaluate the expression. */
754 			rtnVal = expression->walk( pd );
755 			/* Evaluate the term. */
756 			FsmAp *rhs = term->walk( pd );
757 			/* Perform subtraction. */
758 			rtnVal->subtractOp( rhs );
759 			afterOpMinimize( rtnVal, lastInSeq );
760 			break;
761 		}
762 		case StrongSubtractType: {
763 			/* Evaluate the expression. */
764 			rtnVal = expression->walk( pd );
765 
766 			/* Evaluate the term and pad it with any* machines. */
767 			FsmAp *rhs = dotStarFsm( pd );
768 			FsmAp *termFsm = term->walk( pd );
769 			FsmAp *trailAnyStar = dotStarFsm( pd );
770 			rhs->concatOp( termFsm );
771 			rhs->concatOp( trailAnyStar );
772 
773 			/* Perform subtraction. */
774 			rtnVal->subtractOp( rhs );
775 			afterOpMinimize( rtnVal, lastInSeq );
776 			break;
777 		}
778 		case TermType: {
779 			/* Return result of the term. */
780 			rtnVal = term->walk( pd );
781 			break;
782 		}
783 		case BuiltinType: {
784 			/* Duplicate the builtin. */
785 			rtnVal = makeBuiltin( builtin, pd );
786 			break;
787 		}
788 	}
789 
790 	return rtnVal;
791 }
792 
makeNameTree(ParseData * pd)793 void Expression::makeNameTree( ParseData *pd )
794 {
795 	switch ( type ) {
796 	case OrType:
797 	case IntersectType:
798 	case SubtractType:
799 	case StrongSubtractType:
800 		expression->makeNameTree( pd );
801 		term->makeNameTree( pd );
802 		break;
803 	case TermType:
804 		term->makeNameTree( pd );
805 		break;
806 	case BuiltinType:
807 		break;
808 	}
809 }
810 
resolveNameRefs(ParseData * pd)811 void Expression::resolveNameRefs( ParseData *pd )
812 {
813 	switch ( type ) {
814 	case OrType:
815 	case IntersectType:
816 	case SubtractType:
817 	case StrongSubtractType:
818 		expression->resolveNameRefs( pd );
819 		term->resolveNameRefs( pd );
820 		break;
821 	case TermType:
822 		term->resolveNameRefs( pd );
823 		break;
824 	case BuiltinType:
825 		break;
826 	}
827 }
828 
829 /* Clean up after a term node. */
~Term()830 Term::~Term()
831 {
832 	switch ( type ) {
833 		case ConcatType:
834 		case RightStartType:
835 		case RightFinishType:
836 		case LeftType:
837 			delete term;
838 			delete factorWithAug;
839 			break;
840 		case FactorWithAugType:
841 			delete factorWithAug;
842 			break;
843 	}
844 }
845 
846 /* Evaluate a term node. */
walk(ParseData * pd,bool lastInSeq)847 FsmAp *Term::walk( ParseData *pd, bool lastInSeq )
848 {
849 	FsmAp *rtnVal = 0;
850 	switch ( type ) {
851 		case ConcatType: {
852 			/* Evaluate the Term. */
853 			rtnVal = term->walk( pd, false );
854 			/* Evaluate the FactorWithRep. */
855 			FsmAp *rhs = factorWithAug->walk( pd );
856 			/* Perform concatenation. */
857 			rtnVal->concatOp( rhs );
858 			afterOpMinimize( rtnVal, lastInSeq );
859 			break;
860 		}
861 		case RightStartType: {
862 			/* Evaluate the Term. */
863 			rtnVal = term->walk( pd );
864 
865 			/* Evaluate the FactorWithRep. */
866 			FsmAp *rhs = factorWithAug->walk( pd );
867 
868 			/* Set up the priority descriptors. The left machine gets the
869 			 * lower priority where as the right get the higher start priority. */
870 			priorDescs[0].key = pd->nextPriorKey++;
871 			priorDescs[0].priority = 0;
872 			rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
873 
874 			/* The start transitions of the right machine gets the higher
875 			 * priority. Use the same unique key. */
876 			priorDescs[1].key = priorDescs[0].key;
877 			priorDescs[1].priority = 1;
878 			rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
879 
880 			/* Perform concatenation. */
881 			rtnVal->concatOp( rhs );
882 			afterOpMinimize( rtnVal, lastInSeq );
883 			break;
884 		}
885 		case RightFinishType: {
886 			/* Evaluate the Term. */
887 			rtnVal = term->walk( pd );
888 
889 			/* Evaluate the FactorWithRep. */
890 			FsmAp *rhs = factorWithAug->walk( pd );
891 
892 			/* Set up the priority descriptors. The left machine gets the
893 			 * lower priority where as the finishing transitions to the right
894 			 * get the higher priority. */
895 			priorDescs[0].key = pd->nextPriorKey++;
896 			priorDescs[0].priority = 0;
897 			rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
898 
899 			/* The finishing transitions of the right machine get the higher
900 			 * priority. Use the same unique key. */
901 			priorDescs[1].key = priorDescs[0].key;
902 			priorDescs[1].priority = 1;
903 			rhs->finishFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
904 
905 			/* If the right machine's start state is final we need to guard
906 			 * against the left machine persisting by moving through the empty
907 			 * string. */
908 			if ( rhs->startState->isFinState() ) {
909 				rhs->startState->outPriorTable.setPrior(
910 						pd->curPriorOrd++, &priorDescs[1] );
911 			}
912 
913 			/* Perform concatenation. */
914 			rtnVal->concatOp( rhs );
915 			afterOpMinimize( rtnVal, lastInSeq );
916 			break;
917 		}
918 		case LeftType: {
919 			/* Evaluate the Term. */
920 			rtnVal = term->walk( pd );
921 
922 			/* Evaluate the FactorWithRep. */
923 			FsmAp *rhs = factorWithAug->walk( pd );
924 
925 			/* Set up the priority descriptors. The left machine gets the
926 			 * higher priority. */
927 			priorDescs[0].key = pd->nextPriorKey++;
928 			priorDescs[0].priority = 1;
929 			rtnVal->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
930 
931 			/* The right machine gets the lower priority. We cannot use
932 			 * allTransPrior here in case the start state of the right machine
933 			 * is final. It would allow the right machine thread to run along
934 			 * with the left if just passing through the start state. Using
935 			 * startFsmPrior prevents this. */
936 			priorDescs[1].key = priorDescs[0].key;
937 			priorDescs[1].priority = 0;
938 			rhs->startFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
939 
940 			/* Perform concatenation. */
941 			rtnVal->concatOp( rhs );
942 			afterOpMinimize( rtnVal, lastInSeq );
943 			break;
944 		}
945 		case FactorWithAugType: {
946 			rtnVal = factorWithAug->walk( pd );
947 			break;
948 		}
949 	}
950 	return rtnVal;
951 }
952 
makeNameTree(ParseData * pd)953 void Term::makeNameTree( ParseData *pd )
954 {
955 	switch ( type ) {
956 	case ConcatType:
957 	case RightStartType:
958 	case RightFinishType:
959 	case LeftType:
960 		term->makeNameTree( pd );
961 		factorWithAug->makeNameTree( pd );
962 		break;
963 	case FactorWithAugType:
964 		factorWithAug->makeNameTree( pd );
965 		break;
966 	}
967 }
968 
resolveNameRefs(ParseData * pd)969 void Term::resolveNameRefs( ParseData *pd )
970 {
971 	switch ( type ) {
972 	case ConcatType:
973 	case RightStartType:
974 	case RightFinishType:
975 	case LeftType:
976 		term->resolveNameRefs( pd );
977 		factorWithAug->resolveNameRefs( pd );
978 		break;
979 	case FactorWithAugType:
980 		factorWithAug->resolveNameRefs( pd );
981 		break;
982 	}
983 }
984 
985 /* Clean up after a factor with augmentation node. */
~FactorWithAug()986 FactorWithAug::~FactorWithAug()
987 {
988 	delete factorWithRep;
989 
990 	/* Walk the vector of parser actions, deleting function names. */
991 
992 	/* Clean up priority descriptors. */
993 	if ( priorDescs != 0 )
994 		delete[] priorDescs;
995 }
996 
assignActions(ParseData * pd,FsmAp * graph,int * actionOrd)997 void FactorWithAug::assignActions( ParseData *pd, FsmAp *graph, int *actionOrd )
998 {
999 	/* Assign actions. */
1000 	for ( int i = 0; i < actions.length(); i++ )  {
1001 		switch ( actions[i].type ) {
1002 		/* Transition actions. */
1003 		case at_start:
1004 			graph->startFsmAction( actionOrd[i], actions[i].action );
1005 			afterOpMinimize( graph );
1006 			break;
1007 		case at_all:
1008 			graph->allTransAction( actionOrd[i], actions[i].action );
1009 			break;
1010 		case at_finish:
1011 			graph->finishFsmAction( actionOrd[i], actions[i].action );
1012 			break;
1013 		case at_leave:
1014 			graph->leaveFsmAction( actionOrd[i], actions[i].action );
1015 			break;
1016 
1017 		/* Global error actions. */
1018 		case at_start_gbl_error:
1019 			graph->startErrorAction( actionOrd[i], actions[i].action, 0 );
1020 			afterOpMinimize( graph );
1021 			break;
1022 		case at_all_gbl_error:
1023 			graph->allErrorAction( actionOrd[i], actions[i].action, 0 );
1024 			break;
1025 		case at_final_gbl_error:
1026 			graph->finalErrorAction( actionOrd[i], actions[i].action, 0 );
1027 			break;
1028 		case at_not_start_gbl_error:
1029 			graph->notStartErrorAction( actionOrd[i], actions[i].action, 0 );
1030 			break;
1031 		case at_not_final_gbl_error:
1032 			graph->notFinalErrorAction( actionOrd[i], actions[i].action, 0 );
1033 			break;
1034 		case at_middle_gbl_error:
1035 			graph->middleErrorAction( actionOrd[i], actions[i].action, 0 );
1036 			break;
1037 
1038 		/* Local error actions. */
1039 		case at_start_local_error:
1040 			graph->startErrorAction( actionOrd[i], actions[i].action,
1041 					actions[i].localErrKey );
1042 			afterOpMinimize( graph );
1043 			break;
1044 		case at_all_local_error:
1045 			graph->allErrorAction( actionOrd[i], actions[i].action,
1046 					actions[i].localErrKey );
1047 			break;
1048 		case at_final_local_error:
1049 			graph->finalErrorAction( actionOrd[i], actions[i].action,
1050 					actions[i].localErrKey );
1051 			break;
1052 		case at_not_start_local_error:
1053 			graph->notStartErrorAction( actionOrd[i], actions[i].action,
1054 					actions[i].localErrKey );
1055 			break;
1056 		case at_not_final_local_error:
1057 			graph->notFinalErrorAction( actionOrd[i], actions[i].action,
1058 					actions[i].localErrKey );
1059 			break;
1060 		case at_middle_local_error:
1061 			graph->middleErrorAction( actionOrd[i], actions[i].action,
1062 					actions[i].localErrKey );
1063 			break;
1064 
1065 		/* EOF actions. */
1066 		case at_start_eof:
1067 			graph->startEOFAction( actionOrd[i], actions[i].action );
1068 			afterOpMinimize( graph );
1069 			break;
1070 		case at_all_eof:
1071 			graph->allEOFAction( actionOrd[i], actions[i].action );
1072 			break;
1073 		case at_final_eof:
1074 			graph->finalEOFAction( actionOrd[i], actions[i].action );
1075 			break;
1076 		case at_not_start_eof:
1077 			graph->notStartEOFAction( actionOrd[i], actions[i].action );
1078 			break;
1079 		case at_not_final_eof:
1080 			graph->notFinalEOFAction( actionOrd[i], actions[i].action );
1081 			break;
1082 		case at_middle_eof:
1083 			graph->middleEOFAction( actionOrd[i], actions[i].action );
1084 			break;
1085 
1086 		/* To State Actions. */
1087 		case at_start_to_state:
1088 			graph->startToStateAction( actionOrd[i], actions[i].action );
1089 			afterOpMinimize( graph );
1090 			break;
1091 		case at_all_to_state:
1092 			graph->allToStateAction( actionOrd[i], actions[i].action );
1093 			break;
1094 		case at_final_to_state:
1095 			graph->finalToStateAction( actionOrd[i], actions[i].action );
1096 			break;
1097 		case at_not_start_to_state:
1098 			graph->notStartToStateAction( actionOrd[i], actions[i].action );
1099 			break;
1100 		case at_not_final_to_state:
1101 			graph->notFinalToStateAction( actionOrd[i], actions[i].action );
1102 			break;
1103 		case at_middle_to_state:
1104 			graph->middleToStateAction( actionOrd[i], actions[i].action );
1105 			break;
1106 
1107 		/* From State Actions. */
1108 		case at_start_from_state:
1109 			graph->startFromStateAction( actionOrd[i], actions[i].action );
1110 			afterOpMinimize( graph );
1111 			break;
1112 		case at_all_from_state:
1113 			graph->allFromStateAction( actionOrd[i], actions[i].action );
1114 			break;
1115 		case at_final_from_state:
1116 			graph->finalFromStateAction( actionOrd[i], actions[i].action );
1117 			break;
1118 		case at_not_start_from_state:
1119 			graph->notStartFromStateAction( actionOrd[i], actions[i].action );
1120 			break;
1121 		case at_not_final_from_state:
1122 			graph->notFinalFromStateAction( actionOrd[i], actions[i].action );
1123 			break;
1124 		case at_middle_from_state:
1125 			graph->middleFromStateAction( actionOrd[i], actions[i].action );
1126 			break;
1127 
1128 		/* Remaining cases, prevented by the parser. */
1129 		default:
1130 			assert( false );
1131 			break;
1132 		}
1133 	}
1134 }
1135 
assignPriorities(FsmAp * graph,int * priorOrd)1136 void FactorWithAug::assignPriorities( FsmAp *graph, int *priorOrd )
1137 {
1138 	/* Assign priorities. */
1139 	for ( int i = 0; i < priorityAugs.length(); i++ ) {
1140 		switch ( priorityAugs[i].type ) {
1141 		case at_start:
1142 			graph->startFsmPrior( priorOrd[i], &priorDescs[i]);
1143 			/* Start fsm priorities are a special case that may require
1144 			 * minimization afterwards. */
1145 			afterOpMinimize( graph );
1146 			break;
1147 		case at_all:
1148 			graph->allTransPrior( priorOrd[i], &priorDescs[i] );
1149 			break;
1150 		case at_finish:
1151 			graph->finishFsmPrior( priorOrd[i], &priorDescs[i] );
1152 			break;
1153 		case at_leave:
1154 			graph->leaveFsmPrior( priorOrd[i], &priorDescs[i] );
1155 			break;
1156 
1157 		default:
1158 			/* Parser Prevents this case. */
1159 			break;
1160 		}
1161 	}
1162 }
1163 
assignConditions(FsmAp * graph)1164 void FactorWithAug::assignConditions( FsmAp *graph )
1165 {
1166 	for ( int i = 0; i < conditions.length(); i++ )  {
1167 		switch ( conditions[i].type ) {
1168 		/* Transition actions. */
1169 		case at_start:
1170 			graph->startFsmCondition( conditions[i].action, conditions[i].sense );
1171 			afterOpMinimize( graph );
1172 			break;
1173 		case at_all:
1174 			graph->allTransCondition( conditions[i].action, conditions[i].sense );
1175 			break;
1176 		case at_leave:
1177 			graph->leaveFsmCondition( conditions[i].action, conditions[i].sense );
1178 			break;
1179 		default:
1180 			break;
1181 		}
1182 	}
1183 }
1184 
1185 
1186 /* Evaluate a factor with augmentation node. */
walk(ParseData * pd)1187 FsmAp *FactorWithAug::walk( ParseData *pd )
1188 {
1189 	/* Enter into the scopes created for the labels. */
1190 	NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1191 
1192 	/* Make the array of function orderings. */
1193 	int *actionOrd = 0;
1194 	if ( actions.length() > 0 )
1195 		actionOrd = new int[actions.length()];
1196 
1197 	/* First walk the list of actions, assigning order to all starting
1198 	 * actions. */
1199 	for ( int i = 0; i < actions.length(); i++ ) {
1200 		if ( actions[i].type == at_start ||
1201 				actions[i].type == at_start_gbl_error ||
1202 				actions[i].type == at_start_local_error ||
1203 				actions[i].type == at_start_to_state ||
1204 				actions[i].type == at_start_from_state ||
1205 				actions[i].type == at_start_eof )
1206 			actionOrd[i] = pd->curActionOrd++;
1207 	}
1208 
1209 	/* Evaluate the factor with repetition. */
1210 	FsmAp *rtnVal = factorWithRep->walk( pd );
1211 
1212 	/* Compute the remaining action orderings. */
1213 	for ( int i = 0; i < actions.length(); i++ ) {
1214 		if ( actions[i].type != at_start &&
1215 				actions[i].type != at_start_gbl_error &&
1216 				actions[i].type != at_start_local_error &&
1217 				actions[i].type != at_start_to_state &&
1218 				actions[i].type != at_start_from_state &&
1219 				actions[i].type != at_start_eof )
1220 			actionOrd[i] = pd->curActionOrd++;
1221 	}
1222 
1223 	/* Embed conditions. */
1224 	assignConditions( rtnVal );
1225 
1226 	/* Embed actions. */
1227 	assignActions( pd, rtnVal , actionOrd );
1228 
1229 	/* Make the array of priority orderings. Orderings are local to this walk
1230 	 * of the factor with augmentation. */
1231 	int *priorOrd = 0;
1232 	if ( priorityAugs.length() > 0 )
1233 		priorOrd = new int[priorityAugs.length()];
1234 
1235 	/* Walk all priorities, assigning the priority ordering. */
1236 	for ( int i = 0; i < priorityAugs.length(); i++ )
1237 		priorOrd[i] = pd->curPriorOrd++;
1238 
1239 	/* If the priority descriptors have not been made, make them now.  Make
1240 	 * priority descriptors for each priority asignment that will be passed to
1241 	 * the fsm. Used to keep track of the key, value and used bit. */
1242 	if ( priorDescs == 0 && priorityAugs.length() > 0 ) {
1243 		priorDescs = new PriorDesc[priorityAugs.length()];
1244 		for ( int i = 0; i < priorityAugs.length(); i++ ) {
1245 			/* Init the prior descriptor for the priority setting. */
1246 			priorDescs[i].key = priorityAugs[i].priorKey;
1247 			priorDescs[i].priority = priorityAugs[i].priorValue;
1248 		}
1249 	}
1250 
1251 	/* Assign priorities into the machine. */
1252 	assignPriorities( rtnVal, priorOrd );
1253 
1254 	/* Assign epsilon transitions. */
1255 	for ( int e = 0; e < epsilonLinks.length(); e++ ) {
1256 		/* Get the name, which may not exist. If it doesn't then silently
1257 		 * ignore it because an error has already been reported. */
1258 		NameInst *epTarg = pd->epsilonResolvedLinks[pd->nextEpsilonResolvedLink++];
1259 		if ( epTarg != 0 ) {
1260 			/* Make the epsilon transitions. */
1261 			rtnVal->epsilonTrans( epTarg->id );
1262 
1263 			/* Note that we have made a link to the name. */
1264 			pd->localNameScope->referencedNames.append( epTarg );
1265 		}
1266 	}
1267 
1268 	/* Set entry points for labels. */
1269 	if ( labels.length() > 0 ) {
1270 		/* Pop the names. */
1271 		pd->resetNameScope( nameFrame );
1272 
1273 		/* Make labels that are referenced into entry points. */
1274 		for ( int i = 0; i < labels.length(); i++ ) {
1275 			pd->enterNameScope( false, 1 );
1276 
1277 			/* Will always be found. */
1278 			NameInst *name = pd->curNameInst;
1279 
1280 			/* If the name is referenced then set the entry point. */
1281 			if ( name->numRefs > 0 )
1282 				rtnVal->setEntry( name->id, rtnVal->startState );
1283 		}
1284 
1285 		pd->popNameScope( nameFrame );
1286 	}
1287 
1288 	if ( priorOrd != 0 )
1289 		delete[] priorOrd;
1290 	if ( actionOrd != 0 )
1291 		delete[] actionOrd;
1292 	return rtnVal;
1293 }
1294 
makeNameTree(ParseData * pd)1295 void FactorWithAug::makeNameTree( ParseData *pd )
1296 {
1297 	/* Add the labels to the tree of instantiated names. Each label
1298 	 * makes a new scope. */
1299 	NameInst *prevNameInst = pd->curNameInst;
1300 	for ( int i = 0; i < labels.length(); i++ )
1301 		pd->curNameInst = pd->addNameInst( labels[i].loc, labels[i].data, true );
1302 
1303 	/* Recurse, then pop the names. */
1304 	factorWithRep->makeNameTree( pd );
1305 	pd->curNameInst = prevNameInst;
1306 }
1307 
1308 
resolveNameRefs(ParseData * pd)1309 void FactorWithAug::resolveNameRefs( ParseData *pd )
1310 {
1311 	/* Enter into the name scope created by any labels. */
1312 	NameFrame nameFrame = pd->enterNameScope( false, labels.length() );
1313 
1314 	/* Note action references. */
1315 	for ( int i = 0; i < actions.length(); i++ )
1316 		actions[i].action->actionRefs.append( pd->localNameScope );
1317 
1318 	/* Recurse first. IMPORTANT: we must do the exact same traversal as when
1319 	 * the tree is constructed. */
1320 	factorWithRep->resolveNameRefs( pd );
1321 
1322 	/* Resolve epsilon transitions. */
1323 	for ( int ep = 0; ep < epsilonLinks.length(); ep++ ) {
1324 		/* Get the link. */
1325 		EpsilonLink &link = epsilonLinks[ep];
1326 		NameInst *resolvedName = 0;
1327 
1328 		if ( link.target.length() == 1 && strcmp( link.target.data[0], "final" ) == 0 ) {
1329 			/* Epsilon drawn to an implicit final state. An implicit final is
1330 			 * only available in join operations. */
1331 			resolvedName = pd->localNameScope->final;
1332 		}
1333 		else {
1334 			/* Do an search for the name. */
1335 			NameSet resolved;
1336 			pd->resolveFrom( resolved, pd->localNameScope, link.target, 0 );
1337 			if ( resolved.length() > 0 ) {
1338 				/* Take the first one. */
1339 				resolvedName = resolved[0];
1340 				if ( resolved.length() > 1 ) {
1341 					/* Complain about the multiple references. */
1342 					error(link.loc) << "state reference " << link.target <<
1343 							" resolves to multiple entry points" << endl;
1344 					errorStateLabels( resolved );
1345 				}
1346 			}
1347 		}
1348 
1349 		/* This is tricky, we stuff resolved epsilon transitions into one long
1350 		 * vector in the parse data structure. Since the name resolution and
1351 		 * graph generation both do identical walks of the parse tree we
1352 		 * should always find the link resolutions in the right place.  */
1353 		pd->epsilonResolvedLinks.append( resolvedName );
1354 
1355 		if ( resolvedName != 0 ) {
1356 			/* Found the name, bump of the reference count on it. */
1357 			resolvedName->numRefs += 1;
1358 		}
1359 		else {
1360 			/* Complain, no recovery action, the epsilon op will ignore any
1361 			 * epsilon transitions whose names did not resolve. */
1362 			error(link.loc) << "could not resolve label " << link.target << endl;
1363 		}
1364 	}
1365 
1366 	if ( labels.length() > 0 )
1367 		pd->popNameScope( nameFrame );
1368 }
1369 
1370 
1371 /* Clean up after a factor with repetition node. */
~FactorWithRep()1372 FactorWithRep::~FactorWithRep()
1373 {
1374 	switch ( type ) {
1375 		case StarType: case StarStarType: case OptionalType: case PlusType:
1376 		case ExactType: case MaxType: case MinType: case RangeType:
1377 			delete factorWithRep;
1378 			break;
1379 		case FactorWithNegType:
1380 			delete factorWithNeg;
1381 			break;
1382 	}
1383 }
1384 
1385 /* Evaluate a factor with repetition node. */
walk(ParseData * pd)1386 FsmAp *FactorWithRep::walk( ParseData *pd )
1387 {
1388 	FsmAp *retFsm = 0;
1389 
1390 	switch ( type ) {
1391 	case StarType: {
1392 		/* Evaluate the FactorWithRep. */
1393 		retFsm = factorWithRep->walk( pd );
1394 		if ( retFsm->startState->isFinState() ) {
1395 			warning(loc) << "applying kleene star to a machine that "
1396 					"accepts zero length word" << endl;
1397 			retFsm->unsetFinState( retFsm->startState );
1398 		}
1399 
1400 		/* Shift over the start action orders then do the kleene star. */
1401 		pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1402 		retFsm->starOp( );
1403 		afterOpMinimize( retFsm );
1404 		break;
1405 	}
1406 	case StarStarType: {
1407 		/* Evaluate the FactorWithRep. */
1408 		retFsm = factorWithRep->walk( pd );
1409 		if ( retFsm->startState->isFinState() ) {
1410 			warning(loc) << "applying kleene star to a machine that "
1411 					"accepts zero length word" << endl;
1412 		}
1413 
1414 		/* Set up the prior descs. All gets priority one, whereas leaving gets
1415 		 * priority zero. Make a unique key so that these priorities don't
1416 		 * interfere with any priorities set by the user. */
1417 		priorDescs[0].key = pd->nextPriorKey++;
1418 		priorDescs[0].priority = 1;
1419 		retFsm->allTransPrior( pd->curPriorOrd++, &priorDescs[0] );
1420 
1421 		/* Leaveing gets priority 0. Use same unique key. */
1422 		priorDescs[1].key = priorDescs[0].key;
1423 		priorDescs[1].priority = 0;
1424 		retFsm->leaveFsmPrior( pd->curPriorOrd++, &priorDescs[1] );
1425 
1426 		/* Shift over the start action orders then do the kleene star. */
1427 		pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1428 		retFsm->starOp( );
1429 		afterOpMinimize( retFsm );
1430 		break;
1431 	}
1432 	case OptionalType: {
1433 		/* Make the null fsm. */
1434 		FsmAp *nu = new FsmAp();
1435 		nu->lambdaFsm( );
1436 
1437 		/* Evaluate the FactorWithRep. */
1438 		retFsm = factorWithRep->walk( pd );
1439 
1440 		/* Perform the question operator. */
1441 		retFsm->unionOp( nu );
1442 		afterOpMinimize( retFsm );
1443 		break;
1444 	}
1445 	case PlusType: {
1446 		/* Evaluate the FactorWithRep. */
1447 		retFsm = factorWithRep->walk( pd );
1448 		if ( retFsm->startState->isFinState() ) {
1449 			warning(loc) << "applying plus operator to a machine that "
1450 					"accepts zero length word" << endl;
1451 		}
1452 
1453 		/* Need a duplicated for the star end. */
1454 		FsmAp *dup = new FsmAp( *retFsm );
1455 
1456 		/* The start func orders need to be shifted before doing the star. */
1457 		pd->curActionOrd += dup->shiftStartActionOrder( pd->curActionOrd );
1458 
1459 		/* Star the duplicate. */
1460 		dup->starOp( );
1461 		afterOpMinimize( dup );
1462 
1463 		retFsm->concatOp( dup );
1464 		afterOpMinimize( retFsm );
1465 		break;
1466 	}
1467 	case ExactType: {
1468 		/* Get an int from the repetition amount. */
1469 		if ( lowerRep == 0 ) {
1470 			/* No copies. Don't need to evaluate the factorWithRep.
1471 			 * This Defeats the purpose so give a warning. */
1472 			warning(loc) << "exactly zero repetitions results "
1473 					"in the null machine" << endl;
1474 
1475 			retFsm = new FsmAp();
1476 			retFsm->lambdaFsm();
1477 		}
1478 		else {
1479 			/* Evaluate the first FactorWithRep. */
1480 			retFsm = factorWithRep->walk( pd );
1481 			if ( retFsm->startState->isFinState() ) {
1482 				warning(loc) << "applying repetition to a machine that "
1483 						"accepts zero length word" << endl;
1484 			}
1485 
1486 			/* The start func orders need to be shifted before doing the
1487 			 * repetition. */
1488 			pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1489 
1490 			/* Do the repetition on the machine. Already guarded against n == 0 */
1491 			retFsm->repeatOp( lowerRep );
1492 			afterOpMinimize( retFsm );
1493 		}
1494 		break;
1495 	}
1496 	case MaxType: {
1497 		/* Get an int from the repetition amount. */
1498 		if ( upperRep == 0 ) {
1499 			/* No copies. Don't need to evaluate the factorWithRep.
1500 			 * This Defeats the purpose so give a warning. */
1501 			warning(loc) << "max zero repetitions results "
1502 					"in the null machine" << endl;
1503 
1504 			retFsm = new FsmAp();
1505 			retFsm->lambdaFsm();
1506 		}
1507 		else {
1508 			/* Evaluate the first FactorWithRep. */
1509 			retFsm = factorWithRep->walk( pd );
1510 			if ( retFsm->startState->isFinState() ) {
1511 				warning(loc) << "applying max repetition to a machine that "
1512 						"accepts zero length word" << endl;
1513 			}
1514 
1515 			/* The start func orders need to be shifted before doing the
1516 			 * repetition. */
1517 			pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1518 
1519 			/* Do the repetition on the machine. Already guarded against n == 0 */
1520 			retFsm->optionalRepeatOp( upperRep );
1521 			afterOpMinimize( retFsm );
1522 		}
1523 		break;
1524 	}
1525 	case MinType: {
1526 		/* Evaluate the repeated machine. */
1527 		retFsm = factorWithRep->walk( pd );
1528 		if ( retFsm->startState->isFinState() ) {
1529 			warning(loc) << "applying min repetition to a machine that "
1530 					"accepts zero length word" << endl;
1531 		}
1532 
1533 		/* The start func orders need to be shifted before doing the repetition
1534 		 * and the kleene star. */
1535 		pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1536 
1537 		if ( lowerRep == 0 ) {
1538 			/* Acts just like a star op on the machine to return. */
1539 			retFsm->starOp( );
1540 			afterOpMinimize( retFsm );
1541 		}
1542 		else {
1543 			/* Take a duplicate for the plus. */
1544 			FsmAp *dup = new FsmAp( *retFsm );
1545 
1546 			/* Do repetition on the first half. */
1547 			retFsm->repeatOp( lowerRep );
1548 			afterOpMinimize( retFsm );
1549 
1550 			/* Star the duplicate. */
1551 			dup->starOp( );
1552 			afterOpMinimize( dup );
1553 
1554 			/* Tak on the kleene star. */
1555 			retFsm->concatOp( dup );
1556 			afterOpMinimize( retFsm );
1557 		}
1558 		break;
1559 	}
1560 	case RangeType: {
1561 		/* Check for bogus range. */
1562 		if ( upperRep - lowerRep < 0 ) {
1563 			error(loc) << "invalid range repetition" << endl;
1564 
1565 			/* Return null machine as recovery. */
1566 			retFsm = new FsmAp();
1567 			retFsm->lambdaFsm();
1568 		}
1569 		else if ( lowerRep == 0 && upperRep == 0 ) {
1570 			/* No copies. Don't need to evaluate the factorWithRep.  This
1571 			 * defeats the purpose so give a warning. */
1572 			warning(loc) << "zero to zero repetitions results "
1573 					"in the null machine" << endl;
1574 
1575 			retFsm = new FsmAp();
1576 			retFsm->lambdaFsm();
1577 		}
1578 		else {
1579 			/* Now need to evaluate the repeated machine. */
1580 			retFsm = factorWithRep->walk( pd );
1581 			if ( retFsm->startState->isFinState() ) {
1582 				warning(loc) << "applying range repetition to a machine that "
1583 						"accepts zero length word" << endl;
1584 			}
1585 
1586 			/* The start func orders need to be shifted before doing both kinds
1587 			 * of repetition. */
1588 			pd->curActionOrd += retFsm->shiftStartActionOrder( pd->curActionOrd );
1589 
1590 			if ( lowerRep == 0 ) {
1591 				/* Just doing max repetition. Already guarded against n == 0. */
1592 				retFsm->optionalRepeatOp( upperRep );
1593 				afterOpMinimize( retFsm );
1594 			}
1595 			else if ( lowerRep == upperRep ) {
1596 				/* Just doing exact repetition. Already guarded against n == 0. */
1597 				retFsm->repeatOp( lowerRep );
1598 				afterOpMinimize( retFsm );
1599 			}
1600 			else {
1601 				/* This is the case that 0 < lowerRep < upperRep. Take a
1602 				 * duplicate for the optional repeat. */
1603 				FsmAp *dup = new FsmAp( *retFsm );
1604 
1605 				/* Do repetition on the first half. */
1606 				retFsm->repeatOp( lowerRep );
1607 				afterOpMinimize( retFsm );
1608 
1609 				/* Do optional repetition on the second half. */
1610 				dup->optionalRepeatOp( upperRep - lowerRep );
1611 				afterOpMinimize( dup );
1612 
1613 				/* Tak on the duplicate machine. */
1614 				retFsm->concatOp( dup );
1615 				afterOpMinimize( retFsm );
1616 			}
1617 		}
1618 		break;
1619 	}
1620 	case FactorWithNegType: {
1621 		/* Evaluate the Factor. Pass it up. */
1622 		retFsm = factorWithNeg->walk( pd );
1623 		break;
1624 	}}
1625 	return retFsm;
1626 }
1627 
makeNameTree(ParseData * pd)1628 void FactorWithRep::makeNameTree( ParseData *pd )
1629 {
1630 	switch ( type ) {
1631 	case StarType:
1632 	case StarStarType:
1633 	case OptionalType:
1634 	case PlusType:
1635 	case ExactType:
1636 	case MaxType:
1637 	case MinType:
1638 	case RangeType:
1639 		factorWithRep->makeNameTree( pd );
1640 		break;
1641 	case FactorWithNegType:
1642 		factorWithNeg->makeNameTree( pd );
1643 		break;
1644 	}
1645 }
1646 
resolveNameRefs(ParseData * pd)1647 void FactorWithRep::resolveNameRefs( ParseData *pd )
1648 {
1649 	switch ( type ) {
1650 	case StarType:
1651 	case StarStarType:
1652 	case OptionalType:
1653 	case PlusType:
1654 	case ExactType:
1655 	case MaxType:
1656 	case MinType:
1657 	case RangeType:
1658 		factorWithRep->resolveNameRefs( pd );
1659 		break;
1660 	case FactorWithNegType:
1661 		factorWithNeg->resolveNameRefs( pd );
1662 		break;
1663 	}
1664 }
1665 
1666 /* Clean up after a factor with negation node. */
~FactorWithNeg()1667 FactorWithNeg::~FactorWithNeg()
1668 {
1669 	switch ( type ) {
1670 		case NegateType:
1671 		case CharNegateType:
1672 			delete factorWithNeg;
1673 			break;
1674 		case FactorType:
1675 			delete factor;
1676 			break;
1677 	}
1678 }
1679 
1680 /* Evaluate a factor with negation node. */
walk(ParseData * pd)1681 FsmAp *FactorWithNeg::walk( ParseData *pd )
1682 {
1683 	FsmAp *retFsm = 0;
1684 
1685 	switch ( type ) {
1686 	case NegateType: {
1687 		/* Evaluate the factorWithNeg. */
1688 		FsmAp *toNegate = factorWithNeg->walk( pd );
1689 
1690 		/* Negation is subtract from dot-star. */
1691 		retFsm = dotStarFsm( pd );
1692 		retFsm->subtractOp( toNegate );
1693 		afterOpMinimize( retFsm );
1694 		break;
1695 	}
1696 	case CharNegateType: {
1697 		/* Evaluate the factorWithNeg. */
1698 		FsmAp *toNegate = factorWithNeg->walk( pd );
1699 
1700 		/* CharNegation is subtract from dot. */
1701 		retFsm = dotFsm( pd );
1702 		retFsm->subtractOp( toNegate );
1703 		afterOpMinimize( retFsm );
1704 		break;
1705 	}
1706 	case FactorType: {
1707 		/* Evaluate the Factor. Pass it up. */
1708 		retFsm = factor->walk( pd );
1709 		break;
1710 	}}
1711 	return retFsm;
1712 }
1713 
makeNameTree(ParseData * pd)1714 void FactorWithNeg::makeNameTree( ParseData *pd )
1715 {
1716 	switch ( type ) {
1717 	case NegateType:
1718 	case CharNegateType:
1719 		factorWithNeg->makeNameTree( pd );
1720 		break;
1721 	case FactorType:
1722 		factor->makeNameTree( pd );
1723 		break;
1724 	}
1725 }
1726 
resolveNameRefs(ParseData * pd)1727 void FactorWithNeg::resolveNameRefs( ParseData *pd )
1728 {
1729 	switch ( type ) {
1730 	case NegateType:
1731 	case CharNegateType:
1732 		factorWithNeg->resolveNameRefs( pd );
1733 		break;
1734 	case FactorType:
1735 		factor->resolveNameRefs( pd );
1736 		break;
1737 	}
1738 }
1739 
1740 /* Clean up after a factor node. */
~Factor()1741 Factor::~Factor()
1742 {
1743 	switch ( type ) {
1744 		case LiteralType:
1745 			delete literal;
1746 			break;
1747 		case RangeType:
1748 			delete range;
1749 			break;
1750 		case OrExprType:
1751 			delete reItem;
1752 			break;
1753 		case RegExprType:
1754 			delete regExpr;
1755 			break;
1756 		case ReferenceType:
1757 			break;
1758 		case ParenType:
1759 			delete join;
1760 			break;
1761 		case LongestMatchType:
1762 			delete longestMatch;
1763 			break;
1764 	}
1765 }
1766 
1767 /* Evaluate a factor node. */
walk(ParseData * pd)1768 FsmAp *Factor::walk( ParseData *pd )
1769 {
1770 	FsmAp *rtnVal = 0;
1771 	switch ( type ) {
1772 	case LiteralType:
1773 		rtnVal = literal->walk( pd );
1774 		break;
1775 	case RangeType:
1776 		rtnVal = range->walk( pd );
1777 		break;
1778 	case OrExprType:
1779 		rtnVal = reItem->walk( pd, 0 );
1780 		break;
1781 	case RegExprType:
1782 		rtnVal = regExpr->walk( pd, 0 );
1783 		break;
1784 	case ReferenceType:
1785 		rtnVal = varDef->walk( pd );
1786 		break;
1787 	case ParenType:
1788 		rtnVal = join->walk( pd );
1789 		break;
1790 	case LongestMatchType:
1791 		rtnVal = longestMatch->walk( pd );
1792 		break;
1793 	}
1794 
1795 	return rtnVal;
1796 }
1797 
makeNameTree(ParseData * pd)1798 void Factor::makeNameTree( ParseData *pd )
1799 {
1800 	switch ( type ) {
1801 	case LiteralType:
1802 	case RangeType:
1803 	case OrExprType:
1804 	case RegExprType:
1805 		break;
1806 	case ReferenceType:
1807 		varDef->makeNameTree( loc, pd );
1808 		break;
1809 	case ParenType:
1810 		join->makeNameTree( pd );
1811 		break;
1812 	case LongestMatchType:
1813 		longestMatch->makeNameTree( pd );
1814 		break;
1815 	}
1816 }
1817 
resolveNameRefs(ParseData * pd)1818 void Factor::resolveNameRefs( ParseData *pd )
1819 {
1820 	switch ( type ) {
1821 	case LiteralType:
1822 	case RangeType:
1823 	case OrExprType:
1824 	case RegExprType:
1825 		break;
1826 	case ReferenceType:
1827 		varDef->resolveNameRefs( pd );
1828 		break;
1829 	case ParenType:
1830 		join->resolveNameRefs( pd );
1831 		break;
1832 	case LongestMatchType:
1833 		longestMatch->resolveNameRefs( pd );
1834 		break;
1835 	}
1836 }
1837 
1838 /* Clean up a range object. Must delete the two literals. */
~Range()1839 Range::~Range()
1840 {
1841 	delete lowerLit;
1842 	delete upperLit;
1843 }
1844 
1845 /* Evaluate a range. Gets the lower an upper key and makes an fsm range. */
walk(ParseData * pd)1846 FsmAp *Range::walk( ParseData *pd )
1847 {
1848 	/* Construct and verify the suitability of the lower end of the range. */
1849 	FsmAp *lowerFsm = lowerLit->walk( pd );
1850 	if ( !lowerFsm->checkSingleCharMachine() ) {
1851 		error(lowerLit->token.loc) <<
1852 			"bad range lower end, must be a single character" << endl;
1853 	}
1854 
1855 	/* Construct and verify the upper end. */
1856 	FsmAp *upperFsm = upperLit->walk( pd );
1857 	if ( !upperFsm->checkSingleCharMachine() ) {
1858 		error(upperLit->token.loc) <<
1859 			"bad range upper end, must be a single character" << endl;
1860 	}
1861 
1862 	/* Grab the keys from the machines, then delete them. */
1863 	Key lowKey = lowerFsm->startState->outList.head->lowKey;
1864 	Key highKey = upperFsm->startState->outList.head->lowKey;
1865 	delete lowerFsm;
1866 	delete upperFsm;
1867 
1868 	/* Validate the range. */
1869 	if ( lowKey > highKey ) {
1870 		/* Recover by setting upper to lower; */
1871 		error(lowerLit->token.loc) << "lower end of range is greater then upper end" << endl;
1872 		highKey = lowKey;
1873 	}
1874 
1875 	/* Return the range now that it is validated. */
1876 	FsmAp *retFsm = new FsmAp();
1877 	retFsm->rangeFsm( lowKey, highKey );
1878 	return retFsm;
1879 }
1880 
1881 /* Evaluate a literal object. */
walk(ParseData * pd)1882 FsmAp *Literal::walk( ParseData *pd )
1883 {
1884 	/* FsmAp to return, is the alphabet signed. */
1885 	FsmAp *rtnVal = 0;
1886 
1887 	switch ( type ) {
1888 	case Number: {
1889 		/* Make the fsm key in int format. */
1890 		Key fsmKey = makeFsmKeyNum( token.data, token.loc, pd );
1891 		/* Make the new machine. */
1892 		rtnVal = new FsmAp();
1893 		rtnVal->concatFsm( fsmKey );
1894 		break;
1895 	}
1896 	case LitString: {
1897 		/* Make the array of keys in int format. */
1898 		long length;
1899 		bool caseInsensitive;
1900 		char *data = prepareLitString( token.loc, token.data, token.length,
1901 				length, caseInsensitive );
1902 		Key *arr = new Key[length];
1903 		makeFsmKeyArray( arr, data, length, pd );
1904 
1905 		/* Make the new machine. */
1906 		rtnVal = new FsmAp();
1907 		if ( caseInsensitive )
1908 			rtnVal->concatFsmCI( arr, length );
1909 		else
1910 			rtnVal->concatFsm( arr, length );
1911 		delete[] data;
1912 		delete[] arr;
1913 		break;
1914 	}}
1915 	return rtnVal;
1916 }
1917 
1918 /* Clean up after a regular expression object. */
~RegExpr()1919 RegExpr::~RegExpr()
1920 {
1921 	switch ( type ) {
1922 		case RecurseItem:
1923 			delete regExpr;
1924 			delete item;
1925 			break;
1926 		case Empty:
1927 			break;
1928 	}
1929 }
1930 
1931 /* Evaluate a regular expression object. */
walk(ParseData * pd,RegExpr * rootRegex)1932 FsmAp *RegExpr::walk( ParseData *pd, RegExpr *rootRegex )
1933 {
1934 	/* This is the root regex, pass down a pointer to this. */
1935 	if ( rootRegex == 0 )
1936 		rootRegex = this;
1937 
1938 	FsmAp *rtnVal = 0;
1939 	switch ( type ) {
1940 		case RecurseItem: {
1941 			/* Walk both items. */
1942 			rtnVal = regExpr->walk( pd, rootRegex );
1943 			FsmAp *fsm2 = item->walk( pd, rootRegex );
1944 			rtnVal->concatOp( fsm2 );
1945 			break;
1946 		}
1947 		case Empty: {
1948 			rtnVal = new FsmAp();
1949 			rtnVal->lambdaFsm();
1950 			break;
1951 		}
1952 	}
1953 	return rtnVal;
1954 }
1955 
1956 /* Clean up after an item in a regular expression. */
~ReItem()1957 ReItem::~ReItem()
1958 {
1959 	switch ( type ) {
1960 		case Data:
1961 		case Dot:
1962 			break;
1963 		case OrBlock:
1964 		case NegOrBlock:
1965 			delete orBlock;
1966 			break;
1967 	}
1968 }
1969 
1970 /* Evaluate a regular expression object. */
walk(ParseData * pd,RegExpr * rootRegex)1971 FsmAp *ReItem::walk( ParseData *pd, RegExpr *rootRegex )
1972 {
1973 	/* The fsm to return, is the alphabet signed? */
1974 	FsmAp *rtnVal = 0;
1975 
1976 	switch ( type ) {
1977 		case Data: {
1978 			/* Move the data into an integer array and make a concat fsm. */
1979 			Key *arr = new Key[token.length];
1980 			makeFsmKeyArray( arr, token.data, token.length, pd );
1981 
1982 			/* Make the concat fsm. */
1983 			rtnVal = new FsmAp();
1984 			if ( rootRegex != 0 && rootRegex->caseInsensitive )
1985 				rtnVal->concatFsmCI( arr, token.length );
1986 			else
1987 				rtnVal->concatFsm( arr, token.length );
1988 			delete[] arr;
1989 			break;
1990 		}
1991 		case Dot: {
1992 			/* Make the dot fsm. */
1993 			rtnVal = dotFsm( pd );
1994 			break;
1995 		}
1996 		case OrBlock: {
1997 			/* Get the or block and minmize it. */
1998 			rtnVal = orBlock->walk( pd, rootRegex );
1999 			if ( rtnVal == 0 ) {
2000 				rtnVal = new FsmAp();
2001 				rtnVal->lambdaFsm();
2002 			}
2003 			rtnVal->minimizePartition2();
2004 			break;
2005 		}
2006 		case NegOrBlock: {
2007 			/* Get the or block and minimize it. */
2008 			FsmAp *fsm = orBlock->walk( pd, rootRegex );
2009 			fsm->minimizePartition2();
2010 
2011 			/* Make a dot fsm and subtract from it. */
2012 			rtnVal = dotFsm( pd );
2013 			rtnVal->subtractOp( fsm );
2014 			rtnVal->minimizePartition2();
2015 			break;
2016 		}
2017 	}
2018 
2019 	/* If the item is followed by a star, then apply the star op. */
2020 	if ( star ) {
2021 		if ( rtnVal->startState->isFinState() ) {
2022 			warning(loc) << "applying kleene star to a machine that "
2023 					"accepts zero length word" << endl;
2024 		}
2025 
2026 		rtnVal->starOp();
2027 		rtnVal->minimizePartition2();
2028 	}
2029 	return rtnVal;
2030 }
2031 
2032 /* Clean up after an or block of a regular expression. */
~ReOrBlock()2033 ReOrBlock::~ReOrBlock()
2034 {
2035 	switch ( type ) {
2036 		case RecurseItem:
2037 			delete orBlock;
2038 			delete item;
2039 			break;
2040 		case Empty:
2041 			break;
2042 	}
2043 }
2044 
2045 
2046 /* Evaluate an or block of a regular expression. */
walk(ParseData * pd,RegExpr * rootRegex)2047 FsmAp *ReOrBlock::walk( ParseData *pd, RegExpr *rootRegex )
2048 {
2049 	FsmAp *rtnVal = 0;
2050 	switch ( type ) {
2051 		case RecurseItem: {
2052 			/* Evaluate the two fsm. */
2053 			FsmAp *fsm1 = orBlock->walk( pd, rootRegex );
2054 			FsmAp *fsm2 = item->walk( pd, rootRegex );
2055 			if ( fsm1 == 0 )
2056 				rtnVal = fsm2;
2057 			else {
2058 				fsm1->unionOp( fsm2 );
2059 				rtnVal = fsm1;
2060 			}
2061 			break;
2062 		}
2063 		case Empty: {
2064 			rtnVal = 0;
2065 			break;
2066 		}
2067 	}
2068 	return rtnVal;;
2069 }
2070 
2071 /* Evaluate an or block item of a regular expression. */
walk(ParseData * pd,RegExpr * rootRegex)2072 FsmAp *ReOrItem::walk( ParseData *pd, RegExpr *rootRegex )
2073 {
2074 	/* The return value, is the alphabet signed? */
2075 	FsmAp *rtnVal = 0;
2076 	switch ( type ) {
2077 	case Data: {
2078 		/* Make the or machine. */
2079 		rtnVal = new FsmAp();
2080 
2081 		/* Put the or data into an array of ints. Note that we find unique
2082 		 * keys. Duplicates are silently ignored. The alternative would be to
2083 		 * issue warning or an error but since we can't with [a0-9a] or 'a' |
2084 		 * 'a' don't bother here. */
2085 		KeySet keySet;
2086 		makeFsmUniqueKeyArray( keySet, token.data, token.length,
2087 			rootRegex != 0 ? rootRegex->caseInsensitive : false, pd );
2088 
2089 		/* Run the or operator. */
2090 		rtnVal->orFsm( keySet.data, keySet.length() );
2091 		break;
2092 	}
2093 	case Range: {
2094 		/* Make the upper and lower keys. */
2095 		Key lowKey = makeFsmKeyChar( lower, pd );
2096 		Key highKey = makeFsmKeyChar( upper, pd );
2097 
2098 		/* Validate the range. */
2099 		if ( lowKey > highKey ) {
2100 			/* Recover by setting upper to lower; */
2101 			error(loc) << "lower end of range is greater then upper end" << endl;
2102 			highKey = lowKey;
2103 		}
2104 
2105 		/* Make the range machine. */
2106 		rtnVal = new FsmAp();
2107 		rtnVal->rangeFsm( lowKey, highKey );
2108 
2109 		if ( rootRegex != 0 && rootRegex->caseInsensitive ) {
2110 			if ( lowKey <= 'Z' && 'A' <= highKey ) {
2111 				Key otherLow = lowKey < 'A' ? Key('A') : lowKey;
2112 				Key otherHigh = 'Z' < highKey ? Key('Z') : highKey;
2113 
2114 				otherLow = 'a' + ( otherLow - 'A' );
2115 				otherHigh = 'a' + ( otherHigh - 'A' );
2116 
2117 				FsmAp *otherRange = new FsmAp();
2118 				otherRange->rangeFsm( otherLow, otherHigh );
2119 				rtnVal->unionOp( otherRange );
2120 				rtnVal->minimizePartition2();
2121 			}
2122 			else if ( lowKey <= 'z' && 'a' <= highKey ) {
2123 				Key otherLow = lowKey < 'a' ? Key('a') : lowKey;
2124 				Key otherHigh = 'z' < highKey ? Key('z') : highKey;
2125 
2126 				otherLow = 'A' + ( otherLow - 'a' );
2127 				otherHigh = 'A' + ( otherHigh - 'a' );
2128 
2129 				FsmAp *otherRange = new FsmAp();
2130 				otherRange->rangeFsm( otherLow, otherHigh );
2131 				rtnVal->unionOp( otherRange );
2132 				rtnVal->minimizePartition2();
2133 			}
2134 		}
2135 
2136 		break;
2137 	}}
2138 	return rtnVal;
2139 }
2140