1 /*
2  *  Copyright 2001-2008 Adrian Thurston <thurston@complang.org>
3  */
4 
5 /*  This file is part of Ragel.
6  *
7  *  Ragel is free software; you can redistribute it and/or modify
8  *  it under the terms of the GNU General Public License as published by
9  *  the Free Software Foundation; either version 2 of the License, or
10  *  (at your option) any later version.
11  *
12  *  Ragel is distributed in the hope that it will be useful,
13  *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14  *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  *  GNU General Public License for more details.
16  *
17  *  You should have received a copy of the GNU General Public License
18  *  along with Ragel; if not, write to the Free Software
19  *  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
20  */
21 
22 #ifndef _PARSEDATA_H
23 #define _PARSEDATA_H
24 
25 #include <iostream>
26 #include <limits.h>
27 #include <sstream>
28 #include "avlmap.h"
29 #include "bstmap.h"
30 #include "vector.h"
31 #include "dlist.h"
32 #include "fsmgraph.h"
33 #include "compare.h"
34 #include "vector.h"
35 #include "common.h"
36 #include "parsetree.h"
37 
38 /* Forwards. */
39 using std::ostream;
40 
41 struct VarDef;
42 struct Join;
43 struct Expression;
44 struct Term;
45 struct FactorWithAug;
46 struct FactorWithLabel;
47 struct FactorWithRep;
48 struct FactorWithNeg;
49 struct Factor;
50 struct Literal;
51 struct Range;
52 struct RegExpr;
53 struct ReItem;
54 struct ReOrBlock;
55 struct ReOrItem;
56 struct LongestMatch;
57 struct InputData;
58 struct CodeGenData;
59 typedef DList<LongestMatch> LmList;
60 
61 
62 /* Graph dictionary. */
63 struct GraphDictEl
64 :
65 	public AvlTreeEl<GraphDictEl>,
66 	public DListEl<GraphDictEl>
67 {
GraphDictElGraphDictEl68 	GraphDictEl( const char *k )
69 		: key(k), value(0), isInstance(false) { }
GraphDictElGraphDictEl70 	GraphDictEl( const char *k, VarDef *value )
71 		: key(k), value(value), isInstance(false) { }
72 
getKeyGraphDictEl73 	const char *getKey() { return key; }
74 
75 	const char *key;
76 	VarDef *value;
77 	bool isInstance;
78 
79 	/* Location info of graph definition. Points to variable name of assignment. */
80 	InputLoc loc;
81 };
82 
83 typedef AvlTree<GraphDictEl, const char*, CmpStr> GraphDict;
84 typedef DList<GraphDictEl> GraphList;
85 
86 /* Priority name dictionary. */
87 typedef AvlMapEl<char*, int> PriorDictEl;
88 typedef AvlMap<char*, int, CmpStr> PriorDict;
89 
90 /* Local error name dictionary. */
91 typedef AvlMapEl<const char*, int> LocalErrDictEl;
92 typedef AvlMap<const char*, int, CmpStr> LocalErrDict;
93 
94 /* Tree of instantiated names. */
95 typedef BstMapEl<const char*, NameInst*> NameMapEl;
96 typedef BstMap<const char*, NameInst*, CmpStr> NameMap;
97 typedef Vector<NameInst*> NameVect;
98 typedef BstSet<NameInst*> NameSet;
99 
100 /* Node in the tree of instantiated names. */
101 struct NameInst
102 {
NameInstNameInst103 	NameInst( const InputLoc &loc, NameInst *parent, const char *name, int id, bool isLabel ) :
104 		loc(loc), parent(parent), name(name), id(id), isLabel(isLabel),
105 		isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {}
106 
107 	InputLoc loc;
108 
109 	/* Keep parent pointers in the name tree to retrieve
110 	 * fully qulified names. */
111 	NameInst *parent;
112 
113 	const char *name;
114 	int id;
115 	bool isLabel;
116 	bool isLongestMatch;
117 
118 	int numRefs;
119 	int numUses;
120 
121 	/* Names underneath us, excludes anonymous names. */
122 	NameMap children;
123 
124 	/* All names underneath us in order of appearance. */
125 	NameVect childVect;
126 
127 	/* Join scopes need an implicit "final" target. */
128 	NameInst *start, *final;
129 
130 	/* During a fsm generation walk, lists the names that are referenced by
131 	 * epsilon operations in the current scope. After the link is made by the
132 	 * epsilon reference and the join operation is complete, the label can
133 	 * have its refcount decremented. Once there are no more references the
134 	 * entry point can be removed from the fsm returned. */
135 	NameVect referencedNames;
136 
137 	/* Pointers for the name search queue. */
138 	NameInst *prev, *next;
139 
140 	/* Check if this name inst or any name inst below is referenced. */
141 	bool anyRefsRec();
142 };
143 
144 typedef DList<NameInst> NameInstList;
145 
146 /* Stack frame used in walking the name tree. */
147 struct NameFrame
148 {
149 	NameInst *prevNameInst;
150 	int prevNameChild;
151 	NameInst *prevLocalScope;
152 };
153 
154 struct LengthDef
155 {
LengthDefLengthDef156 	LengthDef( char *name )
157 		: name(name) {}
158 
159 	char *name;
160 	LengthDef *prev, *next;
161 };
162 
163 typedef DList<LengthDef> LengthDefList;
164 
165 /* Class to collect information about the machine during the
166  * parse of input. */
167 struct ParseData
168 {
169 	/* Create a new parse data object. This is done at the beginning of every
170 	 * fsm specification. */
171 	ParseData( const char *fileName, char *sectionName, const InputLoc &sectionLoc );
172 	~ParseData();
173 
174 	/*
175 	 * Setting up the graph dict.
176 	 */
177 
178 	/* Initialize a graph dict with the basic fsms. */
179 	void initGraphDict();
180 	void createBuiltin( const char *name, BuiltinMachine builtin );
181 
182 	/* Make a name id in the current name instantiation scope if it is not
183 	 * already there. */
184 	NameInst *addNameInst( const InputLoc &loc, const char *data, bool isLabel );
185 	void makeRootNames();
186 	void makeNameTree( GraphDictEl *gdNode );
187 	void makeExportsNameTree();
188 	void fillNameIndex( NameInst *from );
189 	void printNameTree();
190 
191 	/* Increments the usage count on entry names. Names that are no longer
192 	 * needed will have their entry points unset. */
193 	void unsetObsoleteEntries( FsmAp *graph );
194 
195 	/* Resove name references in action code and epsilon transitions. */
196 	NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly );
197 	void resolveFrom( NameSet &result, NameInst *refFrom,
198 			const NameRef &nameRef, int namePos );
199 	NameInst *resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action );
200 	void resolveNameRefs( InlineList *inlineList, Action *action );
201 	void resolveActionNameRefs();
202 
203 	/* Set the alphabet type. If type types are not valid returns false. */
204 	bool setAlphType( const InputLoc &loc, char *s1, char *s2 );
205 	bool setAlphType( const InputLoc &loc, char *s1 );
206 
207 	/* Override one of the variables ragel uses. */
208 	bool setVariable( char *var, InlineList *inlineList );
209 
210 	/* Unique actions. */
211 	void removeDups( ActionTable &actionTable );
212 	void removeActionDups( FsmAp *graph );
213 
214 	/* Dumping the name instantiation tree. */
215 	void printNameInst( NameInst *nameInst, int level );
216 
217 	/* Make the graph from a graph dict node. Does minimization. */
218 	FsmAp *makeInstance( GraphDictEl *gdNode );
219 	FsmAp *makeSpecific( GraphDictEl *gdNode );
220 	FsmAp *makeAll();
221 
222 	/* Checking the contents of actions. */
223 	void checkAction( Action *action );
224 	void checkInlineList( Action *act, InlineList *inlineList );
225 
226 	void analyzeAction( Action *action, InlineList *inlineList );
227 	void analyzeGraph( FsmAp *graph );
228 	void makeExports();
229 
230 	void prepareMachineGen( GraphDictEl *graphDictEl );
231 	void prepareMachineGenTBWrapped( GraphDictEl *graphDictEl );
232 	void generateXML( ostream &out );
233 	void generateReduced( InputData &inputData );
234 	FsmAp *sectionGraph;
235 	bool generatingSectionSubset;
236 
237 	void initKeyOps();
238 
239 	/*
240 	 * Data collected during the parse.
241 	 */
242 
243 	/* Dictionary of graphs. Both instances and non-instances go here. */
244 	GraphDict graphDict;
245 
246 	/* The list of instances. */
247 	GraphList instanceList;
248 
249 	/* Dictionary of actions. Lets actions be defined and then referenced. */
250 	ActionDict actionDict;
251 
252 	/* Dictionary of named priorities. */
253 	PriorDict priorDict;
254 
255 	/* Dictionary of named local errors. */
256 	LocalErrDict localErrDict;
257 
258 	/* List of actions. Will be pasted into a switch statement. */
259 	ActionList actionList;
260 
261 	/* The id of the next priority name and label. */
262 	int nextPriorKey, nextLocalErrKey, nextNameId, nextCondId;
263 
264 	/* The default priority number key for a machine. This is active during
265 	 * the parse of the rhs of a machine assignment. */
266 	int curDefPriorKey;
267 
268 	int curDefLocalErrKey;
269 
270 	/* Alphabet type. */
271 	HostType *userAlphType;
272 	bool alphTypeSet;
273 	InputLoc alphTypeLoc;
274 
275 	/* Element type and get key expression. */
276 	InlineList *getKeyExpr;
277 	InlineList *accessExpr;
278 
279 	/* Stack management */
280 	InlineList *prePushExpr;
281 	InlineList *postPopExpr;
282 
283 	/* Overriding variables. */
284 	InlineList *pExpr;
285 	InlineList *peExpr;
286 	InlineList *eofExpr;
287 	InlineList *csExpr;
288 	InlineList *topExpr;
289 	InlineList *stackExpr;
290 	InlineList *actExpr;
291 	InlineList *tokstartExpr;
292 	InlineList *tokendExpr;
293 	InlineList *dataExpr;
294 
295 	/* The alphabet range. */
296 	char *lowerNum, *upperNum;
297 	Key lowKey, highKey;
298 	InputLoc rangeLowLoc, rangeHighLoc;
299 
300 	/* The name of the file the fsm is from, and the spec name. */
301 	const char *fileName;
302 	char *sectionName;
303 	InputLoc sectionLoc;
304 
305 	/* Counting the action and priority ordering. */
306 	int curActionOrd;
307 	int curPriorOrd;
308 
309 	/* Root of the name tree. One root is for the instantiated machines. The
310 	 * other root is for exported definitions. */
311 	NameInst *rootName;
312 	NameInst *exportsRootName;
313 
314 	/* Name tree walking. */
315 	NameInst *curNameInst;
316 	int curNameChild;
317 
318 	/* The place where resolved epsilon transitions go. These cannot go into
319 	 * the parse tree because a single epsilon op can resolve more than once
320 	 * to different nameInsts if the machine it's in is used more than once. */
321 	NameVect epsilonResolvedLinks;
322 	int nextEpsilonResolvedLink;
323 
324 	/* Root of the name tree used for doing local name searches. */
325 	NameInst *localNameScope;
326 
327 	void setLmInRetLoc( InlineList *inlineList );
328 	void initLongestMatchData();
329 	void setLongestMatchData( FsmAp *graph );
330 	void initNameWalk();
331 	void initExportsNameWalk();
nextNameScopeParseData332 	NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; }
333 	NameFrame enterNameScope( bool isLocal, int numScopes );
334 	void popNameScope( const NameFrame &frame );
335 	void resetNameScope( const NameFrame &frame );
336 
337 	/* Make name ids to name inst pointers. */
338 	NameInst **nameIndex;
339 
340 	/* Counter for assigning ids to longest match items. */
341 	int nextLongestMatchId;
342 	bool lmRequiresErrorState;
343 
344 	/* List of all longest match parse tree items. */
345 	LmList lmList;
346 
347 	Action *newAction( const char *name, InlineList *inlineList );
348 
349 	Action *initTokStart;
350 	int initTokStartOrd;
351 
352 	Action *setTokStart;
353 	int setTokStartOrd;
354 
355 	Action *initActId;
356 	int initActIdOrd;
357 
358 	Action *setTokEnd;
359 	int setTokEndOrd;
360 
beginProcessingParseData361 	void beginProcessing()
362 	{
363 		::condData = &thisCondData;
364 		::keyOps = &thisKeyOps;
365 	}
366 
367 	CondData thisCondData;
368 	KeyOps thisKeyOps;
369 
370 	ExportList exportList;
371 	LengthDefList lengthDefList;
372 
373 	CodeGenData *cgd;
374 };
375 
376 void afterOpMinimize( FsmAp *fsm, bool lastInSeq = true );
377 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd );
378 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd );
379 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd );
380 Key makeFsmKeyChar( char c, ParseData *pd );
381 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd );
382 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len,
383 		bool caseInsensitive, ParseData *pd );
384 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd );
385 FsmAp *dotFsm( ParseData *pd );
386 FsmAp *dotStarFsm( ParseData *pd );
387 
388 void errorStateLabels( const NameSet &locations );
389 
390 
391 #endif
392