1 /* 2 * Copyright 2001-2008 Adrian Thurston <thurston@complang.org> 3 */ 4 5 /* This file is part of Ragel. 6 * 7 * Ragel is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * Ragel is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with Ragel; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 */ 21 22 #ifndef _PARSEDATA_H 23 #define _PARSEDATA_H 24 25 #include <iostream> 26 #include <limits.h> 27 #include <sstream> 28 #include "avlmap.h" 29 #include "bstmap.h" 30 #include "vector.h" 31 #include "dlist.h" 32 #include "fsmgraph.h" 33 #include "compare.h" 34 #include "vector.h" 35 #include "common.h" 36 #include "parsetree.h" 37 38 /* Forwards. */ 39 using std::ostream; 40 41 struct VarDef; 42 struct Join; 43 struct Expression; 44 struct Term; 45 struct FactorWithAug; 46 struct FactorWithLabel; 47 struct FactorWithRep; 48 struct FactorWithNeg; 49 struct Factor; 50 struct Literal; 51 struct Range; 52 struct RegExpr; 53 struct ReItem; 54 struct ReOrBlock; 55 struct ReOrItem; 56 struct LongestMatch; 57 struct InputData; 58 struct CodeGenData; 59 typedef DList<LongestMatch> LmList; 60 61 62 /* Graph dictionary. */ 63 struct GraphDictEl 64 : 65 public AvlTreeEl<GraphDictEl>, 66 public DListEl<GraphDictEl> 67 { GraphDictElGraphDictEl68 GraphDictEl( const char *k ) 69 : key(k), value(0), isInstance(false) { } GraphDictElGraphDictEl70 GraphDictEl( const char *k, VarDef *value ) 71 : key(k), value(value), isInstance(false) { } 72 getKeyGraphDictEl73 const char *getKey() { return key; } 74 75 const char *key; 76 VarDef *value; 77 bool isInstance; 78 79 /* Location info of graph definition. Points to variable name of assignment. */ 80 InputLoc loc; 81 }; 82 83 typedef AvlTree<GraphDictEl, const char*, CmpStr> GraphDict; 84 typedef DList<GraphDictEl> GraphList; 85 86 /* Priority name dictionary. */ 87 typedef AvlMapEl<char*, int> PriorDictEl; 88 typedef AvlMap<char*, int, CmpStr> PriorDict; 89 90 /* Local error name dictionary. */ 91 typedef AvlMapEl<const char*, int> LocalErrDictEl; 92 typedef AvlMap<const char*, int, CmpStr> LocalErrDict; 93 94 /* Tree of instantiated names. */ 95 typedef BstMapEl<const char*, NameInst*> NameMapEl; 96 typedef BstMap<const char*, NameInst*, CmpStr> NameMap; 97 typedef Vector<NameInst*> NameVect; 98 typedef BstSet<NameInst*> NameSet; 99 100 /* Node in the tree of instantiated names. */ 101 struct NameInst 102 { NameInstNameInst103 NameInst( const InputLoc &loc, NameInst *parent, const char *name, int id, bool isLabel ) : 104 loc(loc), parent(parent), name(name), id(id), isLabel(isLabel), 105 isLongestMatch(false), numRefs(0), numUses(0), start(0), final(0) {} 106 107 InputLoc loc; 108 109 /* Keep parent pointers in the name tree to retrieve 110 * fully qulified names. */ 111 NameInst *parent; 112 113 const char *name; 114 int id; 115 bool isLabel; 116 bool isLongestMatch; 117 118 int numRefs; 119 int numUses; 120 121 /* Names underneath us, excludes anonymous names. */ 122 NameMap children; 123 124 /* All names underneath us in order of appearance. */ 125 NameVect childVect; 126 127 /* Join scopes need an implicit "final" target. */ 128 NameInst *start, *final; 129 130 /* During a fsm generation walk, lists the names that are referenced by 131 * epsilon operations in the current scope. After the link is made by the 132 * epsilon reference and the join operation is complete, the label can 133 * have its refcount decremented. Once there are no more references the 134 * entry point can be removed from the fsm returned. */ 135 NameVect referencedNames; 136 137 /* Pointers for the name search queue. */ 138 NameInst *prev, *next; 139 140 /* Check if this name inst or any name inst below is referenced. */ 141 bool anyRefsRec(); 142 }; 143 144 typedef DList<NameInst> NameInstList; 145 146 /* Stack frame used in walking the name tree. */ 147 struct NameFrame 148 { 149 NameInst *prevNameInst; 150 int prevNameChild; 151 NameInst *prevLocalScope; 152 }; 153 154 struct LengthDef 155 { LengthDefLengthDef156 LengthDef( char *name ) 157 : name(name) {} 158 159 char *name; 160 LengthDef *prev, *next; 161 }; 162 163 typedef DList<LengthDef> LengthDefList; 164 165 /* Class to collect information about the machine during the 166 * parse of input. */ 167 struct ParseData 168 { 169 /* Create a new parse data object. This is done at the beginning of every 170 * fsm specification. */ 171 ParseData( const char *fileName, char *sectionName, const InputLoc §ionLoc ); 172 ~ParseData(); 173 174 /* 175 * Setting up the graph dict. 176 */ 177 178 /* Initialize a graph dict with the basic fsms. */ 179 void initGraphDict(); 180 void createBuiltin( const char *name, BuiltinMachine builtin ); 181 182 /* Make a name id in the current name instantiation scope if it is not 183 * already there. */ 184 NameInst *addNameInst( const InputLoc &loc, const char *data, bool isLabel ); 185 void makeRootNames(); 186 void makeNameTree( GraphDictEl *gdNode ); 187 void makeExportsNameTree(); 188 void fillNameIndex( NameInst *from ); 189 void printNameTree(); 190 191 /* Increments the usage count on entry names. Names that are no longer 192 * needed will have their entry points unset. */ 193 void unsetObsoleteEntries( FsmAp *graph ); 194 195 /* Resove name references in action code and epsilon transitions. */ 196 NameSet resolvePart( NameInst *refFrom, const char *data, bool recLabelsOnly ); 197 void resolveFrom( NameSet &result, NameInst *refFrom, 198 const NameRef &nameRef, int namePos ); 199 NameInst *resolveStateRef( const NameRef &nameRef, InputLoc &loc, Action *action ); 200 void resolveNameRefs( InlineList *inlineList, Action *action ); 201 void resolveActionNameRefs(); 202 203 /* Set the alphabet type. If type types are not valid returns false. */ 204 bool setAlphType( const InputLoc &loc, char *s1, char *s2 ); 205 bool setAlphType( const InputLoc &loc, char *s1 ); 206 207 /* Override one of the variables ragel uses. */ 208 bool setVariable( char *var, InlineList *inlineList ); 209 210 /* Unique actions. */ 211 void removeDups( ActionTable &actionTable ); 212 void removeActionDups( FsmAp *graph ); 213 214 /* Dumping the name instantiation tree. */ 215 void printNameInst( NameInst *nameInst, int level ); 216 217 /* Make the graph from a graph dict node. Does minimization. */ 218 FsmAp *makeInstance( GraphDictEl *gdNode ); 219 FsmAp *makeSpecific( GraphDictEl *gdNode ); 220 FsmAp *makeAll(); 221 222 /* Checking the contents of actions. */ 223 void checkAction( Action *action ); 224 void checkInlineList( Action *act, InlineList *inlineList ); 225 226 void analyzeAction( Action *action, InlineList *inlineList ); 227 void analyzeGraph( FsmAp *graph ); 228 void makeExports(); 229 230 void prepareMachineGen( GraphDictEl *graphDictEl ); 231 void prepareMachineGenTBWrapped( GraphDictEl *graphDictEl ); 232 void generateXML( ostream &out ); 233 void generateReduced( InputData &inputData ); 234 FsmAp *sectionGraph; 235 bool generatingSectionSubset; 236 237 void initKeyOps(); 238 239 /* 240 * Data collected during the parse. 241 */ 242 243 /* Dictionary of graphs. Both instances and non-instances go here. */ 244 GraphDict graphDict; 245 246 /* The list of instances. */ 247 GraphList instanceList; 248 249 /* Dictionary of actions. Lets actions be defined and then referenced. */ 250 ActionDict actionDict; 251 252 /* Dictionary of named priorities. */ 253 PriorDict priorDict; 254 255 /* Dictionary of named local errors. */ 256 LocalErrDict localErrDict; 257 258 /* List of actions. Will be pasted into a switch statement. */ 259 ActionList actionList; 260 261 /* The id of the next priority name and label. */ 262 int nextPriorKey, nextLocalErrKey, nextNameId, nextCondId; 263 264 /* The default priority number key for a machine. This is active during 265 * the parse of the rhs of a machine assignment. */ 266 int curDefPriorKey; 267 268 int curDefLocalErrKey; 269 270 /* Alphabet type. */ 271 HostType *userAlphType; 272 bool alphTypeSet; 273 InputLoc alphTypeLoc; 274 275 /* Element type and get key expression. */ 276 InlineList *getKeyExpr; 277 InlineList *accessExpr; 278 279 /* Stack management */ 280 InlineList *prePushExpr; 281 InlineList *postPopExpr; 282 283 /* Overriding variables. */ 284 InlineList *pExpr; 285 InlineList *peExpr; 286 InlineList *eofExpr; 287 InlineList *csExpr; 288 InlineList *topExpr; 289 InlineList *stackExpr; 290 InlineList *actExpr; 291 InlineList *tokstartExpr; 292 InlineList *tokendExpr; 293 InlineList *dataExpr; 294 295 /* The alphabet range. */ 296 char *lowerNum, *upperNum; 297 Key lowKey, highKey; 298 InputLoc rangeLowLoc, rangeHighLoc; 299 300 /* The name of the file the fsm is from, and the spec name. */ 301 const char *fileName; 302 char *sectionName; 303 InputLoc sectionLoc; 304 305 /* Counting the action and priority ordering. */ 306 int curActionOrd; 307 int curPriorOrd; 308 309 /* Root of the name tree. One root is for the instantiated machines. The 310 * other root is for exported definitions. */ 311 NameInst *rootName; 312 NameInst *exportsRootName; 313 314 /* Name tree walking. */ 315 NameInst *curNameInst; 316 int curNameChild; 317 318 /* The place where resolved epsilon transitions go. These cannot go into 319 * the parse tree because a single epsilon op can resolve more than once 320 * to different nameInsts if the machine it's in is used more than once. */ 321 NameVect epsilonResolvedLinks; 322 int nextEpsilonResolvedLink; 323 324 /* Root of the name tree used for doing local name searches. */ 325 NameInst *localNameScope; 326 327 void setLmInRetLoc( InlineList *inlineList ); 328 void initLongestMatchData(); 329 void setLongestMatchData( FsmAp *graph ); 330 void initNameWalk(); 331 void initExportsNameWalk(); nextNameScopeParseData332 NameInst *nextNameScope() { return curNameInst->childVect[curNameChild]; } 333 NameFrame enterNameScope( bool isLocal, int numScopes ); 334 void popNameScope( const NameFrame &frame ); 335 void resetNameScope( const NameFrame &frame ); 336 337 /* Make name ids to name inst pointers. */ 338 NameInst **nameIndex; 339 340 /* Counter for assigning ids to longest match items. */ 341 int nextLongestMatchId; 342 bool lmRequiresErrorState; 343 344 /* List of all longest match parse tree items. */ 345 LmList lmList; 346 347 Action *newAction( const char *name, InlineList *inlineList ); 348 349 Action *initTokStart; 350 int initTokStartOrd; 351 352 Action *setTokStart; 353 int setTokStartOrd; 354 355 Action *initActId; 356 int initActIdOrd; 357 358 Action *setTokEnd; 359 int setTokEndOrd; 360 beginProcessingParseData361 void beginProcessing() 362 { 363 ::condData = &thisCondData; 364 ::keyOps = &thisKeyOps; 365 } 366 367 CondData thisCondData; 368 KeyOps thisKeyOps; 369 370 ExportList exportList; 371 LengthDefList lengthDefList; 372 373 CodeGenData *cgd; 374 }; 375 376 void afterOpMinimize( FsmAp *fsm, bool lastInSeq = true ); 377 Key makeFsmKeyHex( char *str, const InputLoc &loc, ParseData *pd ); 378 Key makeFsmKeyDec( char *str, const InputLoc &loc, ParseData *pd ); 379 Key makeFsmKeyNum( char *str, const InputLoc &loc, ParseData *pd ); 380 Key makeFsmKeyChar( char c, ParseData *pd ); 381 void makeFsmKeyArray( Key *result, char *data, int len, ParseData *pd ); 382 void makeFsmUniqueKeyArray( KeySet &result, char *data, int len, 383 bool caseInsensitive, ParseData *pd ); 384 FsmAp *makeBuiltin( BuiltinMachine builtin, ParseData *pd ); 385 FsmAp *dotFsm( ParseData *pd ); 386 FsmAp *dotStarFsm( ParseData *pd ); 387 388 void errorStateLabels( const NameSet &locations ); 389 390 391 #endif 392