1 /* 2 3 This file is part of the Maude 2 interpreter. 4 5 Copyright 1997-2003 SRI International, Menlo Park, CA 94025, USA. 6 7 This program is free software; you can redistribute it and/or modify 8 it under the terms of the GNU General Public License as published by 9 the Free Software Foundation; either version 2 of the License, or 10 (at your option) any later version. 11 12 This program is distributed in the hope that it will be useful, 13 but WITHOUT ANY WARRANTY; without even the implied warranty of 14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 GNU General Public License for more details. 16 17 You should have received a copy of the GNU General Public License 18 along with this program; if not, write to the Free Software 19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA. 20 21 */ 22 23 /************************************************ 24 ** SCP Parsing Algorithm ** 25 ** Maude Implementation ** 26 ** Jose F. Quesada ** 27 ** CSL - SRI Int. 1998 ** 28 ************************************************/ 29 30 /******* 31 ******* scp_parser.hh 32 *******/ 33 34 #ifndef _scp_parser_hh_ 35 #define _scp_parser_hh_ 36 37 #include "macros.hh" 38 #include "vector.hh" 39 #include "string.h" 40 41 #include "scp_kernel.hh" 42 43 class ScpParser 44 { 45 class ScpBaseNT* membasent; 46 int lenbasent; 47 int maxbasent; 48 class ScpAbslNT* memabslnt; 49 int lenabslnt; 50 int maxabslnt; 51 int* ldgrabslnt; 52 int* rdgrabslnt; 53 int maxnontm; 54 class ScpProdtn* memprodtn; 55 int lenprodtn; 56 int curprodtn; 57 int* ldgrprodtn; 58 int* rdgrprodtn; 59 int* memrhs; 60 int lenrhs; 61 int currhs; 62 int maxtermn; 63 int* ldgrtermn; 64 int* rdgrtermn; 65 class ScpBubble* membubble; 66 int lenbubble; 67 int curbubble; 68 int* memexcept; 69 int lenexcept; 70 int curexcept; 71 int compiled; 72 int* addrtermn; 73 int* addrnontm; 74 int* covtbl; 75 int* covnttb; 76 int* covtmtb; 77 int* ldertmtb; /* same as covtmtb */ 78 int* ldernttb; 79 int* rdernttb; 80 int* invrdernttb; 81 char* adjtb; 82 char* bubexcept; 83 class ScpCov* memcov; 84 class ScpCov* bubcov; 85 86 int* nodeterm; 87 int* eventterm; 88 int* inputterm; 89 int maxlenterm; 90 int lengthterm; 91 int rootterm; 92 int ambnodes; 93 int rootnode; 94 95 class ScpEvent* memevent; 96 int lenevent; 97 int curevent; 98 99 class ScpNode* memnode; 100 int lennode; 101 int curnode; 102 103 class ScpAnal* memanal; 104 int lenanal; 105 int curanal; 106 int prevnextanal; 107 108 int* memdetect; 109 int* typedetect; 110 int curdetect; 111 int lendetect; 112 int lasttokendetect; 113 int prevlasttokendetect; 114 int* errorlist; 115 int* errorlevel; 116 int curerrorlist; 117 char* memalter; 118 int lenalter; 119 int curalter; 120 121 int* memprodruntoken; 122 int* memprodrunnode; 123 124 int* memambnode; 125 int prevambnode; 126 127 int parseErrorRecover; 128 129 /* MEMRHS must be at least 2 */ 130 /* MEMPRODTN must be at least 2 */ 131 enum { MEMPRODTN = 1000, ADDPRODTN = 1000, 132 MEMBASENT = 200, ADDBASENT = 200, 133 MEMABSLNT = 400, ADDABSLNT = 400, 134 MEMRHS = 5000, ADDRHS = 5000, 135 MEMBUBBLE = 10, ADDBUBBLE = 10, 136 MEMBUBCOV = 100, ADDBUBCOV = 100, 137 MEMEXCEPT = 50, ADDEXCEPT = 50, 138 MEMTERMN = 500, ADDTERMN = 500, 139 MEMLENTERM= 100, ADDLENTERM= 100, 140 MEMEVENT = 500, ADDEVENT = 500, 141 MEMNODE = 500, ADDNODE = 500, 142 MEMANAL = 500, ADDANAL = 500, 143 MEMDETECT = 50, ADDDETECT = 50, 144 ADDALTER = 10}; 145 // insertProd 146 int insertBasePrecNT(int,int); 147 int getBaseNT(int); 148 int getPrecNT(int); 149 150 // compileParser 151 void compileDerivationTables(); 152 void compileLDerGraph(); 153 void compileLDerTable(); 154 void insertLDerNT(int); 155 void compileRDerGraph(); 156 void compileRDerTable(); 157 void insertRDerNT(int); 158 void compileCoverageTables(); 159 void compileAdjacencyTables(); 160 void insertAdjTM(int,int); 161 void insertAdjNT(int,int); 162 163 // parseSentence 164 void tryEvent(int,int,int,int,int,int); 165 void runToken(int); 166 void runTokenBubble(int); 167 void tryNode(int,int,int,int,int); 168 void runNode(int); 169 void runBubble(int,int,int,int,int,int=0); 170 void initAnalysis(); 171 int skipAnalysis(int); 172 int setAmbNode(int); 173 void printErrorDetect(); 174 175 // Error Detection and Recovery 176 void errorDetect(int,int,int = 0,int = 0); 177 void errorRecovery(int = 0); 178 179 enum { STANDARD_ERROR = 0, 180 EXCEPTION_ERROR = 1, 181 CLOSEPAR_ERROR = 2, 182 MAXERRORLEVEL = 10}; 183 184 // printSentence 185 void printAnalNodes(int); 186 char* printSymbol(int); 187 188 189 public: 190 191 // ScpParser 192 // creates a new parser 193 ScpParser(); 194 195 // ~ScpParser 196 // deletes a previously created parser 197 ~ScpParser(); 198 199 // insertProd (rule version) 200 // inserts a new production in a previously created parser 201 void insertProd (int xlhs, const Vector<int>& xrhs, 202 int xprec,const Vector<int>& xgather); 203 void compileParser(); 204 205 // insertProd (bubble version) 206 // indicates to the parser that a symbol is of type bubble, 207 // and specifies its properties 208 // Description: 209 // nonTerminal: must be a negative number, 210 // it is not necessary de define previously nonTerminal 211 // lowBound: a natural number (0 to INT_MAX) 212 // 0 means that the bubble may be empty 213 // upperBound: an integer number 214 // if lowBound == upperBound then the Bubble has 215 // a fixed length and is not checked with the grammar 216 // to define a bubble without a fixed maximum length, 217 // use upperBound = -1 218 // leftParenToken: 219 // 0-N : the token number associated with the 220 // left parenthesis 221 // -1 : the bubble doesn't use parenthesis 222 // rightParenToken: similar to leftParenToken 223 // 224 // 225 void insertProd(int nonTerminal, 226 int lowBound, 227 int upperBound, 228 int leftParenToken, 229 int rightParenToken, 230 const Vector<int>& exceptionTokenList ); 231 232 // parseSentence 233 // returns the number of analysis: 234 // 0 => ungrammatical input (parsing failure) 235 // 1 => grammatical and non-ambiguous input (1) 236 // N>1 => grammatical and ambiguous input 237 // double parseSentence(Vector<int>& term,int root); 238 int parseSentence(Vector<int>& term,int root,int errorRecover=0); 239 240 // getRootNode 241 // returns the index of the root node 242 // -1 => there isn't a root node (ungrammatical input) 243 // N>=0 => the index of the root node 244 int getRootNode(); 245 246 // getProductionNumber(int node) 247 // returns the production index which created the node 248 // -1 => if the node is a token, or the node index 249 // is incorrect 250 // N>=0 => the production index 251 int getProductionNumber(int node); 252 253 // getFirstPosition(int node) 254 // returns the position of the left-most token 255 // covered by node 256 // -1 => no active parser or incorrect node index 257 // N>=0 => token position 258 int getFirstPosition(int node); 259 260 // getLastPosition(int node) 261 // returns the position of the right-most token 262 // covered by node 263 // -1 => no active parser or incorrect node index 264 // N>=0 => token position 265 int getLastPosition(int node); 266 267 // getNumberOfChildren(int node) 268 // returns the number of Non-Terminal children of node 269 // that is, the length of the gather of the 270 // production taht created the node 271 // -1 => no active parser, or incorrect node 272 // N>=0 => number of NT children (maybe = 0) 273 int getNumberOfChildren(int node); 274 275 // getChild(int node,int childnumber) 276 // returns the index of the node corresponding to 277 // the position indicated by childnumber 278 // This function only considers NonTerminal children 279 // The numeration of children begins with 0 280 // That is, the set of possible children goes 281 // from 0 to getNumberOfNTChildren - 1 282 // -1 => something wrong 283 // N>=0 => node index 284 int getChild(int node,int childnumber); 285 286 // nextAnalysis() 287 // changes the configuration of the parser to the 288 // next parser 289 // -1 => there is no active parser 290 // 0 => there isn't more analysis 291 // 1 => the parser has changed the configuration 292 int nextAnalysis(); 293 294 // freeCurrentParse() 295 // frees the memory used by the current parse 296 void freeCurrentParse(); 297 298 299 // getNumberOfErrors() 300 // when parseSentence() returns 0, getNumberOfErrors() 301 // obtains the total number of errors that the error 302 // detection and error recovery have been able to detect 303 // Errors are numbered beginning with 1 304 int getNumberOfErrors(); 305 306 // getErrorPosition(int errornumber) 307 // for each value errorn in 1 - getNumberOfErrors(), the function 308 // getErrorPosition(errorn) obtains the position where the error 309 // has appeared. Positions are numbered beginning with 0. So, 310 // for an input term of length L, the usual positions of errors 311 // are 0 - (L-1). Nevertheless, getErrorPosition() may return L, 312 // which means that the error has appeared at the end of the 313 // input: premature end. 314 // The function will return -1 is the argument (errornumber) 315 // isn't in the interval 1 - getNumberOfErrors() 316 // 317 int getErrorPosition(int); 318 319 // The error detection strategy includes three kinds of errors. 320 // 1.- Standard Errors 321 // 2.- Exception Errors (inside Bubbles) 322 // 3.- CloseParenthesis Errors (inside Bubbles) 323 // As it is possible that more than just one type of error appears 324 // at one position, the system distinguishes three functions: 325 326 // getAlternativesStandardError(int errornumber) 327 // returns the set of alternatives (tokens) that may appear 328 // at the position of the error 329 Vector<int> getAlternativesStandardError(int errornumber); 330 331 // getBubblesExceptionError(int errornumber) 332 // returns the set of bubbles (non-terminals) that have failed 333 // at that position due to an exception error 334 Vector<int> getBubblesExceptionError(int errornumber); 335 336 // getBubblesCloseParenthesisError(int errornumber) 337 // returns the set of bubbles (non-terminals) that have failed 338 // at that position due to an unbalanced close parenthesis 339 Vector<int> getBubblesCloseParenthesisError(int errornumber); 340 341 // Notes: 342 // 1.- If the three functions return 0-length vectors, this means 343 // that the input must end at the position of the error 344 345 // Auxiliar Functions 346 void printGrammar(); 347 void printSentence(); 348 int getLHSProduction(int); 349 350 void printCurrentParse(); 351 void printNode(int); 352 }; 353 354 typedef ScpParser Parser; 355 356 #endif 357