1 /*
2 
3     This file is part of the Maude 2 interpreter.
4 
5     Copyright 1997-2003 SRI International, Menlo Park, CA 94025, USA.
6 
7     This program is free software; you can redistribute it and/or modify
8     it under the terms of the GNU General Public License as published by
9     the Free Software Foundation; either version 2 of the License, or
10     (at your option) any later version.
11 
12     This program is distributed in the hope that it will be useful,
13     but WITHOUT ANY WARRANTY; without even the implied warranty of
14     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15     GNU General Public License for more details.
16 
17     You should have received a copy of the GNU General Public License
18     along with this program; if not, write to the Free Software
19     Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307, USA.
20 
21 */
22 
23 /************************************************
24  ** SCP Parsing Algorithm                      **
25  **  	Maude Implementation                   **
26  **		Jose F. Quesada                **
27  **		CSL - SRI Int.  1998           **
28  ************************************************/
29 
30 /*******
31  ******* scp_parser.hh
32  *******/
33 
34 #ifndef _scp_parser_hh_
35 #define _scp_parser_hh_
36 
37 #include "macros.hh"
38 #include "vector.hh"
39 #include "string.h"
40 
41 #include "scp_kernel.hh"
42 
43 class ScpParser
44 {
45   class ScpBaseNT*	membasent;
46   int			lenbasent;
47   int			maxbasent;
48   class ScpAbslNT*	memabslnt;
49   int			lenabslnt;
50   int			maxabslnt;
51   int*			ldgrabslnt;
52   int*			rdgrabslnt;
53   int                   maxnontm;
54   class ScpProdtn*	memprodtn;
55   int			lenprodtn;
56   int			curprodtn;
57   int*			ldgrprodtn;
58   int*			rdgrprodtn;
59   int*			memrhs;
60   int			lenrhs;
61   int			currhs;
62   int			maxtermn;
63   int*			ldgrtermn;
64   int*			rdgrtermn;
65   class ScpBubble*	membubble;
66   int			lenbubble;
67   int			curbubble;
68   int*			memexcept;
69   int			lenexcept;
70   int			curexcept;
71   int			compiled;
72   int*			addrtermn;
73   int*                  addrnontm;
74   int*			covtbl;
75   int*                  covnttb;
76   int*                  covtmtb;
77   int*                  ldertmtb; /* same as covtmtb */
78   int*			ldernttb;
79   int*                  rdernttb;
80   int*                  invrdernttb;
81   char*                 adjtb;
82   char*                 bubexcept;
83   class ScpCov*		memcov;
84   class ScpCov*         bubcov;
85 
86   int*                  nodeterm;
87   int*                  eventterm;
88   int*                  inputterm;
89   int                   maxlenterm;
90   int                   lengthterm;
91   int			rootterm;
92   int                   ambnodes;
93   int                   rootnode;
94 
95   class ScpEvent*	memevent;
96   int			lenevent;
97   int			curevent;
98 
99   class ScpNode*	memnode;
100   int			lennode;
101   int			curnode;
102 
103   class ScpAnal*        memanal;
104   int                   lenanal;
105   int                   curanal;
106   int                   prevnextanal;
107 
108   int*                  memdetect;
109   int*                  typedetect;
110   int                   curdetect;
111   int                   lendetect;
112   int                   lasttokendetect;
113   int                   prevlasttokendetect;
114   int*                  errorlist;
115   int*                  errorlevel;
116   int                   curerrorlist;
117   char*                 memalter;
118   int                   lenalter;
119   int                   curalter;
120 
121   int*                  memprodruntoken;
122   int*                  memprodrunnode;
123 
124   int*                  memambnode;
125   int                   prevambnode;
126 
127   int                   parseErrorRecover;
128 
129   /* MEMRHS must be at least 2 */
130   /* MEMPRODTN must be at least 2 */
131   enum { MEMPRODTN =   1000, ADDPRODTN =  1000,
132 	 MEMBASENT =    200, ADDBASENT =   200,
133 	 MEMABSLNT =    400, ADDABSLNT =   400,
134 	 MEMRHS    =   5000, ADDRHS    =  5000,
135 	 MEMBUBBLE =     10, ADDBUBBLE =    10,
136 	 MEMBUBCOV =    100, ADDBUBCOV =   100,
137 	 MEMEXCEPT =     50, ADDEXCEPT =    50,
138 	 MEMTERMN  =    500, ADDTERMN  =   500,
139          MEMLENTERM=    100, ADDLENTERM=   100,
140          MEMEVENT  =    500, ADDEVENT  =   500,
141          MEMNODE   =    500, ADDNODE   =   500,
142          MEMANAL   =    500, ADDANAL   =   500,
143          MEMDETECT =     50, ADDDETECT =    50,
144                              ADDALTER  =    10};
145   // insertProd
146   int insertBasePrecNT(int,int);
147   int getBaseNT(int);
148   int getPrecNT(int);
149 
150   // compileParser
151   void compileDerivationTables();
152   void compileLDerGraph();
153   void compileLDerTable();
154   void insertLDerNT(int);
155   void compileRDerGraph();
156   void compileRDerTable();
157   void insertRDerNT(int);
158   void compileCoverageTables();
159   void compileAdjacencyTables();
160   void insertAdjTM(int,int);
161   void insertAdjNT(int,int);
162 
163   // parseSentence
164   void tryEvent(int,int,int,int,int,int);
165   void runToken(int);
166   void runTokenBubble(int);
167   void tryNode(int,int,int,int,int);
168   void runNode(int);
169   void runBubble(int,int,int,int,int,int=0);
170   void initAnalysis();
171   int  skipAnalysis(int);
172   int  setAmbNode(int);
173   void printErrorDetect();
174 
175   // Error Detection and Recovery
176   void errorDetect(int,int,int = 0,int = 0);
177   void errorRecovery(int = 0);
178 
179   enum { STANDARD_ERROR = 0,
180 	 EXCEPTION_ERROR = 1,
181 	 CLOSEPAR_ERROR = 2,
182          MAXERRORLEVEL = 10};
183 
184   // printSentence
185   void printAnalNodes(int);
186   char* printSymbol(int);
187 
188 
189 public:
190 
191   //	ScpParser
192   //	   creates a new parser
193   ScpParser();
194 
195   //	~ScpParser
196   //	   deletes a previously created parser
197   ~ScpParser();
198 
199   //	insertProd	(rule version)
200   //	   inserts a new production in a previously created parser
201   void insertProd (int xlhs, const Vector<int>& xrhs,
202 		   int xprec,const Vector<int>& xgather);
203   void compileParser();
204 
205   //	insertProd	(bubble version)
206   //	   indicates to the parser that a symbol is of type bubble,
207   //		and specifies its properties
208   //	   Description:
209   //		nonTerminal: must be a negative number,
210   //			it is not necessary de define previously nonTerminal
211   //		lowBound: a natural number (0 to INT_MAX)
212   //			0 means that the bubble may be empty
213   //		upperBound: an integer number
214   //			if lowBound == upperBound then the Bubble has
215   //				a fixed length and is not checked with the grammar
216   //			to define a bubble without a fixed maximum length,
217   //				use upperBound = -1
218   //		leftParenToken:
219   //			0-N : the token number associated with the
220   //				left parenthesis
221   //			-1 : the bubble doesn't use parenthesis
222   //		rightParenToken: similar to leftParenToken
223   //
224   //
225   void insertProd(int nonTerminal,
226 		  int lowBound,
227 		  int upperBound,
228 		  int leftParenToken,
229 		  int rightParenToken,
230 		  const Vector<int>& exceptionTokenList );
231 
232   // 	parseSentence
233   //	    returns the number of analysis:
234   //		0 => ungrammatical input (parsing failure)
235   //		1 => grammatical and non-ambiguous input (1)
236   //		N>1 => grammatical and ambiguous input
237   // double parseSentence(Vector<int>& term,int root);
238   int parseSentence(Vector<int>& term,int root,int errorRecover=0);
239 
240   //	getRootNode
241   //	    returns the index of the root node
242   //		-1 => there isn't a root node (ungrammatical input)
243   //		N>=0 => the index of the root node
244   int getRootNode();
245 
246   //	getProductionNumber(int node)
247   //	    returns the production index which created the node
248   //		-1 => if the node is a token, or the node index
249   //				is incorrect
250   //		N>=0 => the production index
251   int getProductionNumber(int node);
252 
253   //	getFirstPosition(int node)
254   //	    returns the position of the left-most token
255   //	    covered by node
256   //		-1 => no active parser or incorrect node index
257   //		N>=0 => token position
258   int getFirstPosition(int node);
259 
260   //	getLastPosition(int node)
261   //	    returns the position of the right-most token
262   //	    covered by node
263   //		-1 => no active parser or incorrect node index
264   //		N>=0 => token position
265   int getLastPosition(int node);
266 
267   //	getNumberOfChildren(int node)
268   //	    returns the number of Non-Terminal children of node
269   //	    that is, the length of the gather of the
270   //	    production taht created the node
271   //		-1 => no active parser, or incorrect node
272   //		N>=0 => number of NT children (maybe = 0)
273   int getNumberOfChildren(int node);
274 
275   //	getChild(int node,int childnumber)
276   //	    returns the index of the node corresponding to
277   //	   	 the position indicated by childnumber
278   //	    This function only considers NonTerminal children
279   //	    The numeration of children begins with 0
280   //	    That is, the set of possible children goes
281   //	    from 0 to getNumberOfNTChildren - 1
282   //		-1 => something wrong
283   //		N>=0 => node index
284   int getChild(int node,int childnumber);
285 
286   //	nextAnalysis()
287   //	    changes the configuration of the parser to the
288   //	    next parser
289   //	       -1 => there is no active parser
290   //		0 => there isn't more analysis
291   //		1 => the parser has changed the configuration
292   int nextAnalysis();
293 
294   //	freeCurrentParse()
295   //	    frees the memory used by the current parse
296   void freeCurrentParse();
297 
298 
299   //    getNumberOfErrors()
300   //        when parseSentence() returns 0, getNumberOfErrors()
301   //        obtains the total number of errors that the error
302   //        detection and error recovery have been able to detect
303   //        Errors are numbered beginning with 1
304   int getNumberOfErrors();
305 
306   //    getErrorPosition(int errornumber)
307   //        for each value errorn in 1 - getNumberOfErrors(), the function
308   //        getErrorPosition(errorn) obtains the position where the error
309   //        has appeared. Positions are numbered beginning with 0. So,
310   //        for an input term of length L, the usual positions of errors
311   //        are 0 - (L-1). Nevertheless, getErrorPosition() may return L,
312   //        which means that the error has appeared at the end of the
313   //        input: premature end.
314   //        The function will return -1 is the argument (errornumber)
315   //        isn't in the interval 1 - getNumberOfErrors()
316   //
317   int getErrorPosition(int);
318 
319   // The error detection strategy includes three kinds of errors.
320   //  1.- Standard Errors
321   //  2.- Exception Errors (inside Bubbles)
322   //  3.- CloseParenthesis Errors (inside Bubbles)
323   // As it is possible that more than just one type of error appears
324   // at one position, the system distinguishes three functions:
325 
326   // getAlternativesStandardError(int errornumber)
327   //      returns the set of alternatives (tokens) that may appear
328   //      at the position of the error
329   Vector<int> getAlternativesStandardError(int errornumber);
330 
331   // getBubblesExceptionError(int errornumber)
332   //      returns the set of bubbles (non-terminals) that have failed
333   //      at that position due to an exception error
334   Vector<int> getBubblesExceptionError(int errornumber);
335 
336   // getBubblesCloseParenthesisError(int errornumber)
337   //      returns the set of bubbles (non-terminals) that have failed
338   //      at that position due to an unbalanced close parenthesis
339   Vector<int> getBubblesCloseParenthesisError(int errornumber);
340 
341   // Notes:
342   //   1.- If the three functions return 0-length vectors, this means
343   //       that the input must end at the position of the error
344 
345   // Auxiliar Functions
346   void printGrammar();
347   void printSentence();
348   int  getLHSProduction(int);
349 
350   void printCurrentParse();
351   void printNode(int);
352 };
353 
354 typedef ScpParser Parser;
355 
356 #endif
357