1
2 /* Parser implementation */
3
4 /* For a description, see the comments at end of this file */
5
6 /* XXX To do: error recovery */
7
8 #include "Python.h"
9 #include "../Include/pgenheaders.h"
10 #include "../Include/token.h"
11 #include "../Include/grammar.h"
12 #include "../Include/node.h"
13 #include "parser.h"
14 #include "../Include/errcode.h"
15
16
17 #ifdef Py_DEBUG
18 PyAPI_DATA(int) Py_DebugFlag;
19 #define D(x) if (!Py_DebugFlag); else x
20 #else
21 #define D(x)
22 #endif
23
24
25 /* STACK DATA TYPE */
26
27 static void s_reset(stack *);
28
29 static void
s_reset(stack * s)30 s_reset(stack *s)
31 {
32 s->s_top = &s->s_base[MAXSTACK];
33 }
34
35 #define s_empty(s) ((s)->s_top == &(s)->s_base[MAXSTACK])
36
37 static int
s_push(stack * s,dfa * d,node * parent)38 s_push(stack *s, dfa *d, node *parent)
39 {
40 stackentry *top;
41 if (s->s_top == s->s_base) {
42 fprintf(stderr, "s_push: parser stack overflow\n");
43 return E_NOMEM;
44 }
45 top = --s->s_top;
46 top->s_dfa = d;
47 top->s_parent = parent;
48 top->s_state = 0;
49 return 0;
50 }
51
52 #ifdef Py_DEBUG
53
54 static void
s_pop(stack * s)55 s_pop(stack *s)
56 {
57 if (s_empty(s))
58 Py_FatalError("s_pop: parser stack underflow -- FATAL");
59 s->s_top++;
60 }
61
62 #else /* !Py_DEBUG */
63
64 #define s_pop(s) (s)->s_top++
65
66 #endif
67
68
69 /* PARSER CREATION */
70
71 parser_state *
Ta3Parser_New(grammar * g,int start)72 Ta3Parser_New(grammar *g, int start)
73 {
74 parser_state *ps;
75
76 if (!g->g_accel)
77 Ta3Grammar_AddAccelerators(g);
78 ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));
79 if (ps == NULL)
80 return NULL;
81 ps->p_grammar = g;
82 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
83 ps->p_flags = 0;
84 #endif
85 ps->p_tree = Ta3Node_New(start);
86 if (ps->p_tree == NULL) {
87 PyMem_FREE(ps);
88 return NULL;
89 }
90 s_reset(&ps->p_stack);
91 (void) s_push(&ps->p_stack, Ta3Grammar_FindDFA(g, start), ps->p_tree);
92 return ps;
93 }
94
95 void
Ta3Parser_Delete(parser_state * ps)96 Ta3Parser_Delete(parser_state *ps)
97 {
98 /* NB If you want to save the parse tree,
99 you must set p_tree to NULL before calling delparser! */
100 Ta3Node_Free(ps->p_tree);
101 PyMem_FREE(ps);
102 }
103
104
105 /* PARSER STACK OPERATIONS */
106
107 static int
shift(stack * s,int type,char * str,int newstate,int lineno,int col_offset)108 shift(stack *s, int type, char *str, int newstate, int lineno, int col_offset)
109 {
110 int err;
111 assert(!s_empty(s));
112 err = Ta3Node_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);
113 if (err)
114 return err;
115 s->s_top->s_state = newstate;
116 return 0;
117 }
118
119 static int
push(stack * s,int type,dfa * d,int newstate,int lineno,int col_offset)120 push(stack *s, int type, dfa *d, int newstate, int lineno, int col_offset)
121 {
122 int err;
123 node *n;
124 n = s->s_top->s_parent;
125 assert(!s_empty(s));
126 err = Ta3Node_AddChild(n, type, (char *)NULL, lineno, col_offset);
127 if (err)
128 return err;
129 s->s_top->s_state = newstate;
130 return s_push(s, d, CHILD(n, NCH(n)-1));
131 }
132
133
134 /* PARSER PROPER */
135
136 static int
classify(parser_state * ps,int type,const char * str)137 classify(parser_state *ps, int type, const char *str)
138 {
139 grammar *g = ps->p_grammar;
140 int n = g->g_ll.ll_nlabels;
141
142 if (type == NAME) {
143 label *l = g->g_ll.ll_label;
144 int i;
145 for (i = n; i > 0; i--, l++) {
146 if (l->lb_type != NAME || l->lb_str == NULL ||
147 l->lb_str[0] != str[0] ||
148 strcmp(l->lb_str, str) != 0)
149 continue;
150 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
151 #if 0
152 /* Leaving this in as an example */
153 if (!(ps->p_flags & CO_FUTURE_WITH_STATEMENT)) {
154 if (str[0] == 'w' && strcmp(str, "with") == 0)
155 break; /* not a keyword yet */
156 else if (str[0] == 'a' && strcmp(str, "as") == 0)
157 break; /* not a keyword yet */
158 }
159 #endif
160 #endif
161 D(printf("It's a keyword\n"));
162 return n - i;
163 }
164 }
165
166 {
167 label *l = g->g_ll.ll_label;
168 int i;
169 for (i = n; i > 0; i--, l++) {
170 if (l->lb_type == type && l->lb_str == NULL) {
171 D(printf("It's a token we know\n"));
172 return n - i;
173 }
174 }
175 }
176
177 D(printf("Illegal token\n"));
178 return -1;
179 }
180
181 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
182 #if 0
183 /* Leaving this in as an example */
184 static void
185 future_hack(parser_state *ps)
186 {
187 node *n = ps->p_stack.s_top->s_parent;
188 node *ch, *cch;
189 int i;
190
191 /* from __future__ import ..., must have at least 4 children */
192 n = CHILD(n, 0);
193 if (NCH(n) < 4)
194 return;
195 ch = CHILD(n, 0);
196 if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)
197 return;
198 ch = CHILD(n, 1);
199 if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&
200 strcmp(STR(CHILD(ch, 0)), "__future__") != 0)
201 return;
202 ch = CHILD(n, 3);
203 /* ch can be a star, a parenthesis or import_as_names */
204 if (TYPE(ch) == STAR)
205 return;
206 if (TYPE(ch) == LPAR)
207 ch = CHILD(n, 4);
208
209 for (i = 0; i < NCH(ch); i += 2) {
210 cch = CHILD(ch, i);
211 if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {
212 char *str_ch = STR(CHILD(cch, 0));
213 if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {
214 ps->p_flags |= CO_FUTURE_WITH_STATEMENT;
215 } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {
216 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
217 } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {
218 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
219 }
220 }
221 }
222 }
223 #endif
224 #endif /* future keyword */
225
226 int
Ta3Parser_AddToken(parser_state * ps,int type,char * str,int lineno,int col_offset,int * expected_ret)227 Ta3Parser_AddToken(parser_state *ps, int type, char *str,
228 int lineno, int col_offset, int *expected_ret)
229 {
230 int ilabel;
231 int err;
232
233 D(printf("Token %s/'%s' ... ", _Ta3Parser_TokenNames[type], str));
234
235 /* Find out which label this token is */
236 ilabel = classify(ps, type, str);
237 if (ilabel < 0)
238 return E_SYNTAX;
239
240 /* Loop until the token is shifted or an error occurred */
241 for (;;) {
242 /* Fetch the current dfa and state */
243 dfa *d = ps->p_stack.s_top->s_dfa;
244 state *s = &d->d_state[ps->p_stack.s_top->s_state];
245
246 D(printf(" DFA '%s', state %d:",
247 d->d_name, ps->p_stack.s_top->s_state));
248
249 /* Check accelerator */
250 if (s->s_lower <= ilabel && ilabel < s->s_upper) {
251 int x = s->s_accel[ilabel - s->s_lower];
252 if (x != -1) {
253 if (x & (1<<7)) {
254 /* Push non-terminal */
255 int nt = (x >> 8) + NT_OFFSET;
256 int arrow = x & ((1<<7)-1);
257 dfa *d1 = Ta3Grammar_FindDFA(
258 ps->p_grammar, nt);
259 if ((err = push(&ps->p_stack, nt, d1,
260 arrow, lineno, col_offset)) > 0) {
261 D(printf(" MemError: push\n"));
262 return err;
263 }
264 D(printf(" Push ...\n"));
265 continue;
266 }
267
268 /* Shift the token */
269 if ((err = shift(&ps->p_stack, type, str,
270 x, lineno, col_offset)) > 0) {
271 D(printf(" MemError: shift.\n"));
272 return err;
273 }
274 D(printf(" Shift.\n"));
275 /* Pop while we are in an accept-only state */
276 while (s = &d->d_state
277 [ps->p_stack.s_top->s_state],
278 s->s_accept && s->s_narcs == 1) {
279 D(printf(" DFA '%s', state %d: "
280 "Direct pop.\n",
281 d->d_name,
282 ps->p_stack.s_top->s_state));
283 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
284 #if 0
285 if (d->d_name[0] == 'i' &&
286 strcmp(d->d_name,
287 "import_stmt") == 0)
288 future_hack(ps);
289 #endif
290 #endif
291 s_pop(&ps->p_stack);
292 if (s_empty(&ps->p_stack)) {
293 D(printf(" ACCEPT.\n"));
294 return E_DONE;
295 }
296 d = ps->p_stack.s_top->s_dfa;
297 }
298 return E_OK;
299 }
300 }
301
302 if (s->s_accept) {
303 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
304 #if 0
305 if (d->d_name[0] == 'i' &&
306 strcmp(d->d_name, "import_stmt") == 0)
307 future_hack(ps);
308 #endif
309 #endif
310 /* Pop this dfa and try again */
311 s_pop(&ps->p_stack);
312 D(printf(" Pop ...\n"));
313 if (s_empty(&ps->p_stack)) {
314 D(printf(" Error: bottom of stack.\n"));
315 return E_SYNTAX;
316 }
317 continue;
318 }
319
320 /* Stuck, report syntax error */
321 D(printf(" Error.\n"));
322 if (expected_ret) {
323 if (s->s_lower == s->s_upper - 1) {
324 /* Only one possible expected token */
325 *expected_ret = ps->p_grammar->
326 g_ll.ll_label[s->s_lower].lb_type;
327 }
328 else
329 *expected_ret = -1;
330 }
331 return E_SYNTAX;
332 }
333 }
334
335
336 #ifdef Py_DEBUG
337
338 /* DEBUG OUTPUT */
339
340 void
dumptree(grammar * g,node * n)341 dumptree(grammar *g, node *n)
342 {
343 int i;
344
345 if (n == NULL)
346 printf("NIL");
347 else {
348 label l;
349 l.lb_type = TYPE(n);
350 l.lb_str = STR(n);
351 printf("%s", Ta3Grammar_LabelRepr(&l));
352 if (ISNONTERMINAL(TYPE(n))) {
353 printf("(");
354 for (i = 0; i < NCH(n); i++) {
355 if (i > 0)
356 printf(",");
357 dumptree(g, CHILD(n, i));
358 }
359 printf(")");
360 }
361 }
362 }
363
364 void
showtree(grammar * g,node * n)365 showtree(grammar *g, node *n)
366 {
367 int i;
368
369 if (n == NULL)
370 return;
371 if (ISNONTERMINAL(TYPE(n))) {
372 for (i = 0; i < NCH(n); i++)
373 showtree(g, CHILD(n, i));
374 }
375 else if (ISTERMINAL(TYPE(n))) {
376 printf("%s", _Ta3Parser_TokenNames[TYPE(n)]);
377 if (TYPE(n) == NUMBER || TYPE(n) == NAME)
378 printf("(%s)", STR(n));
379 printf(" ");
380 }
381 else
382 printf("? ");
383 }
384
385 #endif /* Py_DEBUG */
386
387 /*
388
389 Description
390 -----------
391
392 The parser's interface is different than usual: the function addtoken()
393 must be called for each token in the input. This makes it possible to
394 turn it into an incremental parsing system later. The parsing system
395 constructs a parse tree as it goes.
396
397 A parsing rule is represented as a Deterministic Finite-state Automaton
398 (DFA). A node in a DFA represents a state of the parser; an arc represents
399 a transition. Transitions are either labeled with terminal symbols or
400 with non-terminals. When the parser decides to follow an arc labeled
401 with a non-terminal, it is invoked recursively with the DFA representing
402 the parsing rule for that as its initial state; when that DFA accepts,
403 the parser that invoked it continues. The parse tree constructed by the
404 recursively called parser is inserted as a child in the current parse tree.
405
406 The DFA's can be constructed automatically from a more conventional
407 language description. An extended LL(1) grammar (ELL(1)) is suitable.
408 Certain restrictions make the parser's life easier: rules that can produce
409 the empty string should be outlawed (there are other ways to put loops
410 or optional parts in the language). To avoid the need to construct
411 FIRST sets, we can require that all but the last alternative of a rule
412 (really: arc going out of a DFA's state) must begin with a terminal
413 symbol.
414
415 As an example, consider this grammar:
416
417 expr: term (OP term)*
418 term: CONSTANT | '(' expr ')'
419
420 The DFA corresponding to the rule for expr is:
421
422 ------->.---term-->.------->
423 ^ |
424 | |
425 \----OP----/
426
427 The parse tree generated for the input a+b is:
428
429 (expr: (term: (NAME: a)), (OP: +), (term: (NAME: b)))
430
431 */
432