1
2 /* Parser-tokenizer link implementation */
3
4 #include "../Include/pgenheaders.h"
5 #include "tokenizer.h"
6 #include "../Include/node.h"
7 #include "../Include/grammar.h"
8 #include "parser.h"
9 #include "../Include/parsetok.h"
10 #include "../Include/errcode.h"
11 #include "../Include/graminit.h"
12
13
14 /* Forward */
15 static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
16 static int initerr(perrdetail *err_ret, PyObject * filename);
17
/* Parse input coming from a string.  Return the parse tree, or NULL on
   failure with the error code stored in *err_ret; some errors may be
   printed. */
19 node *
Ta3Parser_ParseString(const char * s,grammar * g,int start,perrdetail * err_ret)20 Ta3Parser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
21 {
22 return Ta3Parser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
23 }
24
25 node *
Ta3Parser_ParseStringFlags(const char * s,grammar * g,int start,perrdetail * err_ret,int flags)26 Ta3Parser_ParseStringFlags(const char *s, grammar *g, int start,
27 perrdetail *err_ret, int flags)
28 {
29 return Ta3Parser_ParseStringFlagsFilename(s, NULL,
30 g, start, err_ret, flags);
31 }
32
33 node *
Ta3Parser_ParseStringFlagsFilename(const char * s,const char * filename,grammar * g,int start,perrdetail * err_ret,int flags)34 Ta3Parser_ParseStringFlagsFilename(const char *s, const char *filename,
35 grammar *g, int start,
36 perrdetail *err_ret, int flags)
37 {
38 int iflags = flags;
39 return Ta3Parser_ParseStringFlagsFilenameEx(s, filename, g, start,
40 err_ret, &iflags);
41 }
42
43 node *
Ta3Parser_ParseStringObject(const char * s,PyObject * filename,grammar * g,int start,perrdetail * err_ret,int * flags)44 Ta3Parser_ParseStringObject(const char *s, PyObject *filename,
45 grammar *g, int start,
46 perrdetail *err_ret, int *flags)
47 {
48 struct tok_state *tok;
49 int exec_input = start == file_input;
50
51 if (initerr(err_ret, filename) < 0)
52 return NULL;
53
54 if (*flags & PyPARSE_IGNORE_COOKIE)
55 tok = Ta3Tokenizer_FromUTF8(s, exec_input);
56 else
57 tok = Ta3Tokenizer_FromString(s, exec_input);
58 if (tok == NULL) {
59 err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
60 return NULL;
61 }
62
63 #ifndef PGEN
64 Py_INCREF(err_ret->filename);
65 tok->filename = err_ret->filename;
66 #endif
67 if (*flags & PyPARSE_ASYNC_ALWAYS)
68 tok->async_always = 1;
69 return parsetok(tok, g, start, err_ret, flags);
70 }
71
72 node *
Ta3Parser_ParseStringFlagsFilenameEx(const char * s,const char * filename_str,grammar * g,int start,perrdetail * err_ret,int * flags)73 Ta3Parser_ParseStringFlagsFilenameEx(const char *s, const char *filename_str,
74 grammar *g, int start,
75 perrdetail *err_ret, int *flags)
76 {
77 node *n;
78 PyObject *filename = NULL;
79 #ifndef PGEN
80 if (filename_str != NULL) {
81 filename = PyUnicode_DecodeFSDefault(filename_str);
82 if (filename == NULL) {
83 err_ret->error = E_ERROR;
84 return NULL;
85 }
86 }
87 #endif
88 n = Ta3Parser_ParseStringObject(s, filename, g, start, err_ret, flags);
89 #ifndef PGEN
90 Py_XDECREF(filename);
91 #endif
92 return n;
93 }
94
/* Parse input coming from a file.  Return the parse tree, or NULL on
   failure with the error code stored in *err_ret; some errors may be
   printed. */
96
97 node *
Ta3Parser_ParseFile(FILE * fp,const char * filename,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret)98 Ta3Parser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
99 const char *ps1, const char *ps2,
100 perrdetail *err_ret)
101 {
102 return Ta3Parser_ParseFileFlags(fp, filename, NULL,
103 g, start, ps1, ps2, err_ret, 0);
104 }
105
106 node *
Ta3Parser_ParseFileFlags(FILE * fp,const char * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int flags)107 Ta3Parser_ParseFileFlags(FILE *fp, const char *filename, const char *enc,
108 grammar *g, int start,
109 const char *ps1, const char *ps2,
110 perrdetail *err_ret, int flags)
111 {
112 int iflags = flags;
113 return Ta3Parser_ParseFileFlagsEx(fp, filename, enc, g, start, ps1,
114 ps2, err_ret, &iflags);
115 }
116
117 node *
Ta3Parser_ParseFileObject(FILE * fp,PyObject * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int * flags)118 Ta3Parser_ParseFileObject(FILE *fp, PyObject *filename,
119 const char *enc, grammar *g, int start,
120 const char *ps1, const char *ps2,
121 perrdetail *err_ret, int *flags)
122 {
123 struct tok_state *tok;
124
125 if (initerr(err_ret, filename) < 0)
126 return NULL;
127
128 if ((tok = Ta3Tokenizer_FromFile(fp, enc, ps1, ps2)) == NULL) {
129 err_ret->error = E_NOMEM;
130 return NULL;
131 }
132 #ifndef PGEN
133 Py_INCREF(err_ret->filename);
134 tok->filename = err_ret->filename;
135 #endif
136 return parsetok(tok, g, start, err_ret, flags);
137 }
138
139 node *
Ta3Parser_ParseFileFlagsEx(FILE * fp,const char * filename,const char * enc,grammar * g,int start,const char * ps1,const char * ps2,perrdetail * err_ret,int * flags)140 Ta3Parser_ParseFileFlagsEx(FILE *fp, const char *filename,
141 const char *enc, grammar *g, int start,
142 const char *ps1, const char *ps2,
143 perrdetail *err_ret, int *flags)
144 {
145 node *n;
146 PyObject *fileobj = NULL;
147 #ifndef PGEN
148 if (filename != NULL) {
149 fileobj = PyUnicode_DecodeFSDefault(filename);
150 if (fileobj == NULL) {
151 err_ret->error = E_ERROR;
152 return NULL;
153 }
154 }
155 #endif
156 n = Ta3Parser_ParseFileObject(fp, fileobj, enc, g,
157 start, ps1, ps2, err_ret, flags);
158 #ifndef PGEN
159 Py_XDECREF(fileobj);
160 #endif
161 return n;
162 }
163
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#if 0
/* Everything in this section is compiled out by the "#if 0" above:
   two warning format strings (filename, line number) and a helper that
   writes such a warning to stderr. */
static const char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static const char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

/* Write `msg` (a format taking a filename and a line number) to stderr,
   substituting "<string>" when no filename is available. */
static void
warn(const char *msg, const char *filename, int lineno)
{
    const char *shown_name = (filename != NULL) ? filename : "<string>";

    PySys_WriteStderr(msg, shown_name, lineno);
}
#endif
#endif
181
/* A growable array of (line number, comment string) records.
   parsetok() uses it to collect TYPE_IGNORE tokens while parsing. */
typedef struct {
    struct {
        int lineno;         /* line number stored with the comment */
        char *comment;      /* comment text; freed with PyObject_FREE by
                               growable_comment_array_deallocate() */
    } *items;               /* malloc'ed buffer of `size` records */
    size_t size;            /* allocated capacity, in records */
    size_t num_items;       /* number of records currently in use */
} growable_comment_array;
190
191 static int
growable_comment_array_init(growable_comment_array * arr,size_t initial_size)192 growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
193 assert(initial_size > 0);
194 arr->items = malloc(initial_size * sizeof(*arr->items));
195 arr->size = initial_size;
196 arr->num_items = 0;
197
198 return arr->items != NULL;
199 }
200
201 static int
growable_comment_array_add(growable_comment_array * arr,int lineno,char * comment)202 growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
203 if (arr->num_items >= arr->size) {
204 arr->size *= 2;
205 arr->items = realloc(arr->items, arr->size * sizeof(*arr->items));
206 if (!arr->items) {
207 return 0;
208 }
209 }
210
211 arr->items[arr->num_items].lineno = lineno;
212 arr->items[arr->num_items].comment = comment;
213 arr->num_items++;
214 return 1;
215 }
216
217 static void
growable_comment_array_deallocate(growable_comment_array * arr)218 growable_comment_array_deallocate(growable_comment_array *arr) {
219 unsigned i;
220 for (i = 0; i < arr->num_items; i++) {
221 PyObject_FREE(arr->items[i].comment);
222 }
223 free(arr->items);
224 }
225
/* Parse input coming from the given tokenizer structure.
   Return the parse tree, or NULL on failure with the error code stored
   in *err_ret. */
228
229 static node *
parsetok(struct tok_state * tok,grammar * g,int start,perrdetail * err_ret,int * flags)230 parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
231 int *flags)
232 {
233 parser_state *ps;
234 node *n;
235 int started = 0;
236
237 growable_comment_array type_ignores;
238 if (!growable_comment_array_init(&type_ignores, 10)) {
239 err_ret->error = E_NOMEM;
240 Ta3Tokenizer_Free(tok);
241 return NULL;
242 }
243
244 if ((ps = Ta3Parser_New(g, start)) == NULL) {
245 err_ret->error = E_NOMEM;
246 Ta3Tokenizer_Free(tok);
247 return NULL;
248 }
249 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
250 if (*flags & PyPARSE_BARRY_AS_BDFL)
251 ps->p_flags |= CO_FUTURE_BARRY_AS_BDFL;
252 #endif
253
254 for (;;) {
255 char *a, *b;
256 int type;
257 size_t len;
258 char *str;
259 int col_offset;
260
261 type = Ta3Tokenizer_Get(tok, &a, &b);
262 if (type == ERRORTOKEN) {
263 err_ret->error = tok->done;
264 break;
265 }
266 if (type == ENDMARKER && started) {
267 type = NEWLINE; /* Add an extra newline */
268 started = 0;
269 /* Add the right number of dedent tokens,
270 except if a certain flag is given --
271 codeop.py uses this. */
272 if (tok->indent &&
273 !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
274 {
275 tok->pendin = -tok->indent;
276 tok->indent = 0;
277 }
278 }
279 else
280 started = 1;
281 len = (a != NULL && b != NULL) ? b - a : 0;
282 str = (char *) PyObject_MALLOC(len + 1);
283 if (str == NULL) {
284 err_ret->error = E_NOMEM;
285 break;
286 }
287 if (len > 0)
288 strncpy(str, a, len);
289 str[len] = '\0';
290
291 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
292 if (type == NOTEQUAL) {
293 if (!(ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
294 strcmp(str, "!=")) {
295 PyObject_FREE(str);
296 err_ret->error = E_SYNTAX;
297 break;
298 }
299 else if ((ps->p_flags & CO_FUTURE_BARRY_AS_BDFL) &&
300 strcmp(str, "<>")) {
301 PyObject_FREE(str);
302 err_ret->expected = NOTEQUAL;
303 err_ret->error = E_SYNTAX;
304 break;
305 }
306 }
307 #endif
308 if (a != NULL && a >= tok->line_start) {
309 col_offset = Py_SAFE_DOWNCAST(a - tok->line_start,
310 intptr_t, int);
311 }
312 else {
313 col_offset = -1;
314 }
315
316 if (type == TYPE_IGNORE) {
317 if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) {
318 err_ret->error = E_NOMEM;
319 break;
320 }
321 continue;
322 }
323
324 if ((err_ret->error =
325 Ta3Parser_AddToken(ps, (int)type, str,
326 tok->lineno, col_offset,
327 &(err_ret->expected))) != E_OK) {
328 if (err_ret->error != E_DONE) {
329 PyObject_FREE(str);
330 err_ret->token = type;
331 }
332 break;
333 }
334 }
335
336 if (err_ret->error == E_DONE) {
337 n = ps->p_tree;
338 ps->p_tree = NULL;
339
340 if (n->n_type == file_input) {
341 /* Put type_ignore nodes in the ENDMARKER of file_input. */
342 int num;
343 node *ch;
344 size_t i;
345
346 num = NCH(n);
347 ch = CHILD(n, num - 1);
348 REQ(ch, ENDMARKER);
349
350 for (i = 0; i < type_ignores.num_items; i++) {
351 int res = Ta3Node_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment,
352 type_ignores.items[i].lineno, 0);
353 if (res != 0) {
354 err_ret->error = res;
355 Ta3Node_Free(n);
356 n = NULL;
357 break;
358 }
359 type_ignores.items[i].comment = NULL;
360 }
361 }
362
363 #ifndef PGEN
364 /* Check that the source for a single input statement really
365 is a single statement by looking at what is left in the
366 buffer after parsing. Trailing whitespace and comments
367 are OK. */
368 if (err_ret->error == E_DONE && start == single_input) {
369 char *cur = tok->cur;
370 char c = *tok->cur;
371
372 for (;;) {
373 while (c == ' ' || c == '\t' || c == '\n' || c == '\014')
374 c = *++cur;
375
376 if (!c)
377 break;
378
379 if (c != '#') {
380 err_ret->error = E_BADSINGLE;
381 Ta3Node_Free(n);
382 n = NULL;
383 break;
384 }
385
386 /* Suck up comment. */
387 while (c && c != '\n')
388 c = *++cur;
389 }
390 }
391 #endif
392 }
393 else
394 n = NULL;
395
396 growable_comment_array_deallocate(&type_ignores);
397
398 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
399 *flags = ps->p_flags;
400 #endif
401 Ta3Parser_Delete(ps);
402
403 if (n == NULL) {
404 if (tok->done == E_EOF)
405 err_ret->error = E_EOF;
406 err_ret->lineno = tok->lineno;
407 if (tok->buf != NULL) {
408 size_t len;
409 assert(tok->cur - tok->buf < INT_MAX);
410 err_ret->offset = (int)(tok->cur - tok->buf);
411 len = tok->inp - tok->buf;
412 err_ret->text = (char *) PyObject_MALLOC(len + 1);
413 if (err_ret->text != NULL) {
414 if (len > 0)
415 strncpy(err_ret->text, tok->buf, len);
416 err_ret->text[len] = '\0';
417 }
418 }
419 } else if (tok->encoding != NULL) {
420 /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
421 * allocated using PyMem_
422 */
423 node* r = Ta3Node_New(encoding_decl);
424 if (r)
425 r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
426 if (!r || !r->n_str) {
427 err_ret->error = E_NOMEM;
428 if (r)
429 PyObject_FREE(r);
430 n = NULL;
431 goto done;
432 }
433 strcpy(r->n_str, tok->encoding);
434 PyMem_FREE(tok->encoding);
435 tok->encoding = NULL;
436 r->n_nchildren = 1;
437 r->n_child = n;
438 n = r;
439 }
440
441 done:
442 Ta3Tokenizer_Free(tok);
443
444 return n;
445 }
446
447 static int
initerr(perrdetail * err_ret,PyObject * filename)448 initerr(perrdetail *err_ret, PyObject *filename)
449 {
450 err_ret->error = E_OK;
451 err_ret->lineno = 0;
452 err_ret->offset = 0;
453 err_ret->text = NULL;
454 err_ret->token = -1;
455 err_ret->expected = -1;
456 #ifndef PGEN
457 if (filename) {
458 Py_INCREF(filename);
459 err_ret->filename = filename;
460 }
461 else {
462 err_ret->filename = PyUnicode_FromString("<string>");
463 if (err_ret->filename == NULL) {
464 err_ret->error = E_ERROR;
465 return -1;
466 }
467 }
468 #endif
469 return 0;
470 }
471