1
2 /* Parser-tokenizer link implementation */
3
4 #include "../Include/pgenheaders.h"
5 #include "tokenizer.h"
6 #include "../Include/node.h"
7 #include "../Include/grammar.h"
8 #include "parser.h"
9 #include "../Include/parsetok.h"
10 #include "../Include/errcode.h"
11 #include "../Include/graminit.h"
12
13 int Ta27_TabcheckFlag;
14
15
16 /* Forward */
17 static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
18 static void initerr(perrdetail *err_ret, const char* filename);
19 static int initerr_object(perrdetail *err_ret, PyObject *filename);
20
21 /* Parse input coming from a string. Return error code, print some errors. */
22 node *
Ta27Parser_ParseString(const char * s,grammar * g,int start,perrdetail * err_ret)23 Ta27Parser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
24 {
25 return Ta27Parser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
26 }
27
28 node *
Ta27Parser_ParseStringFlags(const char * s,grammar * g,int start,perrdetail * err_ret,int flags)29 Ta27Parser_ParseStringFlags(const char *s, grammar *g, int start,
30 perrdetail *err_ret, int flags)
31 {
32 return Ta27Parser_ParseStringFlagsFilename(s, NULL,
33 g, start, err_ret, flags);
34 }
35
36 node *
Ta27Parser_ParseStringFlagsFilename(const char * s,const char * filename,grammar * g,int start,perrdetail * err_ret,int flags)37 Ta27Parser_ParseStringFlagsFilename(const char *s, const char *filename,
38 grammar *g, int start,
39 perrdetail *err_ret, int flags)
40 {
41 int iflags = flags;
42 return Ta27Parser_ParseStringFlagsFilenameEx(s, filename, g, start,
43 err_ret, &iflags);
44 }
45
46 node *
Ta27Parser_ParseStringFlagsFilenameEx(const char * s,const char * filename,grammar * g,int start,perrdetail * err_ret,int * flags)47 Ta27Parser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
48 grammar *g, int start,
49 perrdetail *err_ret, int *flags)
50 {
51 struct tok_state *tok;
52
53 initerr(err_ret, filename);
54
55 if ((tok = Ta27Tokenizer_FromString(s, start == file_input)) == NULL) {
56 err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
57 return NULL;
58 }
59
60 tok->filename = filename ? filename : "<string>";
61 if (Ta27_TabcheckFlag || Py_VerboseFlag) {
62 tok->altwarning = (tok->filename != NULL);
63 if (Ta27_TabcheckFlag >= 2)
64 tok->alterror++;
65 }
66
67 return parsetok(tok, g, start, err_ret, flags);
68 }
69
70 node *
Ta27Parser_ParseStringObject(const char * s,PyObject * filename,grammar * g,int start,perrdetail * err_ret,int * flags)71 Ta27Parser_ParseStringObject(const char *s, PyObject *filename,
72 grammar *g, int start,
73 perrdetail *err_ret, int *flags)
74 {
75 struct tok_state *tok;
76 int exec_input = start == file_input;
77
78 initerr_object(err_ret, filename);
79
80 if (*flags & PyPARSE_IGNORE_COOKIE)
81 tok = Ta27Tokenizer_FromUTF8(s, exec_input);
82 else
83 tok = Ta27Tokenizer_FromString(s, exec_input);
84
85 if (tok == NULL) {
86 err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
87 return NULL;
88 }
89
90 #ifndef PGEN
91 Py_INCREF(err_ret->filename);
92 tok->filename = PyUnicode_AsUTF8(err_ret->filename);
93 #endif
94 return parsetok(tok, g, start, err_ret, flags);
95 }
96
97 /* Parse input coming from a file. Return error code, print some errors. */
98
99 node *
Ta27Parser_ParseFile(FILE * fp,const char * filename,grammar * g,int start,char * ps1,char * ps2,perrdetail * err_ret)100 Ta27Parser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
101 char *ps1, char *ps2, perrdetail *err_ret)
102 {
103 return Ta27Parser_ParseFileFlags(fp, filename, g, start, ps1, ps2,
104 err_ret, 0);
105 }
106
107 node *
Ta27Parser_ParseFileFlags(FILE * fp,const char * filename,grammar * g,int start,char * ps1,char * ps2,perrdetail * err_ret,int flags)108 Ta27Parser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start,
109 char *ps1, char *ps2, perrdetail *err_ret, int flags)
110 {
111 int iflags = flags;
112 return Ta27Parser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2, err_ret, &iflags);
113 }
114
115 node *
Ta27Parser_ParseFileFlagsEx(FILE * fp,const char * filename,grammar * g,int start,char * ps1,char * ps2,perrdetail * err_ret,int * flags)116 Ta27Parser_ParseFileFlagsEx(FILE *fp, const char *filename, grammar *g, int start,
117 char *ps1, char *ps2, perrdetail *err_ret, int *flags)
118 {
119 struct tok_state *tok;
120
121 initerr(err_ret, filename);
122
123 if ((tok = Ta27Tokenizer_FromFile(fp, ps1, ps2)) == NULL) {
124 err_ret->error = E_NOMEM;
125 return NULL;
126 }
127 tok->filename = filename;
128 if (Ta27_TabcheckFlag || Py_VerboseFlag) {
129 tok->altwarning = (filename != NULL);
130 if (Ta27_TabcheckFlag >= 2)
131 tok->alterror++;
132 }
133
134 return parsetok(tok, g, start, err_ret, flags);
135 }
136
/* NOTE: dead code, disabled with #if 0 and kept verbatim from the
   upstream CPython 2.x sources.  warn() and its message strings were
   used for the 'with'/'as' reserved-keyword deprecation warnings and
   are no longer referenced anywhere in this file. */
#if 0
static char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

static void
warn(const char *msg, const char *filename, int lineno)
{
    if (filename == NULL)
        filename = "<string>";
    PySys_WriteStderr(msg, filename, lineno);
}
#endif
152
153
/* Growable array of (lineno, comment) pairs.  parsetok() uses it to
   collect "# type: ignore" comments (TYPE_IGNORE tokens) so they can be
   attached to the ENDMARKER of a file_input tree after parsing. */
typedef struct {
    struct {
        int lineno;        /* line number the comment appeared on */
        char *comment;     /* comment text; allocated with PyObject_MALLOC
                              in parsetok(), freed by _deallocate() */
    } *items;
    size_t size;           /* allocated capacity, in items */
    size_t num_items;      /* number of slots in use */
} growable_comment_array;
162
163 static int
growable_comment_array_init(growable_comment_array * arr,size_t initial_size)164 growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
165 assert(initial_size > 0);
166 arr->items = malloc(initial_size * sizeof(*arr->items));
167 arr->size = initial_size;
168 arr->num_items = 0;
169
170 return arr->items != NULL;
171 }
172
173 static int
growable_comment_array_add(growable_comment_array * arr,int lineno,char * comment)174 growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
175 if (arr->num_items >= arr->size) {
176 arr->size *= 2;
177 arr->items = realloc(arr->items, arr->size * sizeof(*arr->items));
178 if (!arr->items) {
179 return 0;
180 }
181 }
182
183 arr->items[arr->num_items].lineno = lineno;
184 arr->items[arr->num_items].comment = comment;
185 arr->num_items++;
186 return 1;
187 }
188
189 static void
growable_comment_array_deallocate(growable_comment_array * arr)190 growable_comment_array_deallocate(growable_comment_array *arr) {
191 unsigned i;
192 for (i = 0; i < arr->num_items; i++) {
193 PyObject_FREE(arr->items[i].comment);
194 }
195 free(arr->items);
196 }
197
198
/* Parse input coming from the given tokenizer structure.
   Return error code.

   Core parse loop: pulls tokens from 'tok' and feeds them to a fresh
   parser for grammar 'g' until the grammar accepts or an error occurs.
   Always consumes (frees) 'tok'.  On success returns the parse tree
   (possibly wrapped in an encoding_decl node); on failure returns NULL
   with err_ret filled in. */

static node *
parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
         int *flags)
{
    parser_state *ps;
    node *n;
    int started = 0;   /* nonzero once a real (non-ENDMARKER) token was seen */

    /* Collects "# type: ignore" comments for attachment after parsing. */
    growable_comment_array type_ignores;
    if (!growable_comment_array_init(&type_ignores, 10)) {
        err_ret->error = E_NOMEM;
        Ta27Tokenizer_Free(tok);
        return NULL;
    }

    if ((ps = Ta27Parser_New(g, start)) == NULL) {
        /* NOTE(review): type_ignores.items is not freed on this path. */
        fprintf(stderr, "no mem for new parser\n");
        err_ret->error = E_NOMEM;
        Ta27Tokenizer_Free(tok);
        return NULL;
    }
#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    /* Propagate caller-requested __future__ behavior into the parser. */
    if (*flags & PyPARSE_PRINT_IS_FUNCTION) {
        ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
    }
    if (*flags & PyPARSE_UNICODE_LITERALS) {
        ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
    }

#endif

    /* Token pump: one iteration per token until accept or error. */
    for (;;) {
        char *a, *b;       /* [a, b) bounds the token text in tok's buffer */
        int type;
        size_t len;
        char *str;         /* heap copy of the token text; ownership moves
                              to the parser / type_ignores on success */
        int col_offset;

        type = Ta27Tokenizer_Get(tok, &a, &b);
        if (type == ERRORTOKEN) {
            err_ret->error = tok->done;
            break;
        }
        if (type == ENDMARKER && started) {
            type = NEWLINE; /* Add an extra newline */
            started = 0;
            /* Add the right number of dedent tokens,
               except if a certain flag is given --
               codeop.py uses this. */
            if (tok->indent &&
                !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
            {
                tok->pendin = -tok->indent;
                tok->indent = 0;
            }
        }
        else
            started = 1;
        len = b - a; /* XXX this may compute NULL - NULL */
        str = (char *) PyObject_MALLOC(len + 1);
        if (str == NULL) {
            fprintf(stderr, "no mem for next token\n");
            err_ret->error = E_NOMEM;
            break;
        }
        if (len > 0)
            strncpy(str, a, len);
        str[len] = '\0';

#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
#endif
        /* Column of the token within its line; -1 when the token starts
           before the recorded line start (e.g. multi-line strings). */
        if (a >= tok->line_start)
            col_offset = a - tok->line_start;
        else
            col_offset = -1;

        if (type == TYPE_IGNORE) {
            /* Stash the comment instead of feeding it to the parser;
               _add takes ownership of str on success.
               NOTE(review): str is not freed on the failure path. */
            if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) {
                err_ret->error = E_NOMEM;
                break;
            }
            continue;
        }

        /* AddToken takes ownership of str unless it fails with a real
           error (then we free it here); E_DONE means the grammar accepted. */
        if ((err_ret->error =
             Ta27Parser_AddToken(ps, (int)type, str, tok->lineno, col_offset,
                                 &(err_ret->expected))) != E_OK) {
            if (err_ret->error != E_DONE) {
                PyObject_FREE(str);
                err_ret->token = type;
            }
            break;
        }
    }

    if (err_ret->error == E_DONE) {
        /* Detach the finished tree from the parser so Ta27Parser_Delete
           below does not free it. */
        n = ps->p_tree;
        ps->p_tree = NULL;

        if (n->n_type == file_input) {
            /* Put type_ignore nodes in the ENDMARKER of file_input. */
            int num;
            node *ch;
            size_t i;

            num = NCH(n);
            ch = CHILD(n, num - 1);
            REQ(ch, ENDMARKER);

            for (i = 0; i < type_ignores.num_items; i++) {
                int res = Ta27Node_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment,
                                            type_ignores.items[i].lineno, 0);
                if (res != 0) {
                    err_ret->error = res;
                    Ta27Node_Free(n);
                    n = NULL;
                    break;
                }
                /* Ownership of the comment moved into the tree; NULL it
                   so _deallocate below does not double-free. */
                type_ignores.items[i].comment = NULL;
            }
        }
    }
    else
        n = NULL;

    growable_comment_array_deallocate(&type_ignores);

#ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
    /* Report back any __future__ flags the parser picked up. */
    *flags = ps->p_flags;
#endif
    Ta27Parser_Delete(ps);

    if (n == NULL) {
        /* Failure: fill in err_ret with position and the offending line. */
        if (tok->lineno <= 1 && tok->done == E_EOF)
            err_ret->error = E_EOF;
        err_ret->lineno = tok->lineno;
        if (tok->buf != NULL) {
            char *text = NULL;
            size_t len;
            assert(tok->cur - tok->buf < INT_MAX);
            err_ret->offset = (int)(tok->cur - tok->buf);
            len = tok->inp - tok->buf;
#ifdef Py_USING_UNICODE
            /* May re-decode the raw buffer to the source encoding and
               adjust the reported offset accordingly. */
            text = Ta27Tokenizer_RestoreEncoding(tok, len, &err_ret->offset);

#endif
            if (text == NULL) {
                /* Fall back to a raw copy of the buffer contents. */
                text = (char *) PyObject_MALLOC(len + 1);
                if (text != NULL) {
                    if (len > 0)
                        strncpy(text, tok->buf, len);
                    text[len] = '\0';
                }
            }
            err_ret->text = text;
        }
    } else if (tok->encoding != NULL) {
        /* Success with an explicit source encoding: wrap the tree in an
           encoding_decl node carrying the encoding name. */
        /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
         * allocated using PyMem_
         */
        node* r = Ta27Node_New(encoding_decl);
        if (r)
            r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
        if (!r || !r->n_str) {
            /* NOTE(review): the original tree n is not freed here before
               being dropped — confirm ownership on this OOM path. */
            err_ret->error = E_NOMEM;
            if (r)
                PyObject_FREE(r);
            n = NULL;
            goto done;
        }
        strcpy(r->n_str, tok->encoding);
        PyMem_FREE(tok->encoding);
        tok->encoding = NULL;
        r->n_nchildren = 1;
        r->n_child = n;
        n = r;
    }

 done:
    Ta27Tokenizer_Free(tok);

    return n;
}
385
386 static void
initerr(perrdetail * err_ret,const char * filename)387 initerr(perrdetail *err_ret, const char *filename)
388 {
389 initerr_object(err_ret, PyUnicode_FromString(filename));
390 }
391
392 static int
initerr_object(perrdetail * err_ret,PyObject * filename)393 initerr_object(perrdetail *err_ret, PyObject *filename)
394 {
395 err_ret->error = E_OK;
396 err_ret->lineno = 0;
397 err_ret->offset = 0;
398 err_ret->text = NULL;
399 err_ret->token = -1;
400 err_ret->expected = -1;
401 #ifndef PGEN
402 if (filename) {
403 Py_INCREF(filename);
404 err_ret->filename = filename;
405 }
406 else {
407 err_ret->filename = PyUnicode_FromString("<string>");
408 if (err_ret->filename == NULL) {
409 err_ret->error = E_ERROR;
410 return -1;
411 }
412 }
413 #endif
414 return 0;
415 }
416