1 
2 /* Parser-tokenizer link implementation */
3 
4 #include "../Include/pgenheaders.h"
5 #include "tokenizer.h"
6 #include "../Include/node.h"
7 #include "../Include/grammar.h"
8 #include "parser.h"
9 #include "../Include/parsetok.h"
10 #include "../Include/errcode.h"
11 #include "../Include/graminit.h"
12 
/* Tab-consistency checking level consulted by the entry points in this file:
   a non-zero value (or Py_VerboseFlag) makes them set tok->altwarning, and a
   value of 2 or more additionally bumps tok->alterror. */
int Ta27_TabcheckFlag;


/* Forward */
/* Shared driver: feeds tokens from the tokenizer into the parser. */
static node *parsetok(struct tok_state *, grammar *, int, perrdetail *, int *);
/* Reset *err_ret, taking the file name as a C string. */
static void initerr(perrdetail *err_ret, const char* filename);
/* Reset *err_ret, taking the file name as a Python object; returns 0 or -1. */
static int initerr_object(perrdetail *err_ret, PyObject *filename);
20 
/* Parse input coming from a string.  Return the parse tree, or NULL on
   failure with the details stored in *err_ret; may print some errors. */
22 node *
Ta27Parser_ParseString(const char * s,grammar * g,int start,perrdetail * err_ret)23 Ta27Parser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret)
24 {
25     return Ta27Parser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);
26 }
27 
28 node *
Ta27Parser_ParseStringFlags(const char * s,grammar * g,int start,perrdetail * err_ret,int flags)29 Ta27Parser_ParseStringFlags(const char *s, grammar *g, int start,
30                           perrdetail *err_ret, int flags)
31 {
32     return Ta27Parser_ParseStringFlagsFilename(s, NULL,
33                                              g, start, err_ret, flags);
34 }
35 
36 node *
Ta27Parser_ParseStringFlagsFilename(const char * s,const char * filename,grammar * g,int start,perrdetail * err_ret,int flags)37 Ta27Parser_ParseStringFlagsFilename(const char *s, const char *filename,
38                           grammar *g, int start,
39                           perrdetail *err_ret, int flags)
40 {
41     int iflags = flags;
42     return Ta27Parser_ParseStringFlagsFilenameEx(s, filename, g, start,
43                                                err_ret, &iflags);
44 }
45 
46 node *
Ta27Parser_ParseStringFlagsFilenameEx(const char * s,const char * filename,grammar * g,int start,perrdetail * err_ret,int * flags)47 Ta27Parser_ParseStringFlagsFilenameEx(const char *s, const char *filename,
48                           grammar *g, int start,
49                           perrdetail *err_ret, int *flags)
50 {
51     struct tok_state *tok;
52 
53     initerr(err_ret, filename);
54 
55     if ((tok = Ta27Tokenizer_FromString(s, start == file_input)) == NULL) {
56         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
57         return NULL;
58     }
59 
60     tok->filename = filename ? filename : "<string>";
61     if (Ta27_TabcheckFlag || Py_VerboseFlag) {
62         tok->altwarning = (tok->filename != NULL);
63         if (Ta27_TabcheckFlag >= 2)
64             tok->alterror++;
65     }
66 
67     return parsetok(tok, g, start, err_ret, flags);
68 }
69 
70 node *
Ta27Parser_ParseStringObject(const char * s,PyObject * filename,grammar * g,int start,perrdetail * err_ret,int * flags)71 Ta27Parser_ParseStringObject(const char *s, PyObject *filename,
72                            grammar *g, int start,
73                            perrdetail *err_ret, int *flags)
74 {
75     struct tok_state *tok;
76     int exec_input = start == file_input;
77 
78     initerr_object(err_ret, filename);
79 
80     if (*flags & PyPARSE_IGNORE_COOKIE)
81         tok = Ta27Tokenizer_FromUTF8(s, exec_input);
82     else
83         tok = Ta27Tokenizer_FromString(s, exec_input);
84 
85     if (tok == NULL) {
86         err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;
87         return NULL;
88     }
89 
90 #ifndef PGEN
91     Py_INCREF(err_ret->filename);
92     tok->filename = PyUnicode_AsUTF8(err_ret->filename);
93 #endif
94     return parsetok(tok, g, start, err_ret, flags);
95 }
96 
/* Parse input coming from a file.  Return the parse tree, or NULL on
   failure with the details stored in *err_ret; may print some errors. */
98 
99 node *
Ta27Parser_ParseFile(FILE * fp,const char * filename,grammar * g,int start,char * ps1,char * ps2,perrdetail * err_ret)100 Ta27Parser_ParseFile(FILE *fp, const char *filename, grammar *g, int start,
101                    char *ps1, char *ps2, perrdetail *err_ret)
102 {
103     return Ta27Parser_ParseFileFlags(fp, filename, g, start, ps1, ps2,
104                                    err_ret, 0);
105 }
106 
107 node *
Ta27Parser_ParseFileFlags(FILE * fp,const char * filename,grammar * g,int start,char * ps1,char * ps2,perrdetail * err_ret,int flags)108 Ta27Parser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start,
109                         char *ps1, char *ps2, perrdetail *err_ret, int flags)
110 {
111     int iflags = flags;
112     return Ta27Parser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2, err_ret, &iflags);
113 }
114 
115 node *
Ta27Parser_ParseFileFlagsEx(FILE * fp,const char * filename,grammar * g,int start,char * ps1,char * ps2,perrdetail * err_ret,int * flags)116 Ta27Parser_ParseFileFlagsEx(FILE *fp, const char *filename, grammar *g, int start,
117                           char *ps1, char *ps2, perrdetail *err_ret, int *flags)
118 {
119     struct tok_state *tok;
120 
121     initerr(err_ret, filename);
122 
123     if ((tok = Ta27Tokenizer_FromFile(fp, ps1, ps2)) == NULL) {
124         err_ret->error = E_NOMEM;
125         return NULL;
126     }
127     tok->filename = filename;
128     if (Ta27_TabcheckFlag || Py_VerboseFlag) {
129         tok->altwarning = (filename != NULL);
130         if (Ta27_TabcheckFlag >= 2)
131             tok->alterror++;
132     }
133 
134     return parsetok(tok, g, start, err_ret, flags);
135 }
136 
/* Everything up to the matching #endif is compiled out by `#if 0`: message
   templates and a helper for warning that 'with' / 'as' will become
   reserved keywords. */
#if 0
static char with_msg[] =
"%s:%d: Warning: 'with' will become a reserved keyword in Python 2.6\n";

static char as_msg[] =
"%s:%d: Warning: 'as' will become a reserved keyword in Python 2.6\n";

/* Write `msg` (a format taking a file name and a line number) to stderr,
   substituting "<string>" when no file name is available. */
static void
warn(const char *msg, const char *filename, int lineno)
{
    if (filename == NULL)
        filename = "<string>";
    PySys_WriteStderr(msg, filename, lineno);
}
#endif
152 
153 
/* A simple growable array of (line number, comment text) pairs. */
typedef struct {
    struct {
        int lineno;      /* line number recorded for the comment */
        char *comment;   /* comment text; the array only stores the pointer */
    } *items;            /* backing storage, obtained from malloc/realloc */
    size_t size;         /* number of slots allocated in `items` */
    size_t num_items;    /* number of slots currently in use */
} growable_comment_array;

/* Initialise `arr` with room for `initial_size` entries.
   `initial_size` must be greater than zero.
   Returns 1 on success, 0 if the allocation failed. */
static int
growable_comment_array_init(growable_comment_array *arr, size_t initial_size) {
    assert(initial_size > 0);
    arr->items = malloc(initial_size * sizeof(*arr->items));
    arr->size = initial_size;
    arr->num_items = 0;

    return arr->items != NULL;
}

/* Append (lineno, comment) to `arr`, doubling the storage when it is full.
   Returns 1 on success and 0 on failure.  On failure `arr` is left exactly
   as it was: the existing buffer, its size and its entries all stay valid,
   so the array can still be walked and released afterwards.  The caller
   keeps ownership of `comment` when 0 is returned. */
static int
growable_comment_array_add(growable_comment_array *arr, int lineno, char *comment) {
    if (arr->num_items >= arr->size) {
        size_t new_size;
        void *grown;

        /* Refuse to grow when doubling the slot count, or turning it into
           a byte count, would wrap around. */
        if (arr->size > ((size_t)-1) / 2 / sizeof(*arr->items)) {
            return 0;
        }
        new_size = arr->size ? arr->size * 2 : 1;

        /* Keep the old pointer until realloc has succeeded; assigning the
           result directly would lose (and leak) the buffer on failure. */
        grown = realloc(arr->items, new_size * sizeof(*arr->items));
        if (grown == NULL) {
            return 0;
        }
        arr->items = grown;
        arr->size = new_size;
    }

    arr->items[arr->num_items].lineno = lineno;
    arr->items[arr->num_items].comment = comment;
    arr->num_items++;
    return 1;
}
188 
189 static void
growable_comment_array_deallocate(growable_comment_array * arr)190 growable_comment_array_deallocate(growable_comment_array *arr) {
191     unsigned i;
192     for (i = 0; i < arr->num_items; i++) {
193         PyObject_FREE(arr->items[i].comment);
194     }
195     free(arr->items);
196 }
197 
198 
199 /* Parse input coming from the given tokenizer structure.
200    Return error code. */
201 
202 static node *
parsetok(struct tok_state * tok,grammar * g,int start,perrdetail * err_ret,int * flags)203 parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret,
204          int *flags)
205 {
206     parser_state *ps;
207     node *n;
208     int started = 0;
209 
210     growable_comment_array type_ignores;
211     if (!growable_comment_array_init(&type_ignores, 10)) {
212         err_ret->error = E_NOMEM;
213         Ta27Tokenizer_Free(tok);
214         return NULL;
215     }
216 
217     if ((ps = Ta27Parser_New(g, start)) == NULL) {
218         fprintf(stderr, "no mem for new parser\n");
219         err_ret->error = E_NOMEM;
220         Ta27Tokenizer_Free(tok);
221         return NULL;
222     }
223 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
224     if (*flags & PyPARSE_PRINT_IS_FUNCTION) {
225         ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;
226     }
227     if (*flags & PyPARSE_UNICODE_LITERALS) {
228         ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;
229     }
230 
231 #endif
232 
233     for (;;) {
234         char *a, *b;
235         int type;
236         size_t len;
237         char *str;
238         int col_offset;
239 
240         type = Ta27Tokenizer_Get(tok, &a, &b);
241         if (type == ERRORTOKEN) {
242             err_ret->error = tok->done;
243             break;
244         }
245         if (type == ENDMARKER && started) {
246             type = NEWLINE; /* Add an extra newline */
247             started = 0;
248             /* Add the right number of dedent tokens,
249                except if a certain flag is given --
250                codeop.py uses this. */
251             if (tok->indent &&
252                 !(*flags & PyPARSE_DONT_IMPLY_DEDENT))
253             {
254                 tok->pendin = -tok->indent;
255                 tok->indent = 0;
256             }
257         }
258         else
259             started = 1;
260         len = b - a; /* XXX this may compute NULL - NULL */
261         str = (char *) PyObject_MALLOC(len + 1);
262         if (str == NULL) {
263             fprintf(stderr, "no mem for next token\n");
264             err_ret->error = E_NOMEM;
265             break;
266         }
267         if (len > 0)
268             strncpy(str, a, len);
269         str[len] = '\0';
270 
271 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
272 #endif
273         if (a >= tok->line_start)
274             col_offset = a - tok->line_start;
275         else
276             col_offset = -1;
277 
278         if (type == TYPE_IGNORE) {
279             if (!growable_comment_array_add(&type_ignores, tok->lineno, str)) {
280                 err_ret->error = E_NOMEM;
281                 break;
282             }
283             continue;
284         }
285 
286         if ((err_ret->error =
287              Ta27Parser_AddToken(ps, (int)type, str, tok->lineno, col_offset,
288                                &(err_ret->expected))) != E_OK) {
289             if (err_ret->error != E_DONE) {
290                 PyObject_FREE(str);
291                 err_ret->token = type;
292             }
293             break;
294         }
295     }
296 
297     if (err_ret->error == E_DONE) {
298         n = ps->p_tree;
299         ps->p_tree = NULL;
300 
301         if (n->n_type == file_input) {
302             /* Put type_ignore nodes in the ENDMARKER of file_input. */
303             int num;
304             node *ch;
305             size_t i;
306 
307             num = NCH(n);
308             ch = CHILD(n, num - 1);
309             REQ(ch, ENDMARKER);
310 
311             for (i = 0; i < type_ignores.num_items; i++) {
312                 int res = Ta27Node_AddChild(ch, TYPE_IGNORE, type_ignores.items[i].comment,
313                                             type_ignores.items[i].lineno, 0);
314                 if (res != 0) {
315                     err_ret->error = res;
316                     Ta27Node_Free(n);
317                     n = NULL;
318                     break;
319                 }
320                 type_ignores.items[i].comment = NULL;
321             }
322         }
323     }
324     else
325         n = NULL;
326 
327     growable_comment_array_deallocate(&type_ignores);
328 
329 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD
330     *flags = ps->p_flags;
331 #endif
332     Ta27Parser_Delete(ps);
333 
334     if (n == NULL) {
335         if (tok->lineno <= 1 && tok->done == E_EOF)
336             err_ret->error = E_EOF;
337         err_ret->lineno = tok->lineno;
338         if (tok->buf != NULL) {
339             char *text = NULL;
340             size_t len;
341             assert(tok->cur - tok->buf < INT_MAX);
342             err_ret->offset = (int)(tok->cur - tok->buf);
343             len = tok->inp - tok->buf;
344 #ifdef Py_USING_UNICODE
345             text = Ta27Tokenizer_RestoreEncoding(tok, len, &err_ret->offset);
346 
347 #endif
348             if (text == NULL) {
349                 text = (char *) PyObject_MALLOC(len + 1);
350                 if (text != NULL) {
351                     if (len > 0)
352                         strncpy(text, tok->buf, len);
353                     text[len] = '\0';
354                 }
355             }
356             err_ret->text = text;
357         }
358     } else if (tok->encoding != NULL) {
359         /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was
360          * allocated using PyMem_
361          */
362         node* r = Ta27Node_New(encoding_decl);
363         if (r)
364             r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1);
365         if (!r || !r->n_str) {
366             err_ret->error = E_NOMEM;
367             if (r)
368                 PyObject_FREE(r);
369             n = NULL;
370             goto done;
371         }
372         strcpy(r->n_str, tok->encoding);
373         PyMem_FREE(tok->encoding);
374         tok->encoding = NULL;
375         r->n_nchildren = 1;
376         r->n_child = n;
377         n = r;
378     }
379 
380 done:
381     Ta27Tokenizer_Free(tok);
382 
383     return n;
384 }
385 
386 static void
initerr(perrdetail * err_ret,const char * filename)387 initerr(perrdetail *err_ret, const char *filename)
388 {
389   initerr_object(err_ret, PyUnicode_FromString(filename));
390 }
391 
392 static int
initerr_object(perrdetail * err_ret,PyObject * filename)393 initerr_object(perrdetail *err_ret, PyObject *filename)
394 {
395     err_ret->error = E_OK;
396     err_ret->lineno = 0;
397     err_ret->offset = 0;
398     err_ret->text = NULL;
399     err_ret->token = -1;
400     err_ret->expected = -1;
401 #ifndef PGEN
402     if (filename) {
403         Py_INCREF(filename);
404         err_ret->filename = filename;
405     }
406     else {
407         err_ret->filename = PyUnicode_FromString("<string>");
408         if (err_ret->filename == NULL) {
409             err_ret->error = E_ERROR;
410             return -1;
411         }
412     }
413 #endif
414     return 0;
415 }
416