1 #include <setjmp.h>
2 #include <string.h>
3 
4 #include "parse.h"
5 
6 // Static precedence table, from low (loose binding, do first) to high
7 // (tight binding, do last).
8 #define MAX_PREC_TOKS 9
9 static const struct precedence {
10     int toks[MAX_PREC_TOKS];
11     int rule;
12 } PRECEDENCE[] = {
13     { { TOK_SEMI, TOK_COMMA },                 PREC_REVERSE },
14     { { TOK_ELLIPSIS },                        PREC_SUFFIX  },
15     { { TOK_RETURN, TOK_BREAK, TOK_CONTINUE }, PREC_PREFIX  },
16     { { TOK_ASSIGN, TOK_PLUSEQ, TOK_MINUSEQ,
17         TOK_MULEQ, TOK_DIVEQ, TOK_CATEQ,
18         TOK_BIT_ANDEQ, TOK_BIT_OREQ,
19         TOK_BIT_XOREQ },                       PREC_REVERSE },
20     { { TOK_COLON, TOK_QUESTION },             PREC_REVERSE },
21     { { TOK_VAR },                             PREC_PREFIX  },
22     { { TOK_BIT_OR },                          PREC_BINARY  },
23     { { TOK_BIT_XOR },                         PREC_BINARY  },
24     { { TOK_BIT_AND },                         PREC_BINARY  },
25     { { TOK_OR },                              PREC_BINARY  },
26     { { TOK_AND },                             PREC_BINARY  },
27     { { TOK_EQ, TOK_NEQ },                     PREC_BINARY  },
28     { { TOK_LT, TOK_LTE, TOK_GT, TOK_GTE },    PREC_BINARY  },
29     { { TOK_PLUS, TOK_MINUS, TOK_CAT },        PREC_BINARY  },
30     { { TOK_MUL, TOK_DIV },                    PREC_BINARY  },
31     { { TOK_MINUS, TOK_NEG, TOK_NOT,
32         TOK_CAT, TOK_BIT_NEG },                PREC_PREFIX  },
33     { { TOK_LPAR, TOK_LBRA },                  PREC_SUFFIX  },
34     { { TOK_DOT },                             PREC_BINARY  },
35 };
36 #define PRECEDENCE_LEVELS (sizeof(PRECEDENCE)/sizeof(struct precedence))
37 
naParseError(struct Parser * p,char * msg,int line)38 void naParseError(struct Parser* p, char* msg, int line)
39 {
40     if(line > 0) p->errLine = line;
41     p->err = msg;
42     longjmp(p->jumpHandle, 1);
43 }
44 
oops(struct Parser * p)45 static void oops(struct Parser* p) { naParseError(p, "parse error", -1); }
46 
naParseInit(struct Parser * p)47 void naParseInit(struct Parser* p)
48 {
49     memset(p, 0, sizeof(*p));
50     p->tree.type = TOK_TOP;
51     p->tree.line = 1;
52 }
53 
naParseDestroy(struct Parser * p)54 void naParseDestroy(struct Parser* p)
55 {
56     int i;
57     for(i=0; i<p->nChunks; i++) naFree(p->chunks[i]);
58     naFree(p->chunks);
59     naFree(p->chunkSizes);
60     p->buf = 0;
61 }
62 
naParseAlloc(struct Parser * p,int bytes)63 void* naParseAlloc(struct Parser* p, int bytes)
64 {
65     char* result;
66     bytes = (bytes+7) & (~7); // Round up to 8 byte chunks for alignment
67 
68     if(p->leftInChunk < bytes) {
69         void* newChunk;
70         void** newChunks;
71         int* newChunkSizes;
72         int sz, i;
73 
74         sz = p->len;
75         if(sz < bytes) sz = bytes;
76         newChunk = naAlloc(sz);
77 
78         p->nChunks++;
79 
80         newChunks = naAlloc(p->nChunks * sizeof(void*));
81         for(i=1; i<p->nChunks; i++) newChunks[i] = p->chunks[i-1];
82         newChunks[0] = newChunk;
83         naFree(p->chunks);
84         p->chunks = newChunks;
85 
86         newChunkSizes = naAlloc(p->nChunks * sizeof(int));
87         for(i=1; i<p->nChunks; i++) newChunkSizes[i] = p->chunkSizes[i-1];
88         newChunkSizes[0] = sz;
89         naFree(p->chunkSizes);
90         p->chunkSizes = newChunkSizes;
91 
92         p->leftInChunk = sz;
93     }
94 
95     result = (char *)p->chunks[0] + p->chunkSizes[0] - p->leftInChunk;
96     p->leftInChunk -= bytes;
97     return result;
98 }
99 
addChild(struct Token * par,struct Token * ch)100 static void addChild(struct Token *par, struct Token *ch)
101 {
102     if(par->lastChild) {
103         ch->prev = par->lastChild;
104         par->lastChild->next = ch;
105     } else
106         par->children = ch;
107     par->lastChild = ch;
108 }
109 
endBrace(int tok)110 static int endBrace(int tok)
111 {
112     if(tok == TOK_LBRA) return TOK_RBRA;
113     if(tok == TOK_LPAR) return TOK_RPAR;
114     if(tok == TOK_LCURL) return TOK_RCURL;
115     return -1;
116 }
117 
isOpenBrace(int t)118 static int isOpenBrace(int t)
119 {
120     return t==TOK_LPAR || t==TOK_LBRA || t==TOK_LCURL;
121 }
122 
isLoopoid(int t)123 static int isLoopoid(int t)
124 {
125     return t==TOK_FOR || t==TOK_FOREACH || t==TOK_WHILE || t==TOK_FORINDEX;
126 }
127 
isBlockoid(int t)128 static int isBlockoid(int t)
129 {
130     return isLoopoid(t)||t==TOK_IF||t==TOK_ELSIF||t==TOK_ELSE||t==TOK_FUNC;
131 }
132 
133 /* Yes, a bare else or elsif ends a block; it means we've reached the
134  * end of the previous if/elsif clause. */
isBlockEnd(int t)135 static int isBlockEnd(int t)
136 {
137     return t==TOK_RPAR||t==TOK_RBRA||t==TOK_RCURL||t==TOK_ELSIF||t==TOK_ELSE;
138 }
139 
140 /* To match C's grammar, "blockoid" expressions sometimes need
141  * synthesized terminating semicolons to make them act like
142  * "statements" in C.  Always add one after "loopoid"
143  * (for/foreach/while) expressions.  Add one after a func if it
144  * immediately follows an assignment, and add one after an
145  * if/elsif/else if it is the first token in an expression list */
needsSemi(struct Token * t,struct Token * next)146 static int needsSemi(struct Token* t, struct Token* next)
147 {
148     if(!next || next->type == TOK_SEMI || isBlockEnd(next->type)) return 0;
149     if(t->type == TOK_IF)   return !t->prev || t->prev->type == TOK_SEMI;
150     if(t->type == TOK_FUNC) return t->prev && t->prev->type == TOK_ASSIGN;
151     if(isLoopoid(t->type))  return 1;
152     return 0;
153 }
154 
newToken(struct Parser * p,int type)155 static struct Token* newToken(struct Parser* p, int type)
156 {
157     struct Token* t = naParseAlloc(p, sizeof(struct Token));
158     memset(t, 0, sizeof(*t));
159     t->type = type;
160     t->line = -1;
161     return t;
162 }
163 
164 static struct Token* parseToken(struct Parser* p, struct Token** list);
165 
parseBlock(struct Parser * p,struct Token * top,int end,struct Token ** list)166 static void parseBlock(struct Parser* p, struct Token *top,
167                        int end, struct Token** list)
168 {
169     struct Token *t;
170     while(*list) {
171         if(isBlockEnd((*list)->type) && (*list)->type != end) break;
172         if(end == TOK_SEMI && (*list)->type == TOK_COMMA) break;
173         t = parseToken(p, list);
174         if(t->type == end) return; /* drop end token on the floor */
175         addChild(top, t);
176         if(needsSemi(t, *list))
177             addChild(top, newToken(p, TOK_SEMI));
178     }
179     /* Context dependency: end of block is a parse error UNLESS we're
180      * looking for a statement terminator (a braceless block) or a -1
181      * (the top level) */
182     if(end != TOK_SEMI && end != -1) oops(p);
183 }
184 
parseToken(struct Parser * p,struct Token ** list)185 static struct Token* parseToken(struct Parser* p, struct Token** list)
186 {
187     struct Token *t = *list;
188     *list = t->next;
189     if(t->next) t->next->prev = 0;
190     t->next = t->prev = 0;
191     p->errLine = t->line;
192 
193     if(!t) return 0;
194     if(isOpenBrace(t->type)) {
195         parseBlock(p, t, endBrace(t->type), list);
196     } else if(isBlockoid(t->type)) {
197         /* Read an optional paren expression */
198         if(!*list) oops(p);
199         if((*list)->type == TOK_LPAR)
200             addChild(t, parseToken(p, list));
201 
202         /* And the code block, which might be implicit/braceless */
203         if(!*list) oops(p);
204         if((*list)->type == TOK_LCURL) {
205             addChild(t, parseToken(p, list));
206         } else {
207             /* Context dependency: if we're reading a braceless block,
208              * and the first (!) token is itself a "blockoid"
209              * expression, it is parsed alone, otherwise, read to the
210              * terminating semicolon. */
211             struct Token *blk = newToken(p, TOK_LCURL);
212             if(isBlockoid((*list)->type)) addChild(blk, parseToken(p, list));
213             else                          parseBlock(p, blk, TOK_SEMI, list);
214             addChild(t, blk);
215         }
216 
217         /* Read the elsif/else chain */
218         if(t->type == TOK_IF) {
219             while(*list && ((*list)->type == TOK_ELSIF))
220                 addChild(t, parseToken(p, list));
221             if(*list && (*list)->type == TOK_ELSE)
222                 addChild(t, parseToken(p, list));
223         }
224 
225         /* Finally, check for proper usage */
226         if(t->type != TOK_FUNC) {
227             if(t->type == TOK_ELSE && t->children->type != TOK_LCURL) oops(p);
228             if(t->type != TOK_ELSE && t->children->type != TOK_LPAR) oops(p);
229         }
230     }
231     return t;
232 }
233 
234 // True if the token's type exists in the precedence level.
tokInLevel(struct Token * tok,int level)235 static int tokInLevel(struct Token* tok, int level)
236 {
237     int i;
238     for(i=0; i<MAX_PREC_TOKS; i++)
239         if(PRECEDENCE[level].toks[i] == tok->type)
240             return 1;
241     return 0;
242 }
243 
244 static struct Token* parsePrecedence(struct Parser* p, struct Token* start,
245                                      struct Token* end, int level);
246 
precChildren(struct Parser * p,struct Token * t)247 static void precChildren(struct Parser* p, struct Token* t)
248 {
249     struct Token* top = parsePrecedence(p, t->children, t->lastChild, 0);
250     t->children = top;
251     t->lastChild = top;
252 }
253 
254 // Run a "block structure" node (if/elsif/else/for/while/foreach)
255 // through the precedence parser.  The funny child structure makes
256 // this a little more complicated than it should be.
precBlock(struct Parser * p,struct Token * block)257 static void precBlock(struct Parser* p, struct Token* block)
258 {
259     struct Token* t = block->children;
260     while(t) {
261         if(isOpenBrace(t->type))
262             precChildren(p, t);
263         else if(isBlockoid(t->type))
264             precBlock(p, t);
265         t = t->next;
266     }
267 }
268 
269 /* Binary tokens that get empties synthesized if one side is missing */
oneSidedBinary(int t)270 static int oneSidedBinary(int t)
271 { return t == TOK_SEMI || t ==  TOK_COMMA || t == TOK_COLON; }
272 
parsePrecedence(struct Parser * p,struct Token * start,struct Token * end,int level)273 static struct Token* parsePrecedence(struct Parser* p,
274                                      struct Token* start, struct Token* end,
275                                      int level)
276 {
277     int rule;
278     struct Token *t, *top, *left, *right;
279     struct Token *a, *b, *c, *d; // temporaries
280 
281     // This is an error.  No "siblings" are allowed at the bottom level.
282     if(level >= PRECEDENCE_LEVELS && start != end)
283         naParseError(p, "parse error", start->line);
284 
285     // Synthesize an empty token if necessary
286     if(end == 0 && start == 0)
287         return newToken(p, TOK_EMPTY);
288 
289     // Sanify the list.  This is OK, since we're recursing into the
290     // list structure; stuff to the left and right has already been
291     // handled somewhere above.
292     if(end == 0) end = start;
293     if(start == 0) start = end;
294     if(start->prev) start->prev->next = 0;
295     if(end->next) end->next->prev = 0;
296     start->prev = end->next = 0;
297 
298     // Single tokens parse as themselves.  Recurse into braces, and
299     // parse children of block structure.
300     if(start == end) {
301         if     (isOpenBrace(start->type)) precChildren(p, start);
302         else if(isBlockoid(start->type))  precBlock(p, start);
303         return start;
304     }
305 
306     if(oneSidedBinary(start->type)) {
307         t = newToken(p, TOK_EMPTY);
308         start->prev = t;
309         t->next = start;
310         start = t;
311     }
312     if(oneSidedBinary(end->type)) {
313         t = newToken(p, TOK_EMPTY);
314         end->next = t;
315         t->prev = end;
316         end = t;
317     }
318 
319     // Another one: the "." and (postfix) "[]/()" operators should
320     // really be the same precendence level, but the existing
321     // implementation doesn't allow for it.  Bump us up a level if we
322     // are parsing for DOT but find a LPAR/LBRA at the end of the
323     // list.
324     if(PRECEDENCE[level].toks[0] == TOK_DOT)
325         if(end->type == TOK_LPAR || end->type == TOK_LBRA)
326             level--;
327 
328     top = left = right = 0;
329     rule = PRECEDENCE[level].rule;
330     switch(rule) {
331     case PREC_PREFIX:
332         if(tokInLevel(start, level) && start->next) {
333             a = start->children;
334             b = start->lastChild;
335             c = start->next;
336             d = end;
337             top = start;
338             if(a) left = parsePrecedence(p, a, b, 0);
339             right = parsePrecedence(p, c, d, level);
340         }
341         break;
342     case PREC_SUFFIX:
343         if(tokInLevel(end, level) && end->prev) {
344             a = start;
345             b = end->prev;
346             c = end->children;
347             d = end->lastChild;
348             top = end;
349             left = parsePrecedence(p, a, b, level);
350             if(c) right = parsePrecedence(p, c, d, 0);
351         }
352         break;
353     case PREC_BINARY:
354         t = end->prev;
355         while(t->prev) {
356             if(tokInLevel(t, level)) {
357                 a = t->prev ? start : 0;
358                 b = t->prev;
359                 c = t->next;
360                 d = t->next ? end : 0;
361                 top = t;
362                 left = parsePrecedence(p, a, b, level);
363                 right = parsePrecedence(p, c, d, level+1);
364                 break;
365             }
366             t = t->prev;
367         }
368         break;
369     case PREC_REVERSE:
370         t = start->next;
371         while(t->next) {
372             if(tokInLevel(t, level)) {
373                 a = t->prev ? start : 0;
374                 b = t->prev;
375                 c = t->next;
376                 d = t->next ? end : 0;
377                 top = t;
378                 left = parsePrecedence(p, a, b, level+1);
379                 right = parsePrecedence(p, c, d, level);
380                 break;
381             }
382             t = t->next;
383         }
384         break;
385     }
386 
387     // Found nothing, try the next level
388     if(!top)
389         return parsePrecedence(p, start, end, level+1);
390 
391     top->rule = rule;
392 
393     if(left) {
394         left->next = right;
395         left->prev = 0;
396     }
397     top->children = left;
398 
399     if(right) {
400         right->next = 0;
401         right->prev = left;
402     }
403     top->lastChild = right;
404 
405     top->next = top->prev = 0;
406     return top;
407 }
408 
naParseCode(struct Context * c,naRef srcFile,int firstLine,char * buf,int len,int * errLine)409 naRef naParseCode(struct Context* c, naRef srcFile, int firstLine,
410                   char* buf, int len, int* errLine)
411 {
412     naRef codeObj;
413     struct Token* t;
414     struct Parser p;
415 
416     // Protect from garbage collection
417     naTempSave(c, srcFile);
418 
419     naParseInit(&p);
420 
421     // Catch parser errors here.
422     p.errLine = *errLine = 1;
423     if (setjmp(p.jumpHandle)) {
424         size_t end_ = sizeof(c->error) - 1;
425         strncpy(c->error, p.err, end_);
426         c->error[end_] = '\0';
427 
428         *errLine = p.errLine;
429         naParseDestroy(&p);
430 
431         return naNil();
432     }
433 
434     p.context = c;
435     p.srcFile = srcFile;
436     p.firstLine = firstLine;
437     p.buf = buf;
438     p.len = len;
439 
440     // Lexify, match brace structure, fixup if/for/etc...
441     naLex(&p);
442 
443     // Run the block parser, make sure everything was eaten
444     t = p.tree.children;
445     p.tree.children = p.tree.lastChild = 0;
446     parseBlock(&p, &p.tree, -1, &t);
447     if(t) oops(&p);
448 
449     // Recursively run the precedence parser, and fixup the treetop
450     t = parsePrecedence(&p, p.tree.children, p.tree.lastChild, 0);
451     t->prev = t->next = 0;
452     p.tree.children = t;
453     p.tree.lastChild = t;
454 
455     // Generate code
456     codeObj = naCodeGen(&p, &(p.tree), 0);
457 
458     // Clean up our mess
459     naParseDestroy(&p);
460     naTempSave(c, codeObj);
461 
462     return codeObj;
463 }
464