1 /*
2  * Copyright 2015 WebAssembly Community Group participants
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *     http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 // Pure parsing. Calls methods on a Builder (template argument) to actually
18 // construct the AST
19 //
20 // XXX All parsing methods assume they take ownership of the input string. This
21 //     lets them reuse parts of it. You will segfault if the input string cannot
22 //     be reused and written to.
23 
24 #ifndef wasm_parser_h
25 #define wasm_parser_h
26 
27 #include <algorithm>
28 #include <cstdio>
29 #include <iostream>
30 #include <limits>
31 #include <vector>
32 
33 #include "istring.h"
34 #include "support/safe_integer.h"
35 
36 namespace cashew {
37 
38 // common strings
39 
40 extern IString TOPLEVEL;
41 extern IString DEFUN;
42 extern IString BLOCK;
43 extern IString VAR;
44 extern IString CONST;
45 extern IString CONDITIONAL;
46 extern IString BINARY;
47 extern IString RETURN;
48 extern IString IF;
49 extern IString ELSE;
50 extern IString WHILE;
51 extern IString DO;
52 extern IString FOR;
53 extern IString SEQ;
54 extern IString SUB;
55 extern IString CALL;
56 extern IString LABEL;
57 extern IString BREAK;
58 extern IString CONTINUE;
59 extern IString SWITCH;
60 extern IString STRING;
61 extern IString TRY;
62 extern IString INF;
63 extern IString NaN;
64 extern IString LLVM_CTTZ_I32;
65 extern IString UDIVMODDI4;
66 extern IString UNARY_PREFIX;
67 extern IString UNARY_POSTFIX;
68 extern IString MATH_FROUND;
69 extern IString MATH_CLZ32;
70 extern IString INT64;
71 extern IString INT64_CONST;
72 extern IString SIMD_FLOAT32X4;
73 extern IString SIMD_FLOAT64X2;
74 extern IString SIMD_INT8X16;
75 extern IString SIMD_INT16X8;
76 extern IString SIMD_INT32X4;
77 extern IString PLUS;
78 extern IString MINUS;
79 extern IString OR;
80 extern IString AND;
81 extern IString XOR;
82 extern IString L_NOT;
83 extern IString B_NOT;
84 extern IString LT;
85 extern IString GE;
86 extern IString LE;
87 extern IString GT;
88 extern IString EQ;
89 extern IString NE;
90 extern IString DIV;
91 extern IString MOD;
92 extern IString MUL;
93 extern IString RSHIFT;
94 extern IString LSHIFT;
95 extern IString TRSHIFT;
96 extern IString HEAP8;
97 extern IString HEAP16;
98 extern IString HEAP32;
99 extern IString HEAPF32;
100 extern IString HEAPU8;
101 extern IString HEAPU16;
102 extern IString HEAPU32;
103 extern IString HEAPF64;
104 extern IString F0;
105 extern IString EMPTY;
106 extern IString FUNCTION;
107 extern IString OPEN_PAREN;
108 extern IString OPEN_BRACE;
109 extern IString OPEN_CURLY;
110 extern IString CLOSE_CURLY;
111 extern IString COMMA;
112 extern IString QUESTION;
113 extern IString COLON;
114 extern IString CASE;
115 extern IString DEFAULT;
116 extern IString DOT;
117 extern IString PERIOD;
118 extern IString NEW;
119 extern IString ARRAY;
120 extern IString OBJECT;
121 extern IString THROW;
122 extern IString SET;
123 extern IString ATOMICS;
124 extern IString COMPARE_EXCHANGE;
125 extern IString LOAD;
126 extern IString STORE;
127 
128 extern IStringSet keywords;
129 
130 extern const char *OPERATOR_INITS, *SEPARATORS;
131 
132 extern int MAX_OPERATOR_SIZE, LOWEST_PREC;
133 
134 struct OperatorClass {
135   enum Type { Binary = 0, Prefix = 1, Postfix = 2, Tertiary = 3 };
136 
137   IStringSet ops;
138   bool rtl;
139   Type type;
140 
OperatorClassOperatorClass141   OperatorClass(const char* o, bool r, Type t) : ops(o), rtl(r), type(t) {}
142 
143   static int getPrecedence(Type type, IString op);
144   static bool getRtl(int prec);
145 };
146 
147 extern std::vector<OperatorClass> operatorClasses;
148 
149 extern bool isIdentInit(char x);
150 extern bool isIdentPart(char x);
151 
152 // parser
153 
154 template<class NodeRef, class Builder> class Parser {
155 
isSpace(char x)156   static bool isSpace(char x) {
157     return x == 32 || x == 9 || x == 10 || x == 13;
158   } /* space, tab, linefeed/newline, or return */
skipSpace(char * & curr)159   static void skipSpace(char*& curr) {
160     while (*curr) {
161       if (isSpace(*curr)) {
162         curr++;
163         continue;
164       }
165       if (curr[0] == '/' && curr[1] == '/') {
166         curr += 2;
167         while (*curr && *curr != '\n') {
168           curr++;
169         }
170         if (*curr) {
171           curr++;
172         }
173         continue;
174       }
175       if (curr[0] == '/' && curr[1] == '*') {
176         curr += 2;
177         while (*curr && (curr[0] != '*' || curr[1] != '/')) {
178           curr++;
179         }
180         curr += 2;
181         continue;
182       }
183       return;
184     }
185   }
186 
isDigit(char x)187   static bool isDigit(char x) { return x >= '0' && x <= '9'; }
188 
hasChar(const char * list,char x)189   static bool hasChar(const char* list, char x) {
190     while (*list) {
191       if (*list++ == x) {
192         return true;
193       }
194     }
195     return false;
196   }
197 
198   // An atomic fragment of something. Stops at a natural boundary.
199   enum FragType {
200     KEYWORD = 0,
201     OPERATOR = 1,
202     IDENT = 2,
203     STRING = 3, // without quotes
204     INT = 4,
205     DOUBLE = 5,
206     SEPARATOR = 6
207   };
208 
209   struct Frag {
210   // MSVC does not allow unrestricted unions:
211   // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2544.pdf
212 #ifndef _MSC_VER
213     union {
214 #endif
215       IString str;
216       double num;
217 #ifndef _MSC_VER
218     };
219 #endif
220     int size;
221     FragType type;
222 
isNumberFrag223     bool isNumber() const { return type == INT || type == DOUBLE; }
224 
FragFrag225     explicit Frag(char* src) {
226       char* start = src;
227       if (isIdentInit(*src)) {
228         // read an identifier or a keyword
229         src++;
230         while (isIdentPart(*src)) {
231           src++;
232         }
233         if (*src == 0) {
234           str.set(start);
235         } else {
236           char temp = *src;
237           *src = 0;
238           str.set(start, false);
239           *src = temp;
240         }
241         type = keywords.has(str) ? KEYWORD : IDENT;
242       } else if (isDigit(*src) || (src[0] == '.' && isDigit(src[1]))) {
243         if (src[0] == '0' && (src[1] == 'x' || src[1] == 'X')) {
244           // Explicitly parse hex numbers of form "0x...", because strtod
245           // supports hex number strings only in C++11, and Visual Studio 2013
246           // does not yet support that functionality.
247           src += 2;
248           num = 0;
249           while (1) {
250             if (*src >= '0' && *src <= '9') {
251               num *= 16;
252               num += *src - '0';
253             } else if (*src >= 'a' && *src <= 'f') {
254               num *= 16;
255               num += *src - 'a' + 10;
256             } else if (*src >= 'A' && *src <= 'F') {
257               num *= 16;
258               num += *src - 'A' + 10;
259             } else {
260               break;
261             }
262             src++;
263           }
264         } else {
265           num = strtod(start, &src);
266         }
267         // asm.js must have a '.' for double values. however, we also tolerate
268         // uglify's tendency to emit without a '.' (and fix it later with a +).
269         // for valid asm.js input, the '.' should be enough, and for uglify
270         // in the emscripten optimizer pipeline, we use simple_ast where
271         // INT/DOUBLE is quite the same at this point anyhow
272         type = (std::find(start, src, '.') == src &&
273                 (wasm::isSInteger32(num) || wasm::isUInteger32(num)))
274                  ? INT
275                  : DOUBLE;
276         assert(src > start);
277       } else if (hasChar(OPERATOR_INITS, *src)) {
278         switch (*src) {
279           case '!':
280             str = src[1] == '=' ? NE : L_NOT;
281             break;
282           case '%':
283             str = MOD;
284             break;
285           case '&':
286             str = AND;
287             break;
288           case '*':
289             str = MUL;
290             break;
291           case '+':
292             str = PLUS;
293             break;
294           case ',':
295             str = COMMA;
296             break;
297           case '-':
298             str = MINUS;
299             break;
300           case '.':
301             str = PERIOD;
302             break;
303           case '/':
304             str = DIV;
305             break;
306           case ':':
307             str = COLON;
308             break;
309           case '<':
310             str = src[1] == '<' ? LSHIFT : (src[1] == '=' ? LE : LT);
311             break;
312           case '=':
313             str = src[1] == '=' ? EQ : SET;
314             break;
315           case '>':
316             str = src[1] == '>' ? (src[2] == '>' ? TRSHIFT : RSHIFT)
317                                 : (src[1] == '=' ? GE : GT);
318             break;
319           case '?':
320             str = QUESTION;
321             break;
322           case '^':
323             str = XOR;
324             break;
325           case '|':
326             str = OR;
327             break;
328           case '~':
329             str = B_NOT;
330             break;
331           default:
332             abort();
333         }
334         size = strlen(str.str);
335 #ifndef NDEBUG
336         char temp = start[size];
337         start[size] = 0;
338         assert(strcmp(str.str, start) == 0);
339         start[size] = temp;
340 #endif
341         type = OPERATOR;
342         return;
343       } else if (hasChar(SEPARATORS, *src)) {
344         type = SEPARATOR;
345         char temp = src[1];
346         src[1] = 0;
347         str.set(src, false);
348         src[1] = temp;
349         src++;
350       } else if (*src == '"' || *src == '\'') {
351         char* end = strchr(src + 1, *src);
352         *end = 0;
353         str.set(src + 1);
354         src = end + 1;
355         type = STRING;
356       } else {
357         dump("frag parsing", src);
358         abort();
359       }
360       size = src - start;
361     }
362   };
363 
364   struct ExpressionElement {
365     bool isNode;
366     // MSVC does not allow unrestricted unions:
367     // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2544.pdf
368 #ifndef _MSC_VER
369     union {
370 #endif
371       NodeRef node;
372       IString op;
373 #ifndef _MSC_VER
374     };
375 #endif
ExpressionElementExpressionElement376     ExpressionElement(NodeRef n) : isNode(true), node(n) {}
ExpressionElementExpressionElement377     ExpressionElement(IString o) : isNode(false), op(o) {}
378 
getNodeExpressionElement379     NodeRef getNode() {
380       assert(isNode);
381       return node;
382     }
getOpExpressionElement383     IString getOp() {
384       assert(!isNode);
385       return op;
386     }
387   };
388 
389   // This is a list of the current stack of node-operator-node-operator-etc.
390   // this works by each parseExpression call appending to the vector; then
391   // recursing out, and the toplevel sorts it all
392   typedef std::vector<ExpressionElement> ExpressionParts;
393   std::vector<ExpressionParts> expressionPartsStack;
394 
395   // Parses an element in a list of such elements, e.g. list of statements in a
396   // block, or list of parameters in a call
397   NodeRef parseElement(char*& src, const char* seps = ";") {
398     // dump("parseElement", src);
399     skipSpace(src);
400     Frag frag(src);
401     src += frag.size;
402     switch (frag.type) {
403       case KEYWORD: {
404         return parseAfterKeyword(frag, src, seps);
405       }
406       case IDENT: {
407         return parseAfterIdent(frag, src, seps);
408       }
409       case STRING:
410       case INT:
411       case DOUBLE: {
412         return parseExpression(parseFrag(frag), src, seps);
413       }
414       case SEPARATOR: {
415         if (frag.str == OPEN_PAREN) {
416           return parseExpression(parseAfterParen(src), src, seps);
417         }
418         if (frag.str == OPEN_BRACE) {
419           return parseExpression(parseAfterBrace(src), src, seps);
420         }
421         if (frag.str == OPEN_CURLY) {
422           return parseExpression(parseAfterCurly(src), src, seps);
423         }
424         abort();
425       }
426       case OPERATOR: {
427         return parseExpression(frag.str, src, seps);
428       }
429       default:
430         /* dump("parseElement", src); printf("bad frag type: %d\n",frag.type);
431          */
432         abort();
433     }
434     return nullptr;
435   }
436 
parseFrag(Frag & frag)437   NodeRef parseFrag(Frag& frag) {
438     switch (frag.type) {
439       case IDENT:
440         return Builder::makeName(frag.str);
441       case STRING:
442         return Builder::makeString(frag.str);
443       case INT:
444         return Builder::makeInt(uint32_t(frag.num));
445       case DOUBLE:
446         return Builder::makeDouble(frag.num);
447       default:
448         abort();
449     }
450     return nullptr;
451   }
452 
parseAfterKeyword(Frag & frag,char * & src,const char * seps)453   NodeRef parseAfterKeyword(Frag& frag, char*& src, const char* seps) {
454     skipSpace(src);
455     if (frag.str == FUNCTION) {
456       return parseFunction(src, seps);
457     } else if (frag.str == VAR) {
458       return parseVar(src, seps, false);
459     } else if (frag.str == CONST) {
460       return parseVar(src, seps, true);
461     } else if (frag.str == RETURN) {
462       return parseReturn(src, seps);
463     } else if (frag.str == IF) {
464       return parseIf(src, seps);
465     } else if (frag.str == DO) {
466       return parseDo(src, seps);
467     } else if (frag.str == WHILE) {
468       return parseWhile(src, seps);
469     } else if (frag.str == BREAK) {
470       return parseBreak(src, seps);
471     } else if (frag.str == CONTINUE) {
472       return parseContinue(src, seps);
473     } else if (frag.str == SWITCH) {
474       return parseSwitch(src, seps);
475     } else if (frag.str == NEW) {
476       return parseNew(src, seps);
477     } else if (frag.str == FOR) {
478       return parseFor(src, seps);
479     }
480     dump(frag.str.str, src);
481     abort();
482     return nullptr;
483   }
484 
parseFunction(char * & src,const char * seps)485   NodeRef parseFunction(char*& src, const char* seps) {
486     Frag name(src);
487     if (name.type == IDENT) {
488       src += name.size;
489     } else {
490       assert(name.type == SEPARATOR && name.str[0] == '(');
491       name.str = IString();
492     }
493     NodeRef ret = Builder::makeFunction(name.str);
494     skipSpace(src);
495     assert(*src == '(');
496     src++;
497     while (1) {
498       skipSpace(src);
499       if (*src == ')') {
500         break;
501       }
502       Frag arg(src);
503       assert(arg.type == IDENT);
504       src += arg.size;
505       Builder::appendArgumentToFunction(ret, arg.str);
506       skipSpace(src);
507       if (*src == ')') {
508         break;
509       }
510       if (*src == ',') {
511         src++;
512         continue;
513       }
514       abort();
515     }
516     src++;
517     Builder::setBlockContent(ret, parseBracketedBlock(src));
518     // TODO: parse expression?
519     return ret;
520   }
521 
parseVar(char * & src,const char * seps,bool is_const)522   NodeRef parseVar(char*& src, const char* seps, bool is_const) {
523     NodeRef ret = Builder::makeVar(is_const);
524     while (1) {
525       skipSpace(src);
526       if (*src == ';') {
527         break;
528       }
529       Frag name(src);
530       assert(name.type == IDENT);
531       NodeRef value;
532       src += name.size;
533       skipSpace(src);
534       if (*src == '=') {
535         src++;
536         skipSpace(src);
537         value = parseElement(src, ";,");
538       }
539       Builder::appendToVar(ret, name.str, value);
540       skipSpace(src);
541       if (*src == ';') {
542         break;
543       }
544       if (*src == ',') {
545         src++;
546         continue;
547       }
548       abort();
549     }
550     src++;
551     return ret;
552   }
553 
parseReturn(char * & src,const char * seps)554   NodeRef parseReturn(char*& src, const char* seps) {
555     skipSpace(src);
556     NodeRef value = !hasChar(seps, *src) ? parseElement(src, seps) : nullptr;
557     skipSpace(src);
558     assert(hasChar(seps, *src));
559     if (*src == ';') {
560       src++;
561     }
562     return Builder::makeReturn(value);
563   }
564 
parseIf(char * & src,const char * seps)565   NodeRef parseIf(char*& src, const char* seps) {
566     NodeRef condition = parseParenned(src);
567     NodeRef ifTrue = parseMaybeBracketed(src, seps);
568     skipSpace(src);
569     NodeRef ifFalse;
570     if (!hasChar(seps, *src)) {
571       Frag next(src);
572       if (next.type == KEYWORD && next.str == ELSE) {
573         src += next.size;
574         ifFalse = parseMaybeBracketed(src, seps);
575       }
576     }
577     return Builder::makeIf(condition, ifTrue, ifFalse);
578   }
579 
parseDo(char * & src,const char * seps)580   NodeRef parseDo(char*& src, const char* seps) {
581     NodeRef body = parseMaybeBracketed(src, seps);
582     skipSpace(src);
583     Frag next(src);
584     assert(next.type == KEYWORD && next.str == WHILE);
585     src += next.size;
586     NodeRef condition = parseParenned(src);
587     return Builder::makeDo(body, condition);
588   }
589 
parseWhile(char * & src,const char * seps)590   NodeRef parseWhile(char*& src, const char* seps) {
591     NodeRef condition = parseParenned(src);
592     NodeRef body = parseMaybeBracketed(src, seps);
593     return Builder::makeWhile(condition, body);
594   }
595 
parseFor(char * & src,const char * seps)596   NodeRef parseFor(char*& src, const char* seps) {
597     skipSpace(src);
598     assert(*src == '(');
599     src++;
600     NodeRef init = parseElement(src, ";");
601     skipSpace(src);
602     assert(*src == ';');
603     src++;
604     NodeRef condition = parseElement(src, ";");
605     skipSpace(src);
606     assert(*src == ';');
607     src++;
608     NodeRef inc = parseElement(src, ")");
609     skipSpace(src);
610     assert(*src == ')');
611     src++;
612     NodeRef body = parseMaybeBracketed(src, seps);
613     return Builder::makeFor(init, condition, inc, body);
614   }
615 
parseBreak(char * & src,const char * seps)616   NodeRef parseBreak(char*& src, const char* seps) {
617     skipSpace(src);
618     Frag next(src);
619     if (next.type == IDENT) {
620       src += next.size;
621     }
622     return Builder::makeBreak(next.type == IDENT ? next.str : IString());
623   }
624 
parseContinue(char * & src,const char * seps)625   NodeRef parseContinue(char*& src, const char* seps) {
626     skipSpace(src);
627     Frag next(src);
628     if (next.type == IDENT) {
629       src += next.size;
630     }
631     return Builder::makeContinue(next.type == IDENT ? next.str : IString());
632   }
633 
parseSwitch(char * & src,const char * seps)634   NodeRef parseSwitch(char*& src, const char* seps) {
635     NodeRef ret = Builder::makeSwitch(parseParenned(src));
636     skipSpace(src);
637     assert(*src == '{');
638     src++;
639     while (1) {
640       // find all cases and possibly a default
641       skipSpace(src);
642       if (*src == '}') {
643         break;
644       }
645       Frag next(src);
646       if (next.type == KEYWORD) {
647         if (next.str == CASE) {
648           src += next.size;
649           skipSpace(src);
650           NodeRef arg;
651           Frag value(src);
652           if (value.isNumber()) {
653             arg = parseFrag(value);
654             src += value.size;
655           } else if (value.type == OPERATOR) {
656             // negative number
657             assert(value.str == MINUS);
658             src += value.size;
659             skipSpace(src);
660             Frag value2(src);
661             assert(value2.isNumber());
662             arg = Builder::makePrefix(MINUS, parseFrag(value2));
663             src += value2.size;
664           } else {
665             // identifier and function call
666             assert(value.type == IDENT);
667             src += value.size;
668             skipSpace(src);
669             arg = parseCall(parseFrag(value), src);
670           }
671           Builder::appendCaseToSwitch(ret, arg);
672           skipSpace(src);
673           assert(*src == ':');
674           src++;
675           continue;
676         } else if (next.str == DEFAULT) {
677           src += next.size;
678           Builder::appendDefaultToSwitch(ret);
679           skipSpace(src);
680           assert(*src == ':');
681           src++;
682           continue;
683         }
684         // otherwise, may be some keyword that happens to start a block (e.g.
685         // case 1: _return_ 5)
686       }
687       // not case X: or default: or }, so must be some code
688       skipSpace(src);
689       bool explicitBlock = *src == '{';
690       NodeRef subBlock = explicitBlock ? parseBracketedBlock(src)
691                                        : parseBlock(src, ";}", CASE, DEFAULT);
692       Builder::appendCodeToSwitch(ret, subBlock, explicitBlock);
693     }
694     skipSpace(src);
695     assert(*src == '}');
696     src++;
697     return ret;
698   }
699 
parseNew(char * & src,const char * seps)700   NodeRef parseNew(char*& src, const char* seps) {
701     return Builder::makeNew(parseElement(src, seps));
702   }
703 
parseAfterIdent(Frag & frag,char * & src,const char * seps)704   NodeRef parseAfterIdent(Frag& frag, char*& src, const char* seps) {
705     skipSpace(src);
706     if (*src == '(') {
707       return parseExpression(parseCall(parseFrag(frag), src), src, seps);
708     }
709     if (*src == '[') {
710       return parseExpression(parseIndexing(parseFrag(frag), src), src, seps);
711     }
712     if (*src == ':' && expressionPartsStack.back().size() == 0) {
713       src++;
714       skipSpace(src);
715       NodeRef inner;
716       if (*src == '{') {
717         // context lets us know this is not an object, but a block
718         inner = parseBracketedBlock(src);
719       } else {
720         inner = parseElement(src, seps);
721       }
722       return Builder::makeLabel(frag.str, inner);
723     }
724     if (*src == '.') {
725       return parseExpression(parseDotting(parseFrag(frag), src), src, seps);
726     }
727     return parseExpression(parseFrag(frag), src, seps);
728   }
729 
parseCall(NodeRef target,char * & src)730   NodeRef parseCall(NodeRef target, char*& src) {
731     expressionPartsStack.resize(expressionPartsStack.size() + 1);
732     assert(*src == '(');
733     src++;
734     NodeRef ret = Builder::makeCall(target);
735     while (1) {
736       skipSpace(src);
737       if (*src == ')') {
738         break;
739       }
740       Builder::appendToCall(ret, parseElement(src, ",)"));
741       skipSpace(src);
742       if (*src == ')') {
743         break;
744       }
745       if (*src == ',') {
746         src++;
747         continue;
748       }
749       abort();
750     }
751     src++;
752     assert(expressionPartsStack.back().size() == 0);
753     expressionPartsStack.pop_back();
754     return ret;
755   }
756 
parseIndexing(NodeRef target,char * & src)757   NodeRef parseIndexing(NodeRef target, char*& src) {
758     expressionPartsStack.resize(expressionPartsStack.size() + 1);
759     assert(*src == '[');
760     src++;
761     NodeRef ret = Builder::makeIndexing(target, parseElement(src, "]"));
762     skipSpace(src);
763     assert(*src == ']');
764     src++;
765     assert(expressionPartsStack.back().size() == 0);
766     expressionPartsStack.pop_back();
767     return ret;
768   }
769 
parseDotting(NodeRef target,char * & src)770   NodeRef parseDotting(NodeRef target, char*& src) {
771     assert(*src == '.');
772     src++;
773     Frag key(src);
774     assert(key.type == IDENT);
775     src += key.size;
776     return Builder::makeDot(target, key.str);
777   }
778 
parseAfterParen(char * & src)779   NodeRef parseAfterParen(char*& src) {
780     expressionPartsStack.resize(expressionPartsStack.size() + 1);
781     skipSpace(src);
782     NodeRef ret = parseElement(src, ")");
783     skipSpace(src);
784     assert(*src == ')');
785     src++;
786     assert(expressionPartsStack.back().size() == 0);
787     expressionPartsStack.pop_back();
788     return ret;
789   }
790 
parseAfterBrace(char * & src)791   NodeRef parseAfterBrace(char*& src) {
792     expressionPartsStack.resize(expressionPartsStack.size() + 1);
793     NodeRef ret = Builder::makeArray();
794     while (1) {
795       skipSpace(src);
796       assert(*src);
797       if (*src == ']') {
798         break;
799       }
800       NodeRef element = parseElement(src, ",]");
801       Builder::appendToArray(ret, element);
802       skipSpace(src);
803       if (*src == ']') {
804         break;
805       }
806       if (*src == ',') {
807         src++;
808         continue;
809       }
810       abort();
811     }
812     src++;
813     return ret;
814   }
815 
parseAfterCurly(char * & src)816   NodeRef parseAfterCurly(char*& src) {
817     expressionPartsStack.resize(expressionPartsStack.size() + 1);
818     NodeRef ret = Builder::makeObject();
819     while (1) {
820       skipSpace(src);
821       assert(*src);
822       if (*src == '}') {
823         break;
824       }
825       Frag key(src);
826       assert(key.type == IDENT || key.type == STRING);
827       src += key.size;
828       skipSpace(src);
829       assert(*src == ':');
830       src++;
831       NodeRef value = parseElement(src, ",}");
832       Builder::appendToObject(ret, key.str, value);
833       skipSpace(src);
834       if (*src == '}') {
835         break;
836       }
837       if (*src == ',') {
838         src++;
839         continue;
840       }
841       abort();
842     }
843     src++;
844     return ret;
845   }
846 
dumpParts(ExpressionParts & parts,int i)847   void dumpParts(ExpressionParts& parts, int i) {
848     printf("expressionparts: %d (at %d)\n", parts.size(), i);
849     printf("| ");
850     for (int i = 0; i < parts.size(); i++) {
851       if (parts[i].isNode) {
852         parts[i].getNode()->stringify(std::cout);
853         printf("    ");
854       } else {
855         printf("    _%s_    ", parts[i].getOp().str);
856       }
857     }
858     printf("|\n");
859   }
860 
makeBinary(NodeRef left,IString op,NodeRef right)861   NodeRef makeBinary(NodeRef left, IString op, NodeRef right) {
862     if (op == PERIOD) {
863       return Builder::makeDot(left, right);
864     } else {
865       return Builder::makeBinary(left, op, right);
866     }
867   }
868 
869   NodeRef
parseExpression(ExpressionElement initial,char * & src,const char * seps)870   parseExpression(ExpressionElement initial, char*& src, const char* seps) {
871     // dump("parseExpression", src);
872     ExpressionParts& parts = expressionPartsStack.back();
873     skipSpace(src);
874     if (*src == 0 || hasChar(seps, *src)) {
875       if (parts.size() > 0) {
876         parts.push_back(initial); // cherry on top of the cake
877       }
878       return initial.getNode();
879     }
880     bool top = parts.size() == 0;
881     if (initial.isNode) {
882       Frag next(src);
883       if (next.type == OPERATOR) {
884         parts.push_back(initial);
885         src += next.size;
886         parts.push_back(next.str);
887       } else {
888         if (*src == '(') {
889           initial = parseCall(initial.getNode(), src);
890         } else if (*src == '[') {
891           initial = parseIndexing(initial.getNode(), src);
892         } else {
893           dump("bad parseExpression state", src);
894           abort();
895         }
896         return parseExpression(initial, src, seps);
897       }
898     } else {
899       parts.push_back(initial);
900     }
901     NodeRef last = parseElement(src, seps);
902     if (!top) {
903       return last;
904     }
905     {
906       // |parts| may have been invalidated by that call
907       ExpressionParts& parts = expressionPartsStack.back();
908       // we are the toplevel. sort it all out
909       // collapse right to left, highest priority first
910       // dumpParts(parts, 0);
911       for (auto& ops : operatorClasses) {
912         if (ops.rtl) {
913           // right to left
914           for (int i = parts.size() - 1; i >= 0; i--) {
915             if (parts[i].isNode) {
916               continue;
917             }
918             IString op = parts[i].getOp();
919             if (!ops.ops.has(op)) {
920               continue;
921             }
922             if (ops.type == OperatorClass::Binary && i > 0 &&
923                 i < (int)parts.size() - 1) {
924               parts[i] =
925                 makeBinary(parts[i - 1].getNode(), op, parts[i + 1].getNode());
926               parts.erase(parts.begin() + i + 1);
927               parts.erase(parts.begin() + i - 1);
928             } else if (ops.type == OperatorClass::Prefix &&
929                        i < (int)parts.size() - 1) {
930               if (i > 0 && parts[i - 1].isNode) {
931                 // cannot apply prefix operator if it would join two nodes
932                 continue;
933               }
934               parts[i] = Builder::makePrefix(op, parts[i + 1].getNode());
935               parts.erase(parts.begin() + i + 1);
936             } else if (ops.type == OperatorClass::Tertiary) {
937               // we must be at  X ? Y : Z
938               //                      ^
939               // dumpParts(parts, i);
940               if (op != COLON) {
941                 continue;
942               }
943               assert(i < (int)parts.size() - 1 && i >= 3);
944               if (parts[i - 2].getOp() != QUESTION) {
945                 continue; // e.g. x ? y ? 1 : 0 : 2
946               }
947               parts[i - 3] = Builder::makeConditional(parts[i - 3].getNode(),
948                                                       parts[i - 1].getNode(),
949                                                       parts[i + 1].getNode());
950               parts.erase(parts.begin() + i - 2, parts.begin() + i + 2);
951               // basically a reset, due to things like x ? y ? 1 : 0 : 2
952               i = parts.size();
953             } // TODO: postfix
954           }
955         } else {
956           // left to right
957           for (int i = 0; i < (int)parts.size(); i++) {
958             if (parts[i].isNode) {
959               continue;
960             }
961             IString op = parts[i].getOp();
962             if (!ops.ops.has(op)) {
963               continue;
964             }
965             if (ops.type == OperatorClass::Binary && i > 0 &&
966                 i < (int)parts.size() - 1) {
967               parts[i] =
968                 makeBinary(parts[i - 1].getNode(), op, parts[i + 1].getNode());
969               parts.erase(parts.begin() + i + 1);
970               parts.erase(parts.begin() + i - 1);
971               i--;
972             } else if (ops.type == OperatorClass::Prefix &&
973                        i < (int)parts.size() - 1) {
974               if (i > 0 && parts[i - 1].isNode) {
975                 // cannot apply prefix operator if it would join two nodes
976                 continue;
977               }
978               parts[i] = Builder::makePrefix(op, parts[i + 1].getNode());
979               parts.erase(parts.begin() + i + 1);
980               // allow a previous prefix operator to cascade
981               i = std::max(i - 2, 0);
982             } // TODO: tertiary, postfix
983           }
984         }
985       }
986       assert(parts.size() == 1);
987       NodeRef ret = parts[0].getNode();
988       parts.clear();
989       return ret;
990     }
991   }
992 
993   // Parses a block of code (e.g. a bunch of statements inside {,}, or the top
994   // level of o file)
995   NodeRef parseBlock(char*& src,
996                      const char* seps = ";",
997                      IString keywordSep1 = IString(),
998                      IString keywordSep2 = IString()) {
999     NodeRef block = Builder::makeBlock();
1000     // dump("parseBlock", src);
1001     while (1) {
1002       skipSpace(src);
1003       if (*src == 0) {
1004         break;
1005       }
1006       if (*src == ';') {
1007         src++; // skip a statement in this block
1008         continue;
1009       }
1010       if (hasChar(seps, *src)) {
1011         break;
1012       }
1013       if (!!keywordSep1) {
1014         Frag next(src);
1015         if (next.type == KEYWORD && next.str == keywordSep1) {
1016           break;
1017         }
1018       }
1019       if (!!keywordSep2) {
1020         Frag next(src);
1021         if (next.type == KEYWORD && next.str == keywordSep2) {
1022           break;
1023         }
1024       }
1025       NodeRef element = parseElementOrStatement(src, seps);
1026       Builder::appendToBlock(block, element);
1027     }
1028     return block;
1029   }
1030 
parseBracketedBlock(char * & src)1031   NodeRef parseBracketedBlock(char*& src) {
1032     skipSpace(src);
1033     assert(*src == '{');
1034     src++;
1035     // the two are not symmetrical, ; is just internally separating, } is the
1036     // final one - parseBlock knows all this
1037     NodeRef block = parseBlock(src, ";}");
1038     assert(*src == '}');
1039     src++;
1040     return block;
1041   }
1042 
parseElementOrStatement(char * & src,const char * seps)1043   NodeRef parseElementOrStatement(char*& src, const char* seps) {
1044     skipSpace(src);
1045     if (*src == ';') {
1046       src++;
1047       // we don't need the brackets here, but oh well
1048       return Builder::makeBlock();
1049     }
1050     if (*src == '{') { // detect a trivial {} in a statement context
1051       char* before = src;
1052       src++;
1053       skipSpace(src);
1054       if (*src == '}') {
1055         src++;
1056         // we don't need the brackets here, but oh well
1057         return Builder::makeBlock();
1058       }
1059       src = before;
1060     }
1061     NodeRef ret = parseElement(src, seps);
1062     skipSpace(src);
1063     if (*src == ';') {
1064       ret = Builder::makeStatement(ret);
1065       src++;
1066     }
1067     return ret;
1068   }
1069 
parseMaybeBracketed(char * & src,const char * seps)1070   NodeRef parseMaybeBracketed(char*& src, const char* seps) {
1071     skipSpace(src);
1072     return *src == '{' ? parseBracketedBlock(src)
1073                        : parseElementOrStatement(src, seps);
1074   }
1075 
parseParenned(char * & src)1076   NodeRef parseParenned(char*& src) {
1077     skipSpace(src);
1078     assert(*src == '(');
1079     src++;
1080     NodeRef ret = parseElement(src, ")");
1081     skipSpace(src);
1082     assert(*src == ')');
1083     src++;
1084     return ret;
1085   }
1086 
1087   // Debugging
1088 
1089   char* allSource = nullptr;
1090   int allSize = 0;
1091 
dump(const char * where,char * curr)1092   static void dump(const char* where, char* curr) {
1093     /*
1094     printf("%s:\n=============\n", where);
1095     for (int i = 0; i < allSize; i++)
1096       printf("%c", allSource[i] ? allSource[i] :
1097     '?');
1098     printf("\n");
1099     for (int i = 0; i < (curr - allSource); i++) printf(" ");
1100     printf("^\n=============\n");
1101     */
1102     fprintf(stderr, "%s:\n==========\n", where);
1103     int newlinesLeft = 2;
1104     int charsLeft = 200;
1105     while (*curr) {
1106       if (*curr == '\n') {
1107         newlinesLeft--;
1108         if (newlinesLeft == 0) {
1109           break;
1110         }
1111       }
1112       charsLeft--;
1113       if (charsLeft == 0) {
1114         break;
1115       }
1116       fprintf(stderr, "%c", *curr++);
1117     }
1118     fprintf(stderr, "\n\n");
1119   }
1120 
1121 public:
Parser()1122   Parser() { expressionPartsStack.resize(1); }
1123 
1124   // Highest-level parsing, as of a JavaScript script file.
parseToplevel(char * src)1125   NodeRef parseToplevel(char* src) {
1126     allSource = src;
1127     allSize = strlen(src);
1128     NodeRef toplevel = Builder::makeToplevel();
1129     Builder::setBlockContent(toplevel, parseBlock(src));
1130     return toplevel;
1131   }
1132 };
1133 
1134 } // namespace cashew
1135 
1136 #endif // wasm_parser_h
1137