1 /* 2 * Copyright 2015 WebAssembly Community Group participants 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 // Pure parsing. Calls methods on a Builder (template argument) to actually 18 // construct the AST 19 // 20 // XXX All parsing methods assume they take ownership of the input string. This 21 // lets them reuse parts of it. You will segfault if the input string cannot 22 // be reused and written to. 23 24 #ifndef wasm_parser_h 25 #define wasm_parser_h 26 27 #include <algorithm> 28 #include <cstdio> 29 #include <iostream> 30 #include <limits> 31 #include <vector> 32 33 #include "istring.h" 34 #include "support/safe_integer.h" 35 36 namespace cashew { 37 38 // common strings 39 40 extern IString TOPLEVEL; 41 extern IString DEFUN; 42 extern IString BLOCK; 43 extern IString VAR; 44 extern IString CONST; 45 extern IString CONDITIONAL; 46 extern IString BINARY; 47 extern IString RETURN; 48 extern IString IF; 49 extern IString ELSE; 50 extern IString WHILE; 51 extern IString DO; 52 extern IString FOR; 53 extern IString SEQ; 54 extern IString SUB; 55 extern IString CALL; 56 extern IString LABEL; 57 extern IString BREAK; 58 extern IString CONTINUE; 59 extern IString SWITCH; 60 extern IString STRING; 61 extern IString TRY; 62 extern IString INF; 63 extern IString NaN; 64 extern IString LLVM_CTTZ_I32; 65 extern IString UDIVMODDI4; 66 extern IString UNARY_PREFIX; 67 extern IString UNARY_POSTFIX; 68 extern IString MATH_FROUND; 69 extern IString MATH_CLZ32; 70 extern IString INT64; 71 extern IString INT64_CONST; 72 extern IString SIMD_FLOAT32X4; 73 extern IString SIMD_FLOAT64X2; 74 extern IString SIMD_INT8X16; 75 extern IString SIMD_INT16X8; 76 extern IString SIMD_INT32X4; 77 extern IString PLUS; 78 extern IString MINUS; 79 extern IString OR; 80 extern IString AND; 81 extern IString XOR; 82 extern IString L_NOT; 83 extern IString B_NOT; 84 extern IString LT; 85 extern IString GE; 86 extern IString LE; 87 extern IString GT; 88 extern IString EQ; 89 extern IString NE; 90 extern IString DIV; 91 extern IString MOD; 92 extern IString MUL; 93 extern IString RSHIFT; 94 extern IString LSHIFT; 95 extern IString TRSHIFT; 96 extern IString HEAP8; 97 extern IString HEAP16; 98 extern IString HEAP32; 99 extern IString HEAPF32; 100 extern IString HEAPU8; 101 extern IString HEAPU16; 102 extern IString HEAPU32; 103 extern IString HEAPF64; 104 extern IString F0; 105 extern IString EMPTY; 106 extern IString FUNCTION; 107 extern IString OPEN_PAREN; 108 extern IString OPEN_BRACE; 109 extern IString OPEN_CURLY; 110 extern IString CLOSE_CURLY; 111 extern IString COMMA; 112 extern IString QUESTION; 113 extern IString COLON; 114 extern IString CASE; 115 extern IString DEFAULT; 116 extern IString DOT; 117 extern IString PERIOD; 118 extern IString NEW; 119 extern IString ARRAY; 120 extern IString OBJECT; 121 extern IString THROW; 122 extern IString SET; 123 extern IString ATOMICS; 124 extern IString COMPARE_EXCHANGE; 125 extern IString LOAD; 126 extern IString STORE; 127 128 extern IStringSet keywords; 129 130 extern const char *OPERATOR_INITS, *SEPARATORS; 131 132 extern int MAX_OPERATOR_SIZE, LOWEST_PREC; 133 134 struct OperatorClass { 135 enum Type { Binary = 0, Prefix = 1, Postfix = 2, Tertiary = 3 }; 136 137 IStringSet ops; 138 bool rtl; 139 Type type; 140 OperatorClassOperatorClass141 OperatorClass(const char* o, bool r, Type t) : ops(o), rtl(r), type(t) {} 142 143 static int getPrecedence(Type type, IString op); 144 static bool getRtl(int prec); 145 }; 146 147 extern std::vector<OperatorClass> operatorClasses; 148 149 extern bool isIdentInit(char x); 150 extern bool isIdentPart(char x); 151 152 // parser 153 154 template<class NodeRef, class Builder> class Parser { 155 isSpace(char x)156 static bool isSpace(char x) { 157 return x == 32 || x == 9 || x == 10 || x == 13; 158 } /* space, tab, linefeed/newline, or return */ skipSpace(char * & curr)159 static void skipSpace(char*& curr) { 160 while (*curr) { 161 if (isSpace(*curr)) { 162 curr++; 163 continue; 164 } 165 if (curr[0] == '/' && curr[1] == '/') { 166 curr += 2; 167 while (*curr && *curr != '\n') { 168 curr++; 169 } 170 if (*curr) { 171 curr++; 172 } 173 continue; 174 } 175 if (curr[0] == '/' && curr[1] == '*') { 176 curr += 2; 177 while (*curr && (curr[0] != '*' || curr[1] != '/')) { 178 curr++; 179 } 180 curr += 2; 181 continue; 182 } 183 return; 184 } 185 } 186 isDigit(char x)187 static bool isDigit(char x) { return x >= '0' && x <= '9'; } 188 hasChar(const char * list,char x)189 static bool hasChar(const char* list, char x) { 190 while (*list) { 191 if (*list++ == x) { 192 return true; 193 } 194 } 195 return false; 196 } 197 198 // An atomic fragment of something. Stops at a natural boundary. 199 enum FragType { 200 KEYWORD = 0, 201 OPERATOR = 1, 202 IDENT = 2, 203 STRING = 3, // without quotes 204 INT = 4, 205 DOUBLE = 5, 206 SEPARATOR = 6 207 }; 208 209 struct Frag { 210 // MSVC does not allow unrestricted unions: 211 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2544.pdf 212 #ifndef _MSC_VER 213 union { 214 #endif 215 IString str; 216 double num; 217 #ifndef _MSC_VER 218 }; 219 #endif 220 int size; 221 FragType type; 222 isNumberFrag223 bool isNumber() const { return type == INT || type == DOUBLE; } 224 FragFrag225 explicit Frag(char* src) { 226 char* start = src; 227 if (isIdentInit(*src)) { 228 // read an identifier or a keyword 229 src++; 230 while (isIdentPart(*src)) { 231 src++; 232 } 233 if (*src == 0) { 234 str.set(start); 235 } else { 236 char temp = *src; 237 *src = 0; 238 str.set(start, false); 239 *src = temp; 240 } 241 type = keywords.has(str) ? KEYWORD : IDENT; 242 } else if (isDigit(*src) || (src[0] == '.' && isDigit(src[1]))) { 243 if (src[0] == '0' && (src[1] == 'x' || src[1] == 'X')) { 244 // Explicitly parse hex numbers of form "0x...", because strtod 245 // supports hex number strings only in C++11, and Visual Studio 2013 246 // does not yet support that functionality. 247 src += 2; 248 num = 0; 249 while (1) { 250 if (*src >= '0' && *src <= '9') { 251 num *= 16; 252 num += *src - '0'; 253 } else if (*src >= 'a' && *src <= 'f') { 254 num *= 16; 255 num += *src - 'a' + 10; 256 } else if (*src >= 'A' && *src <= 'F') { 257 num *= 16; 258 num += *src - 'A' + 10; 259 } else { 260 break; 261 } 262 src++; 263 } 264 } else { 265 num = strtod(start, &src); 266 } 267 // asm.js must have a '.' for double values. however, we also tolerate 268 // uglify's tendency to emit without a '.' (and fix it later with a +). 269 // for valid asm.js input, the '.' should be enough, and for uglify 270 // in the emscripten optimizer pipeline, we use simple_ast where 271 // INT/DOUBLE is quite the same at this point anyhow 272 type = (std::find(start, src, '.') == src && 273 (wasm::isSInteger32(num) || wasm::isUInteger32(num))) 274 ? INT 275 : DOUBLE; 276 assert(src > start); 277 } else if (hasChar(OPERATOR_INITS, *src)) { 278 switch (*src) { 279 case '!': 280 str = src[1] == '=' ? NE : L_NOT; 281 break; 282 case '%': 283 str = MOD; 284 break; 285 case '&': 286 str = AND; 287 break; 288 case '*': 289 str = MUL; 290 break; 291 case '+': 292 str = PLUS; 293 break; 294 case ',': 295 str = COMMA; 296 break; 297 case '-': 298 str = MINUS; 299 break; 300 case '.': 301 str = PERIOD; 302 break; 303 case '/': 304 str = DIV; 305 break; 306 case ':': 307 str = COLON; 308 break; 309 case '<': 310 str = src[1] == '<' ? LSHIFT : (src[1] == '=' ? LE : LT); 311 break; 312 case '=': 313 str = src[1] == '=' ? EQ : SET; 314 break; 315 case '>': 316 str = src[1] == '>' ? (src[2] == '>' ? TRSHIFT : RSHIFT) 317 : (src[1] == '=' ? GE : GT); 318 break; 319 case '?': 320 str = QUESTION; 321 break; 322 case '^': 323 str = XOR; 324 break; 325 case '|': 326 str = OR; 327 break; 328 case '~': 329 str = B_NOT; 330 break; 331 default: 332 abort(); 333 } 334 size = strlen(str.str); 335 #ifndef NDEBUG 336 char temp = start[size]; 337 start[size] = 0; 338 assert(strcmp(str.str, start) == 0); 339 start[size] = temp; 340 #endif 341 type = OPERATOR; 342 return; 343 } else if (hasChar(SEPARATORS, *src)) { 344 type = SEPARATOR; 345 char temp = src[1]; 346 src[1] = 0; 347 str.set(src, false); 348 src[1] = temp; 349 src++; 350 } else if (*src == '"' || *src == '\'') { 351 char* end = strchr(src + 1, *src); 352 *end = 0; 353 str.set(src + 1); 354 src = end + 1; 355 type = STRING; 356 } else { 357 dump("frag parsing", src); 358 abort(); 359 } 360 size = src - start; 361 } 362 }; 363 364 struct ExpressionElement { 365 bool isNode; 366 // MSVC does not allow unrestricted unions: 367 // http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2544.pdf 368 #ifndef _MSC_VER 369 union { 370 #endif 371 NodeRef node; 372 IString op; 373 #ifndef _MSC_VER 374 }; 375 #endif ExpressionElementExpressionElement376 ExpressionElement(NodeRef n) : isNode(true), node(n) {} ExpressionElementExpressionElement377 ExpressionElement(IString o) : isNode(false), op(o) {} 378 getNodeExpressionElement379 NodeRef getNode() { 380 assert(isNode); 381 return node; 382 } getOpExpressionElement383 IString getOp() { 384 assert(!isNode); 385 return op; 386 } 387 }; 388 389 // This is a list of the current stack of node-operator-node-operator-etc. 390 // this works by each parseExpression call appending to the vector; then 391 // recursing out, and the toplevel sorts it all 392 typedef std::vector<ExpressionElement> ExpressionParts; 393 std::vector<ExpressionParts> expressionPartsStack; 394 395 // Parses an element in a list of such elements, e.g. list of statements in a 396 // block, or list of parameters in a call 397 NodeRef parseElement(char*& src, const char* seps = ";") { 398 // dump("parseElement", src); 399 skipSpace(src); 400 Frag frag(src); 401 src += frag.size; 402 switch (frag.type) { 403 case KEYWORD: { 404 return parseAfterKeyword(frag, src, seps); 405 } 406 case IDENT: { 407 return parseAfterIdent(frag, src, seps); 408 } 409 case STRING: 410 case INT: 411 case DOUBLE: { 412 return parseExpression(parseFrag(frag), src, seps); 413 } 414 case SEPARATOR: { 415 if (frag.str == OPEN_PAREN) { 416 return parseExpression(parseAfterParen(src), src, seps); 417 } 418 if (frag.str == OPEN_BRACE) { 419 return parseExpression(parseAfterBrace(src), src, seps); 420 } 421 if (frag.str == OPEN_CURLY) { 422 return parseExpression(parseAfterCurly(src), src, seps); 423 } 424 abort(); 425 } 426 case OPERATOR: { 427 return parseExpression(frag.str, src, seps); 428 } 429 default: 430 /* dump("parseElement", src); printf("bad frag type: %d\n",frag.type); 431 */ 432 abort(); 433 } 434 return nullptr; 435 } 436 parseFrag(Frag & frag)437 NodeRef parseFrag(Frag& frag) { 438 switch (frag.type) { 439 case IDENT: 440 return Builder::makeName(frag.str); 441 case STRING: 442 return Builder::makeString(frag.str); 443 case INT: 444 return Builder::makeInt(uint32_t(frag.num)); 445 case DOUBLE: 446 return Builder::makeDouble(frag.num); 447 default: 448 abort(); 449 } 450 return nullptr; 451 } 452 parseAfterKeyword(Frag & frag,char * & src,const char * seps)453 NodeRef parseAfterKeyword(Frag& frag, char*& src, const char* seps) { 454 skipSpace(src); 455 if (frag.str == FUNCTION) { 456 return parseFunction(src, seps); 457 } else if (frag.str == VAR) { 458 return parseVar(src, seps, false); 459 } else if (frag.str == CONST) { 460 return parseVar(src, seps, true); 461 } else if (frag.str == RETURN) { 462 return parseReturn(src, seps); 463 } else if (frag.str == IF) { 464 return parseIf(src, seps); 465 } else if (frag.str == DO) { 466 return parseDo(src, seps); 467 } else if (frag.str == WHILE) { 468 return parseWhile(src, seps); 469 } else if (frag.str == BREAK) { 470 return parseBreak(src, seps); 471 } else if (frag.str == CONTINUE) { 472 return parseContinue(src, seps); 473 } else if (frag.str == SWITCH) { 474 return parseSwitch(src, seps); 475 } else if (frag.str == NEW) { 476 return parseNew(src, seps); 477 } else if (frag.str == FOR) { 478 return parseFor(src, seps); 479 } 480 dump(frag.str.str, src); 481 abort(); 482 return nullptr; 483 } 484 parseFunction(char * & src,const char * seps)485 NodeRef parseFunction(char*& src, const char* seps) { 486 Frag name(src); 487 if (name.type == IDENT) { 488 src += name.size; 489 } else { 490 assert(name.type == SEPARATOR && name.str[0] == '('); 491 name.str = IString(); 492 } 493 NodeRef ret = Builder::makeFunction(name.str); 494 skipSpace(src); 495 assert(*src == '('); 496 src++; 497 while (1) { 498 skipSpace(src); 499 if (*src == ')') { 500 break; 501 } 502 Frag arg(src); 503 assert(arg.type == IDENT); 504 src += arg.size; 505 Builder::appendArgumentToFunction(ret, arg.str); 506 skipSpace(src); 507 if (*src == ')') { 508 break; 509 } 510 if (*src == ',') { 511 src++; 512 continue; 513 } 514 abort(); 515 } 516 src++; 517 Builder::setBlockContent(ret, parseBracketedBlock(src)); 518 // TODO: parse expression? 519 return ret; 520 } 521 parseVar(char * & src,const char * seps,bool is_const)522 NodeRef parseVar(char*& src, const char* seps, bool is_const) { 523 NodeRef ret = Builder::makeVar(is_const); 524 while (1) { 525 skipSpace(src); 526 if (*src == ';') { 527 break; 528 } 529 Frag name(src); 530 assert(name.type == IDENT); 531 NodeRef value; 532 src += name.size; 533 skipSpace(src); 534 if (*src == '=') { 535 src++; 536 skipSpace(src); 537 value = parseElement(src, ";,"); 538 } 539 Builder::appendToVar(ret, name.str, value); 540 skipSpace(src); 541 if (*src == ';') { 542 break; 543 } 544 if (*src == ',') { 545 src++; 546 continue; 547 } 548 abort(); 549 } 550 src++; 551 return ret; 552 } 553 parseReturn(char * & src,const char * seps)554 NodeRef parseReturn(char*& src, const char* seps) { 555 skipSpace(src); 556 NodeRef value = !hasChar(seps, *src) ? parseElement(src, seps) : nullptr; 557 skipSpace(src); 558 assert(hasChar(seps, *src)); 559 if (*src == ';') { 560 src++; 561 } 562 return Builder::makeReturn(value); 563 } 564 parseIf(char * & src,const char * seps)565 NodeRef parseIf(char*& src, const char* seps) { 566 NodeRef condition = parseParenned(src); 567 NodeRef ifTrue = parseMaybeBracketed(src, seps); 568 skipSpace(src); 569 NodeRef ifFalse; 570 if (!hasChar(seps, *src)) { 571 Frag next(src); 572 if (next.type == KEYWORD && next.str == ELSE) { 573 src += next.size; 574 ifFalse = parseMaybeBracketed(src, seps); 575 } 576 } 577 return Builder::makeIf(condition, ifTrue, ifFalse); 578 } 579 parseDo(char * & src,const char * seps)580 NodeRef parseDo(char*& src, const char* seps) { 581 NodeRef body = parseMaybeBracketed(src, seps); 582 skipSpace(src); 583 Frag next(src); 584 assert(next.type == KEYWORD && next.str == WHILE); 585 src += next.size; 586 NodeRef condition = parseParenned(src); 587 return Builder::makeDo(body, condition); 588 } 589 parseWhile(char * & src,const char * seps)590 NodeRef parseWhile(char*& src, const char* seps) { 591 NodeRef condition = parseParenned(src); 592 NodeRef body = parseMaybeBracketed(src, seps); 593 return Builder::makeWhile(condition, body); 594 } 595 parseFor(char * & src,const char * seps)596 NodeRef parseFor(char*& src, const char* seps) { 597 skipSpace(src); 598 assert(*src == '('); 599 src++; 600 NodeRef init = parseElement(src, ";"); 601 skipSpace(src); 602 assert(*src == ';'); 603 src++; 604 NodeRef condition = parseElement(src, ";"); 605 skipSpace(src); 606 assert(*src == ';'); 607 src++; 608 NodeRef inc = parseElement(src, ")"); 609 skipSpace(src); 610 assert(*src == ')'); 611 src++; 612 NodeRef body = parseMaybeBracketed(src, seps); 613 return Builder::makeFor(init, condition, inc, body); 614 } 615 parseBreak(char * & src,const char * seps)616 NodeRef parseBreak(char*& src, const char* seps) { 617 skipSpace(src); 618 Frag next(src); 619 if (next.type == IDENT) { 620 src += next.size; 621 } 622 return Builder::makeBreak(next.type == IDENT ? next.str : IString()); 623 } 624 parseContinue(char * & src,const char * seps)625 NodeRef parseContinue(char*& src, const char* seps) { 626 skipSpace(src); 627 Frag next(src); 628 if (next.type == IDENT) { 629 src += next.size; 630 } 631 return Builder::makeContinue(next.type == IDENT ? next.str : IString()); 632 } 633 parseSwitch(char * & src,const char * seps)634 NodeRef parseSwitch(char*& src, const char* seps) { 635 NodeRef ret = Builder::makeSwitch(parseParenned(src)); 636 skipSpace(src); 637 assert(*src == '{'); 638 src++; 639 while (1) { 640 // find all cases and possibly a default 641 skipSpace(src); 642 if (*src == '}') { 643 break; 644 } 645 Frag next(src); 646 if (next.type == KEYWORD) { 647 if (next.str == CASE) { 648 src += next.size; 649 skipSpace(src); 650 NodeRef arg; 651 Frag value(src); 652 if (value.isNumber()) { 653 arg = parseFrag(value); 654 src += value.size; 655 } else if (value.type == OPERATOR) { 656 // negative number 657 assert(value.str == MINUS); 658 src += value.size; 659 skipSpace(src); 660 Frag value2(src); 661 assert(value2.isNumber()); 662 arg = Builder::makePrefix(MINUS, parseFrag(value2)); 663 src += value2.size; 664 } else { 665 // identifier and function call 666 assert(value.type == IDENT); 667 src += value.size; 668 skipSpace(src); 669 arg = parseCall(parseFrag(value), src); 670 } 671 Builder::appendCaseToSwitch(ret, arg); 672 skipSpace(src); 673 assert(*src == ':'); 674 src++; 675 continue; 676 } else if (next.str == DEFAULT) { 677 src += next.size; 678 Builder::appendDefaultToSwitch(ret); 679 skipSpace(src); 680 assert(*src == ':'); 681 src++; 682 continue; 683 } 684 // otherwise, may be some keyword that happens to start a block (e.g. 685 // case 1: _return_ 5) 686 } 687 // not case X: or default: or }, so must be some code 688 skipSpace(src); 689 bool explicitBlock = *src == '{'; 690 NodeRef subBlock = explicitBlock ? parseBracketedBlock(src) 691 : parseBlock(src, ";}", CASE, DEFAULT); 692 Builder::appendCodeToSwitch(ret, subBlock, explicitBlock); 693 } 694 skipSpace(src); 695 assert(*src == '}'); 696 src++; 697 return ret; 698 } 699 parseNew(char * & src,const char * seps)700 NodeRef parseNew(char*& src, const char* seps) { 701 return Builder::makeNew(parseElement(src, seps)); 702 } 703 parseAfterIdent(Frag & frag,char * & src,const char * seps)704 NodeRef parseAfterIdent(Frag& frag, char*& src, const char* seps) { 705 skipSpace(src); 706 if (*src == '(') { 707 return parseExpression(parseCall(parseFrag(frag), src), src, seps); 708 } 709 if (*src == '[') { 710 return parseExpression(parseIndexing(parseFrag(frag), src), src, seps); 711 } 712 if (*src == ':' && expressionPartsStack.back().size() == 0) { 713 src++; 714 skipSpace(src); 715 NodeRef inner; 716 if (*src == '{') { 717 // context lets us know this is not an object, but a block 718 inner = parseBracketedBlock(src); 719 } else { 720 inner = parseElement(src, seps); 721 } 722 return Builder::makeLabel(frag.str, inner); 723 } 724 if (*src == '.') { 725 return parseExpression(parseDotting(parseFrag(frag), src), src, seps); 726 } 727 return parseExpression(parseFrag(frag), src, seps); 728 } 729 parseCall(NodeRef target,char * & src)730 NodeRef parseCall(NodeRef target, char*& src) { 731 expressionPartsStack.resize(expressionPartsStack.size() + 1); 732 assert(*src == '('); 733 src++; 734 NodeRef ret = Builder::makeCall(target); 735 while (1) { 736 skipSpace(src); 737 if (*src == ')') { 738 break; 739 } 740 Builder::appendToCall(ret, parseElement(src, ",)")); 741 skipSpace(src); 742 if (*src == ')') { 743 break; 744 } 745 if (*src == ',') { 746 src++; 747 continue; 748 } 749 abort(); 750 } 751 src++; 752 assert(expressionPartsStack.back().size() == 0); 753 expressionPartsStack.pop_back(); 754 return ret; 755 } 756 parseIndexing(NodeRef target,char * & src)757 NodeRef parseIndexing(NodeRef target, char*& src) { 758 expressionPartsStack.resize(expressionPartsStack.size() + 1); 759 assert(*src == '['); 760 src++; 761 NodeRef ret = Builder::makeIndexing(target, parseElement(src, "]")); 762 skipSpace(src); 763 assert(*src == ']'); 764 src++; 765 assert(expressionPartsStack.back().size() == 0); 766 expressionPartsStack.pop_back(); 767 return ret; 768 } 769 parseDotting(NodeRef target,char * & src)770 NodeRef parseDotting(NodeRef target, char*& src) { 771 assert(*src == '.'); 772 src++; 773 Frag key(src); 774 assert(key.type == IDENT); 775 src += key.size; 776 return Builder::makeDot(target, key.str); 777 } 778 parseAfterParen(char * & src)779 NodeRef parseAfterParen(char*& src) { 780 expressionPartsStack.resize(expressionPartsStack.size() + 1); 781 skipSpace(src); 782 NodeRef ret = parseElement(src, ")"); 783 skipSpace(src); 784 assert(*src == ')'); 785 src++; 786 assert(expressionPartsStack.back().size() == 0); 787 expressionPartsStack.pop_back(); 788 return ret; 789 } 790 parseAfterBrace(char * & src)791 NodeRef parseAfterBrace(char*& src) { 792 expressionPartsStack.resize(expressionPartsStack.size() + 1); 793 NodeRef ret = Builder::makeArray(); 794 while (1) { 795 skipSpace(src); 796 assert(*src); 797 if (*src == ']') { 798 break; 799 } 800 NodeRef element = parseElement(src, ",]"); 801 Builder::appendToArray(ret, element); 802 skipSpace(src); 803 if (*src == ']') { 804 break; 805 } 806 if (*src == ',') { 807 src++; 808 continue; 809 } 810 abort(); 811 } 812 src++; 813 return ret; 814 } 815 parseAfterCurly(char * & src)816 NodeRef parseAfterCurly(char*& src) { 817 expressionPartsStack.resize(expressionPartsStack.size() + 1); 818 NodeRef ret = Builder::makeObject(); 819 while (1) { 820 skipSpace(src); 821 assert(*src); 822 if (*src == '}') { 823 break; 824 } 825 Frag key(src); 826 assert(key.type == IDENT || key.type == STRING); 827 src += key.size; 828 skipSpace(src); 829 assert(*src == ':'); 830 src++; 831 NodeRef value = parseElement(src, ",}"); 832 Builder::appendToObject(ret, key.str, value); 833 skipSpace(src); 834 if (*src == '}') { 835 break; 836 } 837 if (*src == ',') { 838 src++; 839 continue; 840 } 841 abort(); 842 } 843 src++; 844 return ret; 845 } 846 dumpParts(ExpressionParts & parts,int i)847 void dumpParts(ExpressionParts& parts, int i) { 848 printf("expressionparts: %d (at %d)\n", parts.size(), i); 849 printf("| "); 850 for (int i = 0; i < parts.size(); i++) { 851 if (parts[i].isNode) { 852 parts[i].getNode()->stringify(std::cout); 853 printf(" "); 854 } else { 855 printf(" _%s_ ", parts[i].getOp().str); 856 } 857 } 858 printf("|\n"); 859 } 860 makeBinary(NodeRef left,IString op,NodeRef right)861 NodeRef makeBinary(NodeRef left, IString op, NodeRef right) { 862 if (op == PERIOD) { 863 return Builder::makeDot(left, right); 864 } else { 865 return Builder::makeBinary(left, op, right); 866 } 867 } 868 869 NodeRef parseExpression(ExpressionElement initial,char * & src,const char * seps)870 parseExpression(ExpressionElement initial, char*& src, const char* seps) { 871 // dump("parseExpression", src); 872 ExpressionParts& parts = expressionPartsStack.back(); 873 skipSpace(src); 874 if (*src == 0 || hasChar(seps, *src)) { 875 if (parts.size() > 0) { 876 parts.push_back(initial); // cherry on top of the cake 877 } 878 return initial.getNode(); 879 } 880 bool top = parts.size() == 0; 881 if (initial.isNode) { 882 Frag next(src); 883 if (next.type == OPERATOR) { 884 parts.push_back(initial); 885 src += next.size; 886 parts.push_back(next.str); 887 } else { 888 if (*src == '(') { 889 initial = parseCall(initial.getNode(), src); 890 } else if (*src == '[') { 891 initial = parseIndexing(initial.getNode(), src); 892 } else { 893 dump("bad parseExpression state", src); 894 abort(); 895 } 896 return parseExpression(initial, src, seps); 897 } 898 } else { 899 parts.push_back(initial); 900 } 901 NodeRef last = parseElement(src, seps); 902 if (!top) { 903 return last; 904 } 905 { 906 // |parts| may have been invalidated by that call 907 ExpressionParts& parts = expressionPartsStack.back(); 908 // we are the toplevel. sort it all out 909 // collapse right to left, highest priority first 910 // dumpParts(parts, 0); 911 for (auto& ops : operatorClasses) { 912 if (ops.rtl) { 913 // right to left 914 for (int i = parts.size() - 1; i >= 0; i--) { 915 if (parts[i].isNode) { 916 continue; 917 } 918 IString op = parts[i].getOp(); 919 if (!ops.ops.has(op)) { 920 continue; 921 } 922 if (ops.type == OperatorClass::Binary && i > 0 && 923 i < (int)parts.size() - 1) { 924 parts[i] = 925 makeBinary(parts[i - 1].getNode(), op, parts[i + 1].getNode()); 926 parts.erase(parts.begin() + i + 1); 927 parts.erase(parts.begin() + i - 1); 928 } else if (ops.type == OperatorClass::Prefix && 929 i < (int)parts.size() - 1) { 930 if (i > 0 && parts[i - 1].isNode) { 931 // cannot apply prefix operator if it would join two nodes 932 continue; 933 } 934 parts[i] = Builder::makePrefix(op, parts[i + 1].getNode()); 935 parts.erase(parts.begin() + i + 1); 936 } else if (ops.type == OperatorClass::Tertiary) { 937 // we must be at X ? Y : Z 938 // ^ 939 // dumpParts(parts, i); 940 if (op != COLON) { 941 continue; 942 } 943 assert(i < (int)parts.size() - 1 && i >= 3); 944 if (parts[i - 2].getOp() != QUESTION) { 945 continue; // e.g. x ? y ? 1 : 0 : 2 946 } 947 parts[i - 3] = Builder::makeConditional(parts[i - 3].getNode(), 948 parts[i - 1].getNode(), 949 parts[i + 1].getNode()); 950 parts.erase(parts.begin() + i - 2, parts.begin() + i + 2); 951 // basically a reset, due to things like x ? y ? 1 : 0 : 2 952 i = parts.size(); 953 } // TODO: postfix 954 } 955 } else { 956 // left to right 957 for (int i = 0; i < (int)parts.size(); i++) { 958 if (parts[i].isNode) { 959 continue; 960 } 961 IString op = parts[i].getOp(); 962 if (!ops.ops.has(op)) { 963 continue; 964 } 965 if (ops.type == OperatorClass::Binary && i > 0 && 966 i < (int)parts.size() - 1) { 967 parts[i] = 968 makeBinary(parts[i - 1].getNode(), op, parts[i + 1].getNode()); 969 parts.erase(parts.begin() + i + 1); 970 parts.erase(parts.begin() + i - 1); 971 i--; 972 } else if (ops.type == OperatorClass::Prefix && 973 i < (int)parts.size() - 1) { 974 if (i > 0 && parts[i - 1].isNode) { 975 // cannot apply prefix operator if it would join two nodes 976 continue; 977 } 978 parts[i] = Builder::makePrefix(op, parts[i + 1].getNode()); 979 parts.erase(parts.begin() + i + 1); 980 // allow a previous prefix operator to cascade 981 i = std::max(i - 2, 0); 982 } // TODO: tertiary, postfix 983 } 984 } 985 } 986 assert(parts.size() == 1); 987 NodeRef ret = parts[0].getNode(); 988 parts.clear(); 989 return ret; 990 } 991 } 992 993 // Parses a block of code (e.g. a bunch of statements inside {,}, or the top 994 // level of o file) 995 NodeRef parseBlock(char*& src, 996 const char* seps = ";", 997 IString keywordSep1 = IString(), 998 IString keywordSep2 = IString()) { 999 NodeRef block = Builder::makeBlock(); 1000 // dump("parseBlock", src); 1001 while (1) { 1002 skipSpace(src); 1003 if (*src == 0) { 1004 break; 1005 } 1006 if (*src == ';') { 1007 src++; // skip a statement in this block 1008 continue; 1009 } 1010 if (hasChar(seps, *src)) { 1011 break; 1012 } 1013 if (!!keywordSep1) { 1014 Frag next(src); 1015 if (next.type == KEYWORD && next.str == keywordSep1) { 1016 break; 1017 } 1018 } 1019 if (!!keywordSep2) { 1020 Frag next(src); 1021 if (next.type == KEYWORD && next.str == keywordSep2) { 1022 break; 1023 } 1024 } 1025 NodeRef element = parseElementOrStatement(src, seps); 1026 Builder::appendToBlock(block, element); 1027 } 1028 return block; 1029 } 1030 parseBracketedBlock(char * & src)1031 NodeRef parseBracketedBlock(char*& src) { 1032 skipSpace(src); 1033 assert(*src == '{'); 1034 src++; 1035 // the two are not symmetrical, ; is just internally separating, } is the 1036 // final one - parseBlock knows all this 1037 NodeRef block = parseBlock(src, ";}"); 1038 assert(*src == '}'); 1039 src++; 1040 return block; 1041 } 1042 parseElementOrStatement(char * & src,const char * seps)1043 NodeRef parseElementOrStatement(char*& src, const char* seps) { 1044 skipSpace(src); 1045 if (*src == ';') { 1046 src++; 1047 // we don't need the brackets here, but oh well 1048 return Builder::makeBlock(); 1049 } 1050 if (*src == '{') { // detect a trivial {} in a statement context 1051 char* before = src; 1052 src++; 1053 skipSpace(src); 1054 if (*src == '}') { 1055 src++; 1056 // we don't need the brackets here, but oh well 1057 return Builder::makeBlock(); 1058 } 1059 src = before; 1060 } 1061 NodeRef ret = parseElement(src, seps); 1062 skipSpace(src); 1063 if (*src == ';') { 1064 ret = Builder::makeStatement(ret); 1065 src++; 1066 } 1067 return ret; 1068 } 1069 parseMaybeBracketed(char * & src,const char * seps)1070 NodeRef parseMaybeBracketed(char*& src, const char* seps) { 1071 skipSpace(src); 1072 return *src == '{' ? parseBracketedBlock(src) 1073 : parseElementOrStatement(src, seps); 1074 } 1075 parseParenned(char * & src)1076 NodeRef parseParenned(char*& src) { 1077 skipSpace(src); 1078 assert(*src == '('); 1079 src++; 1080 NodeRef ret = parseElement(src, ")"); 1081 skipSpace(src); 1082 assert(*src == ')'); 1083 src++; 1084 return ret; 1085 } 1086 1087 // Debugging 1088 1089 char* allSource = nullptr; 1090 int allSize = 0; 1091 dump(const char * where,char * curr)1092 static void dump(const char* where, char* curr) { 1093 /* 1094 printf("%s:\n=============\n", where); 1095 for (int i = 0; i < allSize; i++) 1096 printf("%c", allSource[i] ? allSource[i] : 1097 '?'); 1098 printf("\n"); 1099 for (int i = 0; i < (curr - allSource); i++) printf(" "); 1100 printf("^\n=============\n"); 1101 */ 1102 fprintf(stderr, "%s:\n==========\n", where); 1103 int newlinesLeft = 2; 1104 int charsLeft = 200; 1105 while (*curr) { 1106 if (*curr == '\n') { 1107 newlinesLeft--; 1108 if (newlinesLeft == 0) { 1109 break; 1110 } 1111 } 1112 charsLeft--; 1113 if (charsLeft == 0) { 1114 break; 1115 } 1116 fprintf(stderr, "%c", *curr++); 1117 } 1118 fprintf(stderr, "\n\n"); 1119 } 1120 1121 public: Parser()1122 Parser() { expressionPartsStack.resize(1); } 1123 1124 // Highest-level parsing, as of a JavaScript script file. parseToplevel(char * src)1125 NodeRef parseToplevel(char* src) { 1126 allSource = src; 1127 allSize = strlen(src); 1128 NodeRef toplevel = Builder::makeToplevel(); 1129 Builder::setBlockContent(toplevel, parseBlock(src)); 1130 return toplevel; 1131 } 1132 }; 1133 1134 } // namespace cashew 1135 1136 #endif // wasm_parser_h 1137