1 // Mini C parser by Robert van Engelen 2 // A simple one-pass, syntax-directed translation of mini C to JVM bytecode 3 // Requires minic.l, minic.y, minic.hpp 4 // See minicdemo.c for a description of the mini C features 5 // 6 // Ideas for compiler improvements (from easy to hard, roughly): 7 // - add more library functions that compile to JVM virtual and static method invocations 8 // - allow polymorphic functions instead of generating redeclaration errors 9 // - add constant folding, i.e. Expr class contains U4, F8, and std::string constants to combine 10 // - add variable declaration initializers, e.g. int a=1; and/or implicit initialization, requires init method to init statics 11 // - allow variable declarations anywhere in code blocks, not just at the start of a function body 12 // - add structs/classes (structs without methods or classes with methods) 13 // - add exceptions, e.g. 'try' and 'catch' blocks 14 15 %require "3.2" 16 %skeleton "lalr1.cc" 17 %language "c++" 18 19 // parser class: yy::Parser 20 %define api.namespace {yy} 21 %define api.parser.class {Parser} 22 23 // use C++ variants and generate yy::Parser::make_<TOKEN> constructors 24 %define api.value.type variant 25 %define api.token.constructor 26 27 // verbose error messages, reported with yy::Parser::error() 28 %define parse.error verbose 29 30 // generate parser.hpp 31 %defines 32 33 // generate parser.cpp 34 %output "parser.cpp" 35 36 // enable bison locations and generate location.hpp 37 %locations 38 %define api.location.file "location.hpp" 39 40 // pass Scanner and Compiler objects to yy::Parser::parse(), to use lexer and comp in semantic actions 41 %parse-param {yy::Scanner& lexer} 42 %parse-param {Compiler& comp} 43 %parse-param {size_t& errors} 44 45 // generate yy::Parser::token::TOKEN_<name> token constants 46 %define api.token.prefix {TOKEN_} 47 48 // we expect one shift-reduce conflict, for the ambiguous if-else 49 %expect 1 50 51 // tokens with semantic values 52 %token <ID> ID "identifier"; 53 %token <U8> U8 "integer constant"; 54 %token <F8> F8 "float constant"; 55 %token <CS> CS "string literal"; 56 %token EOF 0 "end of file"; 57 58 %token 59 BREAK "break" 60 CASE "case" 61 CONTINUE "continue" 62 DEFAULT "default" 63 DO "do" 64 ELSE "else" 65 FALSE "false" 66 FLOAT "float" 67 FOR "for" 68 IF "if" 69 INT "int" 70 MAIN "main" 71 NEW "new" 72 PRINT "print" 73 PRINTLN "println" 74 RETURN "return" 75 STRING "string" 76 SWITCH "switch" 77 TRUE "true" 78 VOID "void" 79 WHILE "while" 80 PA "+=" 81 NA "-=" 82 TA "*=" 83 DA "/=" 84 MA "%=" 85 AA "&=" 86 XA "^=" 87 OA "|=" 88 LA "<<=" 89 RA ">>=" 90 OR "||" 91 AN "&&" 92 EQ "==" 93 NE "!=" 94 LE "<=" 95 GE ">=" 96 LS "<<" 97 RS ">>" 98 PP "++" 99 NN "--" 100 AR "->" 101 ; 102 103 %token '!' '#' '$' '%' '&' '(' ')' '*' '+' ',' '-' '.' '/' ':' ';' '<' '=' '>' '?' '[' ']' '^' '{' '|' '}' '~' 104 105 // operator precedence and associativity (includes ')' for casts and '[' for array indexing) 106 %right '=' PA NA TA DA MA AA XA OA LA RA 107 %right '?' 108 %right ':' 109 %left OR 110 %left AN 111 %left '|' 112 %left '^' 113 %left '&' 114 %left EQ NE LE '<' GE '>' 115 %left LS RS 116 %left '+' '-' 117 %left '*' '/' '%' 118 %right '!' '~' ')' 119 %nonassoc '#' '$' PP NN 120 %left '[' '.' AR 121 122 // the code to include with parser.hpp 123 %code requires { 124 #include "minic.hpp" 125 namespace yy { 126 class Scanner; 127 } 128 } 129 130 // the code to include with parser.cpp 131 %code { 132 #include "scanner.hpp" 133 #include <iostream> 134 // within yy::Parser::parse() we should invoke lexer.lex() as yylex() 135 #undef yylex 136 #define yylex lexer.lex 137 } 138 139 // nonterminals with Type semantic values 140 %type <Type> type list args optargs exprs optexprs 141 142 // nonterminals with Proto semantic values 143 %type <Proto> proto 144 145 // nonterminals with Expr semantic values 146 %type <Expr> expr cond optcond 147 148 // for print or println functions 149 %type <CS> print prints 150 151 // marker nonterminals with U4 address semantic values 152 %type <U4> A B C G 153 154 %% 155 156 program : statics { 157 delete comp.table[comp.scope]; 158 } 159 ; 160 161 statics : statics func 162 | statics decl 163 | /* empty */ { 164 comp.table[++comp.scope] = new Table(); 165 } 166 ; 167 168 func : main '(' ')' block 169 { 170 // make sure we return from main() 171 comp.emit(return_); 172 // construct a static method "main" 173 ID id = lexer.symbol("main"); 174 TD type = comp.type_function("[Ljava/lang/String;", "V"); 175 Table *table = comp.table[comp.scope]; 176 comp.new_method(ACC_PUBLIC | ACC_STATIC, id, type, table->locals, 256); 177 delete table; 178 if (!comp.table[--comp.scope]->enter_func(id, type, false)) 179 error(@1, "redefined main"); 180 } 181 | proto block { 182 // make sure we return (compiler does not check if functions return a value) 183 comp.emit(return_); 184 // constuct a static function 185 comp.new_method(ACC_PUBLIC | ACC_STATIC, $1.id, $1.type, comp.table[comp.scope]->locals, 256); 186 delete comp.table[comp.scope--]; 187 if (!comp.table[comp.scope]->enter_func($1.id, $1.type, false)) 188 error(@2, "redefined function"); 189 } 190 | proto ';' { 191 delete comp.table[comp.scope--]; 192 } 193 ; 194 195 proto : type ID '(' S optargs ')' 196 { 197 $$.type = comp.type_function($5.type, $1.type); 198 if (!comp.table[comp.scope - 1]->enter_func($2, $$.type, true)) 199 error(@2, "redefined function"); 200 $$.id = $2; 201 } 202 203 main : type MAIN { 204 // start parsing the main() function, create a local scope table 205 Table *table = new Table(comp.table[comp.scope]); 206 comp.table[++comp.scope] = table; 207 // start with offset 1 for the locals in main() since 0 is used for the argument array 208 table->locals = 1; 209 // to emit code in a buffer, copy the code to the method later 210 comp.new_method_code(); 211 // this is the main() function 212 comp.main = true; 213 // main() may return int or void 214 if (!comp.type_is_void($1.type) && !comp.type_is_int($1.type)) 215 error(@1, "main can only return an int or void"); 216 // the function's return type 217 comp.return_type = $1.type; 218 } 219 ; 220 221 S : /* empty */ { 222 // start parsing the scope of a function, create a local scope table 223 Table *table = new Table(comp.table[comp.scope]); 224 comp.table[++comp.scope] = table; 225 // to emit code in a buffer, copy the code to the method later 226 comp.new_method_code(); 227 // not the main() function 228 comp.main = false; 229 // the function's return type 230 comp.return_type = comp.decl_type; 231 } 232 ; 233 234 block : '{' decls stmts '}' 235 ; 236 237 decls : decls decl 238 | /* empty */ 239 ; 240 241 decl : list ';' 242 ; 243 244 type : VOID { 245 $$.type = comp.decl_type = comp.type_void(); 246 } 247 | INT { 248 $$.type = comp.decl_type = comp.type_int(); 249 } 250 | FLOAT { 251 $$.type = comp.decl_type = comp.type_double(); 252 } 253 | STRING { 254 $$.type = comp.decl_type = comp.type_string(); 255 } 256 | type '[' ']' { 257 // array types can be declared but arrays are not fully implemented 258 $$.type = comp.decl_type = comp.type_array($1.type); 259 } 260 ; 261 262 list : list ',' ID { 263 if (comp.scope == 0) 264 { 265 // add a static variable, stored as a static field 266 if (!comp.table[0]->enter($3, $1.type, comp.new_field(ACC_STATIC, $3, $1.type))) 267 error(@3, "redefined static variable"); 268 } 269 else 270 { 271 // add a local variable, stored in the method frame (double takes two frame slots) 272 if (!comp.table[comp.scope]->enter($3, $1.type, comp.table[comp.scope]->locals)) 273 error(@3, "redefined variable"); 274 comp.table[comp.scope]->locals += 1 + comp.type_is_double($1.type); 275 } 276 $$ = $1; 277 } 278 | type ID { 279 if (comp.scope == 0) 280 { 281 // add a static variable, stored as a static field 282 if (!comp.table[0]->enter($2, $1.type, comp.new_field(ACC_STATIC, $2, $1.type))) 283 error(@2, "redefined static variable"); 284 } 285 else 286 { 287 // add a local variable, stored in the method frame (double takes two frame slots) 288 if (!comp.table[comp.scope]->enter($2, $1.type, comp.table[comp.scope]->locals)) 289 error(@2, "redefined variable"); 290 comp.table[comp.scope]->locals += 1 + comp.type_is_double($1.type); 291 } 292 $$ = $1; 293 } 294 ; 295 296 args : args ',' type ID 297 { 298 // add an argument variable, stored in the method frame (double takes two frame slots) 299 if (!comp.table[comp.scope]->enter($4, $3.type, comp.table[comp.scope]->locals)) 300 error(@4, "redefined argument"); 301 comp.table[comp.scope]->locals += 1 + comp.type_is_double($3.type); 302 // concat argument types to produce the JVM type descriptor of the method, e.g. "II" in "(II)I" 303 $$.type = comp.type_concat($1.type, $3.type); 304 } 305 | type ID { 306 // add an argument variable, stored in the method frame (double takes two frame slots) 307 if (!comp.table[comp.scope]->enter($2, $1.type, comp.table[comp.scope]->locals)) 308 error(@2, "redefined argument"); 309 comp.table[comp.scope]->locals += 1 + comp.type_is_double($1.type); 310 $$ = $1; 311 } 312 ; 313 314 optargs : args { 315 $$ = $1; 316 } 317 | /* empty */ { 318 $$.type = comp.type_none(); 319 } 320 ; 321 322 stmts : stmts stmt 323 | /* empty */ 324 ; 325 326 stmt : IF '(' cond ')' stmt 327 { 328 comp.backpatch($3.truelist, $3.after); 329 comp.backpatch($3.falselist, comp.addr()); 330 } 331 | IF '(' cond ')' stmt ELSE G A stmt 332 { 333 comp.backpatch($3.truelist, $3.after); 334 comp.backpatch($3.falselist, $8); 335 comp.backpatch($7, comp.addr()); 336 } 337 | switch G '{' cases '}' G 338 { 339 comp.backpatch($2, comp.addr()); 340 // generate switch lookup table and backpatch the break instruction jumps 341 comp.switch_done(); 342 comp.backpatch($6, comp.addr()); 343 } 344 | WHILE '(' C cond ')' B stmt G 345 { 346 comp.backpatch($4.truelist, $6); 347 comp.backpatch($4.falselist, comp.addr()); 348 comp.backpatch($8, $3); 349 // backpatch the break and continue goto instruction jumps 350 comp.loop_done(); 351 } 352 | DO B stmt WHILE '(' C cond ')' ';' 353 { 354 comp.backpatch($7.truelist, $2); 355 comp.backpatch($7.falselist, comp.addr()); 356 // backpatch the break and continue goto instruction jumps 357 comp.loop_done(); 358 } 359 | FOR '(' optexpr ';' C optcond ';' A optexpr G ')' B stmt G 360 { 361 comp.backpatch($6.truelist, $12); 362 comp.backpatch($6.falselist, comp.addr()); 363 comp.backpatch($10, $5); 364 comp.backpatch($14, $8); 365 // backpatch the break and continue goto instruction jumps 366 comp.loop_done(); 367 } 368 | RETURN expr ';' 369 { 370 TD type = comp.rvalue($2); 371 if (comp.main) 372 { 373 if (!comp.type_is_int(comp.return_type)) 374 error(@2, "return type error"); 375 else if (!comp.coerce($2, comp.return_type)) 376 error(@2, "type error"); 377 comp.emit3(invokestatic, comp.pool_add_Method("java/lang/System", "exit", "(I)V")); 378 } 379 else 380 { 381 if (!comp.coerce($2, comp.return_type)) 382 error(@2, "type error"); 383 else if (comp.type_is_int(comp.return_type)) 384 comp.emit(ireturn); 385 else if (comp.type_is_double(type)) 386 comp.emit(freturn); 387 else 388 comp.emit(areturn); 389 } 390 } 391 | RETURN ';' { 392 if (comp.main) 393 { 394 if (!comp.type_is_void(comp.return_type)) 395 error(@1, "return requires a value"); 396 comp.emit(iconst_0); 397 comp.emit3(invokestatic, comp.pool_add_Method("java/lang/System", "exit", "(I)V")); 398 } 399 else 400 { 401 if (!comp.type_is_void(comp.return_type)) 402 error(@1, "return requires a value"); 403 comp.emit(return_); 404 } 405 } 406 | prints ';' { 407 comp.emit(pop); 408 } 409 | BREAK ';' { 410 if (!comp.emit_break()) 411 error(@1, "break not in loop or switch"); 412 } 413 | CONTINUE ';' { 414 if (!comp.emit_continue()) 415 error(@1, "continue not in loop"); 416 } 417 | '{' stmts '}' 418 | optexpr ';' 419 | error ';' { 420 // synchronize on ; to continue parsing 421 yyerrok; 422 } 423 ; 424 425 switch : SWITCH '(' expr ')' 426 { 427 if (!comp.type_is_int(comp.rvalue($3))) 428 error(@3, "type error"); 429 comp.switch_init(); 430 } 431 ; 432 433 cases : cases case ':' stmts 434 | /* empty */ 435 ; 436 437 case : CASE U8 { 438 if ($2 > 0x7fffffff) 439 error(@2, "integer constant out of range"); 440 else if (!comp.emit_case(static_cast<U4>($2), comp.addr())) 441 error(@2, "duplicate case value"); 442 } 443 | CASE '-' U8 { 444 if ($3 > 0x80000000) 445 error(@3, "integer constant out of range"); 446 else if (!comp.emit_case(static_cast<U4>(-$3), comp.addr())) 447 error(@2 + @3, "duplicate case value"); 448 } 449 | DEFAULT { 450 if (!comp.emit_default(comp.addr())) 451 error(@1, "duplicate default"); 452 } 453 ; 454 455 prints : prints ',' D expr 456 { 457 TD type = comp.rvalue($4); 458 if (comp.type_is_int(type)) 459 comp.emit3(invokevirtual, comp.pool_add_Method("java/io/PrintStream", $1, "(I)V")); 460 else if (comp.type_is_double(type)) 461 comp.emit3(invokevirtual, comp.pool_add_Method("java/io/PrintStream", $1, "(D)V")); 462 else if (comp.type_is_string(type)) 463 comp.emit3(invokevirtual, comp.pool_add_Method("java/io/PrintStream", $1, "(Ljava/lang/String;)V")); 464 else 465 error(@4, "type error"); 466 $$ = $1; 467 } 468 | print expr { 469 TD type = comp.rvalue($2); 470 if (comp.type_is_int(type)) 471 comp.emit3(invokevirtual, comp.pool_add_Method("java/io/PrintStream", $1, "(I)V")); 472 else if (comp.type_is_double(type)) 473 comp.emit3(invokevirtual, comp.pool_add_Method("java/io/PrintStream", $1, "(D)V")); 474 else if (comp.type_is_string(type)) 475 comp.emit3(invokevirtual, comp.pool_add_Method("java/io/PrintStream", $1, "(Ljava/lang/String;)V")); 476 else 477 error(@2, "type error"); 478 $$ = $1; 479 } 480 ; 481 482 print : PRINT { 483 comp.emit3(getstatic, comp.pool_add_Field("java/lang/System", "out", "Ljava/io/PrintStream;")); 484 comp.emit(dup); 485 $$ = "print"; 486 } 487 | PRINTLN { 488 comp.emit3(getstatic, comp.pool_add_Field("java/lang/System", "out", "Ljava/io/PrintStream;")); 489 comp.emit(dup); 490 $$ = "println"; 491 } 492 ; 493 494 exprs : exprs ',' expr 495 { 496 // concat argument types to produce the JVM type descriptor of the method, e.g. "II" in "(II)I" 497 $$.type = comp.type_concat($1.type, comp.rvalue($3)); 498 } 499 | expr { 500 $$.type = comp.rvalue($1); 501 } 502 ; 503 504 optexprs: exprs { 505 $$ = $1; 506 } 507 | /* empty */ { 508 $$ = comp.type_none(); 509 } 510 ; 511 512 expr : expr '=' expr { 513 if (!comp.emit_assign($1, $3, nop, nop, $$)) 514 error(@1 + @3, "not assignable"); 515 $$.after = comp.addr(); 516 } 517 | expr PA expr { 518 if (!comp.emit_assign($1, $3, iadd, dadd, $$)) 519 error(@1 + @3, "not assignable"); 520 $$.after = comp.addr(); 521 } 522 | expr NA expr { 523 if (!comp.emit_assign($1, $3, isub, dsub, $$)) 524 error(@1 + @3, "not assignable"); 525 $$.after = comp.addr(); 526 } 527 | expr TA expr { 528 if (!comp.emit_assign($1, $3, imul, dmul, $$)) 529 error(@1 + @3, "not assignable"); 530 $$.after = comp.addr(); 531 } 532 | expr DA expr { 533 if (!comp.emit_assign($1, $3, idiv, ddiv, $$)) 534 error(@1 + @3, "not assignable"); 535 $$.after = comp.addr(); 536 } 537 | expr MA expr { 538 if (!comp.emit_assign($1, $3, irem, nop, $$)) 539 error(@1 + @3, "not assignable"); 540 $$.after = comp.addr(); 541 } 542 | expr AA expr { 543 if (!comp.emit_assign($1, $3, iand, nop, $$)) 544 error(@1 + @3, "not assignable"); 545 $$.after = comp.addr(); 546 } 547 | expr XA expr { 548 if (!comp.emit_assign($1, $3, ixor, nop, $$)) 549 error(@1 + @3, "not assignable"); 550 $$.after = comp.addr(); 551 } 552 | expr OA expr { 553 if (!comp.emit_assign($1, $3, ior, nop, $$)) 554 error(@1 + @3, "not assignable"); 555 $$.after = comp.addr(); 556 } 557 | expr LA expr { 558 if (!comp.emit_assign($1, $3, ishl, nop, $$)) 559 error(@1 + @3, "not assignable"); 560 $$.after = comp.addr(); 561 } 562 | expr RA expr { 563 if (!comp.emit_assign($1, $3, ishr, nop, $$)) 564 error(@1 + @3, "not assignable"); 565 $$.after = comp.addr(); 566 } 567 | expr '?' expr ':' expr 568 { 569 U4 offset = comp.circuit($1); 570 comp.backpatch($1.truelist, $1.after); 571 comp.adjust($3, offset); 572 comp.adjust($5, offset); 573 if (comp.type_is_circuit($3.type) && comp.type_is_circuit($5.type)) 574 { 575 $$.truelist = comp.merge($3.truelist, $5.truelist); 576 $$.falselist = comp.merge($3.falselist, $5.falselist); 577 } 578 else if (comp.type_is_circuit($3.type)) 579 { 580 comp.circuit($5); 581 $$.truelist = comp.merge($3.truelist, $5.truelist); 582 $$.falselist = comp.merge($3.falselist, $5.falselist); 583 } 584 else if (comp.type_is_circuit($5.type)) 585 { 586 comp.adjust($5, comp.circuit($3)); 587 $$.truelist = comp.merge($3.truelist, $5.truelist); 588 $$.falselist = comp.merge($3.falselist, $5.falselist); 589 } 590 else 591 { 592 if (comp.type_equal($3.type, $5.type)) 593 { 594 $$.type = $3.type; 595 } 596 else 597 { 598 $$.type = comp.type_wider($3.type, $5.type); 599 if ($$.type == NULL) 600 error(@3 + @5, "type error"); 601 comp.coerce($5, $$.type); 602 comp.coerce($3, $$.type); 603 } 604 comp.insert3($3.after, goto_, 0); 605 comp.backpatch($3.after - 3, comp.addr()); 606 } 607 comp.backpatch($1.falselist, $3.after); 608 $$.after = comp.addr(); 609 } 610 | expr OR expr { 611 comp.adjust($3, comp.circuit($1)); 612 comp.circuit($3); 613 $$.truelist = comp.merge($1.truelist, $3.truelist); 614 comp.backpatch($1.falselist, $1.after); 615 $$.falselist = $3.falselist; 616 $$.after = comp.addr(); 617 } 618 | expr AN expr { 619 comp.adjust($3, comp.circuit($1)); 620 comp.circuit($3); 621 $$.falselist = comp.merge($1.falselist, $3.falselist); 622 comp.backpatch($1.truelist, $1.after); 623 $$.truelist = $3.truelist; 624 $$.after = comp.addr(); 625 } 626 | expr '|' expr { 627 if (!comp.emit_oper($1, $3, ior, nop, $$)) 628 error(@1 + @3, "type error"); 629 $$.after = comp.addr(); 630 } 631 | expr '^' expr { 632 if (!comp.emit_oper($1, $3, ixor, nop, $$)) 633 error(@1 + @3, "type error"); 634 $$.after = comp.addr(); 635 } 636 | expr '&' expr { 637 if (!comp.emit_oper($1, $3, iand, nop, $$)) 638 error(@1 + @3, "type error"); 639 $$.after = comp.addr(); 640 } 641 | expr EQ expr { 642 if (!comp.emit_compare($1, $3, ifeq, if_icmpeq, $$)) 643 error(@1 + @3, "type error"); 644 $$.after = comp.addr(); 645 } 646 | expr NE expr { 647 if (!comp.emit_compare($1, $3, ifne, if_icmpne, $$)) 648 error(@1 + @3, "type error"); 649 $$.after = comp.addr(); 650 } 651 | expr GE expr { 652 if (!comp.emit_compare($1, $3, ifge, if_icmpge, $$)) 653 error(@1 + @3, "type error"); 654 $$.after = comp.addr(); 655 } 656 | expr '<' expr { 657 if (!comp.emit_compare($1, $3, iflt, if_icmplt, $$)) 658 error(@1 + @3, "type error"); 659 $$.after = comp.addr(); 660 } 661 | expr LE expr { 662 if (!comp.emit_compare($1, $3, ifle, if_icmple, $$)) 663 error(@1 + @3, "type error"); 664 $$.after = comp.addr(); 665 } 666 | expr '>' expr { 667 if (!comp.emit_compare($1, $3, ifgt, if_icmpgt, $$)) 668 error(@1 + @3, "type error"); 669 $$.after = comp.addr(); 670 } 671 | expr LS expr { 672 if (!comp.emit_oper($1, $3, ishl, nop, $$)) 673 error(@1 + @3, "type error"); 674 $$.after = comp.addr(); 675 } 676 | expr RS expr { 677 if (!comp.emit_oper($1, $3, ishr, nop, $$)) 678 error(@1 + @3, "type error"); 679 $$.after = comp.addr(); 680 } 681 | expr '+' expr { 682 if (!comp.emit_oper($1, $3, iadd, dadd, $$)) 683 error(@1 + @3, "type error"); 684 $$.after = comp.addr(); 685 } 686 | expr '-' expr { 687 if (!comp.emit_oper($1, $3, isub, dsub, $$)) 688 error(@1 + @3, "type error"); 689 $$.after = comp.addr(); 690 } 691 | expr '*' expr { 692 if (!comp.emit_oper($1, $3, imul, dmul, $$)) 693 error(@1 + @3, "type error"); 694 $$.after = comp.addr(); 695 } 696 | expr '/' expr { 697 if (!comp.emit_oper($1, $3, idiv, ddiv, $$)) 698 error(@1 + @3, "type error"); 699 $$.after = comp.addr(); 700 } 701 | expr '%' expr { 702 if (!comp.emit_oper($1, $3, irem, nop, $$)) 703 error(@1 + @3, "type error"); 704 $$.after = comp.addr(); 705 } 706 | '!' expr { 707 comp.circuit($2); 708 $$.truelist = $2.falselist; 709 $$.falselist = $2.truelist; 710 $$.after = comp.addr(); 711 } 712 | '~' expr { 713 $$.type = comp.rvalue($2); 714 if (!comp.type_is_int($$.type)) 715 error(@1 + @2, "type error"); 716 comp.emit(iconst_m1); 717 comp.emit(ixor); 718 $$.after = comp.addr(); 719 } 720 | '+' expr %prec '!' 721 { 722 $$.type = comp.rvalue($2); 723 if (!comp.type_is_int($$.type) && !comp.type_is_double($$.type)) 724 error(@1 + @2, "type error"); 725 $$.after = comp.addr(); 726 } 727 | '-' expr %prec '!' 728 { 729 $$.type = comp.rvalue($2); 730 if (comp.type_is_int($$.type)) 731 comp.emit(ineg); 732 else if (comp.type_is_double($$.type)) 733 comp.emit(dneg); 734 else 735 error(@1 + @2, "type error"); 736 $$.after = comp.addr(); 737 } 738 | '(' expr ')' { 739 $$ = $2; 740 } 741 | '#' expr { 742 TD type = comp.rvalue($2); 743 if (comp.type_is_int(type)) 744 comp.emit3(invokestatic, comp.pool_add_Method("java/lang/Integer", "bitCount", "(I)I")); 745 else if (comp.type_is_string(type)) 746 comp.emit3(invokevirtual, comp.pool_add_Method("java/lang/String", "length", "()I")); 747 else if (comp.type_is_array(type)) 748 comp.emit(arraylength); 749 else 750 error(@1 + @2, "type error"); 751 $$.type = comp.type_int(); 752 $$.after = comp.addr(); 753 } 754 | '#' '$' { 755 if (!comp.main) 756 error(@2, "invalid use of $"); 757 comp.emit(aload_0); 758 comp.emit(arraylength); 759 $$.type = comp.type_int(); 760 $$.after = comp.addr(); 761 } 762 | '$' expr { 763 if (!comp.main) 764 error(@1, "invalid use of $"); 765 if (!comp.type_is_int(comp.rvalue($2))) 766 error(@1 + @2, "type error"); 767 comp.emit(iconst_1); 768 comp.emit(isub); 769 comp.emit(aload_0); 770 comp.emit(swap); 771 comp.emit(aaload); 772 $$.type = comp.type_string(); 773 $$.after = comp.addr(); 774 } 775 | PP expr { 776 if (!comp.emit_update($2, true, true, $$)) 777 error(@2, "not assignable"); 778 $$.after = comp.addr(); 779 } 780 | NN expr { 781 if (!comp.emit_update($2, true, false, $$)) 782 error(@2, "not assignable"); 783 $$.after = comp.addr(); 784 } 785 | expr PP { 786 if (!comp.emit_update($1, false, true, $$)) 787 error(@1, "not assignable"); 788 $$.after = comp.addr(); 789 } 790 | expr NN { 791 if (!comp.emit_update($1, false, false, $$)) 792 error(@1, "not assignable"); 793 $$.after = comp.addr(); 794 } 795 | '(' type ')' expr 796 { 797 if (!comp.coerce($4, $2.type)) 798 error(@4, "type error"); 799 $$ = $4; 800 } 801 | expr '[' expr ']' 802 { 803 if (!comp.type_is_int(comp.rvalue($3))) 804 error(@3, "type error"); 805 TD type = comp.rvalue($1); 806 if (comp.type_is_string(type)) 807 { 808 comp.emit3(invokevirtual, comp.pool_add_Method("java/lang/String", "charAt", "(I)C")); 809 $$.type = comp.type_int(); 810 } 811 else if (comp.type_is_array(type)) 812 { 813 $$.type = comp.type_of_element(type); 814 $$.mode = Expr::ARRAY; 815 } 816 else 817 { 818 error(@1, "type error"); 819 } 820 $$.after = comp.addr(); 821 } 822 | expr '.' ID { 823 error(@2, "not implemented"); 824 $$ = $1; 825 } 826 | expr AR ID { 827 error(@2, "invalid operation"); 828 $$ = $1; 829 } 830 | ID '(' optexprs ')' 831 { 832 Entry *entry = comp.table[comp.scope]->lookup($1); 833 if (entry == NULL) 834 { 835 const Library *lib = comp.library($1, $3.type); 836 if (lib != NULL) 837 { 838 // emit virtual or static method invocation of the library function 839 if (lib->virtype != NULL) 840 comp.emit3(invokevirtual, comp.pool_add_Method(lib->package, lib->method, lib->type)); 841 else 842 comp.emit3(invokestatic, comp.pool_add_Method(lib->package, lib->method, lib->type)); 843 // boolean and char are ints, computationally so "Z" and "C" return types are OK to use as "I" 844 $$.type = comp.type_of_return(lib->type); 845 if (comp.type_is_boolean($$.type) || comp.type_is_char($$.type)) 846 $$.type = comp.type_int(); 847 } 848 else 849 { 850 error(@1, "undefined function or type error in arguments"); 851 } 852 } 853 else if (!comp.type_is_function(entry->type)) 854 { 855 error(@1, "not a function"); 856 } 857 else if (comp.type_check_args($3.type, entry->type)) 858 { 859 // invoke a compiled function 860 comp.emit3(invokestatic, comp.pool_add_Method($1->c_str(), entry->type)); 861 $$.type = comp.type_of_return(entry->type); 862 } 863 else 864 { 865 error(@3, "type error in arguments"); 866 } 867 $$.after = comp.addr(); 868 } 869 | NEW type '[' expr ']' 870 { 871 if (!comp.type_is_int(comp.rvalue($4))) 872 error(@4, "type error"); 873 if (comp.type_is_int($2.type)) 874 comp.emit2(newarray, T_INT); 875 else if (comp.type_is_double($2.type)) 876 comp.emit2(newarray, T_DOUBLE); 877 else 878 comp.emit3(anewarray, comp.pool_add_Class(comp.type_of_reference($2.type))); 879 $$.type = comp.type_array($2.type); 880 $$.after = comp.addr(); 881 } 882 | ID { 883 Entry *entry = comp.table[comp.scope]->lookup($1); 884 if (entry != NULL) 885 { 886 if (comp.type_is_function(entry->type)) 887 error(@1, "not a variable"); 888 $$.type = entry->type; 889 $$.mode = entry->table->scope > 0 ? Expr::LOCAL : Expr::STATIC; 890 $$.place = entry->place; 891 } 892 else 893 { 894 error(@1, "undefined name"); 895 } 896 $$.after = comp.addr(); 897 } 898 | U8 { 899 if ($1 == 0) 900 comp.emit(iconst_0); 901 else if ($1 == 1) 902 comp.emit(iconst_1); 903 else if ($1 == 2) 904 comp.emit(iconst_2); 905 else if ($1 == 3) 906 comp.emit(iconst_3); 907 else if ($1 == 4) 908 comp.emit(iconst_4); 909 else if ($1 == 5) 910 comp.emit(iconst_5); 911 else if ($1 <= 0x7f) 912 comp.emit2(bipush, static_cast<U1>($1)); 913 else if ($1 <= 0x7fff) 914 comp.emit3(sipush, static_cast<U2>($1)); 915 else if ($1 <= 0x7fffffff) 916 comp.emit_ldc(comp.pool_add_Integer(static_cast<U4>($1))); 917 else 918 error(@1, "integer constant out of range"); 919 $$.type = comp.type_int(); 920 $$.after = comp.addr(); 921 } 922 | F8 { 923 if ($1 == 0.0) 924 comp.emit(dconst_0); 925 else if ($1 == 1.0) 926 comp.emit(dconst_1); 927 else 928 comp.emit3(ldc2_w, comp.pool_add_Double($1)); 929 $$.type = comp.type_double(); 930 $$.after = comp.addr(); 931 } 932 | CS { 933 comp.emit_ldc(comp.pool_add_String($1)); 934 $$.type = comp.type_string(); 935 $$.after = comp.addr(); 936 } 937 | FALSE { 938 $$.falselist = comp.backpatch_list_addr(); 939 comp.emit3(goto_, 0); 940 $$.after = comp.addr(); 941 } 942 | TRUE { 943 $$.truelist = comp.backpatch_list_addr(); 944 comp.emit3(goto_, 0); 945 $$.after = comp.addr(); 946 } 947 ; 948 949 optexpr : expr { 950 if (comp.type_is_circuit($1.type)) 951 { 952 comp.backpatch($1.truelist, comp.addr()); 953 comp.backpatch($1.falselist, comp.addr()); 954 } 955 else if (!comp.type_is_void($1.type)) 956 { 957 if ($1.mode == Expr::VALUE) 958 { 959 if (comp.type_is_double($1.type)) 960 comp.emit(pop2); 961 else 962 comp.emit(pop); 963 } 964 else if ($1.mode == Expr::ARRAY) 965 { 966 comp.emit(pop2); 967 } 968 } 969 } 970 | /* empty */ 971 ; 972 973 cond : expr { 974 comp.circuit($1); 975 $$ = $1; 976 } 977 ; 978 979 optcond : cond { 980 $$ = $1; 981 } 982 | /* empty */ { 983 $$.truelist = comp.backpatch_list_addr(); 984 comp.emit3(goto_, 0); 985 $$.after = comp.addr(); 986 } 987 ; 988 989 A : /* empty */ { 990 $$ = comp.addr(); 991 } 992 ; 993 994 B : /* empty */ { 995 comp.break_init(); 996 $$ = comp.addr(); 997 } 998 ; 999 1000 C : /* empty */ { 1001 comp.continue_init(); 1002 $$ = comp.addr(); 1003 } 1004 ; 1005 1006 D : /* empty */ { 1007 comp.emit(dup); 1008 } 1009 ; 1010 1011 G : /* empty */ { 1012 $$ = comp.addr(); 1013 comp.emit3(goto_, 0); 1014 } 1015 ; 1016 1017 %% 1018 1019 int main(int argc, char **argv) 1020 { 1021 FILE *file; 1022 1023 // open the specified source code file for reading 1024 if (argc <= 1 || (file = fopen(argv[1], "r")) == NULL) 1025 { 1026 std::cerr << "Cannot open file for reading\n"; 1027 exit(EXIT_FAILURE); 1028 } 1029 1030 // construct a lexer taking input from the specified file encoded in UTF-8/16/32 1031 yy::Scanner lexer(file); 1032 1033 // with bison-complete and bison-locations we can set the filename to display with syntax errors 1034 lexer.filename = argv[1]; 1035 1036 // the class name is the basename of the filename without path and extension suffix 1037 const char *s = strrchr(argv[1], '/'); 1038 if (s == NULL) 1039 s = argv[1]; 1040 else 1041 ++s; 1042 const char *e = strrchr(s, '.'); 1043 if (e == NULL) 1044 e = s + strlen(s); 1045 std::string name(s, e - s); 1046 1047 // construct a compiler for the class 1048 Compiler comp(name); 1049 1050 // keep track of the number of errors reported with yy:Parser::error() 1051 size_t errors = 0; 1052 1053 // construct a parser, needs the lexer for tokens and compiler for semantic actions 1054 yy::Parser parser(lexer, comp, errors); 1055 1056 // parse and compile the source into a JVM class file 1057 if (parser.parse() || errors > 0) 1058 { 1059 std::cerr << "Compilation errors: class file " << name << ".class not saved\n"; 1060 exit(EXIT_FAILURE); 1061 } 1062 1063 // save the JVM class file 1064 comp.save(); 1065 1066 if (file != stdin) 1067 fclose(file); 1068 1069 exit(EXIT_SUCCESS); 1070 } 1071 1072 // display error and location in the source code 1073 void yy::Parser::error(const location& loc, const std::string& msg) 1074 { 1075 ++errors; 1076 1077 std::cerr << loc << ": " << msg << std::endl; 1078 if (loc.begin.line == loc.end.line && loc.begin.line == static_cast<int>(lexer.lineno())) 1079 { 1080 // the error is on the current line and spans only one line 1081 std::cerr << lexer.matcher().line() << std::endl; 1082 for (int i = 0; i < loc.begin.column; ++i) 1083 std::cerr << " "; 1084 for (int i = loc.begin.column; i <= loc.end.column; ++i) 1085 std::cerr << "~"; 1086 std::cerr << std::endl; 1087 } 1088 else 1089 { 1090 // the error is not on the current line or spans multiple lines 1091 FILE *file = lexer.in().file(); // the current file being scanned 1092 if (file != NULL) 1093 { 1094 yy::Scanner::Matcher *m = lexer.new_matcher(file); // new matcher 1095 lexer.push_matcher(m); // save the current matcher 1096 off_t pos = ftell(file); // save current position in the file 1097 fseek(file, 0, SEEK_SET); // go to the start of the file 1098 for (int i = 1; i < loc.begin.line; ++i) 1099 m->skip('\n'); // skip to the next line 1100 for (int i = loc.begin.line; i <= loc.end.line; ++i) 1101 { 1102 std::cerr << m->line() << std::endl; // display offending line 1103 m->skip('\n'); // next line 1104 } 1105 fseek(file, pos, SEEK_SET); // restore position in the file to continue scanning 1106 lexer.pop_matcher(); // restore matcher 1107 } 1108 } 1109 if (lexer.size() == 0) // if token is unknown (no match) 1110 lexer.matcher().winput(); // skip character 1111 } 1112