1 // Mini C parser by Robert van Engelen
2 // A simple one-pass, syntax-directed translation of mini C to JVM bytecode
3 // Requires minic.l, minic.y, minic.hpp
4 // See minicdemo.c for a description of the mini C features
5 //
6 // Ideas for compiler improvements (from easy to hard, roughly):
7 // - add more library functions that compile to JVM virtual and static method invocations
8 // - allow polymorphic functions instead of generating redeclaration errors
9 // - add constant folding, i.e. Expr class contains U4, F8, and std::string constants to combine
10 // - add variable declaration initializers, e.g. int a=1; and/or implicit initialization, requires init method to init statics
11 // - allow variable declarations anywhere in code blocks, not just at the start of a function body
12 // - add structs/classes (structs without methods or classes with methods)
13 // - add exceptions, e.g. 'try' and 'catch' blocks
14 
15 %require "3.2"
16 %skeleton "lalr1.cc"
17 %language "c++"
18 
19 // parser class: yy::Parser
20 %define api.namespace {yy}
21 %define api.parser.class {Parser}
22 
23 // use C++ variants and generate yy::Parser::make_<TOKEN> constructors
24 %define api.value.type variant
25 %define api.token.constructor
26 
27 // verbose error messages, reported with yy::Parser::error()
28 %define parse.error verbose
29 
30 // generate parser.hpp
31 %defines
32 
33 // generate parser.cpp
34 %output "parser.cpp"
35 
36 // enable bison locations and generate location.hpp
37 %locations
38 %define api.location.file "location.hpp"
39 
40 // pass Scanner and Compiler objects to yy::Parser::parse(), to use lexer and comp in semantic actions
41 %parse-param {yy::Scanner& lexer}
42 %parse-param {Compiler& comp}
43 %parse-param {size_t& errors}
44 
45 // generate yy::Parser::token::TOKEN_<name> token constants
46 %define api.token.prefix {TOKEN_}
47 
48 // we expect one shift-reduce conflict, for the ambiguous if-else
49 %expect 1
50 
51 // tokens with semantic values
52 %token <ID> ID "identifier";
53 %token <U8> U8 "integer constant";
54 %token <F8> F8 "float constant";
55 %token <CS> CS "string literal";
56 %token EOF  0  "end of file";
57 
58 %token
59   BREAK    "break"
60   CASE     "case"
61   CONTINUE "continue"
62   DEFAULT  "default"
63   DO       "do"
64   ELSE     "else"
65   FALSE    "false"
66   FLOAT    "float"
67   FOR      "for"
68   IF       "if"
69   INT      "int"
70   MAIN     "main"
71   NEW      "new"
72   PRINT    "print"
73   PRINTLN  "println"
74   RETURN   "return"
75   STRING   "string"
76   SWITCH   "switch"
77   TRUE     "true"
78   VOID     "void"
79   WHILE    "while"
80   PA       "+="
81   NA       "-="
82   TA       "*="
83   DA       "/="
84   MA       "%="
85   AA       "&="
86   XA       "^="
87   OA       "|="
88   LA       "<<="
89   RA       ">>="
90   OR       "||"
91   AN       "&&"
92   EQ       "=="
93   NE       "!="
94   LE       "<="
95   GE       ">="
96   LS       "<<"
97   RS       ">>"
98   PP       "++"
99   NN       "--"
100   AR       "->"
101 ;
102 
103 %token '!' '#' '$' '%' '&' '(' ')' '*' '+' ',' '-' '.' '/' ':' ';' '<' '=' '>' '?' '[' ']' '^' '{' '|' '}' '~'
104 
105 // operator precedence and associativity (includes ')' for casts and '[' for array indexing)
106 %right     '=' PA NA TA DA MA AA XA OA LA RA
107 %right     '?'
108 %right     ':'
109 %left      OR
110 %left      AN
111 %left      '|'
112 %left      '^'
113 %left      '&'
114 %left      EQ NE LE '<' GE '>'
115 %left      LS RS
116 %left      '+' '-'
117 %left      '*' '/' '%'
118 %right     '!' '~' ')'
119 %nonassoc  '#' '$' PP NN
120 %left      '[' '.' AR
121 
122 // the code to include with parser.hpp
123 %code requires {
124   #include "minic.hpp"
125   namespace yy {
126     class Scanner;
127   }
128 }
129 
130 // the code to include with parser.cpp
131 %code {
132   #include "scanner.hpp"
133   #include <iostream>
134   // within yy::Parser::parse() we should invoke lexer.lex() as yylex()
135   #undef yylex
136   #define yylex lexer.lex
137 }
138 
139 // nonterminals with Type semantic values
140 %type <Type> type list args optargs exprs optexprs
141 
142 // nonterminals with Proto semantic values
143 %type <Proto> proto
144 
145 // nonterminals with Expr semantic values
146 %type <Expr> expr cond optcond
147 
148 // for print or println functions
149 %type <CS> print prints
150 
151 // marker nonterminals with U4 address semantic values
152 %type <U4> A B C G
153 
154 %%
155 
156 program : statics       {
157                           delete comp.table[comp.scope];
158                         }
159         ;
160 
161 statics : statics func
162         | statics decl
163         | /* empty */   {
164                           comp.table[++comp.scope] = new Table();
165                         }
166         ;
167 
168 func    : main '(' ')' block
169                         {
170                           // make sure we return from main()
171                           comp.emit(return_);
172                           // construct a static method "main"
173                           ID id = lexer.symbol("main");
174                           TD type = comp.type_function("[Ljava/lang/String;", "V");
175                           Table *table = comp.table[comp.scope];
176                           comp.new_method(ACC_PUBLIC | ACC_STATIC, id, type, table->locals, 256);
177                           delete table;
178                           if (!comp.table[--comp.scope]->enter_func(id, type, false))
179                             error(@1, "redefined main");
180                         }
181         | proto block   {
182                           // make sure we return (compiler does not check if functions return a value)
183                           comp.emit(return_);
184                           // constuct a static function
185                           comp.new_method(ACC_PUBLIC | ACC_STATIC, $1.id, $1.type, comp.table[comp.scope]->locals, 256);
186                           delete comp.table[comp.scope--];
187                           if (!comp.table[comp.scope]->enter_func($1.id, $1.type, false))
188                             error(@2, "redefined function");
189                         }
190         | proto ';'     {
191                           delete comp.table[comp.scope--];
192                         }
193         ;
194 
195 proto   : type ID '(' S optargs ')'
196                         {
197                           $$.type = comp.type_function($5.type, $1.type);
198                           if (!comp.table[comp.scope - 1]->enter_func($2, $$.type, true))
199                             error(@2, "redefined function");
200                           $$.id = $2;
201                         }
202 
203 main    : type MAIN     {
204                           // start parsing the main() function, create a local scope table
205                           Table *table = new Table(comp.table[comp.scope]);
206                           comp.table[++comp.scope] = table;
207                           // start with offset 1 for the locals in main() since 0 is used for the argument array
208                           table->locals = 1;
209                           // to emit code in a buffer, copy the code to the method later
210                           comp.new_method_code();
211                           // this is the main() function
212                           comp.main = true;
213                           // main() may return int or void
214                           if (!comp.type_is_void($1.type) && !comp.type_is_int($1.type))
215                             error(@1, "main can only return an int or void");
216                           // the function's return type
217                           comp.return_type = $1.type;
218                         }
219         ;
220 
221 S       : /* empty */   {
222                           // start parsing the scope of a function, create a local scope table
223                           Table *table = new Table(comp.table[comp.scope]);
224                           comp.table[++comp.scope] = table;
225                           // to emit code in a buffer, copy the code to the method later
226                           comp.new_method_code();
227                           // not the main() function
228                           comp.main = false;
229                           // the function's return type
230                           comp.return_type = comp.decl_type;
231                         }
232         ;
233 
234 block   : '{' decls stmts '}'
235         ;
236 
237 decls   : decls decl
238         | /* empty */
239         ;
240 
241 decl    : list ';'
242         ;
243 
244 type    : VOID          {
245                           $$.type = comp.decl_type = comp.type_void();
246                         }
247         | INT           {
248                           $$.type = comp.decl_type = comp.type_int();
249                         }
250         | FLOAT         {
251                           $$.type = comp.decl_type = comp.type_double();
252                         }
253         | STRING        {
254                           $$.type = comp.decl_type = comp.type_string();
255                         }
256         | type '[' ']'  {
257                           // array types can be declared but arrays are not fully implemented
258                           $$.type = comp.decl_type = comp.type_array($1.type);
259                         }
260         ;
261 
262 list    : list ',' ID   {
263                           if (comp.scope == 0)
264                           {
265                             // add a static variable, stored as a static field
266                             if (!comp.table[0]->enter($3, $1.type, comp.new_field(ACC_STATIC, $3, $1.type)))
267                               error(@3, "redefined static variable");
268                           }
269                           else
270                           {
271                             // add a local variable, stored in the method frame (double takes two frame slots)
272                             if (!comp.table[comp.scope]->enter($3, $1.type, comp.table[comp.scope]->locals))
273                               error(@3, "redefined variable");
274                             comp.table[comp.scope]->locals += 1 + comp.type_is_double($1.type);
275                           }
276                           $$ = $1;
277                         }
278         | type ID       {
279                           if (comp.scope == 0)
280                           {
281                             // add a static variable, stored as a static field
282                             if (!comp.table[0]->enter($2, $1.type, comp.new_field(ACC_STATIC, $2, $1.type)))
283                               error(@2, "redefined static variable");
284                           }
285                           else
286                           {
287                             // add a local variable, stored in the method frame (double takes two frame slots)
288                             if (!comp.table[comp.scope]->enter($2, $1.type, comp.table[comp.scope]->locals))
289                               error(@2, "redefined variable");
290                             comp.table[comp.scope]->locals += 1 + comp.type_is_double($1.type);
291                           }
292                           $$ = $1;
293                         }
294         ;
295 
296 args    : args ',' type ID
297                         {
298                           // add an argument variable, stored in the method frame (double takes two frame slots)
299                           if (!comp.table[comp.scope]->enter($4, $3.type, comp.table[comp.scope]->locals))
300                             error(@4, "redefined argument");
301                           comp.table[comp.scope]->locals += 1 + comp.type_is_double($3.type);
302                           // concat argument types to produce the JVM type descriptor of the method, e.g. "II" in "(II)I"
303                           $$.type = comp.type_concat($1.type, $3.type);
304                         }
305         | type ID       {
306                           // add an argument variable, stored in the method frame (double takes two frame slots)
307                           if (!comp.table[comp.scope]->enter($2, $1.type, comp.table[comp.scope]->locals))
308                             error(@2, "redefined argument");
309                           comp.table[comp.scope]->locals += 1 + comp.type_is_double($1.type);
310                           $$ = $1;
311                         }
312         ;
313 
314 optargs : args          {
315                           $$ = $1;
316                         }
317         | /* empty */   {
318                           $$.type = comp.type_none();
319                         }
320         ;
321 
322 stmts   : stmts stmt
323         | /* empty */
324         ;
325 
326 stmt    : IF '(' cond ')' stmt
327                         {
328                           comp.backpatch($3.truelist, $3.after);
329                           comp.backpatch($3.falselist, comp.addr());
330                         }
331         | IF '(' cond ')' stmt ELSE G A stmt
332                         {
333                           comp.backpatch($3.truelist, $3.after);
334                           comp.backpatch($3.falselist, $8);
335                           comp.backpatch($7, comp.addr());
336                         }
337         | switch G '{' cases '}' G
338                         {
339                           comp.backpatch($2, comp.addr());
340                           // generate switch lookup table and backpatch the break instruction jumps
341                           comp.switch_done();
342                           comp.backpatch($6, comp.addr());
343                         }
344         | WHILE '(' C cond ')' B stmt G
345                         {
346                           comp.backpatch($4.truelist, $6);
347                           comp.backpatch($4.falselist, comp.addr());
348                           comp.backpatch($8, $3);
349                           // backpatch the break and continue goto instruction jumps
350                           comp.loop_done();
351                         }
352         | DO B stmt WHILE '(' C cond ')' ';'
353                         {
354                           comp.backpatch($7.truelist, $2);
355                           comp.backpatch($7.falselist, comp.addr());
356                           // backpatch the break and continue goto instruction jumps
357                           comp.loop_done();
358                         }
359         | FOR '(' optexpr ';' C optcond ';' A optexpr G ')' B stmt G
360                         {
361                           comp.backpatch($6.truelist, $12);
362                           comp.backpatch($6.falselist, comp.addr());
363                           comp.backpatch($10, $5);
364                           comp.backpatch($14, $8);
365                           // backpatch the break and continue goto instruction jumps
366                           comp.loop_done();
367                         }
368         | RETURN expr ';'
369                         {
370                           TD type = comp.rvalue($2);
371                           if (comp.main)
372                           {
373                             if (!comp.type_is_int(comp.return_type))
374                               error(@2, "return type error");
375                             else if (!comp.coerce($2, comp.return_type))
376                               error(@2, "type error");
377                             comp.emit3(invokestatic, comp.pool_add_Method("java/lang/System", "exit", "(I)V"));
378                           }
379                           else
380                           {
381                             if (!comp.coerce($2, comp.return_type))
382                               error(@2, "type error");
383                             else if (comp.type_is_int(comp.return_type))
384                               comp.emit(ireturn);
385                             else if (comp.type_is_double(type))
386                               comp.emit(freturn);
387                             else
388                               comp.emit(areturn);
389                           }
390                         }
391         | RETURN ';'    {
392                           if (comp.main)
393                           {
394                             if (!comp.type_is_void(comp.return_type))
395                               error(@1, "return requires a value");
396                             comp.emit(iconst_0);
397                             comp.emit3(invokestatic, comp.pool_add_Method("java/lang/System", "exit", "(I)V"));
398                           }
399                           else
400                           {
401                             if (!comp.type_is_void(comp.return_type))
402                               error(@1, "return requires a value");
403                             comp.emit(return_);
404                           }
405                         }
406         | prints ';'    {
407                           comp.emit(pop);
408                         }
409         | BREAK ';'     {
410                           if (!comp.emit_break())
411                             error(@1, "break not in loop or switch");
412                         }
413         | CONTINUE ';'  {
414                           if (!comp.emit_continue())
415                             error(@1, "continue not in loop");
416                         }
417         | '{' stmts '}'
418         | optexpr ';'
419         | error ';'     {
420                           // synchronize on ; to continue parsing
421                           yyerrok;
422                         }
423         ;
424 
425 switch  : SWITCH '(' expr ')'
426                         {
427                           if (!comp.type_is_int(comp.rvalue($3)))
428                             error(@3, "type error");
429                           comp.switch_init();
430                         }
431         ;
432 
433 cases   : cases case ':' stmts
434         | /* empty */
435         ;
436 
437 case    : CASE U8       {
438                           if ($2 > 0x7fffffff)
439                             error(@2, "integer constant out of range");
440                           else if (!comp.emit_case(static_cast<U4>($2), comp.addr()))
441                             error(@2, "duplicate case value");
442                         }
443         | CASE '-' U8   {
444                           if ($3 > 0x80000000)
445                             error(@3, "integer constant out of range");
446                           else if (!comp.emit_case(static_cast<U4>(-$3), comp.addr()))
447                             error(@2 + @3, "duplicate case value");
448                         }
449         | DEFAULT       {
450                           if (!comp.emit_default(comp.addr()))
451                             error(@1, "duplicate default");
452                         }
453         ;
454 
455 prints  : prints ',' D expr
456                         {
457                           TD type = comp.rvalue($4);
458                           if (comp.type_is_int(type))
459                             comp.emit3(invokevirtual, comp.pool_add_Method("java/io/PrintStream", $1, "(I)V"));
460                           else if (comp.type_is_double(type))
461                             comp.emit3(invokevirtual, comp.pool_add_Method("java/io/PrintStream", $1, "(D)V"));
462                           else if (comp.type_is_string(type))
463                             comp.emit3(invokevirtual, comp.pool_add_Method("java/io/PrintStream", $1, "(Ljava/lang/String;)V"));
464                           else
465                             error(@4, "type error");
466                           $$ = $1;
467                         }
468         | print expr    {
469                           TD type = comp.rvalue($2);
470                           if (comp.type_is_int(type))
471                             comp.emit3(invokevirtual, comp.pool_add_Method("java/io/PrintStream", $1, "(I)V"));
472                           else if (comp.type_is_double(type))
473                             comp.emit3(invokevirtual, comp.pool_add_Method("java/io/PrintStream", $1, "(D)V"));
474                           else if (comp.type_is_string(type))
475                             comp.emit3(invokevirtual, comp.pool_add_Method("java/io/PrintStream", $1, "(Ljava/lang/String;)V"));
476                           else
477                             error(@2, "type error");
478                           $$ = $1;
479                         }
480         ;
481 
482 print   : PRINT         {
483                           comp.emit3(getstatic, comp.pool_add_Field("java/lang/System", "out", "Ljava/io/PrintStream;"));
484                           comp.emit(dup);
485                           $$ = "print";
486                         }
487         | PRINTLN       {
488                           comp.emit3(getstatic, comp.pool_add_Field("java/lang/System", "out", "Ljava/io/PrintStream;"));
489                           comp.emit(dup);
490                           $$ = "println";
491                         }
492         ;
493 
494 exprs   : exprs ',' expr
495                         {
496                           // concat argument types to produce the JVM type descriptor of the method, e.g. "II" in "(II)I"
497                           $$.type = comp.type_concat($1.type, comp.rvalue($3));
498                         }
499         | expr          {
500                           $$.type = comp.rvalue($1);
501                         }
502         ;
503 
504 optexprs: exprs         {
505                           $$ = $1;
506                         }
507         | /* empty */   {
508                           $$ = comp.type_none();
509                         }
510         ;
511 
512 expr    : expr '=' expr {
513                           if (!comp.emit_assign($1, $3, nop, nop, $$))
514                             error(@1 + @3, "not assignable");
515                           $$.after = comp.addr();
516                         }
517         | expr PA expr  {
518                           if (!comp.emit_assign($1, $3, iadd, dadd, $$))
519                             error(@1 + @3, "not assignable");
520                           $$.after = comp.addr();
521                         }
522         | expr NA expr  {
523                           if (!comp.emit_assign($1, $3, isub, dsub, $$))
524                             error(@1 + @3, "not assignable");
525                           $$.after = comp.addr();
526                         }
527         | expr TA expr  {
528                           if (!comp.emit_assign($1, $3, imul, dmul, $$))
529                             error(@1 + @3, "not assignable");
530                           $$.after = comp.addr();
531                         }
532         | expr DA expr  {
533                           if (!comp.emit_assign($1, $3, idiv, ddiv, $$))
534                             error(@1 + @3, "not assignable");
535                           $$.after = comp.addr();
536                         }
537         | expr MA expr  {
538                           if (!comp.emit_assign($1, $3, irem, nop, $$))
539                             error(@1 + @3, "not assignable");
540                           $$.after = comp.addr();
541                         }
542         | expr AA expr  {
543                           if (!comp.emit_assign($1, $3, iand, nop, $$))
544                             error(@1 + @3, "not assignable");
545                           $$.after = comp.addr();
546                         }
547         | expr XA expr  {
548                           if (!comp.emit_assign($1, $3, ixor, nop, $$))
549                             error(@1 + @3, "not assignable");
550                           $$.after = comp.addr();
551                         }
552         | expr OA expr  {
553                           if (!comp.emit_assign($1, $3, ior, nop, $$))
554                             error(@1 + @3, "not assignable");
555                           $$.after = comp.addr();
556                         }
557         | expr LA expr  {
558                           if (!comp.emit_assign($1, $3, ishl, nop, $$))
559                             error(@1 + @3, "not assignable");
560                           $$.after = comp.addr();
561                         }
562         | expr RA expr  {
563                           if (!comp.emit_assign($1, $3, ishr, nop, $$))
564                             error(@1 + @3, "not assignable");
565                           $$.after = comp.addr();
566                         }
567         | expr '?' expr ':' expr
568                         {
569                           U4 offset = comp.circuit($1);
570                           comp.backpatch($1.truelist, $1.after);
571                           comp.adjust($3, offset);
572                           comp.adjust($5, offset);
573                           if (comp.type_is_circuit($3.type) && comp.type_is_circuit($5.type))
574                           {
575                             $$.truelist = comp.merge($3.truelist, $5.truelist);
576                             $$.falselist = comp.merge($3.falselist, $5.falselist);
577                           }
578                           else if (comp.type_is_circuit($3.type))
579                           {
580                             comp.circuit($5);
581                             $$.truelist = comp.merge($3.truelist, $5.truelist);
582                             $$.falselist = comp.merge($3.falselist, $5.falselist);
583                           }
584                           else if (comp.type_is_circuit($5.type))
585                           {
586                             comp.adjust($5, comp.circuit($3));
587                             $$.truelist = comp.merge($3.truelist, $5.truelist);
588                             $$.falselist = comp.merge($3.falselist, $5.falselist);
589                           }
590                           else
591                           {
592                             if (comp.type_equal($3.type, $5.type))
593                             {
594                               $$.type = $3.type;
595                             }
596                             else
597                             {
598                               $$.type = comp.type_wider($3.type, $5.type);
599                               if ($$.type == NULL)
600                                 error(@3 + @5, "type error");
601                               comp.coerce($5, $$.type);
602                               comp.coerce($3, $$.type);
603                             }
604                             comp.insert3($3.after, goto_, 0);
605                             comp.backpatch($3.after - 3, comp.addr());
606                           }
607                           comp.backpatch($1.falselist, $3.after);
608                           $$.after = comp.addr();
609                         }
610         | expr OR expr  {
611                           comp.adjust($3, comp.circuit($1));
612                           comp.circuit($3);
613                           $$.truelist = comp.merge($1.truelist, $3.truelist);
614                           comp.backpatch($1.falselist, $1.after);
615                           $$.falselist = $3.falselist;
616                           $$.after = comp.addr();
617                         }
618         | expr AN expr  {
619                           comp.adjust($3, comp.circuit($1));
620                           comp.circuit($3);
621                           $$.falselist = comp.merge($1.falselist, $3.falselist);
622                           comp.backpatch($1.truelist, $1.after);
623                           $$.truelist = $3.truelist;
624                           $$.after = comp.addr();
625                         }
626         | expr '|' expr {
627                           if (!comp.emit_oper($1, $3, ior, nop, $$))
628                             error(@1 + @3, "type error");
629                           $$.after = comp.addr();
630                         }
631         | expr '^' expr {
632                           if (!comp.emit_oper($1, $3, ixor, nop, $$))
633                             error(@1 + @3, "type error");
634                           $$.after = comp.addr();
635                         }
636         | expr '&' expr {
637                           if (!comp.emit_oper($1, $3, iand, nop, $$))
638                             error(@1 + @3, "type error");
639                           $$.after = comp.addr();
640                         }
641         | expr EQ expr  {
642                           if (!comp.emit_compare($1, $3, ifeq, if_icmpeq, $$))
643                             error(@1 + @3, "type error");
644                           $$.after = comp.addr();
645                         }
646         | expr NE expr  {
647                           if (!comp.emit_compare($1, $3, ifne, if_icmpne, $$))
648                             error(@1 + @3, "type error");
649                           $$.after = comp.addr();
650                         }
651         | expr GE expr  {
652                           if (!comp.emit_compare($1, $3, ifge, if_icmpge, $$))
653                             error(@1 + @3, "type error");
654                           $$.after = comp.addr();
655                         }
656         | expr '<' expr {
657                           if (!comp.emit_compare($1, $3, iflt, if_icmplt, $$))
658                             error(@1 + @3, "type error");
659                           $$.after = comp.addr();
660                         }
661         | expr LE expr  {
662                           if (!comp.emit_compare($1, $3, ifle, if_icmple, $$))
663                             error(@1 + @3, "type error");
664                           $$.after = comp.addr();
665                         }
666         | expr '>' expr {
667                           if (!comp.emit_compare($1, $3, ifgt, if_icmpgt, $$))
668                             error(@1 + @3, "type error");
669                           $$.after = comp.addr();
670                         }
671         | expr LS expr  {
672                           if (!comp.emit_oper($1, $3, ishl, nop, $$))
673                             error(@1 + @3, "type error");
674                           $$.after = comp.addr();
675                         }
676         | expr RS expr  {
677                           if (!comp.emit_oper($1, $3, ishr, nop, $$))
678                             error(@1 + @3, "type error");
679                           $$.after = comp.addr();
680                         }
681         | expr '+' expr {
682                           if (!comp.emit_oper($1, $3, iadd, dadd, $$))
683                             error(@1 + @3, "type error");
684                           $$.after = comp.addr();
685                         }
686         | expr '-' expr {
687                           if (!comp.emit_oper($1, $3, isub, dsub, $$))
688                             error(@1 + @3, "type error");
689                           $$.after = comp.addr();
690                         }
691         | expr '*' expr {
692                           if (!comp.emit_oper($1, $3, imul, dmul, $$))
693                             error(@1 + @3, "type error");
694                           $$.after = comp.addr();
695                         }
696         | expr '/' expr {
697                           if (!comp.emit_oper($1, $3, idiv, ddiv, $$))
698                             error(@1 + @3, "type error");
699                           $$.after = comp.addr();
700                         }
701         | expr '%' expr {
702                           if (!comp.emit_oper($1, $3, irem, nop, $$))
703                             error(@1 + @3, "type error");
704                           $$.after = comp.addr();
705                         }
706         | '!' expr      {
707                           comp.circuit($2);
708                           $$.truelist = $2.falselist;
709                           $$.falselist = $2.truelist;
710                           $$.after = comp.addr();
711                         }
712         | '~' expr      {
713                           $$.type = comp.rvalue($2);
714                           if (!comp.type_is_int($$.type))
715                             error(@1 + @2, "type error");
716                           comp.emit(iconst_m1);
717                           comp.emit(ixor);
718                           $$.after = comp.addr();
719                         }
720         | '+' expr %prec '!'
721                         {
722                           $$.type = comp.rvalue($2);
723                           if (!comp.type_is_int($$.type) && !comp.type_is_double($$.type))
724                             error(@1 + @2, "type error");
725                           $$.after = comp.addr();
726                         }
727         | '-' expr %prec '!'
728                         {
729                           $$.type = comp.rvalue($2);
730                           if (comp.type_is_int($$.type))
731                             comp.emit(ineg);
732                           else if (comp.type_is_double($$.type))
733                             comp.emit(dneg);
734                           else
735                             error(@1 + @2, "type error");
736                           $$.after = comp.addr();
737                         }
738         | '(' expr ')'  {
739                           $$ = $2;
740                         }
741         | '#' expr      {
742                           TD type = comp.rvalue($2);
743                           if (comp.type_is_int(type))
744                             comp.emit3(invokestatic, comp.pool_add_Method("java/lang/Integer", "bitCount", "(I)I"));
745                           else if (comp.type_is_string(type))
746                             comp.emit3(invokevirtual, comp.pool_add_Method("java/lang/String", "length", "()I"));
747                           else if (comp.type_is_array(type))
748                             comp.emit(arraylength);
749                           else
750                             error(@1 + @2, "type error");
751                           $$.type = comp.type_int();
752                           $$.after = comp.addr();
753                         }
754         | '#' '$'       {
755                           if (!comp.main)
756                             error(@2, "invalid use of $");
757                           comp.emit(aload_0);
758                           comp.emit(arraylength);
759                           $$.type = comp.type_int();
760                           $$.after = comp.addr();
761                         }
762         | '$' expr      {
763                           if (!comp.main)
764                             error(@1, "invalid use of $");
765                           if (!comp.type_is_int(comp.rvalue($2)))
766                             error(@1 + @2, "type error");
767                           comp.emit(iconst_1);
768                           comp.emit(isub);
769                           comp.emit(aload_0);
770                           comp.emit(swap);
771                           comp.emit(aaload);
772                           $$.type = comp.type_string();
773                           $$.after = comp.addr();
774                         }
775         | PP expr       {
776                           if (!comp.emit_update($2, true, true, $$))
777                             error(@2, "not assignable");
778                           $$.after = comp.addr();
779                         }
780         | NN expr       {
781                           if (!comp.emit_update($2, true, false, $$))
782                             error(@2, "not assignable");
783                           $$.after = comp.addr();
784                         }
785         | expr PP       {
786                           if (!comp.emit_update($1, false, true, $$))
787                             error(@1, "not assignable");
788                           $$.after = comp.addr();
789                         }
790         | expr NN       {
791                           if (!comp.emit_update($1, false, false, $$))
792                             error(@1, "not assignable");
793                           $$.after = comp.addr();
794                         }
795         | '(' type ')' expr
796                         {
797                           if (!comp.coerce($4, $2.type))
798                             error(@4, "type error");
799                           $$ = $4;
800                         }
801         | expr '[' expr ']'
802                         {
803                           if (!comp.type_is_int(comp.rvalue($3)))
804                             error(@3, "type error");
805                           TD type = comp.rvalue($1);
806                           if (comp.type_is_string(type))
807                           {
808                             comp.emit3(invokevirtual, comp.pool_add_Method("java/lang/String", "charAt", "(I)C"));
809                             $$.type = comp.type_int();
810                           }
811                           else if (comp.type_is_array(type))
812                           {
813                             $$.type = comp.type_of_element(type);
814                             $$.mode = Expr::ARRAY;
815                           }
816                           else
817                           {
818                             error(@1, "type error");
819                           }
820                           $$.after = comp.addr();
821                         }
822         | expr '.' ID   {
823                           error(@2, "not implemented");
824                           $$ = $1;
825                         }
826         | expr AR ID    {
827                           error(@2, "invalid operation");
828                           $$ = $1;
829                         }
830         | ID '(' optexprs ')'
831                         {
832                           Entry *entry = comp.table[comp.scope]->lookup($1);
833                           if (entry == NULL)
834                           {
835                             const Library *lib = comp.library($1, $3.type);
836                             if (lib != NULL)
837                             {
838                               // emit virtual or static method invocation of the library function
839                               if (lib->virtype != NULL)
840                                 comp.emit3(invokevirtual, comp.pool_add_Method(lib->package, lib->method, lib->type));
841                               else
842                                 comp.emit3(invokestatic, comp.pool_add_Method(lib->package, lib->method, lib->type));
843                               // boolean and char are ints, computationally so "Z" and "C" return types are OK to use as "I"
844                               $$.type = comp.type_of_return(lib->type);
845                               if (comp.type_is_boolean($$.type) || comp.type_is_char($$.type))
846                                 $$.type = comp.type_int();
847                             }
848                             else
849                             {
850                               error(@1, "undefined function or type error in arguments");
851                             }
852                           }
853                           else if (!comp.type_is_function(entry->type))
854                           {
855                             error(@1, "not a function");
856                           }
857                           else if (comp.type_check_args($3.type, entry->type))
858                           {
859                             // invoke a compiled function
860                             comp.emit3(invokestatic, comp.pool_add_Method($1->c_str(), entry->type));
861                             $$.type = comp.type_of_return(entry->type);
862                           }
863                           else
864                           {
865                             error(@3, "type error in arguments");
866                           }
867                           $$.after = comp.addr();
868                         }
869         | NEW type '[' expr ']'
870                         {
871                           if (!comp.type_is_int(comp.rvalue($4)))
872                             error(@4, "type error");
873                           if (comp.type_is_int($2.type))
874                             comp.emit2(newarray, T_INT);
875                           else if (comp.type_is_double($2.type))
876                             comp.emit2(newarray, T_DOUBLE);
877                           else
878                             comp.emit3(anewarray, comp.pool_add_Class(comp.type_of_reference($2.type)));
879                           $$.type = comp.type_array($2.type);
880                           $$.after = comp.addr();
881                         }
882         | ID            {
883                           Entry *entry = comp.table[comp.scope]->lookup($1);
884                           if (entry != NULL)
885                           {
886                             if (comp.type_is_function(entry->type))
887                               error(@1, "not a variable");
888                             $$.type = entry->type;
889                             $$.mode = entry->table->scope > 0 ? Expr::LOCAL : Expr::STATIC;
890                             $$.place = entry->place;
891                           }
892                           else
893                           {
894                             error(@1, "undefined name");
895                           }
896                           $$.after = comp.addr();
897                         }
898         | U8            {
899                           if ($1 == 0)
900                             comp.emit(iconst_0);
901                           else if ($1 == 1)
902                             comp.emit(iconst_1);
903                           else if ($1 == 2)
904                             comp.emit(iconst_2);
905                           else if ($1 == 3)
906                             comp.emit(iconst_3);
907                           else if ($1 == 4)
908                             comp.emit(iconst_4);
909                           else if ($1 == 5)
910                             comp.emit(iconst_5);
911                           else if ($1 <= 0x7f)
912                             comp.emit2(bipush, static_cast<U1>($1));
913                           else if ($1 <= 0x7fff)
914                             comp.emit3(sipush, static_cast<U2>($1));
915                           else if ($1 <= 0x7fffffff)
916                             comp.emit_ldc(comp.pool_add_Integer(static_cast<U4>($1)));
917                           else
918                             error(@1, "integer constant out of range");
919                           $$.type = comp.type_int();
920                           $$.after = comp.addr();
921                         }
922         | F8            {
923                           if ($1 == 0.0)
924                             comp.emit(dconst_0);
925                           else if ($1 == 1.0)
926                             comp.emit(dconst_1);
927                           else
928                             comp.emit3(ldc2_w, comp.pool_add_Double($1));
929                           $$.type = comp.type_double();
930                           $$.after = comp.addr();
931                         }
932         | CS            {
933                           comp.emit_ldc(comp.pool_add_String($1));
934                           $$.type = comp.type_string();
935                           $$.after = comp.addr();
936                         }
937         | FALSE         {
938                           $$.falselist = comp.backpatch_list_addr();
939                           comp.emit3(goto_, 0);
940                           $$.after = comp.addr();
941                         }
942         | TRUE          {
943                           $$.truelist = comp.backpatch_list_addr();
944                           comp.emit3(goto_, 0);
945                           $$.after = comp.addr();
946                         }
947         ;
948 
949 optexpr : expr          {
950                           if (comp.type_is_circuit($1.type))
951                           {
952                             comp.backpatch($1.truelist, comp.addr());
953                             comp.backpatch($1.falselist, comp.addr());
954                           }
955                           else if (!comp.type_is_void($1.type))
956                           {
957                             if ($1.mode == Expr::VALUE)
958                             {
959                               if (comp.type_is_double($1.type))
960                                 comp.emit(pop2);
961                               else
962                                 comp.emit(pop);
963                             }
964                             else if ($1.mode == Expr::ARRAY)
965                             {
966                               comp.emit(pop2);
967                             }
968                           }
969                         }
970         | /* empty */
971         ;
972 
973 cond    : expr          {
974                           comp.circuit($1);
975                           $$ = $1;
976                         }
977         ;
978 
979 optcond : cond          {
980                           $$ = $1;
981                         }
982         | /* empty */   {
983                           $$.truelist = comp.backpatch_list_addr();
984                           comp.emit3(goto_, 0);
985                           $$.after = comp.addr();
986                         }
987         ;
988 
989 A       : /* empty */   {
990                           $$ = comp.addr();
991                         }
992         ;
993 
994 B       : /* empty */   {
995                           comp.break_init();
996                           $$ = comp.addr();
997                         }
998         ;
999 
1000 C       : /* empty */   {
1001                           comp.continue_init();
1002                           $$ = comp.addr();
1003                         }
1004         ;
1005 
1006 D       : /* empty */   {
1007                           comp.emit(dup);
1008                         }
1009         ;
1010 
1011 G       : /* empty */   {
1012                           $$ = comp.addr();
1013                           comp.emit3(goto_, 0);
1014                         }
1015         ;
1016 
1017 %%
1018 
1019 int main(int argc, char **argv)
1020 {
1021   FILE *file;
1022 
1023   // open the specified source code file for reading
1024   if (argc <= 1 || (file = fopen(argv[1], "r")) == NULL)
1025   {
1026     std::cerr << "Cannot open file for reading\n";
1027     exit(EXIT_FAILURE);
1028   }
1029 
1030   // construct a lexer taking input from the specified file encoded in UTF-8/16/32
1031   yy::Scanner lexer(file);
1032 
1033   // with bison-complete and bison-locations we can set the filename to display with syntax errors
1034   lexer.filename = argv[1];
1035 
1036   // the class name is the basename of the filename without path and extension suffix
1037   const char *s = strrchr(argv[1], '/');
1038   if (s == NULL)
1039     s = argv[1];
1040   else
1041     ++s;
1042   const char *e = strrchr(s, '.');
1043   if (e == NULL)
1044     e = s + strlen(s);
1045   std::string name(s, e - s);
1046 
1047   // construct a compiler for the class
1048   Compiler comp(name);
1049 
1050   // keep track of the number of errors reported with yy:Parser::error()
1051   size_t errors = 0;
1052 
1053   // construct a parser, needs the lexer for tokens and compiler for semantic actions
1054   yy::Parser parser(lexer, comp, errors);
1055 
1056   // parse and compile the source into a JVM class file
1057   if (parser.parse() || errors > 0)
1058   {
1059     std::cerr << "Compilation errors: class file " << name << ".class not saved\n";
1060     exit(EXIT_FAILURE);
1061   }
1062 
1063   // save the JVM class file
1064   comp.save();
1065 
1066   if (file != stdin)
1067     fclose(file);
1068 
1069   exit(EXIT_SUCCESS);
1070 }
1071 
1072 // display error and location in the source code
1073 void yy::Parser::error(const location& loc, const std::string& msg)
1074 {
1075   ++errors;
1076 
1077   std::cerr << loc << ": " << msg << std::endl;
1078   if (loc.begin.line == loc.end.line && loc.begin.line == static_cast<int>(lexer.lineno()))
1079   {
1080     // the error is on the current line and spans only one line
1081     std::cerr << lexer.matcher().line() << std::endl;
1082     for (int i = 0; i < loc.begin.column; ++i)
1083       std::cerr << " ";
1084     for (int i = loc.begin.column; i <= loc.end.column; ++i)
1085       std::cerr << "~";
1086     std::cerr << std::endl;
1087   }
1088   else
1089   {
1090     // the error is not on the current line or spans multiple lines
1091     FILE *file = lexer.in().file(); // the current file being scanned
1092     if (file != NULL)
1093     {
1094       yy::Scanner::Matcher *m = lexer.new_matcher(file); // new matcher
1095       lexer.push_matcher(m); // save the current matcher
1096       off_t pos = ftell(file); // save current position in the file
1097       fseek(file, 0, SEEK_SET); // go to the start of the file
1098       for (int i = 1; i < loc.begin.line; ++i)
1099         m->skip('\n'); // skip to the next line
1100       for (int i = loc.begin.line; i <= loc.end.line; ++i)
1101       {
1102         std::cerr << m->line() << std::endl; // display offending line
1103         m->skip('\n'); // next line
1104       }
1105       fseek(file, pos, SEEK_SET); // restore position in the file to continue scanning
1106       lexer.pop_matcher(); // restore matcher
1107     }
1108   }
1109   if (lexer.size() == 0) // if token is unknown (no match)
1110     lexer.matcher().winput(); // skip character
1111 }
1112