1 // Mini C code generator for the JVM by Robert van Engelen 2 // A simple one-pass, syntax-directed translation of mini C to JVM bytecode 3 // Requires minic.l, minic.y, minic.hpp 4 // See minicdemo.c for a description of the mini C features 5 6 #ifndef MINIC_HPP 7 #define MINIC_HPP 8 9 #include <cstdlib> 10 #include <cstdio> 11 #include <cstdint> 12 #include <string> 13 #include <map> 14 #include <set> 15 #include <vector> 16 17 // JVM computational types 18 19 typedef int8_t I1; // JVM byte (signed) 20 typedef uint8_t U1; // JVM byte (unsigned) 21 22 typedef int16_t I2; // JVM short (signed) 23 typedef uint16_t U2; // JVM short (unsigned) 24 25 typedef int32_t I4; // JVM int (signed) 26 typedef uint32_t U4; // JVM int (unsigned) 27 28 typedef int64_t I8; // JVM long (signed) 29 typedef uint64_t U8; // JVM long (unsigned) 30 31 typedef float F4; // JVM float 32 typedef double F8; // JVM double 33 34 typedef const char *CS; // JVM string literal 35 36 // JVM opcodes 37 38 const U1 nop = 0x00; 39 const U1 aconst_null = 0x01; 40 const U1 iconst_m1 = 0x02; 41 const U1 iconst_0 = 0x03; 42 const U1 iconst_1 = 0x04; 43 const U1 iconst_2 = 0x05; 44 const U1 iconst_3 = 0x06; 45 const U1 iconst_4 = 0x07; 46 const U1 iconst_5 = 0x08; 47 const U1 lconst_0 = 0x09; 48 const U1 lconst_1 = 0x0a; 49 const U1 fconst_0 = 0x0b; 50 const U1 fconst_1 = 0x0c; 51 const U1 fconst_2 = 0x0d; 52 const U1 dconst_0 = 0x0e; 53 const U1 dconst_1 = 0x0f; 54 const U1 bipush = 0x10; 55 const U1 sipush = 0x11; 56 const U1 ldc = 0x12; 57 const U1 ldc_w = 0x13; 58 const U1 ldc2_w = 0x14; 59 const U1 iload = 0x15; 60 const U1 lload = 0x16; 61 const U1 fload = 0x17; 62 const U1 dload = 0x18; 63 const U1 aload = 0x19; 64 const U1 iload_0 = 0x1a; 65 const U1 iload_1 = 0x1b; 66 const U1 iload_2 = 0x1c; 67 const U1 iload_3 = 0x1d; 68 const U1 lload_0 = 0x1e; 69 const U1 lload_1 = 0x1f; 70 const U1 lload_2 = 0x20; 71 const U1 lload_3 = 0x21; 72 const U1 fload_0 = 0x22; 73 const U1 fload_1 = 0x23; 74 const U1 fload_2 = 0x24; 75 const U1 fload_3 = 0x25; 76 const U1 dload_0 = 0x26; 77 const U1 dload_1 = 0x27; 78 const U1 dload_2 = 0x28; 79 const U1 dload_3 = 0x29; 80 const U1 aload_0 = 0x2a; 81 const U1 aload_1 = 0x2b; 82 const U1 aload_2 = 0x2c; 83 const U1 aload_3 = 0x2d; 84 const U1 iaload = 0x2e; 85 const U1 laload = 0x2f; 86 const U1 faload = 0x30; 87 const U1 daload = 0x31; 88 const U1 aaload = 0x32; 89 const U1 baload = 0x33; 90 const U1 caload = 0x34; 91 const U1 saload = 0x35; 92 const U1 istore = 0x36; 93 const U1 lstore = 0x37; 94 const U1 fstore = 0x38; 95 const U1 dstore = 0x39; 96 const U1 astore = 0x3a; 97 const U1 istore_0 = 0x3b; 98 const U1 istore_1 = 0x3c; 99 const U1 istore_2 = 0x3d; 100 const U1 istore_3 = 0x3e; 101 const U1 lstore_0 = 0x3f; 102 const U1 lstore_1 = 0x40; 103 const U1 lstore_2 = 0x41; 104 const U1 lstore_3 = 0x42; 105 const U1 fstore_0 = 0x43; 106 const U1 fstore_1 = 0x44; 107 const U1 fstore_2 = 0x45; 108 const U1 fstore_3 = 0x46; 109 const U1 dstore_0 = 0x47; 110 const U1 dstore_1 = 0x48; 111 const U1 dstore_2 = 0x49; 112 const U1 dstore_3 = 0x4a; 113 const U1 astore_0 = 0x4b; 114 const U1 astore_1 = 0x4c; 115 const U1 astore_2 = 0x4d; 116 const U1 astore_3 = 0x4e; 117 const U1 iastore = 0x4f; 118 const U1 lastore = 0x50; 119 const U1 fastore = 0x51; 120 const U1 dastore = 0x52; 121 const U1 aastore = 0x53; 122 const U1 bastore = 0x54; 123 const U1 castore = 0x55; 124 const U1 sastore = 0x56; 125 const U1 pop = 0x57; 126 const U1 pop2 = 0x58; 127 const U1 dup = 0x59; 128 const U1 dup_x1 = 0x5a; 129 const U1 dup_x2 = 0x5b; 130 const U1 dup2 = 0x5c; 131 const U1 dup2_x1 = 0x5d; 132 const U1 dup2_x2 = 0x5e; 133 const U1 swap = 0x5f; 134 const U1 iadd = 0x60; 135 const U1 ladd = 0x61; 136 const U1 fadd = 0x62; 137 const U1 dadd = 0x63; 138 const U1 isub = 0x64; 139 const U1 lsub = 0x65; 140 const U1 fsub = 0x66; 141 const U1 dsub = 0x67; 142 const U1 imul = 0x68; 143 const U1 lmul = 0x69; 144 const U1 fmul = 0x6a; 145 const U1 dmul = 0x6b; 146 const U1 idiv = 0x6c; 147 const U1 ldiv_ = 0x6d; 148 const U1 fdiv = 0x6e; 149 const U1 ddiv = 0x6f; 150 const U1 irem = 0x70; 151 const U1 lrem = 0x71; 152 const U1 frem = 0x72; 153 const U1 drem_ = 0x73; 154 const U1 ineg = 0x74; 155 const U1 lneg = 0x75; 156 const U1 fneg = 0x76; 157 const U1 dneg = 0x77; 158 const U1 ishl = 0x78; 159 const U1 lshl = 0x79; 160 const U1 ishr = 0x7a; 161 const U1 lshr = 0x7b; 162 const U1 iushr = 0x7c; 163 const U1 lushr = 0x7d; 164 const U1 iand = 0x7e; 165 const U1 land = 0x7f; 166 const U1 ior = 0x80; 167 const U1 lor = 0x81; 168 const U1 ixor = 0x82; 169 const U1 lxor = 0x83; 170 const U1 iinc = 0x84; 171 const U1 i2l = 0x85; 172 const U1 i2f = 0x86; 173 const U1 i2d = 0x87; 174 const U1 l2i = 0x88; 175 const U1 l2f = 0x89; 176 const U1 l2d = 0x8a; 177 const U1 f2i = 0x8b; 178 const U1 f2l = 0x8c; 179 const U1 f2d = 0x8d; 180 const U1 d2i = 0x8e; 181 const U1 d2l = 0x8f; 182 const U1 d2f = 0x90; 183 const U1 i2b = 0x91; 184 const U1 i2c = 0x92; 185 const U1 i2s = 0x93; 186 const U1 lcmp = 0x94; 187 const U1 fcmpl = 0x95; 188 const U1 fcmpg = 0x96; 189 const U1 dcmpl = 0x97; 190 const U1 dcmpg = 0x98; 191 const U1 ifeq = 0x99; 192 const U1 ifne = 0x9a; 193 const U1 iflt = 0x9b; 194 const U1 ifge = 0x9c; 195 const U1 ifgt = 0x9d; 196 const U1 ifle = 0x9e; 197 const U1 if_icmpeq = 0x9f; 198 const U1 if_icmpne = 0xa0; 199 const U1 if_icmplt = 0xa1; 200 const U1 if_icmpge = 0xa2; 201 const U1 if_icmpgt = 0xa3; 202 const U1 if_icmple = 0xa4; 203 const U1 if_acmpeq = 0xa5; 204 const U1 if_acmpne = 0xa6; 205 const U1 goto_ = 0xa7; 206 const U1 jsr = 0xa8; 207 const U1 ret = 0xa9; 208 const U1 tableswitch = 0xaa; 209 const U1 lookupswitch = 0xab; 210 const U1 ireturn = 0xac; 211 const U1 lreturn = 0xad; 212 const U1 freturn = 0xae; 213 const U1 dreturn = 0xaf; 214 const U1 areturn = 0xb0; 215 const U1 return_ = 0xb1; 216 const U1 getstatic = 0xb2; 217 const U1 putstatic = 0xb3; 218 const U1 getfield = 0xb4; 219 const U1 putfield = 0xb5; 220 const U1 invokevirtual = 0xb6; 221 const U1 invokespecial = 0xb7; 222 const U1 invokestatic = 0xb8; 223 const U1 invokeinterface = 0xb9; 224 const U1 xxxunusedxxx1 = 0xba; 225 const U1 new_ = 0xbb; 226 const U1 newarray = 0xbc; 227 const U1 anewarray = 0xbd; 228 const U1 arraylength = 0xbe; 229 const U1 athrow = 0xbf; 230 const U1 checkcast = 0xc0; 231 const U1 instanceof = 0xc1; 232 const U1 monitorenter = 0xc2; 233 const U1 monitorexit = 0xc3; 234 const U1 wide = 0xc4; 235 const U1 multianewarray = 0xc5; 236 const U1 ifnull = 0xc6; 237 const U1 ifnonnull = 0xc7; 238 const U1 goto_w = 0xc8; 239 const U1 jsr_w = 0xc9; 240 241 // JVM newarray atype operands 242 243 const U1 T_BOOLEAN = 4; 244 const U1 T_CHAR = 5; 245 const U1 T_FLOAT = 6; 246 const U1 T_DOUBLE = 7; 247 const U1 T_BYTE = 8; 248 const U1 T_SHORT = 9; 249 const U1 T_INT = 10; 250 const U1 T_LONG = 11; 251 252 // JVM access flags 253 254 typedef U2 AF; 255 256 const AF ACC_PUBLIC = 0x0001; // declared public; may be accessed from outside its package 257 const AF ACC_PRIVATE = 0x0002; // declared private; usable only within the defining class 258 const AF ACC_PROTECTED = 0x0004; // declared protected; may be accessed within subclasses 259 const AF ACC_STATIC = 0x0008; // declared static 260 const AF ACC_FINAL = 0x0010; // declared final; no subclasses allowed or no further assignment after initialization 261 const AF ACC_SUPER = 0x0020; // treat superclass methods specially when invoked by the invokespecial instruction 262 const AF ACC_VOLATILE = 0x0040; // declared volatile; cannot be cached 263 const AF ACC_TRANSIENT = 0x0080; // declared transient; not written or read by a persistent object manager 264 const AF ACC_INTERFACE = 0x0200; // is an interface, not a class 265 const AF ACC_ABSTRACT = 0x0400; // declared abstract; may not be instantiated 266 const AF ACC_STRICT = 0x0800; // declared strictfp; floating-point mode is FP-strict 267 const AF ACC_SYNTHETIC = 0x1000; // declared synthetic; not present in the source code 268 269 // identifiers are efficienty compared for equality by their pointer to a string stored in the Scanner::symbols set 270 typedef const std::string *ID; 271 272 // JVM type descriptor, as a special case we use NULL to denote short-circuit boolean resolved via backpatching, without a value on the stack 273 typedef const char *TD; 274 275 class Table; 276 277 // table entry with identifier properties 278 class Entry { 279 280 public: 281 Entry(ID id,TD type,U2 place,Table * table,bool proto=false)282 Entry(ID id, TD type, U2 place, Table *table, bool proto = false) 283 : 284 id(id), 285 type(type), 286 place(place), 287 table(table), 288 proto(proto) 289 { } 290 291 ID id; // identifier 292 TD type; // JVM type descriptor 293 U2 place; // variables have a place that is the index of a local or a constant pool index 294 Table *table; // the table to which this entry belongs 295 bool proto; // true if prototype of a function 296 297 }; 298 299 // table hierarchy to map identifiers to their properties 300 class Table { 301 302 public: 303 Table(Table * parent=NULL)304 Table(Table *parent = NULL) 305 : 306 parent(parent), 307 scope(parent != NULL ? parent->scope + 1 : 0), 308 locals(0) 309 { } 310 311 // enter a variable in the table with its type and local index or pool index enter(ID id,TD type,U2 place)312 bool enter(ID id, TD type, U2 place) 313 { 314 for (std::vector<Entry>::iterator i = entries.begin(); i != entries.end(); ++i) 315 if (i->id == id) 316 return false; 317 318 entries.push_back(Entry(id, type, place, this)); 319 320 return true; 321 } 322 323 // enter a function in the table enter_func(ID id,TD type,bool proto)324 bool enter_func(ID id, TD type, bool proto) 325 { 326 for (std::vector<Entry>::iterator i = entries.begin(); i != entries.end(); ++i) 327 { 328 if (i->id == id) 329 { 330 if (strcmp(i->type, type) != 0) 331 return false; 332 333 if (proto) 334 return true; 335 336 if (!i->proto) 337 return false; 338 339 i->proto = false; 340 return true; 341 } 342 } 343 344 entries.push_back(Entry(id, type, 0, this, proto)); 345 346 return true; 347 } 348 349 // lookup an identifier in the table hierarchy lookup(ID id)350 Entry *lookup(ID id) 351 { 352 for (Table *table = this; table != NULL; table = table->parent) 353 for (std::vector<Entry>::iterator i = table->entries.begin(); i != table->entries.end(); ++i) 354 if (i->id == id) 355 return &*i; 356 357 return NULL; 358 } 359 360 Table *parent; // pointer to parent table defining the outer scope or NULL 361 std::vector<Entry> entries; // entries in the table 362 int scope; // scope nesting depth, zero for outermost scope (no parent) 363 U2 locals; // number of arguments and locals declared (long and double take two slots) 364 365 }; 366 367 // backpatch list to resolve jump instruction targets 368 // see also: "Compilers: Principles, Techniques, and Tools" by Aho, Sethi, and Ullman 369 class BackpatchList { 370 371 public: 372 BackpatchList(U4 addr)373 BackpatchList(U4 addr) 374 : 375 addr(addr), 376 next(NULL) 377 { } 378 379 U4 addr; // address of jump instruction to backpatch 380 BackpatchList *next; // next node in linked list 381 382 }; 383 384 // semantic value for types, see minic.y %type <Type> 385 class Type { 386 387 public: 388 Type(TD type=NULL)389 Type(TD type = NULL) 390 : 391 type(type) 392 { } 393 394 TD type; // JVM type descriptor or NULL for short-circuit boolean 395 }; 396 397 // semantic value for function prototypes 398 class Proto : public Type { 399 400 public: 401 Proto(TD type=NULL)402 Proto(TD type = NULL) 403 : 404 Type(type), 405 id() 406 { } 407 408 ID id; 409 410 }; 411 412 // semantic value for expressions, see minic.y %type <Expr> 413 class Expr : public Type { 414 415 public: 416 417 enum Mode { VALUE, LOCAL, STATIC, ARRAY }; 418 Expr(TD type=NULL)419 Expr(TD type = NULL) 420 : 421 Type(type), 422 mode(VALUE), 423 place(0), 424 truelist(NULL), 425 falselist(NULL), 426 after(0) 427 { } 428 429 Mode mode; // VALUE (on the stack or short-circuit), LOCAL var, STATIC var, or ARRAY ref + index on the stack 430 U2 place; // local or constant pool index when expression is an lvalue (LOCAL or STATIC) 431 BackpatchList *truelist; // short-circuit backpatch list for jump instructions on true condition 432 BackpatchList *falselist; // short-circuit backpatch list for jump instructions on false condition 433 U4 after; // the opcode location after the generated code for this expression 434 435 }; 436 437 // library to store Java packages to invoke virtual and static library methods as functions in mini C 438 class Library { 439 440 public: 441 442 const char *name; // mini C library function name 443 const char *package; // JVM package path 444 const char *method; // JVM method name 445 const char *virtype; // type to match if method is virtual otherwise NULL when static 446 const char *type; // JVM method type descriptor 447 448 }; 449 450 // the compiler 451 class Compiler { 452 453 public: 454 Compiler(const std::string & name)455 Compiler(const std::string& name) 456 : 457 main(false), 458 decl_type(NULL), 459 return_type(NULL), 460 scope(-1), 461 break_nest(0), 462 continue_nest(0), 463 switch_nest(0), 464 cf(name) 465 { } 466 467 // library functions we can call, translated to JVM virtual and static method invocations library(ID id,TD types)468 const Library *library(ID id, TD types) 469 { 470 // static and virtual (virtype) methods may use types I, D, Ljava/lang/String, return also C and Z that are implicitly converted to I 471 static const Library builtins[] = { 472 // System 473 { "exit", "java/lang/System", "exit", NULL, "(I)V" }, 474 { "getenv", "java/lang/System", "getenv", NULL, "(Ljava/lang/String;)Ljava/lang/String;" }, 475 // Integer 476 { "bin", "java/lang/Integer", "toBinaryString", NULL, "(I)Ljava/lang/String;" }, 477 { "hex", "java/lang/Integer", "toHexString", NULL, "(I)Ljava/lang/String;" }, 478 { "oct", "java/lang/Integer", "toOctalString", NULL, "(I)Ljava/lang/String;" }, 479 { "dec", "java/lang/String", "valueOf", NULL, "(I)Ljava/lang/String;" }, 480 { "dec", "java/lang/String", "valueOf", NULL, "(D)Ljava/lang/String;" }, 481 { "strtoi", "java/lang/Integer", "parseInt", NULL, "(Ljava/lang/String;)I" }, 482 { "strtoi", "java/lang/Integer", "parseInt", NULL, "(Ljava/lang/String;I)I" }, 483 // Double 484 { "strtof", "java/lang/Double", "parseDouble", NULL, "(Ljava/lang/String;)D" }, 485 // String 486 { "at", "java/lang/String", "charAt", "(Ljava/lang/String;I)C", "(I)C" }, 487 { "compare", "java/lang/String", "compareTo", "(Ljava/lang/String;Ljava/lang/String;)I", "(Ljava/lang/String;)I" }, 488 { "concat", "java/lang/String", "concat", "(Ljava/lang/String;Ljava/lang/String;)Ljava/lang/String;", "(Ljava/lang/String;)Ljava/lang/String;" }, 489 { "empty" , "java/lang/String", "isEmpty", "(Ljava/lang/String;)Z", "()Z" }, 490 { "find", "java/lang/String", "indexOf", "(Ljava/lang/String;I)I", "(I)I" }, 491 { "find", "java/lang/String", "indexOf", "(Ljava/lang/String;II)I", "(II)I" }, 492 { "find", "java/lang/String", "indexOf", "(Ljava/lang/String;Ljava/lang/String;)I", "(Ljava/lang/String;)I" }, 493 { "find", "java/lang/String", "indexOf", "(Ljava/lang/String;Ljava/lang/String;I)I", "(Ljava/lang/String;I)I" }, 494 { "length", "java/lang/String", "length", "(Ljava/lang/String;)I", "()I" }, 495 { "lower", "java/lang/String", "toLowerCase", "(Ljava/lang/String;)Ljava/lang/String;", "()Ljava/lang/String;" }, 496 { "matches", "java/lang/String", "matches", "(Ljava/lang/String;Ljava/lang/String;)Z", "(Ljava/lang/String;)Z" }, 497 { "rfind", "java/lang/String", "lastIndexOf", "(Ljava/lang/String;I)I", "(I)I" }, 498 { "rfind", "java/lang/String", "lastIndexOf", "(Ljava/lang/String;II)I", "(II)I" }, 499 { "rfind", "java/lang/String", "lastIndexOf", "(Ljava/lang/String;Ljava/lang/String;)I", "(Ljava/lang/String;)I" }, 500 { "rfind", "java/lang/String", "lastIndexOf", "(Ljava/lang/String;Ljava/lang/String;I)I", "(Ljava/lang/String;I)I" }, 501 { "substr", "java/lang/String", "substring", "(Ljava/lang/String;I)Ljava/lang/String;", "(I)Ljava/lang/String;" }, 502 { "substr", "java/lang/String", "substring", "(Ljava/lang/String;II)Ljava/lang/String;", "(II)Ljava/lang/String;" }, 503 { "upper", "java/lang/String", "toUpperCase", "(Ljava/lang/String;)Ljava/lang/String;", "()Ljava/lang/String;" }, 504 { "trim", "java/lang/String", "trim", "(Ljava/lang/String;)Ljava/lang/String;", "()Ljava/lang/String;" }, 505 // Math 506 { "abs", "java/lang/Math", "abs", NULL, "(I)I" }, 507 { "abs", "java/lang/Math", "abs", NULL, "(D)D" }, 508 { "acos", "java/lang/Math", "acos", NULL, "(D)D" }, 509 { "asin", "java/lang/Math", "asin", NULL, "(D)D" }, 510 { "atan", "java/lang/Math", "atan", NULL, "(D)D" }, 511 { "atan", "java/lang/Math", "atan2", NULL, "(DD)D" }, 512 { "ceil", "java/lang/Math", "ceil", NULL, "(D)D" }, 513 { "cos", "java/lang/Math", "cos", NULL, "(D)D" }, 514 { "cosh", "java/lang/Math", "cosh", NULL, "(D)D" }, 515 { "deg", "java/lang/Math", "toDegrees", NULL, "(D)D" }, 516 { "exp", "java/lang/Math", "exp", NULL, "(D)D" }, 517 { "floor", "java/lang/Math", "floor", NULL, "(D)D" }, 518 { "log", "java/lang/Math", "log", NULL, "(D)D" }, 519 { "log10", "java/lang/Math", "log10", NULL, "(D)D" }, 520 { "max", "java/lang/Math", "max", NULL, "(DD)D" }, 521 { "max", "java/lang/Math", "max", NULL, "(II)I" }, 522 { "min", "java/lang/Math", "min", NULL, "(DD)D" }, 523 { "min", "java/lang/Math", "min", NULL, "(II)I" }, 524 { "pow", "java/lang/Math", "pow", NULL, "(DD)D" }, 525 { "rad", "java/lang/Math", "toRadians", NULL, "(D)D" }, 526 { "rnd", "java/lang/Math", "random", NULL, "()D" }, 527 { "sin", "java/lang/Math", "sin", NULL, "(D)D" }, 528 { "sinh", "java/lang/Math", "sinh", NULL, "(D)D" }, 529 { "sgn", "java/lang/Math", "signum", NULL, "(D)D" }, 530 { "sqrt", "java/lang/Math", "sqrt", NULL, "(D)D" }, 531 { "tan", "java/lang/Math", "tan", NULL, "(D)D" }, 532 { "tanh", "java/lang/Math", "tanh", NULL, "(D)D" }, 533 { NULL, NULL, NULL, NULL, NULL } 534 }; 535 536 for (const Library *lib = builtins; lib->name != NULL; ++lib) 537 if (strcmp(lib->name, id->c_str()) == 0 && type_check_args(types, lib->virtype ? lib->virtype : lib->type)) 538 return lib; 539 540 return NULL; 541 } 542 543 // return a function type type_function(TD args,TD result)544 TD type_function(TD args, TD result) 545 { 546 return types.insert(std::string("(").append(args).append(")").append(result)).first->c_str(); 547 } 548 549 // return two types concatenated (to construct a function type) type_concat(TD type1,TD type2)550 TD type_concat(TD type1, TD type2) 551 { 552 return types.insert(std::string(type1).append(type2)).first->c_str(); 553 } 554 555 // return an array type type_array(TD type)556 TD type_array(TD type) 557 { 558 return types.insert(std::string("[").append(type)).first->c_str(); 559 } 560 561 // return "no type" (e.g. function without arguments) type_none()562 TD type_none() 563 { 564 return ""; 565 } 566 567 // return a void type type_void()568 TD type_void() 569 { 570 return "V"; 571 } 572 573 // return a boolean type (computationally the same as "I") type_boolean()574 TD type_boolean() 575 { 576 return "Z"; 577 } 578 579 // return a char type (computationally the same as "I") type_char()580 TD type_char() 581 { 582 return "C"; 583 } 584 585 // return a byte type (computationally the same as "I") type_byte()586 TD type_byte() 587 { 588 return "B"; 589 } 590 591 // return a short type (computationally the same as "I") type_short()592 TD type_short() 593 { 594 return "S"; 595 } 596 597 // return an int type type_int()598 TD type_int() 599 { 600 return "I"; 601 } 602 603 // return a long type type_long()604 TD type_long() 605 { 606 return "J"; 607 } 608 609 // return a float type type_float()610 TD type_float() 611 { 612 return "F"; 613 } 614 615 // return a double type type_double()616 TD type_double() 617 { 618 return "D"; 619 } 620 621 // return a string object reference type type_string()622 TD type_string() 623 { 624 return "Ljava/lang/String;"; 625 } 626 627 // return the referenced type if reference to a class, or the type itself type_of_reference(TD type)628 TD type_of_reference(TD type) 629 { 630 if (type != NULL && *type == 'L') 631 return types.insert(std::string(type + 1, strlen(type) - 2)).first->c_str(); 632 return type; 633 } 634 635 // return the return type of a function or NULL type_of_return(TD type_func)636 TD type_of_return(TD type_func) 637 { 638 if (type_func != NULL) 639 { 640 const char *s = strrchr(type_func, ')'); 641 if (s != NULL) 642 return s + 1; 643 } 644 return NULL; 645 } 646 647 // return the element type of an array or NULL type_of_element(TD array_type)648 TD type_of_element(TD array_type) 649 { 650 if (array_type != NULL && *array_type == '[') 651 return array_type + 1; 652 653 return NULL; 654 } 655 656 // check types of the function arguments, which must match exactly type_check_args(TD type_actuals,TD type_func)657 bool type_check_args(TD type_actuals, TD type_func) 658 { 659 if (type_actuals == NULL || type_func == NULL) 660 return false; 661 662 const char *s = strrchr(type_func, ')'); 663 return s != NULL && s == type_func + strlen(type_actuals) + 1 && strncmp(type_actuals, type_func + 1, s - type_func - 1) == 0; 664 } 665 666 // true if the type of an expression is short-circuit (i.e. logic, no value on the stack) type_is_circuit(TD type)667 bool type_is_circuit(TD type) 668 { 669 return type == NULL; 670 } 671 672 // true if the type of an expression is void type_is_void(TD type)673 bool type_is_void(TD type) 674 { 675 return type != NULL && strcmp(type, "V") == 0; 676 } 677 678 // true if the type of an expression is boolean type_is_boolean(TD type)679 bool type_is_boolean(TD type) 680 { 681 return type != NULL && strcmp(type, "Z") == 0; 682 } 683 684 // true if the type of an expression is char type_is_char(TD type)685 bool type_is_char(TD type) 686 { 687 return type != NULL && strcmp(type, "C") == 0; 688 } 689 690 // true if the type of an expression is byte type_is_byte(TD type)691 bool type_is_byte(TD type) 692 { 693 return type != NULL && strcmp(type, "B") == 0; 694 } 695 696 // true if the type of an expression is short type_is_short(TD type)697 bool type_is_short(TD type) 698 { 699 return type != NULL && strcmp(type, "S") == 0; 700 } 701 702 // true if the type of an expression is int type_is_int(TD type)703 bool type_is_int(TD type) 704 { 705 return type != NULL && strcmp(type, "I") == 0; 706 } 707 708 // true if the type of an expression is long type_is_long(TD type)709 bool type_is_long(TD type) 710 { 711 return type != NULL && strcmp(type, "J") == 0; 712 } 713 714 // true if the type of an expression is float type_is_float(TD type)715 bool type_is_float(TD type) 716 { 717 return type != NULL && strcmp(type, "F") == 0; 718 } 719 720 // true if the type of an expression is double type_is_double(TD type)721 bool type_is_double(TD type) 722 { 723 return type != NULL && strcmp(type, "D") == 0; 724 } 725 726 // true if the type of an expression is a string object type_is_string(TD type)727 bool type_is_string(TD type) 728 { 729 return type != NULL && strcmp(type, "Ljava/lang/String;") == 0; 730 } 731 732 // true if the type of an expression is an array type_is_array(TD type)733 bool type_is_array(TD type) 734 { 735 return type != NULL && *type == '['; 736 } 737 738 // true if the type of an expression is a function type_is_function(TD type)739 bool type_is_function(TD type) 740 { 741 return type != NULL && *type == '('; 742 } 743 744 // true if two types are equal type_equal(TD type1,TD type2)745 bool type_equal(TD type1, TD type2) 746 { 747 return type1 == type2 || (type1 != NULL && type2 != NULL && strcmp(type1, type2) == 0); 748 } 749 750 // return the wider type or NULL when coercion will fail type_wider(TD type1,TD type2)751 TD type_wider(TD type1, TD type2) 752 { 753 if ((!type_is_circuit(type1) && !type_is_int(type1) && !type_is_double(type1) && !type_is_string(type1)) || 754 (!type_is_circuit(type2) && !type_is_int(type2) && !type_is_double(type2) && !type_is_string(type2))) 755 return NULL; 756 757 if (type_is_string(type1) || type_is_string(type2)) 758 return type_string(); 759 760 if (type_is_double(type1) || type_is_double(type2)) 761 return type_double(); 762 763 return type_int(); 764 } 765 766 // reset the emitter to generate new code for a method, stored in a buffer new_method_code()767 void new_method_code() 768 { 769 code.clear(); 770 } 771 772 // address of the next instruction emitted addr()773 U4 addr() 774 { 775 return static_cast<U4>(code.size()); 776 } 777 778 // add a UTF-8 formatted string to the ClassFile constant pool if not already stored pool_add_Utf8(CS s)779 U2 pool_add_Utf8(CS s) 780 { 781 for (std::vector<ConstantPool>::iterator p = cf.pool.begin(); p != cf.pool.end(); ++p) 782 if (p->t == CONSTANT_Utf8 && p->s == s) 783 return static_cast<U2>(std::distance(cf.pool.begin(), p) + 1); 784 785 cf.pool.push_back(ConstantPool(CONSTANT_Utf8, s)); 786 787 return static_cast<U2>(cf.pool.size()); 788 } 789 790 // add a 32-bit unsigned integer to the ClassFile constant pool if not already stored pool_add_Integer(U4 i)791 U2 pool_add_Integer(U4 i) 792 { 793 for (std::vector<ConstantPool>::iterator p = cf.pool.begin(); p != cf.pool.end(); ++p) 794 if (p->t == CONSTANT_Integer && p->i == i) 795 return static_cast<U2>(std::distance(cf.pool.begin(), p) + 1); 796 797 cf.pool.push_back(ConstantPool(CONSTANT_Integer, i)); 798 799 return static_cast<U2>(cf.pool.size()); 800 } 801 802 // add a 32-bit float to the ClassFile constant pool if not already stored pool_add_Float(F4 f)803 U2 pool_add_Float(F4 f) 804 { 805 for (std::vector<ConstantPool>::iterator p = cf.pool.begin(); p != cf.pool.end(); ++p) 806 if (p->t == CONSTANT_Float && p->f == f) 807 return static_cast<U2>(std::distance(cf.pool.begin(), p) + 1); 808 809 cf.pool.push_back(ConstantPool(CONSTANT_Float, f)); 810 811 return static_cast<U2>(cf.pool.size()); 812 } 813 814 // add a 64-bit unsigned integer to the ClassFile constant pool if not already stored pool_add_Long(U8 l)815 U2 pool_add_Long(U8 l) 816 { 817 for (std::vector<ConstantPool>::iterator p = cf.pool.begin(); p != cf.pool.end(); ++p) 818 if (p->t == CONSTANT_Long && p->l == l) 819 return static_cast<U2>(std::distance(cf.pool.begin(), p) + 1); 820 821 cf.pool.push_back(ConstantPool(CONSTANT_Long, l)); 822 823 // double requires one extra entry in the constant pool; use non-existent CONSTANT_Padding 824 cf.pool.push_back(ConstantPool()); 825 826 return static_cast<U2>(cf.pool.size() - 1); 827 } 828 829 // add a 64-bit float to the ClassFile constant pool if not already stored pool_add_Double(F8 d)830 U2 pool_add_Double(F8 d) 831 { 832 for (std::vector<ConstantPool>::iterator p = cf.pool.begin(); p != cf.pool.end(); ++p) 833 if (p->t == CONSTANT_Double && p->d == d) 834 return static_cast<U2>(std::distance(cf.pool.begin(), p) + 1); 835 836 cf.pool.push_back(ConstantPool(CONSTANT_Double, d)); 837 838 // double requires one extra entry in the constant pool; use non-existent CONSTANT_Padding 839 cf.pool.push_back(ConstantPool()); 840 841 return static_cast<U2>(cf.pool.size() - 1); 842 } 843 844 // add a string to the ClassFile constant pool if not already stored pool_add_String(CS s)845 U2 pool_add_String(CS s) 846 { 847 U2 r = pool_add_Utf8(s); 848 849 for (std::vector<ConstantPool>::iterator p = cf.pool.begin(); p != cf.pool.end(); ++p) 850 if (p->t == CONSTANT_String && p->r == r) 851 return static_cast<U2>(std::distance(cf.pool.begin(), p) + 1); 852 853 cf.pool.push_back(ConstantPool(CONSTANT_String, r)); 854 855 return static_cast<U2>(cf.pool.size()); 856 } 857 858 // add a class name to the ClassFile constant pool if not already stored pool_add_Class(CS name)859 U2 pool_add_Class(CS name) 860 { 861 U2 r = pool_add_Utf8(name); 862 863 for (std::vector<ConstantPool>::iterator p = cf.pool.begin(); p != cf.pool.end(); ++p) 864 if (p->t == CONSTANT_Class && p->r == r) 865 return static_cast<U2>(std::distance(cf.pool.begin(), p) + 1); 866 867 cf.pool.push_back(ConstantPool(CONSTANT_Class, r)); 868 869 return static_cast<U2>(cf.pool.size()); 870 } 871 872 // add a field for the current ClassFile to the ClassFile constant pool if not already stored pool_add_Field(CS field_name,CS type)873 U2 pool_add_Field(CS field_name, CS type) 874 { 875 return pool_add_Field(cf.name.c_str(), field_name, type); 876 } 877 878 // add a field to the ClassFile constant pool if not already stored pool_add_Field(CS class_name,CS field_name,CS type)879 U2 pool_add_Field(CS class_name, CS field_name, CS type) 880 { 881 U2 r1 = pool_add_Class(class_name); 882 U2 r2 = pool_add_NameAndType(field_name, type); 883 884 for (std::vector<ConstantPool>::iterator p = cf.pool.begin(); p != cf.pool.end(); ++p) 885 if (p->t == CONSTANT_Field && p->p.first == r1 && p->p.second == r2) 886 return static_cast<U2>(std::distance(cf.pool.begin(), p) + 1); 887 888 cf.pool.push_back(ConstantPool(CONSTANT_Field, r1, r2)); 889 890 return static_cast<U2>(cf.pool.size()); 891 } 892 893 // add a method for the current ClassFile to the ClassFile constant pool if not already stored pool_add_Method(CS method_name,CS type)894 U2 pool_add_Method(CS method_name, CS type) 895 { 896 return pool_add_Method(cf.name.c_str(), method_name, type); 897 } 898 899 // add a method to the ClassFile constant pool if not already stored pool_add_Method(CS class_name,CS method_name,CS type)900 U2 pool_add_Method(CS class_name, CS method_name, CS type) 901 { 902 U2 r1 = pool_add_Class(class_name); 903 U2 r2 = pool_add_NameAndType(method_name, type); 904 905 for (std::vector<ConstantPool>::iterator p = cf.pool.begin(); p != cf.pool.end(); ++p) 906 if (p->t == CONSTANT_Method && p->p.first == r1 && p->p.second == r2) 907 return static_cast<U2>(std::distance(cf.pool.begin(), p) + 1); 908 909 cf.pool.push_back(ConstantPool(CONSTANT_Method, r1, r2)); 910 911 return static_cast<U2>(cf.pool.size()); 912 } 913 914 // add a (name,type) pair to the ClassFile constant pool if not already stored pool_add_NameAndType(CS name,CS type)915 U2 pool_add_NameAndType(CS name, CS type) 916 { 917 U2 r1 = pool_add_Utf8(name); 918 U2 r2 = pool_add_Utf8(type); 919 920 for (std::vector<ConstantPool>::iterator p = cf.pool.begin(); p != cf.pool.end(); ++p) 921 if (p->t == CONSTANT_NameAndType && p->p.first == r1 && p->p.second == r2) 922 return static_cast<U2>(std::distance(cf.pool.begin(), p) + 1); 923 924 cf.pool.push_back(ConstantPool(CONSTANT_NameAndType, r1, r2)); 925 926 return static_cast<U2>(cf.pool.size()); 927 } 928 929 // add a new method definition to the ClassFile new_method(AF access,ID name,TD type,U2 max_locals,U2 max_stack)930 void new_method(AF access, ID name, TD type, U2 max_locals, U2 max_stack) 931 { 932 U2 r1 = pool_add_Utf8(name->c_str()); 933 U2 r2 = pool_add_Utf8(type); 934 935 cf.methods.push_back(MethodInfo(access, r1, r2, max_locals, max_stack, code)); 936 } 937 938 // add a new field definition to the ClassFile new_field(AF access,ID name,TD type)939 U2 new_field(AF access, ID name, TD type) 940 { 941 U2 r1 = pool_add_Utf8(name->c_str()); 942 U2 r2 = pool_add_Utf8(type); 943 944 cf.fields.push_back(FieldInfo(access, r1, r2)); 945 946 return pool_add_Field(name->c_str(), type); 947 } 948 949 // append the second backpatch list to the first list, either one can be NULL merge(BackpatchList * list1,BackpatchList * list2)950 BackpatchList *merge(BackpatchList *list1, BackpatchList *list2) 951 { 952 BackpatchList *end = NULL; 953 954 for (BackpatchList *p = list1; p != NULL; p = p->next) 955 end = p; 956 957 if (end != NULL) 958 { 959 end->next = list2; 960 return list1; 961 } 962 963 return list2; 964 } 965 966 // return new backpatch list for the given address backpatch_list(U4 addr)967 BackpatchList *backpatch_list(U4 addr) 968 { 969 return new BackpatchList(addr); 970 } 971 972 // return new backpatch list for the next jump instruction address backpatch_list_addr()973 BackpatchList *backpatch_list_addr() 974 { 975 return backpatch_list(addr()); 976 } 977 978 // backpatch opcodes in the backpatch list, deleting all nodes in the list backpatch(BackpatchList * list,U4 addr)979 void backpatch(BackpatchList *list, U4 addr) 980 { 981 while (list != NULL) 982 { 983 backpatch(list->addr, addr); 984 BackpatchList *next = list->next; 985 delete list; 986 list = next; 987 } 988 } 989 990 // backpatch one jump instruction backpatch(U4 inst,U4 addr)991 void backpatch(U4 inst, U4 addr) 992 { 993 U2 offset = static_cast<U2>(addr - inst); // should error if out of range? 994 code[inst + 1] = H1(offset); 995 code[inst + 2] = L1(offset); 996 } 997 998 // init loop/switch scope for break statements break_init()999 void break_init() 1000 { 1001 if (breaks.size() <= break_nest) 1002 breaks.push_back(NULL); 1003 else 1004 breaks[break_nest] = NULL; 1005 ++break_nest; 1006 } 1007 1008 // break statement in a loop emit_break()1009 bool emit_break() 1010 { 1011 if (break_nest == 0) 1012 return false; 1013 breaks[break_nest - 1] = merge(breaks[break_nest - 1], backpatch_list_addr()); 1014 emit3(goto_, 0); 1015 return true; 1016 } 1017 1018 // init loop scope for continue statements continue_init()1019 void continue_init() 1020 { 1021 if (continues.size() <= continue_nest) 1022 continues.push_back(NULL); 1023 else 1024 continues[continue_nest] = NULL; 1025 ++continue_nest; 1026 } 1027 1028 // continue statement in a loop emit_continue()1029 bool emit_continue() 1030 { 1031 if (continue_nest == 0) 1032 return false; 1033 continues[continue_nest - 1] = merge(continues[continue_nest - 1], backpatch_list_addr()); 1034 emit3(goto_, 0); 1035 return true; 1036 } 1037 1038 // close loop scope and backpatch the break and continue statements in the loop, if any loop_done()1039 void loop_done() 1040 { 1041 backpatch(breaks[--break_nest], addr()); 1042 backpatch(continues[--continue_nest], addr()); 1043 } 1044 1045 // init switch scope switch_init()1046 void switch_init() 1047 { 1048 if (cases.size() <= switch_nest) 1049 cases.push_back(std::map<I4,U4>()); 1050 else 1051 cases[switch_nest].clear(); 1052 if (defaults.size() <= switch_nest) 1053 defaults.push_back(0); 1054 else 1055 defaults[switch_nest] = 0; 1056 break_init(); 1057 ++switch_nest; 1058 } 1059 1060 // record switch case key-address pair to resolve at the end of switch emit_case(U4 key,U4 addr)1061 bool emit_case(U4 key, U4 addr) 1062 { 1063 if (cases[switch_nest - 1].find(key) != cases[switch_nest - 1].end()) 1064 return false; 1065 cases[switch_nest - 1][key] = addr; 1066 return true; 1067 } 1068 1069 // record switch default-address to resolve at the end of switch emit_default(U4 addr)1070 bool emit_default(U4 addr) 1071 { 1072 if (defaults[switch_nest - 1] != 0) 1073 return false; 1074 defaults[switch_nest - 1] = addr; 1075 return true; 1076 } 1077 1078 // emit switch lookup table switch_done()1079 void switch_done() 1080 { 1081 --switch_nest; 1082 // emit switch lookup table (we could optimize this with tableswitch for consecutive case values) 1083 U4 bpa1 = addr(); 1084 emit(lookupswitch); 1085 U4 padding = (-addr()) & 3; 1086 for (U4 pad = 0; pad < padding; ++pad) 1087 emit(nop); 1088 // default case jump offset 1089 U4 bpa2 = addr(); 1090 if (defaults[switch_nest] == 0) 1091 emit4(0); 1092 else 1093 emit4(defaults[switch_nest] - bpa1); 1094 // number of key-offset pairs 1095 emit4(static_cast<U4>(cases[switch_nest].size())); 1096 // emit key-offset pairs sorted by key 1097 for (std::map<I4,U4>::iterator i = cases[switch_nest].begin(); i != cases[switch_nest].end(); ++i) 1098 { 1099 emit4(i->first); 1100 emit4(i->second - bpa1); 1101 } 1102 // if no default case, the default case jump offset is after the table 1103 if (defaults[switch_nest] == 0) 1104 backpatch(bpa2 + 1, addr() + (bpa2 + 1 - bpa1)); 1105 // backpatch the switch break statements 1106 backpatch(breaks[--break_nest], addr()); 1107 } 1108 1109 // coerce expression from short-circuit or int/double to double/int when applicable coerce(Expr & expr,TD type)1110 bool coerce(Expr& expr, TD type) 1111 { 1112 TD conv = rvalue(expr); 1113 1114 if (type_is_int(conv) && type_is_double(type)) 1115 { 1116 insert(expr.after, i2d); 1117 } 1118 else if (type_is_double(conv) && type_is_int(type)) 1119 { 1120 insert(expr.after, d2i); 1121 } 1122 else if ((type_is_int(conv) || type_is_double(conv)) && type_is_string(type)) 1123 { 1124 if (type_is_double(conv)) 1125 insert(expr.after, d2i); 1126 1127 // create a char[1] array, assign int value to first char[], then convert array to a string containing a single character 1128 insert3(expr.after, new_, pool_add_Class("java/lang/String")); 1129 insert(expr.after, dup_x1); // aString int aString 1130 insert(expr.after, swap); // aString aString int 1131 insert(expr.after, iconst_1); // aString aString int 1 1132 insert2(expr.after, newarray, T_CHAR); // aString aString int char[] 1133 insert(expr.after, dup_x1); // aString aString char[] int char[] 1134 insert(expr.after, swap); // aString aString char[] char[] int 1135 insert(expr.after, iconst_0); // aString aString char[] char[] int 0 1136 insert(expr.after, swap); // aString aString char[] char[] 0 int 1137 insert(expr.after, castore); // aString aString char[] 1138 insert3(expr.after, invokespecial, pool_add_Method("java/lang/String", "<init>", "([C)V")); 1139 } 1140 else if (!type_equal(conv, type)) 1141 { 1142 return false; 1143 } 1144 1145 expr.type = type; 1146 1147 return true;; 1148 } 1149 1150 // convert expression to short-circuit logic (conditional code), no change when already short circuit circuit(Expr & expr)1151 U4 circuit(Expr& expr) 1152 { 1153 U4 addr = expr.after; 1154 1155 if (!type_is_circuit(expr.type)) 1156 { 1157 TD type = rvalue(expr); 1158 1159 if (type_is_int(expr.type)) 1160 { 1161 expr.falselist = backpatch_list(expr.after); 1162 insert3(expr.after, ifeq, 0); 1163 expr.truelist = backpatch_list(expr.after); 1164 insert3(expr.after, goto_, 0); 1165 } 1166 else if (type_is_double(type)) 1167 { 1168 insert(expr.after, dconst_0); 1169 insert(expr.after, dcmpg); 1170 expr.falselist = backpatch_list(expr.after); 1171 insert3(expr.after, ifeq, 0); 1172 expr.truelist = backpatch_list(expr.after); 1173 insert3(expr.after, goto_, 0); 1174 } 1175 else if (type_is_string(type)) 1176 { 1177 insert3(expr.after, invokevirtual, pool_add_Method("java/lang/String", "isEmpty", "()Z")); 1178 expr.falselist = backpatch_list(expr.after); 1179 insert3(expr.after, ifne, 0); 1180 expr.truelist = backpatch_list(expr.after); 1181 insert3(expr.after, goto_, 0); 1182 } 1183 else 1184 { 1185 expr.falselist = backpatch_list(expr.after); 1186 insert3(expr.after, ifnull, 0); 1187 expr.truelist = backpatch_list(expr.after); 1188 insert3(expr.after, goto_, 0); 1189 } 1190 } 1191 1192 return expr.after - addr; 1193 } 1194 1195 // convert expression at the specified address to an rvalue, converts short-circuit logic to push 0 or 1 on stack rvalue(Expr & expr)1196 TD rvalue(Expr& expr) 1197 { 1198 if (type_is_circuit(expr.type)) 1199 { 1200 if (expr.falselist != NULL) 1201 { 1202 backpatch(expr.falselist, expr.after); 1203 insert(expr.after, iconst_0); 1204 if (expr.truelist != NULL) 1205 insert3(expr.after, goto_, 4); 1206 expr.falselist = NULL; 1207 } 1208 if (expr.truelist != NULL) 1209 { 1210 backpatch(expr.truelist, expr.after); 1211 insert(expr.after, iconst_1); 1212 expr.truelist = NULL; 1213 } 1214 1215 expr.type = type_int(); 1216 } 1217 else 1218 { 1219 load(expr); 1220 } 1221 1222 return expr.type; 1223 } 1224 1225 // adjust the "after location" and "backpatch lists" of an expression when instructions were inserted before it adjust(Expr & expr,U4 offset)1226 void adjust(Expr& expr, U4 offset) 1227 { 1228 for (BackpatchList *bp = expr.truelist; bp != NULL; bp = bp->next) 1229 bp->addr += offset; 1230 1231 for (BackpatchList *bp = expr.falselist; bp != NULL; bp = bp->next) 1232 bp->addr += offset; 1233 1234 expr.after += offset; 1235 } 1236 1237 // load value of local or static variable onto the top of stack when not already on the stack load(Expr & expr)1238 void load(Expr& expr) 1239 { 1240 switch (expr.mode) 1241 { 1242 case Expr::VALUE: 1243 break; 1244 1245 case Expr::LOCAL: 1246 if (type_is_int(expr.type)) 1247 { 1248 switch (expr.place) 1249 { 1250 case 0: insert(expr.after, iload_0); break; 1251 case 1: insert(expr.after, iload_1); break; 1252 case 2: insert(expr.after, iload_2); break; 1253 case 3: insert(expr.after, iload_3); break; 1254 default: insert2(expr.after, iload, expr.place); 1255 } 1256 } 1257 else if (type_is_double(expr.type)) 1258 { 1259 switch (expr.place) 1260 { 1261 case 0: insert(expr.after, dload_0); break; 1262 case 1: insert(expr.after, dload_1); break; 1263 case 2: insert(expr.after, dload_2); break; 1264 case 3: insert(expr.after, dload_3); break; 1265 default: insert2(expr.after, dload, expr.place); 1266 } 1267 } 1268 else 1269 { 1270 switch (expr.place) 1271 { 1272 case 0: insert(expr.after, aload_0); break; 1273 case 1: insert(expr.after, aload_1); break; 1274 case 2: insert(expr.after, aload_2); break; 1275 case 3: insert(expr.after, aload_3); break; 1276 default: insert2(expr.after, aload, expr.place); 1277 } 1278 } 1279 break; 1280 1281 case Expr::STATIC: 1282 insert3(expr.after, getstatic, expr.place); 1283 break; 1284 1285 case Expr::ARRAY: 1286 if (type_is_int(expr.type)) 1287 insert(expr.after, iaload); 1288 else if (type_is_double(expr.type)) 1289 insert(expr.after, daload); 1290 else 1291 insert(expr.after, aaload); 1292 break; 1293 } 1294 1295 expr.mode = Expr::VALUE; 1296 } 1297 1298 // store top of stack into local or static variable store(Expr & expr)1299 void store(Expr& expr) 1300 { 1301 switch (expr.mode) 1302 { 1303 case Expr::VALUE: 1304 break; 1305 1306 case Expr::LOCAL: 1307 if (type_is_int(expr.type)) 1308 { 1309 switch (expr.place) 1310 { 1311 case 0: emit(istore_0); break; 1312 case 1: emit(istore_1); break; 1313 case 2: emit(istore_2); break; 1314 case 3: emit(istore_3); break; 1315 default: emit2(istore, expr.place); 1316 } 1317 } 1318 else if (type_is_double(expr.type)) 1319 { 1320 switch (expr.place) 1321 { 1322 case 0: emit(dstore_0); break; 1323 case 1: emit(dstore_1); break; 1324 case 2: emit(dstore_2); break; 1325 case 3: emit(dstore_3); break; 1326 default: emit2(dstore, expr.place); 1327 } 1328 } 1329 else 1330 { 1331 switch (expr.place) 1332 { 1333 case 0: emit(astore_0); break; 1334 case 1: emit(astore_1); break; 1335 case 2: emit(astore_2); break; 1336 case 3: emit(astore_3); break; 1337 default: emit2(astore, expr.place); 1338 } 1339 } 1340 break; 1341 1342 case Expr::STATIC: 1343 emit3(putstatic, expr.place); 1344 break; 1345 1346 case Expr::ARRAY: 1347 if (type_is_int(expr.type)) 1348 emit(iastore); 1349 else if (type_is_double(expr.type)) 1350 emit(dastore); 1351 else 1352 emit(aastore); 1353 break; 1354 } 1355 } 1356 1357 // emit integer/double/string comparison emit_compare(Expr & expr1,Expr & expr2,U1 if_op,U1 if_icmp_op,Expr & result)1358 bool emit_compare(Expr& expr1, Expr& expr2, U1 if_op, U1 if_icmp_op, Expr& result) 1359 { 1360 TD type = type_wider(expr1.type, expr2.type); 1361 1362 coerce(expr2, type); 1363 coerce(expr1, type); 1364 1365 if (type_is_int(type)) 1366 { 1367 result.truelist = backpatch_list_addr(); 1368 emit3(if_icmp_op, 0); 1369 } 1370 else if (type_is_double(type)) 1371 { 1372 emit(dcmpg); 1373 result.truelist = backpatch_list_addr(); 1374 emit3(if_op, 0); 1375 } 1376 else if (type_is_string(expr1.type) && type_is_string(expr2.type)) 1377 { 1378 emit3(invokevirtual, pool_add_Method("java/lang/String", "compareTo", "(Ljava/lang/String;)I")); 1379 result.truelist = backpatch_list_addr(); 1380 emit3(if_op, 0); 1381 } 1382 1383 result.falselist = backpatch_list_addr(); 1384 emit3(goto_, 0); 1385 1386 return true; 1387 } 1388 1389 // emit integer/double/string dyadic operation emit_oper(Expr & expr1,Expr & expr2,U1 i_op,U1 d_op,Expr & result)1390 bool emit_oper(Expr& expr1, Expr& expr2, U1 i_op, U1 d_op, Expr& result) 1391 { 1392 result.type = type_wider(expr1.type, expr2.type); 1393 1394 coerce(expr2, result.type); 1395 coerce(expr1, result.type); 1396 1397 if (type_is_int(result.type)) 1398 emit(i_op); 1399 else if (type_is_double(result.type) && d_op != nop) 1400 emit(d_op); 1401 else if (i_op == iadd && type_is_string(expr1.type) && type_is_string(expr2.type)) 1402 emit3(invokevirtual, pool_add_Method("java/lang/String", "concat", "(Ljava/lang/String;)Ljava/lang/String;")); 1403 else 1404 return false; 1405 1406 return true; 1407 } 1408 1409 // emit integer/double/string (combined) assignment operation emit_assign(Expr & expr1,Expr & expr2,U1 i_op,U1 d_op,Expr & result)1410 bool emit_assign(Expr& expr1, Expr& expr2, U1 i_op, U1 d_op, Expr& result) 1411 { 1412 if (expr1.mode == Expr::VALUE) 1413 return false; 1414 1415 result.type = expr1.type; 1416 1417 if (!coerce(expr2, expr1.type)) 1418 return false; 1419 1420 Expr lhs = expr1; 1421 1422 if (i_op != nop) 1423 { 1424 if (lhs.mode == Expr::ARRAY) 1425 insert(expr1.after, dup2); 1426 1427 load(expr1); 1428 1429 if (type_is_int(expr1.type)) 1430 emit(i_op); 1431 else if (type_is_double(expr1.type) && d_op != nop) 1432 emit(d_op); 1433 else if (type_is_string(expr1.type) && type_is_string(expr2.type) && i_op == iadd) 1434 emit3(invokevirtual, pool_add_Method("java/lang/String", "concat", "(Ljava/lang/String;)Ljava/lang/String;")); 1435 else 1436 return false; 1437 } 1438 1439 if (lhs.mode == Expr::ARRAY) 1440 { 1441 if (type_is_double(lhs.type)) 1442 emit(dup2_x2); 1443 else 1444 emit(dup_x2); 1445 } 1446 else 1447 { 1448 if (type_is_double(lhs.type)) 1449 emit(dup2); 1450 else 1451 emit(dup); 1452 } 1453 1454 store(lhs); 1455 1456 return true; 1457 } 1458 1459 // emit pre/post increment/decrement integer variable update emit_update(Expr & expr,bool pre,bool inc,Expr & result)1460 bool emit_update(Expr& expr, bool pre, bool inc, Expr& result) 1461 { 1462 if (!type_is_int(expr.type)) 1463 return false; 1464 1465 switch (expr.mode) 1466 { 1467 case Expr::VALUE: 1468 return false; 1469 1470 case Expr::LOCAL: 1471 if (!pre) 1472 emit2(iload, expr.place); 1473 emit3(iinc, expr.place, inc ? 1 : 0xff); 1474 if (pre) 1475 emit2(iload, expr.place); 1476 break; 1477 1478 case Expr::STATIC: 1479 emit3(getstatic, expr.place); 1480 if (!pre) 1481 emit(dup); 1482 emit(iconst_1); 1483 emit(inc ? iadd : isub); 1484 if (pre) 1485 emit(dup); 1486 emit3(putstatic, expr.place); 1487 break; 1488 1489 case Expr::ARRAY: 1490 emit(dup2); 1491 emit(iaload); 1492 if (!pre) 1493 emit(dup_x2); 1494 emit(iconst_1); 1495 emit(inc ? iadd : isub); 1496 if (pre) 1497 emit(dup_x2); 1498 emit(iastore); 1499 break; 1500 } 1501 1502 result.type = type_int(); 1503 1504 return true; 1505 } 1506 1507 // emit ldc or ldc_w for the specified constant pool index emit_ldc(U2 index)1508 void emit_ldc(U2 index) 1509 { 1510 if (index <= 0xff) 1511 emit2(ldc, static_cast<U1>(index)); 1512 else 1513 emit3(ldc_w, index); 1514 } 1515 1516 // emit opcode emit(U1 opcode)1517 void emit(U1 opcode) 1518 { 1519 code.push_back(opcode); 1520 } 1521 1522 // emit opcode with single-byte operand emit2(U1 opcode,U1 operand)1523 void emit2(U1 opcode, U1 operand) 1524 { 1525 code.push_back(opcode); 1526 code.push_back(operand); 1527 } 1528 1529 // emit opcode with double-byte operand emit3(U1 opcode,U2 operand)1530 void emit3(U1 opcode, U2 operand) 1531 { 1532 code.push_back(opcode); 1533 code.push_back(H1(operand)); 1534 code.push_back(L1(operand)); 1535 } 1536 1537 // emit opcode with two single-byte operands emit3(U1 opcode,U1 operand1,U1 operand2)1538 void emit3(U1 opcode, U1 operand1, U1 operand2) 1539 { 1540 code.push_back(opcode); 1541 code.push_back(operand1); 1542 code.push_back(operand2); 1543 } 1544 1545 // emit signed int value emit4(I4 value)1546 void emit4(I4 value) 1547 { 1548 emit(H1(H2(value))); 1549 emit(L1(H2(value))); 1550 emit(H1(L2(value))); 1551 emit(L1(L2(value))); 1552 } 1553 1554 // insert opcode at address insert(U4 & addr,U1 opcode)1555 void insert(U4& addr, U1 opcode) 1556 { 1557 code.insert(addr++, 1, opcode); 1558 } 1559 1560 // insert opcode with single-byte operand at address insert2(U4 & addr,U1 opcode,U2 operand)1561 void insert2(U4& addr, U1 opcode, U2 operand) 1562 { 1563 code.insert(addr++, 2, opcode); 1564 code[addr++] = operand; 1565 } 1566 1567 // insert opcode with double-byte operand at address insert3(U4 & addr,U2 opcode,U2 operand)1568 void insert3(U4& addr, U2 opcode, U2 operand) 1569 { 1570 code.insert(addr++, 3, opcode); 1571 code[addr++] = H1(operand); 1572 code[addr++] = L1(operand); 1573 } 1574 1575 // save the class file to file "name.class", "name" is the name of the source code file without extension suffix save()1576 bool save() 1577 { 1578 FILE *fd = fopen(std::string(cf.name).append(".class").c_str(), "w"); 1579 1580 if (fd == NULL) 1581 return false; 1582 1583 U2 attribute_code_ref = pool_add_Utf8("Code"); 1584 U2 this_class_ref = pool_add_Class(cf.name.c_str()); 1585 U2 super_class_ref = pool_add_Class("java/lang/Object"); 1586 1587 save_U4(fd, 0xcafebabe); // magic number 1588 save_U2(fd, 0x0000); // minor_version 1589 save_U2(fd, 0x002e); // major_version 1590 save_pool(fd); 1591 save_U2(fd, cf.access); // access_flags 1592 save_U2(fd, this_class_ref); // this_class (index in pool refers to CONSTANT_Class) 1593 save_U2(fd, super_class_ref); // super_class (index in pool refers to CONSTANT_Class) 1594 save_interfaces(fd); 1595 save_fields(fd); 1596 save_methods(fd, attribute_code_ref); 1597 save_attributes(fd); 1598 1599 int err = ferror(fd); 1600 1601 fclose(fd); 1602 1603 return err == 0; 1604 } 1605 1606 bool main; // if we are compiling the body of main() 1607 TD decl_type; // the last type declared 1608 TD return_type; // the return type of the function we are compiling 1609 Table *table[2]; // table[0] holds statics, table[1] holds locals 1610 int scope; // 0 for static scope, 1 for local scope 1611 size_t break_nest; // depth of the loop 1612 size_t continue_nest; // depth of the loop 1613 size_t switch_nest; // depth of the loop 1614 std::vector<BackpatchList*> breaks; // break jumps to the instructions after the loop 1615 std::vector<BackpatchList*> continues; // continue jumps to the loop condition expression 1616 std::vector< std::map<I4,U4> > cases; // 1617 std::vector<U4> defaults; // 1618 std::set<std::string> types; // collection of TD strings 1619 1620 private: 1621 1622 // JVM bytecode is a sequence of unsigned bytes 1623 typedef std::basic_string<U1> Code; 1624 1625 // each item in the ConstantPool must begin with a 1-byte tag ConstantType indicating the kind of entry 1626 enum ConstantType { 1627 CONSTANT_Padding = 0, // a non-existent value to pad Long and Double entries 1628 CONSTANT_Utf8 = 1, 1629 CONSTANT_Integer = 3, 1630 CONSTANT_Float = 4, 1631 CONSTANT_Long = 5, 1632 CONSTANT_Double = 6, 1633 CONSTANT_Class = 7, 1634 CONSTANT_String = 8, 1635 CONSTANT_Field = 9, 1636 CONSTANT_Method = 10, 1637 CONSTANT_InterfaceMethod = 11, 1638 CONSTANT_NameAndType = 12, 1639 CONSTANT_MethodHandle = 15, // since Java SE 7; not used 1640 CONSTANT_MethodType = 16, // since Java SE 7; not used 1641 CONSTANT_Dynamic = 17, // since Java SE 11; not used 1642 CONSTANT_InvokeDynamic = 18, // since Java SE 7; not used 1643 CONSTANT_Module = 19, // since Java SE 9; not used 1644 CONSTANT_Package = 20, // since Java SE 9; not used 1645 }; 1646 1647 // the constant pool may contain pairs of references 1648 typedef std::pair<U2,U2> Pair; 1649 1650 // a class file constant pool element in simplified form 1651 struct ConstantPool { 1652 ConstantPoolCompiler::ConstantPool1653 ConstantPool() : t(CONSTANT_Padding) { } 1654 ConstantPoolCompiler::ConstantPool1655 ConstantPool(ConstantType t, CS s) : t(t), s(s) { } ConstantPoolCompiler::ConstantPool1656 ConstantPool(ConstantType t, U4 i) : t(t), i(i) { } ConstantPoolCompiler::ConstantPool1657 ConstantPool(ConstantType t, F4 f) : t(t), f(f) { } ConstantPoolCompiler::ConstantPool1658 ConstantPool(ConstantType t, U8 l) : t(t), l(l) { } ConstantPoolCompiler::ConstantPool1659 ConstantPool(ConstantType t, F8 d) : t(t), d(d) { } ConstantPoolCompiler::ConstantPool1660 ConstantPool(ConstantType t, U2 r) : t(t), r(r) { } ConstantPoolCompiler::ConstantPool1661 ConstantPool(ConstantType t, U2 r1, U2 r2) : t(t), p(r1, r2) { } 1662 1663 ConstantType t; // tag 1664 1665 U4 i; // unsigned int 1666 F4 f; // float 1667 U8 l; // long unsigned int 1668 F8 d; // double float 1669 CS s; // string in UTF-8 format 1670 U2 r; // reference (constant pool index) 1671 Pair p; // pair of references 1672 1673 }; 1674 1675 // class file field information 1676 struct FieldInfo { 1677 FieldInfoCompiler::FieldInfo1678 FieldInfo(AF access, U2 name, U2 type) 1679 : 1680 access(access), 1681 name(name), 1682 type(type) 1683 { } 1684 1685 AF access; 1686 U2 name; 1687 U2 type; 1688 1689 }; 1690 1691 // class file method information 1692 struct MethodInfo { 1693 MethodInfoCompiler::MethodInfo1694 MethodInfo(AF access, U2 name, U2 type, U2 max_locals, U2 max_stack, const Code& code) 1695 : 1696 access(access), 1697 name(name), 1698 type(type), 1699 max_locals(max_locals), 1700 max_stack(max_stack), 1701 code(code) 1702 { } 1703 1704 AF access; 1705 U2 name; 1706 U2 type; 1707 U2 max_locals; 1708 U2 max_stack; 1709 Code code; 1710 1711 }; 1712 1713 // the class file in simplified form 1714 struct ClassFile { 1715 1716 // set up a class file structure ClassFileCompiler::ClassFile1717 ClassFile(const std::string& name) 1718 : 1719 access(ACC_PUBLIC), 1720 name(name) 1721 { } 1722 1723 AF access; 1724 std::string name; 1725 std::vector<ConstantPool> pool; 1726 std::vector<FieldInfo> fields; 1727 std::vector<MethodInfo> methods; 1728 1729 }; 1730 1731 // save the ConstantPool contents to the class file save_pool(FILE * fd)1732 void save_pool(FILE *fd) 1733 { 1734 save_U2(fd, static_cast<U2>(cf.pool.size() + 1)); 1735 1736 for (std::vector<ConstantPool>::iterator p = cf.pool.begin(); p != cf.pool.end(); ++p) 1737 { 1738 switch (p->t) 1739 { 1740 case CONSTANT_Padding: 1741 break; 1742 case CONSTANT_Utf8: 1743 save_Utf8(fd, p->s); 1744 break; 1745 case CONSTANT_Integer: 1746 save_Integer(fd, p->i); 1747 break; 1748 case CONSTANT_Float: 1749 save_Float(fd, p->f); 1750 break; 1751 case CONSTANT_Long: 1752 save_Long(fd, p->l); 1753 break; 1754 case CONSTANT_Double: 1755 save_Double(fd, p->d); 1756 break; 1757 case CONSTANT_String: 1758 save_String(fd, p->r); 1759 break; 1760 case CONSTANT_Class: 1761 save_Class(fd, p->r); 1762 break; 1763 case CONSTANT_Field: 1764 save_Field(fd, p->p); 1765 break; 1766 case CONSTANT_Method: 1767 save_Method(fd, p->p); 1768 break; 1769 case CONSTANT_InterfaceMethod: 1770 save_InterfaceMethod(fd, p->p); 1771 break; 1772 case CONSTANT_NameAndType: 1773 save_NameAndType(fd, p->p); 1774 break; 1775 case CONSTANT_MethodHandle: 1776 case CONSTANT_MethodType: 1777 case CONSTANT_Dynamic: 1778 case CONSTANT_InvokeDynamic: 1779 case CONSTANT_Module: 1780 case CONSTANT_Package: 1781 // skipped; not used 1782 break; 1783 } 1784 } 1785 } 1786 save_interfaces(FILE * fd)1787 void save_interfaces(FILE *fd) 1788 { 1789 save_U2(fd, 0); // interfaces_count 1790 // each (u2) value in the interfaces array must be a valid index into the pool table 1791 } 1792 save_fields(FILE * fd)1793 void save_fields(FILE *fd) 1794 { 1795 U2 n = static_cast<U2>(cf.fields.size()); 1796 1797 save_U2(fd, n); 1798 1799 for (U2 i = 0; i < n; i++) 1800 { 1801 save_U2(fd, cf.fields[i].access); 1802 save_U2(fd, cf.fields[i].name); 1803 save_U2(fd, cf.fields[i].type); 1804 save_U2(fd, 0); // no attributes 1805 } 1806 } 1807 save_methods(FILE * fd,U2 attribute_code_ref)1808 void save_methods(FILE *fd, U2 attribute_code_ref) 1809 { 1810 U2 n = static_cast<U2>(cf.methods.size()); 1811 1812 save_U2(fd, n); 1813 1814 for (U2 i = 0; i < n; i++) 1815 { 1816 save_U2(fd, cf.methods[i].access); 1817 save_U2(fd, cf.methods[i].name); 1818 save_U2(fd, cf.methods[i].type); 1819 1820 if (cf.methods[i].code.size() > 0) 1821 { 1822 save_U2(fd, 1); // one attribute 1823 save_U2(fd, attribute_code_ref); // attribute "Code" 1824 save_U4(fd, static_cast<U4>(cf.methods[i].code.size() + 12)); // attribute length 1825 save_U2(fd, cf.methods[i].max_stack); 1826 save_U2(fd, cf.methods[i].max_locals); 1827 save_Code(fd, cf.methods[i].code); 1828 save_U2(fd, 0); // no exceptions 1829 save_U2(fd, 0); // no attributes 1830 } 1831 else 1832 { 1833 save_U2(fd, 0); 1834 } 1835 } 1836 } 1837 save_attributes(FILE * fd)1838 void save_attributes(FILE *fd) 1839 { 1840 save_U2(fd, 0); // attributes_count 1841 // each value of the attributes table must be an attribute structure 1842 } 1843 save_Utf8(FILE * fd,CS s)1844 void save_Utf8(FILE *fd, CS s) 1845 { 1846 U2 n = static_cast<U2>(strlen(s)); 1847 save_U1(fd, CONSTANT_Utf8); 1848 save_U2(fd, n); 1849 fwrite(s, 1, n, fd); 1850 } 1851 save_Integer(FILE * fd,U4 i)1852 void save_Integer(FILE *fd, U4 i) 1853 { 1854 save_U1(fd, CONSTANT_Integer); 1855 save_U4(fd, i); 1856 } 1857 save_Float(FILE * fd,F4 f)1858 void save_Float(FILE *fd, F4 f) 1859 { 1860 save_U1(fd, CONSTANT_Float); 1861 save_U4(fd, *reinterpret_cast<U4*>(&f)); // warning: this is valid assuming this machine uses IEEE 754 1862 } 1863 save_Long(FILE * fd,U8 l)1864 void save_Long(FILE *fd, U8 l) 1865 { 1866 save_U1(fd, CONSTANT_Long); 1867 save_U8(fd, l); 1868 } 1869 save_Double(FILE * fd,F8 d)1870 void save_Double(FILE *fd, F8 d) 1871 { 1872 save_U1(fd, CONSTANT_Double); 1873 save_U8(fd, *reinterpret_cast<U8*>(&d)); // warning: this is valid assuming this machine uses IEEE 754 1874 } 1875 save_String(FILE * fd,U2 r)1876 void save_String(FILE *fd, U2 r) 1877 { 1878 save_U1(fd, CONSTANT_String); 1879 save_U2(fd, r); 1880 } 1881 save_Class(FILE * fd,U2 r)1882 void save_Class(FILE *fd, U2 r) 1883 { 1884 save_U1(fd, CONSTANT_Class); 1885 save_U2(fd, r); 1886 } 1887 save_Field(FILE * fd,Pair & p)1888 void save_Field(FILE *fd, Pair& p) 1889 { 1890 save_U1(fd, CONSTANT_Field); 1891 save_U2(fd, p.first); 1892 save_U2(fd, p.second); 1893 } 1894 save_Method(FILE * fd,Pair & p)1895 void save_Method(FILE *fd, Pair& p) 1896 { 1897 save_U1(fd, CONSTANT_Method); 1898 save_U2(fd, p.first); 1899 save_U2(fd, p.second); 1900 } 1901 save_InterfaceMethod(FILE * fd,Pair & p)1902 void save_InterfaceMethod(FILE *fd, Pair& p) 1903 { 1904 save_U1(fd, CONSTANT_InterfaceMethod); 1905 save_U2(fd, p.first); 1906 save_U2(fd, p.second); 1907 } 1908 save_NameAndType(FILE * fd,Pair & p)1909 void save_NameAndType(FILE *fd, Pair& p) 1910 { 1911 save_U1(fd, CONSTANT_NameAndType); 1912 save_U2(fd, p.first); 1913 save_U2(fd, p.second); 1914 } 1915 save_Code(FILE * fd,const Code & code)1916 void save_Code(FILE *fd, const Code& code) 1917 { 1918 save_U4(fd, static_cast<U4>(code.size())); 1919 fwrite(code.c_str(), 1, code.size(), fd); 1920 } 1921 save_U1(FILE * fd,U1 n)1922 void save_U1(FILE *fd, U1 n) 1923 { 1924 fputc(n, fd); 1925 } 1926 save_U2(FILE * fd,U2 n)1927 void save_U2(FILE *fd, U2 n) 1928 { 1929 save_U1(fd, H1(n)); 1930 save_U1(fd, L1(n)); 1931 } 1932 save_U4(FILE * fd,U4 n)1933 void save_U4(FILE *fd, U4 n) 1934 { 1935 save_U2(fd, H2(n)); 1936 save_U2(fd, L2(n)); 1937 } 1938 save_U8(FILE * fd,U8 n)1939 void save_U8(FILE *fd, U8 n) 1940 { 1941 save_U4(fd, H4(n)); 1942 save_U4(fd, L4(n)); 1943 } 1944 H1(U2 n)1945 static U1 H1(U2 n) 1946 { 1947 return static_cast<U1>(n >> 8); 1948 } 1949 L1(U2 n)1950 static U1 L1(U2 n) 1951 { 1952 return static_cast<U1>(n); 1953 } 1954 H2(U4 n)1955 static U2 H2(U4 n) 1956 { 1957 return static_cast<U2>(n >> 16); 1958 } 1959 L2(U4 n)1960 static U2 L2(U4 n) 1961 { 1962 return static_cast<U2>(n); 1963 } 1964 H4(U8 n)1965 static U4 H4(U8 n) 1966 { 1967 return static_cast<U4>(n >> 32); 1968 } 1969 L4(U8 n)1970 static U4 L4(U8 n) 1971 { 1972 return static_cast<U4>(n); 1973 } 1974 1975 ClassFile cf; // the class file 1976 Code code; // the current code being emitted 1977 1978 }; 1979 1980 #endif 1981