1 /* 2 * Optimizations for Tiny Code Generator for QEMU 3 * 4 * Copyright (c) 2010 Samsung Electronics. 5 * Contributed by Kirill Batuzov <batuzovk@ispras.ru> 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining a copy 8 * of this software and associated documentation files (the "Software"), to deal 9 * in the Software without restriction, including without limitation the rights 10 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 * copies of the Software, and to permit persons to whom the Software is 12 * furnished to do so, subject to the following conditions: 13 * 14 * The above copyright notice and this permission notice shall be included in 15 * all copies or substantial portions of the Software. 16 * 17 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 20 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN 23 * THE SOFTWARE. 24 */ 25 26 #include "config.h" 27 28 #include <stdlib.h> 29 #include <stdio.h> 30 31 #include "qemu-common.h" 32 #include "tcg-op.h" 33 34 #define CASE_OP_32_64(x) \ 35 glue(glue(case INDEX_op_, x), _i32): \ 36 glue(glue(case INDEX_op_, x), _i64) 37 38 typedef enum { 39 TCG_TEMP_UNDEF = 0, 40 TCG_TEMP_CONST, 41 TCG_TEMP_COPY, 42 } tcg_temp_state; 43 44 struct tcg_temp_info { 45 tcg_temp_state state; 46 uint16_t prev_copy; 47 uint16_t next_copy; 48 tcg_target_ulong val; 49 tcg_target_ulong mask; 50 }; 51 52 static struct tcg_temp_info temps[TCG_MAX_TEMPS]; 53 54 /* Reset TEMP's state to TCG_TEMP_UNDEF. If TEMP only had one copy, remove 55 the copy flag from the left temp. 
*/ 56 static void reset_temp(TCGArg temp) 57 { 58 if (temps[temp].state == TCG_TEMP_COPY) { 59 if (temps[temp].prev_copy == temps[temp].next_copy) { 60 temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF; 61 } else { 62 temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy; 63 temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy; 64 } 65 } 66 temps[temp].state = TCG_TEMP_UNDEF; 67 temps[temp].mask = -1; 68 } 69 70 static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op, 71 TCGOpcode opc, int nargs) 72 { 73 int oi = s->gen_next_op_idx; 74 int pi = s->gen_next_parm_idx; 75 int prev = old_op->prev; 76 int next = old_op - s->gen_op_buf; 77 TCGOp *new_op; 78 79 tcg_debug_assert(oi < OPC_BUF_SIZE); 80 tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE); 81 s->gen_next_op_idx = oi + 1; 82 s->gen_next_parm_idx = pi + nargs; 83 84 new_op = &s->gen_op_buf[oi]; 85 *new_op = (TCGOp){ 86 .opc = opc, 87 .args = pi, 88 .prev = prev, 89 .next = next 90 }; 91 if (prev >= 0) { 92 s->gen_op_buf[prev].next = oi; 93 } else { 94 s->gen_first_op_idx = oi; 95 } 96 old_op->prev = oi; 97 98 return new_op; 99 } 100 101 /* Reset all temporaries, given that there are NB_TEMPS of them. */ 102 static void reset_all_temps(int nb_temps) 103 { 104 int i; 105 for (i = 0; i < nb_temps; i++) { 106 temps[i].state = TCG_TEMP_UNDEF; 107 temps[i].mask = -1; 108 } 109 } 110 111 static int op_bits(TCGOpcode op) 112 { 113 const TCGOpDef *def = &tcg_op_defs[op]; 114 return def->flags & TCG_OPF_64BIT ? 
64 : 32; 115 } 116 117 static TCGOpcode op_to_mov(TCGOpcode op) 118 { 119 switch (op_bits(op)) { 120 case 32: 121 return INDEX_op_mov_i32; 122 case 64: 123 return INDEX_op_mov_i64; 124 default: 125 fprintf(stderr, "op_to_mov: unexpected return value of " 126 "function op_bits.\n"); 127 tcg_abort(); 128 } 129 } 130 131 static TCGOpcode op_to_movi(TCGOpcode op) 132 { 133 switch (op_bits(op)) { 134 case 32: 135 return INDEX_op_movi_i32; 136 case 64: 137 return INDEX_op_movi_i64; 138 default: 139 fprintf(stderr, "op_to_movi: unexpected return value of " 140 "function op_bits.\n"); 141 tcg_abort(); 142 } 143 } 144 145 static TCGArg find_better_copy(TCGContext *s, TCGArg temp) 146 { 147 TCGArg i; 148 149 /* If this is already a global, we can't do better. */ 150 if (temp < s->nb_globals) { 151 return temp; 152 } 153 154 /* Search for a global first. */ 155 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { 156 if (i < s->nb_globals) { 157 return i; 158 } 159 } 160 161 /* If it is a temp, search for a temp local. */ 162 if (!s->temps[temp].temp_local) { 163 for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) { 164 if (s->temps[i].temp_local) { 165 return i; 166 } 167 } 168 } 169 170 /* Failure to find a better representation, return the same temp. 
*/ 171 return temp; 172 } 173 174 static bool temps_are_copies(TCGArg arg1, TCGArg arg2) 175 { 176 TCGArg i; 177 178 if (arg1 == arg2) { 179 return true; 180 } 181 182 if (temps[arg1].state != TCG_TEMP_COPY 183 || temps[arg2].state != TCG_TEMP_COPY) { 184 return false; 185 } 186 187 for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) { 188 if (i == arg2) { 189 return true; 190 } 191 } 192 193 return false; 194 } 195 196 static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args, 197 TCGArg dst, TCGArg val) 198 { 199 TCGOpcode new_op = op_to_movi(op->opc); 200 tcg_target_ulong mask; 201 202 op->opc = new_op; 203 204 reset_temp(dst); 205 temps[dst].state = TCG_TEMP_CONST; 206 temps[dst].val = val; 207 mask = val; 208 if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { 209 /* High bits of the destination are now garbage. */ 210 mask |= ~0xffffffffull; 211 } 212 temps[dst].mask = mask; 213 214 args[0] = dst; 215 args[1] = val; 216 } 217 218 static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args, 219 TCGArg dst, TCGArg src) 220 { 221 if (temps_are_copies(dst, src)) { 222 tcg_op_remove(s, op); 223 return; 224 } 225 226 if (temps[src].state == TCG_TEMP_CONST) { 227 tcg_opt_gen_movi(s, op, args, dst, temps[src].val); 228 return; 229 } 230 231 TCGOpcode new_op = op_to_mov(op->opc); 232 tcg_target_ulong mask; 233 234 op->opc = new_op; 235 236 reset_temp(dst); 237 mask = temps[src].mask; 238 if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) { 239 /* High bits of the destination are now garbage. 
*/ 240 mask |= ~0xffffffffull; 241 } 242 temps[dst].mask = mask; 243 244 assert(temps[src].state != TCG_TEMP_CONST); 245 246 if (s->temps[src].type == s->temps[dst].type) { 247 if (temps[src].state != TCG_TEMP_COPY) { 248 temps[src].state = TCG_TEMP_COPY; 249 temps[src].next_copy = src; 250 temps[src].prev_copy = src; 251 } 252 temps[dst].state = TCG_TEMP_COPY; 253 temps[dst].next_copy = temps[src].next_copy; 254 temps[dst].prev_copy = src; 255 temps[temps[dst].next_copy].prev_copy = dst; 256 temps[src].next_copy = dst; 257 } 258 259 args[0] = dst; 260 args[1] = src; 261 } 262 263 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y) 264 { 265 uint64_t l64, h64; 266 267 switch (op) { 268 CASE_OP_32_64(add): 269 return x + y; 270 271 CASE_OP_32_64(sub): 272 return x - y; 273 274 CASE_OP_32_64(mul): 275 return x * y; 276 277 CASE_OP_32_64(and): 278 return x & y; 279 280 CASE_OP_32_64(or): 281 return x | y; 282 283 CASE_OP_32_64(xor): 284 return x ^ y; 285 286 case INDEX_op_shl_i32: 287 return (uint32_t)x << (y & 31); 288 289 case INDEX_op_shl_i64: 290 return (uint64_t)x << (y & 63); 291 292 case INDEX_op_shr_i32: 293 return (uint32_t)x >> (y & 31); 294 295 case INDEX_op_trunc_shr_i32: 296 case INDEX_op_shr_i64: 297 return (uint64_t)x >> (y & 63); 298 299 case INDEX_op_sar_i32: 300 return (int32_t)x >> (y & 31); 301 302 case INDEX_op_sar_i64: 303 return (int64_t)x >> (y & 63); 304 305 case INDEX_op_rotr_i32: 306 return ror32(x, y & 31); 307 308 case INDEX_op_rotr_i64: 309 return ror64(x, y & 63); 310 311 case INDEX_op_rotl_i32: 312 return rol32(x, y & 31); 313 314 case INDEX_op_rotl_i64: 315 return rol64(x, y & 63); 316 317 CASE_OP_32_64(not): 318 return ~x; 319 320 CASE_OP_32_64(neg): 321 return -x; 322 323 CASE_OP_32_64(andc): 324 return x & ~y; 325 326 CASE_OP_32_64(orc): 327 return x | ~y; 328 329 CASE_OP_32_64(eqv): 330 return ~(x ^ y); 331 332 CASE_OP_32_64(nand): 333 return ~(x & y); 334 335 CASE_OP_32_64(nor): 336 return ~(x | y); 337 338 
CASE_OP_32_64(ext8s): 339 return (int8_t)x; 340 341 CASE_OP_32_64(ext16s): 342 return (int16_t)x; 343 344 CASE_OP_32_64(ext8u): 345 return (uint8_t)x; 346 347 CASE_OP_32_64(ext16u): 348 return (uint16_t)x; 349 350 case INDEX_op_ext32s_i64: 351 return (int32_t)x; 352 353 case INDEX_op_ext32u_i64: 354 return (uint32_t)x; 355 356 case INDEX_op_muluh_i32: 357 return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32; 358 case INDEX_op_mulsh_i32: 359 return ((int64_t)(int32_t)x * (int32_t)y) >> 32; 360 361 case INDEX_op_muluh_i64: 362 mulu64(&l64, &h64, x, y); 363 return h64; 364 case INDEX_op_mulsh_i64: 365 muls64(&l64, &h64, x, y); 366 return h64; 367 368 case INDEX_op_div_i32: 369 /* Avoid crashing on divide by zero, otherwise undefined. */ 370 return (int32_t)x / ((int32_t)y ? : 1); 371 case INDEX_op_divu_i32: 372 return (uint32_t)x / ((uint32_t)y ? : 1); 373 case INDEX_op_div_i64: 374 return (int64_t)x / ((int64_t)y ? : 1); 375 case INDEX_op_divu_i64: 376 return (uint64_t)x / ((uint64_t)y ? : 1); 377 378 case INDEX_op_rem_i32: 379 return (int32_t)x % ((int32_t)y ? : 1); 380 case INDEX_op_remu_i32: 381 return (uint32_t)x % ((uint32_t)y ? : 1); 382 case INDEX_op_rem_i64: 383 return (int64_t)x % ((int64_t)y ? : 1); 384 case INDEX_op_remu_i64: 385 return (uint64_t)x % ((uint64_t)y ? 
: 1); 386 387 default: 388 fprintf(stderr, 389 "Unrecognized operation %d in do_constant_folding.\n", op); 390 tcg_abort(); 391 } 392 } 393 394 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y) 395 { 396 TCGArg res = do_constant_folding_2(op, x, y); 397 if (op_bits(op) == 32) { 398 res &= 0xffffffff; 399 } 400 return res; 401 } 402 403 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c) 404 { 405 switch (c) { 406 case TCG_COND_EQ: 407 return x == y; 408 case TCG_COND_NE: 409 return x != y; 410 case TCG_COND_LT: 411 return (int32_t)x < (int32_t)y; 412 case TCG_COND_GE: 413 return (int32_t)x >= (int32_t)y; 414 case TCG_COND_LE: 415 return (int32_t)x <= (int32_t)y; 416 case TCG_COND_GT: 417 return (int32_t)x > (int32_t)y; 418 case TCG_COND_LTU: 419 return x < y; 420 case TCG_COND_GEU: 421 return x >= y; 422 case TCG_COND_LEU: 423 return x <= y; 424 case TCG_COND_GTU: 425 return x > y; 426 default: 427 tcg_abort(); 428 } 429 } 430 431 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c) 432 { 433 switch (c) { 434 case TCG_COND_EQ: 435 return x == y; 436 case TCG_COND_NE: 437 return x != y; 438 case TCG_COND_LT: 439 return (int64_t)x < (int64_t)y; 440 case TCG_COND_GE: 441 return (int64_t)x >= (int64_t)y; 442 case TCG_COND_LE: 443 return (int64_t)x <= (int64_t)y; 444 case TCG_COND_GT: 445 return (int64_t)x > (int64_t)y; 446 case TCG_COND_LTU: 447 return x < y; 448 case TCG_COND_GEU: 449 return x >= y; 450 case TCG_COND_LEU: 451 return x <= y; 452 case TCG_COND_GTU: 453 return x > y; 454 default: 455 tcg_abort(); 456 } 457 } 458 459 static bool do_constant_folding_cond_eq(TCGCond c) 460 { 461 switch (c) { 462 case TCG_COND_GT: 463 case TCG_COND_LTU: 464 case TCG_COND_LT: 465 case TCG_COND_GTU: 466 case TCG_COND_NE: 467 return 0; 468 case TCG_COND_GE: 469 case TCG_COND_GEU: 470 case TCG_COND_LE: 471 case TCG_COND_LEU: 472 case TCG_COND_EQ: 473 return 1; 474 default: 475 tcg_abort(); 476 } 477 } 478 479 /* 
   Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can */
static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
                                       TCGArg y, TCGCond c)
{
    if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
        /* Both operands constant: fold at the opcode's width.  */
        switch (op_bits(op)) {
        case 32:
            return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
        case 64:
            return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
        default:
            tcg_abort();
        }
    } else if (temps_are_copies(x, y)) {
        return do_constant_folding_cond_eq(c);
    } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
        /* Unsigned comparisons against zero have a fixed result.  */
        switch (c) {
        case TCG_COND_LTU:
            return 0;
        case TCG_COND_GEU:
            return 1;
        default:
            return 2;
        }
    } else {
        return 2;
    }
}

/* Return 2 if the condition can't be simplified, and the result
   of the condition (0 or 1) if it can.  P1 and P2 each point to a
   low/high pair of 32-bit temps forming a 64-bit value.  */
static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
{
    TCGArg al = p1[0], ah = p1[1];
    TCGArg bl = p2[0], bh = p2[1];

    if (temps[bl].state == TCG_TEMP_CONST
        && temps[bh].state == TCG_TEMP_CONST) {
        uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;

        if (temps[al].state == TCG_TEMP_CONST
            && temps[ah].state == TCG_TEMP_CONST) {
            uint64_t a;
            a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
            return do_constant_folding_cond_64(a, b, c);
        }
        if (b == 0) {
            /* Unsigned comparisons against zero have a fixed result.  */
            switch (c) {
            case TCG_COND_LTU:
                return 0;
            case TCG_COND_GEU:
                return 1;
            default:
                break;
            }
        }
    }
    if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
        return do_constant_folding_cond_eq(c);
    }
    return 2;
}

/* Swap *P1 and *P2 when that puts a constant in the second operand, or
   (for DEST == *P2) canonicalizes to "op a, a, b".  Returns true if the
   operands were swapped, so the caller can adjust a condition code.  */
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    TCGArg a1 = *p1, a2 = *p2;
    int sum = 0;
    sum += temps[a1].state == TCG_TEMP_CONST;
    sum -= temps[a2].state == TCG_TEMP_CONST;

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (sum > 0 || (sum == 0 && dest == a2)) {
        *p1 = a2;
        *p2 = a1;
        return true;
    }
    return false;
}

/* Like swap_commutative, but for low/high 32-bit operand pairs: swap the
   pairs P1 and P2 when P1 holds strictly more constants.  */
static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int sum = 0;
    sum += temps[p1[0]].state == TCG_TEMP_CONST;
    sum += temps[p1[1]].state == TCG_TEMP_CONST;
    sum -= temps[p2[0]].state == TCG_TEMP_CONST;
    sum -= temps[p2[1]].state == TCG_TEMP_CONST;
    if (sum > 0) {
        TCGArg t;
        t = p1[0], p1[0] = p2[0], p2[0] = t;
        t = p1[1], p1[1] = p2[1], p2[1] = t;
        return true;
    }
    return false;
}

/* Propagate constants and copies, fold constant expressions. */
void tcg_optimize(TCGContext *s)
{
    int oi, oi_next, nb_temps, nb_globals;

    /* Array VALS has an element for each temp.
       If this temp holds a constant then its value is kept in VALS' element.
       If this temp is a copy of other ones then the other copies are
       available through the doubly linked circular list. */

    nb_temps = s->nb_temps;
    nb_globals = s->nb_globals;
    reset_all_temps(nb_temps);

    /* Single forward pass over the op list; oi_next is latched early so
       that ops inserted before the current one are not revisited.  */
    for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
        tcg_target_ulong mask, partmask, affected;
        int nb_oargs, nb_iargs, i;
        TCGArg tmp;

        TCGOp * const op = &s->gen_op_buf[oi];
        TCGArg * const args = &s->gen_opparam_buf[op->args];
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        oi_next = op->next;
        if (opc == INDEX_op_call) {
            /* Calls carry their own variable arg counts.  */
            nb_oargs = op->callo;
            nb_iargs = op->calli;
        } else {
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
        }

        /* Do copy propagation */
        for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
            if (temps[args[i]].state == TCG_TEMP_COPY) {
                args[i] = find_better_copy(s, args[i]);
            }
        }

        /* For commutative operations make constant second argument */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(and):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            swap_commutative(args[0], &args[1], &args[2]);
            break;
        CASE_OP_32_64(brcond):
            if (swap_commutative(-1, &args[0], &args[1])) {
                args[2] = tcg_swap_cond(args[2]);
            }
            break;
        CASE_OP_32_64(setcond):
            if (swap_commutative(args[0], &args[1], &args[2])) {
                args[3] = tcg_swap_cond(args[3]);
            }
            break;
        CASE_OP_32_64(movcond):
            if (swap_commutative(-1, &args[1], &args[2])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            /* For movcond, we canonicalize the "false" input reg to match
               the destination reg so that the tcg backend can implement
               a "move if true" operation. */
            if (swap_commutative(args[0], &args[4], &args[3])) {
                args[5] = tcg_invert_cond(args[5]);
            }
            break;
        CASE_OP_32_64(add2):
            swap_commutative(args[0], &args[2], &args[4]);
            swap_commutative(args[1], &args[3], &args[5]);
            break;
        CASE_OP_32_64(mulu2):
        CASE_OP_32_64(muls2):
            swap_commutative(args[0], &args[2], &args[3]);
            break;
        case INDEX_op_brcond2_i32:
            if (swap_commutative2(&args[0], &args[2])) {
                args[4] = tcg_swap_cond(args[4]);
            }
            break;
        case INDEX_op_setcond2_i32:
            if (swap_commutative2(&args[1], &args[3])) {
                args[5] = tcg_swap_cond(args[5]);
            }
            break;
        default:
            break;
        }

        /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
           and "sub r, 0, a => neg r, a" case. */
        switch (opc) {
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        CASE_OP_32_64(sub):
            {
                TCGOpcode neg_op;
                bool have_neg;

                if (temps[args[2]].state == TCG_TEMP_CONST) {
                    /* Proceed with possible constant folding. */
                    break;
                }
                if (opc == INDEX_op_sub_i32) {
                    neg_op = INDEX_op_neg_i32;
                    have_neg = TCG_TARGET_HAS_neg_i32;
                } else {
                    neg_op = INDEX_op_neg_i64;
                    have_neg = TCG_TARGET_HAS_neg_i64;
                }
                if (!have_neg) {
                    break;
                }
                if (temps[args[1]].state == TCG_TEMP_CONST
                    && temps[args[1]].val == 0) {
                    op->opc = neg_op;
                    reset_temp(args[0]);
                    args[1] = args[2];
                    continue;
                }
            }
            break;
        /* The next four arms reduce an op with one all-ones/all-zeros
           constant input to a plain NOT of the other input (via try_not),
           when the backend supports NOT at this width.  */
        CASE_OP_32_64(xor):
        CASE_OP_32_64(nand):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == -1) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(nor):
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                i = 1;
                goto try_not;
            }
            break;
        CASE_OP_32_64(andc):
            if (temps[args[2]].state != TCG_TEMP_CONST
                && temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == -1) {
                i = 2;
                goto try_not;
            }
            break;
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            if (temps[args[2]].state != TCG_TEMP_CONST
                && temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[1]].val == 0) {
                i = 2;
                goto try_not;
            }
            break;
        try_not:
            {
                TCGOpcode not_op;
                bool have_not;

                if (def->flags & TCG_OPF_64BIT) {
                    not_op = INDEX_op_not_i64;
                    have_not = TCG_TARGET_HAS_not_i64;
                } else {
                    not_op = INDEX_op_not_i32;
                    have_not = TCG_TARGET_HAS_not_i32;
                }
                if (!have_not) {
                    break;
                }
                op->opc = not_op;
                reset_temp(args[0]);
                /* i was set by the jumping arm to the non-constant input.  */
                args[1] = args[i];
                continue;
            }
        default:
            break;
        }

        /* Simplify expression for "op r, a, const => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(andc):
            /* These are no-ops when the constant operand is 0.  */
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        CASE_OP_32_64(and):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
            /* These are no-ops when the constant operand is all-ones.  */
            if (temps[args[1]].state != TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == -1) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify using known-zero bits. Currently only ops with a single
           output argument is supported. */
        mask = -1;
        affected = -1;
        switch (opc) {
        CASE_OP_32_64(ext8s):
            if ((temps[args[1]].mask & 0x80) != 0) {
                break;
            }
            /* fallthrough: sign bit known zero, same as zero-extend */
        CASE_OP_32_64(ext8u):
            mask = 0xff;
            goto and_const;
        CASE_OP_32_64(ext16s):
            if ((temps[args[1]].mask & 0x8000) != 0) {
                break;
            }
            /* fallthrough */
        CASE_OP_32_64(ext16u):
            mask = 0xffff;
            goto and_const;
        case INDEX_op_ext32s_i64:
            if ((temps[args[1]].mask & 0x80000000) != 0) {
                break;
            }
            /* fallthrough */
        case INDEX_op_ext32u_i64:
            mask = 0xffffffffU;
            goto and_const;

        CASE_OP_32_64(and):
            mask = temps[args[2]].mask;
            if (temps[args[2]].state == TCG_TEMP_CONST) {
        and_const:
                /* Bits of args[1] outside MASK do not affect the result.  */
                affected = temps[args[1]].mask & ~mask;
            }
            mask = temps[args[1]].mask & mask;
            break;

        CASE_OP_32_64(andc):
            /* Known-zeros does not imply known-ones.  Therefore unless
               args[2] is constant, we can't infer anything from it. */
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                mask = ~temps[args[2]].mask;
                goto and_const;
            }
            /* But we certainly know nothing outside args[1] may be set. */
            mask = temps[args[1]].mask;
            break;

        case INDEX_op_sar_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 31;
                mask = (int32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_sar_i64:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 63;
                mask = (int64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_shr_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 31;
                mask = (uint32_t)temps[args[1]].mask >> tmp;
            }
            break;
        case INDEX_op_shr_i64:
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & 63;
                mask = (uint64_t)temps[args[1]].mask >> tmp;
            }
            break;

        case INDEX_op_trunc_shr_i32:
            mask = (uint64_t)temps[args[1]].mask >> args[2];
            break;

        CASE_OP_32_64(shl):
            if (temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
                mask = temps[args[1]].mask << tmp;
            }
            break;

        CASE_OP_32_64(neg):
            /* Set to 1 all bits to the left of the rightmost. */
            mask = -(temps[args[1]].mask & -temps[args[1]].mask);
            break;

        CASE_OP_32_64(deposit):
            mask = deposit64(temps[args[1]].mask, args[3], args[4],
                             temps[args[2]].mask);
            break;

        CASE_OP_32_64(or):
        CASE_OP_32_64(xor):
            mask = temps[args[1]].mask | temps[args[2]].mask;
            break;

        CASE_OP_32_64(setcond):
        case INDEX_op_setcond2_i32:
            mask = 1;
            break;

        CASE_OP_32_64(movcond):
            mask = temps[args[3]].mask | temps[args[4]].mask;
            break;

        CASE_OP_32_64(ld8u):
            mask = 0xff;
            break;
        CASE_OP_32_64(ld16u):
            mask = 0xffff;
            break;
        case INDEX_op_ld32u_i64:
            mask = 0xffffffffu;
            break;

        CASE_OP_32_64(qemu_ld):
            {
                TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
                TCGMemOp mop = get_memop(oi);
                if (!(mop & MO_SIGN)) {
                    /* Unsigned load of 2^(8<<size) - 1 max.  */
                    mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
                }
            }
            break;

        default:
            break;
        }

        /* 32-bit ops generate 32-bit results.  For the result is zero test
           below, we can ignore high bits, but for further optimizations we
           need to record that the high bits contain garbage. */
        partmask = mask;
        if (!(def->flags & TCG_OPF_64BIT)) {
            mask |= ~(tcg_target_ulong)0xffffffffu;
            partmask &= 0xffffffffu;
            affected &= 0xffffffffu;
        }

        if (partmask == 0) {
            /* All result bits known zero: the op is a movi 0.  */
            assert(nb_oargs == 1);
            tcg_opt_gen_movi(s, op, args, args[0], 0);
            continue;
        }
        if (affected == 0) {
            /* No input bit can change the result: the op is a mov.  */
            assert(nb_oargs == 1);
            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
            continue;
        }

        /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64(and):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
            if ((temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[2]].val == 0)) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => mov r, a" cases */
        switch (opc) {
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
            if (temps_are_copies(args[1], args[2])) {
                tcg_opt_gen_mov(s, op, args, args[0], args[1]);
                continue;
            }
            break;
        default:
            break;
        }

        /* Simplify expression for "op r, a, a => movi r, 0" cases */
        switch (opc) {
        CASE_OP_32_64(andc):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(xor):
            if (temps_are_copies(args[1], args[2])) {
                tcg_opt_gen_movi(s, op, args, args[0], 0);
                continue;
            }
            break;
        default:
            break;
        }

        /* Propagate constants through copy operations and do constant
           folding.  Constants will be substituted to arguments by register
           allocator where needed and possible.  Also detect copies. */
        switch (opc) {
        CASE_OP_32_64(mov):
            tcg_opt_gen_mov(s, op, args, args[0], args[1]);
            break;
        CASE_OP_32_64(movi):
            tcg_opt_gen_movi(s, op, args, args[0], args[1]);
            break;

        CASE_OP_32_64(not):
        CASE_OP_32_64(neg):
        CASE_OP_32_64(ext8s):
        CASE_OP_32_64(ext8u):
        CASE_OP_32_64(ext16s):
        CASE_OP_32_64(ext16u):
        case INDEX_op_ext32s_i64:
        case INDEX_op_ext32u_i64:
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                tmp = do_constant_folding(opc, temps[args[1]].val, 0);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        case INDEX_op_trunc_shr_i32:
            if (temps[args[1]].state == TCG_TEMP_CONST) {
                tmp = do_constant_folding(opc, temps[args[1]].val, args[2]);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(add):
        CASE_OP_32_64(sub):
        CASE_OP_32_64(mul):
        CASE_OP_32_64(or):
        CASE_OP_32_64(and):
        CASE_OP_32_64(xor):
        CASE_OP_32_64(shl):
        CASE_OP_32_64(shr):
        CASE_OP_32_64(sar):
        CASE_OP_32_64(rotl):
        CASE_OP_32_64(rotr):
        CASE_OP_32_64(andc):
        CASE_OP_32_64(orc):
        CASE_OP_32_64(eqv):
        CASE_OP_32_64(nand):
        CASE_OP_32_64(nor):
        CASE_OP_32_64(muluh):
        CASE_OP_32_64(mulsh):
        CASE_OP_32_64(div):
        CASE_OP_32_64(divu):
        CASE_OP_32_64(rem):
        CASE_OP_32_64(remu):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = do_constant_folding(opc, temps[args[1]].val,
                                          temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(deposit):
            if (temps[args[1]].state == TCG_TEMP_CONST
                && temps[args[2]].state == TCG_TEMP_CONST) {
                tmp = deposit64(temps[args[1]].val, args[3], args[4],
                                temps[args[2]].val);
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(setcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
            if (tmp != 2) {
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
                break;
            }
            goto do_default;

        CASE_OP_32_64(brcond):
            tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
            if (tmp != 2) {
                if (tmp) {
                    /* Branch always taken: turn into unconditional br.  */
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[3];
                } else {
                    /* Branch never taken: delete it.  */
                    tcg_op_remove(s, op);
                }
                break;
            }
            goto do_default;

        CASE_OP_32_64(movcond):
            tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
            if (tmp != 2) {
                /* args[4-tmp] selects the true (args[3]) or false
                   (args[4]) input.  */
                tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
                break;
            }
            goto do_default;

        case INDEX_op_add2_i32:
        case INDEX_op_sub2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST
                && temps[args[4]].state == TCG_TEMP_CONST
                && temps[args[5]].state == TCG_TEMP_CONST) {
                uint32_t al = temps[args[2]].val;
                uint32_t ah = temps[args[3]].val;
                uint32_t bl = temps[args[4]].val;
                uint32_t bh = temps[args[5]].val;
                uint64_t a = ((uint64_t)ah << 32) | al;
                uint64_t b = ((uint64_t)bh << 32) | bl;
                TCGArg rl, rh;
                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                if (opc == INDEX_op_add2_i32) {
                    a += b;
                } else {
                    a -= b;
                }

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, rl, (uint32_t)a);
                tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(a >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_mulu2_i32:
            if (temps[args[2]].state == TCG_TEMP_CONST
                && temps[args[3]].state == TCG_TEMP_CONST) {
                uint32_t a = temps[args[2]].val;
                uint32_t b = temps[args[3]].val;
                uint64_t r = (uint64_t)a * b;
                TCGArg rl, rh;
                TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
                TCGArg *args2 = &s->gen_opparam_buf[op2->args];

                rl = args[0];
                rh = args[1];
                tcg_opt_gen_movi(s, op, args, rl, (uint32_t)r);
                tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(r >> 32));

                /* We've done all we need to do with the movi.  Skip it. */
                oi_next = op2->next;
                break;
            }
            goto do_default;

        case INDEX_op_brcond2_i32:
            tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
            if (tmp != 2) {
                if (tmp) {
            do_brcond_true:
                    reset_all_temps(nb_temps);
                    op->opc = INDEX_op_br;
                    args[0] = args[5];
                } else {
            do_brcond_false:
                    tcg_op_remove(s, op);
                }
            } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
                       && temps[args[2]].state == TCG_TEMP_CONST
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[2]].val == 0
                       && temps[args[3]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
            do_brcond_high:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[0] = args[1];
                args[1] = args[3];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp == 1) {
                    goto do_brcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_brcond_false;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_brcond_low:
                reset_all_temps(nb_temps);
                op->opc = INDEX_op_brcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[4] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[0], args[2], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_high;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_brcond_low;
                } else if (tmp == 1) {
                    goto do_brcond_true;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_setcond2_i32:
            tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
            if (tmp != 2) {
            do_setcond_const:
                tcg_opt_gen_movi(s, op, args, args[0], tmp);
            } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
                       && temps[args[3]].state == TCG_TEMP_CONST
                       && temps[args[4]].state == TCG_TEMP_CONST
                       && temps[args[3]].val == 0
                       && temps[args[4]].val == 0) {
                /* Simplify LT/GE comparisons vs zero to a single compare
                   vs the high word of the input. */
            do_setcond_high:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[1] = args[2];
                args[2] = args[4];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_EQ) {
                /* Simplify EQ comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_const;
                } else if (tmp == 1) {
                    goto do_setcond_high;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_EQ);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp != 1) {
                    goto do_default;
                }
            do_setcond_low:
                reset_temp(args[0]);
                temps[args[0]].mask = 1;
                op->opc = INDEX_op_setcond_i32;
                args[2] = args[3];
                args[3] = args[5];
            } else if (args[5] == TCG_COND_NE) {
                /* Simplify NE comparisons where one of the pairs
                   can be simplified. */
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[1], args[3], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_high;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
                                               args[2], args[4], TCG_COND_NE);
                if (tmp == 0) {
                    goto do_setcond_low;
                } else if (tmp == 1) {
                    goto do_setcond_const;
                }
                goto do_default;
            } else {
                goto do_default;
            }
            break;

        case INDEX_op_call:
            /* Unless the call promises not to touch globals, assume it
               clobbers all of them.  */
            if (!(args[nb_oargs + nb_iargs + 1]
                  & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
                for (i = 0; i < nb_globals; i++) {
                    reset_temp(i);
                }
            }
            goto do_reset_output;

        default:
        do_default:
            /* Default case: we know nothing about operation (or were unable
               to compute the operation result) so no propagation is done.
               We trash everything if the operation is the end of a basic
               block, otherwise we only trash the output args.  "mask" is
               the non-zero bits mask for the first output arg. */
            if (def->flags & TCG_OPF_BB_END) {
                reset_all_temps(nb_temps);
            } else {
        do_reset_output:
                for (i = 0; i < nb_oargs; i++) {
                    reset_temp(args[i]);
                    /* Save the corresponding known-zero bits mask for the
                       first output argument (only one supported so far). */
                    if (i == 0) {
                        temps[args[i]].mask = mask;
                    }
                }
            }
            break;
        }
    }
}