xref: /qemu/tcg/optimize.c (revision 52ea63de)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qemu-common.h"
28 #include "exec/cpu-common.h"
29 #include "tcg-op.h"
30 
31 #define CASE_OP_32_64(x)                        \
32         glue(glue(case INDEX_op_, x), _i32):    \
33         glue(glue(case INDEX_op_, x), _i64)
34 
35 struct tcg_temp_info {
36     bool is_const;
37     uint16_t prev_copy;
38     uint16_t next_copy;
39     tcg_target_ulong val;
40     tcg_target_ulong mask;
41 };
42 
43 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
44 static TCGTempSet temps_used;
45 
46 static inline bool temp_is_const(TCGArg arg)
47 {
48     return temps[arg].is_const;
49 }
50 
51 static inline bool temp_is_copy(TCGArg arg)
52 {
53     return temps[arg].next_copy != arg;
54 }
55 
56 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
57 static void reset_temp(TCGArg temp)
58 {
59     temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
60     temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
61     temps[temp].next_copy = temp;
62     temps[temp].prev_copy = temp;
63     temps[temp].is_const = false;
64     temps[temp].mask = -1;
65 }
66 
67 /* Reset all temporaries, given that there are NB_TEMPS of them.  */
68 static void reset_all_temps(int nb_temps)
69 {
70     bitmap_zero(temps_used.l, nb_temps);
71 }
72 
73 /* Initialize and activate a temporary.  */
74 static void init_temp_info(TCGArg temp)
75 {
76     if (!test_bit(temp, temps_used.l)) {
77         temps[temp].next_copy = temp;
78         temps[temp].prev_copy = temp;
79         temps[temp].is_const = false;
80         temps[temp].mask = -1;
81         set_bit(temp, temps_used.l);
82     }
83 }
84 
85 static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
86                                 TCGOpcode opc, int nargs)
87 {
88     int oi = s->gen_next_op_idx;
89     int pi = s->gen_next_parm_idx;
90     int prev = old_op->prev;
91     int next = old_op - s->gen_op_buf;
92     TCGOp *new_op;
93 
94     tcg_debug_assert(oi < OPC_BUF_SIZE);
95     tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
96     s->gen_next_op_idx = oi + 1;
97     s->gen_next_parm_idx = pi + nargs;
98 
99     new_op = &s->gen_op_buf[oi];
100     *new_op = (TCGOp){
101         .opc = opc,
102         .args = pi,
103         .prev = prev,
104         .next = next
105     };
106     if (prev >= 0) {
107         s->gen_op_buf[prev].next = oi;
108     } else {
109         s->gen_first_op_idx = oi;
110     }
111     old_op->prev = oi;
112 
113     return new_op;
114 }
115 
116 static int op_bits(TCGOpcode op)
117 {
118     const TCGOpDef *def = &tcg_op_defs[op];
119     return def->flags & TCG_OPF_64BIT ? 64 : 32;
120 }
121 
122 static TCGOpcode op_to_mov(TCGOpcode op)
123 {
124     switch (op_bits(op)) {
125     case 32:
126         return INDEX_op_mov_i32;
127     case 64:
128         return INDEX_op_mov_i64;
129     default:
130         fprintf(stderr, "op_to_mov: unexpected return value of "
131                 "function op_bits.\n");
132         tcg_abort();
133     }
134 }
135 
136 static TCGOpcode op_to_movi(TCGOpcode op)
137 {
138     switch (op_bits(op)) {
139     case 32:
140         return INDEX_op_movi_i32;
141     case 64:
142         return INDEX_op_movi_i64;
143     default:
144         fprintf(stderr, "op_to_movi: unexpected return value of "
145                 "function op_bits.\n");
146         tcg_abort();
147     }
148 }
149 
150 static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
151 {
152     TCGArg i;
153 
154     /* If this is already a global, we can't do better. */
155     if (temp < s->nb_globals) {
156         return temp;
157     }
158 
159     /* Search for a global first. */
160     for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
161         if (i < s->nb_globals) {
162             return i;
163         }
164     }
165 
166     /* If it is a temp, search for a temp local. */
167     if (!s->temps[temp].temp_local) {
168         for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
169             if (s->temps[i].temp_local) {
170                 return i;
171             }
172         }
173     }
174 
175     /* Failure to find a better representation, return the same temp. */
176     return temp;
177 }
178 
179 static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
180 {
181     TCGArg i;
182 
183     if (arg1 == arg2) {
184         return true;
185     }
186 
187     if (!temp_is_copy(arg1) || !temp_is_copy(arg2)) {
188         return false;
189     }
190 
191     for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
192         if (i == arg2) {
193             return true;
194         }
195     }
196 
197     return false;
198 }
199 
200 static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
201                              TCGArg dst, TCGArg val)
202 {
203     TCGOpcode new_op = op_to_movi(op->opc);
204     tcg_target_ulong mask;
205 
206     op->opc = new_op;
207 
208     reset_temp(dst);
209     temps[dst].is_const = true;
210     temps[dst].val = val;
211     mask = val;
212     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_movi_i32) {
213         /* High bits of the destination are now garbage.  */
214         mask |= ~0xffffffffull;
215     }
216     temps[dst].mask = mask;
217 
218     args[0] = dst;
219     args[1] = val;
220 }
221 
222 static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
223                             TCGArg dst, TCGArg src)
224 {
225     if (temps_are_copies(dst, src)) {
226         tcg_op_remove(s, op);
227         return;
228     }
229 
230     TCGOpcode new_op = op_to_mov(op->opc);
231     tcg_target_ulong mask;
232 
233     op->opc = new_op;
234 
235     reset_temp(dst);
236     mask = temps[src].mask;
237     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
238         /* High bits of the destination are now garbage.  */
239         mask |= ~0xffffffffull;
240     }
241     temps[dst].mask = mask;
242 
243     if (s->temps[src].type == s->temps[dst].type) {
244         temps[dst].next_copy = temps[src].next_copy;
245         temps[dst].prev_copy = src;
246         temps[temps[dst].next_copy].prev_copy = dst;
247         temps[src].next_copy = dst;
248         temps[dst].is_const = temps[src].is_const;
249         temps[dst].val = temps[src].val;
250     }
251 
252     args[0] = dst;
253     args[1] = src;
254 }
255 
256 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
257 {
258     uint64_t l64, h64;
259 
260     switch (op) {
261     CASE_OP_32_64(add):
262         return x + y;
263 
264     CASE_OP_32_64(sub):
265         return x - y;
266 
267     CASE_OP_32_64(mul):
268         return x * y;
269 
270     CASE_OP_32_64(and):
271         return x & y;
272 
273     CASE_OP_32_64(or):
274         return x | y;
275 
276     CASE_OP_32_64(xor):
277         return x ^ y;
278 
279     case INDEX_op_shl_i32:
280         return (uint32_t)x << (y & 31);
281 
282     case INDEX_op_shl_i64:
283         return (uint64_t)x << (y & 63);
284 
285     case INDEX_op_shr_i32:
286         return (uint32_t)x >> (y & 31);
287 
288     case INDEX_op_shr_i64:
289         return (uint64_t)x >> (y & 63);
290 
291     case INDEX_op_sar_i32:
292         return (int32_t)x >> (y & 31);
293 
294     case INDEX_op_sar_i64:
295         return (int64_t)x >> (y & 63);
296 
297     case INDEX_op_rotr_i32:
298         return ror32(x, y & 31);
299 
300     case INDEX_op_rotr_i64:
301         return ror64(x, y & 63);
302 
303     case INDEX_op_rotl_i32:
304         return rol32(x, y & 31);
305 
306     case INDEX_op_rotl_i64:
307         return rol64(x, y & 63);
308 
309     CASE_OP_32_64(not):
310         return ~x;
311 
312     CASE_OP_32_64(neg):
313         return -x;
314 
315     CASE_OP_32_64(andc):
316         return x & ~y;
317 
318     CASE_OP_32_64(orc):
319         return x | ~y;
320 
321     CASE_OP_32_64(eqv):
322         return ~(x ^ y);
323 
324     CASE_OP_32_64(nand):
325         return ~(x & y);
326 
327     CASE_OP_32_64(nor):
328         return ~(x | y);
329 
330     CASE_OP_32_64(ext8s):
331         return (int8_t)x;
332 
333     CASE_OP_32_64(ext16s):
334         return (int16_t)x;
335 
336     CASE_OP_32_64(ext8u):
337         return (uint8_t)x;
338 
339     CASE_OP_32_64(ext16u):
340         return (uint16_t)x;
341 
342     case INDEX_op_ext_i32_i64:
343     case INDEX_op_ext32s_i64:
344         return (int32_t)x;
345 
346     case INDEX_op_extu_i32_i64:
347     case INDEX_op_extrl_i64_i32:
348     case INDEX_op_ext32u_i64:
349         return (uint32_t)x;
350 
351     case INDEX_op_extrh_i64_i32:
352         return (uint64_t)x >> 32;
353 
354     case INDEX_op_muluh_i32:
355         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
356     case INDEX_op_mulsh_i32:
357         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
358 
359     case INDEX_op_muluh_i64:
360         mulu64(&l64, &h64, x, y);
361         return h64;
362     case INDEX_op_mulsh_i64:
363         muls64(&l64, &h64, x, y);
364         return h64;
365 
366     case INDEX_op_div_i32:
367         /* Avoid crashing on divide by zero, otherwise undefined.  */
368         return (int32_t)x / ((int32_t)y ? : 1);
369     case INDEX_op_divu_i32:
370         return (uint32_t)x / ((uint32_t)y ? : 1);
371     case INDEX_op_div_i64:
372         return (int64_t)x / ((int64_t)y ? : 1);
373     case INDEX_op_divu_i64:
374         return (uint64_t)x / ((uint64_t)y ? : 1);
375 
376     case INDEX_op_rem_i32:
377         return (int32_t)x % ((int32_t)y ? : 1);
378     case INDEX_op_remu_i32:
379         return (uint32_t)x % ((uint32_t)y ? : 1);
380     case INDEX_op_rem_i64:
381         return (int64_t)x % ((int64_t)y ? : 1);
382     case INDEX_op_remu_i64:
383         return (uint64_t)x % ((uint64_t)y ? : 1);
384 
385     default:
386         fprintf(stderr,
387                 "Unrecognized operation %d in do_constant_folding.\n", op);
388         tcg_abort();
389     }
390 }
391 
392 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
393 {
394     TCGArg res = do_constant_folding_2(op, x, y);
395     if (op_bits(op) == 32) {
396         res = (int32_t)res;
397     }
398     return res;
399 }
400 
401 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
402 {
403     switch (c) {
404     case TCG_COND_EQ:
405         return x == y;
406     case TCG_COND_NE:
407         return x != y;
408     case TCG_COND_LT:
409         return (int32_t)x < (int32_t)y;
410     case TCG_COND_GE:
411         return (int32_t)x >= (int32_t)y;
412     case TCG_COND_LE:
413         return (int32_t)x <= (int32_t)y;
414     case TCG_COND_GT:
415         return (int32_t)x > (int32_t)y;
416     case TCG_COND_LTU:
417         return x < y;
418     case TCG_COND_GEU:
419         return x >= y;
420     case TCG_COND_LEU:
421         return x <= y;
422     case TCG_COND_GTU:
423         return x > y;
424     default:
425         tcg_abort();
426     }
427 }
428 
429 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
430 {
431     switch (c) {
432     case TCG_COND_EQ:
433         return x == y;
434     case TCG_COND_NE:
435         return x != y;
436     case TCG_COND_LT:
437         return (int64_t)x < (int64_t)y;
438     case TCG_COND_GE:
439         return (int64_t)x >= (int64_t)y;
440     case TCG_COND_LE:
441         return (int64_t)x <= (int64_t)y;
442     case TCG_COND_GT:
443         return (int64_t)x > (int64_t)y;
444     case TCG_COND_LTU:
445         return x < y;
446     case TCG_COND_GEU:
447         return x >= y;
448     case TCG_COND_LEU:
449         return x <= y;
450     case TCG_COND_GTU:
451         return x > y;
452     default:
453         tcg_abort();
454     }
455 }
456 
457 static bool do_constant_folding_cond_eq(TCGCond c)
458 {
459     switch (c) {
460     case TCG_COND_GT:
461     case TCG_COND_LTU:
462     case TCG_COND_LT:
463     case TCG_COND_GTU:
464     case TCG_COND_NE:
465         return 0;
466     case TCG_COND_GE:
467     case TCG_COND_GEU:
468     case TCG_COND_LE:
469     case TCG_COND_LEU:
470     case TCG_COND_EQ:
471         return 1;
472     default:
473         tcg_abort();
474     }
475 }
476 
477 /* Return 2 if the condition can't be simplified, and the result
478    of the condition (0 or 1) if it can */
479 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
480                                        TCGArg y, TCGCond c)
481 {
482     if (temp_is_const(x) && temp_is_const(y)) {
483         switch (op_bits(op)) {
484         case 32:
485             return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
486         case 64:
487             return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
488         default:
489             tcg_abort();
490         }
491     } else if (temps_are_copies(x, y)) {
492         return do_constant_folding_cond_eq(c);
493     } else if (temp_is_const(y) && temps[y].val == 0) {
494         switch (c) {
495         case TCG_COND_LTU:
496             return 0;
497         case TCG_COND_GEU:
498             return 1;
499         default:
500             return 2;
501         }
502     } else {
503         return 2;
504     }
505 }
506 
507 /* Return 2 if the condition can't be simplified, and the result
508    of the condition (0 or 1) if it can */
509 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
510 {
511     TCGArg al = p1[0], ah = p1[1];
512     TCGArg bl = p2[0], bh = p2[1];
513 
514     if (temp_is_const(bl) && temp_is_const(bh)) {
515         uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
516 
517         if (temp_is_const(al) && temp_is_const(ah)) {
518             uint64_t a;
519             a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
520             return do_constant_folding_cond_64(a, b, c);
521         }
522         if (b == 0) {
523             switch (c) {
524             case TCG_COND_LTU:
525                 return 0;
526             case TCG_COND_GEU:
527                 return 1;
528             default:
529                 break;
530             }
531         }
532     }
533     if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
534         return do_constant_folding_cond_eq(c);
535     }
536     return 2;
537 }
538 
539 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
540 {
541     TCGArg a1 = *p1, a2 = *p2;
542     int sum = 0;
543     sum += temp_is_const(a1);
544     sum -= temp_is_const(a2);
545 
546     /* Prefer the constant in second argument, and then the form
547        op a, a, b, which is better handled on non-RISC hosts. */
548     if (sum > 0 || (sum == 0 && dest == a2)) {
549         *p1 = a2;
550         *p2 = a1;
551         return true;
552     }
553     return false;
554 }
555 
556 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
557 {
558     int sum = 0;
559     sum += temp_is_const(p1[0]);
560     sum += temp_is_const(p1[1]);
561     sum -= temp_is_const(p2[0]);
562     sum -= temp_is_const(p2[1]);
563     if (sum > 0) {
564         TCGArg t;
565         t = p1[0], p1[0] = p2[0], p2[0] = t;
566         t = p1[1], p1[1] = p2[1], p2[1] = t;
567         return true;
568     }
569     return false;
570 }
571 
572 /* Propagate constants and copies, fold constant expressions. */
573 void tcg_optimize(TCGContext *s)
574 {
575     int oi, oi_next, nb_temps, nb_globals;
576 
577     /* Array VALS has an element for each temp.
578        If this temp holds a constant then its value is kept in VALS' element.
579        If this temp is a copy of other ones then the other copies are
580        available through the doubly linked circular list. */
581 
582     nb_temps = s->nb_temps;
583     nb_globals = s->nb_globals;
584     reset_all_temps(nb_temps);
585 
586     for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
587         tcg_target_ulong mask, partmask, affected;
588         int nb_oargs, nb_iargs, i;
589         TCGArg tmp;
590 
591         TCGOp * const op = &s->gen_op_buf[oi];
592         TCGArg * const args = &s->gen_opparam_buf[op->args];
593         TCGOpcode opc = op->opc;
594         const TCGOpDef *def = &tcg_op_defs[opc];
595 
596         oi_next = op->next;
597 
598         /* Count the arguments, and initialize the temps that are
599            going to be used */
600         if (opc == INDEX_op_call) {
601             nb_oargs = op->callo;
602             nb_iargs = op->calli;
603             for (i = 0; i < nb_oargs + nb_iargs; i++) {
604                 tmp = args[i];
605                 if (tmp != TCG_CALL_DUMMY_ARG) {
606                     init_temp_info(tmp);
607                 }
608             }
609         } else {
610             nb_oargs = def->nb_oargs;
611             nb_iargs = def->nb_iargs;
612             for (i = 0; i < nb_oargs + nb_iargs; i++) {
613                 init_temp_info(args[i]);
614             }
615         }
616 
617         /* Do copy propagation */
618         for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
619             if (temp_is_copy(args[i])) {
620                 args[i] = find_better_copy(s, args[i]);
621             }
622         }
623 
624         /* For commutative operations make constant second argument */
625         switch (opc) {
626         CASE_OP_32_64(add):
627         CASE_OP_32_64(mul):
628         CASE_OP_32_64(and):
629         CASE_OP_32_64(or):
630         CASE_OP_32_64(xor):
631         CASE_OP_32_64(eqv):
632         CASE_OP_32_64(nand):
633         CASE_OP_32_64(nor):
634         CASE_OP_32_64(muluh):
635         CASE_OP_32_64(mulsh):
636             swap_commutative(args[0], &args[1], &args[2]);
637             break;
638         CASE_OP_32_64(brcond):
639             if (swap_commutative(-1, &args[0], &args[1])) {
640                 args[2] = tcg_swap_cond(args[2]);
641             }
642             break;
643         CASE_OP_32_64(setcond):
644             if (swap_commutative(args[0], &args[1], &args[2])) {
645                 args[3] = tcg_swap_cond(args[3]);
646             }
647             break;
648         CASE_OP_32_64(movcond):
649             if (swap_commutative(-1, &args[1], &args[2])) {
650                 args[5] = tcg_swap_cond(args[5]);
651             }
652             /* For movcond, we canonicalize the "false" input reg to match
653                the destination reg so that the tcg backend can implement
654                a "move if true" operation.  */
655             if (swap_commutative(args[0], &args[4], &args[3])) {
656                 args[5] = tcg_invert_cond(args[5]);
657             }
658             break;
659         CASE_OP_32_64(add2):
660             swap_commutative(args[0], &args[2], &args[4]);
661             swap_commutative(args[1], &args[3], &args[5]);
662             break;
663         CASE_OP_32_64(mulu2):
664         CASE_OP_32_64(muls2):
665             swap_commutative(args[0], &args[2], &args[3]);
666             break;
667         case INDEX_op_brcond2_i32:
668             if (swap_commutative2(&args[0], &args[2])) {
669                 args[4] = tcg_swap_cond(args[4]);
670             }
671             break;
672         case INDEX_op_setcond2_i32:
673             if (swap_commutative2(&args[1], &args[3])) {
674                 args[5] = tcg_swap_cond(args[5]);
675             }
676             break;
677         default:
678             break;
679         }
680 
681         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
682            and "sub r, 0, a => neg r, a" case.  */
683         switch (opc) {
684         CASE_OP_32_64(shl):
685         CASE_OP_32_64(shr):
686         CASE_OP_32_64(sar):
687         CASE_OP_32_64(rotl):
688         CASE_OP_32_64(rotr):
689             if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
690                 tcg_opt_gen_movi(s, op, args, args[0], 0);
691                 continue;
692             }
693             break;
694         CASE_OP_32_64(sub):
695             {
696                 TCGOpcode neg_op;
697                 bool have_neg;
698 
699                 if (temp_is_const(args[2])) {
700                     /* Proceed with possible constant folding. */
701                     break;
702                 }
703                 if (opc == INDEX_op_sub_i32) {
704                     neg_op = INDEX_op_neg_i32;
705                     have_neg = TCG_TARGET_HAS_neg_i32;
706                 } else {
707                     neg_op = INDEX_op_neg_i64;
708                     have_neg = TCG_TARGET_HAS_neg_i64;
709                 }
710                 if (!have_neg) {
711                     break;
712                 }
713                 if (temp_is_const(args[1]) && temps[args[1]].val == 0) {
714                     op->opc = neg_op;
715                     reset_temp(args[0]);
716                     args[1] = args[2];
717                     continue;
718                 }
719             }
720             break;
721         CASE_OP_32_64(xor):
722         CASE_OP_32_64(nand):
723             if (!temp_is_const(args[1])
724                 && temp_is_const(args[2]) && temps[args[2]].val == -1) {
725                 i = 1;
726                 goto try_not;
727             }
728             break;
729         CASE_OP_32_64(nor):
730             if (!temp_is_const(args[1])
731                 && temp_is_const(args[2]) && temps[args[2]].val == 0) {
732                 i = 1;
733                 goto try_not;
734             }
735             break;
736         CASE_OP_32_64(andc):
737             if (!temp_is_const(args[2])
738                 && temp_is_const(args[1]) && temps[args[1]].val == -1) {
739                 i = 2;
740                 goto try_not;
741             }
742             break;
743         CASE_OP_32_64(orc):
744         CASE_OP_32_64(eqv):
745             if (!temp_is_const(args[2])
746                 && temp_is_const(args[1]) && temps[args[1]].val == 0) {
747                 i = 2;
748                 goto try_not;
749             }
750             break;
751         try_not:
752             {
753                 TCGOpcode not_op;
754                 bool have_not;
755 
756                 if (def->flags & TCG_OPF_64BIT) {
757                     not_op = INDEX_op_not_i64;
758                     have_not = TCG_TARGET_HAS_not_i64;
759                 } else {
760                     not_op = INDEX_op_not_i32;
761                     have_not = TCG_TARGET_HAS_not_i32;
762                 }
763                 if (!have_not) {
764                     break;
765                 }
766                 op->opc = not_op;
767                 reset_temp(args[0]);
768                 args[1] = args[i];
769                 continue;
770             }
771         default:
772             break;
773         }
774 
775         /* Simplify expression for "op r, a, const => mov r, a" cases */
776         switch (opc) {
777         CASE_OP_32_64(add):
778         CASE_OP_32_64(sub):
779         CASE_OP_32_64(shl):
780         CASE_OP_32_64(shr):
781         CASE_OP_32_64(sar):
782         CASE_OP_32_64(rotl):
783         CASE_OP_32_64(rotr):
784         CASE_OP_32_64(or):
785         CASE_OP_32_64(xor):
786         CASE_OP_32_64(andc):
787             if (!temp_is_const(args[1])
788                 && temp_is_const(args[2]) && temps[args[2]].val == 0) {
789                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
790                 continue;
791             }
792             break;
793         CASE_OP_32_64(and):
794         CASE_OP_32_64(orc):
795         CASE_OP_32_64(eqv):
796             if (!temp_is_const(args[1])
797                 && temp_is_const(args[2]) && temps[args[2]].val == -1) {
798                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
799                 continue;
800             }
801             break;
802         default:
803             break;
804         }
805 
806         /* Simplify using known-zero bits. Currently only ops with a single
807            output argument is supported. */
808         mask = -1;
809         affected = -1;
810         switch (opc) {
811         CASE_OP_32_64(ext8s):
812             if ((temps[args[1]].mask & 0x80) != 0) {
813                 break;
814             }
815         CASE_OP_32_64(ext8u):
816             mask = 0xff;
817             goto and_const;
818         CASE_OP_32_64(ext16s):
819             if ((temps[args[1]].mask & 0x8000) != 0) {
820                 break;
821             }
822         CASE_OP_32_64(ext16u):
823             mask = 0xffff;
824             goto and_const;
825         case INDEX_op_ext32s_i64:
826             if ((temps[args[1]].mask & 0x80000000) != 0) {
827                 break;
828             }
829         case INDEX_op_ext32u_i64:
830             mask = 0xffffffffU;
831             goto and_const;
832 
833         CASE_OP_32_64(and):
834             mask = temps[args[2]].mask;
835             if (temp_is_const(args[2])) {
836         and_const:
837                 affected = temps[args[1]].mask & ~mask;
838             }
839             mask = temps[args[1]].mask & mask;
840             break;
841 
842         case INDEX_op_ext_i32_i64:
843             if ((temps[args[1]].mask & 0x80000000) != 0) {
844                 break;
845             }
846         case INDEX_op_extu_i32_i64:
847             /* We do not compute affected as it is a size changing op.  */
848             mask = (uint32_t)temps[args[1]].mask;
849             break;
850 
851         CASE_OP_32_64(andc):
852             /* Known-zeros does not imply known-ones.  Therefore unless
853                args[2] is constant, we can't infer anything from it.  */
854             if (temp_is_const(args[2])) {
855                 mask = ~temps[args[2]].mask;
856                 goto and_const;
857             }
858             /* But we certainly know nothing outside args[1] may be set. */
859             mask = temps[args[1]].mask;
860             break;
861 
862         case INDEX_op_sar_i32:
863             if (temp_is_const(args[2])) {
864                 tmp = temps[args[2]].val & 31;
865                 mask = (int32_t)temps[args[1]].mask >> tmp;
866             }
867             break;
868         case INDEX_op_sar_i64:
869             if (temp_is_const(args[2])) {
870                 tmp = temps[args[2]].val & 63;
871                 mask = (int64_t)temps[args[1]].mask >> tmp;
872             }
873             break;
874 
875         case INDEX_op_shr_i32:
876             if (temp_is_const(args[2])) {
877                 tmp = temps[args[2]].val & 31;
878                 mask = (uint32_t)temps[args[1]].mask >> tmp;
879             }
880             break;
881         case INDEX_op_shr_i64:
882             if (temp_is_const(args[2])) {
883                 tmp = temps[args[2]].val & 63;
884                 mask = (uint64_t)temps[args[1]].mask >> tmp;
885             }
886             break;
887 
888         case INDEX_op_extrl_i64_i32:
889             mask = (uint32_t)temps[args[1]].mask;
890             break;
891         case INDEX_op_extrh_i64_i32:
892             mask = (uint64_t)temps[args[1]].mask >> 32;
893             break;
894 
895         CASE_OP_32_64(shl):
896             if (temp_is_const(args[2])) {
897                 tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
898                 mask = temps[args[1]].mask << tmp;
899             }
900             break;
901 
902         CASE_OP_32_64(neg):
903             /* Set to 1 all bits to the left of the rightmost.  */
904             mask = -(temps[args[1]].mask & -temps[args[1]].mask);
905             break;
906 
907         CASE_OP_32_64(deposit):
908             mask = deposit64(temps[args[1]].mask, args[3], args[4],
909                              temps[args[2]].mask);
910             break;
911 
912         CASE_OP_32_64(or):
913         CASE_OP_32_64(xor):
914             mask = temps[args[1]].mask | temps[args[2]].mask;
915             break;
916 
917         CASE_OP_32_64(setcond):
918         case INDEX_op_setcond2_i32:
919             mask = 1;
920             break;
921 
922         CASE_OP_32_64(movcond):
923             mask = temps[args[3]].mask | temps[args[4]].mask;
924             break;
925 
926         CASE_OP_32_64(ld8u):
927             mask = 0xff;
928             break;
929         CASE_OP_32_64(ld16u):
930             mask = 0xffff;
931             break;
932         case INDEX_op_ld32u_i64:
933             mask = 0xffffffffu;
934             break;
935 
936         CASE_OP_32_64(qemu_ld):
937             {
938                 TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
939                 TCGMemOp mop = get_memop(oi);
940                 if (!(mop & MO_SIGN)) {
941                     mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
942                 }
943             }
944             break;
945 
946         default:
947             break;
948         }
949 
950         /* 32-bit ops generate 32-bit results.  For the result is zero test
951            below, we can ignore high bits, but for further optimizations we
952            need to record that the high bits contain garbage.  */
953         partmask = mask;
954         if (!(def->flags & TCG_OPF_64BIT)) {
955             mask |= ~(tcg_target_ulong)0xffffffffu;
956             partmask &= 0xffffffffu;
957             affected &= 0xffffffffu;
958         }
959 
960         if (partmask == 0) {
961             tcg_debug_assert(nb_oargs == 1);
962             tcg_opt_gen_movi(s, op, args, args[0], 0);
963             continue;
964         }
965         if (affected == 0) {
966             tcg_debug_assert(nb_oargs == 1);
967             tcg_opt_gen_mov(s, op, args, args[0], args[1]);
968             continue;
969         }
970 
971         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
972         switch (opc) {
973         CASE_OP_32_64(and):
974         CASE_OP_32_64(mul):
975         CASE_OP_32_64(muluh):
976         CASE_OP_32_64(mulsh):
977             if ((temp_is_const(args[2]) && temps[args[2]].val == 0)) {
978                 tcg_opt_gen_movi(s, op, args, args[0], 0);
979                 continue;
980             }
981             break;
982         default:
983             break;
984         }
985 
986         /* Simplify expression for "op r, a, a => mov r, a" cases */
987         switch (opc) {
988         CASE_OP_32_64(or):
989         CASE_OP_32_64(and):
990             if (temps_are_copies(args[1], args[2])) {
991                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
992                 continue;
993             }
994             break;
995         default:
996             break;
997         }
998 
999         /* Simplify expression for "op r, a, a => movi r, 0" cases */
1000         switch (opc) {
1001         CASE_OP_32_64(andc):
1002         CASE_OP_32_64(sub):
1003         CASE_OP_32_64(xor):
1004             if (temps_are_copies(args[1], args[2])) {
1005                 tcg_opt_gen_movi(s, op, args, args[0], 0);
1006                 continue;
1007             }
1008             break;
1009         default:
1010             break;
1011         }
1012 
1013         /* Propagate constants through copy operations and do constant
1014            folding.  Constants will be substituted to arguments by register
1015            allocator where needed and possible.  Also detect copies. */
1016         switch (opc) {
1017         CASE_OP_32_64(mov):
1018             tcg_opt_gen_mov(s, op, args, args[0], args[1]);
1019             break;
1020         CASE_OP_32_64(movi):
1021             tcg_opt_gen_movi(s, op, args, args[0], args[1]);
1022             break;
1023 
1024         CASE_OP_32_64(not):
1025         CASE_OP_32_64(neg):
1026         CASE_OP_32_64(ext8s):
1027         CASE_OP_32_64(ext8u):
1028         CASE_OP_32_64(ext16s):
1029         CASE_OP_32_64(ext16u):
1030         case INDEX_op_ext32s_i64:
1031         case INDEX_op_ext32u_i64:
1032         case INDEX_op_ext_i32_i64:
1033         case INDEX_op_extu_i32_i64:
1034         case INDEX_op_extrl_i64_i32:
1035         case INDEX_op_extrh_i64_i32:
1036             if (temp_is_const(args[1])) {
1037                 tmp = do_constant_folding(opc, temps[args[1]].val, 0);
1038                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1039                 break;
1040             }
1041             goto do_default;
1042 
1043         CASE_OP_32_64(add):
1044         CASE_OP_32_64(sub):
1045         CASE_OP_32_64(mul):
1046         CASE_OP_32_64(or):
1047         CASE_OP_32_64(and):
1048         CASE_OP_32_64(xor):
1049         CASE_OP_32_64(shl):
1050         CASE_OP_32_64(shr):
1051         CASE_OP_32_64(sar):
1052         CASE_OP_32_64(rotl):
1053         CASE_OP_32_64(rotr):
1054         CASE_OP_32_64(andc):
1055         CASE_OP_32_64(orc):
1056         CASE_OP_32_64(eqv):
1057         CASE_OP_32_64(nand):
1058         CASE_OP_32_64(nor):
1059         CASE_OP_32_64(muluh):
1060         CASE_OP_32_64(mulsh):
1061         CASE_OP_32_64(div):
1062         CASE_OP_32_64(divu):
1063         CASE_OP_32_64(rem):
1064         CASE_OP_32_64(remu):
1065             if (temp_is_const(args[1]) && temp_is_const(args[2])) {
1066                 tmp = do_constant_folding(opc, temps[args[1]].val,
1067                                           temps[args[2]].val);
1068                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1069                 break;
1070             }
1071             goto do_default;
1072 
1073         CASE_OP_32_64(deposit):
1074             if (temp_is_const(args[1]) && temp_is_const(args[2])) {
1075                 tmp = deposit64(temps[args[1]].val, args[3], args[4],
1076                                 temps[args[2]].val);
1077                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1078                 break;
1079             }
1080             goto do_default;
1081 
1082         CASE_OP_32_64(setcond):
1083             tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
1084             if (tmp != 2) {
1085                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1086                 break;
1087             }
1088             goto do_default;
1089 
1090         CASE_OP_32_64(brcond):
1091             tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
1092             if (tmp != 2) {
1093                 if (tmp) {
1094                     reset_all_temps(nb_temps);
1095                     op->opc = INDEX_op_br;
1096                     args[0] = args[3];
1097                 } else {
1098                     tcg_op_remove(s, op);
1099                 }
1100                 break;
1101             }
1102             goto do_default;
1103 
1104         CASE_OP_32_64(movcond):
1105             tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
1106             if (tmp != 2) {
1107                 tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
1108                 break;
1109             }
1110             goto do_default;
1111 
1112         case INDEX_op_add2_i32:
1113         case INDEX_op_sub2_i32:
1114             if (temp_is_const(args[2]) && temp_is_const(args[3])
1115                 && temp_is_const(args[4]) && temp_is_const(args[5])) {
1116                 uint32_t al = temps[args[2]].val;
1117                 uint32_t ah = temps[args[3]].val;
1118                 uint32_t bl = temps[args[4]].val;
1119                 uint32_t bh = temps[args[5]].val;
1120                 uint64_t a = ((uint64_t)ah << 32) | al;
1121                 uint64_t b = ((uint64_t)bh << 32) | bl;
1122                 TCGArg rl, rh;
1123                 TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
1124                 TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1125 
1126                 if (opc == INDEX_op_add2_i32) {
1127                     a += b;
1128                 } else {
1129                     a -= b;
1130                 }
1131 
1132                 rl = args[0];
1133                 rh = args[1];
1134                 tcg_opt_gen_movi(s, op, args, rl, (int32_t)a);
1135                 tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(a >> 32));
1136 
1137                 /* We've done all we need to do with the movi.  Skip it.  */
1138                 oi_next = op2->next;
1139                 break;
1140             }
1141             goto do_default;
1142 
1143         case INDEX_op_mulu2_i32:
1144             if (temp_is_const(args[2]) && temp_is_const(args[3])) {
1145                 uint32_t a = temps[args[2]].val;
1146                 uint32_t b = temps[args[3]].val;
1147                 uint64_t r = (uint64_t)a * b;
1148                 TCGArg rl, rh;
1149                 TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
1150                 TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1151 
1152                 rl = args[0];
1153                 rh = args[1];
1154                 tcg_opt_gen_movi(s, op, args, rl, (int32_t)r);
1155                 tcg_opt_gen_movi(s, op2, args2, rh, (int32_t)(r >> 32));
1156 
1157                 /* We've done all we need to do with the movi.  Skip it.  */
1158                 oi_next = op2->next;
1159                 break;
1160             }
1161             goto do_default;
1162 
1163         case INDEX_op_brcond2_i32:
1164             tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
1165             if (tmp != 2) {
1166                 if (tmp) {
1167             do_brcond_true:
1168                     reset_all_temps(nb_temps);
1169                     op->opc = INDEX_op_br;
1170                     args[0] = args[5];
1171                 } else {
1172             do_brcond_false:
1173                     tcg_op_remove(s, op);
1174                 }
1175             } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
1176                        && temp_is_const(args[2]) && temps[args[2]].val == 0
1177                        && temp_is_const(args[3]) && temps[args[3]].val == 0) {
1178                 /* Simplify LT/GE comparisons vs zero to a single compare
1179                    vs the high word of the input.  */
1180             do_brcond_high:
1181                 reset_all_temps(nb_temps);
1182                 op->opc = INDEX_op_brcond_i32;
1183                 args[0] = args[1];
1184                 args[1] = args[3];
1185                 args[2] = args[4];
1186                 args[3] = args[5];
1187             } else if (args[4] == TCG_COND_EQ) {
1188                 /* Simplify EQ comparisons where one of the pairs
1189                    can be simplified.  */
1190                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1191                                                args[0], args[2], TCG_COND_EQ);
1192                 if (tmp == 0) {
1193                     goto do_brcond_false;
1194                 } else if (tmp == 1) {
1195                     goto do_brcond_high;
1196                 }
1197                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1198                                                args[1], args[3], TCG_COND_EQ);
1199                 if (tmp == 0) {
1200                     goto do_brcond_false;
1201                 } else if (tmp != 1) {
1202                     goto do_default;
1203                 }
1204             do_brcond_low:
1205                 reset_all_temps(nb_temps);
1206                 op->opc = INDEX_op_brcond_i32;
1207                 args[1] = args[2];
1208                 args[2] = args[4];
1209                 args[3] = args[5];
1210             } else if (args[4] == TCG_COND_NE) {
1211                 /* Simplify NE comparisons where one of the pairs
1212                    can be simplified.  */
1213                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1214                                                args[0], args[2], TCG_COND_NE);
1215                 if (tmp == 0) {
1216                     goto do_brcond_high;
1217                 } else if (tmp == 1) {
1218                     goto do_brcond_true;
1219                 }
1220                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1221                                                args[1], args[3], TCG_COND_NE);
1222                 if (tmp == 0) {
1223                     goto do_brcond_low;
1224                 } else if (tmp == 1) {
1225                     goto do_brcond_true;
1226                 }
1227                 goto do_default;
1228             } else {
1229                 goto do_default;
1230             }
1231             break;
1232 
1233         case INDEX_op_setcond2_i32:
1234             tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
1235             if (tmp != 2) {
1236             do_setcond_const:
1237                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1238             } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
1239                        && temp_is_const(args[3]) && temps[args[3]].val == 0
1240                        && temp_is_const(args[4]) && temps[args[4]].val == 0) {
1241                 /* Simplify LT/GE comparisons vs zero to a single compare
1242                    vs the high word of the input.  */
1243             do_setcond_high:
1244                 reset_temp(args[0]);
1245                 temps[args[0]].mask = 1;
1246                 op->opc = INDEX_op_setcond_i32;
1247                 args[1] = args[2];
1248                 args[2] = args[4];
1249                 args[3] = args[5];
1250             } else if (args[5] == TCG_COND_EQ) {
1251                 /* Simplify EQ comparisons where one of the pairs
1252                    can be simplified.  */
1253                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1254                                                args[1], args[3], TCG_COND_EQ);
1255                 if (tmp == 0) {
1256                     goto do_setcond_const;
1257                 } else if (tmp == 1) {
1258                     goto do_setcond_high;
1259                 }
1260                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1261                                                args[2], args[4], TCG_COND_EQ);
1262                 if (tmp == 0) {
1263                     goto do_setcond_high;
1264                 } else if (tmp != 1) {
1265                     goto do_default;
1266                 }
1267             do_setcond_low:
1268                 reset_temp(args[0]);
1269                 temps[args[0]].mask = 1;
1270                 op->opc = INDEX_op_setcond_i32;
1271                 args[2] = args[3];
1272                 args[3] = args[5];
1273             } else if (args[5] == TCG_COND_NE) {
1274                 /* Simplify NE comparisons where one of the pairs
1275                    can be simplified.  */
1276                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1277                                                args[1], args[3], TCG_COND_NE);
1278                 if (tmp == 0) {
1279                     goto do_setcond_high;
1280                 } else if (tmp == 1) {
1281                     goto do_setcond_const;
1282                 }
1283                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1284                                                args[2], args[4], TCG_COND_NE);
1285                 if (tmp == 0) {
1286                     goto do_setcond_low;
1287                 } else if (tmp == 1) {
1288                     goto do_setcond_const;
1289                 }
1290                 goto do_default;
1291             } else {
1292                 goto do_default;
1293             }
1294             break;
1295 
1296         case INDEX_op_call:
1297             if (!(args[nb_oargs + nb_iargs + 1]
1298                   & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1299                 for (i = 0; i < nb_globals; i++) {
1300                     if (test_bit(i, temps_used.l)) {
1301                         reset_temp(i);
1302                     }
1303                 }
1304             }
1305             goto do_reset_output;
1306 
1307         default:
1308         do_default:
1309             /* Default case: we know nothing about operation (or were unable
1310                to compute the operation result) so no propagation is done.
1311                We trash everything if the operation is the end of a basic
1312                block, otherwise we only trash the output args.  "mask" is
1313                the non-zero bits mask for the first output arg.  */
1314             if (def->flags & TCG_OPF_BB_END) {
1315                 reset_all_temps(nb_temps);
1316             } else {
1317         do_reset_output:
1318                 for (i = 0; i < nb_oargs; i++) {
1319                     reset_temp(args[i]);
1320                     /* Save the corresponding known-zero bits mask for the
1321                        first output argument (only one supported so far). */
1322                     if (i == 0) {
1323                         temps[args[i]].mask = mask;
1324                     }
1325                 }
1326             }
1327             break;
1328         }
1329     }
1330 }
1331