xref: /qemu/tcg/optimize.c (revision bfa3ab61)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "config.h"
27 
28 #include <stdlib.h>
29 #include <stdio.h>
30 
31 #include "qemu-common.h"
32 #include "tcg-op.h"
33 
34 #define CASE_OP_32_64(x)                        \
35         glue(glue(case INDEX_op_, x), _i32):    \
36         glue(glue(case INDEX_op_, x), _i64)
37 
38 typedef enum {
39     TCG_TEMP_UNDEF = 0,
40     TCG_TEMP_CONST,
41     TCG_TEMP_COPY,
42 } tcg_temp_state;
43 
44 struct tcg_temp_info {
45     tcg_temp_state state;
46     uint16_t prev_copy;
47     uint16_t next_copy;
48     tcg_target_ulong val;
49     tcg_target_ulong mask;
50 };
51 
52 static struct tcg_temp_info temps[TCG_MAX_TEMPS];
53 
54 /* Reset TEMP's state to TCG_TEMP_UNDEF.  If TEMP only had one copy, remove
55    the copy flag from the left temp.  */
56 static void reset_temp(TCGArg temp)
57 {
58     if (temps[temp].state == TCG_TEMP_COPY) {
59         if (temps[temp].prev_copy == temps[temp].next_copy) {
60             temps[temps[temp].next_copy].state = TCG_TEMP_UNDEF;
61         } else {
62             temps[temps[temp].next_copy].prev_copy = temps[temp].prev_copy;
63             temps[temps[temp].prev_copy].next_copy = temps[temp].next_copy;
64         }
65     }
66     temps[temp].state = TCG_TEMP_UNDEF;
67     temps[temp].mask = -1;
68 }
69 
70 static TCGOp *insert_op_before(TCGContext *s, TCGOp *old_op,
71                                 TCGOpcode opc, int nargs)
72 {
73     int oi = s->gen_next_op_idx;
74     int pi = s->gen_next_parm_idx;
75     int prev = old_op->prev;
76     int next = old_op - s->gen_op_buf;
77     TCGOp *new_op;
78 
79     tcg_debug_assert(oi < OPC_BUF_SIZE);
80     tcg_debug_assert(pi + nargs <= OPPARAM_BUF_SIZE);
81     s->gen_next_op_idx = oi + 1;
82     s->gen_next_parm_idx = pi + nargs;
83 
84     new_op = &s->gen_op_buf[oi];
85     *new_op = (TCGOp){
86         .opc = opc,
87         .args = pi,
88         .prev = prev,
89         .next = next
90     };
91     if (prev >= 0) {
92         s->gen_op_buf[prev].next = oi;
93     } else {
94         s->gen_first_op_idx = oi;
95     }
96     old_op->prev = oi;
97 
98     return new_op;
99 }
100 
101 /* Reset all temporaries, given that there are NB_TEMPS of them.  */
102 static void reset_all_temps(int nb_temps)
103 {
104     int i;
105     for (i = 0; i < nb_temps; i++) {
106         temps[i].state = TCG_TEMP_UNDEF;
107         temps[i].mask = -1;
108     }
109 }
110 
111 static int op_bits(TCGOpcode op)
112 {
113     const TCGOpDef *def = &tcg_op_defs[op];
114     return def->flags & TCG_OPF_64BIT ? 64 : 32;
115 }
116 
117 static TCGOpcode op_to_mov(TCGOpcode op)
118 {
119     switch (op_bits(op)) {
120     case 32:
121         return INDEX_op_mov_i32;
122     case 64:
123         return INDEX_op_mov_i64;
124     default:
125         fprintf(stderr, "op_to_mov: unexpected return value of "
126                 "function op_bits.\n");
127         tcg_abort();
128     }
129 }
130 
131 static TCGOpcode op_to_movi(TCGOpcode op)
132 {
133     switch (op_bits(op)) {
134     case 32:
135         return INDEX_op_movi_i32;
136     case 64:
137         return INDEX_op_movi_i64;
138     default:
139         fprintf(stderr, "op_to_movi: unexpected return value of "
140                 "function op_bits.\n");
141         tcg_abort();
142     }
143 }
144 
145 static TCGArg find_better_copy(TCGContext *s, TCGArg temp)
146 {
147     TCGArg i;
148 
149     /* If this is already a global, we can't do better. */
150     if (temp < s->nb_globals) {
151         return temp;
152     }
153 
154     /* Search for a global first. */
155     for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
156         if (i < s->nb_globals) {
157             return i;
158         }
159     }
160 
161     /* If it is a temp, search for a temp local. */
162     if (!s->temps[temp].temp_local) {
163         for (i = temps[temp].next_copy ; i != temp ; i = temps[i].next_copy) {
164             if (s->temps[i].temp_local) {
165                 return i;
166             }
167         }
168     }
169 
170     /* Failure to find a better representation, return the same temp. */
171     return temp;
172 }
173 
174 static bool temps_are_copies(TCGArg arg1, TCGArg arg2)
175 {
176     TCGArg i;
177 
178     if (arg1 == arg2) {
179         return true;
180     }
181 
182     if (temps[arg1].state != TCG_TEMP_COPY
183         || temps[arg2].state != TCG_TEMP_COPY) {
184         return false;
185     }
186 
187     for (i = temps[arg1].next_copy ; i != arg1 ; i = temps[i].next_copy) {
188         if (i == arg2) {
189             return true;
190         }
191     }
192 
193     return false;
194 }
195 
196 static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg *args,
197                              TCGArg dst, TCGArg val)
198 {
199     TCGOpcode new_op = op_to_movi(op->opc);
200     tcg_target_ulong mask;
201 
202     op->opc = new_op;
203 
204     reset_temp(dst);
205     temps[dst].state = TCG_TEMP_CONST;
206     temps[dst].val = val;
207     mask = val;
208     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
209         /* High bits of the destination are now garbage.  */
210         mask |= ~0xffffffffull;
211     }
212     temps[dst].mask = mask;
213 
214     args[0] = dst;
215     args[1] = val;
216 }
217 
218 static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg *args,
219                             TCGArg dst, TCGArg src)
220 {
221     if (temps_are_copies(dst, src)) {
222         tcg_op_remove(s, op);
223         return;
224     }
225 
226     if (temps[src].state == TCG_TEMP_CONST) {
227         tcg_opt_gen_movi(s, op, args, dst, temps[src].val);
228         return;
229     }
230 
231     TCGOpcode new_op = op_to_mov(op->opc);
232     tcg_target_ulong mask;
233 
234     op->opc = new_op;
235 
236     reset_temp(dst);
237     mask = temps[src].mask;
238     if (TCG_TARGET_REG_BITS > 32 && new_op == INDEX_op_mov_i32) {
239         /* High bits of the destination are now garbage.  */
240         mask |= ~0xffffffffull;
241     }
242     temps[dst].mask = mask;
243 
244     assert(temps[src].state != TCG_TEMP_CONST);
245 
246     if (s->temps[src].type == s->temps[dst].type) {
247         if (temps[src].state != TCG_TEMP_COPY) {
248             temps[src].state = TCG_TEMP_COPY;
249             temps[src].next_copy = src;
250             temps[src].prev_copy = src;
251         }
252         temps[dst].state = TCG_TEMP_COPY;
253         temps[dst].next_copy = temps[src].next_copy;
254         temps[dst].prev_copy = src;
255         temps[temps[dst].next_copy].prev_copy = dst;
256         temps[src].next_copy = dst;
257     }
258 
259     args[0] = dst;
260     args[1] = src;
261 }
262 
263 static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
264 {
265     uint64_t l64, h64;
266 
267     switch (op) {
268     CASE_OP_32_64(add):
269         return x + y;
270 
271     CASE_OP_32_64(sub):
272         return x - y;
273 
274     CASE_OP_32_64(mul):
275         return x * y;
276 
277     CASE_OP_32_64(and):
278         return x & y;
279 
280     CASE_OP_32_64(or):
281         return x | y;
282 
283     CASE_OP_32_64(xor):
284         return x ^ y;
285 
286     case INDEX_op_shl_i32:
287         return (uint32_t)x << (y & 31);
288 
289     case INDEX_op_shl_i64:
290         return (uint64_t)x << (y & 63);
291 
292     case INDEX_op_shr_i32:
293         return (uint32_t)x >> (y & 31);
294 
295     case INDEX_op_trunc_shr_i32:
296     case INDEX_op_shr_i64:
297         return (uint64_t)x >> (y & 63);
298 
299     case INDEX_op_sar_i32:
300         return (int32_t)x >> (y & 31);
301 
302     case INDEX_op_sar_i64:
303         return (int64_t)x >> (y & 63);
304 
305     case INDEX_op_rotr_i32:
306         return ror32(x, y & 31);
307 
308     case INDEX_op_rotr_i64:
309         return ror64(x, y & 63);
310 
311     case INDEX_op_rotl_i32:
312         return rol32(x, y & 31);
313 
314     case INDEX_op_rotl_i64:
315         return rol64(x, y & 63);
316 
317     CASE_OP_32_64(not):
318         return ~x;
319 
320     CASE_OP_32_64(neg):
321         return -x;
322 
323     CASE_OP_32_64(andc):
324         return x & ~y;
325 
326     CASE_OP_32_64(orc):
327         return x | ~y;
328 
329     CASE_OP_32_64(eqv):
330         return ~(x ^ y);
331 
332     CASE_OP_32_64(nand):
333         return ~(x & y);
334 
335     CASE_OP_32_64(nor):
336         return ~(x | y);
337 
338     CASE_OP_32_64(ext8s):
339         return (int8_t)x;
340 
341     CASE_OP_32_64(ext16s):
342         return (int16_t)x;
343 
344     CASE_OP_32_64(ext8u):
345         return (uint8_t)x;
346 
347     CASE_OP_32_64(ext16u):
348         return (uint16_t)x;
349 
350     case INDEX_op_ext32s_i64:
351         return (int32_t)x;
352 
353     case INDEX_op_ext32u_i64:
354         return (uint32_t)x;
355 
356     case INDEX_op_muluh_i32:
357         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
358     case INDEX_op_mulsh_i32:
359         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
360 
361     case INDEX_op_muluh_i64:
362         mulu64(&l64, &h64, x, y);
363         return h64;
364     case INDEX_op_mulsh_i64:
365         muls64(&l64, &h64, x, y);
366         return h64;
367 
368     case INDEX_op_div_i32:
369         /* Avoid crashing on divide by zero, otherwise undefined.  */
370         return (int32_t)x / ((int32_t)y ? : 1);
371     case INDEX_op_divu_i32:
372         return (uint32_t)x / ((uint32_t)y ? : 1);
373     case INDEX_op_div_i64:
374         return (int64_t)x / ((int64_t)y ? : 1);
375     case INDEX_op_divu_i64:
376         return (uint64_t)x / ((uint64_t)y ? : 1);
377 
378     case INDEX_op_rem_i32:
379         return (int32_t)x % ((int32_t)y ? : 1);
380     case INDEX_op_remu_i32:
381         return (uint32_t)x % ((uint32_t)y ? : 1);
382     case INDEX_op_rem_i64:
383         return (int64_t)x % ((int64_t)y ? : 1);
384     case INDEX_op_remu_i64:
385         return (uint64_t)x % ((uint64_t)y ? : 1);
386 
387     default:
388         fprintf(stderr,
389                 "Unrecognized operation %d in do_constant_folding.\n", op);
390         tcg_abort();
391     }
392 }
393 
394 static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
395 {
396     TCGArg res = do_constant_folding_2(op, x, y);
397     if (op_bits(op) == 32) {
398         res &= 0xffffffff;
399     }
400     return res;
401 }
402 
403 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
404 {
405     switch (c) {
406     case TCG_COND_EQ:
407         return x == y;
408     case TCG_COND_NE:
409         return x != y;
410     case TCG_COND_LT:
411         return (int32_t)x < (int32_t)y;
412     case TCG_COND_GE:
413         return (int32_t)x >= (int32_t)y;
414     case TCG_COND_LE:
415         return (int32_t)x <= (int32_t)y;
416     case TCG_COND_GT:
417         return (int32_t)x > (int32_t)y;
418     case TCG_COND_LTU:
419         return x < y;
420     case TCG_COND_GEU:
421         return x >= y;
422     case TCG_COND_LEU:
423         return x <= y;
424     case TCG_COND_GTU:
425         return x > y;
426     default:
427         tcg_abort();
428     }
429 }
430 
431 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
432 {
433     switch (c) {
434     case TCG_COND_EQ:
435         return x == y;
436     case TCG_COND_NE:
437         return x != y;
438     case TCG_COND_LT:
439         return (int64_t)x < (int64_t)y;
440     case TCG_COND_GE:
441         return (int64_t)x >= (int64_t)y;
442     case TCG_COND_LE:
443         return (int64_t)x <= (int64_t)y;
444     case TCG_COND_GT:
445         return (int64_t)x > (int64_t)y;
446     case TCG_COND_LTU:
447         return x < y;
448     case TCG_COND_GEU:
449         return x >= y;
450     case TCG_COND_LEU:
451         return x <= y;
452     case TCG_COND_GTU:
453         return x > y;
454     default:
455         tcg_abort();
456     }
457 }
458 
459 static bool do_constant_folding_cond_eq(TCGCond c)
460 {
461     switch (c) {
462     case TCG_COND_GT:
463     case TCG_COND_LTU:
464     case TCG_COND_LT:
465     case TCG_COND_GTU:
466     case TCG_COND_NE:
467         return 0;
468     case TCG_COND_GE:
469     case TCG_COND_GEU:
470     case TCG_COND_LE:
471     case TCG_COND_LEU:
472     case TCG_COND_EQ:
473         return 1;
474     default:
475         tcg_abort();
476     }
477 }
478 
479 /* Return 2 if the condition can't be simplified, and the result
480    of the condition (0 or 1) if it can */
481 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
482                                        TCGArg y, TCGCond c)
483 {
484     if (temps[x].state == TCG_TEMP_CONST && temps[y].state == TCG_TEMP_CONST) {
485         switch (op_bits(op)) {
486         case 32:
487             return do_constant_folding_cond_32(temps[x].val, temps[y].val, c);
488         case 64:
489             return do_constant_folding_cond_64(temps[x].val, temps[y].val, c);
490         default:
491             tcg_abort();
492         }
493     } else if (temps_are_copies(x, y)) {
494         return do_constant_folding_cond_eq(c);
495     } else if (temps[y].state == TCG_TEMP_CONST && temps[y].val == 0) {
496         switch (c) {
497         case TCG_COND_LTU:
498             return 0;
499         case TCG_COND_GEU:
500             return 1;
501         default:
502             return 2;
503         }
504     } else {
505         return 2;
506     }
507 }
508 
509 /* Return 2 if the condition can't be simplified, and the result
510    of the condition (0 or 1) if it can */
511 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
512 {
513     TCGArg al = p1[0], ah = p1[1];
514     TCGArg bl = p2[0], bh = p2[1];
515 
516     if (temps[bl].state == TCG_TEMP_CONST
517         && temps[bh].state == TCG_TEMP_CONST) {
518         uint64_t b = ((uint64_t)temps[bh].val << 32) | (uint32_t)temps[bl].val;
519 
520         if (temps[al].state == TCG_TEMP_CONST
521             && temps[ah].state == TCG_TEMP_CONST) {
522             uint64_t a;
523             a = ((uint64_t)temps[ah].val << 32) | (uint32_t)temps[al].val;
524             return do_constant_folding_cond_64(a, b, c);
525         }
526         if (b == 0) {
527             switch (c) {
528             case TCG_COND_LTU:
529                 return 0;
530             case TCG_COND_GEU:
531                 return 1;
532             default:
533                 break;
534             }
535         }
536     }
537     if (temps_are_copies(al, bl) && temps_are_copies(ah, bh)) {
538         return do_constant_folding_cond_eq(c);
539     }
540     return 2;
541 }
542 
543 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
544 {
545     TCGArg a1 = *p1, a2 = *p2;
546     int sum = 0;
547     sum += temps[a1].state == TCG_TEMP_CONST;
548     sum -= temps[a2].state == TCG_TEMP_CONST;
549 
550     /* Prefer the constant in second argument, and then the form
551        op a, a, b, which is better handled on non-RISC hosts. */
552     if (sum > 0 || (sum == 0 && dest == a2)) {
553         *p1 = a2;
554         *p2 = a1;
555         return true;
556     }
557     return false;
558 }
559 
560 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
561 {
562     int sum = 0;
563     sum += temps[p1[0]].state == TCG_TEMP_CONST;
564     sum += temps[p1[1]].state == TCG_TEMP_CONST;
565     sum -= temps[p2[0]].state == TCG_TEMP_CONST;
566     sum -= temps[p2[1]].state == TCG_TEMP_CONST;
567     if (sum > 0) {
568         TCGArg t;
569         t = p1[0], p1[0] = p2[0], p2[0] = t;
570         t = p1[1], p1[1] = p2[1], p2[1] = t;
571         return true;
572     }
573     return false;
574 }
575 
576 /* Propagate constants and copies, fold constant expressions. */
577 void tcg_optimize(TCGContext *s)
578 {
579     int oi, oi_next, nb_temps, nb_globals;
580 
581     /* Array VALS has an element for each temp.
582        If this temp holds a constant then its value is kept in VALS' element.
583        If this temp is a copy of other ones then the other copies are
584        available through the doubly linked circular list. */
585 
586     nb_temps = s->nb_temps;
587     nb_globals = s->nb_globals;
588     reset_all_temps(nb_temps);
589 
590     for (oi = s->gen_first_op_idx; oi >= 0; oi = oi_next) {
591         tcg_target_ulong mask, partmask, affected;
592         int nb_oargs, nb_iargs, i;
593         TCGArg tmp;
594 
595         TCGOp * const op = &s->gen_op_buf[oi];
596         TCGArg * const args = &s->gen_opparam_buf[op->args];
597         TCGOpcode opc = op->opc;
598         const TCGOpDef *def = &tcg_op_defs[opc];
599 
600         oi_next = op->next;
601         if (opc == INDEX_op_call) {
602             nb_oargs = op->callo;
603             nb_iargs = op->calli;
604         } else {
605             nb_oargs = def->nb_oargs;
606             nb_iargs = def->nb_iargs;
607         }
608 
609         /* Do copy propagation */
610         for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
611             if (temps[args[i]].state == TCG_TEMP_COPY) {
612                 args[i] = find_better_copy(s, args[i]);
613             }
614         }
615 
616         /* For commutative operations make constant second argument */
617         switch (opc) {
618         CASE_OP_32_64(add):
619         CASE_OP_32_64(mul):
620         CASE_OP_32_64(and):
621         CASE_OP_32_64(or):
622         CASE_OP_32_64(xor):
623         CASE_OP_32_64(eqv):
624         CASE_OP_32_64(nand):
625         CASE_OP_32_64(nor):
626         CASE_OP_32_64(muluh):
627         CASE_OP_32_64(mulsh):
628             swap_commutative(args[0], &args[1], &args[2]);
629             break;
630         CASE_OP_32_64(brcond):
631             if (swap_commutative(-1, &args[0], &args[1])) {
632                 args[2] = tcg_swap_cond(args[2]);
633             }
634             break;
635         CASE_OP_32_64(setcond):
636             if (swap_commutative(args[0], &args[1], &args[2])) {
637                 args[3] = tcg_swap_cond(args[3]);
638             }
639             break;
640         CASE_OP_32_64(movcond):
641             if (swap_commutative(-1, &args[1], &args[2])) {
642                 args[5] = tcg_swap_cond(args[5]);
643             }
644             /* For movcond, we canonicalize the "false" input reg to match
645                the destination reg so that the tcg backend can implement
646                a "move if true" operation.  */
647             if (swap_commutative(args[0], &args[4], &args[3])) {
648                 args[5] = tcg_invert_cond(args[5]);
649             }
650             break;
651         CASE_OP_32_64(add2):
652             swap_commutative(args[0], &args[2], &args[4]);
653             swap_commutative(args[1], &args[3], &args[5]);
654             break;
655         CASE_OP_32_64(mulu2):
656         CASE_OP_32_64(muls2):
657             swap_commutative(args[0], &args[2], &args[3]);
658             break;
659         case INDEX_op_brcond2_i32:
660             if (swap_commutative2(&args[0], &args[2])) {
661                 args[4] = tcg_swap_cond(args[4]);
662             }
663             break;
664         case INDEX_op_setcond2_i32:
665             if (swap_commutative2(&args[1], &args[3])) {
666                 args[5] = tcg_swap_cond(args[5]);
667             }
668             break;
669         default:
670             break;
671         }
672 
673         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
674            and "sub r, 0, a => neg r, a" case.  */
675         switch (opc) {
676         CASE_OP_32_64(shl):
677         CASE_OP_32_64(shr):
678         CASE_OP_32_64(sar):
679         CASE_OP_32_64(rotl):
680         CASE_OP_32_64(rotr):
681             if (temps[args[1]].state == TCG_TEMP_CONST
682                 && temps[args[1]].val == 0) {
683                 tcg_opt_gen_movi(s, op, args, args[0], 0);
684                 continue;
685             }
686             break;
687         CASE_OP_32_64(sub):
688             {
689                 TCGOpcode neg_op;
690                 bool have_neg;
691 
692                 if (temps[args[2]].state == TCG_TEMP_CONST) {
693                     /* Proceed with possible constant folding. */
694                     break;
695                 }
696                 if (opc == INDEX_op_sub_i32) {
697                     neg_op = INDEX_op_neg_i32;
698                     have_neg = TCG_TARGET_HAS_neg_i32;
699                 } else {
700                     neg_op = INDEX_op_neg_i64;
701                     have_neg = TCG_TARGET_HAS_neg_i64;
702                 }
703                 if (!have_neg) {
704                     break;
705                 }
706                 if (temps[args[1]].state == TCG_TEMP_CONST
707                     && temps[args[1]].val == 0) {
708                     op->opc = neg_op;
709                     reset_temp(args[0]);
710                     args[1] = args[2];
711                     continue;
712                 }
713             }
714             break;
715         CASE_OP_32_64(xor):
716         CASE_OP_32_64(nand):
717             if (temps[args[1]].state != TCG_TEMP_CONST
718                 && temps[args[2]].state == TCG_TEMP_CONST
719                 && temps[args[2]].val == -1) {
720                 i = 1;
721                 goto try_not;
722             }
723             break;
724         CASE_OP_32_64(nor):
725             if (temps[args[1]].state != TCG_TEMP_CONST
726                 && temps[args[2]].state == TCG_TEMP_CONST
727                 && temps[args[2]].val == 0) {
728                 i = 1;
729                 goto try_not;
730             }
731             break;
732         CASE_OP_32_64(andc):
733             if (temps[args[2]].state != TCG_TEMP_CONST
734                 && temps[args[1]].state == TCG_TEMP_CONST
735                 && temps[args[1]].val == -1) {
736                 i = 2;
737                 goto try_not;
738             }
739             break;
740         CASE_OP_32_64(orc):
741         CASE_OP_32_64(eqv):
742             if (temps[args[2]].state != TCG_TEMP_CONST
743                 && temps[args[1]].state == TCG_TEMP_CONST
744                 && temps[args[1]].val == 0) {
745                 i = 2;
746                 goto try_not;
747             }
748             break;
749         try_not:
750             {
751                 TCGOpcode not_op;
752                 bool have_not;
753 
754                 if (def->flags & TCG_OPF_64BIT) {
755                     not_op = INDEX_op_not_i64;
756                     have_not = TCG_TARGET_HAS_not_i64;
757                 } else {
758                     not_op = INDEX_op_not_i32;
759                     have_not = TCG_TARGET_HAS_not_i32;
760                 }
761                 if (!have_not) {
762                     break;
763                 }
764                 op->opc = not_op;
765                 reset_temp(args[0]);
766                 args[1] = args[i];
767                 continue;
768             }
769         default:
770             break;
771         }
772 
773         /* Simplify expression for "op r, a, const => mov r, a" cases */
774         switch (opc) {
775         CASE_OP_32_64(add):
776         CASE_OP_32_64(sub):
777         CASE_OP_32_64(shl):
778         CASE_OP_32_64(shr):
779         CASE_OP_32_64(sar):
780         CASE_OP_32_64(rotl):
781         CASE_OP_32_64(rotr):
782         CASE_OP_32_64(or):
783         CASE_OP_32_64(xor):
784         CASE_OP_32_64(andc):
785             if (temps[args[1]].state != TCG_TEMP_CONST
786                 && temps[args[2]].state == TCG_TEMP_CONST
787                 && temps[args[2]].val == 0) {
788                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
789                 continue;
790             }
791             break;
792         CASE_OP_32_64(and):
793         CASE_OP_32_64(orc):
794         CASE_OP_32_64(eqv):
795             if (temps[args[1]].state != TCG_TEMP_CONST
796                 && temps[args[2]].state == TCG_TEMP_CONST
797                 && temps[args[2]].val == -1) {
798                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
799                 continue;
800             }
801             break;
802         default:
803             break;
804         }
805 
806         /* Simplify using known-zero bits. Currently only ops with a single
807            output argument is supported. */
808         mask = -1;
809         affected = -1;
810         switch (opc) {
811         CASE_OP_32_64(ext8s):
812             if ((temps[args[1]].mask & 0x80) != 0) {
813                 break;
814             }
815         CASE_OP_32_64(ext8u):
816             mask = 0xff;
817             goto and_const;
818         CASE_OP_32_64(ext16s):
819             if ((temps[args[1]].mask & 0x8000) != 0) {
820                 break;
821             }
822         CASE_OP_32_64(ext16u):
823             mask = 0xffff;
824             goto and_const;
825         case INDEX_op_ext32s_i64:
826             if ((temps[args[1]].mask & 0x80000000) != 0) {
827                 break;
828             }
829         case INDEX_op_ext32u_i64:
830             mask = 0xffffffffU;
831             goto and_const;
832 
833         CASE_OP_32_64(and):
834             mask = temps[args[2]].mask;
835             if (temps[args[2]].state == TCG_TEMP_CONST) {
836         and_const:
837                 affected = temps[args[1]].mask & ~mask;
838             }
839             mask = temps[args[1]].mask & mask;
840             break;
841 
842         CASE_OP_32_64(andc):
843             /* Known-zeros does not imply known-ones.  Therefore unless
844                args[2] is constant, we can't infer anything from it.  */
845             if (temps[args[2]].state == TCG_TEMP_CONST) {
846                 mask = ~temps[args[2]].mask;
847                 goto and_const;
848             }
849             /* But we certainly know nothing outside args[1] may be set. */
850             mask = temps[args[1]].mask;
851             break;
852 
853         case INDEX_op_sar_i32:
854             if (temps[args[2]].state == TCG_TEMP_CONST) {
855                 tmp = temps[args[2]].val & 31;
856                 mask = (int32_t)temps[args[1]].mask >> tmp;
857             }
858             break;
859         case INDEX_op_sar_i64:
860             if (temps[args[2]].state == TCG_TEMP_CONST) {
861                 tmp = temps[args[2]].val & 63;
862                 mask = (int64_t)temps[args[1]].mask >> tmp;
863             }
864             break;
865 
866         case INDEX_op_shr_i32:
867             if (temps[args[2]].state == TCG_TEMP_CONST) {
868                 tmp = temps[args[2]].val & 31;
869                 mask = (uint32_t)temps[args[1]].mask >> tmp;
870             }
871             break;
872         case INDEX_op_shr_i64:
873             if (temps[args[2]].state == TCG_TEMP_CONST) {
874                 tmp = temps[args[2]].val & 63;
875                 mask = (uint64_t)temps[args[1]].mask >> tmp;
876             }
877             break;
878 
879         case INDEX_op_trunc_shr_i32:
880             mask = (uint64_t)temps[args[1]].mask >> args[2];
881             break;
882 
883         CASE_OP_32_64(shl):
884             if (temps[args[2]].state == TCG_TEMP_CONST) {
885                 tmp = temps[args[2]].val & (TCG_TARGET_REG_BITS - 1);
886                 mask = temps[args[1]].mask << tmp;
887             }
888             break;
889 
890         CASE_OP_32_64(neg):
891             /* Set to 1 all bits to the left of the rightmost.  */
892             mask = -(temps[args[1]].mask & -temps[args[1]].mask);
893             break;
894 
895         CASE_OP_32_64(deposit):
896             mask = deposit64(temps[args[1]].mask, args[3], args[4],
897                              temps[args[2]].mask);
898             break;
899 
900         CASE_OP_32_64(or):
901         CASE_OP_32_64(xor):
902             mask = temps[args[1]].mask | temps[args[2]].mask;
903             break;
904 
905         CASE_OP_32_64(setcond):
906         case INDEX_op_setcond2_i32:
907             mask = 1;
908             break;
909 
910         CASE_OP_32_64(movcond):
911             mask = temps[args[3]].mask | temps[args[4]].mask;
912             break;
913 
914         CASE_OP_32_64(ld8u):
915             mask = 0xff;
916             break;
917         CASE_OP_32_64(ld16u):
918             mask = 0xffff;
919             break;
920         case INDEX_op_ld32u_i64:
921             mask = 0xffffffffu;
922             break;
923 
924         CASE_OP_32_64(qemu_ld):
925             {
926                 TCGMemOpIdx oi = args[nb_oargs + nb_iargs];
927                 TCGMemOp mop = get_memop(oi);
928                 if (!(mop & MO_SIGN)) {
929                     mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
930                 }
931             }
932             break;
933 
934         default:
935             break;
936         }
937 
938         /* 32-bit ops generate 32-bit results.  For the result is zero test
939            below, we can ignore high bits, but for further optimizations we
940            need to record that the high bits contain garbage.  */
941         partmask = mask;
942         if (!(def->flags & TCG_OPF_64BIT)) {
943             mask |= ~(tcg_target_ulong)0xffffffffu;
944             partmask &= 0xffffffffu;
945             affected &= 0xffffffffu;
946         }
947 
948         if (partmask == 0) {
949             assert(nb_oargs == 1);
950             tcg_opt_gen_movi(s, op, args, args[0], 0);
951             continue;
952         }
953         if (affected == 0) {
954             assert(nb_oargs == 1);
955             tcg_opt_gen_mov(s, op, args, args[0], args[1]);
956             continue;
957         }
958 
959         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
960         switch (opc) {
961         CASE_OP_32_64(and):
962         CASE_OP_32_64(mul):
963         CASE_OP_32_64(muluh):
964         CASE_OP_32_64(mulsh):
965             if ((temps[args[2]].state == TCG_TEMP_CONST
966                 && temps[args[2]].val == 0)) {
967                 tcg_opt_gen_movi(s, op, args, args[0], 0);
968                 continue;
969             }
970             break;
971         default:
972             break;
973         }
974 
975         /* Simplify expression for "op r, a, a => mov r, a" cases */
976         switch (opc) {
977         CASE_OP_32_64(or):
978         CASE_OP_32_64(and):
979             if (temps_are_copies(args[1], args[2])) {
980                 tcg_opt_gen_mov(s, op, args, args[0], args[1]);
981                 continue;
982             }
983             break;
984         default:
985             break;
986         }
987 
988         /* Simplify expression for "op r, a, a => movi r, 0" cases */
989         switch (opc) {
990         CASE_OP_32_64(andc):
991         CASE_OP_32_64(sub):
992         CASE_OP_32_64(xor):
993             if (temps_are_copies(args[1], args[2])) {
994                 tcg_opt_gen_movi(s, op, args, args[0], 0);
995                 continue;
996             }
997             break;
998         default:
999             break;
1000         }
1001 
1002         /* Propagate constants through copy operations and do constant
1003            folding.  Constants will be substituted to arguments by register
1004            allocator where needed and possible.  Also detect copies. */
1005         switch (opc) {
1006         CASE_OP_32_64(mov):
1007             tcg_opt_gen_mov(s, op, args, args[0], args[1]);
1008             break;
1009         CASE_OP_32_64(movi):
1010             tcg_opt_gen_movi(s, op, args, args[0], args[1]);
1011             break;
1012 
1013         CASE_OP_32_64(not):
1014         CASE_OP_32_64(neg):
1015         CASE_OP_32_64(ext8s):
1016         CASE_OP_32_64(ext8u):
1017         CASE_OP_32_64(ext16s):
1018         CASE_OP_32_64(ext16u):
1019         case INDEX_op_ext32s_i64:
1020         case INDEX_op_ext32u_i64:
1021             if (temps[args[1]].state == TCG_TEMP_CONST) {
1022                 tmp = do_constant_folding(opc, temps[args[1]].val, 0);
1023                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1024                 break;
1025             }
1026             goto do_default;
1027 
1028         case INDEX_op_trunc_shr_i32:
1029             if (temps[args[1]].state == TCG_TEMP_CONST) {
1030                 tmp = do_constant_folding(opc, temps[args[1]].val, args[2]);
1031                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1032                 break;
1033             }
1034             goto do_default;
1035 
1036         CASE_OP_32_64(add):
1037         CASE_OP_32_64(sub):
1038         CASE_OP_32_64(mul):
1039         CASE_OP_32_64(or):
1040         CASE_OP_32_64(and):
1041         CASE_OP_32_64(xor):
1042         CASE_OP_32_64(shl):
1043         CASE_OP_32_64(shr):
1044         CASE_OP_32_64(sar):
1045         CASE_OP_32_64(rotl):
1046         CASE_OP_32_64(rotr):
1047         CASE_OP_32_64(andc):
1048         CASE_OP_32_64(orc):
1049         CASE_OP_32_64(eqv):
1050         CASE_OP_32_64(nand):
1051         CASE_OP_32_64(nor):
1052         CASE_OP_32_64(muluh):
1053         CASE_OP_32_64(mulsh):
1054         CASE_OP_32_64(div):
1055         CASE_OP_32_64(divu):
1056         CASE_OP_32_64(rem):
1057         CASE_OP_32_64(remu):
1058             if (temps[args[1]].state == TCG_TEMP_CONST
1059                 && temps[args[2]].state == TCG_TEMP_CONST) {
1060                 tmp = do_constant_folding(opc, temps[args[1]].val,
1061                                           temps[args[2]].val);
1062                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1063                 break;
1064             }
1065             goto do_default;
1066 
1067         CASE_OP_32_64(deposit):
1068             if (temps[args[1]].state == TCG_TEMP_CONST
1069                 && temps[args[2]].state == TCG_TEMP_CONST) {
1070                 tmp = deposit64(temps[args[1]].val, args[3], args[4],
1071                                 temps[args[2]].val);
1072                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1073                 break;
1074             }
1075             goto do_default;
1076 
1077         CASE_OP_32_64(setcond):
1078             tmp = do_constant_folding_cond(opc, args[1], args[2], args[3]);
1079             if (tmp != 2) {
1080                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1081                 break;
1082             }
1083             goto do_default;
1084 
1085         CASE_OP_32_64(brcond):
1086             tmp = do_constant_folding_cond(opc, args[0], args[1], args[2]);
1087             if (tmp != 2) {
1088                 if (tmp) {
1089                     reset_all_temps(nb_temps);
1090                     op->opc = INDEX_op_br;
1091                     args[0] = args[3];
1092                 } else {
1093                     tcg_op_remove(s, op);
1094                 }
1095                 break;
1096             }
1097             goto do_default;
1098 
1099         CASE_OP_32_64(movcond):
1100             tmp = do_constant_folding_cond(opc, args[1], args[2], args[5]);
1101             if (tmp != 2) {
1102                 tcg_opt_gen_mov(s, op, args, args[0], args[4-tmp]);
1103                 break;
1104             }
1105             goto do_default;
1106 
1107         case INDEX_op_add2_i32:
1108         case INDEX_op_sub2_i32:
1109             if (temps[args[2]].state == TCG_TEMP_CONST
1110                 && temps[args[3]].state == TCG_TEMP_CONST
1111                 && temps[args[4]].state == TCG_TEMP_CONST
1112                 && temps[args[5]].state == TCG_TEMP_CONST) {
1113                 uint32_t al = temps[args[2]].val;
1114                 uint32_t ah = temps[args[3]].val;
1115                 uint32_t bl = temps[args[4]].val;
1116                 uint32_t bh = temps[args[5]].val;
1117                 uint64_t a = ((uint64_t)ah << 32) | al;
1118                 uint64_t b = ((uint64_t)bh << 32) | bl;
1119                 TCGArg rl, rh;
1120                 TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
1121                 TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1122 
1123                 if (opc == INDEX_op_add2_i32) {
1124                     a += b;
1125                 } else {
1126                     a -= b;
1127                 }
1128 
1129                 rl = args[0];
1130                 rh = args[1];
1131                 tcg_opt_gen_movi(s, op, args, rl, (uint32_t)a);
1132                 tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(a >> 32));
1133 
1134                 /* We've done all we need to do with the movi.  Skip it.  */
1135                 oi_next = op2->next;
1136                 break;
1137             }
1138             goto do_default;
1139 
1140         case INDEX_op_mulu2_i32:
1141             if (temps[args[2]].state == TCG_TEMP_CONST
1142                 && temps[args[3]].state == TCG_TEMP_CONST) {
1143                 uint32_t a = temps[args[2]].val;
1144                 uint32_t b = temps[args[3]].val;
1145                 uint64_t r = (uint64_t)a * b;
1146                 TCGArg rl, rh;
1147                 TCGOp *op2 = insert_op_before(s, op, INDEX_op_movi_i32, 2);
1148                 TCGArg *args2 = &s->gen_opparam_buf[op2->args];
1149 
1150                 rl = args[0];
1151                 rh = args[1];
1152                 tcg_opt_gen_movi(s, op, args, rl, (uint32_t)r);
1153                 tcg_opt_gen_movi(s, op2, args2, rh, (uint32_t)(r >> 32));
1154 
1155                 /* We've done all we need to do with the movi.  Skip it.  */
1156                 oi_next = op2->next;
1157                 break;
1158             }
1159             goto do_default;
1160 
1161         case INDEX_op_brcond2_i32:
1162             tmp = do_constant_folding_cond2(&args[0], &args[2], args[4]);
1163             if (tmp != 2) {
1164                 if (tmp) {
1165             do_brcond_true:
1166                     reset_all_temps(nb_temps);
1167                     op->opc = INDEX_op_br;
1168                     args[0] = args[5];
1169                 } else {
1170             do_brcond_false:
1171                     tcg_op_remove(s, op);
1172                 }
1173             } else if ((args[4] == TCG_COND_LT || args[4] == TCG_COND_GE)
1174                        && temps[args[2]].state == TCG_TEMP_CONST
1175                        && temps[args[3]].state == TCG_TEMP_CONST
1176                        && temps[args[2]].val == 0
1177                        && temps[args[3]].val == 0) {
1178                 /* Simplify LT/GE comparisons vs zero to a single compare
1179                    vs the high word of the input.  */
1180             do_brcond_high:
1181                 reset_all_temps(nb_temps);
1182                 op->opc = INDEX_op_brcond_i32;
1183                 args[0] = args[1];
1184                 args[1] = args[3];
1185                 args[2] = args[4];
1186                 args[3] = args[5];
1187             } else if (args[4] == TCG_COND_EQ) {
1188                 /* Simplify EQ comparisons where one of the pairs
1189                    can be simplified.  */
1190                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1191                                                args[0], args[2], TCG_COND_EQ);
1192                 if (tmp == 0) {
1193                     goto do_brcond_false;
1194                 } else if (tmp == 1) {
1195                     goto do_brcond_high;
1196                 }
1197                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1198                                                args[1], args[3], TCG_COND_EQ);
1199                 if (tmp == 0) {
1200                     goto do_brcond_false;
1201                 } else if (tmp != 1) {
1202                     goto do_default;
1203                 }
1204             do_brcond_low:
1205                 reset_all_temps(nb_temps);
1206                 op->opc = INDEX_op_brcond_i32;
1207                 args[1] = args[2];
1208                 args[2] = args[4];
1209                 args[3] = args[5];
1210             } else if (args[4] == TCG_COND_NE) {
1211                 /* Simplify NE comparisons where one of the pairs
1212                    can be simplified.  */
1213                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1214                                                args[0], args[2], TCG_COND_NE);
1215                 if (tmp == 0) {
1216                     goto do_brcond_high;
1217                 } else if (tmp == 1) {
1218                     goto do_brcond_true;
1219                 }
1220                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1221                                                args[1], args[3], TCG_COND_NE);
1222                 if (tmp == 0) {
1223                     goto do_brcond_low;
1224                 } else if (tmp == 1) {
1225                     goto do_brcond_true;
1226                 }
1227                 goto do_default;
1228             } else {
1229                 goto do_default;
1230             }
1231             break;
1232 
1233         case INDEX_op_setcond2_i32:
1234             tmp = do_constant_folding_cond2(&args[1], &args[3], args[5]);
1235             if (tmp != 2) {
1236             do_setcond_const:
1237                 tcg_opt_gen_movi(s, op, args, args[0], tmp);
1238             } else if ((args[5] == TCG_COND_LT || args[5] == TCG_COND_GE)
1239                        && temps[args[3]].state == TCG_TEMP_CONST
1240                        && temps[args[4]].state == TCG_TEMP_CONST
1241                        && temps[args[3]].val == 0
1242                        && temps[args[4]].val == 0) {
1243                 /* Simplify LT/GE comparisons vs zero to a single compare
1244                    vs the high word of the input.  */
1245             do_setcond_high:
1246                 reset_temp(args[0]);
1247                 temps[args[0]].mask = 1;
1248                 op->opc = INDEX_op_setcond_i32;
1249                 args[1] = args[2];
1250                 args[2] = args[4];
1251                 args[3] = args[5];
1252             } else if (args[5] == TCG_COND_EQ) {
1253                 /* Simplify EQ comparisons where one of the pairs
1254                    can be simplified.  */
1255                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1256                                                args[1], args[3], TCG_COND_EQ);
1257                 if (tmp == 0) {
1258                     goto do_setcond_const;
1259                 } else if (tmp == 1) {
1260                     goto do_setcond_high;
1261                 }
1262                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1263                                                args[2], args[4], TCG_COND_EQ);
1264                 if (tmp == 0) {
1265                     goto do_setcond_high;
1266                 } else if (tmp != 1) {
1267                     goto do_default;
1268                 }
1269             do_setcond_low:
1270                 reset_temp(args[0]);
1271                 temps[args[0]].mask = 1;
1272                 op->opc = INDEX_op_setcond_i32;
1273                 args[2] = args[3];
1274                 args[3] = args[5];
1275             } else if (args[5] == TCG_COND_NE) {
1276                 /* Simplify NE comparisons where one of the pairs
1277                    can be simplified.  */
1278                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1279                                                args[1], args[3], TCG_COND_NE);
1280                 if (tmp == 0) {
1281                     goto do_setcond_high;
1282                 } else if (tmp == 1) {
1283                     goto do_setcond_const;
1284                 }
1285                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1286                                                args[2], args[4], TCG_COND_NE);
1287                 if (tmp == 0) {
1288                     goto do_setcond_low;
1289                 } else if (tmp == 1) {
1290                     goto do_setcond_const;
1291                 }
1292                 goto do_default;
1293             } else {
1294                 goto do_default;
1295             }
1296             break;
1297 
1298         case INDEX_op_call:
1299             if (!(args[nb_oargs + nb_iargs + 1]
1300                   & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1301                 for (i = 0; i < nb_globals; i++) {
1302                     reset_temp(i);
1303                 }
1304             }
1305             goto do_reset_output;
1306 
1307         default:
1308         do_default:
1309             /* Default case: we know nothing about operation (or were unable
1310                to compute the operation result) so no propagation is done.
1311                We trash everything if the operation is the end of a basic
1312                block, otherwise we only trash the output args.  "mask" is
1313                the non-zero bits mask for the first output arg.  */
1314             if (def->flags & TCG_OPF_BB_END) {
1315                 reset_all_temps(nb_temps);
1316             } else {
1317         do_reset_output:
1318                 for (i = 0; i < nb_oargs; i++) {
1319                     reset_temp(args[i]);
1320                     /* Save the corresponding known-zero bits mask for the
1321                        first output argument (only one supported so far). */
1322                     if (i == 0) {
1323                         temps[args[i]].mask = mask;
1324                     }
1325                 }
1326             }
1327             break;
1328         }
1329     }
1330 }
1331