1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "tcg/tcg-op.h"
28 
/*
 * Expand to the case labels for both the _i32 and _i64 variants of
 * opcode X.  The colon after the last label is left off so that a use
 * reads like an ordinary label: "CASE_OP_32_64(add):".
 */
#define CASE_OP_32_64(x)                        \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64)
32 
/*
 * Like CASE_OP_32_64, but additionally emits the case label for the
 * _vec variant of opcode X.  The colon after the last label is again
 * supplied at the point of use.
 */
#define CASE_OP_32_64_VEC(x)                    \
        glue(glue(case INDEX_op_, x), _i32):    \
        glue(glue(case INDEX_op_, x), _i64):    \
        glue(glue(case INDEX_op_, x), _vec)
37 
/*
 * Per-temporary optimizer state.  A temp's record is reached through
 * its state_ptr field (see ts_info).
 */
struct tcg_temp_info {
    /* True when the temp is known to hold the constant stored in val.  */
    bool is_const;
    /* Links of the circular doubly linked list of temps that are copies
       of one another.  Both point back at the owning temp when it has
       no copies (see ts_is_copy and reset_ts).  */
    TCGTemp *prev_copy;
    TCGTemp *next_copy;
    /* The constant value; it is not initialized when the record is set
       up, so it is only meaningful while is_const is set.  */
    tcg_target_ulong val;
    /* Bits that may be nonzero in the temp; a clear bit is known to be
       zero.  -1 means nothing is known.  */
    tcg_target_ulong mask;
};
45 
/* Fetch the optimizer record that TS carries in its state_ptr field.  */
static inline struct tcg_temp_info *ts_info(TCGTemp *ts)
{
    struct tcg_temp_info *info = ts->state_ptr;

    return info;
}
50 
/* Fetch the optimizer record of the temp that ARG refers to.  */
static inline struct tcg_temp_info *arg_info(TCGArg arg)
{
    TCGTemp *ts = arg_temp(arg);

    return ts_info(ts);
}
55 
/* Report whether TS is currently recorded as holding a constant.  */
static inline bool ts_is_const(TCGTemp *ts)
{
    const struct tcg_temp_info *info = ts_info(ts);

    return info->is_const;
}
60 
/* Report whether the temp behind ARG is recorded as holding a constant.  */
static inline bool arg_is_const(TCGArg arg)
{
    TCGTemp *ts = arg_temp(arg);

    return ts_is_const(ts);
}
65 
/*
 * Report whether TS has at least one other temp in its copy list.
 * A temp with no copies links to itself.
 */
static inline bool ts_is_copy(TCGTemp *ts)
{
    const struct tcg_temp_info *info = ts_info(ts);

    return info->next_copy != ts;
}
70 
71 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
reset_ts(TCGTemp * ts)72 static void reset_ts(TCGTemp *ts)
73 {
74     struct tcg_temp_info *ti = ts_info(ts);
75     struct tcg_temp_info *pi = ts_info(ti->prev_copy);
76     struct tcg_temp_info *ni = ts_info(ti->next_copy);
77 
78     ni->prev_copy = ti->prev_copy;
79     pi->next_copy = ti->next_copy;
80     ti->next_copy = ts;
81     ti->prev_copy = ts;
82     ti->is_const = false;
83     ti->mask = -1;
84 }
85 
/* Same as reset_ts, for the temp that ARG refers to.  */
static void reset_temp(TCGArg arg)
{
    TCGTemp *ts = arg_temp(arg);

    reset_ts(ts);
}
90 
91 /* Initialize and activate a temporary.  */
init_ts_info(struct tcg_temp_info * infos,TCGTempSet * temps_used,TCGTemp * ts)92 static void init_ts_info(struct tcg_temp_info *infos,
93                          TCGTempSet *temps_used, TCGTemp *ts)
94 {
95     size_t idx = temp_idx(ts);
96     if (!test_bit(idx, temps_used->l)) {
97         struct tcg_temp_info *ti = &infos[idx];
98 
99         ts->state_ptr = ti;
100         ti->next_copy = ts;
101         ti->prev_copy = ts;
102         ti->is_const = false;
103         ti->mask = -1;
104         set_bit(idx, temps_used->l);
105     }
106 }
107 
/* Same as init_ts_info, for the temp that ARG refers to.  */
static void init_arg_info(struct tcg_temp_info *infos,
                          TCGTempSet *temps_used, TCGArg arg)
{
    TCGTemp *ts = arg_temp(arg);

    init_ts_info(infos, temps_used, ts);
}
113 
/*
 * Pick the preferred representative among the known copies of TS.
 *
 * Preference order: a global temp, then (when TS is neither global nor
 * temp-local) a temp-local copy, and finally TS itself.  S is unused.
 *
 * Returns the chosen temp, which is TS when no better copy exists.
 */
static TCGTemp *find_better_copy(TCGContext *s, TCGTemp *ts)
{
    TCGTemp *i;

    /* If this is already a global, we can't do better. */
    if (ts->temp_global) {
        return ts;
    }

    /* Search for a global first. */
    for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
        if (i->temp_global) {
            return i;
        }
    }

    /* If it is a temp, search for a temp local. */
    if (!ts->temp_local) {
        for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
            /* Test the candidate, not TS: TS is known not to be local
               here, so checking TS would never select anything.  */
            if (i->temp_local) {
                return i;
            }
        }
    }

    /* Failure to find a better representation, return the same temp. */
    return ts;
}
142 
/*
 * Report whether TS1 and TS2 are known to hold the same value, i.e.
 * they are the same temp or sit on the same list of copies.
 */
static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
{
    TCGTemp *cur;

    if (ts1 == ts2) {
        return true;
    }

    /* Both must be on some list before a walk can find anything.  */
    if (ts_is_copy(ts1) && ts_is_copy(ts2)) {
        cur = ts_info(ts1)->next_copy;
        while (cur != ts1) {
            if (cur == ts2) {
                return true;
            }
            cur = ts_info(cur)->next_copy;
        }
    }

    return false;
}
163 
/* Same as ts_are_copies, for the temps that ARG1 and ARG2 refer to.  */
static bool args_are_copies(TCGArg arg1, TCGArg arg2)
{
    TCGTemp *first = arg_temp(arg1);
    TCGTemp *second = arg_temp(arg2);

    return ts_are_copies(first, second);
}
168 
/*
 * Rewrite OP in place as a load of the constant VAL into DST and record
 * DST as holding that constant.  The replacement opcode (dupi_vec,
 * movi_i64 or movi_i32) is chosen from the flags of OP's current opcode.
 */
static void tcg_opt_gen_movi(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg val)
{
    struct tcg_temp_info *dst_info = arg_info(dst);
    const TCGOpDef *old_def = &tcg_op_defs[op->opc];
    tcg_target_ulong known = val;
    TCGOpcode movi_opc;

    movi_opc = (old_def->flags & TCG_OPF_VECTOR) ? INDEX_op_dupi_vec
             : (old_def->flags & TCG_OPF_64BIT) ? INDEX_op_movi_i64
             : INDEX_op_movi_i32;

    if (TCG_TARGET_REG_BITS > 32 && movi_opc == INDEX_op_movi_i32) {
        /* High bits of the destination are now garbage.  */
        known |= ~0xffffffffull;
    }

    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->opc = movi_opc;
    op->args[0] = dst;
    op->args[1] = val;

    /* Drop the old knowledge about DST, then record the new constant.  */
    reset_temp(dst);
    dst_info->is_const = true;
    dst_info->val = val;
    dst_info->mask = known;
}
199 
/*
 * Rewrite OP in place as a move of SRC into DST, or remove OP entirely
 * when the two are already known to be copies.  DST inherits SRC's
 * known-zero mask; when both temps have the same type DST also joins
 * SRC's list of copies and inherits its constant state.
 */
static void tcg_opt_gen_mov(TCGContext *s, TCGOp *op, TCGArg dst, TCGArg src)
{
    TCGTemp *to = arg_temp(dst);
    TCGTemp *from = arg_temp(src);
    struct tcg_temp_info *to_info;
    struct tcg_temp_info *from_info;
    const TCGOpDef *old_def;
    tcg_target_ulong known;
    TCGOpcode mov_opc;
    TCGTemp *after;

    if (ts_are_copies(to, from)) {
        /* The move would be a no-op.  */
        tcg_op_remove(s, op);
        return;
    }

    reset_ts(to);
    to_info = ts_info(to);
    from_info = ts_info(from);

    old_def = &tcg_op_defs[op->opc];
    mov_opc = (old_def->flags & TCG_OPF_VECTOR) ? INDEX_op_mov_vec
            : (old_def->flags & TCG_OPF_64BIT) ? INDEX_op_mov_i64
            : INDEX_op_mov_i32;

    /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
    op->opc = mov_opc;
    op->args[0] = dst;
    op->args[1] = src;

    known = from_info->mask;
    if (TCG_TARGET_REG_BITS > 32 && mov_opc == INDEX_op_mov_i32) {
        /* High bits of the destination are now garbage.  */
        known |= ~0xffffffffull;
    }
    to_info->mask = known;

    if (from->type != to->type) {
        /* Differently typed temps are not tracked as copies.  */
        return;
    }

    /* Insert DST into the copy list right after SRC.  */
    after = from_info->next_copy;
    to_info->next_copy = after;
    to_info->prev_copy = from;
    ts_info(after)->prev_copy = to;
    from_info->next_copy = to;

    to_info->is_const = from_info->is_const;
    to_info->val = from_info->val;
}
249 
/*
 * Evaluate opcode OP on the constant operands X and Y and return the
 * raw result.  Single-operand opcodes (not, neg, extensions, byte
 * swaps, ctpop, ...) use only X.
 *
 * The result is not normalised for 32-bit opcodes here; the caller
 * do_constant_folding sign-extends it from 32 bits when needed.
 *
 * An opcode without a case prints a message to stderr and aborts via
 * tcg_abort().
 */
static TCGArg do_constant_folding_2(TCGOpcode op, TCGArg x, TCGArg y)
{
    /* Low/high halves for the 64x64 multiplies; only h64 is returned.  */
    uint64_t l64, h64;

    switch (op) {
    CASE_OP_32_64(add):
        return x + y;

    CASE_OP_32_64(sub):
        return x - y;

    CASE_OP_32_64(mul):
        return x * y;

    CASE_OP_32_64(and):
        return x & y;

    CASE_OP_32_64(or):
        return x | y;

    CASE_OP_32_64(xor):
        return x ^ y;

    /* Shift and rotate counts are reduced modulo the operand width.  */
    case INDEX_op_shl_i32:
        return (uint32_t)x << (y & 31);

    case INDEX_op_shl_i64:
        return (uint64_t)x << (y & 63);

    case INDEX_op_shr_i32:
        return (uint32_t)x >> (y & 31);

    case INDEX_op_shr_i64:
        return (uint64_t)x >> (y & 63);

    case INDEX_op_sar_i32:
        return (int32_t)x >> (y & 31);

    case INDEX_op_sar_i64:
        return (int64_t)x >> (y & 63);

    case INDEX_op_rotr_i32:
        return ror32(x, y & 31);

    case INDEX_op_rotr_i64:
        return ror64(x, y & 63);

    case INDEX_op_rotl_i32:
        return rol32(x, y & 31);

    case INDEX_op_rotl_i64:
        return rol64(x, y & 63);

    CASE_OP_32_64(not):
        return ~x;

    CASE_OP_32_64(neg):
        return -x;

    CASE_OP_32_64(andc):
        return x & ~y;

    CASE_OP_32_64(orc):
        return x | ~y;

    CASE_OP_32_64(eqv):
        return ~(x ^ y);

    CASE_OP_32_64(nand):
        return ~(x & y);

    CASE_OP_32_64(nor):
        return ~(x | y);

    /* For clz/ctz, Y is the value produced when X is zero.  */
    case INDEX_op_clz_i32:
        return (uint32_t)x ? clz32(x) : y;

    case INDEX_op_clz_i64:
        return x ? clz64(x) : y;

    case INDEX_op_ctz_i32:
        return (uint32_t)x ? ctz32(x) : y;

    case INDEX_op_ctz_i64:
        return x ? ctz64(x) : y;

    case INDEX_op_ctpop_i32:
        return ctpop32(x);

    case INDEX_op_ctpop_i64:
        return ctpop64(x);

    CASE_OP_32_64(ext8s):
        return (int8_t)x;

    CASE_OP_32_64(ext16s):
        return (int16_t)x;

    CASE_OP_32_64(ext8u):
        return (uint8_t)x;

    CASE_OP_32_64(ext16u):
        return (uint16_t)x;

    CASE_OP_32_64(bswap16):
        return bswap16(x);

    CASE_OP_32_64(bswap32):
        return bswap32(x);

    case INDEX_op_bswap64_i64:
        return bswap64(x);

    case INDEX_op_ext_i32_i64:
    case INDEX_op_ext32s_i64:
        return (int32_t)x;

    case INDEX_op_extu_i32_i64:
    case INDEX_op_extrl_i64_i32:
    case INDEX_op_ext32u_i64:
        return (uint32_t)x;

    case INDEX_op_extrh_i64_i32:
        return (uint64_t)x >> 32;

    /* High half of the 32x32 product, computed in 64 bits.  */
    case INDEX_op_muluh_i32:
        return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
    case INDEX_op_mulsh_i32:
        return ((int64_t)(int32_t)x * (int32_t)y) >> 32;

    case INDEX_op_muluh_i64:
        mulu64(&l64, &h64, x, y);
        return h64;
    case INDEX_op_mulsh_i64:
        muls64(&l64, &h64, x, y);
        return h64;

    /* "y ? : 1" is the GNU omitted-middle-operand conditional: a zero
       divisor is replaced by 1.  */
    /* NOTE(review): the signed cases do not guard the most negative
       dividend divided by -1 -- confirm whether that can reach here.  */
    case INDEX_op_div_i32:
        /* Avoid crashing on divide by zero, otherwise undefined.  */
        return (int32_t)x / ((int32_t)y ? : 1);
    case INDEX_op_divu_i32:
        return (uint32_t)x / ((uint32_t)y ? : 1);
    case INDEX_op_div_i64:
        return (int64_t)x / ((int64_t)y ? : 1);
    case INDEX_op_divu_i64:
        return (uint64_t)x / ((uint64_t)y ? : 1);

    case INDEX_op_rem_i32:
        return (int32_t)x % ((int32_t)y ? : 1);
    case INDEX_op_remu_i32:
        return (uint32_t)x % ((uint32_t)y ? : 1);
    case INDEX_op_rem_i64:
        return (int64_t)x % ((int64_t)y ? : 1);
    case INDEX_op_remu_i64:
        return (uint64_t)x % ((uint64_t)y ? : 1);

    default:
        fprintf(stderr,
                "Unrecognized operation %d in do_constant_folding.\n", op);
        tcg_abort();
    }
}
412 
/*
 * Fold opcode OP applied to the constants X and Y.  The result of an
 * opcode without TCG_OPF_64BIT is sign-extended from its low 32 bits.
 */
static TCGArg do_constant_folding(TCGOpcode op, TCGArg x, TCGArg y)
{
    TCGArg folded = do_constant_folding_2(op, x, y);

    if (tcg_op_defs[op].flags & TCG_OPF_64BIT) {
        return folded;
    }
    return (int32_t)folded;
}
422 
/*
 * Evaluate comparison C on the 32-bit constants X and Y.  Signed
 * conditions compare the operands reinterpreted as int32_t.  Aborts on
 * a condition that is not handled.
 */
static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
{
    const int32_t sx = (int32_t)x;
    const int32_t sy = (int32_t)y;
    bool holds;

    switch (c) {
    case TCG_COND_EQ:
        holds = (x == y);
        break;
    case TCG_COND_NE:
        holds = (x != y);
        break;
    case TCG_COND_LT:
        holds = (sx < sy);
        break;
    case TCG_COND_GE:
        holds = (sx >= sy);
        break;
    case TCG_COND_LE:
        holds = (sx <= sy);
        break;
    case TCG_COND_GT:
        holds = (sx > sy);
        break;
    case TCG_COND_LTU:
        holds = (x < y);
        break;
    case TCG_COND_GEU:
        holds = (x >= y);
        break;
    case TCG_COND_LEU:
        holds = (x <= y);
        break;
    case TCG_COND_GTU:
        holds = (x > y);
        break;
    default:
        tcg_abort();
    }
    return holds;
}
450 
/*
 * Evaluate comparison C on the 64-bit constants X and Y.  Signed
 * conditions compare the operands reinterpreted as int64_t.  Aborts on
 * a condition that is not handled.
 */
static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
{
    const int64_t sx = (int64_t)x;
    const int64_t sy = (int64_t)y;
    bool holds;

    switch (c) {
    case TCG_COND_EQ:
        holds = (x == y);
        break;
    case TCG_COND_NE:
        holds = (x != y);
        break;
    case TCG_COND_LT:
        holds = (sx < sy);
        break;
    case TCG_COND_GE:
        holds = (sx >= sy);
        break;
    case TCG_COND_LE:
        holds = (sx <= sy);
        break;
    case TCG_COND_GT:
        holds = (sx > sy);
        break;
    case TCG_COND_LTU:
        holds = (x < y);
        break;
    case TCG_COND_GEU:
        holds = (x >= y);
        break;
    case TCG_COND_LEU:
        holds = (x <= y);
        break;
    case TCG_COND_GTU:
        holds = (x > y);
        break;
    default:
        tcg_abort();
    }
    return holds;
}
478 
/*
 * Evaluate comparison C for two operands known to be equal: conditions
 * that admit equality are true, strict ones are false.  Aborts on a
 * condition that is not handled.
 */
static bool do_constant_folding_cond_eq(TCGCond c)
{
    switch (c) {
    case TCG_COND_EQ:
    case TCG_COND_GE:
    case TCG_COND_LE:
    case TCG_COND_GEU:
    case TCG_COND_LEU:
        return true;
    case TCG_COND_NE:
    case TCG_COND_LT:
    case TCG_COND_GT:
    case TCG_COND_LTU:
    case TCG_COND_GTU:
        return false;
    default:
        tcg_abort();
    }
}
498 
499 /* Return 2 if the condition can't be simplified, and the result
500    of the condition (0 or 1) if it can */
do_constant_folding_cond(TCGOpcode op,TCGArg x,TCGArg y,TCGCond c)501 static TCGArg do_constant_folding_cond(TCGOpcode op, TCGArg x,
502                                        TCGArg y, TCGCond c)
503 {
504     tcg_target_ulong xv = arg_info(x)->val;
505     tcg_target_ulong yv = arg_info(y)->val;
506     if (arg_is_const(x) && arg_is_const(y)) {
507         const TCGOpDef *def = &tcg_op_defs[op];
508         tcg_debug_assert(!(def->flags & TCG_OPF_VECTOR));
509         if (def->flags & TCG_OPF_64BIT) {
510             return do_constant_folding_cond_64(xv, yv, c);
511         } else {
512             return do_constant_folding_cond_32(xv, yv, c);
513         }
514     } else if (args_are_copies(x, y)) {
515         return do_constant_folding_cond_eq(c);
516     } else if (arg_is_const(y) && yv == 0) {
517         switch (c) {
518         case TCG_COND_LTU:
519             return 0;
520         case TCG_COND_GEU:
521             return 1;
522         default:
523             return 2;
524         }
525     }
526     return 2;
527 }
528 
529 /* Return 2 if the condition can't be simplified, and the result
530    of the condition (0 or 1) if it can */
do_constant_folding_cond2(TCGArg * p1,TCGArg * p2,TCGCond c)531 static TCGArg do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
532 {
533     TCGArg al = p1[0], ah = p1[1];
534     TCGArg bl = p2[0], bh = p2[1];
535 
536     if (arg_is_const(bl) && arg_is_const(bh)) {
537         tcg_target_ulong blv = arg_info(bl)->val;
538         tcg_target_ulong bhv = arg_info(bh)->val;
539         uint64_t b = deposit64(blv, 32, 32, bhv);
540 
541         if (arg_is_const(al) && arg_is_const(ah)) {
542             tcg_target_ulong alv = arg_info(al)->val;
543             tcg_target_ulong ahv = arg_info(ah)->val;
544             uint64_t a = deposit64(alv, 32, 32, ahv);
545             return do_constant_folding_cond_64(a, b, c);
546         }
547         if (b == 0) {
548             switch (c) {
549             case TCG_COND_LTU:
550                 return 0;
551             case TCG_COND_GEU:
552                 return 1;
553             default:
554                 break;
555             }
556         }
557     }
558     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
559         return do_constant_folding_cond_eq(c);
560     }
561     return 2;
562 }
563 
/*
 * Canonicalise the operand order of a commutative operation: exchange
 * *P1 and *P2 when only the first is constant, or when neither or both
 * are constant and DEST equals the second operand.
 * Returns true when the operands were exchanged.
 */
static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
{
    const TCGArg first = *p1;
    const TCGArg second = *p2;
    int bias = 0;

    bias += arg_is_const(first);
    bias -= arg_is_const(second);

    /* Prefer the constant in second argument, and then the form
       op a, a, b, which is better handled on non-RISC hosts. */
    if (bias < 0 || (bias == 0 && dest != second)) {
        return false;
    }

    *p1 = second;
    *p2 = first;
    return true;
}
580 
/*
 * Double-word variant of swap_commutative: P1 and P2 each point at a
 * pair of operands.  The pairs are exchanged when the first holds more
 * constants than the second.  Returns true when they were exchanged.
 */
static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
{
    int bias = 0;
    int k;

    bias += arg_is_const(p1[0]);
    bias += arg_is_const(p1[1]);
    bias -= arg_is_const(p2[0]);
    bias -= arg_is_const(p2[1]);

    if (bias <= 0) {
        return false;
    }

    for (k = 0; k < 2; k++) {
        TCGArg held = p1[k];

        p1[k] = p2[k];
        p2[k] = held;
    }
    return true;
}
596 
597 /* Propagate constants and copies, fold constant expressions. */
tcg_optimize(TCGContext * s)598 void tcg_optimize(TCGContext *s)
599 {
600     int nb_temps, nb_globals;
601     TCGOp *op, *op_next, *prev_mb = NULL;
602     struct tcg_temp_info *infos;
603     TCGTempSet temps_used;
604 
605     /* Array VALS has an element for each temp.
606        If this temp holds a constant then its value is kept in VALS' element.
607        If this temp is a copy of other ones then the other copies are
608        available through the doubly linked circular list. */
609 
610     nb_temps = s->nb_temps;
611     nb_globals = s->nb_globals;
612     bitmap_zero(temps_used.l, nb_temps);
613     infos = tcg_malloc(sizeof(struct tcg_temp_info) * nb_temps);
614 
615     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
616         tcg_target_ulong mask, partmask, affected;
617         int nb_oargs, nb_iargs, i;
618         TCGArg tmp;
619         TCGOpcode opc = op->opc;
620         const TCGOpDef *def = &tcg_op_defs[opc];
621 
622         /* Count the arguments, and initialize the temps that are
623            going to be used */
624         if (opc == INDEX_op_call) {
625             nb_oargs = TCGOP_CALLO(op);
626             nb_iargs = TCGOP_CALLI(op);
627             for (i = 0; i < nb_oargs + nb_iargs; i++) {
628                 TCGTemp *ts = arg_temp(op->args[i]);
629                 if (ts) {
630                     init_ts_info(infos, &temps_used, ts);
631                 }
632             }
633         } else {
634             nb_oargs = def->nb_oargs;
635             nb_iargs = def->nb_iargs;
636             for (i = 0; i < nb_oargs + nb_iargs; i++) {
637                 init_arg_info(infos, &temps_used, op->args[i]);
638             }
639         }
640 
641         /* Do copy propagation */
642         for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
643             TCGTemp *ts = arg_temp(op->args[i]);
644             if (ts && ts_is_copy(ts)) {
645                 op->args[i] = temp_arg(find_better_copy(s, ts));
646             }
647         }
648 
649         /* For commutative operations make constant second argument */
650         switch (opc) {
651         CASE_OP_32_64_VEC(add):
652         CASE_OP_32_64_VEC(mul):
653         CASE_OP_32_64_VEC(and):
654         CASE_OP_32_64_VEC(or):
655         CASE_OP_32_64_VEC(xor):
656         CASE_OP_32_64(eqv):
657         CASE_OP_32_64(nand):
658         CASE_OP_32_64(nor):
659         CASE_OP_32_64(muluh):
660         CASE_OP_32_64(mulsh):
661             swap_commutative(op->args[0], &op->args[1], &op->args[2]);
662             break;
663         CASE_OP_32_64(brcond):
664             if (swap_commutative(-1, &op->args[0], &op->args[1])) {
665                 op->args[2] = tcg_swap_cond(op->args[2]);
666             }
667             break;
668         CASE_OP_32_64(setcond):
669             if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
670                 op->args[3] = tcg_swap_cond(op->args[3]);
671             }
672             break;
673         CASE_OP_32_64(movcond):
674             if (swap_commutative(-1, &op->args[1], &op->args[2])) {
675                 op->args[5] = tcg_swap_cond(op->args[5]);
676             }
677             /* For movcond, we canonicalize the "false" input reg to match
678                the destination reg so that the tcg backend can implement
679                a "move if true" operation.  */
680             if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
681                 op->args[5] = tcg_invert_cond(op->args[5]);
682             }
683             break;
684         CASE_OP_32_64(add2):
685             swap_commutative(op->args[0], &op->args[2], &op->args[4]);
686             swap_commutative(op->args[1], &op->args[3], &op->args[5]);
687             break;
688         CASE_OP_32_64(mulu2):
689         CASE_OP_32_64(muls2):
690             swap_commutative(op->args[0], &op->args[2], &op->args[3]);
691             break;
692         case INDEX_op_brcond2_i32:
693             if (swap_commutative2(&op->args[0], &op->args[2])) {
694                 op->args[4] = tcg_swap_cond(op->args[4]);
695             }
696             break;
697         case INDEX_op_setcond2_i32:
698             if (swap_commutative2(&op->args[1], &op->args[3])) {
699                 op->args[5] = tcg_swap_cond(op->args[5]);
700             }
701             break;
702         default:
703             break;
704         }
705 
706         /* Simplify expressions for "shift/rot r, 0, a => movi r, 0",
707            and "sub r, 0, a => neg r, a" case.  */
708         switch (opc) {
709         CASE_OP_32_64(shl):
710         CASE_OP_32_64(shr):
711         CASE_OP_32_64(sar):
712         CASE_OP_32_64(rotl):
713         CASE_OP_32_64(rotr):
714             if (arg_is_const(op->args[1])
715                 && arg_info(op->args[1])->val == 0) {
716                 tcg_opt_gen_movi(s, op, op->args[0], 0);
717                 continue;
718             }
719             break;
720         CASE_OP_32_64_VEC(sub):
721             {
722                 TCGOpcode neg_op;
723                 bool have_neg;
724 
725                 if (arg_is_const(op->args[2])) {
726                     /* Proceed with possible constant folding. */
727                     break;
728                 }
729                 if (opc == INDEX_op_sub_i32) {
730                     neg_op = INDEX_op_neg_i32;
731                     have_neg = TCG_TARGET_HAS_neg_i32;
732                 } else if (opc == INDEX_op_sub_i64) {
733                     neg_op = INDEX_op_neg_i64;
734                     have_neg = TCG_TARGET_HAS_neg_i64;
735                 } else if (TCG_TARGET_HAS_neg_vec) {
736                     TCGType type = TCGOP_VECL(op) + TCG_TYPE_V64;
737                     unsigned vece = TCGOP_VECE(op);
738                     neg_op = INDEX_op_neg_vec;
739                     have_neg = tcg_can_emit_vec_op(neg_op, type, vece) > 0;
740                 } else {
741                     break;
742                 }
743                 if (!have_neg) {
744                     break;
745                 }
746                 if (arg_is_const(op->args[1])
747                     && arg_info(op->args[1])->val == 0) {
748                     op->opc = neg_op;
749                     reset_temp(op->args[0]);
750                     op->args[1] = op->args[2];
751                     continue;
752                 }
753             }
754             break;
755         CASE_OP_32_64_VEC(xor):
756         CASE_OP_32_64(nand):
757             if (!arg_is_const(op->args[1])
758                 && arg_is_const(op->args[2])
759                 && arg_info(op->args[2])->val == -1) {
760                 i = 1;
761                 goto try_not;
762             }
763             break;
764         CASE_OP_32_64(nor):
765             if (!arg_is_const(op->args[1])
766                 && arg_is_const(op->args[2])
767                 && arg_info(op->args[2])->val == 0) {
768                 i = 1;
769                 goto try_not;
770             }
771             break;
772         CASE_OP_32_64_VEC(andc):
773             if (!arg_is_const(op->args[2])
774                 && arg_is_const(op->args[1])
775                 && arg_info(op->args[1])->val == -1) {
776                 i = 2;
777                 goto try_not;
778             }
779             break;
780         CASE_OP_32_64_VEC(orc):
781         CASE_OP_32_64(eqv):
782             if (!arg_is_const(op->args[2])
783                 && arg_is_const(op->args[1])
784                 && arg_info(op->args[1])->val == 0) {
785                 i = 2;
786                 goto try_not;
787             }
788             break;
789         try_not:
790             {
791                 TCGOpcode not_op;
792                 bool have_not;
793 
794                 if (def->flags & TCG_OPF_VECTOR) {
795                     not_op = INDEX_op_not_vec;
796                     have_not = TCG_TARGET_HAS_not_vec;
797                 } else if (def->flags & TCG_OPF_64BIT) {
798                     not_op = INDEX_op_not_i64;
799                     have_not = TCG_TARGET_HAS_not_i64;
800                 } else {
801                     not_op = INDEX_op_not_i32;
802                     have_not = TCG_TARGET_HAS_not_i32;
803                 }
804                 if (!have_not) {
805                     break;
806                 }
807                 op->opc = not_op;
808                 reset_temp(op->args[0]);
809                 op->args[1] = op->args[i];
810                 continue;
811             }
812         default:
813             break;
814         }
815 
816         /* Simplify expression for "op r, a, const => mov r, a" cases */
817         switch (opc) {
818         CASE_OP_32_64_VEC(add):
819         CASE_OP_32_64_VEC(sub):
820         CASE_OP_32_64_VEC(or):
821         CASE_OP_32_64_VEC(xor):
822         CASE_OP_32_64_VEC(andc):
823         CASE_OP_32_64(shl):
824         CASE_OP_32_64(shr):
825         CASE_OP_32_64(sar):
826         CASE_OP_32_64(rotl):
827         CASE_OP_32_64(rotr):
828             if (!arg_is_const(op->args[1])
829                 && arg_is_const(op->args[2])
830                 && arg_info(op->args[2])->val == 0) {
831                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
832                 continue;
833             }
834             break;
835         CASE_OP_32_64_VEC(and):
836         CASE_OP_32_64_VEC(orc):
837         CASE_OP_32_64(eqv):
838             if (!arg_is_const(op->args[1])
839                 && arg_is_const(op->args[2])
840                 && arg_info(op->args[2])->val == -1) {
841                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
842                 continue;
843             }
844             break;
845         default:
846             break;
847         }
848 
849         /* Simplify using known-zero bits. Currently only ops with a single
850            output argument is supported. */
851         mask = -1;
852         affected = -1;
853         switch (opc) {
854         CASE_OP_32_64(ext8s):
855             if ((arg_info(op->args[1])->mask & 0x80) != 0) {
856                 break;
857             }
858         CASE_OP_32_64(ext8u):
859             mask = 0xff;
860             goto and_const;
861         CASE_OP_32_64(ext16s):
862             if ((arg_info(op->args[1])->mask & 0x8000) != 0) {
863                 break;
864             }
865         CASE_OP_32_64(ext16u):
866             mask = 0xffff;
867             goto and_const;
868         case INDEX_op_ext32s_i64:
869             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
870                 break;
871             }
872         case INDEX_op_ext32u_i64:
873             mask = 0xffffffffU;
874             goto and_const;
875 
876         CASE_OP_32_64(and):
877             mask = arg_info(op->args[2])->mask;
878             if (arg_is_const(op->args[2])) {
879         and_const:
880                 affected = arg_info(op->args[1])->mask & ~mask;
881             }
882             mask = arg_info(op->args[1])->mask & mask;
883             break;
884 
885         case INDEX_op_ext_i32_i64:
886             if ((arg_info(op->args[1])->mask & 0x80000000) != 0) {
887                 break;
888             }
889         case INDEX_op_extu_i32_i64:
890             /* We do not compute affected as it is a size changing op.  */
891             mask = (uint32_t)arg_info(op->args[1])->mask;
892             break;
893 
894         CASE_OP_32_64(andc):
895             /* Known-zeros does not imply known-ones.  Therefore unless
896                op->args[2] is constant, we can't infer anything from it.  */
897             if (arg_is_const(op->args[2])) {
898                 mask = ~arg_info(op->args[2])->mask;
899                 goto and_const;
900             }
901             /* But we certainly know nothing outside args[1] may be set. */
902             mask = arg_info(op->args[1])->mask;
903             break;
904 
905         case INDEX_op_sar_i32:
906             if (arg_is_const(op->args[2])) {
907                 tmp = arg_info(op->args[2])->val & 31;
908                 mask = (int32_t)arg_info(op->args[1])->mask >> tmp;
909             }
910             break;
911         case INDEX_op_sar_i64:
912             if (arg_is_const(op->args[2])) {
913                 tmp = arg_info(op->args[2])->val & 63;
914                 mask = (int64_t)arg_info(op->args[1])->mask >> tmp;
915             }
916             break;
917 
918         case INDEX_op_shr_i32:
919             if (arg_is_const(op->args[2])) {
920                 tmp = arg_info(op->args[2])->val & 31;
921                 mask = (uint32_t)arg_info(op->args[1])->mask >> tmp;
922             }
923             break;
924         case INDEX_op_shr_i64:
925             if (arg_is_const(op->args[2])) {
926                 tmp = arg_info(op->args[2])->val & 63;
927                 mask = (uint64_t)arg_info(op->args[1])->mask >> tmp;
928             }
929             break;
930 
931         case INDEX_op_extrl_i64_i32:
932             mask = (uint32_t)arg_info(op->args[1])->mask;
933             break;
934         case INDEX_op_extrh_i64_i32:
935             mask = (uint64_t)arg_info(op->args[1])->mask >> 32;
936             break;
937 
938         CASE_OP_32_64(shl):
939             if (arg_is_const(op->args[2])) {
940                 tmp = arg_info(op->args[2])->val & (TCG_TARGET_REG_BITS - 1);
941                 mask = arg_info(op->args[1])->mask << tmp;
942             }
943             break;
944 
945         CASE_OP_32_64(neg):
946             /* Set to 1 all bits to the left of the rightmost.  */
947             mask = -(arg_info(op->args[1])->mask
948                      & -arg_info(op->args[1])->mask);
949             break;
950 
951         CASE_OP_32_64(deposit):
952             mask = deposit64(arg_info(op->args[1])->mask,
953                              op->args[3], op->args[4],
954                              arg_info(op->args[2])->mask);
955             break;
956 
957         CASE_OP_32_64(extract):
958             mask = extract64(arg_info(op->args[1])->mask,
959                              op->args[2], op->args[3]);
960             if (op->args[2] == 0) {
961                 affected = arg_info(op->args[1])->mask & ~mask;
962             }
963             break;
964         CASE_OP_32_64(sextract):
965             mask = sextract64(arg_info(op->args[1])->mask,
966                               op->args[2], op->args[3]);
967             if (op->args[2] == 0 && (tcg_target_long)mask >= 0) {
968                 affected = arg_info(op->args[1])->mask & ~mask;
969             }
970             break;
971 
972         CASE_OP_32_64(or):
973         CASE_OP_32_64(xor):
974             mask = arg_info(op->args[1])->mask | arg_info(op->args[2])->mask;
975             break;
976 
977         case INDEX_op_clz_i32:
978         case INDEX_op_ctz_i32:
979             mask = arg_info(op->args[2])->mask | 31;
980             break;
981 
982         case INDEX_op_clz_i64:
983         case INDEX_op_ctz_i64:
984             mask = arg_info(op->args[2])->mask | 63;
985             break;
986 
987         case INDEX_op_ctpop_i32:
988             mask = 32 | 31;
989             break;
990         case INDEX_op_ctpop_i64:
991             mask = 64 | 63;
992             break;
993 
994         CASE_OP_32_64(setcond):
995         case INDEX_op_setcond2_i32:
996             mask = 1;
997             break;
998 
999         CASE_OP_32_64(movcond):
1000             mask = arg_info(op->args[3])->mask | arg_info(op->args[4])->mask;
1001             break;
1002 
1003         CASE_OP_32_64(ld8u):
1004             mask = 0xff;
1005             break;
1006         CASE_OP_32_64(ld16u):
1007             mask = 0xffff;
1008             break;
1009         case INDEX_op_ld32u_i64:
1010             mask = 0xffffffffu;
1011             break;
1012 
1013         CASE_OP_32_64(qemu_ld):
1014             {
1015                 TCGMemOpIdx oi = op->args[nb_oargs + nb_iargs];
1016                 MemOp mop = get_memop(oi);
1017                 if (!(mop & MO_SIGN)) {
1018                     mask = (2ULL << ((8 << (mop & MO_SIZE)) - 1)) - 1;
1019                 }
1020             }
1021             break;
1022 
1023         default:
1024             break;
1025         }
1026 
1027         /* 32-bit ops generate 32-bit results.  For the result is zero test
1028            below, we can ignore high bits, but for further optimizations we
1029            need to record that the high bits contain garbage.  */
1030         partmask = mask;
1031         if (!(def->flags & TCG_OPF_64BIT)) {
1032             mask |= ~(tcg_target_ulong)0xffffffffu;
1033             partmask &= 0xffffffffu;
1034             affected &= 0xffffffffu;
1035         }
1036 
1037         if (partmask == 0) {
1038             tcg_debug_assert(nb_oargs == 1);
1039             tcg_opt_gen_movi(s, op, op->args[0], 0);
1040             continue;
1041         }
1042         if (affected == 0) {
1043             tcg_debug_assert(nb_oargs == 1);
1044             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1045             continue;
1046         }
1047 
1048         /* Simplify expression for "op r, a, 0 => movi r, 0" cases */
1049         switch (opc) {
1050         CASE_OP_32_64_VEC(and):
1051         CASE_OP_32_64_VEC(mul):
1052         CASE_OP_32_64(muluh):
1053         CASE_OP_32_64(mulsh):
1054             if (arg_is_const(op->args[2])
1055                 && arg_info(op->args[2])->val == 0) {
1056                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1057                 continue;
1058             }
1059             break;
1060         default:
1061             break;
1062         }
1063 
1064         /* Simplify expression for "op r, a, a => mov r, a" cases */
1065         switch (opc) {
1066         CASE_OP_32_64_VEC(or):
1067         CASE_OP_32_64_VEC(and):
1068             if (args_are_copies(op->args[1], op->args[2])) {
1069                 tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1070                 continue;
1071             }
1072             break;
1073         default:
1074             break;
1075         }
1076 
1077         /* Simplify expression for "op r, a, a => movi r, 0" cases */
1078         switch (opc) {
1079         CASE_OP_32_64_VEC(andc):
1080         CASE_OP_32_64_VEC(sub):
1081         CASE_OP_32_64_VEC(xor):
1082             if (args_are_copies(op->args[1], op->args[2])) {
1083                 tcg_opt_gen_movi(s, op, op->args[0], 0);
1084                 continue;
1085             }
1086             break;
1087         default:
1088             break;
1089         }
1090 
1091         /* Propagate constants through copy operations and do constant
1092            folding.  Constants will be substituted to arguments by register
1093            allocator where needed and possible.  Also detect copies. */
1094         switch (opc) {
1095         CASE_OP_32_64_VEC(mov):
1096             tcg_opt_gen_mov(s, op, op->args[0], op->args[1]);
1097             break;
1098         CASE_OP_32_64(movi):
1099         case INDEX_op_dupi_vec:
1100             tcg_opt_gen_movi(s, op, op->args[0], op->args[1]);
1101             break;
1102 
1103         case INDEX_op_dup_vec:
1104             if (arg_is_const(op->args[1])) {
1105                 tmp = arg_info(op->args[1])->val;
1106                 tmp = dup_const(TCGOP_VECE(op), tmp);
1107                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1108                 break;
1109             }
1110             goto do_default;
1111 
1112         CASE_OP_32_64(not):
1113         CASE_OP_32_64(neg):
1114         CASE_OP_32_64(ext8s):
1115         CASE_OP_32_64(ext8u):
1116         CASE_OP_32_64(ext16s):
1117         CASE_OP_32_64(ext16u):
1118         CASE_OP_32_64(ctpop):
1119         CASE_OP_32_64(bswap16):
1120         CASE_OP_32_64(bswap32):
1121         case INDEX_op_bswap64_i64:
1122         case INDEX_op_ext32s_i64:
1123         case INDEX_op_ext32u_i64:
1124         case INDEX_op_ext_i32_i64:
1125         case INDEX_op_extu_i32_i64:
1126         case INDEX_op_extrl_i64_i32:
1127         case INDEX_op_extrh_i64_i32:
1128             if (arg_is_const(op->args[1])) {
1129                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val, 0);
1130                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1131                 break;
1132             }
1133             goto do_default;
1134 
1135         CASE_OP_32_64(add):
1136         CASE_OP_32_64(sub):
1137         CASE_OP_32_64(mul):
1138         CASE_OP_32_64(or):
1139         CASE_OP_32_64(and):
1140         CASE_OP_32_64(xor):
1141         CASE_OP_32_64(shl):
1142         CASE_OP_32_64(shr):
1143         CASE_OP_32_64(sar):
1144         CASE_OP_32_64(rotl):
1145         CASE_OP_32_64(rotr):
1146         CASE_OP_32_64(andc):
1147         CASE_OP_32_64(orc):
1148         CASE_OP_32_64(eqv):
1149         CASE_OP_32_64(nand):
1150         CASE_OP_32_64(nor):
1151         CASE_OP_32_64(muluh):
1152         CASE_OP_32_64(mulsh):
1153         CASE_OP_32_64(div):
1154         CASE_OP_32_64(divu):
1155         CASE_OP_32_64(rem):
1156         CASE_OP_32_64(remu):
1157             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1158                 tmp = do_constant_folding(opc, arg_info(op->args[1])->val,
1159                                           arg_info(op->args[2])->val);
1160                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1161                 break;
1162             }
1163             goto do_default;
1164 
1165         CASE_OP_32_64(clz):
1166         CASE_OP_32_64(ctz):
1167             if (arg_is_const(op->args[1])) {
1168                 TCGArg v = arg_info(op->args[1])->val;
1169                 if (v != 0) {
1170                     tmp = do_constant_folding(opc, v, 0);
1171                     tcg_opt_gen_movi(s, op, op->args[0], tmp);
1172                 } else {
1173                     tcg_opt_gen_mov(s, op, op->args[0], op->args[2]);
1174                 }
1175                 break;
1176             }
1177             goto do_default;
1178 
1179         CASE_OP_32_64(deposit):
1180             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1181                 tmp = deposit64(arg_info(op->args[1])->val,
1182                                 op->args[3], op->args[4],
1183                                 arg_info(op->args[2])->val);
1184                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1185                 break;
1186             }
1187             goto do_default;
1188 
1189         CASE_OP_32_64(extract):
1190             if (arg_is_const(op->args[1])) {
1191                 tmp = extract64(arg_info(op->args[1])->val,
1192                                 op->args[2], op->args[3]);
1193                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1194                 break;
1195             }
1196             goto do_default;
1197 
1198         CASE_OP_32_64(sextract):
1199             if (arg_is_const(op->args[1])) {
1200                 tmp = sextract64(arg_info(op->args[1])->val,
1201                                  op->args[2], op->args[3]);
1202                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1203                 break;
1204             }
1205             goto do_default;
1206 
1207         CASE_OP_32_64(extract2):
1208             if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1209                 TCGArg v1 = arg_info(op->args[1])->val;
1210                 TCGArg v2 = arg_info(op->args[2])->val;
1211 
1212                 if (opc == INDEX_op_extract2_i64) {
1213                     tmp = (v1 >> op->args[3]) | (v2 << (64 - op->args[3]));
1214                 } else {
1215                     tmp = (int32_t)(((uint32_t)v1 >> op->args[3]) |
1216                                     ((uint32_t)v2 << (32 - op->args[3])));
1217                 }
1218                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1219                 break;
1220             }
1221             goto do_default;
1222 
1223         CASE_OP_32_64(setcond):
1224             tmp = do_constant_folding_cond(opc, op->args[1],
1225                                            op->args[2], op->args[3]);
1226             if (tmp != 2) {
1227                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1228                 break;
1229             }
1230             goto do_default;
1231 
1232         CASE_OP_32_64(brcond):
1233             tmp = do_constant_folding_cond(opc, op->args[0],
1234                                            op->args[1], op->args[2]);
1235             if (tmp != 2) {
1236                 if (tmp) {
1237                     bitmap_zero(temps_used.l, nb_temps);
1238                     op->opc = INDEX_op_br;
1239                     op->args[0] = op->args[3];
1240                 } else {
1241                     tcg_op_remove(s, op);
1242                 }
1243                 break;
1244             }
1245             goto do_default;
1246 
1247         CASE_OP_32_64(movcond):
1248             tmp = do_constant_folding_cond(opc, op->args[1],
1249                                            op->args[2], op->args[5]);
1250             if (tmp != 2) {
1251                 tcg_opt_gen_mov(s, op, op->args[0], op->args[4-tmp]);
1252                 break;
1253             }
1254             if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1255                 tcg_target_ulong tv = arg_info(op->args[3])->val;
1256                 tcg_target_ulong fv = arg_info(op->args[4])->val;
1257                 TCGCond cond = op->args[5];
1258                 if (fv == 1 && tv == 0) {
1259                     cond = tcg_invert_cond(cond);
1260                 } else if (!(tv == 1 && fv == 0)) {
1261                     goto do_default;
1262                 }
1263                 op->args[3] = cond;
1264                 op->opc = opc = (opc == INDEX_op_movcond_i32
1265                                  ? INDEX_op_setcond_i32
1266                                  : INDEX_op_setcond_i64);
1267                 nb_iargs = 2;
1268             }
1269             goto do_default;
1270 
1271         case INDEX_op_add2_i32:
1272         case INDEX_op_sub2_i32:
1273             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])
1274                 && arg_is_const(op->args[4]) && arg_is_const(op->args[5])) {
1275                 uint32_t al = arg_info(op->args[2])->val;
1276                 uint32_t ah = arg_info(op->args[3])->val;
1277                 uint32_t bl = arg_info(op->args[4])->val;
1278                 uint32_t bh = arg_info(op->args[5])->val;
1279                 uint64_t a = ((uint64_t)ah << 32) | al;
1280                 uint64_t b = ((uint64_t)bh << 32) | bl;
1281                 TCGArg rl, rh;
1282                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
1283 
1284                 if (opc == INDEX_op_add2_i32) {
1285                     a += b;
1286                 } else {
1287                     a -= b;
1288                 }
1289 
1290                 rl = op->args[0];
1291                 rh = op->args[1];
1292                 tcg_opt_gen_movi(s, op, rl, (int32_t)a);
1293                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(a >> 32));
1294                 break;
1295             }
1296             goto do_default;
1297 
1298         case INDEX_op_mulu2_i32:
1299             if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1300                 uint32_t a = arg_info(op->args[2])->val;
1301                 uint32_t b = arg_info(op->args[3])->val;
1302                 uint64_t r = (uint64_t)a * b;
1303                 TCGArg rl, rh;
1304                 TCGOp *op2 = tcg_op_insert_before(s, op, INDEX_op_movi_i32);
1305 
1306                 rl = op->args[0];
1307                 rh = op->args[1];
1308                 tcg_opt_gen_movi(s, op, rl, (int32_t)r);
1309                 tcg_opt_gen_movi(s, op2, rh, (int32_t)(r >> 32));
1310                 break;
1311             }
1312             goto do_default;
1313 
1314         case INDEX_op_brcond2_i32:
1315             tmp = do_constant_folding_cond2(&op->args[0], &op->args[2],
1316                                             op->args[4]);
1317             if (tmp != 2) {
1318                 if (tmp) {
1319             do_brcond_true:
1320                     bitmap_zero(temps_used.l, nb_temps);
1321                     op->opc = INDEX_op_br;
1322                     op->args[0] = op->args[5];
1323                 } else {
1324             do_brcond_false:
1325                     tcg_op_remove(s, op);
1326                 }
1327             } else if ((op->args[4] == TCG_COND_LT
1328                         || op->args[4] == TCG_COND_GE)
1329                        && arg_is_const(op->args[2])
1330                        && arg_info(op->args[2])->val == 0
1331                        && arg_is_const(op->args[3])
1332                        && arg_info(op->args[3])->val == 0) {
1333                 /* Simplify LT/GE comparisons vs zero to a single compare
1334                    vs the high word of the input.  */
1335             do_brcond_high:
1336                 bitmap_zero(temps_used.l, nb_temps);
1337                 op->opc = INDEX_op_brcond_i32;
1338                 op->args[0] = op->args[1];
1339                 op->args[1] = op->args[3];
1340                 op->args[2] = op->args[4];
1341                 op->args[3] = op->args[5];
1342             } else if (op->args[4] == TCG_COND_EQ) {
1343                 /* Simplify EQ comparisons where one of the pairs
1344                    can be simplified.  */
1345                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1346                                                op->args[0], op->args[2],
1347                                                TCG_COND_EQ);
1348                 if (tmp == 0) {
1349                     goto do_brcond_false;
1350                 } else if (tmp == 1) {
1351                     goto do_brcond_high;
1352                 }
1353                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1354                                                op->args[1], op->args[3],
1355                                                TCG_COND_EQ);
1356                 if (tmp == 0) {
1357                     goto do_brcond_false;
1358                 } else if (tmp != 1) {
1359                     goto do_default;
1360                 }
1361             do_brcond_low:
1362                 bitmap_zero(temps_used.l, nb_temps);
1363                 op->opc = INDEX_op_brcond_i32;
1364                 op->args[1] = op->args[2];
1365                 op->args[2] = op->args[4];
1366                 op->args[3] = op->args[5];
1367             } else if (op->args[4] == TCG_COND_NE) {
1368                 /* Simplify NE comparisons where one of the pairs
1369                    can be simplified.  */
1370                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1371                                                op->args[0], op->args[2],
1372                                                TCG_COND_NE);
1373                 if (tmp == 0) {
1374                     goto do_brcond_high;
1375                 } else if (tmp == 1) {
1376                     goto do_brcond_true;
1377                 }
1378                 tmp = do_constant_folding_cond(INDEX_op_brcond_i32,
1379                                                op->args[1], op->args[3],
1380                                                TCG_COND_NE);
1381                 if (tmp == 0) {
1382                     goto do_brcond_low;
1383                 } else if (tmp == 1) {
1384                     goto do_brcond_true;
1385                 }
1386                 goto do_default;
1387             } else {
1388                 goto do_default;
1389             }
1390             break;
1391 
1392         case INDEX_op_setcond2_i32:
1393             tmp = do_constant_folding_cond2(&op->args[1], &op->args[3],
1394                                             op->args[5]);
1395             if (tmp != 2) {
1396             do_setcond_const:
1397                 tcg_opt_gen_movi(s, op, op->args[0], tmp);
1398             } else if ((op->args[5] == TCG_COND_LT
1399                         || op->args[5] == TCG_COND_GE)
1400                        && arg_is_const(op->args[3])
1401                        && arg_info(op->args[3])->val == 0
1402                        && arg_is_const(op->args[4])
1403                        && arg_info(op->args[4])->val == 0) {
1404                 /* Simplify LT/GE comparisons vs zero to a single compare
1405                    vs the high word of the input.  */
1406             do_setcond_high:
1407                 reset_temp(op->args[0]);
1408                 arg_info(op->args[0])->mask = 1;
1409                 op->opc = INDEX_op_setcond_i32;
1410                 op->args[1] = op->args[2];
1411                 op->args[2] = op->args[4];
1412                 op->args[3] = op->args[5];
1413             } else if (op->args[5] == TCG_COND_EQ) {
1414                 /* Simplify EQ comparisons where one of the pairs
1415                    can be simplified.  */
1416                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1417                                                op->args[1], op->args[3],
1418                                                TCG_COND_EQ);
1419                 if (tmp == 0) {
1420                     goto do_setcond_const;
1421                 } else if (tmp == 1) {
1422                     goto do_setcond_high;
1423                 }
1424                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1425                                                op->args[2], op->args[4],
1426                                                TCG_COND_EQ);
1427                 if (tmp == 0) {
1428                     goto do_setcond_high;
1429                 } else if (tmp != 1) {
1430                     goto do_default;
1431                 }
1432             do_setcond_low:
1433                 reset_temp(op->args[0]);
1434                 arg_info(op->args[0])->mask = 1;
1435                 op->opc = INDEX_op_setcond_i32;
1436                 op->args[2] = op->args[3];
1437                 op->args[3] = op->args[5];
1438             } else if (op->args[5] == TCG_COND_NE) {
1439                 /* Simplify NE comparisons where one of the pairs
1440                    can be simplified.  */
1441                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1442                                                op->args[1], op->args[3],
1443                                                TCG_COND_NE);
1444                 if (tmp == 0) {
1445                     goto do_setcond_high;
1446                 } else if (tmp == 1) {
1447                     goto do_setcond_const;
1448                 }
1449                 tmp = do_constant_folding_cond(INDEX_op_setcond_i32,
1450                                                op->args[2], op->args[4],
1451                                                TCG_COND_NE);
1452                 if (tmp == 0) {
1453                     goto do_setcond_low;
1454                 } else if (tmp == 1) {
1455                     goto do_setcond_const;
1456                 }
1457                 goto do_default;
1458             } else {
1459                 goto do_default;
1460             }
1461             break;
1462 
1463         case INDEX_op_call:
1464             if (!(op->args[nb_oargs + nb_iargs + 1]
1465                   & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1466                 for (i = 0; i < nb_globals; i++) {
1467                     if (test_bit(i, temps_used.l)) {
1468                         reset_ts(&s->temps[i]);
1469                     }
1470                 }
1471             }
1472             goto do_reset_output;
1473 
1474         default:
1475         do_default:
1476             /* Default case: we know nothing about operation (or were unable
1477                to compute the operation result) so no propagation is done.
1478                We trash everything if the operation is the end of a basic
1479                block, otherwise we only trash the output args.  "mask" is
1480                the non-zero bits mask for the first output arg.  */
1481             if (def->flags & TCG_OPF_BB_END) {
1482                 bitmap_zero(temps_used.l, nb_temps);
1483             } else {
1484         do_reset_output:
1485                 for (i = 0; i < nb_oargs; i++) {
1486                     reset_temp(op->args[i]);
1487                     /* Save the corresponding known-zero bits mask for the
1488                        first output argument (only one supported so far). */
1489                     if (i == 0) {
1490                         arg_info(op->args[i])->mask = mask;
1491                     }
1492                 }
1493             }
1494             break;
1495         }
1496 
1497         /* Eliminate duplicate and redundant fence instructions.  */
1498         if (prev_mb) {
1499             switch (opc) {
1500             case INDEX_op_mb:
1501                 /* Merge two barriers of the same type into one,
1502                  * or a weaker barrier into a stronger one,
1503                  * or two weaker barriers into a stronger one.
1504                  *   mb X; mb Y => mb X|Y
1505                  *   mb; strl => mb; st
1506                  *   ldaq; mb => ld; mb
1507                  *   ldaq; strl => ld; mb; st
1508                  * Other combinations are also merged into a strong
1509                  * barrier.  This is stricter than specified but for
1510                  * the purposes of TCG is better than not optimizing.
1511                  */
1512                 prev_mb->args[0] |= op->args[0];
1513                 tcg_op_remove(s, op);
1514                 break;
1515 
1516             default:
1517                 /* Opcodes that end the block stop the optimization.  */
1518                 if ((def->flags & TCG_OPF_BB_END) == 0) {
1519                     break;
1520                 }
1521                 /* fallthru */
1522             case INDEX_op_qemu_ld_i32:
1523             case INDEX_op_qemu_ld_i64:
1524             case INDEX_op_qemu_st_i32:
1525             case INDEX_op_qemu_st_i64:
1526             case INDEX_op_call:
1527                 /* Opcodes that touch guest memory stop the optimization.  */
1528                 prev_mb = NULL;
1529                 break;
1530             }
1531         } else if (opc == INDEX_op_mb) {
1532             prev_mb = op;
1533         }
1534     }
1535 }
1536