xref: /qemu/tcg/optimize.c (revision 940bb5fa)
1 /*
2  * Optimizations for Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2010 Samsung Electronics.
5  * Contributed by Kirill Batuzov <batuzovk@ispras.ru>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a copy
8  * of this software and associated documentation files (the "Software"), to deal
9  * in the Software without restriction, including without limitation the rights
10  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
11  * copies of the Software, and to permit persons to whom the Software is
12  * furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice shall be included in
15  * all copies or substantial portions of the Software.
16  *
17  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
20  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
23  * THE SOFTWARE.
24  */
25 
26 #include "qemu/osdep.h"
27 #include "qemu/int128.h"
28 #include "qemu/interval-tree.h"
29 #include "tcg/tcg-op-common.h"
30 #include "tcg-internal.h"
31 
32 #define CASE_OP_32_64(x)                        \
33         glue(glue(case INDEX_op_, x), _i32):    \
34         glue(glue(case INDEX_op_, x), _i64)
35 
36 #define CASE_OP_32_64_VEC(x)                    \
37         glue(glue(case INDEX_op_, x), _i32):    \
38         glue(glue(case INDEX_op_, x), _i64):    \
39         glue(glue(case INDEX_op_, x), _vec)
40 
41 typedef struct MemCopyInfo {
42     IntervalTreeNode itree;
43     QSIMPLEQ_ENTRY (MemCopyInfo) next;
44     TCGTemp *ts;
45     TCGType type;
46 } MemCopyInfo;
47 
48 typedef struct TempOptInfo {
49     bool is_const;
50     TCGTemp *prev_copy;
51     TCGTemp *next_copy;
52     QSIMPLEQ_HEAD(, MemCopyInfo) mem_copy;
53     uint64_t val;
54     uint64_t z_mask;  /* mask bit is 0 if and only if value bit is 0 */
55     uint64_t s_mask;  /* a left-aligned mask of clrsb(value) bits. */
56 } TempOptInfo;
57 
58 typedef struct OptContext {
59     TCGContext *tcg;
60     TCGOp *prev_mb;
61     TCGTempSet temps_used;
62 
63     IntervalTreeRoot mem_copy;
64     QSIMPLEQ_HEAD(, MemCopyInfo) mem_free;
65 
66     /* In flight values from optimization. */
67     uint64_t a_mask;  /* mask bit is 0 iff value identical to first input */
68     uint64_t z_mask;  /* mask bit is 0 iff value bit is 0 */
69     uint64_t s_mask;  /* mask of clrsb(value) bits */
70     TCGType type;
71 } OptContext;
72 
73 /* Calculate the smask for a specific value. */
74 static uint64_t smask_from_value(uint64_t value)
75 {
76     int rep = clrsb64(value);
77     return ~(~0ull >> rep);
78 }
79 
80 /*
81  * Calculate the smask for a given set of known-zeros.
82  * If there are lots of zeros on the left, we can consider the remainder
83  * an unsigned field, and thus the corresponding signed field is one bit
84  * larger.
85  */
86 static uint64_t smask_from_zmask(uint64_t zmask)
87 {
88     /*
89      * Only the 0 bits are significant for zmask, thus the msb itself
90      * must be zero, else we have no sign information.
91      */
92     int rep = clz64(zmask);
93     if (rep == 0) {
94         return 0;
95     }
96     rep -= 1;
97     return ~(~0ull >> rep);
98 }
99 
100 /*
101  * Recreate a properly left-aligned smask after manipulation.
102  * Some bit-shuffling, particularly shifts and rotates, may
103  * retain sign bits on the left, but may scatter disconnected
104  * sign bits on the right.  Retain only what remains to the left.
105  */
106 static uint64_t smask_from_smask(int64_t smask)
107 {
108     /* Only the 1 bits are significant for smask */
109     return smask_from_zmask(~smask);
110 }
111 
112 static inline TempOptInfo *ts_info(TCGTemp *ts)
113 {
114     return ts->state_ptr;
115 }
116 
117 static inline TempOptInfo *arg_info(TCGArg arg)
118 {
119     return ts_info(arg_temp(arg));
120 }
121 
122 static inline bool ts_is_const(TCGTemp *ts)
123 {
124     return ts_info(ts)->is_const;
125 }
126 
127 static inline bool arg_is_const(TCGArg arg)
128 {
129     return ts_is_const(arg_temp(arg));
130 }
131 
132 static inline bool ts_is_copy(TCGTemp *ts)
133 {
134     return ts_info(ts)->next_copy != ts;
135 }
136 
137 static TCGTemp *cmp_better_copy(TCGTemp *a, TCGTemp *b)
138 {
139     return a->kind < b->kind ? b : a;
140 }
141 
142 /* Initialize and activate a temporary.  */
143 static void init_ts_info(OptContext *ctx, TCGTemp *ts)
144 {
145     size_t idx = temp_idx(ts);
146     TempOptInfo *ti;
147 
148     if (test_bit(idx, ctx->temps_used.l)) {
149         return;
150     }
151     set_bit(idx, ctx->temps_used.l);
152 
153     ti = ts->state_ptr;
154     if (ti == NULL) {
155         ti = tcg_malloc(sizeof(TempOptInfo));
156         ts->state_ptr = ti;
157     }
158 
159     ti->next_copy = ts;
160     ti->prev_copy = ts;
161     QSIMPLEQ_INIT(&ti->mem_copy);
162     if (ts->kind == TEMP_CONST) {
163         ti->is_const = true;
164         ti->val = ts->val;
165         ti->z_mask = ts->val;
166         ti->s_mask = smask_from_value(ts->val);
167     } else {
168         ti->is_const = false;
169         ti->z_mask = -1;
170         ti->s_mask = 0;
171     }
172 }
173 
174 static MemCopyInfo *mem_copy_first(OptContext *ctx, intptr_t s, intptr_t l)
175 {
176     IntervalTreeNode *r = interval_tree_iter_first(&ctx->mem_copy, s, l);
177     return r ? container_of(r, MemCopyInfo, itree) : NULL;
178 }
179 
180 static MemCopyInfo *mem_copy_next(MemCopyInfo *mem, intptr_t s, intptr_t l)
181 {
182     IntervalTreeNode *r = interval_tree_iter_next(&mem->itree, s, l);
183     return r ? container_of(r, MemCopyInfo, itree) : NULL;
184 }
185 
186 static void remove_mem_copy(OptContext *ctx, MemCopyInfo *mc)
187 {
188     TCGTemp *ts = mc->ts;
189     TempOptInfo *ti = ts_info(ts);
190 
191     interval_tree_remove(&mc->itree, &ctx->mem_copy);
192     QSIMPLEQ_REMOVE(&ti->mem_copy, mc, MemCopyInfo, next);
193     QSIMPLEQ_INSERT_TAIL(&ctx->mem_free, mc, next);
194 }
195 
196 static void remove_mem_copy_in(OptContext *ctx, intptr_t s, intptr_t l)
197 {
198     while (true) {
199         MemCopyInfo *mc = mem_copy_first(ctx, s, l);
200         if (!mc) {
201             break;
202         }
203         remove_mem_copy(ctx, mc);
204     }
205 }
206 
207 static void remove_mem_copy_all(OptContext *ctx)
208 {
209     remove_mem_copy_in(ctx, 0, -1);
210     tcg_debug_assert(interval_tree_is_empty(&ctx->mem_copy));
211 }
212 
213 static TCGTemp *find_better_copy(TCGTemp *ts)
214 {
215     TCGTemp *i, *ret;
216 
217     /* If this is already readonly, we can't do better. */
218     if (temp_readonly(ts)) {
219         return ts;
220     }
221 
222     ret = ts;
223     for (i = ts_info(ts)->next_copy; i != ts; i = ts_info(i)->next_copy) {
224         ret = cmp_better_copy(ret, i);
225     }
226     return ret;
227 }
228 
229 static void move_mem_copies(TCGTemp *dst_ts, TCGTemp *src_ts)
230 {
231     TempOptInfo *si = ts_info(src_ts);
232     TempOptInfo *di = ts_info(dst_ts);
233     MemCopyInfo *mc;
234 
235     QSIMPLEQ_FOREACH(mc, &si->mem_copy, next) {
236         tcg_debug_assert(mc->ts == src_ts);
237         mc->ts = dst_ts;
238     }
239     QSIMPLEQ_CONCAT(&di->mem_copy, &si->mem_copy);
240 }
241 
242 /* Reset TEMP's state, possibly removing the temp for the list of copies.  */
243 static void reset_ts(OptContext *ctx, TCGTemp *ts)
244 {
245     TempOptInfo *ti = ts_info(ts);
246     TCGTemp *pts = ti->prev_copy;
247     TCGTemp *nts = ti->next_copy;
248     TempOptInfo *pi = ts_info(pts);
249     TempOptInfo *ni = ts_info(nts);
250 
251     ni->prev_copy = ti->prev_copy;
252     pi->next_copy = ti->next_copy;
253     ti->next_copy = ts;
254     ti->prev_copy = ts;
255     ti->is_const = false;
256     ti->z_mask = -1;
257     ti->s_mask = 0;
258 
259     if (!QSIMPLEQ_EMPTY(&ti->mem_copy)) {
260         if (ts == nts) {
261             /* Last temp copy being removed, the mem copies die. */
262             MemCopyInfo *mc;
263             QSIMPLEQ_FOREACH(mc, &ti->mem_copy, next) {
264                 interval_tree_remove(&mc->itree, &ctx->mem_copy);
265             }
266             QSIMPLEQ_CONCAT(&ctx->mem_free, &ti->mem_copy);
267         } else {
268             move_mem_copies(find_better_copy(nts), ts);
269         }
270     }
271 }
272 
273 static void reset_temp(OptContext *ctx, TCGArg arg)
274 {
275     reset_ts(ctx, arg_temp(arg));
276 }
277 
278 static void record_mem_copy(OptContext *ctx, TCGType type,
279                             TCGTemp *ts, intptr_t start, intptr_t last)
280 {
281     MemCopyInfo *mc;
282     TempOptInfo *ti;
283 
284     mc = QSIMPLEQ_FIRST(&ctx->mem_free);
285     if (mc) {
286         QSIMPLEQ_REMOVE_HEAD(&ctx->mem_free, next);
287     } else {
288         mc = tcg_malloc(sizeof(*mc));
289     }
290 
291     memset(mc, 0, sizeof(*mc));
292     mc->itree.start = start;
293     mc->itree.last = last;
294     mc->type = type;
295     interval_tree_insert(&mc->itree, &ctx->mem_copy);
296 
297     ts = find_better_copy(ts);
298     ti = ts_info(ts);
299     mc->ts = ts;
300     QSIMPLEQ_INSERT_TAIL(&ti->mem_copy, mc, next);
301 }
302 
303 static bool ts_are_copies(TCGTemp *ts1, TCGTemp *ts2)
304 {
305     TCGTemp *i;
306 
307     if (ts1 == ts2) {
308         return true;
309     }
310 
311     if (!ts_is_copy(ts1) || !ts_is_copy(ts2)) {
312         return false;
313     }
314 
315     for (i = ts_info(ts1)->next_copy; i != ts1; i = ts_info(i)->next_copy) {
316         if (i == ts2) {
317             return true;
318         }
319     }
320 
321     return false;
322 }
323 
324 static bool args_are_copies(TCGArg arg1, TCGArg arg2)
325 {
326     return ts_are_copies(arg_temp(arg1), arg_temp(arg2));
327 }
328 
329 static TCGTemp *find_mem_copy_for(OptContext *ctx, TCGType type, intptr_t s)
330 {
331     MemCopyInfo *mc;
332 
333     for (mc = mem_copy_first(ctx, s, s); mc; mc = mem_copy_next(mc, s, s)) {
334         if (mc->itree.start == s && mc->type == type) {
335             return find_better_copy(mc->ts);
336         }
337     }
338     return NULL;
339 }
340 
341 static TCGArg arg_new_constant(OptContext *ctx, uint64_t val)
342 {
343     TCGType type = ctx->type;
344     TCGTemp *ts;
345 
346     if (type == TCG_TYPE_I32) {
347         val = (int32_t)val;
348     }
349 
350     ts = tcg_constant_internal(type, val);
351     init_ts_info(ctx, ts);
352 
353     return temp_arg(ts);
354 }
355 
356 static bool tcg_opt_gen_mov(OptContext *ctx, TCGOp *op, TCGArg dst, TCGArg src)
357 {
358     TCGTemp *dst_ts = arg_temp(dst);
359     TCGTemp *src_ts = arg_temp(src);
360     TempOptInfo *di;
361     TempOptInfo *si;
362     TCGOpcode new_op;
363 
364     if (ts_are_copies(dst_ts, src_ts)) {
365         tcg_op_remove(ctx->tcg, op);
366         return true;
367     }
368 
369     reset_ts(ctx, dst_ts);
370     di = ts_info(dst_ts);
371     si = ts_info(src_ts);
372 
373     switch (ctx->type) {
374     case TCG_TYPE_I32:
375         new_op = INDEX_op_mov_i32;
376         break;
377     case TCG_TYPE_I64:
378         new_op = INDEX_op_mov_i64;
379         break;
380     case TCG_TYPE_V64:
381     case TCG_TYPE_V128:
382     case TCG_TYPE_V256:
383         /* TCGOP_VECL and TCGOP_VECE remain unchanged.  */
384         new_op = INDEX_op_mov_vec;
385         break;
386     default:
387         g_assert_not_reached();
388     }
389     op->opc = new_op;
390     op->args[0] = dst;
391     op->args[1] = src;
392 
393     di->z_mask = si->z_mask;
394     di->s_mask = si->s_mask;
395 
396     if (src_ts->type == dst_ts->type) {
397         TempOptInfo *ni = ts_info(si->next_copy);
398 
399         di->next_copy = si->next_copy;
400         di->prev_copy = src_ts;
401         ni->prev_copy = dst_ts;
402         si->next_copy = dst_ts;
403         di->is_const = si->is_const;
404         di->val = si->val;
405 
406         if (!QSIMPLEQ_EMPTY(&si->mem_copy)
407             && cmp_better_copy(src_ts, dst_ts) == dst_ts) {
408             move_mem_copies(dst_ts, src_ts);
409         }
410     }
411     return true;
412 }
413 
414 static bool tcg_opt_gen_movi(OptContext *ctx, TCGOp *op,
415                              TCGArg dst, uint64_t val)
416 {
417     /* Convert movi to mov with constant temp. */
418     return tcg_opt_gen_mov(ctx, op, dst, arg_new_constant(ctx, val));
419 }
420 
421 static uint64_t do_constant_folding_2(TCGOpcode op, uint64_t x, uint64_t y)
422 {
423     uint64_t l64, h64;
424 
425     switch (op) {
426     CASE_OP_32_64(add):
427         return x + y;
428 
429     CASE_OP_32_64(sub):
430         return x - y;
431 
432     CASE_OP_32_64(mul):
433         return x * y;
434 
435     CASE_OP_32_64_VEC(and):
436         return x & y;
437 
438     CASE_OP_32_64_VEC(or):
439         return x | y;
440 
441     CASE_OP_32_64_VEC(xor):
442         return x ^ y;
443 
444     case INDEX_op_shl_i32:
445         return (uint32_t)x << (y & 31);
446 
447     case INDEX_op_shl_i64:
448         return (uint64_t)x << (y & 63);
449 
450     case INDEX_op_shr_i32:
451         return (uint32_t)x >> (y & 31);
452 
453     case INDEX_op_shr_i64:
454         return (uint64_t)x >> (y & 63);
455 
456     case INDEX_op_sar_i32:
457         return (int32_t)x >> (y & 31);
458 
459     case INDEX_op_sar_i64:
460         return (int64_t)x >> (y & 63);
461 
462     case INDEX_op_rotr_i32:
463         return ror32(x, y & 31);
464 
465     case INDEX_op_rotr_i64:
466         return ror64(x, y & 63);
467 
468     case INDEX_op_rotl_i32:
469         return rol32(x, y & 31);
470 
471     case INDEX_op_rotl_i64:
472         return rol64(x, y & 63);
473 
474     CASE_OP_32_64_VEC(not):
475         return ~x;
476 
477     CASE_OP_32_64(neg):
478         return -x;
479 
480     CASE_OP_32_64_VEC(andc):
481         return x & ~y;
482 
483     CASE_OP_32_64_VEC(orc):
484         return x | ~y;
485 
486     CASE_OP_32_64_VEC(eqv):
487         return ~(x ^ y);
488 
489     CASE_OP_32_64_VEC(nand):
490         return ~(x & y);
491 
492     CASE_OP_32_64_VEC(nor):
493         return ~(x | y);
494 
495     case INDEX_op_clz_i32:
496         return (uint32_t)x ? clz32(x) : y;
497 
498     case INDEX_op_clz_i64:
499         return x ? clz64(x) : y;
500 
501     case INDEX_op_ctz_i32:
502         return (uint32_t)x ? ctz32(x) : y;
503 
504     case INDEX_op_ctz_i64:
505         return x ? ctz64(x) : y;
506 
507     case INDEX_op_ctpop_i32:
508         return ctpop32(x);
509 
510     case INDEX_op_ctpop_i64:
511         return ctpop64(x);
512 
513     CASE_OP_32_64(ext8s):
514         return (int8_t)x;
515 
516     CASE_OP_32_64(ext16s):
517         return (int16_t)x;
518 
519     CASE_OP_32_64(ext8u):
520         return (uint8_t)x;
521 
522     CASE_OP_32_64(ext16u):
523         return (uint16_t)x;
524 
525     CASE_OP_32_64(bswap16):
526         x = bswap16(x);
527         return y & TCG_BSWAP_OS ? (int16_t)x : x;
528 
529     CASE_OP_32_64(bswap32):
530         x = bswap32(x);
531         return y & TCG_BSWAP_OS ? (int32_t)x : x;
532 
533     case INDEX_op_bswap64_i64:
534         return bswap64(x);
535 
536     case INDEX_op_ext_i32_i64:
537     case INDEX_op_ext32s_i64:
538         return (int32_t)x;
539 
540     case INDEX_op_extu_i32_i64:
541     case INDEX_op_extrl_i64_i32:
542     case INDEX_op_ext32u_i64:
543         return (uint32_t)x;
544 
545     case INDEX_op_extrh_i64_i32:
546         return (uint64_t)x >> 32;
547 
548     case INDEX_op_muluh_i32:
549         return ((uint64_t)(uint32_t)x * (uint32_t)y) >> 32;
550     case INDEX_op_mulsh_i32:
551         return ((int64_t)(int32_t)x * (int32_t)y) >> 32;
552 
553     case INDEX_op_muluh_i64:
554         mulu64(&l64, &h64, x, y);
555         return h64;
556     case INDEX_op_mulsh_i64:
557         muls64(&l64, &h64, x, y);
558         return h64;
559 
560     case INDEX_op_div_i32:
561         /* Avoid crashing on divide by zero, otherwise undefined.  */
562         return (int32_t)x / ((int32_t)y ? : 1);
563     case INDEX_op_divu_i32:
564         return (uint32_t)x / ((uint32_t)y ? : 1);
565     case INDEX_op_div_i64:
566         return (int64_t)x / ((int64_t)y ? : 1);
567     case INDEX_op_divu_i64:
568         return (uint64_t)x / ((uint64_t)y ? : 1);
569 
570     case INDEX_op_rem_i32:
571         return (int32_t)x % ((int32_t)y ? : 1);
572     case INDEX_op_remu_i32:
573         return (uint32_t)x % ((uint32_t)y ? : 1);
574     case INDEX_op_rem_i64:
575         return (int64_t)x % ((int64_t)y ? : 1);
576     case INDEX_op_remu_i64:
577         return (uint64_t)x % ((uint64_t)y ? : 1);
578 
579     default:
580         g_assert_not_reached();
581     }
582 }
583 
584 static uint64_t do_constant_folding(TCGOpcode op, TCGType type,
585                                     uint64_t x, uint64_t y)
586 {
587     uint64_t res = do_constant_folding_2(op, x, y);
588     if (type == TCG_TYPE_I32) {
589         res = (int32_t)res;
590     }
591     return res;
592 }
593 
594 static bool do_constant_folding_cond_32(uint32_t x, uint32_t y, TCGCond c)
595 {
596     switch (c) {
597     case TCG_COND_EQ:
598         return x == y;
599     case TCG_COND_NE:
600         return x != y;
601     case TCG_COND_LT:
602         return (int32_t)x < (int32_t)y;
603     case TCG_COND_GE:
604         return (int32_t)x >= (int32_t)y;
605     case TCG_COND_LE:
606         return (int32_t)x <= (int32_t)y;
607     case TCG_COND_GT:
608         return (int32_t)x > (int32_t)y;
609     case TCG_COND_LTU:
610         return x < y;
611     case TCG_COND_GEU:
612         return x >= y;
613     case TCG_COND_LEU:
614         return x <= y;
615     case TCG_COND_GTU:
616         return x > y;
617     default:
618         g_assert_not_reached();
619     }
620 }
621 
622 static bool do_constant_folding_cond_64(uint64_t x, uint64_t y, TCGCond c)
623 {
624     switch (c) {
625     case TCG_COND_EQ:
626         return x == y;
627     case TCG_COND_NE:
628         return x != y;
629     case TCG_COND_LT:
630         return (int64_t)x < (int64_t)y;
631     case TCG_COND_GE:
632         return (int64_t)x >= (int64_t)y;
633     case TCG_COND_LE:
634         return (int64_t)x <= (int64_t)y;
635     case TCG_COND_GT:
636         return (int64_t)x > (int64_t)y;
637     case TCG_COND_LTU:
638         return x < y;
639     case TCG_COND_GEU:
640         return x >= y;
641     case TCG_COND_LEU:
642         return x <= y;
643     case TCG_COND_GTU:
644         return x > y;
645     default:
646         g_assert_not_reached();
647     }
648 }
649 
650 static bool do_constant_folding_cond_eq(TCGCond c)
651 {
652     switch (c) {
653     case TCG_COND_GT:
654     case TCG_COND_LTU:
655     case TCG_COND_LT:
656     case TCG_COND_GTU:
657     case TCG_COND_NE:
658         return 0;
659     case TCG_COND_GE:
660     case TCG_COND_GEU:
661     case TCG_COND_LE:
662     case TCG_COND_LEU:
663     case TCG_COND_EQ:
664         return 1;
665     default:
666         g_assert_not_reached();
667     }
668 }
669 
670 /*
671  * Return -1 if the condition can't be simplified,
672  * and the result of the condition (0 or 1) if it can.
673  */
674 static int do_constant_folding_cond(TCGType type, TCGArg x,
675                                     TCGArg y, TCGCond c)
676 {
677     if (arg_is_const(x) && arg_is_const(y)) {
678         uint64_t xv = arg_info(x)->val;
679         uint64_t yv = arg_info(y)->val;
680 
681         switch (type) {
682         case TCG_TYPE_I32:
683             return do_constant_folding_cond_32(xv, yv, c);
684         case TCG_TYPE_I64:
685             return do_constant_folding_cond_64(xv, yv, c);
686         default:
687             /* Only scalar comparisons are optimizable */
688             return -1;
689         }
690     } else if (args_are_copies(x, y)) {
691         return do_constant_folding_cond_eq(c);
692     } else if (arg_is_const(y) && arg_info(y)->val == 0) {
693         switch (c) {
694         case TCG_COND_LTU:
695             return 0;
696         case TCG_COND_GEU:
697             return 1;
698         default:
699             return -1;
700         }
701     }
702     return -1;
703 }
704 
705 /*
706  * Return -1 if the condition can't be simplified,
707  * and the result of the condition (0 or 1) if it can.
708  */
709 static int do_constant_folding_cond2(TCGArg *p1, TCGArg *p2, TCGCond c)
710 {
711     TCGArg al = p1[0], ah = p1[1];
712     TCGArg bl = p2[0], bh = p2[1];
713 
714     if (arg_is_const(bl) && arg_is_const(bh)) {
715         tcg_target_ulong blv = arg_info(bl)->val;
716         tcg_target_ulong bhv = arg_info(bh)->val;
717         uint64_t b = deposit64(blv, 32, 32, bhv);
718 
719         if (arg_is_const(al) && arg_is_const(ah)) {
720             tcg_target_ulong alv = arg_info(al)->val;
721             tcg_target_ulong ahv = arg_info(ah)->val;
722             uint64_t a = deposit64(alv, 32, 32, ahv);
723             return do_constant_folding_cond_64(a, b, c);
724         }
725         if (b == 0) {
726             switch (c) {
727             case TCG_COND_LTU:
728                 return 0;
729             case TCG_COND_GEU:
730                 return 1;
731             default:
732                 break;
733             }
734         }
735     }
736     if (args_are_copies(al, bl) && args_are_copies(ah, bh)) {
737         return do_constant_folding_cond_eq(c);
738     }
739     return -1;
740 }
741 
742 /**
743  * swap_commutative:
744  * @dest: TCGArg of the destination argument, or NO_DEST.
745  * @p1: first paired argument
746  * @p2: second paired argument
747  *
748  * If *@p1 is a constant and *@p2 is not, swap.
749  * If *@p2 matches @dest, swap.
750  * Return true if a swap was performed.
751  */
752 
753 #define NO_DEST  temp_arg(NULL)
754 
755 static bool swap_commutative(TCGArg dest, TCGArg *p1, TCGArg *p2)
756 {
757     TCGArg a1 = *p1, a2 = *p2;
758     int sum = 0;
759     sum += arg_is_const(a1);
760     sum -= arg_is_const(a2);
761 
762     /* Prefer the constant in second argument, and then the form
763        op a, a, b, which is better handled on non-RISC hosts. */
764     if (sum > 0 || (sum == 0 && dest == a2)) {
765         *p1 = a2;
766         *p2 = a1;
767         return true;
768     }
769     return false;
770 }
771 
772 static bool swap_commutative2(TCGArg *p1, TCGArg *p2)
773 {
774     int sum = 0;
775     sum += arg_is_const(p1[0]);
776     sum += arg_is_const(p1[1]);
777     sum -= arg_is_const(p2[0]);
778     sum -= arg_is_const(p2[1]);
779     if (sum > 0) {
780         TCGArg t;
781         t = p1[0], p1[0] = p2[0], p2[0] = t;
782         t = p1[1], p1[1] = p2[1], p2[1] = t;
783         return true;
784     }
785     return false;
786 }
787 
788 static void init_arguments(OptContext *ctx, TCGOp *op, int nb_args)
789 {
790     for (int i = 0; i < nb_args; i++) {
791         TCGTemp *ts = arg_temp(op->args[i]);
792         init_ts_info(ctx, ts);
793     }
794 }
795 
796 static void copy_propagate(OptContext *ctx, TCGOp *op,
797                            int nb_oargs, int nb_iargs)
798 {
799     for (int i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
800         TCGTemp *ts = arg_temp(op->args[i]);
801         if (ts_is_copy(ts)) {
802             op->args[i] = temp_arg(find_better_copy(ts));
803         }
804     }
805 }
806 
807 static void finish_folding(OptContext *ctx, TCGOp *op)
808 {
809     const TCGOpDef *def = &tcg_op_defs[op->opc];
810     int i, nb_oargs;
811 
812     /*
813      * We only optimize extended basic blocks.  If the opcode ends a BB
814      * and is not a conditional branch, reset all temp data.
815      */
816     if (def->flags & TCG_OPF_BB_END) {
817         ctx->prev_mb = NULL;
818         if (!(def->flags & TCG_OPF_COND_BRANCH)) {
819             memset(&ctx->temps_used, 0, sizeof(ctx->temps_used));
820             remove_mem_copy_all(ctx);
821         }
822         return;
823     }
824 
825     nb_oargs = def->nb_oargs;
826     for (i = 0; i < nb_oargs; i++) {
827         TCGTemp *ts = arg_temp(op->args[i]);
828         reset_ts(ctx, ts);
829         /*
830          * Save the corresponding known-zero/sign bits mask for the
831          * first output argument (only one supported so far).
832          */
833         if (i == 0) {
834             ts_info(ts)->z_mask = ctx->z_mask;
835             ts_info(ts)->s_mask = ctx->s_mask;
836         }
837     }
838 }
839 
840 /*
841  * The fold_* functions return true when processing is complete,
842  * usually by folding the operation to a constant or to a copy,
843  * and calling tcg_opt_gen_{mov,movi}.  They may do other things,
844  * like collect information about the value produced, for use in
845  * optimizing a subsequent operation.
846  *
847  * These first fold_* functions are all helpers, used by other
848  * folders for more specific operations.
849  */
850 
851 static bool fold_const1(OptContext *ctx, TCGOp *op)
852 {
853     if (arg_is_const(op->args[1])) {
854         uint64_t t;
855 
856         t = arg_info(op->args[1])->val;
857         t = do_constant_folding(op->opc, ctx->type, t, 0);
858         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
859     }
860     return false;
861 }
862 
863 static bool fold_const2(OptContext *ctx, TCGOp *op)
864 {
865     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
866         uint64_t t1 = arg_info(op->args[1])->val;
867         uint64_t t2 = arg_info(op->args[2])->val;
868 
869         t1 = do_constant_folding(op->opc, ctx->type, t1, t2);
870         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
871     }
872     return false;
873 }
874 
875 static bool fold_commutative(OptContext *ctx, TCGOp *op)
876 {
877     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
878     return false;
879 }
880 
881 static bool fold_const2_commutative(OptContext *ctx, TCGOp *op)
882 {
883     swap_commutative(op->args[0], &op->args[1], &op->args[2]);
884     return fold_const2(ctx, op);
885 }
886 
887 static bool fold_masks(OptContext *ctx, TCGOp *op)
888 {
889     uint64_t a_mask = ctx->a_mask;
890     uint64_t z_mask = ctx->z_mask;
891     uint64_t s_mask = ctx->s_mask;
892 
893     /*
894      * 32-bit ops generate 32-bit results, which for the purpose of
895      * simplifying tcg are sign-extended.  Certainly that's how we
896      * represent our constants elsewhere.  Note that the bits will
897      * be reset properly for a 64-bit value when encountering the
898      * type changing opcodes.
899      */
900     if (ctx->type == TCG_TYPE_I32) {
901         a_mask = (int32_t)a_mask;
902         z_mask = (int32_t)z_mask;
903         s_mask |= MAKE_64BIT_MASK(32, 32);
904         ctx->z_mask = z_mask;
905         ctx->s_mask = s_mask;
906     }
907 
908     if (z_mask == 0) {
909         return tcg_opt_gen_movi(ctx, op, op->args[0], 0);
910     }
911     if (a_mask == 0) {
912         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
913     }
914     return false;
915 }
916 
917 /*
918  * Convert @op to NOT, if NOT is supported by the host.
919  * Return true f the conversion is successful, which will still
920  * indicate that the processing is complete.
921  */
922 static bool fold_not(OptContext *ctx, TCGOp *op);
923 static bool fold_to_not(OptContext *ctx, TCGOp *op, int idx)
924 {
925     TCGOpcode not_op;
926     bool have_not;
927 
928     switch (ctx->type) {
929     case TCG_TYPE_I32:
930         not_op = INDEX_op_not_i32;
931         have_not = TCG_TARGET_HAS_not_i32;
932         break;
933     case TCG_TYPE_I64:
934         not_op = INDEX_op_not_i64;
935         have_not = TCG_TARGET_HAS_not_i64;
936         break;
937     case TCG_TYPE_V64:
938     case TCG_TYPE_V128:
939     case TCG_TYPE_V256:
940         not_op = INDEX_op_not_vec;
941         have_not = TCG_TARGET_HAS_not_vec;
942         break;
943     default:
944         g_assert_not_reached();
945     }
946     if (have_not) {
947         op->opc = not_op;
948         op->args[1] = op->args[idx];
949         return fold_not(ctx, op);
950     }
951     return false;
952 }
953 
954 /* If the binary operation has first argument @i, fold to @i. */
955 static bool fold_ix_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
956 {
957     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
958         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
959     }
960     return false;
961 }
962 
963 /* If the binary operation has first argument @i, fold to NOT. */
964 static bool fold_ix_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
965 {
966     if (arg_is_const(op->args[1]) && arg_info(op->args[1])->val == i) {
967         return fold_to_not(ctx, op, 2);
968     }
969     return false;
970 }
971 
972 /* If the binary operation has second argument @i, fold to @i. */
973 static bool fold_xi_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
974 {
975     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
976         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
977     }
978     return false;
979 }
980 
981 /* If the binary operation has second argument @i, fold to identity. */
982 static bool fold_xi_to_x(OptContext *ctx, TCGOp *op, uint64_t i)
983 {
984     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
985         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
986     }
987     return false;
988 }
989 
990 /* If the binary operation has second argument @i, fold to NOT. */
991 static bool fold_xi_to_not(OptContext *ctx, TCGOp *op, uint64_t i)
992 {
993     if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == i) {
994         return fold_to_not(ctx, op, 1);
995     }
996     return false;
997 }
998 
999 /* If the binary operation has both arguments equal, fold to @i. */
1000 static bool fold_xx_to_i(OptContext *ctx, TCGOp *op, uint64_t i)
1001 {
1002     if (args_are_copies(op->args[1], op->args[2])) {
1003         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1004     }
1005     return false;
1006 }
1007 
1008 /* If the binary operation has both arguments equal, fold to identity. */
1009 static bool fold_xx_to_x(OptContext *ctx, TCGOp *op)
1010 {
1011     if (args_are_copies(op->args[1], op->args[2])) {
1012         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1013     }
1014     return false;
1015 }
1016 
1017 /*
1018  * These outermost fold_<op> functions are sorted alphabetically.
1019  *
1020  * The ordering of the transformations should be:
1021  *   1) those that produce a constant
1022  *   2) those that produce a copy
1023  *   3) those that produce information about the result value.
1024  */
1025 
1026 static bool fold_add(OptContext *ctx, TCGOp *op)
1027 {
1028     if (fold_const2_commutative(ctx, op) ||
1029         fold_xi_to_x(ctx, op, 0)) {
1030         return true;
1031     }
1032     return false;
1033 }
1034 
1035 /* We cannot as yet do_constant_folding with vectors. */
1036 static bool fold_add_vec(OptContext *ctx, TCGOp *op)
1037 {
1038     if (fold_commutative(ctx, op) ||
1039         fold_xi_to_x(ctx, op, 0)) {
1040         return true;
1041     }
1042     return false;
1043 }
1044 
1045 static bool fold_addsub2(OptContext *ctx, TCGOp *op, bool add)
1046 {
1047     bool a_const = arg_is_const(op->args[2]) && arg_is_const(op->args[3]);
1048     bool b_const = arg_is_const(op->args[4]) && arg_is_const(op->args[5]);
1049 
1050     if (a_const && b_const) {
1051         uint64_t al = arg_info(op->args[2])->val;
1052         uint64_t ah = arg_info(op->args[3])->val;
1053         uint64_t bl = arg_info(op->args[4])->val;
1054         uint64_t bh = arg_info(op->args[5])->val;
1055         TCGArg rl, rh;
1056         TCGOp *op2;
1057 
1058         if (ctx->type == TCG_TYPE_I32) {
1059             uint64_t a = deposit64(al, 32, 32, ah);
1060             uint64_t b = deposit64(bl, 32, 32, bh);
1061 
1062             if (add) {
1063                 a += b;
1064             } else {
1065                 a -= b;
1066             }
1067 
1068             al = sextract64(a, 0, 32);
1069             ah = sextract64(a, 32, 32);
1070         } else {
1071             Int128 a = int128_make128(al, ah);
1072             Int128 b = int128_make128(bl, bh);
1073 
1074             if (add) {
1075                 a = int128_add(a, b);
1076             } else {
1077                 a = int128_sub(a, b);
1078             }
1079 
1080             al = int128_getlo(a);
1081             ah = int128_gethi(a);
1082         }
1083 
1084         rl = op->args[0];
1085         rh = op->args[1];
1086 
1087         /* The proper opcode is supplied by tcg_opt_gen_mov. */
1088         op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
1089 
1090         tcg_opt_gen_movi(ctx, op, rl, al);
1091         tcg_opt_gen_movi(ctx, op2, rh, ah);
1092         return true;
1093     }
1094 
1095     /* Fold sub2 r,x,i to add2 r,x,-i */
1096     if (!add && b_const) {
1097         uint64_t bl = arg_info(op->args[4])->val;
1098         uint64_t bh = arg_info(op->args[5])->val;
1099 
1100         /* Negate the two parts without assembling and disassembling. */
1101         bl = -bl;
1102         bh = ~bh + !bl;
1103 
1104         op->opc = (ctx->type == TCG_TYPE_I32
1105                    ? INDEX_op_add2_i32 : INDEX_op_add2_i64);
1106         op->args[4] = arg_new_constant(ctx, bl);
1107         op->args[5] = arg_new_constant(ctx, bh);
1108     }
1109     return false;
1110 }
1111 
1112 static bool fold_add2(OptContext *ctx, TCGOp *op)
1113 {
1114     /* Note that the high and low parts may be independently swapped. */
1115     swap_commutative(op->args[0], &op->args[2], &op->args[4]);
1116     swap_commutative(op->args[1], &op->args[3], &op->args[5]);
1117 
1118     return fold_addsub2(ctx, op, true);
1119 }
1120 
1121 static bool fold_and(OptContext *ctx, TCGOp *op)
1122 {
1123     uint64_t z1, z2;
1124 
1125     if (fold_const2_commutative(ctx, op) ||
1126         fold_xi_to_i(ctx, op, 0) ||
1127         fold_xi_to_x(ctx, op, -1) ||
1128         fold_xx_to_x(ctx, op)) {
1129         return true;
1130     }
1131 
1132     z1 = arg_info(op->args[1])->z_mask;
1133     z2 = arg_info(op->args[2])->z_mask;
1134     ctx->z_mask = z1 & z2;
1135 
1136     /*
1137      * Sign repetitions are perforce all identical, whether they are 1 or 0.
1138      * Bitwise operations preserve the relative quantity of the repetitions.
1139      */
1140     ctx->s_mask = arg_info(op->args[1])->s_mask
1141                 & arg_info(op->args[2])->s_mask;
1142 
1143     /*
1144      * Known-zeros does not imply known-ones.  Therefore unless
1145      * arg2 is constant, we can't infer affected bits from it.
1146      */
1147     if (arg_is_const(op->args[2])) {
1148         ctx->a_mask = z1 & ~z2;
1149     }
1150 
1151     return fold_masks(ctx, op);
1152 }
1153 
1154 static bool fold_andc(OptContext *ctx, TCGOp *op)
1155 {
1156     uint64_t z1;
1157 
1158     if (fold_const2(ctx, op) ||
1159         fold_xx_to_i(ctx, op, 0) ||
1160         fold_xi_to_x(ctx, op, 0) ||
1161         fold_ix_to_not(ctx, op, -1)) {
1162         return true;
1163     }
1164 
1165     z1 = arg_info(op->args[1])->z_mask;
1166 
1167     /*
1168      * Known-zeros does not imply known-ones.  Therefore unless
1169      * arg2 is constant, we can't infer anything from it.
1170      */
1171     if (arg_is_const(op->args[2])) {
1172         uint64_t z2 = ~arg_info(op->args[2])->z_mask;
1173         ctx->a_mask = z1 & ~z2;
1174         z1 &= z2;
1175     }
1176     ctx->z_mask = z1;
1177 
1178     ctx->s_mask = arg_info(op->args[1])->s_mask
1179                 & arg_info(op->args[2])->s_mask;
1180     return fold_masks(ctx, op);
1181 }
1182 
1183 static bool fold_brcond(OptContext *ctx, TCGOp *op)
1184 {
1185     TCGCond cond = op->args[2];
1186     int i;
1187 
1188     if (swap_commutative(NO_DEST, &op->args[0], &op->args[1])) {
1189         op->args[2] = cond = tcg_swap_cond(cond);
1190     }
1191 
1192     i = do_constant_folding_cond(ctx->type, op->args[0], op->args[1], cond);
1193     if (i == 0) {
1194         tcg_op_remove(ctx->tcg, op);
1195         return true;
1196     }
1197     if (i > 0) {
1198         op->opc = INDEX_op_br;
1199         op->args[0] = op->args[3];
1200     }
1201     return false;
1202 }
1203 
1204 static bool fold_brcond2(OptContext *ctx, TCGOp *op)
1205 {
1206     TCGCond cond = op->args[4];
1207     TCGArg label = op->args[5];
1208     int i, inv = 0;
1209 
1210     if (swap_commutative2(&op->args[0], &op->args[2])) {
1211         op->args[4] = cond = tcg_swap_cond(cond);
1212     }
1213 
1214     i = do_constant_folding_cond2(&op->args[0], &op->args[2], cond);
1215     if (i >= 0) {
1216         goto do_brcond_const;
1217     }
1218 
1219     switch (cond) {
1220     case TCG_COND_LT:
1221     case TCG_COND_GE:
1222         /*
1223          * Simplify LT/GE comparisons vs zero to a single compare
1224          * vs the high word of the input.
1225          */
1226         if (arg_is_const(op->args[2]) && arg_info(op->args[2])->val == 0 &&
1227             arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0) {
1228             goto do_brcond_high;
1229         }
1230         break;
1231 
1232     case TCG_COND_NE:
1233         inv = 1;
1234         QEMU_FALLTHROUGH;
1235     case TCG_COND_EQ:
1236         /*
1237          * Simplify EQ/NE comparisons where one of the pairs
1238          * can be simplified.
1239          */
1240         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[0],
1241                                      op->args[2], cond);
1242         switch (i ^ inv) {
1243         case 0:
1244             goto do_brcond_const;
1245         case 1:
1246             goto do_brcond_high;
1247         }
1248 
1249         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
1250                                      op->args[3], cond);
1251         switch (i ^ inv) {
1252         case 0:
1253             goto do_brcond_const;
1254         case 1:
1255             op->opc = INDEX_op_brcond_i32;
1256             op->args[1] = op->args[2];
1257             op->args[2] = cond;
1258             op->args[3] = label;
1259             break;
1260         }
1261         break;
1262 
1263     default:
1264         break;
1265 
1266     do_brcond_high:
1267         op->opc = INDEX_op_brcond_i32;
1268         op->args[0] = op->args[1];
1269         op->args[1] = op->args[3];
1270         op->args[2] = cond;
1271         op->args[3] = label;
1272         break;
1273 
1274     do_brcond_const:
1275         if (i == 0) {
1276             tcg_op_remove(ctx->tcg, op);
1277             return true;
1278         }
1279         op->opc = INDEX_op_br;
1280         op->args[0] = label;
1281         break;
1282     }
1283     return false;
1284 }
1285 
1286 static bool fold_bswap(OptContext *ctx, TCGOp *op)
1287 {
1288     uint64_t z_mask, s_mask, sign;
1289 
1290     if (arg_is_const(op->args[1])) {
1291         uint64_t t = arg_info(op->args[1])->val;
1292 
1293         t = do_constant_folding(op->opc, ctx->type, t, op->args[2]);
1294         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1295     }
1296 
1297     z_mask = arg_info(op->args[1])->z_mask;
1298 
1299     switch (op->opc) {
1300     case INDEX_op_bswap16_i32:
1301     case INDEX_op_bswap16_i64:
1302         z_mask = bswap16(z_mask);
1303         sign = INT16_MIN;
1304         break;
1305     case INDEX_op_bswap32_i32:
1306     case INDEX_op_bswap32_i64:
1307         z_mask = bswap32(z_mask);
1308         sign = INT32_MIN;
1309         break;
1310     case INDEX_op_bswap64_i64:
1311         z_mask = bswap64(z_mask);
1312         sign = INT64_MIN;
1313         break;
1314     default:
1315         g_assert_not_reached();
1316     }
1317     s_mask = smask_from_zmask(z_mask);
1318 
1319     switch (op->args[2] & (TCG_BSWAP_OZ | TCG_BSWAP_OS)) {
1320     case TCG_BSWAP_OZ:
1321         break;
1322     case TCG_BSWAP_OS:
1323         /* If the sign bit may be 1, force all the bits above to 1. */
1324         if (z_mask & sign) {
1325             z_mask |= sign;
1326             s_mask = sign << 1;
1327         }
1328         break;
1329     default:
1330         /* The high bits are undefined: force all bits above the sign to 1. */
1331         z_mask |= sign << 1;
1332         s_mask = 0;
1333         break;
1334     }
1335     ctx->z_mask = z_mask;
1336     ctx->s_mask = s_mask;
1337 
1338     return fold_masks(ctx, op);
1339 }
1340 
1341 static bool fold_call(OptContext *ctx, TCGOp *op)
1342 {
1343     TCGContext *s = ctx->tcg;
1344     int nb_oargs = TCGOP_CALLO(op);
1345     int nb_iargs = TCGOP_CALLI(op);
1346     int flags, i;
1347 
1348     init_arguments(ctx, op, nb_oargs + nb_iargs);
1349     copy_propagate(ctx, op, nb_oargs, nb_iargs);
1350 
1351     /* If the function reads or writes globals, reset temp data. */
1352     flags = tcg_call_flags(op);
1353     if (!(flags & (TCG_CALL_NO_READ_GLOBALS | TCG_CALL_NO_WRITE_GLOBALS))) {
1354         int nb_globals = s->nb_globals;
1355 
1356         for (i = 0; i < nb_globals; i++) {
1357             if (test_bit(i, ctx->temps_used.l)) {
1358                 reset_ts(ctx, &ctx->tcg->temps[i]);
1359             }
1360         }
1361     }
1362 
1363     /* If the function has side effects, reset mem data. */
1364     if (!(flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1365         remove_mem_copy_all(ctx);
1366     }
1367 
1368     /* Reset temp data for outputs. */
1369     for (i = 0; i < nb_oargs; i++) {
1370         reset_temp(ctx, op->args[i]);
1371     }
1372 
1373     /* Stop optimizing MB across calls. */
1374     ctx->prev_mb = NULL;
1375     return true;
1376 }
1377 
1378 static bool fold_count_zeros(OptContext *ctx, TCGOp *op)
1379 {
1380     uint64_t z_mask;
1381 
1382     if (arg_is_const(op->args[1])) {
1383         uint64_t t = arg_info(op->args[1])->val;
1384 
1385         if (t != 0) {
1386             t = do_constant_folding(op->opc, ctx->type, t, 0);
1387             return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1388         }
1389         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[2]);
1390     }
1391 
1392     switch (ctx->type) {
1393     case TCG_TYPE_I32:
1394         z_mask = 31;
1395         break;
1396     case TCG_TYPE_I64:
1397         z_mask = 63;
1398         break;
1399     default:
1400         g_assert_not_reached();
1401     }
1402     ctx->z_mask = arg_info(op->args[2])->z_mask | z_mask;
1403     ctx->s_mask = smask_from_zmask(ctx->z_mask);
1404     return false;
1405 }
1406 
1407 static bool fold_ctpop(OptContext *ctx, TCGOp *op)
1408 {
1409     if (fold_const1(ctx, op)) {
1410         return true;
1411     }
1412 
1413     switch (ctx->type) {
1414     case TCG_TYPE_I32:
1415         ctx->z_mask = 32 | 31;
1416         break;
1417     case TCG_TYPE_I64:
1418         ctx->z_mask = 64 | 63;
1419         break;
1420     default:
1421         g_assert_not_reached();
1422     }
1423     ctx->s_mask = smask_from_zmask(ctx->z_mask);
1424     return false;
1425 }
1426 
1427 static bool fold_deposit(OptContext *ctx, TCGOp *op)
1428 {
1429     TCGOpcode and_opc;
1430 
1431     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1432         uint64_t t1 = arg_info(op->args[1])->val;
1433         uint64_t t2 = arg_info(op->args[2])->val;
1434 
1435         t1 = deposit64(t1, op->args[3], op->args[4], t2);
1436         return tcg_opt_gen_movi(ctx, op, op->args[0], t1);
1437     }
1438 
1439     switch (ctx->type) {
1440     case TCG_TYPE_I32:
1441         and_opc = INDEX_op_and_i32;
1442         break;
1443     case TCG_TYPE_I64:
1444         and_opc = INDEX_op_and_i64;
1445         break;
1446     default:
1447         g_assert_not_reached();
1448     }
1449 
1450     /* Inserting a value into zero at offset 0. */
1451     if (arg_is_const(op->args[1])
1452         && arg_info(op->args[1])->val == 0
1453         && op->args[3] == 0) {
1454         uint64_t mask = MAKE_64BIT_MASK(0, op->args[4]);
1455 
1456         op->opc = and_opc;
1457         op->args[1] = op->args[2];
1458         op->args[2] = arg_new_constant(ctx, mask);
1459         ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
1460         return false;
1461     }
1462 
1463     /* Inserting zero into a value. */
1464     if (arg_is_const(op->args[2])
1465         && arg_info(op->args[2])->val == 0) {
1466         uint64_t mask = deposit64(-1, op->args[3], op->args[4], 0);
1467 
1468         op->opc = and_opc;
1469         op->args[2] = arg_new_constant(ctx, mask);
1470         ctx->z_mask = mask & arg_info(op->args[1])->z_mask;
1471         return false;
1472     }
1473 
1474     ctx->z_mask = deposit64(arg_info(op->args[1])->z_mask,
1475                             op->args[3], op->args[4],
1476                             arg_info(op->args[2])->z_mask);
1477     return false;
1478 }
1479 
1480 static bool fold_divide(OptContext *ctx, TCGOp *op)
1481 {
1482     if (fold_const2(ctx, op) ||
1483         fold_xi_to_x(ctx, op, 1)) {
1484         return true;
1485     }
1486     return false;
1487 }
1488 
1489 static bool fold_dup(OptContext *ctx, TCGOp *op)
1490 {
1491     if (arg_is_const(op->args[1])) {
1492         uint64_t t = arg_info(op->args[1])->val;
1493         t = dup_const(TCGOP_VECE(op), t);
1494         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1495     }
1496     return false;
1497 }
1498 
1499 static bool fold_dup2(OptContext *ctx, TCGOp *op)
1500 {
1501     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1502         uint64_t t = deposit64(arg_info(op->args[1])->val, 32, 32,
1503                                arg_info(op->args[2])->val);
1504         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1505     }
1506 
1507     if (args_are_copies(op->args[1], op->args[2])) {
1508         op->opc = INDEX_op_dup_vec;
1509         TCGOP_VECE(op) = MO_32;
1510     }
1511     return false;
1512 }
1513 
1514 static bool fold_eqv(OptContext *ctx, TCGOp *op)
1515 {
1516     if (fold_const2_commutative(ctx, op) ||
1517         fold_xi_to_x(ctx, op, -1) ||
1518         fold_xi_to_not(ctx, op, 0)) {
1519         return true;
1520     }
1521 
1522     ctx->s_mask = arg_info(op->args[1])->s_mask
1523                 & arg_info(op->args[2])->s_mask;
1524     return false;
1525 }
1526 
1527 static bool fold_extract(OptContext *ctx, TCGOp *op)
1528 {
1529     uint64_t z_mask_old, z_mask;
1530     int pos = op->args[2];
1531     int len = op->args[3];
1532 
1533     if (arg_is_const(op->args[1])) {
1534         uint64_t t;
1535 
1536         t = arg_info(op->args[1])->val;
1537         t = extract64(t, pos, len);
1538         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
1539     }
1540 
1541     z_mask_old = arg_info(op->args[1])->z_mask;
1542     z_mask = extract64(z_mask_old, pos, len);
1543     if (pos == 0) {
1544         ctx->a_mask = z_mask_old ^ z_mask;
1545     }
1546     ctx->z_mask = z_mask;
1547     ctx->s_mask = smask_from_zmask(z_mask);
1548 
1549     return fold_masks(ctx, op);
1550 }
1551 
1552 static bool fold_extract2(OptContext *ctx, TCGOp *op)
1553 {
1554     if (arg_is_const(op->args[1]) && arg_is_const(op->args[2])) {
1555         uint64_t v1 = arg_info(op->args[1])->val;
1556         uint64_t v2 = arg_info(op->args[2])->val;
1557         int shr = op->args[3];
1558 
1559         if (op->opc == INDEX_op_extract2_i64) {
1560             v1 >>= shr;
1561             v2 <<= 64 - shr;
1562         } else {
1563             v1 = (uint32_t)v1 >> shr;
1564             v2 = (uint64_t)((int32_t)v2 << (32 - shr));
1565         }
1566         return tcg_opt_gen_movi(ctx, op, op->args[0], v1 | v2);
1567     }
1568     return false;
1569 }
1570 
1571 static bool fold_exts(OptContext *ctx, TCGOp *op)
1572 {
1573     uint64_t s_mask_old, s_mask, z_mask, sign;
1574     bool type_change = false;
1575 
1576     if (fold_const1(ctx, op)) {
1577         return true;
1578     }
1579 
1580     z_mask = arg_info(op->args[1])->z_mask;
1581     s_mask = arg_info(op->args[1])->s_mask;
1582     s_mask_old = s_mask;
1583 
1584     switch (op->opc) {
1585     CASE_OP_32_64(ext8s):
1586         sign = INT8_MIN;
1587         z_mask = (uint8_t)z_mask;
1588         break;
1589     CASE_OP_32_64(ext16s):
1590         sign = INT16_MIN;
1591         z_mask = (uint16_t)z_mask;
1592         break;
1593     case INDEX_op_ext_i32_i64:
1594         type_change = true;
1595         QEMU_FALLTHROUGH;
1596     case INDEX_op_ext32s_i64:
1597         sign = INT32_MIN;
1598         z_mask = (uint32_t)z_mask;
1599         break;
1600     default:
1601         g_assert_not_reached();
1602     }
1603 
1604     if (z_mask & sign) {
1605         z_mask |= sign;
1606     }
1607     s_mask |= sign << 1;
1608 
1609     ctx->z_mask = z_mask;
1610     ctx->s_mask = s_mask;
1611     if (!type_change) {
1612         ctx->a_mask = s_mask & ~s_mask_old;
1613     }
1614 
1615     return fold_masks(ctx, op);
1616 }
1617 
1618 static bool fold_extu(OptContext *ctx, TCGOp *op)
1619 {
1620     uint64_t z_mask_old, z_mask;
1621     bool type_change = false;
1622 
1623     if (fold_const1(ctx, op)) {
1624         return true;
1625     }
1626 
1627     z_mask_old = z_mask = arg_info(op->args[1])->z_mask;
1628 
1629     switch (op->opc) {
1630     CASE_OP_32_64(ext8u):
1631         z_mask = (uint8_t)z_mask;
1632         break;
1633     CASE_OP_32_64(ext16u):
1634         z_mask = (uint16_t)z_mask;
1635         break;
1636     case INDEX_op_extrl_i64_i32:
1637     case INDEX_op_extu_i32_i64:
1638         type_change = true;
1639         QEMU_FALLTHROUGH;
1640     case INDEX_op_ext32u_i64:
1641         z_mask = (uint32_t)z_mask;
1642         break;
1643     case INDEX_op_extrh_i64_i32:
1644         type_change = true;
1645         z_mask >>= 32;
1646         break;
1647     default:
1648         g_assert_not_reached();
1649     }
1650 
1651     ctx->z_mask = z_mask;
1652     ctx->s_mask = smask_from_zmask(z_mask);
1653     if (!type_change) {
1654         ctx->a_mask = z_mask_old ^ z_mask;
1655     }
1656     return fold_masks(ctx, op);
1657 }
1658 
1659 static bool fold_mb(OptContext *ctx, TCGOp *op)
1660 {
1661     /* Eliminate duplicate and redundant fence instructions.  */
1662     if (ctx->prev_mb) {
1663         /*
1664          * Merge two barriers of the same type into one,
1665          * or a weaker barrier into a stronger one,
1666          * or two weaker barriers into a stronger one.
1667          *   mb X; mb Y => mb X|Y
1668          *   mb; strl => mb; st
1669          *   ldaq; mb => ld; mb
1670          *   ldaq; strl => ld; mb; st
1671          * Other combinations are also merged into a strong
1672          * barrier.  This is stricter than specified but for
1673          * the purposes of TCG is better than not optimizing.
1674          */
1675         ctx->prev_mb->args[0] |= op->args[0];
1676         tcg_op_remove(ctx->tcg, op);
1677     } else {
1678         ctx->prev_mb = op;
1679     }
1680     return true;
1681 }
1682 
1683 static bool fold_mov(OptContext *ctx, TCGOp *op)
1684 {
1685     return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[1]);
1686 }
1687 
1688 static bool fold_movcond(OptContext *ctx, TCGOp *op)
1689 {
1690     TCGCond cond = op->args[5];
1691     int i;
1692 
1693     if (swap_commutative(NO_DEST, &op->args[1], &op->args[2])) {
1694         op->args[5] = cond = tcg_swap_cond(cond);
1695     }
1696     /*
1697      * Canonicalize the "false" input reg to match the destination reg so
1698      * that the tcg backend can implement a "move if true" operation.
1699      */
1700     if (swap_commutative(op->args[0], &op->args[4], &op->args[3])) {
1701         op->args[5] = cond = tcg_invert_cond(cond);
1702     }
1703 
1704     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1705     if (i >= 0) {
1706         return tcg_opt_gen_mov(ctx, op, op->args[0], op->args[4 - i]);
1707     }
1708 
1709     ctx->z_mask = arg_info(op->args[3])->z_mask
1710                 | arg_info(op->args[4])->z_mask;
1711     ctx->s_mask = arg_info(op->args[3])->s_mask
1712                 & arg_info(op->args[4])->s_mask;
1713 
1714     if (arg_is_const(op->args[3]) && arg_is_const(op->args[4])) {
1715         uint64_t tv = arg_info(op->args[3])->val;
1716         uint64_t fv = arg_info(op->args[4])->val;
1717         TCGOpcode opc, negopc = 0;
1718 
1719         switch (ctx->type) {
1720         case TCG_TYPE_I32:
1721             opc = INDEX_op_setcond_i32;
1722             if (TCG_TARGET_HAS_negsetcond_i32) {
1723                 negopc = INDEX_op_negsetcond_i32;
1724             }
1725             tv = (int32_t)tv;
1726             fv = (int32_t)fv;
1727             break;
1728         case TCG_TYPE_I64:
1729             opc = INDEX_op_setcond_i64;
1730             if (TCG_TARGET_HAS_negsetcond_i64) {
1731                 negopc = INDEX_op_negsetcond_i64;
1732             }
1733             break;
1734         default:
1735             g_assert_not_reached();
1736         }
1737 
1738         if (tv == 1 && fv == 0) {
1739             op->opc = opc;
1740             op->args[3] = cond;
1741         } else if (fv == 1 && tv == 0) {
1742             op->opc = opc;
1743             op->args[3] = tcg_invert_cond(cond);
1744         } else if (negopc) {
1745             if (tv == -1 && fv == 0) {
1746                 op->opc = negopc;
1747                 op->args[3] = cond;
1748             } else if (fv == -1 && tv == 0) {
1749                 op->opc = negopc;
1750                 op->args[3] = tcg_invert_cond(cond);
1751             }
1752         }
1753     }
1754     return false;
1755 }
1756 
1757 static bool fold_mul(OptContext *ctx, TCGOp *op)
1758 {
1759     if (fold_const2(ctx, op) ||
1760         fold_xi_to_i(ctx, op, 0) ||
1761         fold_xi_to_x(ctx, op, 1)) {
1762         return true;
1763     }
1764     return false;
1765 }
1766 
1767 static bool fold_mul_highpart(OptContext *ctx, TCGOp *op)
1768 {
1769     if (fold_const2_commutative(ctx, op) ||
1770         fold_xi_to_i(ctx, op, 0)) {
1771         return true;
1772     }
1773     return false;
1774 }
1775 
1776 static bool fold_multiply2(OptContext *ctx, TCGOp *op)
1777 {
1778     swap_commutative(op->args[0], &op->args[2], &op->args[3]);
1779 
1780     if (arg_is_const(op->args[2]) && arg_is_const(op->args[3])) {
1781         uint64_t a = arg_info(op->args[2])->val;
1782         uint64_t b = arg_info(op->args[3])->val;
1783         uint64_t h, l;
1784         TCGArg rl, rh;
1785         TCGOp *op2;
1786 
1787         switch (op->opc) {
1788         case INDEX_op_mulu2_i32:
1789             l = (uint64_t)(uint32_t)a * (uint32_t)b;
1790             h = (int32_t)(l >> 32);
1791             l = (int32_t)l;
1792             break;
1793         case INDEX_op_muls2_i32:
1794             l = (int64_t)(int32_t)a * (int32_t)b;
1795             h = l >> 32;
1796             l = (int32_t)l;
1797             break;
1798         case INDEX_op_mulu2_i64:
1799             mulu64(&l, &h, a, b);
1800             break;
1801         case INDEX_op_muls2_i64:
1802             muls64(&l, &h, a, b);
1803             break;
1804         default:
1805             g_assert_not_reached();
1806         }
1807 
1808         rl = op->args[0];
1809         rh = op->args[1];
1810 
1811         /* The proper opcode is supplied by tcg_opt_gen_mov. */
1812         op2 = tcg_op_insert_before(ctx->tcg, op, 0, 2);
1813 
1814         tcg_opt_gen_movi(ctx, op, rl, l);
1815         tcg_opt_gen_movi(ctx, op2, rh, h);
1816         return true;
1817     }
1818     return false;
1819 }
1820 
1821 static bool fold_nand(OptContext *ctx, TCGOp *op)
1822 {
1823     if (fold_const2_commutative(ctx, op) ||
1824         fold_xi_to_not(ctx, op, -1)) {
1825         return true;
1826     }
1827 
1828     ctx->s_mask = arg_info(op->args[1])->s_mask
1829                 & arg_info(op->args[2])->s_mask;
1830     return false;
1831 }
1832 
1833 static bool fold_neg(OptContext *ctx, TCGOp *op)
1834 {
1835     uint64_t z_mask;
1836 
1837     if (fold_const1(ctx, op)) {
1838         return true;
1839     }
1840 
1841     /* Set to 1 all bits to the left of the rightmost.  */
1842     z_mask = arg_info(op->args[1])->z_mask;
1843     ctx->z_mask = -(z_mask & -z_mask);
1844 
1845     /*
1846      * Because of fold_sub_to_neg, we want to always return true,
1847      * via finish_folding.
1848      */
1849     finish_folding(ctx, op);
1850     return true;
1851 }
1852 
1853 static bool fold_nor(OptContext *ctx, TCGOp *op)
1854 {
1855     if (fold_const2_commutative(ctx, op) ||
1856         fold_xi_to_not(ctx, op, 0)) {
1857         return true;
1858     }
1859 
1860     ctx->s_mask = arg_info(op->args[1])->s_mask
1861                 & arg_info(op->args[2])->s_mask;
1862     return false;
1863 }
1864 
1865 static bool fold_not(OptContext *ctx, TCGOp *op)
1866 {
1867     if (fold_const1(ctx, op)) {
1868         return true;
1869     }
1870 
1871     ctx->s_mask = arg_info(op->args[1])->s_mask;
1872 
1873     /* Because of fold_to_not, we want to always return true, via finish. */
1874     finish_folding(ctx, op);
1875     return true;
1876 }
1877 
1878 static bool fold_or(OptContext *ctx, TCGOp *op)
1879 {
1880     if (fold_const2_commutative(ctx, op) ||
1881         fold_xi_to_x(ctx, op, 0) ||
1882         fold_xx_to_x(ctx, op)) {
1883         return true;
1884     }
1885 
1886     ctx->z_mask = arg_info(op->args[1])->z_mask
1887                 | arg_info(op->args[2])->z_mask;
1888     ctx->s_mask = arg_info(op->args[1])->s_mask
1889                 & arg_info(op->args[2])->s_mask;
1890     return fold_masks(ctx, op);
1891 }
1892 
1893 static bool fold_orc(OptContext *ctx, TCGOp *op)
1894 {
1895     if (fold_const2(ctx, op) ||
1896         fold_xx_to_i(ctx, op, -1) ||
1897         fold_xi_to_x(ctx, op, -1) ||
1898         fold_ix_to_not(ctx, op, 0)) {
1899         return true;
1900     }
1901 
1902     ctx->s_mask = arg_info(op->args[1])->s_mask
1903                 & arg_info(op->args[2])->s_mask;
1904     return false;
1905 }
1906 
1907 static bool fold_qemu_ld(OptContext *ctx, TCGOp *op)
1908 {
1909     const TCGOpDef *def = &tcg_op_defs[op->opc];
1910     MemOpIdx oi = op->args[def->nb_oargs + def->nb_iargs];
1911     MemOp mop = get_memop(oi);
1912     int width = 8 * memop_size(mop);
1913 
1914     if (width < 64) {
1915         ctx->s_mask = MAKE_64BIT_MASK(width, 64 - width);
1916         if (!(mop & MO_SIGN)) {
1917             ctx->z_mask = MAKE_64BIT_MASK(0, width);
1918             ctx->s_mask <<= 1;
1919         }
1920     }
1921 
1922     /* Opcodes that touch guest memory stop the mb optimization.  */
1923     ctx->prev_mb = NULL;
1924     return false;
1925 }
1926 
1927 static bool fold_qemu_st(OptContext *ctx, TCGOp *op)
1928 {
1929     /* Opcodes that touch guest memory stop the mb optimization.  */
1930     ctx->prev_mb = NULL;
1931     return false;
1932 }
1933 
1934 static bool fold_remainder(OptContext *ctx, TCGOp *op)
1935 {
1936     if (fold_const2(ctx, op) ||
1937         fold_xx_to_i(ctx, op, 0)) {
1938         return true;
1939     }
1940     return false;
1941 }
1942 
1943 static bool fold_setcond(OptContext *ctx, TCGOp *op)
1944 {
1945     TCGCond cond = op->args[3];
1946     int i;
1947 
1948     if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
1949         op->args[3] = cond = tcg_swap_cond(cond);
1950     }
1951 
1952     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1953     if (i >= 0) {
1954         return tcg_opt_gen_movi(ctx, op, op->args[0], i);
1955     }
1956 
1957     ctx->z_mask = 1;
1958     ctx->s_mask = smask_from_zmask(1);
1959     return false;
1960 }
1961 
1962 static bool fold_negsetcond(OptContext *ctx, TCGOp *op)
1963 {
1964     TCGCond cond = op->args[3];
1965     int i;
1966 
1967     if (swap_commutative(op->args[0], &op->args[1], &op->args[2])) {
1968         op->args[3] = cond = tcg_swap_cond(cond);
1969     }
1970 
1971     i = do_constant_folding_cond(ctx->type, op->args[1], op->args[2], cond);
1972     if (i >= 0) {
1973         return tcg_opt_gen_movi(ctx, op, op->args[0], -i);
1974     }
1975 
1976     /* Value is {0,-1} so all bits are repetitions of the sign. */
1977     ctx->s_mask = -1;
1978     return false;
1979 }
1980 
1981 
1982 static bool fold_setcond2(OptContext *ctx, TCGOp *op)
1983 {
1984     TCGCond cond = op->args[5];
1985     int i, inv = 0;
1986 
1987     if (swap_commutative2(&op->args[1], &op->args[3])) {
1988         op->args[5] = cond = tcg_swap_cond(cond);
1989     }
1990 
1991     i = do_constant_folding_cond2(&op->args[1], &op->args[3], cond);
1992     if (i >= 0) {
1993         goto do_setcond_const;
1994     }
1995 
1996     switch (cond) {
1997     case TCG_COND_LT:
1998     case TCG_COND_GE:
1999         /*
2000          * Simplify LT/GE comparisons vs zero to a single compare
2001          * vs the high word of the input.
2002          */
2003         if (arg_is_const(op->args[3]) && arg_info(op->args[3])->val == 0 &&
2004             arg_is_const(op->args[4]) && arg_info(op->args[4])->val == 0) {
2005             goto do_setcond_high;
2006         }
2007         break;
2008 
2009     case TCG_COND_NE:
2010         inv = 1;
2011         QEMU_FALLTHROUGH;
2012     case TCG_COND_EQ:
2013         /*
2014          * Simplify EQ/NE comparisons where one of the pairs
2015          * can be simplified.
2016          */
2017         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[1],
2018                                      op->args[3], cond);
2019         switch (i ^ inv) {
2020         case 0:
2021             goto do_setcond_const;
2022         case 1:
2023             goto do_setcond_high;
2024         }
2025 
2026         i = do_constant_folding_cond(TCG_TYPE_I32, op->args[2],
2027                                      op->args[4], cond);
2028         switch (i ^ inv) {
2029         case 0:
2030             goto do_setcond_const;
2031         case 1:
2032             op->args[2] = op->args[3];
2033             op->args[3] = cond;
2034             op->opc = INDEX_op_setcond_i32;
2035             break;
2036         }
2037         break;
2038 
2039     default:
2040         break;
2041 
2042     do_setcond_high:
2043         op->args[1] = op->args[2];
2044         op->args[2] = op->args[4];
2045         op->args[3] = cond;
2046         op->opc = INDEX_op_setcond_i32;
2047         break;
2048     }
2049 
2050     ctx->z_mask = 1;
2051     ctx->s_mask = smask_from_zmask(1);
2052     return false;
2053 
2054  do_setcond_const:
2055     return tcg_opt_gen_movi(ctx, op, op->args[0], i);
2056 }
2057 
2058 static bool fold_sextract(OptContext *ctx, TCGOp *op)
2059 {
2060     uint64_t z_mask, s_mask, s_mask_old;
2061     int pos = op->args[2];
2062     int len = op->args[3];
2063 
2064     if (arg_is_const(op->args[1])) {
2065         uint64_t t;
2066 
2067         t = arg_info(op->args[1])->val;
2068         t = sextract64(t, pos, len);
2069         return tcg_opt_gen_movi(ctx, op, op->args[0], t);
2070     }
2071 
2072     z_mask = arg_info(op->args[1])->z_mask;
2073     z_mask = sextract64(z_mask, pos, len);
2074     ctx->z_mask = z_mask;
2075 
2076     s_mask_old = arg_info(op->args[1])->s_mask;
2077     s_mask = sextract64(s_mask_old, pos, len);
2078     s_mask |= MAKE_64BIT_MASK(len, 64 - len);
2079     ctx->s_mask = s_mask;
2080 
2081     if (pos == 0) {
2082         ctx->a_mask = s_mask & ~s_mask_old;
2083     }
2084 
2085     return fold_masks(ctx, op);
2086 }
2087 
2088 static bool fold_shift(OptContext *ctx, TCGOp *op)
2089 {
2090     uint64_t s_mask, z_mask, sign;
2091 
2092     if (fold_const2(ctx, op) ||
2093         fold_ix_to_i(ctx, op, 0) ||
2094         fold_xi_to_x(ctx, op, 0)) {
2095         return true;
2096     }
2097 
2098     s_mask = arg_info(op->args[1])->s_mask;
2099     z_mask = arg_info(op->args[1])->z_mask;
2100 
2101     if (arg_is_const(op->args[2])) {
2102         int sh = arg_info(op->args[2])->val;
2103 
2104         ctx->z_mask = do_constant_folding(op->opc, ctx->type, z_mask, sh);
2105 
2106         s_mask = do_constant_folding(op->opc, ctx->type, s_mask, sh);
2107         ctx->s_mask = smask_from_smask(s_mask);
2108 
2109         return fold_masks(ctx, op);
2110     }
2111 
2112     switch (op->opc) {
2113     CASE_OP_32_64(sar):
2114         /*
2115          * Arithmetic right shift will not reduce the number of
2116          * input sign repetitions.
2117          */
2118         ctx->s_mask = s_mask;
2119         break;
2120     CASE_OP_32_64(shr):
2121         /*
2122          * If the sign bit is known zero, then logical right shift
2123          * will not reduced the number of input sign repetitions.
2124          */
2125         sign = (s_mask & -s_mask) >> 1;
2126         if (!(z_mask & sign)) {
2127             ctx->s_mask = s_mask;
2128         }
2129         break;
2130     default:
2131         break;
2132     }
2133 
2134     return false;
2135 }
2136 
2137 static bool fold_sub_to_neg(OptContext *ctx, TCGOp *op)
2138 {
2139     TCGOpcode neg_op;
2140     bool have_neg;
2141 
2142     if (!arg_is_const(op->args[1]) || arg_info(op->args[1])->val != 0) {
2143         return false;
2144     }
2145 
2146     switch (ctx->type) {
2147     case TCG_TYPE_I32:
2148         neg_op = INDEX_op_neg_i32;
2149         have_neg = true;
2150         break;
2151     case TCG_TYPE_I64:
2152         neg_op = INDEX_op_neg_i64;
2153         have_neg = true;
2154         break;
2155     case TCG_TYPE_V64:
2156     case TCG_TYPE_V128:
2157     case TCG_TYPE_V256:
2158         neg_op = INDEX_op_neg_vec;
2159         have_neg = (TCG_TARGET_HAS_neg_vec &&
2160                     tcg_can_emit_vec_op(neg_op, ctx->type, TCGOP_VECE(op)) > 0);
2161         break;
2162     default:
2163         g_assert_not_reached();
2164     }
2165     if (have_neg) {
2166         op->opc = neg_op;
2167         op->args[1] = op->args[2];
2168         return fold_neg(ctx, op);
2169     }
2170     return false;
2171 }
2172 
2173 /* We cannot as yet do_constant_folding with vectors. */
2174 static bool fold_sub_vec(OptContext *ctx, TCGOp *op)
2175 {
2176     if (fold_xx_to_i(ctx, op, 0) ||
2177         fold_xi_to_x(ctx, op, 0) ||
2178         fold_sub_to_neg(ctx, op)) {
2179         return true;
2180     }
2181     return false;
2182 }
2183 
2184 static bool fold_sub(OptContext *ctx, TCGOp *op)
2185 {
2186     if (fold_const2(ctx, op) || fold_sub_vec(ctx, op)) {
2187         return true;
2188     }
2189 
2190     /* Fold sub r,x,i to add r,x,-i */
2191     if (arg_is_const(op->args[2])) {
2192         uint64_t val = arg_info(op->args[2])->val;
2193 
2194         op->opc = (ctx->type == TCG_TYPE_I32
2195                    ? INDEX_op_add_i32 : INDEX_op_add_i64);
2196         op->args[2] = arg_new_constant(ctx, -val);
2197     }
2198     return false;
2199 }
2200 
2201 static bool fold_sub2(OptContext *ctx, TCGOp *op)
2202 {
2203     return fold_addsub2(ctx, op, false);
2204 }
2205 
2206 static bool fold_tcg_ld(OptContext *ctx, TCGOp *op)
2207 {
2208     /* We can't do any folding with a load, but we can record bits. */
2209     switch (op->opc) {
2210     CASE_OP_32_64(ld8s):
2211         ctx->s_mask = MAKE_64BIT_MASK(8, 56);
2212         break;
2213     CASE_OP_32_64(ld8u):
2214         ctx->z_mask = MAKE_64BIT_MASK(0, 8);
2215         ctx->s_mask = MAKE_64BIT_MASK(9, 55);
2216         break;
2217     CASE_OP_32_64(ld16s):
2218         ctx->s_mask = MAKE_64BIT_MASK(16, 48);
2219         break;
2220     CASE_OP_32_64(ld16u):
2221         ctx->z_mask = MAKE_64BIT_MASK(0, 16);
2222         ctx->s_mask = MAKE_64BIT_MASK(17, 47);
2223         break;
2224     case INDEX_op_ld32s_i64:
2225         ctx->s_mask = MAKE_64BIT_MASK(32, 32);
2226         break;
2227     case INDEX_op_ld32u_i64:
2228         ctx->z_mask = MAKE_64BIT_MASK(0, 32);
2229         ctx->s_mask = MAKE_64BIT_MASK(33, 31);
2230         break;
2231     default:
2232         g_assert_not_reached();
2233     }
2234     return false;
2235 }
2236 
2237 static bool fold_tcg_ld_memcopy(OptContext *ctx, TCGOp *op)
2238 {
2239     TCGTemp *dst, *src;
2240     intptr_t ofs;
2241     TCGType type;
2242 
2243     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2244         return false;
2245     }
2246 
2247     type = ctx->type;
2248     ofs = op->args[2];
2249     dst = arg_temp(op->args[0]);
2250     src = find_mem_copy_for(ctx, type, ofs);
2251     if (src && src->base_type == type) {
2252         return tcg_opt_gen_mov(ctx, op, temp_arg(dst), temp_arg(src));
2253     }
2254 
2255     reset_ts(ctx, dst);
2256     record_mem_copy(ctx, type, dst, ofs, ofs + tcg_type_size(type) - 1);
2257     return true;
2258 }
2259 
2260 static bool fold_tcg_st(OptContext *ctx, TCGOp *op)
2261 {
2262     intptr_t ofs = op->args[2];
2263     intptr_t lm1;
2264 
2265     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2266         remove_mem_copy_all(ctx);
2267         return false;
2268     }
2269 
2270     switch (op->opc) {
2271     CASE_OP_32_64(st8):
2272         lm1 = 0;
2273         break;
2274     CASE_OP_32_64(st16):
2275         lm1 = 1;
2276         break;
2277     case INDEX_op_st32_i64:
2278     case INDEX_op_st_i32:
2279         lm1 = 3;
2280         break;
2281     case INDEX_op_st_i64:
2282         lm1 = 7;
2283         break;
2284     case INDEX_op_st_vec:
2285         lm1 = tcg_type_size(ctx->type) - 1;
2286         break;
2287     default:
2288         g_assert_not_reached();
2289     }
2290     remove_mem_copy_in(ctx, ofs, ofs + lm1);
2291     return false;
2292 }
2293 
2294 static bool fold_tcg_st_memcopy(OptContext *ctx, TCGOp *op)
2295 {
2296     TCGTemp *src;
2297     intptr_t ofs, last;
2298     TCGType type;
2299 
2300     if (op->args[1] != tcgv_ptr_arg(tcg_env)) {
2301         fold_tcg_st(ctx, op);
2302         return false;
2303     }
2304 
2305     src = arg_temp(op->args[0]);
2306     ofs = op->args[2];
2307     type = ctx->type;
2308 
2309     /*
2310      * Eliminate duplicate stores of a constant.
2311      * This happens frequently when the target ISA zero-extends.
2312      */
2313     if (ts_is_const(src)) {
2314         TCGTemp *prev = find_mem_copy_for(ctx, type, ofs);
2315         if (src == prev) {
2316             tcg_op_remove(ctx->tcg, op);
2317             return true;
2318         }
2319     }
2320 
2321     last = ofs + tcg_type_size(type) - 1;
2322     remove_mem_copy_in(ctx, ofs, last);
2323     record_mem_copy(ctx, type, src, ofs, last);
2324     return false;
2325 }
2326 
2327 static bool fold_xor(OptContext *ctx, TCGOp *op)
2328 {
2329     if (fold_const2_commutative(ctx, op) ||
2330         fold_xx_to_i(ctx, op, 0) ||
2331         fold_xi_to_x(ctx, op, 0) ||
2332         fold_xi_to_not(ctx, op, -1)) {
2333         return true;
2334     }
2335 
2336     ctx->z_mask = arg_info(op->args[1])->z_mask
2337                 | arg_info(op->args[2])->z_mask;
2338     ctx->s_mask = arg_info(op->args[1])->s_mask
2339                 & arg_info(op->args[2])->s_mask;
2340     return fold_masks(ctx, op);
2341 }
2342 
2343 /* Propagate constants and copies, fold constant expressions. */
2344 void tcg_optimize(TCGContext *s)
2345 {
2346     int nb_temps, i;
2347     TCGOp *op, *op_next;
2348     OptContext ctx = { .tcg = s };
2349 
2350     QSIMPLEQ_INIT(&ctx.mem_free);
2351 
2352     /* Array VALS has an element for each temp.
2353        If this temp holds a constant then its value is kept in VALS' element.
2354        If this temp is a copy of other ones then the other copies are
2355        available through the doubly linked circular list. */
2356 
2357     nb_temps = s->nb_temps;
2358     for (i = 0; i < nb_temps; ++i) {
2359         s->temps[i].state_ptr = NULL;
2360     }
2361 
2362     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2363         TCGOpcode opc = op->opc;
2364         const TCGOpDef *def;
2365         bool done = false;
2366 
2367         /* Calls are special. */
2368         if (opc == INDEX_op_call) {
2369             fold_call(&ctx, op);
2370             continue;
2371         }
2372 
2373         def = &tcg_op_defs[opc];
2374         init_arguments(&ctx, op, def->nb_oargs + def->nb_iargs);
2375         copy_propagate(&ctx, op, def->nb_oargs, def->nb_iargs);
2376 
2377         /* Pre-compute the type of the operation. */
2378         if (def->flags & TCG_OPF_VECTOR) {
2379             ctx.type = TCG_TYPE_V64 + TCGOP_VECL(op);
2380         } else if (def->flags & TCG_OPF_64BIT) {
2381             ctx.type = TCG_TYPE_I64;
2382         } else {
2383             ctx.type = TCG_TYPE_I32;
2384         }
2385 
2386         /* Assume all bits affected, no bits known zero, no sign reps. */
2387         ctx.a_mask = -1;
2388         ctx.z_mask = -1;
2389         ctx.s_mask = 0;
2390 
2391         /*
2392          * Process each opcode.
2393          * Sorted alphabetically by opcode as much as possible.
2394          */
2395         switch (opc) {
2396         CASE_OP_32_64(add):
2397             done = fold_add(&ctx, op);
2398             break;
2399         case INDEX_op_add_vec:
2400             done = fold_add_vec(&ctx, op);
2401             break;
2402         CASE_OP_32_64(add2):
2403             done = fold_add2(&ctx, op);
2404             break;
2405         CASE_OP_32_64_VEC(and):
2406             done = fold_and(&ctx, op);
2407             break;
2408         CASE_OP_32_64_VEC(andc):
2409             done = fold_andc(&ctx, op);
2410             break;
2411         CASE_OP_32_64(brcond):
2412             done = fold_brcond(&ctx, op);
2413             break;
2414         case INDEX_op_brcond2_i32:
2415             done = fold_brcond2(&ctx, op);
2416             break;
2417         CASE_OP_32_64(bswap16):
2418         CASE_OP_32_64(bswap32):
2419         case INDEX_op_bswap64_i64:
2420             done = fold_bswap(&ctx, op);
2421             break;
2422         CASE_OP_32_64(clz):
2423         CASE_OP_32_64(ctz):
2424             done = fold_count_zeros(&ctx, op);
2425             break;
2426         CASE_OP_32_64(ctpop):
2427             done = fold_ctpop(&ctx, op);
2428             break;
2429         CASE_OP_32_64(deposit):
2430             done = fold_deposit(&ctx, op);
2431             break;
2432         CASE_OP_32_64(div):
2433         CASE_OP_32_64(divu):
2434             done = fold_divide(&ctx, op);
2435             break;
2436         case INDEX_op_dup_vec:
2437             done = fold_dup(&ctx, op);
2438             break;
2439         case INDEX_op_dup2_vec:
2440             done = fold_dup2(&ctx, op);
2441             break;
2442         CASE_OP_32_64_VEC(eqv):
2443             done = fold_eqv(&ctx, op);
2444             break;
2445         CASE_OP_32_64(extract):
2446             done = fold_extract(&ctx, op);
2447             break;
2448         CASE_OP_32_64(extract2):
2449             done = fold_extract2(&ctx, op);
2450             break;
2451         CASE_OP_32_64(ext8s):
2452         CASE_OP_32_64(ext16s):
2453         case INDEX_op_ext32s_i64:
2454         case INDEX_op_ext_i32_i64:
2455             done = fold_exts(&ctx, op);
2456             break;
2457         CASE_OP_32_64(ext8u):
2458         CASE_OP_32_64(ext16u):
2459         case INDEX_op_ext32u_i64:
2460         case INDEX_op_extu_i32_i64:
2461         case INDEX_op_extrl_i64_i32:
2462         case INDEX_op_extrh_i64_i32:
2463             done = fold_extu(&ctx, op);
2464             break;
2465         CASE_OP_32_64(ld8s):
2466         CASE_OP_32_64(ld8u):
2467         CASE_OP_32_64(ld16s):
2468         CASE_OP_32_64(ld16u):
2469         case INDEX_op_ld32s_i64:
2470         case INDEX_op_ld32u_i64:
2471             done = fold_tcg_ld(&ctx, op);
2472             break;
2473         case INDEX_op_ld_i32:
2474         case INDEX_op_ld_i64:
2475         case INDEX_op_ld_vec:
2476             done = fold_tcg_ld_memcopy(&ctx, op);
2477             break;
2478         CASE_OP_32_64(st8):
2479         CASE_OP_32_64(st16):
2480         case INDEX_op_st32_i64:
2481             done = fold_tcg_st(&ctx, op);
2482             break;
2483         case INDEX_op_st_i32:
2484         case INDEX_op_st_i64:
2485         case INDEX_op_st_vec:
2486             done = fold_tcg_st_memcopy(&ctx, op);
2487             break;
2488         case INDEX_op_mb:
2489             done = fold_mb(&ctx, op);
2490             break;
2491         CASE_OP_32_64_VEC(mov):
2492             done = fold_mov(&ctx, op);
2493             break;
2494         CASE_OP_32_64(movcond):
2495             done = fold_movcond(&ctx, op);
2496             break;
2497         CASE_OP_32_64(mul):
2498             done = fold_mul(&ctx, op);
2499             break;
2500         CASE_OP_32_64(mulsh):
2501         CASE_OP_32_64(muluh):
2502             done = fold_mul_highpart(&ctx, op);
2503             break;
2504         CASE_OP_32_64(muls2):
2505         CASE_OP_32_64(mulu2):
2506             done = fold_multiply2(&ctx, op);
2507             break;
2508         CASE_OP_32_64_VEC(nand):
2509             done = fold_nand(&ctx, op);
2510             break;
2511         CASE_OP_32_64(neg):
2512             done = fold_neg(&ctx, op);
2513             break;
2514         CASE_OP_32_64_VEC(nor):
2515             done = fold_nor(&ctx, op);
2516             break;
2517         CASE_OP_32_64_VEC(not):
2518             done = fold_not(&ctx, op);
2519             break;
2520         CASE_OP_32_64_VEC(or):
2521             done = fold_or(&ctx, op);
2522             break;
2523         CASE_OP_32_64_VEC(orc):
2524             done = fold_orc(&ctx, op);
2525             break;
2526         case INDEX_op_qemu_ld_a32_i32:
2527         case INDEX_op_qemu_ld_a64_i32:
2528         case INDEX_op_qemu_ld_a32_i64:
2529         case INDEX_op_qemu_ld_a64_i64:
2530         case INDEX_op_qemu_ld_a32_i128:
2531         case INDEX_op_qemu_ld_a64_i128:
2532             done = fold_qemu_ld(&ctx, op);
2533             break;
2534         case INDEX_op_qemu_st8_a32_i32:
2535         case INDEX_op_qemu_st8_a64_i32:
2536         case INDEX_op_qemu_st_a32_i32:
2537         case INDEX_op_qemu_st_a64_i32:
2538         case INDEX_op_qemu_st_a32_i64:
2539         case INDEX_op_qemu_st_a64_i64:
2540         case INDEX_op_qemu_st_a32_i128:
2541         case INDEX_op_qemu_st_a64_i128:
2542             done = fold_qemu_st(&ctx, op);
2543             break;
2544         CASE_OP_32_64(rem):
2545         CASE_OP_32_64(remu):
2546             done = fold_remainder(&ctx, op);
2547             break;
2548         CASE_OP_32_64(rotl):
2549         CASE_OP_32_64(rotr):
2550         CASE_OP_32_64(sar):
2551         CASE_OP_32_64(shl):
2552         CASE_OP_32_64(shr):
2553             done = fold_shift(&ctx, op);
2554             break;
2555         CASE_OP_32_64(setcond):
2556             done = fold_setcond(&ctx, op);
2557             break;
2558         CASE_OP_32_64(negsetcond):
2559             done = fold_negsetcond(&ctx, op);
2560             break;
2561         case INDEX_op_setcond2_i32:
2562             done = fold_setcond2(&ctx, op);
2563             break;
2564         CASE_OP_32_64(sextract):
2565             done = fold_sextract(&ctx, op);
2566             break;
2567         CASE_OP_32_64(sub):
2568             done = fold_sub(&ctx, op);
2569             break;
2570         case INDEX_op_sub_vec:
2571             done = fold_sub_vec(&ctx, op);
2572             break;
2573         CASE_OP_32_64(sub2):
2574             done = fold_sub2(&ctx, op);
2575             break;
2576         CASE_OP_32_64_VEC(xor):
2577             done = fold_xor(&ctx, op);
2578             break;
2579         default:
2580             break;
2581         }
2582 
2583         if (!done) {
2584             finish_folding(&ctx, op);
2585         }
2586     }
2587 }
2588