xref: /qemu/tcg/tcg.c (revision 78f314cf)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 #include "qemu/osdep.h"
26 
/* Define to dump the ELF file used to communicate with GDB.  */
28 #undef DEBUG_JIT
29 
30 #include "qemu/error-report.h"
31 #include "qemu/cutils.h"
32 #include "qemu/host-utils.h"
33 #include "qemu/qemu-print.h"
34 #include "qemu/cacheflush.h"
35 #include "qemu/cacheinfo.h"
36 #include "qemu/timer.h"
37 
38 /* Note: the long term plan is to reduce the dependencies on the QEMU
39    CPU definitions. Currently they are used for qemu_ld/st
40    instructions */
41 #define NO_CPU_IO_DEFS
42 
43 #include "exec/exec-all.h"
44 #include "tcg/tcg-op.h"
45 
46 #if UINTPTR_MAX == UINT32_MAX
47 # define ELF_CLASS  ELFCLASS32
48 #else
49 # define ELF_CLASS  ELFCLASS64
50 #endif
51 #if HOST_BIG_ENDIAN
52 # define ELF_DATA   ELFDATA2MSB
53 #else
54 # define ELF_DATA   ELFDATA2LSB
55 #endif
56 
57 #include "elf.h"
58 #include "exec/log.h"
59 #include "tcg/tcg-ldst.h"
60 #include "tcg/tcg-temp-internal.h"
61 #include "tcg-internal.h"
62 #include "accel/tcg/perf.h"
63 #ifdef CONFIG_USER_ONLY
64 #include "exec/user/guest-base.h"
65 #endif
66 
67 /* Forward declarations for functions declared in tcg-target.c.inc and
68    used here. */
69 static void tcg_target_init(TCGContext *s);
70 static void tcg_target_qemu_prologue(TCGContext *s);
71 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
72                         intptr_t value, intptr_t addend);
73 
/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    /* Aligned to the host pointer size so the structure can be emitted
       directly into the debug-frame blob that embeds host addresses. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/* Packed so field offsets match the serialized debug-frame layout. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;   /* start address of the described code */
    uintptr_t func_len;     /* length in bytes of the described code */
} DebugFrameFDEHeader;

/* A CIE immediately followed by a single FDE header. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
96 
/*
 * Deferred slow-path record for one qemu_ld/qemu_st opcode: filled in
 * by the backend when emitting the fast path, and consumed when the
 * load/store labels are finalized (see tcg_out_ldst_finalize).
 */
typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* addr of the next IR of qemu_ld/st IR */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
109 
110 static void tcg_register_jit_int(const void *buf, size_t size,
111                                  const void *debug_frame,
112                                  size_t debug_frame_size)
113     __attribute__((unused));
114 
115 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
116 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
117                        intptr_t arg2);
118 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
119 static void tcg_out_movi(TCGContext *s, TCGType type,
120                          TCGReg ret, tcg_target_long arg);
121 static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
122 static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
123 static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
124 static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
125 static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
126 static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
127 static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
128 static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
129 static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
130 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
131 static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
132 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
133 static void tcg_out_goto_tb(TCGContext *s, int which);
134 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
135                        const TCGArg args[TCG_MAX_OP_ARGS],
136                        const int const_args[TCG_MAX_OP_ARGS]);
137 #if TCG_TARGET_MAYBE_vec
138 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
139                             TCGReg dst, TCGReg src);
140 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
141                              TCGReg dst, TCGReg base, intptr_t offset);
142 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
143                              TCGReg dst, int64_t arg);
144 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
145                            unsigned vecl, unsigned vece,
146                            const TCGArg args[TCG_MAX_OP_ARGS],
147                            const int const_args[TCG_MAX_OP_ARGS]);
148 #else
149 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
150                                    TCGReg dst, TCGReg src)
151 {
152     g_assert_not_reached();
153 }
154 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
155                                     TCGReg dst, TCGReg base, intptr_t offset)
156 {
157     g_assert_not_reached();
158 }
159 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
160                                     TCGReg dst, int64_t arg)
161 {
162     g_assert_not_reached();
163 }
164 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
165                                   unsigned vecl, unsigned vece,
166                                   const TCGArg args[TCG_MAX_OP_ARGS],
167                                   const int const_args[TCG_MAX_OP_ARGS])
168 {
169     g_assert_not_reached();
170 }
171 #endif
172 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
173                        intptr_t arg2);
174 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
175                         TCGReg base, intptr_t ofs);
176 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
177                          const TCGHelperInfo *info);
178 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
179 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
180 #ifdef TCG_TARGET_NEED_LDST_LABELS
181 static int tcg_out_ldst_finalize(TCGContext *s);
182 #endif
183 
/* Backend-supplied parameters for emitting qemu_ld/st helper calls. */
typedef struct TCGLdstHelperParam {
    /* Optional generator for the return-address argument; presumably may
       be NULL when unused -- confirm against tcg-target.c.inc callers. */
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;          /* number of valid entries in tmp[] */
    int tmp[3];             /* scratch registers the backend makes available */
} TCGLdstHelperParam;
189 
190 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
191                                    const TCGLdstHelperParam *p)
192     __attribute__((unused));
193 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
194                                   bool load_sign, const TCGLdstHelperParam *p)
195     __attribute__((unused));
196 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
197                                    const TCGLdstHelperParam *p)
198     __attribute__((unused));
199 
/* Slow-path guest-memory load helpers, indexed by MemOp size|sign. */
static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    /* These entries are only used on 64-bit hosts. */
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};
212 
/* Slow-path guest-memory store helpers, indexed by MemOp size. */
static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    /* 128-bit stores are only available on 64-bit hosts. */
    [MO_128] = helper_st16_mmu,
#endif
};
222 
223 typedef struct {
224     MemOp atom;   /* lg2 bits of atomicity required */
225     MemOp align;  /* lg2 bits of alignment to use */
226 } TCGAtomAlign;
227 
228 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
229                                            MemOp host_atom, bool allow_two_ops)
230     __attribute__((unused));
231 
232 TCGContext tcg_init_ctx;
233 __thread TCGContext *tcg_ctx;
234 
235 TCGContext **tcg_ctxs;
236 unsigned int tcg_cur_ctxs;
237 unsigned int tcg_max_ctxs;
238 TCGv_env cpu_env = 0;
239 const void *tcg_code_gen_epilogue;
240 uintptr_t tcg_splitwx_diff;
241 
242 #ifndef CONFIG_TCG_INTERPRETER
243 tcg_prologue_fn *tcg_qemu_tb_exec;
244 #endif
245 
246 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
247 static TCGRegSet tcg_target_call_clobber_regs;
248 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte of host code at the current output position. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite one previously emitted byte at @p. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
261 
#if TCG_TARGET_INSN_UNIT_SIZE <= 2
/* Emit a 16-bit value into the code stream. */
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        /* Narrower insn units: copy bytewise, advance by 2 / unit size. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 16-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
284 
#if TCG_TARGET_INSN_UNIT_SIZE <= 4
/* Emit a 32-bit value into the code stream. */
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        /* Narrower insn units: copy bytewise, advance by 4 / unit size. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 32-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
307 
#if TCG_TARGET_INSN_UNIT_SIZE <= 8
/* Emit a 64-bit value into the code stream. */
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        /* Narrower insn units: copy bytewise, advance by 8 / unit size. */
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

/* Overwrite a previously emitted 64-bit value at @p. */
static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
330 
331 /* label relocation processing */
332 
333 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
334                           TCGLabel *l, intptr_t addend)
335 {
336     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
337 
338     r->type = type;
339     r->ptr = code_ptr;
340     r->addend = addend;
341     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
342 }
343 
/*
 * Define label @l at the current output position.  A label may be
 * defined only once; its recorded relocations are resolved later by
 * tcg_resolve_relocs().
 */
static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    /* Store the read-execute alias of the current write pointer. */
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}
350 
351 TCGLabel *gen_new_label(void)
352 {
353     TCGContext *s = tcg_ctx;
354     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
355 
356     memset(l, 0, sizeof(TCGLabel));
357     l->id = s->nb_labels++;
358     QSIMPLEQ_INIT(&l->branches);
359     QSIMPLEQ_INIT(&l->relocs);
360 
361     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
362 
363     return l;
364 }
365 
366 static bool tcg_resolve_relocs(TCGContext *s)
367 {
368     TCGLabel *l;
369 
370     QSIMPLEQ_FOREACH(l, &s->labels, next) {
371         TCGRelocation *r;
372         uintptr_t value = l->u.value;
373 
374         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
375             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
376                 return false;
377             }
378         }
379     }
380     return true;
381 }
382 
/* Record the current code offset as the reset point for TB exit @which. */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
391 
/* Record the current code offset as the jump insn for TB exit @which. */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
400 
/* Address of the jump-target slot for TB exit @which, as seen at runtime. */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
409 
/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* -2 identifies this cause at the matching sigsetjmp site
       (s->jmp_trans) -- presumably in the translation loop. */
    siglongjmp(s->jmp_trans, -2);
}
416 
417 /*
418  * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
419  * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
420  *
421  * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
423  * argument stack slot), converting to TCGReg once all arguments that
424  * are destined for the stack are processed.
425  */
typedef struct TCGMovExtend {
    unsigned dst;       /* TCGReg, or transiently an argument slot number */
    TCGReg src;         /* source register */
    TCGType dst_type;   /* integral type of the destination */
    TCGType src_type;   /* integral type of the source */
    MemOp src_ext;      /* extension to apply to the source */
} TCGMovExtend;
433 
434 /**
435  * tcg_out_movext -- move and extend
436  * @s: tcg context
437  * @dst_type: integral type for destination
438  * @dst: destination register
439  * @src_type: integral type for source
440  * @src_ext: extension to apply to source
441  * @src: source register
442  *
443  * Move or extend @src into @dst, depending on @src_ext and the types.
444  */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            /* 32-bit destination: a 32-bit "extension" is just a move,
               or a truncation when the source is 64-bit. */
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            /* Widen i32 -> i64, signedness chosen by src_ext. */
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            /* i64 -> i64: extend from the low 32 bits of the source. */
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        /* A full 64-bit source requires a 64-bit host register. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
495 
496 /* Minor variations on a theme, using a structure. */
/* As tcg_out_movext, taking dst/types/ext from @i but the source from @src. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}
502 
/* Emit the single move-and-extend described by @i. */
static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}
507 
508 /**
509  * tcg_out_movext2 -- move and extend two pair
510  * @s: tcg context
511  * @i1: first move description
512  * @i2: second move description
513  * @scratch: temporary register, or -1 for none
514  *
515  * As tcg_out_movext, for both @i1 and @i2, caring for overlap
516  * between the sources and destinations.
517  */
518 
static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        /* i1's destination does not clobber i2's source: plain order. */
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        /* Full swap: each move would clobber the other's source. */
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            /* No host xchg: spill src1 into the scratch register. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    /* Emit i2 first, because i1's destination overlaps i2's source. */
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}
547 
548 /**
549  * tcg_out_movext3 -- move and extend three pair
550  * @s: tcg context
551  * @i1: first move description
552  * @i2: second move description
553  * @i3: third move description
554  * @scratch: temporary register, or -1 for none
555  *
556  * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
557  * between the sources and destinations.
558  */
559 
static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    /* If any move's destination clobbers no other source, emit it
       first and reduce to the two-move case. */
    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No host xchg: break the cycle through the scratch reg. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            /* No host xchg: break the cycle through the scratch reg. */
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}
623 
624 #define C_PFX1(P, A)                    P##A
625 #define C_PFX2(P, A, B)                 P##A##_##B
626 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
627 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
628 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
629 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
630 
631 /* Define an enumeration for the various combinations. */
632 
633 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
634 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
635 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
636 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
637 
638 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
639 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
640 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
641 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
642 
643 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
644 
645 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
646 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
647 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
648 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
649 
650 typedef enum {
651 #include "tcg-target-con-set.h"
652 } TCGConstraintSetIndex;
653 
654 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
655 
656 #undef C_O0_I1
657 #undef C_O0_I2
658 #undef C_O0_I3
659 #undef C_O0_I4
660 #undef C_O1_I1
661 #undef C_O1_I2
662 #undef C_O1_I3
663 #undef C_O1_I4
664 #undef C_N1_I2
665 #undef C_O2_I1
666 #undef C_O2_I2
667 #undef C_O2_I3
668 #undef C_O2_I4
669 
670 /* Put all of the constraint sets into an array, indexed by the enum. */
671 
672 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
673 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
674 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
675 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
676 
677 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
678 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
679 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
680 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
681 
682 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
683 
684 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
685 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
686 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
687 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
688 
689 static const TCGTargetOpDef constraint_sets[] = {
690 #include "tcg-target-con-set.h"
691 };
692 
693 
694 #undef C_O0_I1
695 #undef C_O0_I2
696 #undef C_O0_I3
697 #undef C_O0_I4
698 #undef C_O1_I1
699 #undef C_O1_I2
700 #undef C_O1_I3
701 #undef C_O1_I4
702 #undef C_N1_I2
703 #undef C_O2_I1
704 #undef C_O2_I2
705 #undef C_O2_I3
706 #undef C_O2_I4
707 
708 /* Expand the enumerator to be returned from tcg_target_op_def(). */
709 
710 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
711 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
712 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
713 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
714 
715 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
716 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
717 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
718 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
719 
720 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
721 
722 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
723 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
724 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
725 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
726 
727 #include "tcg-target.c.inc"
728 
/*
 * Allocate the per-context plugin translation-block state.
 * A no-op unless QEMU was configured with plugin support.
 */
static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}
737 
738 /*
739  * All TCG threads except the parent (i.e. the one that called tcg_context_init
740  * and registered the target's TCG globals) must register with this function
741  * before initiating translation.
742  *
743  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
744  * of tcg_region_init() for the reasoning behind this.
745  *
746  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
748  * is not used anymore for translation once this function is called.
749  *
750  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
752  */
#ifdef CONFIG_USER_ONLY
/* User-mode: every thread shares the single initial context. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
/* Softmmu: give the calling thread its own copy of the initial context. */
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* Rebase the pointer so it refers into this copy's temps[]. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        /* Only secondary contexts allocate plugin state and a region. */
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */
788 
789 /* pool based memory allocation */
/*
 * Slow path of pool allocation: return @size bytes from the context's
 * memory pool.  Oversized requests get a dedicated chunk on the
 * pool_first_large list; otherwise advance to (or append) the next
 * fixed-size TCG_POOL_CHUNK_SIZE chunk.  All pool memory is released
 * together by tcg_pool_reset().
 */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p)
                goto new_pool;  /* no chunks exist yet: create the first */
        } else {
            if (!p->next) {
            new_pool:
                /* Append a fresh fixed-size chunk to the chain.  Note the
                   goto above jumps into this branch from the empty case. */
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    /* Make @p current; the caller's block occupies the start of data[]. */
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
830 
831 void tcg_pool_reset(TCGContext *s)
832 {
833     TCGPool *p, *t;
834     for (p = s->pool_first_large; p; p = t) {
835         t = p->next;
836         g_free(p);
837     }
838     s->pool_first_large = NULL;
839     s->pool_cur = s->pool_end = NULL;
840     s->pool_current = NULL;
841 }
842 
843 #include "exec/helper-proto.h"
844 
845 static TCGHelperInfo all_helpers[] = {
846 #include "exec/helper-tcg.h"
847 };
848 static GHashTable *helper_table;
849 
850 /*
851  * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
852  * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
853  * We only use these for layout in tcg_out_ld_helper_ret and
854  * tcg_out_st_helper_args, and share them between several of
855  * the helpers, with the end result that it's easier to build manually.
856  */
857 
858 #if TCG_TARGET_REG_BITS == 32
859 # define dh_typecode_ttl  dh_typecode_i32
860 #else
861 # define dh_typecode_ttl  dh_typecode_i64
862 #endif
863 
/* Load helper layout: tcg_target_ulong ret, args (env, addr, oi, ra). */
static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};
872 
/* Load helper layout: uint64_t ret, args (env, addr, oi, ra). */
static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};
881 
/* Load helper layout: Int128 ret, args (env, addr, oi, ra). */
static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};
890 
/* Store helper layout: void ret, args (env, addr, uint32_t data, oi, ra). */
static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
900 
/* Store helper layout: void ret, args (env, addr, uint64_t data, oi, ra). */
static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
910 
/* Store helper layout: void ret, args (env, addr, Int128 data, oi, ra). */
static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};
920 
921 #ifdef CONFIG_TCG_INTERPRETER
/* Map a single 3-bit dh_typecode value to the corresponding libffi type. */
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    /* All typecodes producible by dh_typemask are handled above. */
    g_assert_not_reached();
}
958 
/*
 * Build an ffi_cif for every helper so the TCI interpreter can make
 * the actual host calls.  Helpers sharing a typemask share one cif;
 * the cif allocations intentionally live for the process lifetime,
 * referenced from TCGHelperInfo.cif.
 */
static void init_ffi_layouts(void)
{
    /* g_direct_hash/equal for direct comparisons on uint32_t.  */
    GHashTable *ffi_table = g_hash_table_new(NULL, NULL);

    for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        TCGHelperInfo *info = &all_helpers[i];
        unsigned typemask = info->typemask;
        gpointer hash = (gpointer)(uintptr_t)typemask;
        struct {
            ffi_cif cif;
            ffi_type *args[];
        } *ca;
        ffi_status status;
        int nargs;
        ffi_cif *cif;

        /* Reuse a previously built cif for an identical signature. */
        cif = g_hash_table_lookup(ffi_table, hash);
        if (cif) {
            info->cif = cif;
            continue;
        }

        /* Ignoring the return type, find the last non-zero field. */
        nargs = 32 - clz32(typemask >> 3);
        nargs = DIV_ROUND_UP(nargs, 3);
        assert(nargs <= MAX_CALL_IARGS);

        /* Allocate cif and its argument-type array in one block. */
        ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
        ca->cif.rtype = typecode_to_ffi(typemask & 7);
        ca->cif.nargs = nargs;

        if (nargs != 0) {
            ca->cif.arg_types = ca->args;
            for (int j = 0; j < nargs; ++j) {
                int typecode = extract32(typemask, (j + 1) * 3, 3);
                ca->args[j] = typecode_to_ffi(typecode);
            }
        }

        status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                              ca->cif.rtype, ca->cif.arg_types);
        assert(status == FFI_OK);

        cif = &ca->cif;
        info->cif = cif;
        g_hash_table_insert(ffi_table, hash, (gpointer)cif);
    }

    /* The table is only needed for deduplication; the cifs remain. */
    g_hash_table_destroy(ffi_table);
}
1010 #endif /* CONFIG_TCG_INTERPRETER */
1011 
1012 static inline bool arg_slot_reg_p(unsigned arg_slot)
1013 {
1014     /*
1015      * Split the sizeof away from the comparison to avoid Werror from
1016      * "unsigned < 0 is always false", when iarg_regs is empty.
1017      */
1018     unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
1019     return arg_slot < nreg;
1020 }
1021 
1022 static inline int arg_slot_stk_ofs(unsigned arg_slot)
1023 {
1024     unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
1025     unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
1026 
1027     tcg_debug_assert(stk_slot < max);
1028     return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
1029 }
1030 
/* Running totals while laying out one helper call's arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
1037 
1038 static void layout_arg_even(TCGCumulativeArgs *cum)
1039 {
1040     cum->arg_slot += cum->arg_slot & 1;
1041 }
1042 
1043 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
1044                          TCGCallArgumentKind kind)
1045 {
1046     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1047 
1048     *loc = (TCGCallArgumentLoc){
1049         .kind = kind,
1050         .arg_idx = cum->arg_idx,
1051         .arg_slot = cum->arg_slot,
1052     };
1053     cum->info_in_idx++;
1054     cum->arg_slot++;
1055 }
1056 
1057 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
1058                                 TCGHelperInfo *info, int n)
1059 {
1060     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1061 
1062     for (int i = 0; i < n; ++i) {
1063         /* Layout all using the same arg_idx, adjusting the subindex. */
1064         loc[i] = (TCGCallArgumentLoc){
1065             .kind = TCG_CALL_ARG_NORMAL,
1066             .arg_idx = cum->arg_idx,
1067             .tmp_subindex = i,
1068             .arg_slot = cum->arg_slot + i,
1069         };
1070     }
1071     cum->info_in_idx += n;
1072     cum->arg_slot += n;
1073 }
1074 
1075 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
1076 {
1077     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
1078     int n = 128 / TCG_TARGET_REG_BITS;
1079 
1080     /* The first subindex carries the pointer. */
1081     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
1082 
1083     /*
1084      * The callee is allowed to clobber memory associated with
1085      * structure pass by-reference.  Therefore we must make copies.
1086      * Allocate space from "ref_slot", which will be adjusted to
1087      * follow the parameters on the stack.
1088      */
1089     loc[0].ref_slot = cum->ref_slot;
1090 
1091     /*
1092      * Subsequent words also go into the reference slot, but
1093      * do not accumulate into the regular arguments.
1094      */
1095     for (int i = 1; i < n; ++i) {
1096         loc[i] = (TCGCallArgumentLoc){
1097             .kind = TCG_CALL_ARG_BY_REF_N,
1098             .arg_idx = cum->arg_idx,
1099             .tmp_subindex = i,
1100             .ref_slot = cum->ref_slot + i,
1101         };
1102     }
1103     cum->info_in_idx += n;
1104     cum->ref_slot += n;
1105 }
1106 
/*
 * Compute the call layout for one helper: where the return value and
 * each input argument live (registers and/or stack slots), according
 * to the backend's TCG_TARGET_CALL_* conventions.  Fills in nr_out,
 * out_kind, nr_in and the in[] array of INFO.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        /* First decode the 3-bit typecode into a TCGType. */
        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        /* Then place the argument per the backend convention for that type. */
        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /* Low bit of the typecode distinguishes signedness. */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
1287 
1288 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
1289 static void process_op_defs(TCGContext *s);
1290 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1291                                             TCGReg reg, const char *name);
1292 
/*
 * One-time initialization of the shared TCG context: constraint
 * storage for all ops, the helper lookup table and call layouts,
 * backend setup, register allocation order, and the "env" global.
 */
static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    /* Carve the single allocation into per-op constraint arrays. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        init_call_layout(&all_helpers[i]);
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    /* The qemu_ld/st helper infos above are laid out here as well. */
    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

#ifdef CONFIG_TCG_INTERPRETER
    init_ffi_layouts();
#endif

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    /* Create the "env" global, fixed in TCG_AREG0. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
1383 
/* Public entry point: initialize the TCG context and the code region. */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1389 
1390 /*
1391  * Allocate TBs right before their corresponding translated code, making
1392  * sure that TBs and code are on different cache lines.
1393  */
1394 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1395 {
1396     uintptr_t align = qemu_icache_linesize;
1397     TranslationBlock *tb;
1398     void *next;
1399 
1400  retry:
1401     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1402     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1403 
1404     if (unlikely(next > s->code_gen_highwater)) {
1405         if (tcg_region_alloc(s)) {
1406             return NULL;
1407         }
1408         goto retry;
1409     }
1410     qatomic_set(&s->code_gen_ptr, next);
1411     s->data_gen_ptr = NULL;
1412     return tb;
1413 }
1414 
/*
 * Generate the host prologue/epilogue at the start of the code buffer
 * and point tcg_qemu_tb_exec at it.  Optionally dumps the generated
 * code when CPU_LOG_TB_OUT_ASM logging is enabled.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The entry point is the executable-mapping alias of the prologue. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    /* Make the freshly written prologue visible to instruction fetch. */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                /* Disassemble code, then dump the out-of-line data words. */
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
1494 
/* Reset the per-TB state of the context ahead of a new translation. */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);

    /* The frontend must have selected a guest address width by now. */
    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
                     s->addr_type == TCG_TYPE_I64);
}
1525 
1526 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1527 {
1528     int n = s->nb_temps++;
1529 
1530     if (n >= TCG_MAX_TEMPS) {
1531         tcg_raise_tb_overflow(s);
1532     }
1533     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1534 }
1535 
1536 static TCGTemp *tcg_global_alloc(TCGContext *s)
1537 {
1538     TCGTemp *ts;
1539 
1540     tcg_debug_assert(s->nb_globals == s->nb_temps);
1541     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1542     s->nb_globals++;
1543     ts = tcg_temp_alloc(s);
1544     ts->kind = TEMP_GLOBAL;
1545 
1546     return ts;
1547 }
1548 
1549 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1550                                             TCGReg reg, const char *name)
1551 {
1552     TCGTemp *ts;
1553 
1554     tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);
1555 
1556     ts = tcg_global_alloc(s);
1557     ts->base_type = type;
1558     ts->type = type;
1559     ts->kind = TEMP_FIXED;
1560     ts->reg = reg;
1561     ts->name = name;
1562     tcg_regset_set_reg(s->reserved_regs, reg);
1563 
1564     return ts;
1565 }
1566 
/* Record the TB spill frame [start, start+size) addressed off REG. */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
1574 
/*
 * Create a global temp backed by memory at BASE + OFFSET.
 * On 32-bit hosts a 64-bit global is split into two 32-bit halves,
 * "<name>_0" at OFFSET and "<name>_1" (subindex 1) at OFFSET + 4,
 * i.e. in address order within the 64-bit field.
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0;

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* The pair must be adjacent for subindex addressing to work. */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + 4;
        ts2->temp_subindex = 1;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1634 
/*
 * Allocate a translation-time temporary of TYPE.
 * TEMP_EBB temps are recycled through the per-type free list;
 * TEMP_TB temps are always newly allocated.  Types wider than a host
 * register occupy consecutive TCGTemps that share the same base_type.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int n;

    if (kind == TEMP_EBB) {
        int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);

        if (idx < TCG_MAX_TEMPS) {
            /* There is already an available temp with the right type.  */
            clear_bit(idx, s->free_temps[type].l);

            ts = &s->temps[idx];
            ts->temp_allocated = 1;
            tcg_debug_assert(ts->base_type == type);
            tcg_debug_assert(ts->kind == kind);
            return ts;
        }
    } else {
        tcg_debug_assert(kind == TEMP_TB);
    }

    /* Number of host-register-sized pieces needed for TYPE. */
    switch (type) {
    case TCG_TYPE_I32:
    case TCG_TYPE_V64:
    case TCG_TYPE_V128:
    case TCG_TYPE_V256:
        n = 1;
        break;
    case TCG_TYPE_I64:
        n = 64 / TCG_TARGET_REG_BITS;
        break;
    case TCG_TYPE_I128:
        n = 128 / TCG_TARGET_REG_BITS;
        break;
    default:
        g_assert_not_reached();
    }

    ts = tcg_temp_alloc(s);
    ts->base_type = type;
    ts->temp_allocated = 1;
    ts->kind = kind;

    if (n == 1) {
        ts->type = type;
    } else {
        /* Multi-word value: each piece is one host register wide. */
        ts->type = TCG_TYPE_REG;

        for (int i = 1; i < n; ++i) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            /* Pieces must be adjacent for subindex addressing. */
            tcg_debug_assert(ts2 == ts + i);
            ts2->base_type = type;
            ts2->type = TCG_TYPE_REG;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = i;
            ts2->kind = kind;
        }
    }
    return ts;
}
1698 
1699 TCGv_vec tcg_temp_new_vec(TCGType type)
1700 {
1701     TCGTemp *t;
1702 
1703 #ifdef CONFIG_DEBUG_TCG
1704     switch (type) {
1705     case TCG_TYPE_V64:
1706         assert(TCG_TARGET_HAS_v64);
1707         break;
1708     case TCG_TYPE_V128:
1709         assert(TCG_TARGET_HAS_v128);
1710         break;
1711     case TCG_TYPE_V256:
1712         assert(TCG_TARGET_HAS_v256);
1713         break;
1714     default:
1715         g_assert_not_reached();
1716     }
1717 #endif
1718 
1719     t = tcg_temp_new_internal(type, TEMP_EBB);
1720     return temp_tcgv_vec(t);
1721 }
1722 
1723 /* Create a new temp of the same type as an existing temp.  */
1724 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1725 {
1726     TCGTemp *t = tcgv_vec_temp(match);
1727 
1728     tcg_debug_assert(t->temp_allocated != 0);
1729 
1730     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1731     return temp_tcgv_vec(t);
1732 }
1733 
1734 void tcg_temp_free_internal(TCGTemp *ts)
1735 {
1736     TCGContext *s = tcg_ctx;
1737 
1738     switch (ts->kind) {
1739     case TEMP_CONST:
1740     case TEMP_TB:
1741         /* Silently ignore free. */
1742         break;
1743     case TEMP_EBB:
1744         tcg_debug_assert(ts->temp_allocated != 0);
1745         ts->temp_allocated = 0;
1746         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1747         break;
1748     default:
1749         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1750         g_assert_not_reached();
1751     }
1752 }
1753 
/*
 * Return the TEMP_CONST temp holding VAL of TYPE, creating it on
 * first use.  Constants are interned per-context in a hash table
 * keyed by the 64-bit value, so repeated requests share one temp.
 */
TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        /* Lazily create the per-type intern table. */
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        int64_t *val_ptr;

        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* Represent a 64-bit constant as a pair of 32-bit halves. */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            tcg_debug_assert(ts2 == ts + 1);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;

            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->temp_subindex = 1;

            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts[HOST_BIG_ENDIAN].val = val;
            ts[!HOST_BIG_ENDIAN].val = val >> 32;
            val_ptr = &ts[HOST_BIG_ENDIAN].val;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
            val_ptr = &ts->val;
        }
        /* The key points into the temp itself, which is never freed. */
        g_hash_table_insert(h, val_ptr, ts);
    }

    return ts;
}
1808 
1809 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1810 {
1811     val = dup_const(vece, val);
1812     return temp_tcgv_vec(tcg_constant_internal(type, val));
1813 }
1814 
1815 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1816 {
1817     TCGTemp *t = tcgv_vec_temp(match);
1818 
1819     tcg_debug_assert(t->temp_allocated != 0);
1820     return tcg_constant_vec(t->base_type, vece, val);
1821 }
1822 
1823 /* Return true if OP may appear in the opcode stream.
1824    Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    /* Any vector size at all implies support for the basic vector ops. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control flow and guest load/store ops every backend implements. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_a32_i32:
    case INDEX_op_qemu_ld_a64_i32:
    case INDEX_op_qemu_st_a32_i32:
    case INDEX_op_qemu_st_a64_i32:
    case INDEX_op_qemu_ld_a32_i64:
    case INDEX_op_qemu_ld_a64_i64:
    case INDEX_op_qemu_st_a32_i64:
    case INDEX_op_qemu_st_a64_i64:
        return true;

    case INDEX_op_qemu_st8_a32_i32:
    case INDEX_op_qemu_st8_a64_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_qemu_ld_a32_i128:
    case INDEX_op_qemu_ld_a64_i128:
    case INDEX_op_qemu_st_a32_i128:
    case INDEX_op_qemu_st_a64_i128:
        return TCG_TARGET_HAS_qemu_ldst_i128;

    /* Mandatory 32-bit integer ops. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit ops, gated on per-backend TCG_TARGET_HAS_* flags. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word comparisons exist only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Mandatory 64-bit ops, available whenever the host is 64-bit. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit ops. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops: all require some vector support, many a specific flag. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Anything past the generic range is a backend-specific opcode. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
2128 
2129 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2130 
/*
 * Emit a call op for the helper FUNC, returning into RET (NULL for void)
 * and taking argument temps from ARGS.  The output/input layout comes
 * from the TCGHelperInfo registered for FUNC in helper_table.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    const TCGHelperInfo *info;
    TCGv_i64 extend_free[MAX_CALL_IARGS];
    int n_extend = 0;
    TCGOp *op;
    int i, n, pi = 0, total_args;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    /* +2 for the trailing function pointer and info pointer args. */
    total_args = info->nr_out + info->nr_in + 2;
    op = tcg_op_alloc(INDEX_op_call, total_args);

#ifdef CONFIG_PLUGIN
    /* Flag helpers that may affect guest state */
    if (tcg_ctx->plugin_insn &&
        !(info->flags & TCG_CALL_PLUGIN) &&
        !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
        tcg_ctx->plugin_insn->calls_helpers = true;
    }
#endif

    /* Lay out the output temps first. */
    TCGOP_CALLO(op) = n = info->nr_out;
    switch (n) {
    case 0:
        tcg_debug_assert(ret == NULL);
        break;
    case 1:
        tcg_debug_assert(ret != NULL);
        op->args[pi++] = temp_arg(ret);
        break;
    case 2:
    case 4:
        /* Multi-word return: RET is the first of N contiguous temps. */
        tcg_debug_assert(ret != NULL);
        tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
        tcg_debug_assert(ret->temp_subindex == 0);
        for (i = 0; i < n; ++i) {
            op->args[pi++] = temp_arg(ret + i);
        }
        break;
    default:
        g_assert_not_reached();
    }

    /* Then the input temps, per the per-argument location info. */
    TCGOP_CALLI(op) = n = info->nr_in;
    for (i = 0; i < n; i++) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_BY_REF:
        case TCG_CALL_ARG_BY_REF_N:
            op->args[pi++] = temp_arg(ts);
            break;

        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            {
                /* Sub-word arg: widen into a fresh i64, freed below. */
                TCGv_i64 temp = tcg_temp_ebb_new_i64();
                TCGv_i32 orig = temp_tcgv_i32(ts);

                if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
                    tcg_gen_ext_i32_i64(temp, orig);
                } else {
                    tcg_gen_extu_i32_i64(temp, orig);
                }
                op->args[pi++] = tcgv_i64_arg(temp);
                extend_free[n_extend++] = temp;
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = (uintptr_t)info;
    tcg_debug_assert(pi == total_args);

    QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);

    /* Release the temporaries created for argument extension. */
    tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
    for (i = 0; i < n_extend; ++i) {
        tcg_temp_free_i64(extend_free[i]);
    }
}
2217 
2218 static void tcg_reg_alloc_start(TCGContext *s)
2219 {
2220     int i, n;
2221 
2222     for (i = 0, n = s->nb_temps; i < n; i++) {
2223         TCGTemp *ts = &s->temps[i];
2224         TCGTempVal val = TEMP_VAL_MEM;
2225 
2226         switch (ts->kind) {
2227         case TEMP_CONST:
2228             val = TEMP_VAL_CONST;
2229             break;
2230         case TEMP_FIXED:
2231             val = TEMP_VAL_REG;
2232             break;
2233         case TEMP_GLOBAL:
2234             break;
2235         case TEMP_EBB:
2236             val = TEMP_VAL_DEAD;
2237             /* fall through */
2238         case TEMP_TB:
2239             ts->mem_allocated = 0;
2240             break;
2241         default:
2242             g_assert_not_reached();
2243         }
2244         ts->val_type = val;
2245     }
2246 
2247     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2248 }
2249 
/*
 * Format a human-readable name for TS into BUF: globals/fixed by name,
 * TB temps as "locN", EBB temps as "tmpN", constants by value.
 * Returns BUF.
 */
static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
                                 TCGTemp *ts)
{
    int idx = temp_idx(ts);

    switch (ts->kind) {
    case TEMP_FIXED:
    case TEMP_GLOBAL:
        pstrcpy(buf, buf_size, ts->name);
        break;
    case TEMP_TB:
        snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
        break;
    case TEMP_EBB:
        snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
        break;
    case TEMP_CONST:
        switch (ts->type) {
        case TCG_TYPE_I32:
            snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
            break;
#if TCG_TARGET_REG_BITS > 32
        case TCG_TYPE_I64:
            snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
            break;
#endif
        case TCG_TYPE_V64:
        case TCG_TYPE_V128:
        case TCG_TYPE_V256:
            /* Vector constants carry their bit width in the name. */
            snprintf(buf, buf_size, "v%d$0x%" PRIx64,
                     64 << (ts->type - TCG_TYPE_V64), ts->val);
            break;
        default:
            g_assert_not_reached();
        }
        break;
    }
    return buf;
}
2289 
2290 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2291                              int buf_size, TCGArg arg)
2292 {
2293     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2294 }
2295 
/* Names for TCGCond values, used when dumping ops. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
2311 
/* Names for memory ops, indexed by the MO_BSWAP|MO_SSIZE bits of MemOp. */
static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEUQ] = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEUQ] = "beq",
    [MO_128 + MO_BE] = "beo",
    [MO_128 + MO_LE] = "leo",
};
2329 
/* Alignment annotations, indexed by the MO_AMASK field of MemOp. */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
2340 
/* Atomicity annotations, indexed by the MO_ATOM_MASK field of MemOp. */
static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
    [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
    [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
    [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
    [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
    [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
    [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
};
2349 
/* Names for the valid TCG_BSWAP_* flag combinations on bswap ops. */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
2357 
2358 static inline bool tcg_regset_single(TCGRegSet d)
2359 {
2360     return (d & (d - 1)) == 0;
2361 }
2362 
2363 static inline TCGReg tcg_regset_first(TCGRegSet d)
2364 {
2365     if (TCG_TARGET_NB_REGS <= 32) {
2366         return ctz32(d);
2367     } else {
2368         return ctz64(d);
2369     }
2370 }
2371 
2372 /* Return only the number of characters output -- no error return. */
2373 #define ne_fprintf(...) \
2374     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2375 
/*
 * Dump every op in the current context to F, one per line.  When
 * HAVE_PREFS is set, append output register preferences; liveness
 * sync/dead flags are appended whenever op->life is non-zero.
 */
static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
{
    char buf[128];
    TCGOp *op;

    QTAILQ_FOREACH(op, &s->ops, link) {
        /* k indexes op->args; i counts constant args already printed. */
        int i, k, nb_oargs, nb_iargs, nb_cargs;
        const TCGOpDef *def;
        TCGOpcode c;
        int col = 0;

        c = op->opc;
        def = &tcg_op_defs[c];

        if (c == INDEX_op_insn_start) {
            nb_oargs = 0;
            col += ne_fprintf(f, "\n ----");

            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                col += ne_fprintf(f, " %016" PRIx64,
                                  tcg_get_insn_start_param(op, i));
            }
        } else if (c == INDEX_op_call) {
            const TCGHelperInfo *info = tcg_call_info(op);
            void *func = tcg_call_func(op);

            /* variable number of arguments */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            nb_cargs = def->nb_cargs;

            col += ne_fprintf(f, " %s ", def->name);

            /*
             * Print the function name from TCGHelperInfo, if available.
             * Note that plugins have a template function for the info,
             * but the actual function pointer comes from the plugin.
             */
            if (func == info->func) {
                col += ne_fprintf(f, "%s", info->name);
            } else {
                col += ne_fprintf(f, "plugin(%p)", func);
            }

            col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
            for (i = 0; i < nb_oargs; i++) {
                col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
                                                            op->args[i]));
            }
            for (i = 0; i < nb_iargs; i++) {
                TCGArg arg = op->args[nb_oargs + i];
                const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
                col += ne_fprintf(f, ",%s", t);
            }
        } else {
            col += ne_fprintf(f, " %s ", def->name);

            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            nb_cargs = def->nb_cargs;

            if (def->flags & TCG_OPF_VECTOR) {
                col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
                                  8 << TCGOP_VECE(op));
            }

            /* Print outputs then inputs, comma-separated. */
            k = 0;
            for (i = 0; i < nb_oargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            for (i = 0; i < nb_iargs; i++) {
                const char *sep =  k ? "," : "";
                col += ne_fprintf(f, "%s%s", sep,
                                  tcg_get_arg_str(s, buf, sizeof(buf),
                                                  op->args[k++]));
            }
            /* First constant arg may have a symbolic form per opcode. */
            switch (c) {
            case INDEX_op_brcond_i32:
            case INDEX_op_setcond_i32:
            case INDEX_op_movcond_i32:
            case INDEX_op_brcond2_i32:
            case INDEX_op_setcond2_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_setcond_i64:
            case INDEX_op_movcond_i64:
            case INDEX_op_cmp_vec:
            case INDEX_op_cmpsel_vec:
                if (op->args[k] < ARRAY_SIZE(cond_name)
                    && cond_name[op->args[k]]) {
                    col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
                } else {
                    col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
                }
                i = 1;
                break;
            case INDEX_op_qemu_ld_a32_i32:
            case INDEX_op_qemu_ld_a64_i32:
            case INDEX_op_qemu_st_a32_i32:
            case INDEX_op_qemu_st_a64_i32:
            case INDEX_op_qemu_st8_a32_i32:
            case INDEX_op_qemu_st8_a64_i32:
            case INDEX_op_qemu_ld_a32_i64:
            case INDEX_op_qemu_ld_a64_i64:
            case INDEX_op_qemu_st_a32_i64:
            case INDEX_op_qemu_st_a64_i64:
            case INDEX_op_qemu_ld_a32_i128:
            case INDEX_op_qemu_ld_a64_i128:
            case INDEX_op_qemu_st_a32_i128:
            case INDEX_op_qemu_st_a64_i128:
                {
                    const char *s_al, *s_op, *s_at;
                    MemOpIdx oi = op->args[k++];
                    /* NOTE: this MemOp deliberately shadows TCGOp *op. */
                    MemOp op = get_memop(oi);
                    unsigned ix = get_mmuidx(oi);

                    s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
                    s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
                    s_at = atom_name[(op & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
                    op &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);

                    /* If all fields are accounted for, print symbolically. */
                    if (!op && s_al && s_op && s_at) {
                        col += ne_fprintf(f, ",%s%s%s,%u",
                                          s_at, s_al, s_op, ix);
                    } else {
                        op = get_memop(oi);
                        col += ne_fprintf(f, ",$0x%x,%u", op, ix);
                    }
                    i = 1;
                }
                break;
            case INDEX_op_bswap16_i32:
            case INDEX_op_bswap16_i64:
            case INDEX_op_bswap32_i32:
            case INDEX_op_bswap32_i64:
            case INDEX_op_bswap64_i64:
                {
                    TCGArg flags = op->args[k];
                    const char *name = NULL;

                    if (flags < ARRAY_SIZE(bswap_flag_name)) {
                        name = bswap_flag_name[flags];
                    }
                    if (name) {
                        col += ne_fprintf(f, ",%s", name);
                    } else {
                        col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
                    }
                    i = k = 1;
                }
                break;
            default:
                i = 0;
                break;
            }
            /* Branch targets and memory barriers also print symbolically. */
            switch (c) {
            case INDEX_op_set_label:
            case INDEX_op_br:
            case INDEX_op_brcond_i32:
            case INDEX_op_brcond_i64:
            case INDEX_op_brcond2_i32:
                col += ne_fprintf(f, "%s$L%d", k ? "," : "",
                                  arg_label(op->args[k])->id);
                i++, k++;
                break;
            case INDEX_op_mb:
                {
                    TCGBar membar = op->args[k];
                    const char *b_op, *m_op;

                    /* Barrier strength: acquire/release/sequential. */
                    switch (membar & TCG_BAR_SC) {
                    case 0:
                        b_op = "none";
                        break;
                    case TCG_BAR_LDAQ:
                        b_op = "acq";
                        break;
                    case TCG_BAR_STRL:
                        b_op = "rel";
                        break;
                    case TCG_BAR_SC:
                        b_op = "seq";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    /* Ordered access pairs: r=load, w=store. */
                    switch (membar & TCG_MO_ALL) {
                    case 0:
                        m_op = "none";
                        break;
                    case TCG_MO_LD_LD:
                        m_op = "rr";
                        break;
                    case TCG_MO_LD_ST:
                        m_op = "rw";
                        break;
                    case TCG_MO_ST_LD:
                        m_op = "wr";
                        break;
                    case TCG_MO_ST_ST:
                        m_op = "ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST:
                        m_op = "rr+rw";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD:
                        m_op = "rr+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_ST:
                        m_op = "rr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rw+wr";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rw+ww";
                        break;
                    case TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "wr+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
                        m_op = "rr+rw+wr";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
                        m_op = "rr+rw+ww";
                        break;
                    case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rr+wr+ww";
                        break;
                    case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
                        m_op = "rw+wr+ww";
                        break;
                    case TCG_MO_ALL:
                        m_op = "all";
                        break;
                    default:
                        g_assert_not_reached();
                    }

                    col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
                    i++, k++;
                }
                break;
            default:
                break;
            }
            /* Remaining constant args print as raw hex. */
            for (; i < nb_cargs; i++, k++) {
                col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
                                  op->args[k]);
            }
        }

        /* Pad to a fixed column before liveness/preference annotations. */
        if (have_prefs || op->life) {
            for (; col < 40; ++col) {
                putc(' ', f);
            }
        }

        if (op->life) {
            unsigned life = op->life;

            /* Which outputs must be synced back to memory. */
            if (life & (SYNC_ARG * 3)) {
                ne_fprintf(f, "  sync:");
                for (i = 0; i < 2; ++i) {
                    if (life & (SYNC_ARG << i)) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
            /* Which args are dead after this op. */
            life /= DEAD_ARG;
            if (life) {
                ne_fprintf(f, "  dead:");
                for (i = 0; life; ++i, life >>= 1) {
                    if (life & 1) {
                        ne_fprintf(f, " %d", i);
                    }
                }
            }
        }

        if (have_prefs) {
            for (i = 0; i < nb_oargs; ++i) {
                TCGRegSet set = output_pref(op, i);

                if (i == 0) {
                    ne_fprintf(f, "  pref=");
                } else {
                    ne_fprintf(f, ",");
                }
                if (set == 0) {
                    ne_fprintf(f, "none");
                } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
                    ne_fprintf(f, "all");
#ifdef CONFIG_DEBUG_TCG
                } else if (tcg_regset_single(set)) {
                    TCGReg reg = tcg_regset_first(set);
                    ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
#endif
                } else if (TCG_TARGET_NB_REGS <= 32) {
                    ne_fprintf(f, "0x%x", (uint32_t)set);
                } else {
                    ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
                }
            }
        }

        putc('\n', f);
    }
}
2689 
2690 /* we give more priority to constraints with less registers */
2691 static int get_constraint_priority(const TCGOpDef *def, int k)
2692 {
2693     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2694     int n = ctpop64(arg_ct->regs);
2695 
2696     /*
2697      * Sort constraints of a single register first, which includes output
2698      * aliases (which must exactly match the input already allocated).
2699      */
2700     if (n == 1 || arg_ct->oalias) {
2701         return INT_MAX;
2702     }
2703 
2704     /*
2705      * Sort register pairs next, first then second immediately after.
2706      * Arbitrarily sort multiple pairs by the index of the first reg;
2707      * there shouldn't be many pairs.
2708      */
2709     switch (arg_ct->pair) {
2710     case 1:
2711     case 3:
2712         return (k + 1) * 2;
2713     case 2:
2714         return (arg_ct->pair_index + 1) * 2 - 1;
2715     }
2716 
2717     /* Finally, sort by decreasing register count. */
2718     assert(n > 1);
2719     return -n;
2720 }
2721 
2722 /* sort from highest priority to lowest */
2723 static void sort_constraints(TCGOpDef *def, int start, int n)
2724 {
2725     int i, j;
2726     TCGArgConstraint *a = def->args_ct;
2727 
2728     for (i = 0; i < n; i++) {
2729         a[start + i].sort_index = start + i;
2730     }
2731     if (n <= 1) {
2732         return;
2733     }
2734     for (i = 0; i < n - 1; i++) {
2735         for (j = i + 1; j < n; j++) {
2736             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2737             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2738             if (p1 < p2) {
2739                 int tmp = a[start + i].sort_index;
2740                 a[start + i].sort_index = a[start + j].sort_index;
2741                 a[start + j].sort_index = tmp;
2742             }
2743         }
2744     }
2745 }
2746 
/*
 * Expand each target constraint string into the args_ct[] array of the
 * corresponding TCGOpDef: candidate register sets, constant flags,
 * input/output aliasing, and register-pair linkage.  Finally sort the
 * constraints of each op for the register allocator.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        bool saw_alias_pair = false;
        int i, o, i2, o2, nb_args;

        /* Skip opcodes the target does not implement. */
        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Outputs come first in the args array; the rest are inputs. */
            bool input_p = i >= def->nb_oargs;

            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            switch (*ct_str) {
            case '0' ... '9':
                /* Input aliased to output number *ct_str - '0'. */
                o = *ct_str - '0';
                tcg_debug_assert(input_p);
                tcg_debug_assert(o < def->nb_oargs);
                tcg_debug_assert(def->args_ct[o].regs != 0);
                tcg_debug_assert(!def->args_ct[o].oalias);
                def->args_ct[i] = def->args_ct[o];
                /* The output sets oalias.  */
                def->args_ct[o].oalias = 1;
                def->args_ct[o].alias_index = i;
                /* The input sets ialias. */
                def->args_ct[i].ialias = 1;
                def->args_ct[i].alias_index = o;
                /* Pair state copied from the output; fixed up below. */
                if (def->args_ct[i].pair) {
                    saw_alias_pair = true;
                }
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case '&':
                /* Output must be a new register, distinct from inputs. */
                tcg_debug_assert(!input_p);
                def->args_ct[i].newreg = true;
                ct_str++;
                break;

            case 'p': /* plus */
                /* Allocate to the register after the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 2,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs << 1,
                };
                def->args_ct[o].pair = 1;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;

            case 'm': /* minus */
                /* Allocate to the register before the previous. */
                tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
                o = i - 1;
                tcg_debug_assert(!def->args_ct[o].pair);
                tcg_debug_assert(!def->args_ct[o].ct);
                def->args_ct[i] = (TCGArgConstraint){
                    .pair = 1,
                    .pair_index = o,
                    .regs = def->args_ct[o].regs >> 1,
                };
                def->args_ct[o].pair = 2;
                def->args_ct[o].pair_index = i;
                tcg_debug_assert(ct_str[1] == '\0');
                continue;
            }

            /* Remaining letters accumulate register sets / const masks. */
            do {
                switch (*ct_str) {
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    break;

                /* Include all of the target-specific constraints. */

#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                case '0' ... '9':
                case '&':
                case 'p':
                case 'm':
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            } while (*++ct_str != '\0');
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /*
         * Fix up output pairs that are aliased with inputs.
         * When we created the alias, we copied pair from the output.
         * There are three cases:
         *    (1a) Pairs of inputs alias pairs of outputs.
         *    (1b) One input aliases the first of a pair of outputs.
         *    (2)  One input aliases the second of a pair of outputs.
         *
         * Case 1a is handled by making sure that the pair_index'es are
         * properly updated so that they appear the same as a pair of inputs.
         *
         * Case 1b is handled by setting the pair_index of the input to
         * itself, simply so it doesn't point to an unrelated argument.
         * Since we don't encounter the "second" during the input allocation
         * phase, nothing happens with the second half of the input pair.
         *
         * Case 2 is handled by setting the second input to pair=3, the
         * first output to pair=3, and the pair_index'es to match.
         */
        if (saw_alias_pair) {
            for (i = def->nb_oargs; i < nb_args; i++) {
                /*
                 * Since [0-9pm] must be alone in the constraint string,
                 * the only way they can both be set is if the pair comes
                 * from the output alias.
                 */
                if (!def->args_ct[i].ialias) {
                    continue;
                }
                switch (def->args_ct[i].pair) {
                case 0:
                    break;
                case 1:
                    /* Input aliases the first half of an output pair. */
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 1);
                    tcg_debug_assert(def->args_ct[o2].pair == 2);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 2);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 1b */
                        def->args_ct[i].pair_index = i;
                    }
                    break;
                case 2:
                    /* Input aliases the second half of an output pair. */
                    o = def->args_ct[i].alias_index;
                    o2 = def->args_ct[o].pair_index;
                    tcg_debug_assert(def->args_ct[o].pair == 2);
                    tcg_debug_assert(def->args_ct[o2].pair == 1);
                    if (def->args_ct[o2].oalias) {
                        /* Case 1a */
                        i2 = def->args_ct[o2].alias_index;
                        tcg_debug_assert(def->args_ct[i2].pair == 1);
                        def->args_ct[i2].pair_index = i;
                        def->args_ct[i].pair_index = i2;
                    } else {
                        /* Case 2 */
                        def->args_ct[i].pair = 3;
                        def->args_ct[o2].pair = 3;
                        def->args_ct[i].pair_index = o2;
                        def->args_ct[o2].pair_index = i;
                    }
                    break;
                default:
                    g_assert_not_reached();
                }
            }
        }

        /* sort the constraints (XXX: this is just a heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2951 
2952 static void remove_label_use(TCGOp *op, int idx)
2953 {
2954     TCGLabel *label = arg_label(op->args[idx]);
2955     TCGLabelUse *use;
2956 
2957     QSIMPLEQ_FOREACH(use, &label->branches, next) {
2958         if (use->op == op) {
2959             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
2960             return;
2961         }
2962     }
2963     g_assert_not_reached();
2964 }
2965 
2966 void tcg_op_remove(TCGContext *s, TCGOp *op)
2967 {
2968     switch (op->opc) {
2969     case INDEX_op_br:
2970         remove_label_use(op, 0);
2971         break;
2972     case INDEX_op_brcond_i32:
2973     case INDEX_op_brcond_i64:
2974         remove_label_use(op, 3);
2975         break;
2976     case INDEX_op_brcond2_i32:
2977         remove_label_use(op, 5);
2978         break;
2979     default:
2980         break;
2981     }
2982 
2983     QTAILQ_REMOVE(&s->ops, op, link);
2984     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2985     s->nb_ops--;
2986 
2987 #ifdef CONFIG_PROFILER
2988     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2989 #endif
2990 }
2991 
2992 void tcg_remove_ops_after(TCGOp *op)
2993 {
2994     TCGContext *s = tcg_ctx;
2995 
2996     while (true) {
2997         TCGOp *last = tcg_last_op();
2998         if (last == op) {
2999             return;
3000         }
3001         tcg_op_remove(s, last);
3002     }
3003 }
3004 
3005 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3006 {
3007     TCGContext *s = tcg_ctx;
3008     TCGOp *op = NULL;
3009 
3010     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3011         QTAILQ_FOREACH(op, &s->free_ops, link) {
3012             if (nargs <= op->nargs) {
3013                 QTAILQ_REMOVE(&s->free_ops, op, link);
3014                 nargs = op->nargs;
3015                 goto found;
3016             }
3017         }
3018     }
3019 
3020     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3021     nargs = MAX(4, nargs);
3022     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3023 
3024  found:
3025     memset(op, 0, offsetof(TCGOp, link));
3026     op->opc = opc;
3027     op->nargs = nargs;
3028 
3029     /* Check for bitfield overflow. */
3030     tcg_debug_assert(op->nargs == nargs);
3031 
3032     s->nb_ops++;
3033     return op;
3034 }
3035 
3036 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3037 {
3038     TCGOp *op = tcg_op_alloc(opc, nargs);
3039     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3040     return op;
3041 }
3042 
3043 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3044                             TCGOpcode opc, unsigned nargs)
3045 {
3046     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3047     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3048     return new_op;
3049 }
3050 
3051 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3052                            TCGOpcode opc, unsigned nargs)
3053 {
3054     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3055     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3056     return new_op;
3057 }
3058 
3059 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3060 {
3061     TCGLabelUse *u;
3062 
3063     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3064         TCGOp *op = u->op;
3065         switch (op->opc) {
3066         case INDEX_op_br:
3067             op->args[0] = label_arg(to);
3068             break;
3069         case INDEX_op_brcond_i32:
3070         case INDEX_op_brcond_i64:
3071             op->args[3] = label_arg(to);
3072             break;
3073         case INDEX_op_brcond2_i32:
3074             op->args[5] = label_arg(to);
3075             break;
3076         default:
3077             g_assert_not_reached();
3078         }
3079     }
3080 
3081     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3082 }
3083 
/*
 * Reachable analysis : remove unreachable code.
 * Walk the op stream forward, tracking whether the current position is
 * reachable ("dead" = false); delete ops in dead regions, unused labels,
 * and branches that merely fall through to the next label.
 */
static void __attribute__((noinline))
reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next, *op_prev;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        /* By default, an op in a dead region is removed. */
        bool remove = dead;
        TCGLabel *label;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);

            /*
             * Note that the first op in the TB is always a load,
             * so there is always something before a label.
             */
            op_prev = QTAILQ_PREV(op, link);

            /*
             * If we find two sequential labels, move all branches to
             * reference the second label and remove the first label.
             * Do this before branch to next optimization, so that the
             * middle label is out of the way.
             */
            if (op_prev->opc == INDEX_op_set_label) {
                move_label_uses(label, arg_label(op_prev->args[0]));
                tcg_op_remove(s, op_prev);
                op_prev = QTAILQ_PREV(op, link);
            }

            /*
             * Optimization can fold conditional branches to unconditional.
             * If we find a label which is preceded by an unconditional
             * branch to next, remove the branch.  We couldn't do this when
             * processing the branch because any dead code between the branch
             * and label had not yet been removed.
             */
            if (op_prev->opc == INDEX_op_br &&
                label == arg_label(op_prev->args[0])) {
                tcg_op_remove(s, op_prev);
                /* Fall through means insns become live again.  */
                dead = false;
            }

            if (QSIMPLEQ_EMPTY(&label->branches)) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
3175 
3176 #define TS_DEAD  1
3177 #define TS_MEM   2
3178 
3179 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3180 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
3181 
3182 /* For liveness_pass_1, the register preferences for a given temp.  */
3183 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3184 {
3185     return ts->state_ptr;
3186 }
3187 
3188 /* For liveness_pass_1, reset the preferences for a given temp to the
3189  * maximal regset for its type.
3190  */
3191 static inline void la_reset_pref(TCGTemp *ts)
3192 {
3193     *la_temp_pref(ts)
3194         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3195 }
3196 
3197 /* liveness analysis: end of function: all temps are dead, and globals
3198    should be in memory. */
3199 static void la_func_end(TCGContext *s, int ng, int nt)
3200 {
3201     int i;
3202 
3203     for (i = 0; i < ng; ++i) {
3204         s->temps[i].state = TS_DEAD | TS_MEM;
3205         la_reset_pref(&s->temps[i]);
3206     }
3207     for (i = ng; i < nt; ++i) {
3208         s->temps[i].state = TS_DEAD;
3209         la_reset_pref(&s->temps[i]);
3210     }
3211 }
3212 
3213 /* liveness analysis: end of basic block: all temps are dead, globals
3214    and local temps should be in memory. */
3215 static void la_bb_end(TCGContext *s, int ng, int nt)
3216 {
3217     int i;
3218 
3219     for (i = 0; i < nt; ++i) {
3220         TCGTemp *ts = &s->temps[i];
3221         int state;
3222 
3223         switch (ts->kind) {
3224         case TEMP_FIXED:
3225         case TEMP_GLOBAL:
3226         case TEMP_TB:
3227             state = TS_DEAD | TS_MEM;
3228             break;
3229         case TEMP_EBB:
3230         case TEMP_CONST:
3231             state = TS_DEAD;
3232             break;
3233         default:
3234             g_assert_not_reached();
3235         }
3236         ts->state = state;
3237         la_reset_pref(ts);
3238     }
3239 }
3240 
3241 /* liveness analysis: sync globals back to memory.  */
3242 static void la_global_sync(TCGContext *s, int ng)
3243 {
3244     int i;
3245 
3246     for (i = 0; i < ng; ++i) {
3247         int state = s->temps[i].state;
3248         s->temps[i].state = state | TS_MEM;
3249         if (state == TS_DEAD) {
3250             /* If the global was previously dead, reset prefs.  */
3251             la_reset_pref(&s->temps[i]);
3252         }
3253     }
3254 }
3255 
/*
 * liveness analysis: conditional branch: all temps are dead unless
 * explicitly live-across-conditional-branch, globals and local temps
 * should be synced.
 */
static void la_bb_sync(TCGContext *s, int ng, int nt)
{
    /* Globals are handled separately; see la_global_sync.  */
    la_global_sync(s, ng);

    for (int i = ng; i < nt; ++i) {
        TCGTemp *ts = &s->temps[i];
        int state;

        switch (ts->kind) {
        case TEMP_TB:
            state = ts->state;
            ts->state = state | TS_MEM;
            if (state != TS_DEAD) {
                /* Still live: keep the existing preferences.  */
                continue;
            }
            break;
        case TEMP_EBB:
        case TEMP_CONST:
            /* Dead across the branch anyway; nothing to sync.  */
            continue;
        default:
            g_assert_not_reached();
        }
        /* Only reached for a TEMP_TB that was previously dead.  */
        la_reset_pref(&s->temps[i]);
    }
}
3286 
3287 /* liveness analysis: sync globals back to memory and kill.  */
3288 static void la_global_kill(TCGContext *s, int ng)
3289 {
3290     int i;
3291 
3292     for (i = 0; i < ng; i++) {
3293         s->temps[i].state = TS_DEAD | TS_MEM;
3294         la_reset_pref(&s->temps[i]);
3295     }
3296 }
3297 
3298 /* liveness analysis: note live globals crossing calls.  */
3299 static void la_cross_call(TCGContext *s, int nt)
3300 {
3301     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3302     int i;
3303 
3304     for (i = 0; i < nt; i++) {
3305         TCGTemp *ts = &s->temps[i];
3306         if (!(ts->state & TS_DEAD)) {
3307             TCGRegSet *pset = la_temp_pref(ts);
3308             TCGRegSet set = *pset;
3309 
3310             set &= mask;
3311             /* If the combination is not possible, restart.  */
3312             if (set == 0) {
3313                 set = tcg_target_available_regs[ts->type] & mask;
3314             }
3315             *pset = set;
3316         }
3317     }
3318 }
3319 
/*
 * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
 * to TEMP_EBB, if possible.
 */
static void __attribute__((noinline))
liveness_pass_0(TCGContext *s)
{
    /* Sentinel: temp is referenced from more than one EBB. */
    void * const multiple_ebb = (void *)(uintptr_t)-1;
    int nb_temps = s->nb_temps;
    TCGOp *op, *ebb;

    /* Clear per-temp tracking state for all non-global temps. */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        s->temps[i].state_ptr = NULL;
    }

    /*
     * Represent each EBB by the op at which it begins.  In the case of
     * the first EBB, this is the first op, otherwise it is a label.
     * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
     * within a single EBB, else MULTIPLE_EBB.
     */
    ebb = QTAILQ_FIRST(&s->ops);
    QTAILQ_FOREACH(op, &s->ops, link) {
        const TCGOpDef *def;
        int nb_oargs, nb_iargs;

        switch (op->opc) {
        case INDEX_op_set_label:
            /* A label starts a new EBB. */
            ebb = op;
            continue;
        case INDEX_op_discard:
            continue;
        case INDEX_op_call:
            /* Calls carry their operand counts in the op itself. */
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            break;
        default:
            def = &tcg_op_defs[op->opc];
            nb_oargs = def->nb_oargs;
            nb_iargs = def->nb_iargs;
            break;
        }

        for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
            TCGTemp *ts = arg_temp(op->args[i]);

            if (ts->kind != TEMP_TB) {
                continue;
            }
            if (ts->state_ptr == NULL) {
                ts->state_ptr = ebb;
            } else if (ts->state_ptr != ebb) {
                ts->state_ptr = multiple_ebb;
            }
        }
    }

    /*
     * For TEMP_TB that turned out not to be used beyond one EBB,
     * reduce the liveness to TEMP_EBB.
     */
    for (int i = s->nb_globals; i < nb_temps; ++i) {
        TCGTemp *ts = &s->temps[i];
        if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
            ts->kind = TEMP_EBB;
        }
    }
}
3388 
3389 /* Liveness analysis : update the opc_arg_life array to tell if a
3390    given input arguments is dead. Instructions updating dead
3391    temporaries are removed. */
3392 static void __attribute__((noinline))
3393 liveness_pass_1(TCGContext *s)
3394 {
3395     int nb_globals = s->nb_globals;
3396     int nb_temps = s->nb_temps;
3397     TCGOp *op, *op_prev;
3398     TCGRegSet *prefs;
3399     int i;
3400 
3401     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3402     for (i = 0; i < nb_temps; ++i) {
3403         s->temps[i].state_ptr = prefs + i;
3404     }
3405 
3406     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3407     la_func_end(s, nb_globals, nb_temps);
3408 
3409     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3410         int nb_iargs, nb_oargs;
3411         TCGOpcode opc_new, opc_new2;
3412         bool have_opc_new2;
3413         TCGLifeData arg_life = 0;
3414         TCGTemp *ts;
3415         TCGOpcode opc = op->opc;
3416         const TCGOpDef *def = &tcg_op_defs[opc];
3417 
3418         switch (opc) {
3419         case INDEX_op_call:
3420             {
3421                 const TCGHelperInfo *info = tcg_call_info(op);
3422                 int call_flags = tcg_call_flags(op);
3423 
3424                 nb_oargs = TCGOP_CALLO(op);
3425                 nb_iargs = TCGOP_CALLI(op);
3426 
3427                 /* pure functions can be removed if their result is unused */
3428                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3429                     for (i = 0; i < nb_oargs; i++) {
3430                         ts = arg_temp(op->args[i]);
3431                         if (ts->state != TS_DEAD) {
3432                             goto do_not_remove_call;
3433                         }
3434                     }
3435                     goto do_remove;
3436                 }
3437             do_not_remove_call:
3438 
3439                 /* Output args are dead.  */
3440                 for (i = 0; i < nb_oargs; i++) {
3441                     ts = arg_temp(op->args[i]);
3442                     if (ts->state & TS_DEAD) {
3443                         arg_life |= DEAD_ARG << i;
3444                     }
3445                     if (ts->state & TS_MEM) {
3446                         arg_life |= SYNC_ARG << i;
3447                     }
3448                     ts->state = TS_DEAD;
3449                     la_reset_pref(ts);
3450                 }
3451 
3452                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3453                 memset(op->output_pref, 0, sizeof(op->output_pref));
3454 
3455                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3456                                     TCG_CALL_NO_READ_GLOBALS))) {
3457                     la_global_kill(s, nb_globals);
3458                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3459                     la_global_sync(s, nb_globals);
3460                 }
3461 
3462                 /* Record arguments that die in this helper.  */
3463                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3464                     ts = arg_temp(op->args[i]);
3465                     if (ts->state & TS_DEAD) {
3466                         arg_life |= DEAD_ARG << i;
3467                     }
3468                 }
3469 
3470                 /* For all live registers, remove call-clobbered prefs.  */
3471                 la_cross_call(s, nb_temps);
3472 
3473                 /*
3474                  * Input arguments are live for preceding opcodes.
3475                  *
3476                  * For those arguments that die, and will be allocated in
3477                  * registers, clear the register set for that arg, to be
3478                  * filled in below.  For args that will be on the stack,
3479                  * reset to any available reg.  Process arguments in reverse
3480                  * order so that if a temp is used more than once, the stack
3481                  * reset to max happens before the register reset to 0.
3482                  */
3483                 for (i = nb_iargs - 1; i >= 0; i--) {
3484                     const TCGCallArgumentLoc *loc = &info->in[i];
3485                     ts = arg_temp(op->args[nb_oargs + i]);
3486 
3487                     if (ts->state & TS_DEAD) {
3488                         switch (loc->kind) {
3489                         case TCG_CALL_ARG_NORMAL:
3490                         case TCG_CALL_ARG_EXTEND_U:
3491                         case TCG_CALL_ARG_EXTEND_S:
3492                             if (arg_slot_reg_p(loc->arg_slot)) {
3493                                 *la_temp_pref(ts) = 0;
3494                                 break;
3495                             }
3496                             /* fall through */
3497                         default:
3498                             *la_temp_pref(ts) =
3499                                 tcg_target_available_regs[ts->type];
3500                             break;
3501                         }
3502                         ts->state &= ~TS_DEAD;
3503                     }
3504                 }
3505 
3506                 /*
3507                  * For each input argument, add its input register to prefs.
3508                  * If a temp is used once, this produces a single set bit;
3509                  * if a temp is used multiple times, this produces a set.
3510                  */
3511                 for (i = 0; i < nb_iargs; i++) {
3512                     const TCGCallArgumentLoc *loc = &info->in[i];
3513                     ts = arg_temp(op->args[nb_oargs + i]);
3514 
3515                     switch (loc->kind) {
3516                     case TCG_CALL_ARG_NORMAL:
3517                     case TCG_CALL_ARG_EXTEND_U:
3518                     case TCG_CALL_ARG_EXTEND_S:
3519                         if (arg_slot_reg_p(loc->arg_slot)) {
3520                             tcg_regset_set_reg(*la_temp_pref(ts),
3521                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3522                         }
3523                         break;
3524                     default:
3525                         break;
3526                     }
3527                 }
3528             }
3529             break;
3530         case INDEX_op_insn_start:
3531             break;
3532         case INDEX_op_discard:
3533             /* mark the temporary as dead */
3534             ts = arg_temp(op->args[0]);
3535             ts->state = TS_DEAD;
3536             la_reset_pref(ts);
3537             break;
3538 
3539         case INDEX_op_add2_i32:
3540             opc_new = INDEX_op_add_i32;
3541             goto do_addsub2;
3542         case INDEX_op_sub2_i32:
3543             opc_new = INDEX_op_sub_i32;
3544             goto do_addsub2;
3545         case INDEX_op_add2_i64:
3546             opc_new = INDEX_op_add_i64;
3547             goto do_addsub2;
3548         case INDEX_op_sub2_i64:
3549             opc_new = INDEX_op_sub_i64;
3550         do_addsub2:
3551             nb_iargs = 4;
3552             nb_oargs = 2;
3553             /* Test if the high part of the operation is dead, but not
3554                the low part.  The result can be optimized to a simple
3555                add or sub.  This happens often for x86_64 guest when the
3556                cpu mode is set to 32 bit.  */
3557             if (arg_temp(op->args[1])->state == TS_DEAD) {
3558                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3559                     goto do_remove;
3560                 }
3561                 /* Replace the opcode and adjust the args in place,
3562                    leaving 3 unused args at the end.  */
3563                 op->opc = opc = opc_new;
3564                 op->args[1] = op->args[2];
3565                 op->args[2] = op->args[4];
3566                 /* Fall through and mark the single-word operation live.  */
3567                 nb_iargs = 2;
3568                 nb_oargs = 1;
3569             }
3570             goto do_not_remove;
3571 
3572         case INDEX_op_mulu2_i32:
3573             opc_new = INDEX_op_mul_i32;
3574             opc_new2 = INDEX_op_muluh_i32;
3575             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3576             goto do_mul2;
3577         case INDEX_op_muls2_i32:
3578             opc_new = INDEX_op_mul_i32;
3579             opc_new2 = INDEX_op_mulsh_i32;
3580             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3581             goto do_mul2;
3582         case INDEX_op_mulu2_i64:
3583             opc_new = INDEX_op_mul_i64;
3584             opc_new2 = INDEX_op_muluh_i64;
3585             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3586             goto do_mul2;
3587         case INDEX_op_muls2_i64:
3588             opc_new = INDEX_op_mul_i64;
3589             opc_new2 = INDEX_op_mulsh_i64;
3590             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3591             goto do_mul2;
3592         do_mul2:
3593             nb_iargs = 2;
3594             nb_oargs = 2;
3595             if (arg_temp(op->args[1])->state == TS_DEAD) {
3596                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3597                     /* Both parts of the operation are dead.  */
3598                     goto do_remove;
3599                 }
3600                 /* The high part of the operation is dead; generate the low. */
3601                 op->opc = opc = opc_new;
3602                 op->args[1] = op->args[2];
3603                 op->args[2] = op->args[3];
3604             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3605                 /* The low part of the operation is dead; generate the high. */
3606                 op->opc = opc = opc_new2;
3607                 op->args[0] = op->args[1];
3608                 op->args[1] = op->args[2];
3609                 op->args[2] = op->args[3];
3610             } else {
3611                 goto do_not_remove;
3612             }
3613             /* Mark the single-word operation live.  */
3614             nb_oargs = 1;
3615             goto do_not_remove;
3616 
3617         default:
3618             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3619             nb_iargs = def->nb_iargs;
3620             nb_oargs = def->nb_oargs;
3621 
3622             /* Test if the operation can be removed because all
3623                its outputs are dead. We assume that nb_oargs == 0
3624                implies side effects */
3625             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3626                 for (i = 0; i < nb_oargs; i++) {
3627                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3628                         goto do_not_remove;
3629                     }
3630                 }
3631                 goto do_remove;
3632             }
3633             goto do_not_remove;
3634 
3635         do_remove:
3636             tcg_op_remove(s, op);
3637             break;
3638 
3639         do_not_remove:
3640             for (i = 0; i < nb_oargs; i++) {
3641                 ts = arg_temp(op->args[i]);
3642 
3643                 /* Remember the preference of the uses that followed.  */
3644                 if (i < ARRAY_SIZE(op->output_pref)) {
3645                     op->output_pref[i] = *la_temp_pref(ts);
3646                 }
3647 
3648                 /* Output args are dead.  */
3649                 if (ts->state & TS_DEAD) {
3650                     arg_life |= DEAD_ARG << i;
3651                 }
3652                 if (ts->state & TS_MEM) {
3653                     arg_life |= SYNC_ARG << i;
3654                 }
3655                 ts->state = TS_DEAD;
3656                 la_reset_pref(ts);
3657             }
3658 
3659             /* If end of basic block, update.  */
3660             if (def->flags & TCG_OPF_BB_EXIT) {
3661                 la_func_end(s, nb_globals, nb_temps);
3662             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3663                 la_bb_sync(s, nb_globals, nb_temps);
3664             } else if (def->flags & TCG_OPF_BB_END) {
3665                 la_bb_end(s, nb_globals, nb_temps);
3666             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3667                 la_global_sync(s, nb_globals);
3668                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3669                     la_cross_call(s, nb_temps);
3670                 }
3671             }
3672 
3673             /* Record arguments that die in this opcode.  */
3674             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3675                 ts = arg_temp(op->args[i]);
3676                 if (ts->state & TS_DEAD) {
3677                     arg_life |= DEAD_ARG << i;
3678                 }
3679             }
3680 
3681             /* Input arguments are live for preceding opcodes.  */
3682             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3683                 ts = arg_temp(op->args[i]);
3684                 if (ts->state & TS_DEAD) {
3685                     /* For operands that were dead, initially allow
3686                        all regs for the type.  */
3687                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3688                     ts->state &= ~TS_DEAD;
3689                 }
3690             }
3691 
3692             /* Incorporate constraints for this operand.  */
3693             switch (opc) {
3694             case INDEX_op_mov_i32:
3695             case INDEX_op_mov_i64:
3696                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3697                    have proper constraints.  That said, special case
3698                    moves to propagate preferences backward.  */
3699                 if (IS_DEAD_ARG(1)) {
3700                     *la_temp_pref(arg_temp(op->args[0]))
3701                         = *la_temp_pref(arg_temp(op->args[1]));
3702                 }
3703                 break;
3704 
3705             default:
3706                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3707                     const TCGArgConstraint *ct = &def->args_ct[i];
3708                     TCGRegSet set, *pset;
3709 
3710                     ts = arg_temp(op->args[i]);
3711                     pset = la_temp_pref(ts);
3712                     set = *pset;
3713 
3714                     set &= ct->regs;
3715                     if (ct->ialias) {
3716                         set &= output_pref(op, ct->alias_index);
3717                     }
3718                     /* If the combination is not possible, restart.  */
3719                     if (set == 0) {
3720                         set = ct->regs;
3721                     }
3722                     *pset = set;
3723                 }
3724                 break;
3725             }
3726             break;
3727         }
3728         op->life = arg_life;
3729     }
3730 }
3731 
/*
 * Liveness analysis, pass 2: Convert indirect regs to direct temporaries.
 *
 * Each indirect global (one that lives in memory, addressed via another
 * register) is shadowed by a freshly allocated direct TEMP_EBB temp.
 * Explicit load ops are inserted before uses, and store ops after final
 * writes, replacing the implicit memory traffic.  Returns true if any
 * opcode argument was rewritten, in which case the caller must re-run
 * liveness analysis (pass 1) over the modified op stream.
 */
static bool __attribute__((noinline))
liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            dts->temp_subindex = its->temp_subindex;
            dts->kind = TEMP_EBB;
            /* state_ptr links the indirect global to its shadow temp. */
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Non-global temps have no shadow and also begin dead. */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    /* SAFE variant: ops may be inserted/removed during iteration. */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = tcg_call_flags(op);
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_COND_BRANCH) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts && arg_ts->state == TS_DEAD) {
                /* Shadow temp holds no value: insert an explicit load. */
                TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_ld_i32
                                  : INDEX_op_ld_i64);
                TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);

                lop->args[0] = temp_arg(dir_ts);
                lop->args[1] = temp_arg(arg_ts->mem_base);
                lop->args[2] = arg_ts->mem_offset;

                /* Loaded, but synced with memory.  */
                arg_ts->state = TS_MEM;
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[i] = temp_arg(dir_ts);
                changes = true;
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
            /*
             * Special case for mov: if the destination dies immediately
             * after a required sync, forward the mov's source straight
             * into the store and delete the mov itself.
             */
            arg_ts = arg_temp(op->args[0]);
            dir_ts = arg_ts->state_ptr;
            if (dir_ts) {
                op->args[0] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                if (NEED_SYNC_ARG(0)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
                    TCGTemp *out_ts = dir_ts;

                    if (IS_DEAD_ARG(0)) {
                        /* Store the mov source directly; drop the mov. */
                        out_ts = arg_temp(op->args[1]);
                        arg_ts->state = TS_DEAD;
                        tcg_op_remove(s, op);
                    } else {
                        arg_ts->state = TS_MEM;
                    }

                    sop->args[0] = temp_arg(out_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;
                } else {
                    tcg_debug_assert(!IS_DEAD_ARG(0));
                }
            }
        } else {
            for (i = 0; i < nb_oargs; i++) {
                arg_ts = arg_temp(op->args[i]);
                dir_ts = arg_ts->state_ptr;
                if (!dir_ts) {
                    continue;
                }
                op->args[i] = temp_arg(dir_ts);
                changes = true;

                /* The output is now live and modified.  */
                arg_ts->state = 0;

                /* Sync outputs upon their last write.  */
                if (NEED_SYNC_ARG(i)) {
                    TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_st_i32
                                      : INDEX_op_st_i64);
                    TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);

                    sop->args[0] = temp_arg(dir_ts);
                    sop->args[1] = temp_arg(arg_ts->mem_base);
                    sop->args[2] = arg_ts->mem_offset;

                    arg_ts->state = TS_MEM;
                }
                /* Drop outputs that are dead.  */
                if (IS_DEAD_ARG(i)) {
                    arg_ts->state = TS_DEAD;
                }
            }
        }
    }

    return changes;
}
3920 
3921 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3922 {
3923     intptr_t off;
3924     int size, align;
3925 
3926     /* When allocating an object, look at the full type. */
3927     size = tcg_type_size(ts->base_type);
3928     switch (ts->base_type) {
3929     case TCG_TYPE_I32:
3930         align = 4;
3931         break;
3932     case TCG_TYPE_I64:
3933     case TCG_TYPE_V64:
3934         align = 8;
3935         break;
3936     case TCG_TYPE_I128:
3937     case TCG_TYPE_V128:
3938     case TCG_TYPE_V256:
3939         /*
3940          * Note that we do not require aligned storage for V256,
3941          * and that we provide alignment for I128 to match V128,
3942          * even if that's above what the host ABI requires.
3943          */
3944         align = 16;
3945         break;
3946     default:
3947         g_assert_not_reached();
3948     }
3949 
3950     /*
3951      * Assume the stack is sufficiently aligned.
3952      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3953      * and do not require 16 byte vector alignment.  This seems slightly
3954      * easier than fully parameterizing the above switch statement.
3955      */
3956     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3957     off = ROUND_UP(s->current_frame_offset, align);
3958 
3959     /* If we've exhausted the stack frame, restart with a smaller TB. */
3960     if (off + size > s->frame_end) {
3961         tcg_raise_tb_overflow(s);
3962     }
3963     s->current_frame_offset = off + size;
3964 #if defined(__sparc__)
3965     off += TCG_TARGET_STACK_BIAS;
3966 #endif
3967 
3968     /* If the object was subdivided, assign memory to all the parts. */
3969     if (ts->base_type != ts->type) {
3970         int part_size = tcg_type_size(ts->type);
3971         int part_count = size / part_size;
3972 
3973         /*
3974          * Each part is allocated sequentially in tcg_temp_new_internal.
3975          * Jump back to the first part by subtracting the current index.
3976          */
3977         ts -= ts->temp_subindex;
3978         for (int i = 0; i < part_count; ++i) {
3979             ts[i].mem_offset = off + i * part_size;
3980             ts[i].mem_base = s->frame_temp;
3981             ts[i].mem_allocated = 1;
3982         }
3983     } else {
3984         ts->mem_offset = off;
3985         ts->mem_base = s->frame_temp;
3986         ts->mem_allocated = 1;
3987     }
3988 }
3989 
3990 /* Assign @reg to @ts, and update reg_to_temp[]. */
3991 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3992 {
3993     if (ts->val_type == TEMP_VAL_REG) {
3994         TCGReg old = ts->reg;
3995         tcg_debug_assert(s->reg_to_temp[old] == ts);
3996         if (old == reg) {
3997             return;
3998         }
3999         s->reg_to_temp[old] = NULL;
4000     }
4001     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4002     s->reg_to_temp[reg] = ts;
4003     ts->val_type = TEMP_VAL_REG;
4004     ts->reg = reg;
4005 }
4006 
4007 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4008 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4009 {
4010     tcg_debug_assert(type != TEMP_VAL_REG);
4011     if (ts->val_type == TEMP_VAL_REG) {
4012         TCGReg reg = ts->reg;
4013         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4014         s->reg_to_temp[reg] = NULL;
4015     }
4016     ts->val_type = type;
4017 }
4018 
4019 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4020 
4021 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4022    mark it free; otherwise mark it dead.  */
4023 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4024 {
4025     TCGTempVal new_type;
4026 
4027     switch (ts->kind) {
4028     case TEMP_FIXED:
4029         return;
4030     case TEMP_GLOBAL:
4031     case TEMP_TB:
4032         new_type = TEMP_VAL_MEM;
4033         break;
4034     case TEMP_EBB:
4035         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4036         break;
4037     case TEMP_CONST:
4038         new_type = TEMP_VAL_CONST;
4039         break;
4040     default:
4041         g_assert_not_reached();
4042     }
4043     set_temp_val_nonreg(s, ts, new_type);
4044 }
4045 
/* Mark a temporary as dead (positive free_or_dead argument). */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}
4051 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   register needs to be allocated to store a constant.  If 'free_or_dead'
   is non-zero, subsequently release the temporary; if it is positive, the
   temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    /* Read-only temps never need a writeback; coherent ones already match. */
    if (!temp_readonly(ts) && !ts->mem_coherent) {
        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Direct store not possible: materialize in a register first. */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            /* Already resident in memory; nothing to emit. */
            break;

        case TEMP_VAL_DEAD:
        default:
            g_assert_not_reached();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
4095 
4096 /* free register 'reg' by spilling the corresponding temporary if necessary */
4097 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4098 {
4099     TCGTemp *ts = s->reg_to_temp[reg];
4100     if (ts != NULL) {
4101         temp_sync(s, ts, allocated_regs, 0, -1);
4102     }
4103 }
4104 
4105 /**
4106  * tcg_reg_alloc:
4107  * @required_regs: Set of registers in which we must allocate.
4108  * @allocated_regs: Set of registers which must be avoided.
4109  * @preferred_regs: Set of registers we should prefer.
4110  * @rev: True if we search the registers in "indirect" order.
4111  *
4112  * The allocated register must be in @required_regs & ~@allocated_regs,
4113  * but if we can put it in @preferred_regs we may save a move later.
4114  */
4115 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4116                             TCGRegSet allocated_regs,
4117                             TCGRegSet preferred_regs, bool rev)
4118 {
4119     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4120     TCGRegSet reg_ct[2];
4121     const int *order;
4122 
4123     reg_ct[1] = required_regs & ~allocated_regs;
4124     tcg_debug_assert(reg_ct[1] != 0);
4125     reg_ct[0] = reg_ct[1] & preferred_regs;
4126 
4127     /* Skip the preferred_regs option if it cannot be satisfied,
4128        or if the preference made no difference.  */
4129     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4130 
4131     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4132 
4133     /* Try free registers, preferences first.  */
4134     for (j = f; j < 2; j++) {
4135         TCGRegSet set = reg_ct[j];
4136 
4137         if (tcg_regset_single(set)) {
4138             /* One register in the set.  */
4139             TCGReg reg = tcg_regset_first(set);
4140             if (s->reg_to_temp[reg] == NULL) {
4141                 return reg;
4142             }
4143         } else {
4144             for (i = 0; i < n; i++) {
4145                 TCGReg reg = order[i];
4146                 if (s->reg_to_temp[reg] == NULL &&
4147                     tcg_regset_test_reg(set, reg)) {
4148                     return reg;
4149                 }
4150             }
4151         }
4152     }
4153 
4154     /* We must spill something.  */
4155     for (j = f; j < 2; j++) {
4156         TCGRegSet set = reg_ct[j];
4157 
4158         if (tcg_regset_single(set)) {
4159             /* One register in the set.  */
4160             TCGReg reg = tcg_regset_first(set);
4161             tcg_reg_free(s, reg, allocated_regs);
4162             return reg;
4163         } else {
4164             for (i = 0; i < n; i++) {
4165                 TCGReg reg = order[i];
4166                 if (tcg_regset_test_reg(set, reg)) {
4167                     tcg_reg_free(s, reg, allocated_regs);
4168                     return reg;
4169                 }
4170             }
4171         }
4172     }
4173 
4174     g_assert_not_reached();
4175 }
4176 
/*
 * Allocate a consecutive register pair (reg, reg + 1); same contract as
 * tcg_reg_alloc, but @required_regs marks the low register of each
 * eligible pair.  Returns the low register of the allocated pair.
 */
static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
                                 TCGRegSet allocated_regs,
                                 TCGRegSet preferred_regs, bool rev)
{
    int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
    TCGRegSet reg_ct[2];
    const int *order;

    /* Ensure that if I is not in allocated_regs, I+1 is not either. */
    reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
    tcg_debug_assert(reg_ct[1] != 0);
    reg_ct[0] = reg_ct[1] & preferred_regs;

    order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;

    /*
     * Skip the preferred_regs option if it cannot be satisfied,
     * or if the preference made no difference.
     */
    k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];

    /*
     * Minimize the number of flushes by looking for 2 free registers first,
     * then a single flush, then two flushes.
     */
    for (fmin = 2; fmin >= 0; fmin--) {
        for (j = k; j < 2; j++) {
            TCGRegSet set = reg_ct[j];

            for (i = 0; i < n; i++) {
                TCGReg reg = order[i];

                if (tcg_regset_test_reg(set, reg)) {
                    /* f = number of already-free registers in the pair. */
                    int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
                    if (f >= fmin) {
                        /* tcg_reg_free is a no-op on a free register. */
                        tcg_reg_free(s, reg, allocated_regs);
                        tcg_reg_free(s, reg + 1, allocated_regs);
                        return reg;
                    }
                }
            }
        }
    }
    g_assert_not_reached();
}
4222 
/* Make sure the temporary is in a register.  If needed, allocate the register
   from DESIRED while avoiding ALLOCATED.  On return ts->val_type is
   TEMP_VAL_REG and reg_to_temp[] has been updated.  */
static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
                      TCGRegSet allocated_regs, TCGRegSet preferred_regs)
{
    TCGReg reg;

    switch (ts->val_type) {
    case TEMP_VAL_REG:
        /* Already in a register; nothing to do. */
        return;
    case TEMP_VAL_CONST:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        if (ts->type <= TCG_TYPE_I32) {
            /* Scalar constant: plain move-immediate. */
            tcg_out_movi(s, ts->type, reg, ts->val);
        } else {
            uint64_t val = ts->val;
            MemOp vece = MO_64;

            /*
             * Find the minimal vector element that matches the constant.
             * The targets will, in general, have to do this search anyway,
             * do this generically.
             */
            if (val == dup_const(MO_8, val)) {
                vece = MO_8;
            } else if (val == dup_const(MO_16, val)) {
                vece = MO_16;
            } else if (val == dup_const(MO_32, val)) {
                vece = MO_32;
            }

            tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
        }
        /* The register copy is newer than any memory slot. */
        ts->mem_coherent = 0;
        break;
    case TEMP_VAL_MEM:
        reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
                            preferred_regs, ts->indirect_base);
        tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
        /* Register and memory now agree. */
        ts->mem_coherent = 1;
        break;
    case TEMP_VAL_DEAD:
    default:
        g_assert_not_reached();
    }
    set_temp_val_reg(s, ts, reg);
}
4271 
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary register needs to be allocated to store a constant.  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
}
4280 
4281 /* save globals to their canonical location and assume they can be
4282    modified be the following code. 'allocated_regs' is used in case a
4283    temporary registers needs to be allocated to store a constant. */
4284 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4285 {
4286     int i, n;
4287 
4288     for (i = 0, n = s->nb_globals; i < n; i++) {
4289         temp_save(s, &s->temps[i], allocated_regs);
4290     }
4291 }
4292 
4293 /* sync globals to their canonical location and assume they can be
4294    read by the following code. 'allocated_regs' is used in case a
4295    temporary registers needs to be allocated to store a constant. */
4296 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4297 {
4298     int i, n;
4299 
4300     for (i = 0, n = s->nb_globals; i < n; i++) {
4301         TCGTemp *ts = &s->temps[i];
4302         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4303                          || ts->kind == TEMP_FIXED
4304                          || ts->mem_coherent);
4305     }
4306 }
4307 
/* at the end of a basic block, we assume all temporaries are dead and
   all globals are stored at their canonical location. */
static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
{
    int i;

    /* Walk only the non-global temps; globals are flushed below. */
    for (i = s->nb_globals; i < s->nb_temps; i++) {
        TCGTemp *ts = &s->temps[i];

        switch (ts->kind) {
        case TEMP_TB:
            /* TB-lifetime temps survive the block: spill to memory. */
            temp_save(s, ts, allocated_regs);
            break;
        case TEMP_EBB:
            /* The liveness analysis already ensures that temps are dead.
               Keep a tcg_debug_assert for safety. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
            break;
        case TEMP_CONST:
            /* Similarly, we should have freed any allocated register. */
            tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
            break;
        default:
            g_assert_not_reached();
        }
    }

    save_globals(s, allocated_regs);
}
4337 
4338 /*
4339  * At a conditional branch, we assume all temporaries are dead unless
4340  * explicitly live-across-conditional-branch; all globals and local
4341  * temps are synced to their location.
4342  */
4343 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4344 {
4345     sync_globals(s, allocated_regs);
4346 
4347     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4348         TCGTemp *ts = &s->temps[i];
4349         /*
4350          * The liveness analysis already ensures that temps are dead.
4351          * Keep tcg_debug_asserts for safety.
4352          */
4353         switch (ts->kind) {
4354         case TEMP_TB:
4355             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4356             break;
4357         case TEMP_EBB:
4358         case TEMP_CONST:
4359             break;
4360         default:
4361             g_assert_not_reached();
4362         }
4363     }
4364 }
4365 
4366 /*
4367  * Specialized code generation for INDEX_op_mov_* with a constant.
4368  */
4369 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4370                                   tcg_target_ulong val, TCGLifeData arg_life,
4371                                   TCGRegSet preferred_regs)
4372 {
4373     /* ENV should not be modified.  */
4374     tcg_debug_assert(!temp_readonly(ots));
4375 
4376     /* The movi is not explicitly generated here.  */
4377     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4378     ots->val = val;
4379     ots->mem_coherent = 0;
4380     if (NEED_SYNC_ARG(0)) {
4381         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4382     } else if (IS_DEAD_ARG(0)) {
4383         temp_dead(s, ots);
4384     }
4385 }
4386 
4387 /*
4388  * Specialized code generation for INDEX_op_mov_*.
4389  */
4390 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4391 {
4392     const TCGLifeData arg_life = op->life;
4393     TCGRegSet allocated_regs, preferred_regs;
4394     TCGTemp *ts, *ots;
4395     TCGType otype, itype;
4396     TCGReg oreg, ireg;
4397 
4398     allocated_regs = s->reserved_regs;
4399     preferred_regs = output_pref(op, 0);
4400     ots = arg_temp(op->args[0]);
4401     ts = arg_temp(op->args[1]);
4402 
4403     /* ENV should not be modified.  */
4404     tcg_debug_assert(!temp_readonly(ots));
4405 
4406     /* Note that otype != itype for no-op truncation.  */
4407     otype = ots->type;
4408     itype = ts->type;
4409 
4410     if (ts->val_type == TEMP_VAL_CONST) {
4411         /* propagate constant or generate sti */
4412         tcg_target_ulong val = ts->val;
4413         if (IS_DEAD_ARG(1)) {
4414             temp_dead(s, ts);
4415         }
4416         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4417         return;
4418     }
4419 
4420     /* If the source value is in memory we're going to be forced
4421        to have it in a register in order to perform the copy.  Copy
4422        the SOURCE value into its own register first, that way we
4423        don't have to reload SOURCE the next time it is used. */
4424     if (ts->val_type == TEMP_VAL_MEM) {
4425         temp_load(s, ts, tcg_target_available_regs[itype],
4426                   allocated_regs, preferred_regs);
4427     }
4428     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4429     ireg = ts->reg;
4430 
4431     if (IS_DEAD_ARG(0)) {
4432         /* mov to a non-saved dead register makes no sense (even with
4433            liveness analysis disabled). */
4434         tcg_debug_assert(NEED_SYNC_ARG(0));
4435         if (!ots->mem_allocated) {
4436             temp_allocate_frame(s, ots);
4437         }
4438         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4439         if (IS_DEAD_ARG(1)) {
4440             temp_dead(s, ts);
4441         }
4442         temp_dead(s, ots);
4443         return;
4444     }
4445 
4446     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4447         /*
4448          * The mov can be suppressed.  Kill input first, so that it
4449          * is unlinked from reg_to_temp, then set the output to the
4450          * reg that we saved from the input.
4451          */
4452         temp_dead(s, ts);
4453         oreg = ireg;
4454     } else {
4455         if (ots->val_type == TEMP_VAL_REG) {
4456             oreg = ots->reg;
4457         } else {
4458             /* Make sure to not spill the input register during allocation. */
4459             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4460                                  allocated_regs | ((TCGRegSet)1 << ireg),
4461                                  preferred_regs, ots->indirect_base);
4462         }
4463         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4464             /*
4465              * Cross register class move not supported.
4466              * Store the source register into the destination slot
4467              * and leave the destination temp as TEMP_VAL_MEM.
4468              */
4469             assert(!temp_readonly(ots));
4470             if (!ts->mem_allocated) {
4471                 temp_allocate_frame(s, ots);
4472             }
4473             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4474             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
4475             ots->mem_coherent = 1;
4476             return;
4477         }
4478     }
4479     set_temp_val_reg(s, ots, oreg);
4480     ots->mem_coherent = 0;
4481 
4482     if (NEED_SYNC_ARG(0)) {
4483         temp_sync(s, ots, allocated_regs, 0, 0);
4484     }
4485 }
4486 
4487 /*
4488  * Specialized code generation for INDEX_op_dup_vec.
4489  */
4490 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4491 {
4492     const TCGLifeData arg_life = op->life;
4493     TCGRegSet dup_out_regs, dup_in_regs;
4494     TCGTemp *its, *ots;
4495     TCGType itype, vtype;
4496     unsigned vece;
4497     int lowpart_ofs;
4498     bool ok;
4499 
4500     ots = arg_temp(op->args[0]);
4501     its = arg_temp(op->args[1]);
4502 
4503     /* ENV should not be modified.  */
4504     tcg_debug_assert(!temp_readonly(ots));
4505 
4506     itype = its->type;
4507     vece = TCGOP_VECE(op);
4508     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4509 
4510     if (its->val_type == TEMP_VAL_CONST) {
4511         /* Propagate constant via movi -> dupi.  */
4512         tcg_target_ulong val = its->val;
4513         if (IS_DEAD_ARG(1)) {
4514             temp_dead(s, its);
4515         }
4516         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4517         return;
4518     }
4519 
4520     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4521     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4522 
4523     /* Allocate the output register now.  */
4524     if (ots->val_type != TEMP_VAL_REG) {
4525         TCGRegSet allocated_regs = s->reserved_regs;
4526         TCGReg oreg;
4527 
4528         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4529             /* Make sure to not spill the input register. */
4530             tcg_regset_set_reg(allocated_regs, its->reg);
4531         }
4532         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4533                              output_pref(op, 0), ots->indirect_base);
4534         set_temp_val_reg(s, ots, oreg);
4535     }
4536 
4537     switch (its->val_type) {
4538     case TEMP_VAL_REG:
4539         /*
4540          * The dup constriaints must be broad, covering all possible VECE.
4541          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
4542          * to fail, indicating that extra moves are required for that case.
4543          */
4544         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4545             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4546                 goto done;
4547             }
4548             /* Try again from memory or a vector input register.  */
4549         }
4550         if (!its->mem_coherent) {
4551             /*
4552              * The input register is not synced, and so an extra store
4553              * would be required to use memory.  Attempt an integer-vector
4554              * register move first.  We do not have a TCGRegSet for this.
4555              */
4556             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4557                 break;
4558             }
4559             /* Sync the temp back to its slot and load from there.  */
4560             temp_sync(s, its, s->reserved_regs, 0, 0);
4561         }
4562         /* fall through */
4563 
4564     case TEMP_VAL_MEM:
4565         lowpart_ofs = 0;
4566         if (HOST_BIG_ENDIAN) {
4567             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4568         }
4569         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4570                              its->mem_offset + lowpart_ofs)) {
4571             goto done;
4572         }
4573         /* Load the input into the destination vector register. */
4574         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4575         break;
4576 
4577     default:
4578         g_assert_not_reached();
4579     }
4580 
4581     /* We now have a vector input register, so dup must succeed. */
4582     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4583     tcg_debug_assert(ok);
4584 
4585  done:
4586     ots->mem_coherent = 0;
4587     if (IS_DEAD_ARG(1)) {
4588         temp_dead(s, its);
4589     }
4590     if (NEED_SYNC_ARG(0)) {
4591         temp_sync(s, ots, s->reserved_regs, 0, 0);
4592     }
4593     if (IS_DEAD_ARG(0)) {
4594         temp_dead(s, ots);
4595     }
4596 }
4597 
/*
 * Generic register allocation and emission for a single TCGOp:
 * satisfy the input constraints (loading temps, materializing
 * constants, handling register pairs), free dead inputs, allocate
 * output registers, emit the target instruction, and finally
 * sync/kill the outputs per the liveness data in op->life.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Visit inputs in the constraint-sorted order. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                /* The loaded register may still violate the constraint. */
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                /* The existing pair can be reused only if both halves are
                   dead, writable, adjacent, and not otherwise claimed. */
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The second half always follows the first by one register. */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            /* Reuse reg in place if reg-1 is free for the paired output. */
            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Share the register of the aliased input. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* A "new" output must avoid every input register too. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction; extensions go through dedicated out-of-line
       helpers, everything else through the target's generic emitter. */
    switch (op->opc) {
    case INDEX_op_ext8s_i32:
        tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8s_i64:
        tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext8u_i32:
    case INDEX_op_ext8u_i64:
        tcg_out_ext8u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i32:
        tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16s_i64:
        tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext16u_i32:
    case INDEX_op_ext16u_i64:
        tcg_out_ext16u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32s_i64:
        tcg_out_ext32s(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext32u_i64:
        tcg_out_ext32u(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_ext_i32_i64:
        tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extu_i32_i64:
        tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
        break;
    case INDEX_op_extrl_i64_i32:
        tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
        break;
    default:
        if (def->flags & TCG_OPF_VECTOR) {
            tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                           new_args, const_args);
        } else {
            tcg_out_op(s, op->opc, new_args, const_args);
        }
        break;
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4963 
/*
 * Specialized code generation for INDEX_op_dup2_vec: build a 64-bit
 * value from the low (args[1]) and high (args[2]) 32-bit inputs and
 * replicate it across the output vector.  Returns false when neither
 * the dupi nor dupm shortcut applies and the caller must use the
 * generic expansion.
 */
static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGTemp *ots, *itsl, *itsh;
    TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
    tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
    tcg_debug_assert(TCGOP_VECE(op) == MO_64);

    ots = arg_temp(op->args[0]);
    itsl = arg_temp(op->args[1]);
    itsh = arg_temp(op->args[2]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGRegSet dup_out_regs =
            tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
        TCGReg oreg;

        /* Make sure to not spill the input registers. */
        if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsl->reg);
        }
        if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
            tcg_regset_set_reg(allocated_regs, itsh->reg);
        }

        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    /* Promote dup2 of immediates to dupi_vec. */
    if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
        uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
        MemOp vece = MO_64;

        /* Use the narrowest element size that reproduces the pattern. */
        if (val == dup_const(MO_8, val)) {
            vece = MO_8;
        } else if (val == dup_const(MO_16, val)) {
            vece = MO_16;
        } else if (val == dup_const(MO_32, val)) {
            vece = MO_32;
        }

        tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
        goto done;
    }

    /* If the two inputs form one 64-bit value, try dupm_vec. */
    if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
        itsh->temp_subindex == !HOST_BIG_ENDIAN &&
        itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
        TCGTemp *its = itsl - HOST_BIG_ENDIAN;

        /* Both halves must be resident in memory for the 64-bit load. */
        temp_sync(s, its + 0, s->reserved_regs, 0, 0);
        temp_sync(s, its + 1, s->reserved_regs, 0, 0);

        if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
                             its->mem_base->reg, its->mem_offset)) {
            goto done;
        }
    }

    /* Fall back to generic expansion. */
    return false;

 done:
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, itsl);
    }
    if (IS_DEAD_ARG(2)) {
        temp_dead(s, itsh);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
    return true;
}
5051 
5052 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5053                          TCGRegSet allocated_regs)
5054 {
5055     if (ts->val_type == TEMP_VAL_REG) {
5056         if (ts->reg != reg) {
5057             tcg_reg_free(s, reg, allocated_regs);
5058             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5059                 /*
5060                  * Cross register class move not supported.  Sync the
5061                  * temp back to its slot and load from there.
5062                  */
5063                 temp_sync(s, ts, allocated_regs, 0, 0);
5064                 tcg_out_ld(s, ts->type, reg,
5065                            ts->mem_base->reg, ts->mem_offset);
5066             }
5067         }
5068     } else {
5069         TCGRegSet arg_set = 0;
5070 
5071         tcg_reg_free(s, reg, allocated_regs);
5072         tcg_regset_set_reg(arg_set, reg);
5073         temp_load(s, ts, arg_set, allocated_regs, 0);
5074     }
5075 }
5076 
5077 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5078                          TCGRegSet allocated_regs)
5079 {
5080     /*
5081      * When the destination is on the stack, load up the temp and store.
5082      * If there are many call-saved registers, the temp might live to
5083      * see another use; otherwise it'll be discarded.
5084      */
5085     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5086     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5087                arg_slot_stk_ofs(arg_slot));
5088 }
5089 
5090 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5091                             TCGTemp *ts, TCGRegSet *allocated_regs)
5092 {
5093     if (arg_slot_reg_p(l->arg_slot)) {
5094         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5095         load_arg_reg(s, reg, ts, *allocated_regs);
5096         tcg_regset_set_reg(*allocated_regs, reg);
5097     } else {
5098         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5099     }
5100 }
5101 
5102 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5103                          intptr_t ref_off, TCGRegSet *allocated_regs)
5104 {
5105     TCGReg reg;
5106 
5107     if (arg_slot_reg_p(arg_slot)) {
5108         reg = tcg_target_call_iarg_regs[arg_slot];
5109         tcg_reg_free(s, reg, *allocated_regs);
5110         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5111         tcg_regset_set_reg(*allocated_regs, reg);
5112     } else {
5113         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5114                             *allocated_regs, 0, false);
5115         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5116         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5117                    arg_slot_stk_ofs(arg_slot));
5118     }
5119 }
5120 
/*
 * Generate host code for a helper call described by @op.
 *
 * Inputs are moved into their ABI locations (registers or stack slots),
 * dead input temporaries are released, call-clobbered registers are
 * flushed, and globals are saved or synced according to the call flags.
 * After emitting the call, output temporaries are bound to the ABI
 * return location(s).
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    const TCGHelperInfo *info = tcg_call_info(op);
    /* Reserved registers are never usable for argument marshalling. */
    TCGRegSet allocated_regs = s->reserved_regs;
    int i;

    /*
     * Move inputs into place in reverse order,
     * so that we place stacked arguments first.
     */
    for (i = nb_iargs - 1; i >= 0; --i) {
        const TCGCallArgumentLoc *loc = &info->in[i];
        TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);

        switch (loc->kind) {
        case TCG_CALL_ARG_NORMAL:
        case TCG_CALL_ARG_EXTEND_U:
        case TCG_CALL_ARG_EXTEND_S:
            load_arg_normal(s, loc, ts, &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF:
            /* Spill the value, then pass a pointer to the spill slot. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
                         arg_slot_stk_ofs(loc->ref_slot),
                         &allocated_regs);
            break;
        case TCG_CALL_ARG_BY_REF_N:
            /* Subsequent part of a by-reference value: spill only. */
            load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
            break;
        default:
            g_assert_not_reached();
        }
    }

    /* Mark dead temporaries and free the associated registers.  */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* Clobber call registers.  */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /*
     * Save globals if they might be written by the helper,
     * sync them if they might be read.
     */
    if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    /*
     * If the ABI passes a pointer to the returned struct as the first
     * argument, load that now.  Pass a pointer to the output home slot.
     */
    if (info->out_kind == TCG_CALL_RET_BY_REF) {
        TCGTemp *ts = arg_temp(op->args[0]);

        if (!ts->mem_allocated) {
            temp_allocate_frame(s, ts);
        }
        load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
    }

    tcg_out_call(s, tcg_call_func(op), info);

    /* Assign output registers and emit moves if needed.  */
    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            /* Bind the temp to its ABI return register; memory is stale. */
            set_temp_val_reg(s, ts, reg);
            ts->mem_coherent = 0;
        }
        break;

    case TCG_CALL_RET_BY_VEC:
        {
            TCGTemp *ts = arg_temp(op->args[0]);

            tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
            tcg_debug_assert(ts->temp_subindex == 0);
            if (!ts->mem_allocated) {
                temp_allocate_frame(s, ts);
            }
            /* Store the vector return register to the temp's home slot. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       ts->mem_base->reg, ts->mem_offset);
        }
        /* fall through to mark all parts in memory */

    case TCG_CALL_RET_BY_REF:
        /* The callee has performed a write through the reference. */
        for (i = 0; i < nb_oargs; i++) {
            TCGTemp *ts = arg_temp(op->args[i]);
            ts->val_type = TEMP_VAL_MEM;
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Flush or discard output registers as needed. */
    for (i = 0; i < nb_oargs; i++) {
        TCGTemp *ts = arg_temp(op->args[i]);
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
5251 
5252 /**
5253  * atom_and_align_for_opc:
5254  * @s: tcg context
5255  * @opc: memory operation code
5256  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5257  * @allow_two_ops: true if we are prepared to issue two operations
5258  *
5259  * Return the alignment and atomicity to use for the inline fast path
5260  * for the given memory operation.  The alignment may be larger than
5261  * that specified in @opc, and the correct alignment will be diagnosed
5262  * by the slow path helper.
5263  *
5264  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5265  * and issue two loads or stores for subalignment.
5266  */
5267 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5268                                            MemOp host_atom, bool allow_two_ops)
5269 {
5270     MemOp align = get_alignment_bits(opc);
5271     MemOp size = opc & MO_SIZE;
5272     MemOp half = size ? size - 1 : 0;
5273     MemOp atmax;
5274     MemOp atom;
5275 
5276     /* When serialized, no further atomicity required.  */
5277     if (s->gen_tb->cflags & CF_PARALLEL) {
5278         atom = opc & MO_ATOM_MASK;
5279     } else {
5280         atom = MO_ATOM_NONE;
5281     }
5282 
5283     switch (atom) {
5284     case MO_ATOM_NONE:
5285         /* The operation requires no specific atomicity. */
5286         atmax = MO_8;
5287         break;
5288 
5289     case MO_ATOM_IFALIGN:
5290         atmax = size;
5291         break;
5292 
5293     case MO_ATOM_IFALIGN_PAIR:
5294         atmax = half;
5295         break;
5296 
5297     case MO_ATOM_WITHIN16:
5298         atmax = size;
5299         if (size == MO_128) {
5300             /* Misalignment implies !within16, and therefore no atomicity. */
5301         } else if (host_atom != MO_ATOM_WITHIN16) {
5302             /* The host does not implement within16, so require alignment. */
5303             align = MAX(align, size);
5304         }
5305         break;
5306 
5307     case MO_ATOM_WITHIN16_PAIR:
5308         atmax = size;
5309         /*
5310          * Misalignment implies !within16, and therefore half atomicity.
5311          * Any host prepared for two operations can implement this with
5312          * half alignment.
5313          */
5314         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5315             align = MAX(align, half);
5316         }
5317         break;
5318 
5319     case MO_ATOM_SUBALIGN:
5320         atmax = size;
5321         if (host_atom != MO_ATOM_SUBALIGN) {
5322             /* If unaligned but not odd, there are subobjects up to half. */
5323             if (allow_two_ops) {
5324                 align = MAX(align, half);
5325             } else {
5326                 align = MAX(align, size);
5327             }
5328         }
5329         break;
5330 
5331     default:
5332         g_assert_not_reached();
5333     }
5334 
5335     return (TCGAtomAlign){ .atom = atmax, .align = align };
5336 }
5337 
5338 /*
5339  * Similarly for qemu_ld/st slow path helpers.
5340  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5341  * using only the provided backend tcg_out_* functions.
5342  */
5343 
5344 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5345 {
5346     int ofs = arg_slot_stk_ofs(slot);
5347 
5348     /*
5349      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5350      * require extension to uint64_t, adjust the address for uint32_t.
5351      */
5352     if (HOST_BIG_ENDIAN &&
5353         TCG_TARGET_REG_BITS == 64 &&
5354         type == TCG_TYPE_I32) {
5355         ofs += 4;
5356     }
5357     return ofs;
5358 }
5359 
5360 static void tcg_out_helper_load_slots(TCGContext *s,
5361                                       unsigned nmov, TCGMovExtend *mov,
5362                                       const TCGLdstHelperParam *parm)
5363 {
5364     unsigned i;
5365     TCGReg dst3;
5366 
5367     /*
5368      * Start from the end, storing to the stack first.
5369      * This frees those registers, so we need not consider overlap.
5370      */
5371     for (i = nmov; i-- > 0; ) {
5372         unsigned slot = mov[i].dst;
5373 
5374         if (arg_slot_reg_p(slot)) {
5375             goto found_reg;
5376         }
5377 
5378         TCGReg src = mov[i].src;
5379         TCGType dst_type = mov[i].dst_type;
5380         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5381 
5382         /* The argument is going onto the stack; extend into scratch. */
5383         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5384             tcg_debug_assert(parm->ntmp != 0);
5385             mov[i].dst = src = parm->tmp[0];
5386             tcg_out_movext1(s, &mov[i]);
5387         }
5388 
5389         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5390                    tcg_out_helper_stk_ofs(dst_type, slot));
5391     }
5392     return;
5393 
5394  found_reg:
5395     /*
5396      * The remaining arguments are in registers.
5397      * Convert slot numbers to argument registers.
5398      */
5399     nmov = i + 1;
5400     for (i = 0; i < nmov; ++i) {
5401         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5402     }
5403 
5404     switch (nmov) {
5405     case 4:
5406         /* The backend must have provided enough temps for the worst case. */
5407         tcg_debug_assert(parm->ntmp >= 2);
5408 
5409         dst3 = mov[3].dst;
5410         for (unsigned j = 0; j < 3; ++j) {
5411             if (dst3 == mov[j].src) {
5412                 /*
5413                  * Conflict. Copy the source to a temporary, perform the
5414                  * remaining moves, then the extension from our scratch
5415                  * on the way out.
5416                  */
5417                 TCGReg scratch = parm->tmp[1];
5418 
5419                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5420                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5421                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5422                 break;
5423             }
5424         }
5425 
5426         /* No conflicts: perform this move and continue. */
5427         tcg_out_movext1(s, &mov[3]);
5428         /* fall through */
5429 
5430     case 3:
5431         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5432                         parm->ntmp ? parm->tmp[0] : -1);
5433         break;
5434     case 2:
5435         tcg_out_movext2(s, mov, mov + 1,
5436                         parm->ntmp ? parm->tmp[0] : -1);
5437         break;
5438     case 1:
5439         tcg_out_movext1(s, mov);
5440         break;
5441     default:
5442         g_assert_not_reached();
5443     }
5444 }
5445 
5446 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5447                                     TCGType type, tcg_target_long imm,
5448                                     const TCGLdstHelperParam *parm)
5449 {
5450     if (arg_slot_reg_p(slot)) {
5451         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5452     } else {
5453         int ofs = tcg_out_helper_stk_ofs(type, slot);
5454         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5455             tcg_debug_assert(parm->ntmp != 0);
5456             tcg_out_movi(s, type, parm->tmp[0], imm);
5457             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5458         }
5459     }
5460 }
5461 
/*
 * Emit the helper-call arguments common to all qemu_ld/st slow paths:
 * env (always the first argument), the MemOpIdx @ldst->oi, and the
 * return address.  @next_arg indexes the oi argument within @info->in;
 * ra immediately follows it.
 */
static void tcg_out_helper_load_common_args(TCGContext *s,
                                            const TCGLabelQemuLdst *ldst,
                                            const TCGLdstHelperParam *parm,
                                            const TCGHelperInfo *info,
                                            unsigned next_arg)
{
    /* Template for pointer-sized moves; .dst/.src filled in below. */
    TCGMovExtend ptr_mov = {
        .dst_type = TCG_TYPE_PTR,
        .src_type = TCG_TYPE_PTR,
        .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
    };
    const TCGCallArgumentLoc *loc = &info->in[0];
    TCGType type;
    unsigned slot;
    tcg_target_ulong imm;

    /*
     * Handle env, which is always first.
     */
    ptr_mov.dst = loc->arg_slot;
    ptr_mov.src = TCG_AREG0;
    tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);

    /*
     * Handle oi.
     */
    imm = ldst->oi;
    loc = &info->in[next_arg];
    type = TCG_TYPE_I32;
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
        break;
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* No extension required for MemOpIdx. */
        tcg_debug_assert(imm <= INT32_MAX);
        type = TCG_TYPE_REG;
        break;
    default:
        g_assert_not_reached();
    }
    tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
    next_arg++;

    /*
     * Handle ra.
     */
    loc = &info->in[next_arg];
    slot = loc->arg_slot;
    if (parm->ra_gen) {
        /*
         * The backend generates the return address itself; tell it which
         * argument register (if any) the value must end up in.
         */
        int arg_reg = -1;
        TCGReg ra_reg;

        if (arg_slot_reg_p(slot)) {
            arg_reg = tcg_target_call_iarg_regs[slot];
        }
        ra_reg = parm->ra_gen(s, ldst, arg_reg);

        ptr_mov.dst = slot;
        ptr_mov.src = ra_reg;
        tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
    } else {
        /* Pass the raw return address recorded at translation time. */
        imm = (uintptr_t)ldst->raddr;
        tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
    }
}
5528 
5529 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5530                                        const TCGCallArgumentLoc *loc,
5531                                        TCGType dst_type, TCGType src_type,
5532                                        TCGReg lo, TCGReg hi)
5533 {
5534     MemOp reg_mo;
5535 
5536     if (dst_type <= TCG_TYPE_REG) {
5537         MemOp src_ext;
5538 
5539         switch (loc->kind) {
5540         case TCG_CALL_ARG_NORMAL:
5541             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5542             break;
5543         case TCG_CALL_ARG_EXTEND_U:
5544             dst_type = TCG_TYPE_REG;
5545             src_ext = MO_UL;
5546             break;
5547         case TCG_CALL_ARG_EXTEND_S:
5548             dst_type = TCG_TYPE_REG;
5549             src_ext = MO_SL;
5550             break;
5551         default:
5552             g_assert_not_reached();
5553         }
5554 
5555         mov[0].dst = loc->arg_slot;
5556         mov[0].dst_type = dst_type;
5557         mov[0].src = lo;
5558         mov[0].src_type = src_type;
5559         mov[0].src_ext = src_ext;
5560         return 1;
5561     }
5562 
5563     if (TCG_TARGET_REG_BITS == 32) {
5564         assert(dst_type == TCG_TYPE_I64);
5565         reg_mo = MO_32;
5566     } else {
5567         assert(dst_type == TCG_TYPE_I128);
5568         reg_mo = MO_64;
5569     }
5570 
5571     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5572     mov[0].src = lo;
5573     mov[0].dst_type = TCG_TYPE_REG;
5574     mov[0].src_type = TCG_TYPE_REG;
5575     mov[0].src_ext = reg_mo;
5576 
5577     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5578     mov[1].src = hi;
5579     mov[1].dst_type = TCG_TYPE_REG;
5580     mov[1].src_type = TCG_TYPE_REG;
5581     mov[1].src_ext = reg_mo;
5582 
5583     return 2;
5584 }
5585 
/*
 * Emit the arguments for a qemu_ld slow-path helper call: the guest
 * address (zero-extended to 64 bits when a 32-bit guest runs on a
 * 32-bit host), then the common args (env, oi, ra).  For helpers that
 * return I128 by reference, also pass a pointer to scratch stack space.
 */
static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[2];
    unsigned next_arg, nmov;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper signature matching the access size. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_ld32_mmu;
        break;
    case MO_64:
        info = &info_helper_ld64_mmu;
        break;
    case MO_128:
        info = &info_helper_ld128_mmu;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;

    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part, then
         * load a zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        tcg_out_helper_load_slots(s, 1, mov, parm);

        tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
                                TCG_TYPE_I32, 0, parm);
        next_arg += 2;
    } else {
        /* Address fits the natural argument width (1 or 2 moves). */
        nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                      ldst->addrlo_reg, ldst->addrhi_reg);
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        next_arg += nmov;
    }

    switch (info->out_kind) {
    case TCG_CALL_RET_NORMAL:
    case TCG_CALL_RET_BY_VEC:
        break;
    case TCG_CALL_RET_BY_REF:
        /*
         * The return reference is in the first argument slot.
         * We need memory in which to return: re-use the top of stack.
         */
        {
            int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;

            if (arg_slot_reg_p(0)) {
                tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
            } else {
                /* Slot 0 is on the stack: build pointer in a scratch. */
                tcg_debug_assert(parm->ntmp != 0);
                tcg_out_addi_ptr(s, parm->tmp[0],
                                 TCG_REG_CALL_STACK, ofs_slot0);
                tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                           TCG_REG_CALL_STACK, ofs_slot0);
            }
        }
        break;
    default:
        g_assert_not_reached();
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
5666 
/*
 * Move the return value of a qemu_ld slow-path helper into the data
 * register(s) recorded in @ldst, applying any extension required by
 * the memory operation.  @load_sign indicates the helper already
 * performed sign extension.
 */
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                  bool load_sign,
                                  const TCGLdstHelperParam *parm)
{
    MemOp mop = get_memop(ldst->oi);
    TCGMovExtend mov[2];
    int ofs_slot0;

    switch (ldst->type) {
    case TCG_TYPE_I64:
        /* On 32-bit hosts an I64 result uses the two-register path below. */
        if (TCG_TARGET_REG_BITS == 32) {
            break;
        }
        /* fall through */

    case TCG_TYPE_I32:
        mov[0].dst = ldst->datalo_reg;
        mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
        mov[0].dst_type = ldst->type;
        mov[0].src_type = TCG_TYPE_REG;

        /*
         * If load_sign, then we allowed the helper to perform the
         * appropriate sign extension to tcg_target_ulong, and all
         * we need now is a plain move.
         *
         * If they do not, then we expect the relevant extension
         * instruction to be no more expensive than a move, and
         * we thus save the icache etc by only using one of two
         * helper functions.
         */
        if (load_sign || !(mop & MO_SIGN)) {
            if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
                mov[0].src_ext = MO_32;
            } else {
                mov[0].src_ext = MO_64;
            }
        } else {
            mov[0].src_ext = mop & MO_SSIZE;
        }
        tcg_out_movext1(s, mov);
        return;

    case TCG_TYPE_I128:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Returned in a register pair: use the common path below. */
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Spill the vector return, then reload as two I64 halves. */
            tcg_out_st(s, TCG_TYPE_V128,
                       tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
                       TCG_REG_CALL_STACK, ofs_slot0);
            /* fall through */
        case TCG_CALL_RET_BY_REF:
            /* The value is in the stack scratch area; load both halves. */
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
            tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
                       TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
            return;
        default:
            g_assert_not_reached();
        }
        break;

    default:
        g_assert_not_reached();
    }

    /* Two-register return: pair up lo/hi per host endianness. */
    mov[0].dst = ldst->datalo_reg;
    mov[0].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
    mov[0].dst_type = TCG_TYPE_REG;
    mov[0].src_type = TCG_TYPE_REG;
    mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    mov[1].dst = ldst->datahi_reg;
    mov[1].src =
        tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
    mov[1].dst_type = TCG_TYPE_REG;
    mov[1].src_type = TCG_TYPE_REG;
    mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;

    tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
}
5752 
/*
 * Emit the arguments for a qemu_st slow-path helper call: guest
 * address, the data to store (by value, or by reference for I128 on
 * hosts that pass it that way), then the common args (env, oi, ra).
 */
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
                                   const TCGLdstHelperParam *parm)
{
    const TCGHelperInfo *info;
    const TCGCallArgumentLoc *loc;
    TCGMovExtend mov[4];
    TCGType data_type;
    unsigned next_arg, nmov, n;
    MemOp mop = get_memop(ldst->oi);

    /* Select the helper signature matching the access size. */
    switch (mop & MO_SIZE) {
    case MO_8:
    case MO_16:
    case MO_32:
        info = &info_helper_st32_mmu;
        data_type = TCG_TYPE_I32;
        break;
    case MO_64:
        info = &info_helper_st64_mmu;
        data_type = TCG_TYPE_I64;
        break;
    case MO_128:
        info = &info_helper_st128_mmu;
        data_type = TCG_TYPE_I128;
        break;
    default:
        g_assert_not_reached();
    }

    /* Defer env argument. */
    next_arg = 1;
    nmov = 0;

    /* Handle addr argument. */
    loc = &info->in[next_arg];
    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /*
         * 32-bit host with 32-bit guest: zero-extend the guest address
         * to 64-bits for the helper by storing the low part.  Later,
         * after we have processed the register inputs, we will load a
         * zero for the high part.
         */
        tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
                               TCG_TYPE_I32, TCG_TYPE_I32,
                               ldst->addrlo_reg, -1);
        next_arg += 2;
        nmov += 1;
    } else {
        n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
                                   ldst->addrlo_reg, ldst->addrhi_reg);
        next_arg += n;
        nmov += n;
    }

    /* Handle data argument. */
    loc = &info->in[next_arg];
    switch (loc->kind) {
    case TCG_CALL_ARG_NORMAL:
    case TCG_CALL_ARG_EXTEND_U:
    case TCG_CALL_ARG_EXTEND_S:
        /* Data passed by value: queue its move(s) with the address. */
        n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
                                   ldst->datalo_reg, ldst->datahi_reg);
        next_arg += n;
        nmov += n;
        tcg_out_helper_load_slots(s, nmov, mov, parm);
        break;

    case TCG_CALL_ARG_BY_REF:
        /* I128 by reference: spill both halves, then pass a pointer. */
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        tcg_debug_assert(data_type == TCG_TYPE_I128);
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
        tcg_out_st(s, TCG_TYPE_I64,
                   HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
                   TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));

        tcg_out_helper_load_slots(s, nmov, mov, parm);

        if (arg_slot_reg_p(loc->arg_slot)) {
            tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
                             TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
        } else {
            /* Pointer argument itself goes on the stack via a scratch. */
            tcg_debug_assert(parm->ntmp != 0);
            tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
                             arg_slot_stk_ofs(loc->ref_slot));
            tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
                       TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
        }
        next_arg += 2;
        break;

    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
        /* Zero extend the address by loading a zero for the high part. */
        loc = &info->in[1 + !HOST_BIG_ENDIAN];
        tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
    }

    tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
}
5858 
5859 #ifdef CONFIG_PROFILER
5860 
/* avoid copy/paste errors */
/* Atomically read @field from @from and add it into @to->field. */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/* Atomically read @field from @from and keep the maximum in @to->field. */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
5874 
5875 /* Pass in a zero'ed @prof */
5876 static inline
5877 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
5878 {
5879     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5880     unsigned int i;
5881 
5882     for (i = 0; i < n_ctxs; i++) {
5883         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5884         const TCGProfile *orig = &s->prof;
5885 
5886         if (counters) {
5887             PROF_ADD(prof, orig, cpu_exec_time);
5888             PROF_ADD(prof, orig, tb_count1);
5889             PROF_ADD(prof, orig, tb_count);
5890             PROF_ADD(prof, orig, op_count);
5891             PROF_MAX(prof, orig, op_count_max);
5892             PROF_ADD(prof, orig, temp_count);
5893             PROF_MAX(prof, orig, temp_count_max);
5894             PROF_ADD(prof, orig, del_op_count);
5895             PROF_ADD(prof, orig, code_in_len);
5896             PROF_ADD(prof, orig, code_out_len);
5897             PROF_ADD(prof, orig, search_out_len);
5898             PROF_ADD(prof, orig, interm_time);
5899             PROF_ADD(prof, orig, code_time);
5900             PROF_ADD(prof, orig, la_time);
5901             PROF_ADD(prof, orig, opt_time);
5902             PROF_ADD(prof, orig, restore_count);
5903             PROF_ADD(prof, orig, restore_time);
5904         }
5905         if (table) {
5906             int i;
5907 
5908             for (i = 0; i < NB_OPS; i++) {
5909                 PROF_ADD(prof, orig, table_op_count[i]);
5910             }
5911         }
5912     }
5913 }
5914 
5915 #undef PROF_ADD
5916 #undef PROF_MAX
5917 
/* Snapshot only the scalar profile counters into @prof. */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
5922 
/* Snapshot only the per-opcode count table into @prof. */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
5927 
5928 void tcg_dump_op_count(GString *buf)
5929 {
5930     TCGProfile prof = {};
5931     int i;
5932 
5933     tcg_profile_snapshot_table(&prof);
5934     for (i = 0; i < NB_OPS; i++) {
5935         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
5936                                prof.table_op_count[i]);
5937     }
5938 }
5939 
5940 int64_t tcg_cpu_exec_time(void)
5941 {
5942     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
5943     unsigned int i;
5944     int64_t ret = 0;
5945 
5946     for (i = 0; i < n_ctxs; i++) {
5947         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
5948         const TCGProfile *prof = &s->prof;
5949 
5950         ret += qatomic_read(&prof->cpu_exec_time);
5951     }
5952     return ret;
5953 }
5954 #else
/* Stub used when CONFIG_PROFILER is not compiled in. */
void tcg_dump_op_count(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
5959 
/* Stub used when CONFIG_PROFILER is not compiled in: fatal if called. */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
5965 #endif
5966 
5967 
5968 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
5969 {
5970 #ifdef CONFIG_PROFILER
5971     TCGProfile *prof = &s->prof;
5972 #endif
5973     int i, num_insns;
5974     TCGOp *op;
5975 
5976 #ifdef CONFIG_PROFILER
5977     {
5978         int n = 0;
5979 
5980         QTAILQ_FOREACH(op, &s->ops, link) {
5981             n++;
5982         }
5983         qatomic_set(&prof->op_count, prof->op_count + n);
5984         if (n > prof->op_count_max) {
5985             qatomic_set(&prof->op_count_max, n);
5986         }
5987 
5988         n = s->nb_temps;
5989         qatomic_set(&prof->temp_count, prof->temp_count + n);
5990         if (n > prof->temp_count_max) {
5991             qatomic_set(&prof->temp_count_max, n);
5992         }
5993     }
5994 #endif
5995 
5996     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
5997                  && qemu_log_in_addr_range(pc_start))) {
5998         FILE *logfile = qemu_log_trylock();
5999         if (logfile) {
6000             fprintf(logfile, "OP:\n");
6001             tcg_dump_ops(s, logfile, false);
6002             fprintf(logfile, "\n");
6003             qemu_log_unlock(logfile);
6004         }
6005     }
6006 
6007 #ifdef CONFIG_DEBUG_TCG
6008     /* Ensure all labels referenced have been emitted.  */
6009     {
6010         TCGLabel *l;
6011         bool error = false;
6012 
6013         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6014             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6015                 qemu_log_mask(CPU_LOG_TB_OP,
6016                               "$L%d referenced but not present.\n", l->id);
6017                 error = true;
6018             }
6019         }
6020         assert(!error);
6021     }
6022 #endif
6023 
6024 #ifdef CONFIG_PROFILER
6025     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
6026 #endif
6027 
6028     tcg_optimize(s);
6029 
6030 #ifdef CONFIG_PROFILER
6031     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
6032     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
6033 #endif
6034 
6035     reachable_code_pass(s);
6036     liveness_pass_0(s);
6037     liveness_pass_1(s);
6038 
6039     if (s->nb_indirects > 0) {
6040         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6041                      && qemu_log_in_addr_range(pc_start))) {
6042             FILE *logfile = qemu_log_trylock();
6043             if (logfile) {
6044                 fprintf(logfile, "OP before indirect lowering:\n");
6045                 tcg_dump_ops(s, logfile, false);
6046                 fprintf(logfile, "\n");
6047                 qemu_log_unlock(logfile);
6048             }
6049         }
6050 
6051         /* Replace indirect temps with direct temps.  */
6052         if (liveness_pass_2(s)) {
6053             /* If changes were made, re-run liveness.  */
6054             liveness_pass_1(s);
6055         }
6056     }
6057 
6058 #ifdef CONFIG_PROFILER
6059     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
6060 #endif
6061 
6062     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6063                  && qemu_log_in_addr_range(pc_start))) {
6064         FILE *logfile = qemu_log_trylock();
6065         if (logfile) {
6066             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6067             tcg_dump_ops(s, logfile, true);
6068             fprintf(logfile, "\n");
6069             qemu_log_unlock(logfile);
6070         }
6071     }
6072 
6073     /* Initialize goto_tb jump offsets. */
6074     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6075     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6076     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6077     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6078 
6079     tcg_reg_alloc_start(s);
6080 
6081     /*
6082      * Reset the buffer pointers when restarting after overflow.
6083      * TODO: Move this into translate-all.c with the rest of the
6084      * buffer management.  Having only this done here is confusing.
6085      */
6086     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6087     s->code_ptr = s->code_buf;
6088 
6089 #ifdef TCG_TARGET_NEED_LDST_LABELS
6090     QSIMPLEQ_INIT(&s->ldst_labels);
6091 #endif
6092 #ifdef TCG_TARGET_NEED_POOL_LABELS
6093     s->pool_labels = NULL;
6094 #endif
6095 
6096     num_insns = -1;
6097     QTAILQ_FOREACH(op, &s->ops, link) {
6098         TCGOpcode opc = op->opc;
6099 
6100 #ifdef CONFIG_PROFILER
6101         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
6102 #endif
6103 
6104         switch (opc) {
6105         case INDEX_op_mov_i32:
6106         case INDEX_op_mov_i64:
6107         case INDEX_op_mov_vec:
6108             tcg_reg_alloc_mov(s, op);
6109             break;
6110         case INDEX_op_dup_vec:
6111             tcg_reg_alloc_dup(s, op);
6112             break;
6113         case INDEX_op_insn_start:
6114             if (num_insns >= 0) {
6115                 size_t off = tcg_current_code_size(s);
6116                 s->gen_insn_end_off[num_insns] = off;
6117                 /* Assert that we do not overflow our stored offset.  */
6118                 assert(s->gen_insn_end_off[num_insns] == off);
6119             }
6120             num_insns++;
6121             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
6122                 s->gen_insn_data[num_insns][i] =
6123                     tcg_get_insn_start_param(op, i);
6124             }
6125             break;
6126         case INDEX_op_discard:
6127             temp_dead(s, arg_temp(op->args[0]));
6128             break;
6129         case INDEX_op_set_label:
6130             tcg_reg_alloc_bb_end(s, s->reserved_regs);
6131             tcg_out_label(s, arg_label(op->args[0]));
6132             break;
6133         case INDEX_op_call:
6134             tcg_reg_alloc_call(s, op);
6135             break;
6136         case INDEX_op_exit_tb:
6137             tcg_out_exit_tb(s, op->args[0]);
6138             break;
6139         case INDEX_op_goto_tb:
6140             tcg_out_goto_tb(s, op->args[0]);
6141             break;
6142         case INDEX_op_dup2_vec:
6143             if (tcg_reg_alloc_dup2(s, op)) {
6144                 break;
6145             }
6146             /* fall through */
6147         default:
6148             /* Sanity check that we've not introduced any unhandled opcodes. */
6149             tcg_debug_assert(tcg_op_supported(opc));
6150             /* Note: in order to speed up the code, it would be much
6151                faster to have specialized register allocator functions for
6152                some common argument patterns */
6153             tcg_reg_alloc_op(s, op);
6154             break;
6155         }
6156         /* Test for (pending) buffer overflow.  The assumption is that any
6157            one operation beginning below the high water mark cannot overrun
6158            the buffer completely.  Thus we can test for overflow after
6159            generating code without having to check during generation.  */
6160         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
6161             return -1;
6162         }
6163         /* Test for TB overflow, as seen by gen_insn_end_off.  */
6164         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
6165             return -2;
6166         }
6167     }
6168     tcg_debug_assert(num_insns >= 0);
6169     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
6170 
6171     /* Generate TB finalization at the end of block */
6172 #ifdef TCG_TARGET_NEED_LDST_LABELS
6173     i = tcg_out_ldst_finalize(s);
6174     if (i < 0) {
6175         return i;
6176     }
6177 #endif
6178 #ifdef TCG_TARGET_NEED_POOL_LABELS
6179     i = tcg_out_pool_finalize(s);
6180     if (i < 0) {
6181         return i;
6182     }
6183 #endif
6184     if (!tcg_resolve_relocs(s)) {
6185         return -2;
6186     }
6187 
6188 #ifndef CONFIG_TCG_INTERPRETER
6189     /* flush instruction cache */
6190     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
6191                         (uintptr_t)s->code_buf,
6192                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
6193 #endif
6194 
6195     return tcg_current_code_size(s);
6196 }
6197 
6198 #ifdef CONFIG_PROFILER
6199 void tcg_dump_info(GString *buf)
6200 {
6201     TCGProfile prof = {};
6202     const TCGProfile *s;
6203     int64_t tb_count;
6204     int64_t tb_div_count;
6205     int64_t tot;
6206 
6207     tcg_profile_snapshot_counters(&prof);
6208     s = &prof;
6209     tb_count = s->tb_count;
6210     tb_div_count = tb_count ? tb_count : 1;
6211     tot = s->interm_time + s->code_time;
6212 
6213     g_string_append_printf(buf, "JIT cycles          %" PRId64
6214                            " (%0.3f s at 2.4 GHz)\n",
6215                            tot, tot / 2.4e9);
6216     g_string_append_printf(buf, "translated TBs      %" PRId64
6217                            " (aborted=%" PRId64 " %0.1f%%)\n",
6218                            tb_count, s->tb_count1 - tb_count,
6219                            (double)(s->tb_count1 - s->tb_count)
6220                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
6221     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
6222                            (double)s->op_count / tb_div_count, s->op_count_max);
6223     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
6224                            (double)s->del_op_count / tb_div_count);
6225     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
6226                            (double)s->temp_count / tb_div_count,
6227                            s->temp_count_max);
6228     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
6229                            (double)s->code_out_len / tb_div_count);
6230     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
6231                            (double)s->search_out_len / tb_div_count);
6232 
6233     g_string_append_printf(buf, "cycles/op           %0.1f\n",
6234                            s->op_count ? (double)tot / s->op_count : 0);
6235     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
6236                            s->code_in_len ? (double)tot / s->code_in_len : 0);
6237     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
6238                            s->code_out_len ? (double)tot / s->code_out_len : 0);
6239     g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
6240                            s->search_out_len ?
6241                            (double)tot / s->search_out_len : 0);
6242     if (tot == 0) {
6243         tot = 1;
6244     }
6245     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
6246                            (double)s->interm_time / tot * 100.0);
6247     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
6248                            (double)s->code_time / tot * 100.0);
6249     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
6250                            (double)s->opt_time / (s->code_time ?
6251                                                   s->code_time : 1)
6252                            * 100.0);
6253     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
6254                            (double)s->la_time / (s->code_time ?
6255                                                  s->code_time : 1) * 100.0);
6256     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
6257                            s->restore_count);
6258     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
6259                            s->restore_count ?
6260                            (double)s->restore_time / s->restore_count : 0);
6261 }
6262 #else
6263 void tcg_dump_info(GString *buf)
6264 {
6265     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
6266 }
6267 #endif
6268 
#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE to indicate both what value to
       put into the ELF image and to indicate support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered symbol file; here it holds our fake in-memory ELF image. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

/* Registry head that the debugger inspects; layout and field semantics
   are fixed by the GDB JIT interface documentation. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB places a breakpoint in this function; calling it signals that
   __jit_debug_descriptor has been updated.  noinline plus the empty
   asm prevent the call from being optimized away. */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
6314 
/*
 * Return the byte offset of @str within the section string table
 * @strtab.  The table is a sequence of NUL-terminated strings whose
 * first entry is the empty string at offset 0.  @str must be present;
 * there is deliberately no bounds check, as all callers pass strings
 * that are known members of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    /* Start past the initial empty string; step over one entry
       (including its terminator) per iteration. */
    for (const char *p = strtab + 1; ; p += strlen(p) + 1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
    }
}
6326 
6327 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
6328                                  const void *debug_frame,
6329                                  size_t debug_frame_size)
6330 {
6331     struct __attribute__((packed)) DebugInfo {
6332         uint32_t  len;
6333         uint16_t  version;
6334         uint32_t  abbrev;
6335         uint8_t   ptr_size;
6336         uint8_t   cu_die;
6337         uint16_t  cu_lang;
6338         uintptr_t cu_low_pc;
6339         uintptr_t cu_high_pc;
6340         uint8_t   fn_die;
6341         char      fn_name[16];
6342         uintptr_t fn_low_pc;
6343         uintptr_t fn_high_pc;
6344         uint8_t   cu_eoc;
6345     };
6346 
6347     struct ElfImage {
6348         ElfW(Ehdr) ehdr;
6349         ElfW(Phdr) phdr;
6350         ElfW(Shdr) shdr[7];
6351         ElfW(Sym)  sym[2];
6352         struct DebugInfo di;
6353         uint8_t    da[24];
6354         char       str[80];
6355     };
6356 
6357     struct ElfImage *img;
6358 
6359     static const struct ElfImage img_template = {
6360         .ehdr = {
6361             .e_ident[EI_MAG0] = ELFMAG0,
6362             .e_ident[EI_MAG1] = ELFMAG1,
6363             .e_ident[EI_MAG2] = ELFMAG2,
6364             .e_ident[EI_MAG3] = ELFMAG3,
6365             .e_ident[EI_CLASS] = ELF_CLASS,
6366             .e_ident[EI_DATA] = ELF_DATA,
6367             .e_ident[EI_VERSION] = EV_CURRENT,
6368             .e_type = ET_EXEC,
6369             .e_machine = ELF_HOST_MACHINE,
6370             .e_version = EV_CURRENT,
6371             .e_phoff = offsetof(struct ElfImage, phdr),
6372             .e_shoff = offsetof(struct ElfImage, shdr),
6373             .e_ehsize = sizeof(ElfW(Shdr)),
6374             .e_phentsize = sizeof(ElfW(Phdr)),
6375             .e_phnum = 1,
6376             .e_shentsize = sizeof(ElfW(Shdr)),
6377             .e_shnum = ARRAY_SIZE(img->shdr),
6378             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
6379 #ifdef ELF_HOST_FLAGS
6380             .e_flags = ELF_HOST_FLAGS,
6381 #endif
6382 #ifdef ELF_OSABI
6383             .e_ident[EI_OSABI] = ELF_OSABI,
6384 #endif
6385         },
6386         .phdr = {
6387             .p_type = PT_LOAD,
6388             .p_flags = PF_X,
6389         },
6390         .shdr = {
6391             [0] = { .sh_type = SHT_NULL },
6392             /* Trick: The contents of code_gen_buffer are not present in
6393                this fake ELF file; that got allocated elsewhere.  Therefore
6394                we mark .text as SHT_NOBITS (similar to .bss) so that readers
6395                will not look for contents.  We can record any address.  */
6396             [1] = { /* .text */
6397                 .sh_type = SHT_NOBITS,
6398                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
6399             },
6400             [2] = { /* .debug_info */
6401                 .sh_type = SHT_PROGBITS,
6402                 .sh_offset = offsetof(struct ElfImage, di),
6403                 .sh_size = sizeof(struct DebugInfo),
6404             },
6405             [3] = { /* .debug_abbrev */
6406                 .sh_type = SHT_PROGBITS,
6407                 .sh_offset = offsetof(struct ElfImage, da),
6408                 .sh_size = sizeof(img->da),
6409             },
6410             [4] = { /* .debug_frame */
6411                 .sh_type = SHT_PROGBITS,
6412                 .sh_offset = sizeof(struct ElfImage),
6413             },
6414             [5] = { /* .symtab */
6415                 .sh_type = SHT_SYMTAB,
6416                 .sh_offset = offsetof(struct ElfImage, sym),
6417                 .sh_size = sizeof(img->sym),
6418                 .sh_info = 1,
6419                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
6420                 .sh_entsize = sizeof(ElfW(Sym)),
6421             },
6422             [6] = { /* .strtab */
6423                 .sh_type = SHT_STRTAB,
6424                 .sh_offset = offsetof(struct ElfImage, str),
6425                 .sh_size = sizeof(img->str),
6426             }
6427         },
6428         .sym = {
6429             [1] = { /* code_gen_buffer */
6430                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
6431                 .st_shndx = 1,
6432             }
6433         },
6434         .di = {
6435             .len = sizeof(struct DebugInfo) - 4,
6436             .version = 2,
6437             .ptr_size = sizeof(void *),
6438             .cu_die = 1,
6439             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
6440             .fn_die = 2,
6441             .fn_name = "code_gen_buffer"
6442         },
6443         .da = {
6444             1,          /* abbrev number (the cu) */
6445             0x11, 1,    /* DW_TAG_compile_unit, has children */
6446             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
6447             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6448             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6449             0, 0,       /* end of abbrev */
6450             2,          /* abbrev number (the fn) */
6451             0x2e, 0,    /* DW_TAG_subprogram, no children */
6452             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
6453             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
6454             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
6455             0, 0,       /* end of abbrev */
6456             0           /* no more abbrev */
6457         },
6458         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
6459                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
6460     };
6461 
6462     /* We only need a single jit entry; statically allocate it.  */
6463     static struct jit_code_entry one_entry;
6464 
6465     uintptr_t buf = (uintptr_t)buf_ptr;
6466     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
6467     DebugFrameHeader *dfh;
6468 
6469     img = g_malloc(img_size);
6470     *img = img_template;
6471 
6472     img->phdr.p_vaddr = buf;
6473     img->phdr.p_paddr = buf;
6474     img->phdr.p_memsz = buf_size;
6475 
6476     img->shdr[1].sh_name = find_string(img->str, ".text");
6477     img->shdr[1].sh_addr = buf;
6478     img->shdr[1].sh_size = buf_size;
6479 
6480     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
6481     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
6482 
6483     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
6484     img->shdr[4].sh_size = debug_frame_size;
6485 
6486     img->shdr[5].sh_name = find_string(img->str, ".symtab");
6487     img->shdr[6].sh_name = find_string(img->str, ".strtab");
6488 
6489     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
6490     img->sym[1].st_value = buf;
6491     img->sym[1].st_size = buf_size;
6492 
6493     img->di.cu_low_pc = buf;
6494     img->di.cu_high_pc = buf + buf_size;
6495     img->di.fn_low_pc = buf;
6496     img->di.fn_high_pc = buf + buf_size;
6497 
6498     dfh = (DebugFrameHeader *)(img + 1);
6499     memcpy(dfh, debug_frame, debug_frame_size);
6500     dfh->fde.func_start = buf;
6501     dfh->fde.func_len = buf_size;
6502 
6503 #ifdef DEBUG_JIT
6504     /* Enable this block to be able to debug the ELF image file creation.
6505        One can use readelf, objdump, or other inspection utilities.  */
6506     {
6507         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
6508         FILE *f = fopen(jit, "w+b");
6509         if (f) {
6510             if (fwrite(img, img_size, 1, f) != img_size) {
6511                 /* Avoid stupid unused return value warning for fwrite.  */
6512             }
6513             fclose(f);
6514         }
6515     }
6516 #endif
6517 
6518     one_entry.symfile_addr = img;
6519     one_entry.symfile_size = img_size;
6520 
6521     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
6522     __jit_debug_descriptor.relevant_entry = &one_entry;
6523     __jit_debug_descriptor.first_entry = &one_entry;
6524     __jit_debug_register_code();
6525 }
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

/* Intentionally empty: without ELF_HOST_MACHINE there is no debug-info
   image to construct or register. */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

/* Likewise a no-op, so callers need not be conditionalized on
   ELF_HOST_MACHINE. */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */
6540 
#if !TCG_TARGET_MAYBE_vec
/* Stub for backends without vector support: no vector opcode should
   ever require expansion, so reaching this is a programming error. */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif
6547