xref: /qemu/tcg/tcg.c (revision 971febb8)
/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/cacheflush.h"
#include "qemu/cacheinfo.h"
#include "qemu/timer.h"
#include "exec/translation-block.h"
#include "exec/tlb-common.h"
#include "tcg/startup.h"
#include "tcg/tcg-op-common.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#if HOST_BIG_ENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif
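
/*
 * ELF_CLASS and ELF_DATA describe the host, and fill in the identity
 * bytes of the in-memory ELF image handed to GDB's JIT interface by
 * tcg_register_jit_int() below; e.g. a little-endian 64-bit host
 * selects ELFCLASS64 and ELFDATA2LSB.
 */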

#include "elf.h"
#include "exec/log.h"
#include "tcg/tcg-ldst.h"
#include "tcg/tcg-temp-internal.h"
#include "tcg-internal.h"
#include "tcg/perf.h"
#ifdef CONFIG_USER_ONLY
#include "exec/user/guest-base.h"
#endif

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

typedef struct TCGLabelQemuLdst {
    bool is_ld;             /* qemu_ld: true, qemu_st: false */
    MemOpIdx oi;
    TCGType type;           /* result type of a load */
    TCGReg addrlo_reg;      /* reg index for low word of guest virtual addr */
    TCGReg addrhi_reg;      /* reg index for high word of guest virtual addr */
    TCGReg datalo_reg;      /* reg index for low word to be loaded or stored */
    TCGReg datahi_reg;      /* reg index for high word to be loaded or stored */
    const tcg_insn_unit *raddr;   /* host addr at which to resume after the slow path */
    tcg_insn_unit *label_ptr[2]; /* label pointers to be updated */
    QSIMPLEQ_ENTRY(TCGLabelQemuLdst) next;
} TCGLabelQemuLdst;
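
/*
 * Lifecycle sketch: while emitting the fast path of a qemu_ld/st, a
 * backend queues one of these records (with the registers and the
 * label_ptr[] patch sites still unresolved) on the context's ldst
 * label list; once all ops have been emitted, tcg_out_ldst_finalize()
 * walks the queue and generates the out-of-line slow paths that call
 * the qemu_ld/st_helpers below.
 */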

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_tb_start(TCGContext *s);
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_ext8s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext16s(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_ext8u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext16u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32s(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_ext32u(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_exts_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extu_i32_i64(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_extrl_i64_i32(TCGContext *s, TCGReg ret, TCGReg arg);
static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
static bool tcg_out_xchg(TCGContext *s, TCGType type, TCGReg r1, TCGReg r2);
static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
static void tcg_out_goto_tb(TCGContext *s, int which);
static void tcg_out_op(TCGContext *s, TCGOpcode opc,
                       const TCGArg args[TCG_MAX_OP_ARGS],
                       const int const_args[TCG_MAX_OP_ARGS]);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                           unsigned vecl, unsigned vece,
                           const TCGArg args[TCG_MAX_OP_ARGS],
                           const int const_args[TCG_MAX_OP_ARGS]);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
                         const TCGHelperInfo *info);
static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
static bool tcg_target_const_match(int64_t val, int ct,
                                   TCGType type, TCGCond cond, int vece);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#ifndef CONFIG_USER_ONLY
#define guest_base  ({ qemu_build_not_reached(); (uintptr_t)0; })
#endif
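
/*
 * In system mode there is no guest_base; the statement expression
 * above turns any accidental use into a build-time failure via
 * qemu_build_not_reached(), while still type-checking as a uintptr_t.
 */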

typedef struct TCGLdstHelperParam {
    TCGReg (*ra_gen)(TCGContext *s, const TCGLabelQemuLdst *l, int arg_reg);
    unsigned ntmp;
    int tmp[3];
} TCGLdstHelperParam;

static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *l,
                                  bool load_sign, const TCGLdstHelperParam *p)
    __attribute__((unused));
static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *l,
                                   const TCGLdstHelperParam *p)
    __attribute__((unused));

static void * const qemu_ld_helpers[MO_SSIZE + 1] __attribute__((unused)) = {
    [MO_UB] = helper_ldub_mmu,
    [MO_SB] = helper_ldsb_mmu,
    [MO_UW] = helper_lduw_mmu,
    [MO_SW] = helper_ldsw_mmu,
    [MO_UL] = helper_ldul_mmu,
    [MO_UQ] = helper_ldq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_SL] = helper_ldsl_mmu,
    [MO_128] = helper_ld16_mmu,
#endif
};

static void * const qemu_st_helpers[MO_SIZE + 1] __attribute__((unused)) = {
    [MO_8]  = helper_stb_mmu,
    [MO_16] = helper_stw_mmu,
    [MO_32] = helper_stl_mmu,
    [MO_64] = helper_stq_mmu,
#if TCG_TARGET_REG_BITS == 64
    [MO_128] = helper_st16_mmu,
#endif
};

typedef struct {
    MemOp atom;   /* lg2 bits of atomicity required */
    MemOp align;  /* lg2 bits of alignment to use */
} TCGAtomAlign;

static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
                                           MemOp host_atom, bool allow_two_ops)
    __attribute__((unused));

#ifdef CONFIG_USER_ONLY
bool tcg_use_softmmu;
#endif

TCGContext tcg_init_ctx;
__thread TCGContext *tcg_ctx;

TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
TCGv_env tcg_env;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
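
/*
 * Backends emit instructions through whichever of the above matches
 * TCG_TARGET_INSN_UNIT_SIZE; e.g. a fixed-width 32-bit RISC backend
 * simply does tcg_out32(s, insn), while the tcg_patchN variants
 * rewrite an already-emitted unit in place during relocation.
 */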

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->branches);
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
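
/*
 * Put together: a branch to a not-yet-emitted label calls
 * tcg_out_reloc() to record the patch site; tcg_out_label() later pins
 * the label to the current output pointer; and tcg_resolve_relocs()
 * runs at the end of code generation, applying patch_reloc() to every
 * recorded site (failure signals that a displacement did not fit).
 */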

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}

static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}

static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}

static int __attribute__((unused))
tlb_mask_table_ofs(TCGContext *s, int which)
{
    return (offsetof(CPUNegativeOffsetState, tlb.f[which]) -
            sizeof(CPUNegativeOffsetState));
}

/* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

/*
 * Used by tcg_out_movext{1,2} to hold the arguments for tcg_out_movext.
 * By the time we arrive at tcg_out_movext1, @dst is always a TCGReg.
 *
 * However, tcg_out_helper_load_slots reuses this field to hold an
 * argument slot number (which may designate an argument register or an
 * argument stack slot), converting to TCGReg once all arguments that
 * are destined for the stack are processed.
 */
typedef struct TCGMovExtend {
    unsigned dst;
    TCGReg src;
    TCGType dst_type;
    TCGType src_type;
    MemOp src_ext;
} TCGMovExtend;

/**
 * tcg_out_movext -- move and extend
 * @s: tcg context
 * @dst_type: integral type for destination
 * @dst: destination register
 * @src_type: integral type for source
 * @src_ext: extension to apply to source
 * @src: source register
 *
 * Move or extend @src into @dst, depending on @src_ext and the types.
 */
static void tcg_out_movext(TCGContext *s, TCGType dst_type, TCGReg dst,
                           TCGType src_type, MemOp src_ext, TCGReg src)
{
    switch (src_ext) {
    case MO_UB:
        tcg_out_ext8u(s, dst, src);
        break;
    case MO_SB:
        tcg_out_ext8s(s, dst_type, dst, src);
        break;
    case MO_UW:
        tcg_out_ext16u(s, dst, src);
        break;
    case MO_SW:
        tcg_out_ext16s(s, dst_type, dst, src);
        break;
    case MO_UL:
    case MO_SL:
        if (dst_type == TCG_TYPE_I32) {
            if (src_type == TCG_TYPE_I32) {
                tcg_out_mov(s, TCG_TYPE_I32, dst, src);
            } else {
                tcg_out_extrl_i64_i32(s, dst, src);
            }
        } else if (src_type == TCG_TYPE_I32) {
            if (src_ext & MO_SIGN) {
                tcg_out_exts_i32_i64(s, dst, src);
            } else {
                tcg_out_extu_i32_i64(s, dst, src);
            }
        } else {
            if (src_ext & MO_SIGN) {
                tcg_out_ext32s(s, dst, src);
            } else {
                tcg_out_ext32u(s, dst, src);
            }
        }
        break;
    case MO_UQ:
        tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
        if (dst_type == TCG_TYPE_I32) {
            tcg_out_extrl_i64_i32(s, dst, src);
        } else {
            tcg_out_mov(s, TCG_TYPE_I64, dst, src);
        }
        break;
    default:
        g_assert_not_reached();
    }
}
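
/*
 * Example (a minimal sketch): sign-extending a 32-bit temp into a
 * 64-bit register is
 *     tcg_out_movext(s, TCG_TYPE_I64, dst, TCG_TYPE_I32, MO_SL, src);
 * which reaches the MO_SL case above and emits tcg_out_exts_i32_i64().
 */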

/* Minor variations on a theme, using a structure. */
static void tcg_out_movext1_new_src(TCGContext *s, const TCGMovExtend *i,
                                    TCGReg src)
{
    tcg_out_movext(s, i->dst_type, i->dst, i->src_type, i->src_ext, src);
}

static void tcg_out_movext1(TCGContext *s, const TCGMovExtend *i)
{
    tcg_out_movext1_new_src(s, i, i->src);
}

/**
 * tcg_out_movext2 -- move and extend two pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for both @i1 and @i2, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext2(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;

    if (i1->dst != src2) {
        tcg_out_movext1(s, i1);
        tcg_out_movext1(s, i2);
        return;
    }
    if (i2->dst == src1) {
        TCGType src1_type = i1->src_type;
        TCGType src2_type = i2->src_type;

        if (tcg_out_xchg(s, MAX(src1_type, src2_type), src1, src2)) {
            /* The data is now in the correct registers, now extend. */
            src1 = i2->src;
            src2 = i1->src;
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, src1_type, scratch, src1);
            src1 = scratch;
        }
    }
    tcg_out_movext1_new_src(s, i2, src2);
    tcg_out_movext1_new_src(s, i1, src1);
}

/**
 * tcg_out_movext3 -- move and extend three pairs
 * @s: tcg context
 * @i1: first move description
 * @i2: second move description
 * @i3: third move description
 * @scratch: temporary register, or -1 for none
 *
 * As tcg_out_movext, for all of @i1, @i2 and @i3, caring for overlap
 * between the sources and destinations.
 */

static void tcg_out_movext3(TCGContext *s, const TCGMovExtend *i1,
                            const TCGMovExtend *i2, const TCGMovExtend *i3,
                            int scratch)
{
    TCGReg src1 = i1->src;
    TCGReg src2 = i2->src;
    TCGReg src3 = i3->src;

    if (i1->dst != src2 && i1->dst != src3) {
        tcg_out_movext1(s, i1);
        tcg_out_movext2(s, i2, i3, scratch);
        return;
    }
    if (i2->dst != src1 && i2->dst != src3) {
        tcg_out_movext1(s, i2);
        tcg_out_movext2(s, i1, i3, scratch);
        return;
    }
    if (i3->dst != src1 && i3->dst != src2) {
        tcg_out_movext1(s, i3);
        tcg_out_movext2(s, i1, i2, scratch);
        return;
    }

    /*
     * There is a cycle.  Since there are only 3 nodes, the cycle is
     * either "clockwise" or "anti-clockwise", and can be solved with
     * a single scratch or two xchg.
     */
    if (i1->dst == src2 && i2->dst == src3 && i3->dst == src1) {
        /* "Clockwise" */
        if (tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2)) {
            tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i3);
            tcg_out_movext1(s, i2);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else if (i1->dst == src3 && i2->dst == src1 && i3->dst == src2) {
        /* "Anti-clockwise" */
        if (tcg_out_xchg(s, MAX(i2->src_type, i3->src_type), src2, src3)) {
            tcg_out_xchg(s, MAX(i1->src_type, i2->src_type), src1, src2);
            /* The data is now in the correct registers, now extend. */
            tcg_out_movext1_new_src(s, i1, i1->dst);
            tcg_out_movext1_new_src(s, i2, i2->dst);
            tcg_out_movext1_new_src(s, i3, i3->dst);
        } else {
            tcg_debug_assert(scratch >= 0);
            tcg_out_mov(s, i1->src_type, scratch, src1);
            tcg_out_movext1(s, i2);
            tcg_out_movext1(s, i3);
            tcg_out_movext1_new_src(s, i1, scratch);
        }
    } else {
        g_assert_not_reached();
    }
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1),
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
#define C_N1O1_I1(O1, O2, I1)           { .args_ct_str = { "&" #O1, #O2, #I1 } },
#define C_N2_I1(O1, O2, I1)             { .args_ct_str = { "&" #O1, "&" #O2, #I1 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { "&" #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_N1O1_I1
#undef C_N2_I1
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4
#undef C_N1_O1_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
#define C_N1O1_I1(O1, O2, I1)           C_PFX3(c_n1o1_i1_, O1, O2, I1)
#define C_N2_I1(O1, O2, I1)             C_PFX3(c_n2_i1_, O1, O2, I1)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
#define C_N1_O1_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_n1_o1_i4_, O1, O2, I1, I2, I3, I4)
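
/*
 * Worked example of the three expansions above: a backend line
 * C_O1_I2(r, r, ri) contributes the enumerator c_o1_i2_r_r_ri to
 * TCGConstraintSetIndex, the entry { .args_ct_str = { "r", "r", "ri" } }
 * to constraint_sets[], and the same enumerator as the value returned
 * from tcg_target_op_def() for ops using that constraint set.
 */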

#include "tcg-target.c.inc"

#ifndef CONFIG_TCG_INTERPRETER
/* Validate CPUTLBDescFast placement. */
QEMU_BUILD_BUG_ON((int)(offsetof(CPUNegativeOffsetState, tlb.f[0]) -
                        sizeof(CPUNegativeOffsetState))
                  < MIN_TLB_MASK_TABLE_OFS);
#endif

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In system-mode each caller registers its context in tcg_ctxs[]. Note that in
 * system-mode tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in system-mode keeps code that
 * iterates over the array (e.g. tcg_code_size()) the same for both system/user
 * modes.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&tcg_cur_ctxs);
    g_assert(n < tcg_max_ctxs);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
        tcg_region_initial_alloc(s);
    }

    tcg_ctx = s;
}
#endif /* !CONFIG_USER_ONLY */

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
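
/*
 * This is the slow path of tcg_malloc() (inline in tcg.h), which bumps
 * pool_cur and only calls here when the current chunk is exhausted.
 * Nothing is freed individually: tcg_pool_reset() below releases the
 * large allocations and rewinds the chunk list for the next TB.
 */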

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

/*
 * Create TCGHelperInfo structures for "tcg/tcg-ldst.h" functions,
 * akin to what "exec/helper-tcg.h" does with DEF_HELPER_FLAGS_N.
 * We only use these for layout in tcg_out_ld_helper_ret and
 * tcg_out_st_helper_args, and share them between several of
 * the helpers, with the end result that it's easier to build manually.
 */

#if TCG_TARGET_REG_BITS == 32
# define dh_typecode_ttl  dh_typecode_i32
#else
# define dh_typecode_ttl  dh_typecode_i64
#endif

static TCGHelperInfo info_helper_ld32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(ttl, 0)  /* return tcg_target_ulong */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i64, 0)  /* return uint64_t */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_ld128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(i128, 0) /* return Int128 */
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* unsigned oi */
              | dh_typemask(ptr, 4)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st32_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i32, 3)  /* uint32_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st64_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i64, 3)  /* uint64_t data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

static TCGHelperInfo info_helper_st128_mmu = {
    .flags = TCG_CALL_NO_WG,
    .typemask = dh_typemask(void, 0)
              | dh_typemask(env, 1)
              | dh_typemask(i64, 2)  /* uint64_t addr */
              | dh_typemask(i128, 3) /* Int128 data */
              | dh_typemask(i32, 4)  /* unsigned oi */
              | dh_typemask(ptr, 5)  /* uintptr_t ra */
};

#ifdef CONFIG_TCG_INTERPRETER
static ffi_type *typecode_to_ffi(int argmask)
{
    /*
     * libffi does not support __int128_t, so we have forced Int128
     * to use the structure definition instead of the builtin type.
     */
    static ffi_type *ffi_type_i128_elements[3] = {
        &ffi_type_uint64,
        &ffi_type_uint64,
        NULL
    };
    static ffi_type ffi_type_i128 = {
        .size = 16,
        .alignment = __alignof__(Int128),
        .type = FFI_TYPE_STRUCT,
        .elements = ffi_type_i128_elements,
    };

    switch (argmask) {
    case dh_typecode_void:
        return &ffi_type_void;
    case dh_typecode_i32:
        return &ffi_type_uint32;
    case dh_typecode_s32:
        return &ffi_type_sint32;
    case dh_typecode_i64:
        return &ffi_type_uint64;
    case dh_typecode_s64:
        return &ffi_type_sint64;
    case dh_typecode_ptr:
        return &ffi_type_pointer;
    case dh_typecode_i128:
        return &ffi_type_i128;
    }
    g_assert_not_reached();
}

static ffi_cif *init_ffi_layout(TCGHelperInfo *info)
{
    unsigned typemask = info->typemask;
    struct {
        ffi_cif cif;
        ffi_type *args[];
    } *ca;
    ffi_status status;
    int nargs;

    /* Ignoring the return type, find the last non-zero field. */
    nargs = 32 - clz32(typemask >> 3);
    nargs = DIV_ROUND_UP(nargs, 3);
    assert(nargs <= MAX_CALL_IARGS);

    ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
    ca->cif.rtype = typecode_to_ffi(typemask & 7);
    ca->cif.nargs = nargs;

    if (nargs != 0) {
        ca->cif.arg_types = ca->args;
        for (int j = 0; j < nargs; ++j) {
            int typecode = extract32(typemask, (j + 1) * 3, 3);
            ca->args[j] = typecode_to_ffi(typecode);
        }
    }

    status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
                          ca->cif.rtype, ca->cif.arg_types);
    assert(status == FFI_OK);

    return &ca->cif;
}

#define HELPER_INFO_INIT(I)      (&(I)->cif)
#define HELPER_INFO_INIT_VAL(I)  init_ffi_layout(I)
#else
#define HELPER_INFO_INIT(I)      (&(I)->init)
#define HELPER_INFO_INIT_VAL(I)  1
#endif /* CONFIG_TCG_INTERPRETER */

static inline bool arg_slot_reg_p(unsigned arg_slot)
{
    /*
     * Split the sizeof away from the comparison to avoid Werror from
     * "unsigned < 0 is always false", when iarg_regs is empty.
     */
    unsigned nreg = ARRAY_SIZE(tcg_target_call_iarg_regs);
    return arg_slot < nreg;
}

static inline int arg_slot_stk_ofs(unsigned arg_slot)
{
    unsigned max = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);

    tcg_debug_assert(stk_slot < max);
    return TCG_TARGET_CALL_STACK_OFFSET + stk_slot * sizeof(tcg_target_long);
}
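
/*
 * For example (host-specific; assuming six integer argument registers,
 * as on x86-64): slots 0-5 name registers, and slot 8 maps to the
 * stack at TCG_TARGET_CALL_STACK_OFFSET + 2 * sizeof(tcg_target_long).
 */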

typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;

static void layout_arg_even(TCGCumulativeArgs *cum)
{
    cum->arg_slot += cum->arg_slot & 1;
}

static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
                         TCGCallArgumentKind kind)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    *loc = (TCGCallArgumentLoc){
        .kind = kind,
        .arg_idx = cum->arg_idx,
        .arg_slot = cum->arg_slot,
    };
    cum->info_in_idx++;
    cum->arg_slot++;
}

static void layout_arg_normal_n(TCGCumulativeArgs *cum,
                                TCGHelperInfo *info, int n)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];

    for (int i = 0; i < n; ++i) {
        /* Layout all using the same arg_idx, adjusting the subindex. */
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_NORMAL,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .arg_slot = cum->arg_slot + i,
        };
    }
    cum->info_in_idx += n;
    cum->arg_slot += n;
}

static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
{
    TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
    int n = 128 / TCG_TARGET_REG_BITS;

    /* The first subindex carries the pointer. */
    layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);

    /*
     * The callee is allowed to clobber memory associated with a
     * structure passed by reference, so we must make copies.
     * Allocate space from "ref_slot", which will be adjusted to
     * follow the parameters on the stack.
     */
    loc[0].ref_slot = cum->ref_slot;

    /*
     * Subsequent words also go into the reference slot, but
     * do not accumulate into the regular arguments.
     */
    for (int i = 1; i < n; ++i) {
        loc[i] = (TCGCallArgumentLoc){
            .kind = TCG_CALL_ARG_BY_REF_N,
            .arg_idx = cum->arg_idx,
            .tmp_subindex = i,
            .ref_slot = cum->ref_slot + i,
        };
    }
    cum->info_in_idx += n - 1;  /* i=0 accounted for in layout_arg_1 */
    cum->ref_slot += n;
}

static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);
        ref_base += max_reg_slots;

        if (ref_base != 0) {
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
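
/*
 * Example layout (a sketch, assuming a 64-bit host with
 * TCG_TARGET_CALL_ARG_I128 == TCG_CALL_ARG_BY_REF): for a helper taking
 * (env, Int128 data), env occupies slot 0, slot 1 carries the pointer
 * to the Int128 copy, and the two data words live in the relocated
 * ref_slot area past the ordinary stack parameters.
 */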

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

static void tcg_context_init(unsigned max_cpus)
{
    TCGContext *s = &tcg_init_ctx;
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    init_call_layout(&info_helper_ld32_mmu);
    init_call_layout(&info_helper_ld64_mmu);
    init_call_layout(&info_helper_ld128_mmu);
    init_call_layout(&info_helper_st32_mmu);
    init_call_layout(&info_helper_st64_mmu);
    init_call_layout(&info_helper_st128_mmu);

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In system-mode we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    tcg_cur_ctxs = 1;
    tcg_max_ctxs = 1;
#else
    tcg_max_ctxs = max_cpus;
    tcg_ctxs = g_new0(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    tcg_env = temp_tcgv_ptr(ts);
}

void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(void)
{
    TCGContext *s = tcg_ctx;
    size_t prologue_size;

    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);
    perf_report_prologue(s->code_gen_ptr, prologue_size);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
            if (s->data_gen_ptr) {
                size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
                size_t data_size = prologue_size - code_size;
                size_t i;

                disas(logfile, s->code_gen_ptr, code_size);

                for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint64_t *)(s->data_gen_ptr + i));
                    } else {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ":  .long  0x%08x\n",
                                (uintptr_t)s->data_gen_ptr + i,
                                *(uint32_t *)(s->data_gen_ptr + i));
                    }
                }
            } else {
                disas(logfile, s->code_gen_ptr, prologue_size);
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    s->emit_before_op = NULL;
    QSIMPLEQ_INIT(&s->labels);

    tcg_debug_assert(s->addr_type == TCG_TYPE_I32 ||
                     s->addr_type == TCG_TYPE_I64);

    tcg_debug_assert(s->insn_start_words > 0);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    tcg_debug_assert(TCG_TARGET_REG_BITS == 64 || type == TCG_TYPE_I32);

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
1581 
1582 static TCGTemp *tcg_global_mem_new_internal(TCGv_ptr base, intptr_t offset,
1583                                             const char *name, TCGType type)
1584 {
1585     TCGContext *s = tcg_ctx;
1586     TCGTemp *base_ts = tcgv_ptr_temp(base);
1587     TCGTemp *ts = tcg_global_alloc(s);
1588     int indirect_reg = 0;
1589 
1590     switch (base_ts->kind) {
1591     case TEMP_FIXED:
1592         break;
1593     case TEMP_GLOBAL:
1594         /* We do not support double-indirect registers.  */
1595         tcg_debug_assert(!base_ts->indirect_reg);
1596         base_ts->indirect_base = 1;
1597         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1598                             ? 2 : 1);
1599         indirect_reg = 1;
1600         break;
1601     default:
1602         g_assert_not_reached();
1603     }
1604 
1605     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1606         TCGTemp *ts2 = tcg_global_alloc(s);
1607         char buf[64];
1608 
1609         ts->base_type = TCG_TYPE_I64;
1610         ts->type = TCG_TYPE_I32;
1611         ts->indirect_reg = indirect_reg;
1612         ts->mem_allocated = 1;
1613         ts->mem_base = base_ts;
1614         ts->mem_offset = offset;
1615         pstrcpy(buf, sizeof(buf), name);
1616         pstrcat(buf, sizeof(buf), "_0");
1617         ts->name = strdup(buf);
1618 
1619         tcg_debug_assert(ts2 == ts + 1);
1620         ts2->base_type = TCG_TYPE_I64;
1621         ts2->type = TCG_TYPE_I32;
1622         ts2->indirect_reg = indirect_reg;
1623         ts2->mem_allocated = 1;
1624         ts2->mem_base = base_ts;
1625         ts2->mem_offset = offset + 4;
1626         ts2->temp_subindex = 1;
1627         pstrcpy(buf, sizeof(buf), name);
1628         pstrcat(buf, sizeof(buf), "_1");
1629         ts2->name = strdup(buf);
1630     } else {
1631         ts->base_type = type;
1632         ts->type = type;
1633         ts->indirect_reg = indirect_reg;
1634         ts->mem_allocated = 1;
1635         ts->mem_base = base_ts;
1636         ts->mem_offset = offset;
1637         ts->name = name;
1638     }
1639     return ts;
1640 }
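
/*
 * Illustrative sketch, not part of the original source: on a 32-bit
 * host a 64-bit global is split into two I32 halves.  A hypothetical
 *
 *     tcg_global_mem_new_internal(base, 0x10, "reg", TCG_TYPE_I64);
 *
 * yields "reg_0" (mem_offset 0x10, temp_subindex 0) and "reg_1"
 * (mem_offset 0x14, temp_subindex 1); which half holds the least
 * significant word is resolved against host endianness when the
 * subindexed halves are used (compare tcg_constant_internal below).
 */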
1641 
1642 TCGv_i32 tcg_global_mem_new_i32(TCGv_ptr reg, intptr_t off, const char *name)
1643 {
1644     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I32);
1645     return temp_tcgv_i32(ts);
1646 }
1647 
1648 TCGv_i64 tcg_global_mem_new_i64(TCGv_ptr reg, intptr_t off, const char *name)
1649 {
1650     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_I64);
1651     return temp_tcgv_i64(ts);
1652 }
1653 
1654 TCGv_ptr tcg_global_mem_new_ptr(TCGv_ptr reg, intptr_t off, const char *name)
1655 {
1656     TCGTemp *ts = tcg_global_mem_new_internal(reg, off, name, TCG_TYPE_PTR);
1657     return temp_tcgv_ptr(ts);
1658 }
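
/*
 * Usage sketch, illustrative only -- CPUMyState and its "pc" field
 * are hypothetical.  A front end creates its CPU state globals once,
 * at translator initialization:
 *
 *     TCGv_i64 cpu_pc = tcg_global_mem_new_i64(tcg_env,
 *                                              offsetof(CPUMyState, pc),
 *                                              "pc");
 *
 * Globals persist for the life of the context and are never freed.
 */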
1659 
1660 TCGTemp *tcg_temp_new_internal(TCGType type, TCGTempKind kind)
1661 {
1662     TCGContext *s = tcg_ctx;
1663     TCGTemp *ts;
1664     int n;
1665 
1666     if (kind == TEMP_EBB) {
1667         int idx = find_first_bit(s->free_temps[type].l, TCG_MAX_TEMPS);
1668 
1669         if (idx < TCG_MAX_TEMPS) {
1670             /* There is already an available temp with the right type.  */
1671             clear_bit(idx, s->free_temps[type].l);
1672 
1673             ts = &s->temps[idx];
1674             ts->temp_allocated = 1;
1675             tcg_debug_assert(ts->base_type == type);
1676             tcg_debug_assert(ts->kind == kind);
1677             return ts;
1678         }
1679     } else {
1680         tcg_debug_assert(kind == TEMP_TB);
1681     }
1682 
1683     switch (type) {
1684     case TCG_TYPE_I32:
1685     case TCG_TYPE_V64:
1686     case TCG_TYPE_V128:
1687     case TCG_TYPE_V256:
1688         n = 1;
1689         break;
1690     case TCG_TYPE_I64:
1691         n = 64 / TCG_TARGET_REG_BITS;
1692         break;
1693     case TCG_TYPE_I128:
1694         n = 128 / TCG_TARGET_REG_BITS;
1695         break;
1696     default:
1697         g_assert_not_reached();
1698     }
1699 
1700     ts = tcg_temp_alloc(s);
1701     ts->base_type = type;
1702     ts->temp_allocated = 1;
1703     ts->kind = kind;
1704 
1705     if (n == 1) {
1706         ts->type = type;
1707     } else {
1708         ts->type = TCG_TYPE_REG;
1709 
1710         for (int i = 1; i < n; ++i) {
1711             TCGTemp *ts2 = tcg_temp_alloc(s);
1712 
1713             tcg_debug_assert(ts2 == ts + i);
1714             ts2->base_type = type;
1715             ts2->type = TCG_TYPE_REG;
1716             ts2->temp_allocated = 1;
1717             ts2->temp_subindex = i;
1718             ts2->kind = kind;
1719         }
1720     }
1721     return ts;
1722 }
1723 
1724 TCGv_i32 tcg_temp_new_i32(void)
1725 {
1726     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_TB));
1727 }
1728 
1729 TCGv_i32 tcg_temp_ebb_new_i32(void)
1730 {
1731     return temp_tcgv_i32(tcg_temp_new_internal(TCG_TYPE_I32, TEMP_EBB));
1732 }
1733 
1734 TCGv_i64 tcg_temp_new_i64(void)
1735 {
1736     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_TB));
1737 }
1738 
1739 TCGv_i64 tcg_temp_ebb_new_i64(void)
1740 {
1741     return temp_tcgv_i64(tcg_temp_new_internal(TCG_TYPE_I64, TEMP_EBB));
1742 }
1743 
1744 TCGv_ptr tcg_temp_new_ptr(void)
1745 {
1746     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_TB));
1747 }
1748 
1749 TCGv_ptr tcg_temp_ebb_new_ptr(void)
1750 {
1751     return temp_tcgv_ptr(tcg_temp_new_internal(TCG_TYPE_PTR, TEMP_EBB));
1752 }
1753 
1754 TCGv_i128 tcg_temp_new_i128(void)
1755 {
1756     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_TB));
1757 }
1758 
1759 TCGv_i128 tcg_temp_ebb_new_i128(void)
1760 {
1761     return temp_tcgv_i128(tcg_temp_new_internal(TCG_TYPE_I128, TEMP_EBB));
1762 }
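
/*
 * Lifetime sketch, illustrative only: TEMP_TB temps stay valid across
 * branches within the translation block, while TEMP_EBB temps are
 * valid only within one extended basic block and are recycled through
 * the free_temps bitmap seen in tcg_temp_new_internal above.
 *
 *     TCGv_i64 t = tcg_temp_new_i64();      // TEMP_TB: spans labels
 *     TCGv_i64 u = tcg_temp_ebb_new_i64();  // TEMP_EBB: short-lived
 *     ...
 *     tcg_temp_free_i64(u);                 // returns u to free_temps
 */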
1763 
1764 TCGv_vec tcg_temp_new_vec(TCGType type)
1765 {
1766     TCGTemp *t;
1767 
1768 #ifdef CONFIG_DEBUG_TCG
1769     switch (type) {
1770     case TCG_TYPE_V64:
1771         assert(TCG_TARGET_HAS_v64);
1772         break;
1773     case TCG_TYPE_V128:
1774         assert(TCG_TARGET_HAS_v128);
1775         break;
1776     case TCG_TYPE_V256:
1777         assert(TCG_TARGET_HAS_v256);
1778         break;
1779     default:
1780         g_assert_not_reached();
1781     }
1782 #endif
1783 
1784     t = tcg_temp_new_internal(type, TEMP_EBB);
1785     return temp_tcgv_vec(t);
1786 }
1787 
1788 /* Create a new temp of the same type as an existing temp.  */
1789 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1790 {
1791     TCGTemp *t = tcgv_vec_temp(match);
1792 
1793     tcg_debug_assert(t->temp_allocated != 0);
1794 
1795     t = tcg_temp_new_internal(t->base_type, TEMP_EBB);
1796     return temp_tcgv_vec(t);
1797 }
1798 
1799 void tcg_temp_free_internal(TCGTemp *ts)
1800 {
1801     TCGContext *s = tcg_ctx;
1802 
1803     switch (ts->kind) {
1804     case TEMP_CONST:
1805     case TEMP_TB:
1806         /* Silently ignore free. */
1807         break;
1808     case TEMP_EBB:
1809         tcg_debug_assert(ts->temp_allocated != 0);
1810         ts->temp_allocated = 0;
1811         set_bit(temp_idx(ts), s->free_temps[ts->base_type].l);
1812         break;
1813     default:
1814         /* It never made sense to free TEMP_FIXED or TEMP_GLOBAL. */
1815         g_assert_not_reached();
1816     }
1817 }
1818 
1819 void tcg_temp_free_i32(TCGv_i32 arg)
1820 {
1821     tcg_temp_free_internal(tcgv_i32_temp(arg));
1822 }
1823 
1824 void tcg_temp_free_i64(TCGv_i64 arg)
1825 {
1826     tcg_temp_free_internal(tcgv_i64_temp(arg));
1827 }
1828 
1829 void tcg_temp_free_i128(TCGv_i128 arg)
1830 {
1831     tcg_temp_free_internal(tcgv_i128_temp(arg));
1832 }
1833 
1834 void tcg_temp_free_ptr(TCGv_ptr arg)
1835 {
1836     tcg_temp_free_internal(tcgv_ptr_temp(arg));
1837 }
1838 
1839 void tcg_temp_free_vec(TCGv_vec arg)
1840 {
1841     tcg_temp_free_internal(tcgv_vec_temp(arg));
1842 }
1843 
1844 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1845 {
1846     TCGContext *s = tcg_ctx;
1847     GHashTable *h = s->const_table[type];
1848     TCGTemp *ts;
1849 
1850     if (h == NULL) {
1851         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1852         s->const_table[type] = h;
1853     }
1854 
1855     ts = g_hash_table_lookup(h, &val);
1856     if (ts == NULL) {
1857         int64_t *val_ptr;
1858 
1859         ts = tcg_temp_alloc(s);
1860 
1861         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1862             TCGTemp *ts2 = tcg_temp_alloc(s);
1863 
1864             tcg_debug_assert(ts2 == ts + 1);
1865 
1866             ts->base_type = TCG_TYPE_I64;
1867             ts->type = TCG_TYPE_I32;
1868             ts->kind = TEMP_CONST;
1869             ts->temp_allocated = 1;
1870 
1871             ts2->base_type = TCG_TYPE_I64;
1872             ts2->type = TCG_TYPE_I32;
1873             ts2->kind = TEMP_CONST;
1874             ts2->temp_allocated = 1;
1875             ts2->temp_subindex = 1;
1876 
1877             /*
1878              * Retain the full value of the 64-bit constant in the low
1879              * part, so that the hash table works.  Actual uses will
1880              * truncate the value to the low part.
1881              */
1882             ts[HOST_BIG_ENDIAN].val = val;
1883             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1884             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1885         } else {
1886             ts->base_type = type;
1887             ts->type = type;
1888             ts->kind = TEMP_CONST;
1889             ts->temp_allocated = 1;
1890             ts->val = val;
1891             val_ptr = &ts->val;
1892         }
1893         g_hash_table_insert(h, val_ptr, ts);
1894     }
1895 
1896     return ts;
1897 }
1898 
1899 TCGv_i32 tcg_constant_i32(int32_t val)
1900 {
1901     return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
1902 }
1903 
1904 TCGv_i64 tcg_constant_i64(int64_t val)
1905 {
1906     return temp_tcgv_i64(tcg_constant_internal(TCG_TYPE_I64, val));
1907 }
1908 
1909 TCGv_ptr tcg_constant_ptr_int(intptr_t val)
1910 {
1911     return temp_tcgv_ptr(tcg_constant_internal(TCG_TYPE_PTR, val));
1912 }
1913 
1914 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1915 {
1916     val = dup_const(vece, val);
1917     return temp_tcgv_vec(tcg_constant_internal(type, val));
1918 }
1919 
1920 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1921 {
1922     TCGTemp *t = tcgv_vec_temp(match);
1923 
1924     tcg_debug_assert(t->temp_allocated != 0);
1925     return tcg_constant_vec(t->base_type, vece, val);
1926 }
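
/*
 * Interning sketch, illustrative only: constants are hashed per type,
 * so equal requests return the same read-only temp, and freeing one
 * is a silent no-op (see TEMP_CONST in tcg_temp_free_internal above).
 *
 *     TCGv_i32 a = tcg_constant_i32(42);
 *     TCGv_i32 b = tcg_constant_i32(42);
 *     // a == b: both name the same TEMP_CONST temp
 */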
1927 
1928 #ifdef CONFIG_DEBUG_TCG
1929 size_t temp_idx(TCGTemp *ts)
1930 {
1931     ptrdiff_t n = ts - tcg_ctx->temps;
1932     assert(n >= 0 && n < tcg_ctx->nb_temps);
1933     return n;
1934 }
1935 
1936 TCGTemp *tcgv_i32_temp(TCGv_i32 v)
1937 {
1938     uintptr_t o = (uintptr_t)v - offsetof(TCGContext, temps);
1939 
1940     assert(o < sizeof(TCGTemp) * tcg_ctx->nb_temps);
1941     assert(o % sizeof(TCGTemp) == 0);
1942 
1943     return (void *)tcg_ctx + (uintptr_t)v;
1944 }
1945 #endif /* CONFIG_DEBUG_TCG */
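
/*
 * Encoding sketch, illustrative only: a TCGv_* handle is the byte
 * offset of its TCGTemp from the start of tcg_ctx, so the debug
 * accessor above and temp_tcgv_i32() are inverses:
 *
 *     TCGTemp *ts = tcgv_i32_temp(v);    // (void *)tcg_ctx + (uintptr_t)v
 *     assert(temp_tcgv_i32(ts) == v);    // round-trips to the same handle
 */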
1946 
1947 /* Return true if OP may appear in the opcode stream.
1948    Test the runtime variable that controls each opcode.  */
1949 bool tcg_op_supported(TCGOpcode op)
1950 {
1951     const bool have_vec
1952         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1953 
1954     switch (op) {
1955     case INDEX_op_discard:
1956     case INDEX_op_set_label:
1957     case INDEX_op_call:
1958     case INDEX_op_br:
1959     case INDEX_op_mb:
1960     case INDEX_op_insn_start:
1961     case INDEX_op_exit_tb:
1962     case INDEX_op_goto_tb:
1963     case INDEX_op_goto_ptr:
1964     case INDEX_op_qemu_ld_a32_i32:
1965     case INDEX_op_qemu_ld_a64_i32:
1966     case INDEX_op_qemu_st_a32_i32:
1967     case INDEX_op_qemu_st_a64_i32:
1968     case INDEX_op_qemu_ld_a32_i64:
1969     case INDEX_op_qemu_ld_a64_i64:
1970     case INDEX_op_qemu_st_a32_i64:
1971     case INDEX_op_qemu_st_a64_i64:
1972         return true;
1973 
1974     case INDEX_op_qemu_st8_a32_i32:
1975     case INDEX_op_qemu_st8_a64_i32:
1976         return TCG_TARGET_HAS_qemu_st8_i32;
1977 
1978     case INDEX_op_qemu_ld_a32_i128:
1979     case INDEX_op_qemu_ld_a64_i128:
1980     case INDEX_op_qemu_st_a32_i128:
1981     case INDEX_op_qemu_st_a64_i128:
1982         return TCG_TARGET_HAS_qemu_ldst_i128;
1983 
1984     case INDEX_op_mov_i32:
1985     case INDEX_op_setcond_i32:
1986     case INDEX_op_brcond_i32:
1987     case INDEX_op_movcond_i32:
1988     case INDEX_op_ld8u_i32:
1989     case INDEX_op_ld8s_i32:
1990     case INDEX_op_ld16u_i32:
1991     case INDEX_op_ld16s_i32:
1992     case INDEX_op_ld_i32:
1993     case INDEX_op_st8_i32:
1994     case INDEX_op_st16_i32:
1995     case INDEX_op_st_i32:
1996     case INDEX_op_add_i32:
1997     case INDEX_op_sub_i32:
1998     case INDEX_op_neg_i32:
1999     case INDEX_op_mul_i32:
2000     case INDEX_op_and_i32:
2001     case INDEX_op_or_i32:
2002     case INDEX_op_xor_i32:
2003     case INDEX_op_shl_i32:
2004     case INDEX_op_shr_i32:
2005     case INDEX_op_sar_i32:
2006         return true;
2007 
2008     case INDEX_op_negsetcond_i32:
2009         return TCG_TARGET_HAS_negsetcond_i32;
2010     case INDEX_op_div_i32:
2011     case INDEX_op_divu_i32:
2012         return TCG_TARGET_HAS_div_i32;
2013     case INDEX_op_rem_i32:
2014     case INDEX_op_remu_i32:
2015         return TCG_TARGET_HAS_rem_i32;
2016     case INDEX_op_div2_i32:
2017     case INDEX_op_divu2_i32:
2018         return TCG_TARGET_HAS_div2_i32;
2019     case INDEX_op_rotl_i32:
2020     case INDEX_op_rotr_i32:
2021         return TCG_TARGET_HAS_rot_i32;
2022     case INDEX_op_deposit_i32:
2023         return TCG_TARGET_HAS_deposit_i32;
2024     case INDEX_op_extract_i32:
2025         return TCG_TARGET_HAS_extract_i32;
2026     case INDEX_op_sextract_i32:
2027         return TCG_TARGET_HAS_sextract_i32;
2028     case INDEX_op_extract2_i32:
2029         return TCG_TARGET_HAS_extract2_i32;
2030     case INDEX_op_add2_i32:
2031         return TCG_TARGET_HAS_add2_i32;
2032     case INDEX_op_sub2_i32:
2033         return TCG_TARGET_HAS_sub2_i32;
2034     case INDEX_op_mulu2_i32:
2035         return TCG_TARGET_HAS_mulu2_i32;
2036     case INDEX_op_muls2_i32:
2037         return TCG_TARGET_HAS_muls2_i32;
2038     case INDEX_op_muluh_i32:
2039         return TCG_TARGET_HAS_muluh_i32;
2040     case INDEX_op_mulsh_i32:
2041         return TCG_TARGET_HAS_mulsh_i32;
2042     case INDEX_op_ext8s_i32:
2043         return TCG_TARGET_HAS_ext8s_i32;
2044     case INDEX_op_ext16s_i32:
2045         return TCG_TARGET_HAS_ext16s_i32;
2046     case INDEX_op_ext8u_i32:
2047         return TCG_TARGET_HAS_ext8u_i32;
2048     case INDEX_op_ext16u_i32:
2049         return TCG_TARGET_HAS_ext16u_i32;
2050     case INDEX_op_bswap16_i32:
2051         return TCG_TARGET_HAS_bswap16_i32;
2052     case INDEX_op_bswap32_i32:
2053         return TCG_TARGET_HAS_bswap32_i32;
2054     case INDEX_op_not_i32:
2055         return TCG_TARGET_HAS_not_i32;
2056     case INDEX_op_andc_i32:
2057         return TCG_TARGET_HAS_andc_i32;
2058     case INDEX_op_orc_i32:
2059         return TCG_TARGET_HAS_orc_i32;
2060     case INDEX_op_eqv_i32:
2061         return TCG_TARGET_HAS_eqv_i32;
2062     case INDEX_op_nand_i32:
2063         return TCG_TARGET_HAS_nand_i32;
2064     case INDEX_op_nor_i32:
2065         return TCG_TARGET_HAS_nor_i32;
2066     case INDEX_op_clz_i32:
2067         return TCG_TARGET_HAS_clz_i32;
2068     case INDEX_op_ctz_i32:
2069         return TCG_TARGET_HAS_ctz_i32;
2070     case INDEX_op_ctpop_i32:
2071         return TCG_TARGET_HAS_ctpop_i32;
2072 
2073     case INDEX_op_brcond2_i32:
2074     case INDEX_op_setcond2_i32:
2075         return TCG_TARGET_REG_BITS == 32;
2076 
2077     case INDEX_op_mov_i64:
2078     case INDEX_op_setcond_i64:
2079     case INDEX_op_brcond_i64:
2080     case INDEX_op_movcond_i64:
2081     case INDEX_op_ld8u_i64:
2082     case INDEX_op_ld8s_i64:
2083     case INDEX_op_ld16u_i64:
2084     case INDEX_op_ld16s_i64:
2085     case INDEX_op_ld32u_i64:
2086     case INDEX_op_ld32s_i64:
2087     case INDEX_op_ld_i64:
2088     case INDEX_op_st8_i64:
2089     case INDEX_op_st16_i64:
2090     case INDEX_op_st32_i64:
2091     case INDEX_op_st_i64:
2092     case INDEX_op_add_i64:
2093     case INDEX_op_sub_i64:
2094     case INDEX_op_neg_i64:
2095     case INDEX_op_mul_i64:
2096     case INDEX_op_and_i64:
2097     case INDEX_op_or_i64:
2098     case INDEX_op_xor_i64:
2099     case INDEX_op_shl_i64:
2100     case INDEX_op_shr_i64:
2101     case INDEX_op_sar_i64:
2102     case INDEX_op_ext_i32_i64:
2103     case INDEX_op_extu_i32_i64:
2104         return TCG_TARGET_REG_BITS == 64;
2105 
2106     case INDEX_op_negsetcond_i64:
2107         return TCG_TARGET_HAS_negsetcond_i64;
2108     case INDEX_op_div_i64:
2109     case INDEX_op_divu_i64:
2110         return TCG_TARGET_HAS_div_i64;
2111     case INDEX_op_rem_i64:
2112     case INDEX_op_remu_i64:
2113         return TCG_TARGET_HAS_rem_i64;
2114     case INDEX_op_div2_i64:
2115     case INDEX_op_divu2_i64:
2116         return TCG_TARGET_HAS_div2_i64;
2117     case INDEX_op_rotl_i64:
2118     case INDEX_op_rotr_i64:
2119         return TCG_TARGET_HAS_rot_i64;
2120     case INDEX_op_deposit_i64:
2121         return TCG_TARGET_HAS_deposit_i64;
2122     case INDEX_op_extract_i64:
2123         return TCG_TARGET_HAS_extract_i64;
2124     case INDEX_op_sextract_i64:
2125         return TCG_TARGET_HAS_sextract_i64;
2126     case INDEX_op_extract2_i64:
2127         return TCG_TARGET_HAS_extract2_i64;
2128     case INDEX_op_extrl_i64_i32:
2129     case INDEX_op_extrh_i64_i32:
2130         return TCG_TARGET_HAS_extr_i64_i32;
2131     case INDEX_op_ext8s_i64:
2132         return TCG_TARGET_HAS_ext8s_i64;
2133     case INDEX_op_ext16s_i64:
2134         return TCG_TARGET_HAS_ext16s_i64;
2135     case INDEX_op_ext32s_i64:
2136         return TCG_TARGET_HAS_ext32s_i64;
2137     case INDEX_op_ext8u_i64:
2138         return TCG_TARGET_HAS_ext8u_i64;
2139     case INDEX_op_ext16u_i64:
2140         return TCG_TARGET_HAS_ext16u_i64;
2141     case INDEX_op_ext32u_i64:
2142         return TCG_TARGET_HAS_ext32u_i64;
2143     case INDEX_op_bswap16_i64:
2144         return TCG_TARGET_HAS_bswap16_i64;
2145     case INDEX_op_bswap32_i64:
2146         return TCG_TARGET_HAS_bswap32_i64;
2147     case INDEX_op_bswap64_i64:
2148         return TCG_TARGET_HAS_bswap64_i64;
2149     case INDEX_op_not_i64:
2150         return TCG_TARGET_HAS_not_i64;
2151     case INDEX_op_andc_i64:
2152         return TCG_TARGET_HAS_andc_i64;
2153     case INDEX_op_orc_i64:
2154         return TCG_TARGET_HAS_orc_i64;
2155     case INDEX_op_eqv_i64:
2156         return TCG_TARGET_HAS_eqv_i64;
2157     case INDEX_op_nand_i64:
2158         return TCG_TARGET_HAS_nand_i64;
2159     case INDEX_op_nor_i64:
2160         return TCG_TARGET_HAS_nor_i64;
2161     case INDEX_op_clz_i64:
2162         return TCG_TARGET_HAS_clz_i64;
2163     case INDEX_op_ctz_i64:
2164         return TCG_TARGET_HAS_ctz_i64;
2165     case INDEX_op_ctpop_i64:
2166         return TCG_TARGET_HAS_ctpop_i64;
2167     case INDEX_op_add2_i64:
2168         return TCG_TARGET_HAS_add2_i64;
2169     case INDEX_op_sub2_i64:
2170         return TCG_TARGET_HAS_sub2_i64;
2171     case INDEX_op_mulu2_i64:
2172         return TCG_TARGET_HAS_mulu2_i64;
2173     case INDEX_op_muls2_i64:
2174         return TCG_TARGET_HAS_muls2_i64;
2175     case INDEX_op_muluh_i64:
2176         return TCG_TARGET_HAS_muluh_i64;
2177     case INDEX_op_mulsh_i64:
2178         return TCG_TARGET_HAS_mulsh_i64;
2179 
2180     case INDEX_op_mov_vec:
2181     case INDEX_op_dup_vec:
2182     case INDEX_op_dupm_vec:
2183     case INDEX_op_ld_vec:
2184     case INDEX_op_st_vec:
2185     case INDEX_op_add_vec:
2186     case INDEX_op_sub_vec:
2187     case INDEX_op_and_vec:
2188     case INDEX_op_or_vec:
2189     case INDEX_op_xor_vec:
2190     case INDEX_op_cmp_vec:
2191         return have_vec;
2192     case INDEX_op_dup2_vec:
2193         return have_vec && TCG_TARGET_REG_BITS == 32;
2194     case INDEX_op_not_vec:
2195         return have_vec && TCG_TARGET_HAS_not_vec;
2196     case INDEX_op_neg_vec:
2197         return have_vec && TCG_TARGET_HAS_neg_vec;
2198     case INDEX_op_abs_vec:
2199         return have_vec && TCG_TARGET_HAS_abs_vec;
2200     case INDEX_op_andc_vec:
2201         return have_vec && TCG_TARGET_HAS_andc_vec;
2202     case INDEX_op_orc_vec:
2203         return have_vec && TCG_TARGET_HAS_orc_vec;
2204     case INDEX_op_nand_vec:
2205         return have_vec && TCG_TARGET_HAS_nand_vec;
2206     case INDEX_op_nor_vec:
2207         return have_vec && TCG_TARGET_HAS_nor_vec;
2208     case INDEX_op_eqv_vec:
2209         return have_vec && TCG_TARGET_HAS_eqv_vec;
2210     case INDEX_op_mul_vec:
2211         return have_vec && TCG_TARGET_HAS_mul_vec;
2212     case INDEX_op_shli_vec:
2213     case INDEX_op_shri_vec:
2214     case INDEX_op_sari_vec:
2215         return have_vec && TCG_TARGET_HAS_shi_vec;
2216     case INDEX_op_shls_vec:
2217     case INDEX_op_shrs_vec:
2218     case INDEX_op_sars_vec:
2219         return have_vec && TCG_TARGET_HAS_shs_vec;
2220     case INDEX_op_shlv_vec:
2221     case INDEX_op_shrv_vec:
2222     case INDEX_op_sarv_vec:
2223         return have_vec && TCG_TARGET_HAS_shv_vec;
2224     case INDEX_op_rotli_vec:
2225         return have_vec && TCG_TARGET_HAS_roti_vec;
2226     case INDEX_op_rotls_vec:
2227         return have_vec && TCG_TARGET_HAS_rots_vec;
2228     case INDEX_op_rotlv_vec:
2229     case INDEX_op_rotrv_vec:
2230         return have_vec && TCG_TARGET_HAS_rotv_vec;
2231     case INDEX_op_ssadd_vec:
2232     case INDEX_op_usadd_vec:
2233     case INDEX_op_sssub_vec:
2234     case INDEX_op_ussub_vec:
2235         return have_vec && TCG_TARGET_HAS_sat_vec;
2236     case INDEX_op_smin_vec:
2237     case INDEX_op_umin_vec:
2238     case INDEX_op_smax_vec:
2239     case INDEX_op_umax_vec:
2240         return have_vec && TCG_TARGET_HAS_minmax_vec;
2241     case INDEX_op_bitsel_vec:
2242         return have_vec && TCG_TARGET_HAS_bitsel_vec;
2243     case INDEX_op_cmpsel_vec:
2244         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
2245 
2246     default:
2247         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
2248         return true;
2249     }
2250 }
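
/*
 * Usage sketch, illustrative only: expanders test availability at
 * runtime and fall back when the target lacks an opcode, roughly:
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         tcg_gen_op2_i32(INDEX_op_ctpop_i32, ret, arg);
 *     } else {
 *         // expand via a helper call or a bit-twiddling sequence
 *     }
 */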
2251 
2252 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
2253 
2254 static void tcg_gen_callN(TCGHelperInfo *info, TCGTemp *ret, TCGTemp **args)
2255 {
2256     TCGv_i64 extend_free[MAX_CALL_IARGS];
2257     int n_extend = 0;
2258     TCGOp *op;
2259     int i, n, pi = 0, total_args;
2260 
2261     if (unlikely(g_once_init_enter(HELPER_INFO_INIT(info)))) {
2262         init_call_layout(info);
2263         g_once_init_leave(HELPER_INFO_INIT(info), HELPER_INFO_INIT_VAL(info));
2264     }
2265 
2266     total_args = info->nr_out + info->nr_in + 2;
2267     op = tcg_op_alloc(INDEX_op_call, total_args);
2268 
2269 #ifdef CONFIG_PLUGIN
2270     /* Flag helpers that may affect guest state */
2271     if (tcg_ctx->plugin_insn &&
2272         !(info->flags & TCG_CALL_PLUGIN) &&
2273         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
2274         tcg_ctx->plugin_insn->calls_helpers = true;
2275     }
2276 #endif
2277 
2278     TCGOP_CALLO(op) = n = info->nr_out;
2279     switch (n) {
2280     case 0:
2281         tcg_debug_assert(ret == NULL);
2282         break;
2283     case 1:
2284         tcg_debug_assert(ret != NULL);
2285         op->args[pi++] = temp_arg(ret);
2286         break;
2287     case 2:
2288     case 4:
2289         tcg_debug_assert(ret != NULL);
2290         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
2291         tcg_debug_assert(ret->temp_subindex == 0);
2292         for (i = 0; i < n; ++i) {
2293             op->args[pi++] = temp_arg(ret + i);
2294         }
2295         break;
2296     default:
2297         g_assert_not_reached();
2298     }
2299 
2300     TCGOP_CALLI(op) = n = info->nr_in;
2301     for (i = 0; i < n; i++) {
2302         const TCGCallArgumentLoc *loc = &info->in[i];
2303         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
2304 
2305         switch (loc->kind) {
2306         case TCG_CALL_ARG_NORMAL:
2307         case TCG_CALL_ARG_BY_REF:
2308         case TCG_CALL_ARG_BY_REF_N:
2309             op->args[pi++] = temp_arg(ts);
2310             break;
2311 
2312         case TCG_CALL_ARG_EXTEND_U:
2313         case TCG_CALL_ARG_EXTEND_S:
2314             {
2315                 TCGv_i64 temp = tcg_temp_ebb_new_i64();
2316                 TCGv_i32 orig = temp_tcgv_i32(ts);
2317 
2318                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
2319                     tcg_gen_ext_i32_i64(temp, orig);
2320                 } else {
2321                     tcg_gen_extu_i32_i64(temp, orig);
2322                 }
2323                 op->args[pi++] = tcgv_i64_arg(temp);
2324                 extend_free[n_extend++] = temp;
2325             }
2326             break;
2327 
2328         default:
2329             g_assert_not_reached();
2330         }
2331     }
2332     op->args[pi++] = (uintptr_t)info->func;
2333     op->args[pi++] = (uintptr_t)info;
2334     tcg_debug_assert(pi == total_args);
2335 
2336     if (tcg_ctx->emit_before_op) {
2337         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
2338     } else {
2339         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2340     }
2341 
2342     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
2343     for (i = 0; i < n_extend; ++i) {
2344         tcg_temp_free_i64(extend_free[i]);
2345     }
2346 }
2347 
2348 void tcg_gen_call0(TCGHelperInfo *info, TCGTemp *ret)
2349 {
2350     tcg_gen_callN(info, ret, NULL);
2351 }
2352 
2353 void tcg_gen_call1(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1)
2354 {
2355     tcg_gen_callN(info, ret, &t1);
2356 }
2357 
2358 void tcg_gen_call2(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2)
2359 {
2360     TCGTemp *args[2] = { t1, t2 };
2361     tcg_gen_callN(info, ret, args);
2362 }
2363 
2364 void tcg_gen_call3(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2365                    TCGTemp *t2, TCGTemp *t3)
2366 {
2367     TCGTemp *args[3] = { t1, t2, t3 };
2368     tcg_gen_callN(info, ret, args);
2369 }
2370 
2371 void tcg_gen_call4(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2372                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4)
2373 {
2374     TCGTemp *args[4] = { t1, t2, t3, t4 };
2375     tcg_gen_callN(info, ret, args);
2376 }
2377 
2378 void tcg_gen_call5(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2379                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4, TCGTemp *t5)
2380 {
2381     TCGTemp *args[5] = { t1, t2, t3, t4, t5 };
2382     tcg_gen_callN(info, ret, args);
2383 }
2384 
2385 void tcg_gen_call6(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1, TCGTemp *t2,
2386                    TCGTemp *t3, TCGTemp *t4, TCGTemp *t5, TCGTemp *t6)
2387 {
2388     TCGTemp *args[6] = { t1, t2, t3, t4, t5, t6 };
2389     tcg_gen_callN(info, ret, args);
2390 }
2391 
2392 void tcg_gen_call7(TCGHelperInfo *info, TCGTemp *ret, TCGTemp *t1,
2393                    TCGTemp *t2, TCGTemp *t3, TCGTemp *t4,
2394                    TCGTemp *t5, TCGTemp *t6, TCGTemp *t7)
2395 {
2396     TCGTemp *args[7] = { t1, t2, t3, t4, t5, t6, t7 };
2397     tcg_gen_callN(info, ret, args);
2398 }
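
/*
 * Generation sketch, illustrative only: front ends do not call these
 * directly.  The DEF_HELPER_* macros in a target's helper.h expand to
 * thin wrappers, roughly of the form:
 *
 *     static inline void gen_helper_foo(TCGv_i32 ret, TCGv_i32 a)
 *     {
 *         tcg_gen_call1(&helper_info_foo, tcgv_i32_temp(ret),
 *                       tcgv_i32_temp(a));
 *     }
 *
 * where "foo" and its signature are hypothetical.
 */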
2399 
2400 static void tcg_reg_alloc_start(TCGContext *s)
2401 {
2402     int i, n;
2403 
2404     for (i = 0, n = s->nb_temps; i < n; i++) {
2405         TCGTemp *ts = &s->temps[i];
2406         TCGTempVal val = TEMP_VAL_MEM;
2407 
2408         switch (ts->kind) {
2409         case TEMP_CONST:
2410             val = TEMP_VAL_CONST;
2411             break;
2412         case TEMP_FIXED:
2413             val = TEMP_VAL_REG;
2414             break;
2415         case TEMP_GLOBAL:
2416             break;
2417         case TEMP_EBB:
2418             val = TEMP_VAL_DEAD;
2419             /* fall through */
2420         case TEMP_TB:
2421             ts->mem_allocated = 0;
2422             break;
2423         default:
2424             g_assert_not_reached();
2425         }
2426         ts->val_type = val;
2427     }
2428 
2429     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2430 }
2431 
2432 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2433                                  TCGTemp *ts)
2434 {
2435     int idx = temp_idx(ts);
2436 
2437     switch (ts->kind) {
2438     case TEMP_FIXED:
2439     case TEMP_GLOBAL:
2440         pstrcpy(buf, buf_size, ts->name);
2441         break;
2442     case TEMP_TB:
2443         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2444         break;
2445     case TEMP_EBB:
2446         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2447         break;
2448     case TEMP_CONST:
2449         switch (ts->type) {
2450         case TCG_TYPE_I32:
2451             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2452             break;
2453 #if TCG_TARGET_REG_BITS > 32
2454         case TCG_TYPE_I64:
2455             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2456             break;
2457 #endif
2458         case TCG_TYPE_V64:
2459         case TCG_TYPE_V128:
2460         case TCG_TYPE_V256:
2461             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2462                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2463             break;
2464         default:
2465             g_assert_not_reached();
2466         }
2467         break;
2468     }
2469     return buf;
2470 }
2471 
2472 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2473                              int buf_size, TCGArg arg)
2474 {
2475     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2476 }
2477 
2478 static const char * const cond_name[] =
2479 {
2480     [TCG_COND_NEVER] = "never",
2481     [TCG_COND_ALWAYS] = "always",
2482     [TCG_COND_EQ] = "eq",
2483     [TCG_COND_NE] = "ne",
2484     [TCG_COND_LT] = "lt",
2485     [TCG_COND_GE] = "ge",
2486     [TCG_COND_LE] = "le",
2487     [TCG_COND_GT] = "gt",
2488     [TCG_COND_LTU] = "ltu",
2489     [TCG_COND_GEU] = "geu",
2490     [TCG_COND_LEU] = "leu",
2491     [TCG_COND_GTU] = "gtu",
2492     [TCG_COND_TSTEQ] = "tsteq",
2493     [TCG_COND_TSTNE] = "tstne",
2494 };
2495 
2496 static const char * const ldst_name[(MO_BSWAP | MO_SSIZE) + 1] =
2497 {
2498     [MO_UB]   = "ub",
2499     [MO_SB]   = "sb",
2500     [MO_LEUW] = "leuw",
2501     [MO_LESW] = "lesw",
2502     [MO_LEUL] = "leul",
2503     [MO_LESL] = "lesl",
2504     [MO_LEUQ] = "leq",
2505     [MO_BEUW] = "beuw",
2506     [MO_BESW] = "besw",
2507     [MO_BEUL] = "beul",
2508     [MO_BESL] = "besl",
2509     [MO_BEUQ] = "beq",
2510     [MO_128 + MO_BE] = "beo",
2511     [MO_128 + MO_LE] = "leo",
2512 };
2513 
2514 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2515     [MO_UNALN >> MO_ASHIFT]    = "un+",
2516     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2517     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2518     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2519     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2520     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2521     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2522     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2523 };
2524 
2525 static const char * const atom_name[(MO_ATOM_MASK >> MO_ATOM_SHIFT) + 1] = {
2526     [MO_ATOM_IFALIGN >> MO_ATOM_SHIFT] = "",
2527     [MO_ATOM_IFALIGN_PAIR >> MO_ATOM_SHIFT] = "pair+",
2528     [MO_ATOM_WITHIN16 >> MO_ATOM_SHIFT] = "w16+",
2529     [MO_ATOM_WITHIN16_PAIR >> MO_ATOM_SHIFT] = "w16p+",
2530     [MO_ATOM_SUBALIGN >> MO_ATOM_SHIFT] = "sub+",
2531     [MO_ATOM_NONE >> MO_ATOM_SHIFT] = "noat+",
2532 };
2533 
2534 static const char bswap_flag_name[][6] = {
2535     [TCG_BSWAP_IZ] = "iz",
2536     [TCG_BSWAP_OZ] = "oz",
2537     [TCG_BSWAP_OS] = "os",
2538     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2539     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2540 };
2541 
2542 static inline bool tcg_regset_single(TCGRegSet d)
2543 {
2544     return (d & (d - 1)) == 0;
2545 }
2546 
2547 static inline TCGReg tcg_regset_first(TCGRegSet d)
2548 {
2549     if (TCG_TARGET_NB_REGS <= 32) {
2550         return ctz32(d);
2551     } else {
2552         return ctz64(d);
2553     }
2554 }
2555 
2556 /* Return only the number of characters output -- no error return. */
2557 #define ne_fprintf(...) \
2558     ({ int ret_ = fprintf(__VA_ARGS__); ret_ >= 0 ? ret_ : 0; })
2559 
2560 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2561 {
2562     char buf[128];
2563     TCGOp *op;
2564 
2565     QTAILQ_FOREACH(op, &s->ops, link) {
2566         int i, k, nb_oargs, nb_iargs, nb_cargs;
2567         const TCGOpDef *def;
2568         TCGOpcode c;
2569         int col = 0;
2570 
2571         c = op->opc;
2572         def = &tcg_op_defs[c];
2573 
2574         if (c == INDEX_op_insn_start) {
2575             nb_oargs = 0;
2576             col += ne_fprintf(f, "\n ----");
2577 
2578             for (i = 0, k = s->insn_start_words; i < k; ++i) {
2579                 col += ne_fprintf(f, " %016" PRIx64,
2580                                   tcg_get_insn_start_param(op, i));
2581             }
2582         } else if (c == INDEX_op_call) {
2583             const TCGHelperInfo *info = tcg_call_info(op);
2584             void *func = tcg_call_func(op);
2585 
2586             /* variable number of arguments */
2587             nb_oargs = TCGOP_CALLO(op);
2588             nb_iargs = TCGOP_CALLI(op);
2589             nb_cargs = def->nb_cargs;
2590 
2591             col += ne_fprintf(f, " %s ", def->name);
2592 
2593             /*
2594              * Print the function name from TCGHelperInfo, if available.
2595              * Note that plugins have a template function for the info,
2596              * but the actual function pointer comes from the plugin.
2597              */
2598             if (func == info->func) {
2599                 col += ne_fprintf(f, "%s", info->name);
2600             } else {
2601                 col += ne_fprintf(f, "plugin(%p)", func);
2602             }
2603 
2604             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2605             for (i = 0; i < nb_oargs; i++) {
2606                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2607                                                             op->args[i]));
2608             }
2609             for (i = 0; i < nb_iargs; i++) {
2610                 TCGArg arg = op->args[nb_oargs + i];
2611                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2612                 col += ne_fprintf(f, ",%s", t);
2613             }
2614         } else {
2615             col += ne_fprintf(f, " %s ", def->name);
2616 
2617             nb_oargs = def->nb_oargs;
2618             nb_iargs = def->nb_iargs;
2619             nb_cargs = def->nb_cargs;
2620 
2621             if (def->flags & TCG_OPF_VECTOR) {
2622                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2623                                   8 << TCGOP_VECE(op));
2624             }
2625 
2626             k = 0;
2627             for (i = 0; i < nb_oargs; i++) {
2628                 const char *sep =  k ? "," : "";
2629                 col += ne_fprintf(f, "%s%s", sep,
2630                                   tcg_get_arg_str(s, buf, sizeof(buf),
2631                                                   op->args[k++]));
2632             }
2633             for (i = 0; i < nb_iargs; i++) {
2634                 const char *sep =  k ? "," : "";
2635                 col += ne_fprintf(f, "%s%s", sep,
2636                                   tcg_get_arg_str(s, buf, sizeof(buf),
2637                                                   op->args[k++]));
2638             }
2639             switch (c) {
2640             case INDEX_op_brcond_i32:
2641             case INDEX_op_setcond_i32:
2642             case INDEX_op_negsetcond_i32:
2643             case INDEX_op_movcond_i32:
2644             case INDEX_op_brcond2_i32:
2645             case INDEX_op_setcond2_i32:
2646             case INDEX_op_brcond_i64:
2647             case INDEX_op_setcond_i64:
2648             case INDEX_op_negsetcond_i64:
2649             case INDEX_op_movcond_i64:
2650             case INDEX_op_cmp_vec:
2651             case INDEX_op_cmpsel_vec:
2652                 if (op->args[k] < ARRAY_SIZE(cond_name)
2653                     && cond_name[op->args[k]]) {
2654                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2655                 } else {
2656                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2657                 }
2658                 i = 1;
2659                 break;
2660             case INDEX_op_qemu_ld_a32_i32:
2661             case INDEX_op_qemu_ld_a64_i32:
2662             case INDEX_op_qemu_st_a32_i32:
2663             case INDEX_op_qemu_st_a64_i32:
2664             case INDEX_op_qemu_st8_a32_i32:
2665             case INDEX_op_qemu_st8_a64_i32:
2666             case INDEX_op_qemu_ld_a32_i64:
2667             case INDEX_op_qemu_ld_a64_i64:
2668             case INDEX_op_qemu_st_a32_i64:
2669             case INDEX_op_qemu_st_a64_i64:
2670             case INDEX_op_qemu_ld_a32_i128:
2671             case INDEX_op_qemu_ld_a64_i128:
2672             case INDEX_op_qemu_st_a32_i128:
2673             case INDEX_op_qemu_st_a64_i128:
2674                 {
2675                     const char *s_al, *s_op, *s_at;
2676                     MemOpIdx oi = op->args[k++];
2677                     MemOp mop = get_memop(oi);
2678                     unsigned ix = get_mmuidx(oi);
2679 
2680                     s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2681                     s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2682                     s_at = atom_name[(mop & MO_ATOM_MASK) >> MO_ATOM_SHIFT];
2683                     mop &= ~(MO_AMASK | MO_BSWAP | MO_SSIZE | MO_ATOM_MASK);
2684 
2685                     /* If all fields are accounted for, print symbolically. */
2686                     if (!mop && s_al && s_op && s_at) {
2687                         col += ne_fprintf(f, ",%s%s%s,%u",
2688                                           s_at, s_al, s_op, ix);
2689                     } else {
2690                         mop = get_memop(oi);
2691                         col += ne_fprintf(f, ",$0x%x,%u", mop, ix);
2692                     }
2693                     i = 1;
2694                 }
2695                 break;
2696             case INDEX_op_bswap16_i32:
2697             case INDEX_op_bswap16_i64:
2698             case INDEX_op_bswap32_i32:
2699             case INDEX_op_bswap32_i64:
2700             case INDEX_op_bswap64_i64:
2701                 {
2702                     TCGArg flags = op->args[k];
2703                     const char *name = NULL;
2704 
2705                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2706                         name = bswap_flag_name[flags];
2707                     }
2708                     if (name) {
2709                         col += ne_fprintf(f, ",%s", name);
2710                     } else {
2711                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2712                     }
2713                     i = k = 1;
2714                 }
2715                 break;
2716             default:
2717                 i = 0;
2718                 break;
2719             }
2720             switch (c) {
2721             case INDEX_op_set_label:
2722             case INDEX_op_br:
2723             case INDEX_op_brcond_i32:
2724             case INDEX_op_brcond_i64:
2725             case INDEX_op_brcond2_i32:
2726                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2727                                   arg_label(op->args[k])->id);
2728                 i++, k++;
2729                 break;
2730             case INDEX_op_mb:
2731                 {
2732                     TCGBar membar = op->args[k];
2733                     const char *b_op, *m_op;
2734 
2735                     switch (membar & TCG_BAR_SC) {
2736                     case 0:
2737                         b_op = "none";
2738                         break;
2739                     case TCG_BAR_LDAQ:
2740                         b_op = "acq";
2741                         break;
2742                     case TCG_BAR_STRL:
2743                         b_op = "rel";
2744                         break;
2745                     case TCG_BAR_SC:
2746                         b_op = "seq";
2747                         break;
2748                     default:
2749                         g_assert_not_reached();
2750                     }
2751 
2752                     switch (membar & TCG_MO_ALL) {
2753                     case 0:
2754                         m_op = "none";
2755                         break;
2756                     case TCG_MO_LD_LD:
2757                         m_op = "rr";
2758                         break;
2759                     case TCG_MO_LD_ST:
2760                         m_op = "rw";
2761                         break;
2762                     case TCG_MO_ST_LD:
2763                         m_op = "wr";
2764                         break;
2765                     case TCG_MO_ST_ST:
2766                         m_op = "ww";
2767                         break;
2768                     case TCG_MO_LD_LD | TCG_MO_LD_ST:
2769                         m_op = "rr+rw";
2770                         break;
2771                     case TCG_MO_LD_LD | TCG_MO_ST_LD:
2772                         m_op = "rr+wr";
2773                         break;
2774                     case TCG_MO_LD_LD | TCG_MO_ST_ST:
2775                         m_op = "rr+ww";
2776                         break;
2777                     case TCG_MO_LD_ST | TCG_MO_ST_LD:
2778                         m_op = "rw+wr";
2779                         break;
2780                     case TCG_MO_LD_ST | TCG_MO_ST_ST:
2781                         m_op = "rw+ww";
2782                         break;
2783                     case TCG_MO_ST_LD | TCG_MO_ST_ST:
2784                         m_op = "wr+ww";
2785                         break;
2786                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_LD:
2787                         m_op = "rr+rw+wr";
2788                         break;
2789                     case TCG_MO_LD_LD | TCG_MO_LD_ST | TCG_MO_ST_ST:
2790                         m_op = "rr+rw+ww";
2791                         break;
2792                     case TCG_MO_LD_LD | TCG_MO_ST_LD | TCG_MO_ST_ST:
2793                         m_op = "rr+wr+ww";
2794                         break;
2795                     case TCG_MO_LD_ST | TCG_MO_ST_LD | TCG_MO_ST_ST:
2796                         m_op = "rw+wr+ww";
2797                         break;
2798                     case TCG_MO_ALL:
2799                         m_op = "all";
2800                         break;
2801                     default:
2802                         g_assert_not_reached();
2803                     }
2804 
2805                     col += ne_fprintf(f, "%s%s:%s", (k ? "," : ""), b_op, m_op);
2806                     i++, k++;
2807                 }
2808                 break;
2809             default:
2810                 break;
2811             }
2812             for (; i < nb_cargs; i++, k++) {
2813                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2814                                   op->args[k]);
2815             }
2816         }
2817 
2818         if (have_prefs || op->life) {
2819             for (; col < 40; ++col) {
2820                 putc(' ', f);
2821             }
2822         }
2823 
2824         if (op->life) {
2825             unsigned life = op->life;
2826 
2827             if (life & (SYNC_ARG * 3)) {
2828                 ne_fprintf(f, "  sync:");
2829                 for (i = 0; i < 2; ++i) {
2830                     if (life & (SYNC_ARG << i)) {
2831                         ne_fprintf(f, " %d", i);
2832                     }
2833                 }
2834             }
2835             life /= DEAD_ARG;
2836             if (life) {
2837                 ne_fprintf(f, "  dead:");
2838                 for (i = 0; life; ++i, life >>= 1) {
2839                     if (life & 1) {
2840                         ne_fprintf(f, " %d", i);
2841                     }
2842                 }
2843             }
2844         }
2845 
2846         if (have_prefs) {
2847             for (i = 0; i < nb_oargs; ++i) {
2848                 TCGRegSet set = output_pref(op, i);
2849 
2850                 if (i == 0) {
2851                     ne_fprintf(f, "  pref=");
2852                 } else {
2853                     ne_fprintf(f, ",");
2854                 }
2855                 if (set == 0) {
2856                     ne_fprintf(f, "none");
2857                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2858                     ne_fprintf(f, "all");
2859 #ifdef CONFIG_DEBUG_TCG
2860                 } else if (tcg_regset_single(set)) {
2861                     TCGReg reg = tcg_regset_first(set);
2862                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2863 #endif
2864                 } else if (TCG_TARGET_NB_REGS <= 32) {
2865                     ne_fprintf(f, "0x%x", (uint32_t)set);
2866                 } else {
2867                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2868                 }
2869             }
2870         }
2871 
2872         putc('\n', f);
2873     }
2874 }
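
/*
 * Sample output, illustrative only -- exact columns, temps and values
 * vary by target and logging flags:
 *
 *  ---- 000000000040102c 0000000000000000
 *  mov_i32 tmp0,r4                         dead: 1  pref=all
 *  qemu_ld_a64_i32 tmp1,addr,leul,1        dead: 1
 *  brcond_i32 tmp0,tmp1,eq,$L1             dead: 0 1
 */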
2875 
2876 /* we give more priority to constraints with fewer registers */
2877 static int get_constraint_priority(const TCGOpDef *def, int k)
2878 {
2879     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2880     int n = ctpop64(arg_ct->regs);
2881 
2882     /*
2883      * Sort constraints of a single register first, which includes output
2884      * aliases (which must exactly match the input already allocated).
2885      */
2886     if (n == 1 || arg_ct->oalias) {
2887         return INT_MAX;
2888     }
2889 
2890     /*
2891      * Sort register pairs next, first then second immediately after.
2892      * Arbitrarily sort multiple pairs by the index of the first reg;
2893      * there shouldn't be many pairs.
2894      */
2895     switch (arg_ct->pair) {
2896     case 1:
2897     case 3:
2898         return (k + 1) * 2;
2899     case 2:
2900         return (arg_ct->pair_index + 1) * 2 - 1;
2901     }
2902 
2903     /* Finally, sort by decreasing register count. */
2904     assert(n > 1);
2905     return -n;
2906 }
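
/*
 * Worked example, illustrative only: for a hypothetical constraint
 * set { "r", "0", "ri" }, the output "r" is aliased by input "0" and
 * sorts first (INT_MAX).  Among inputs, a single-register constraint
 * (n == 1), such as a fixed shift-count register, also sorts first,
 * while plain "r" and "ri" sort by decreasing register count (-n).
 */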
2907 
2908 /* sort from highest priority to lowest */
2909 static void sort_constraints(TCGOpDef *def, int start, int n)
2910 {
2911     int i, j;
2912     TCGArgConstraint *a = def->args_ct;
2913 
2914     for (i = 0; i < n; i++) {
2915         a[start + i].sort_index = start + i;
2916     }
2917     if (n <= 1) {
2918         return;
2919     }
2920     for (i = 0; i < n - 1; i++) {
2921         for (j = i + 1; j < n; j++) {
2922             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2923             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2924             if (p1 < p2) {
2925                 int tmp = a[start + i].sort_index;
2926                 a[start + i].sort_index = a[start + j].sort_index;
2927                 a[start + j].sort_index = tmp;
2928             }
2929         }
2930     }
2931 }
2932 
2933 static void process_op_defs(TCGContext *s)
2934 {
2935     TCGOpcode op;
2936 
2937     for (op = 0; op < NB_OPS; op++) {
2938         TCGOpDef *def = &tcg_op_defs[op];
2939         const TCGTargetOpDef *tdefs;
2940         bool saw_alias_pair = false;
2941         int i, o, i2, o2, nb_args;
2942 
2943         if (def->flags & TCG_OPF_NOT_PRESENT) {
2944             continue;
2945         }
2946 
2947         nb_args = def->nb_iargs + def->nb_oargs;
2948         if (nb_args == 0) {
2949             continue;
2950         }
2951 
2952         /*
2953          * Macro magic should make it impossible, but double-check that
2954          * the array index is in range.  Since the signedness of an enum
2955          * is implementation-defined, force the result to unsigned.
2956          */
2957         unsigned con_set = tcg_target_op_def(op);
2958         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2959         tdefs = &constraint_sets[con_set];
2960 
2961         for (i = 0; i < nb_args; i++) {
2962             const char *ct_str = tdefs->args_ct_str[i];
2963             bool input_p = i >= def->nb_oargs;
2964 
2965             /* Incomplete TCGTargetOpDef entry. */
2966             tcg_debug_assert(ct_str != NULL);
2967 
2968             switch (*ct_str) {
2969             case '0' ... '9':
2970                 o = *ct_str - '0';
2971                 tcg_debug_assert(input_p);
2972                 tcg_debug_assert(o < def->nb_oargs);
2973                 tcg_debug_assert(def->args_ct[o].regs != 0);
2974                 tcg_debug_assert(!def->args_ct[o].oalias);
2975                 def->args_ct[i] = def->args_ct[o];
2976                 /* The output sets oalias.  */
2977                 def->args_ct[o].oalias = 1;
2978                 def->args_ct[o].alias_index = i;
2979                 /* The input sets ialias. */
2980                 def->args_ct[i].ialias = 1;
2981                 def->args_ct[i].alias_index = o;
2982                 if (def->args_ct[i].pair) {
2983                     saw_alias_pair = true;
2984                 }
2985                 tcg_debug_assert(ct_str[1] == '\0');
2986                 continue;
2987 
2988             case '&':
2989                 tcg_debug_assert(!input_p);
2990                 def->args_ct[i].newreg = true;
2991                 ct_str++;
2992                 break;
2993 
2994             case 'p': /* plus */
2995                 /* Allocate to the register after the previous. */
2996                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2997                 o = i - 1;
2998                 tcg_debug_assert(!def->args_ct[o].pair);
2999                 tcg_debug_assert(!def->args_ct[o].ct);
3000                 def->args_ct[i] = (TCGArgConstraint){
3001                     .pair = 2,
3002                     .pair_index = o,
3003                     .regs = def->args_ct[o].regs << 1,
3004                     .newreg = def->args_ct[o].newreg,
3005                 };
3006                 def->args_ct[o].pair = 1;
3007                 def->args_ct[o].pair_index = i;
3008                 tcg_debug_assert(ct_str[1] == '\0');
3009                 continue;
3010 
3011             case 'm': /* minus */
3012                 /* Allocate to the register before the previous. */
3013                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
3014                 o = i - 1;
3015                 tcg_debug_assert(!def->args_ct[o].pair);
3016                 tcg_debug_assert(!def->args_ct[o].ct);
3017                 def->args_ct[i] = (TCGArgConstraint){
3018                     .pair = 1,
3019                     .pair_index = o,
3020                     .regs = def->args_ct[o].regs >> 1,
3021                     .newreg = def->args_ct[o].newreg,
3022                 };
3023                 def->args_ct[o].pair = 2;
3024                 def->args_ct[o].pair_index = i;
3025                 tcg_debug_assert(ct_str[1] == '\0');
3026                 continue;
3027             }
3028 
3029             do {
3030                 switch (*ct_str) {
3031                 case 'i':
3032                     def->args_ct[i].ct |= TCG_CT_CONST;
3033                     break;
3034 
3035                 /* Include all of the target-specific constraints. */
3036 
3037 #undef CONST
3038 #define CONST(CASE, MASK) \
3039     case CASE: def->args_ct[i].ct |= MASK; break;
3040 #define REGS(CASE, MASK) \
3041     case CASE: def->args_ct[i].regs |= MASK; break;
3042 
3043 #include "tcg-target-con-str.h"
3044 
3045 #undef REGS
3046 #undef CONST
3047                 default:
3048                 case '0' ... '9':
3049                 case '&':
3050                 case 'p':
3051                 case 'm':
3052                     /* Typo in TCGTargetOpDef constraint. */
3053                     g_assert_not_reached();
3054                 }
3055             } while (*++ct_str != '\0');
3056         }
3057 
3058         /* TCGTargetOpDef entry with too much information? */
3059         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
3060 
3061         /*
3062          * Fix up output pairs that are aliased with inputs.
3063          * When we created the alias, we copied pair from the output.
3064          * There are three cases:
3065          *    (1a) Pairs of inputs alias pairs of outputs.
3066          *    (1b) One input aliases the first of a pair of outputs.
3067          *    (2)  One input aliases the second of a pair of outputs.
3068          *
3069          * Case 1a is handled by making sure that the pair_index'es are
3070          * properly updated so that they appear the same as a pair of inputs.
3071          *
3072          * Case 1b is handled by setting the pair_index of the input to
3073          * itself, simply so it doesn't point to an unrelated argument.
3074          * Since we don't encounter the "second" during the input allocation
3075          * phase, nothing happens with the second half of the input pair.
3076          *
3077          * Case 2 is handled by setting the second input to pair=3, the
3078          * first output to pair=3, and the pair_index'es to match.
3079          */
3080         if (saw_alias_pair) {
3081             for (i = def->nb_oargs; i < nb_args; i++) {
3082                 /*
3083                  * Since [0-9pm] must be alone in the constraint string,
3084                  * the only way they can both be set is if the pair comes
3085                  * from the output alias.
3086                  */
3087                 if (!def->args_ct[i].ialias) {
3088                     continue;
3089                 }
3090                 switch (def->args_ct[i].pair) {
3091                 case 0:
3092                     break;
3093                 case 1:
3094                     o = def->args_ct[i].alias_index;
3095                     o2 = def->args_ct[o].pair_index;
3096                     tcg_debug_assert(def->args_ct[o].pair == 1);
3097                     tcg_debug_assert(def->args_ct[o2].pair == 2);
3098                     if (def->args_ct[o2].oalias) {
3099                         /* Case 1a */
3100                         i2 = def->args_ct[o2].alias_index;
3101                         tcg_debug_assert(def->args_ct[i2].pair == 2);
3102                         def->args_ct[i2].pair_index = i;
3103                         def->args_ct[i].pair_index = i2;
3104                     } else {
3105                         /* Case 1b */
3106                         def->args_ct[i].pair_index = i;
3107                     }
3108                     break;
3109                 case 2:
3110                     o = def->args_ct[i].alias_index;
3111                     o2 = def->args_ct[o].pair_index;
3112                     tcg_debug_assert(def->args_ct[o].pair == 2);
3113                     tcg_debug_assert(def->args_ct[o2].pair == 1);
3114                     if (def->args_ct[o2].oalias) {
3115                         /* Case 1a */
3116                         i2 = def->args_ct[o2].alias_index;
3117                         tcg_debug_assert(def->args_ct[i2].pair == 1);
3118                         def->args_ct[i2].pair_index = i;
3119                         def->args_ct[i].pair_index = i2;
3120                     } else {
3121                         /* Case 2 */
3122                         def->args_ct[i].pair = 3;
3123                         def->args_ct[o2].pair = 3;
3124                         def->args_ct[i].pair_index = o2;
3125                         def->args_ct[o2].pair_index = i;
3126                     }
3127                     break;
3128                 default:
3129                     g_assert_not_reached();
3130                 }
3131             }
3132         }
3133 
3134         /* sort the constraints (XXX: this is just a heuristic) */
3135         sort_constraints(def, 0, def->nb_oargs);
3136         sort_constraints(def, def->nb_oargs, def->nb_iargs);
3137     }
3138 }
3139 
3140 static void remove_label_use(TCGOp *op, int idx)
3141 {
3142     TCGLabel *label = arg_label(op->args[idx]);
3143     TCGLabelUse *use;
3144 
3145     QSIMPLEQ_FOREACH(use, &label->branches, next) {
3146         if (use->op == op) {
3147             QSIMPLEQ_REMOVE(&label->branches, use, TCGLabelUse, next);
3148             return;
3149         }
3150     }
3151     g_assert_not_reached();
3152 }
3153 
3154 void tcg_op_remove(TCGContext *s, TCGOp *op)
3155 {
3156     switch (op->opc) {
3157     case INDEX_op_br:
3158         remove_label_use(op, 0);
3159         break;
3160     case INDEX_op_brcond_i32:
3161     case INDEX_op_brcond_i64:
3162         remove_label_use(op, 3);
3163         break;
3164     case INDEX_op_brcond2_i32:
3165         remove_label_use(op, 5);
3166         break;
3167     default:
3168         break;
3169     }
3170 
3171     QTAILQ_REMOVE(&s->ops, op, link);
3172     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
3173     s->nb_ops--;
3174 }
3175 
3176 void tcg_remove_ops_after(TCGOp *op)
3177 {
3178     TCGContext *s = tcg_ctx;
3179 
3180     while (true) {
3181         TCGOp *last = tcg_last_op();
3182         if (last == op) {
3183             return;
3184         }
3185         tcg_op_remove(s, last);
3186     }
3187 }
3188 
3189 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
3190 {
3191     TCGContext *s = tcg_ctx;
3192     TCGOp *op = NULL;
3193 
3194     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
3195         QTAILQ_FOREACH(op, &s->free_ops, link) {
3196             if (nargs <= op->nargs) {
3197                 QTAILQ_REMOVE(&s->free_ops, op, link);
3198                 nargs = op->nargs;
3199                 goto found;
3200             }
3201         }
3202     }
3203 
3204     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
3205     nargs = MAX(4, nargs);
3206     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
3207 
3208  found:
3209     memset(op, 0, offsetof(TCGOp, link));
3210     op->opc = opc;
3211     op->nargs = nargs;
3212 
3213     /* Check for bitfield overflow. */
3214     tcg_debug_assert(op->nargs == nargs);
3215 
3216     s->nb_ops++;
3217     return op;
3218 }
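
     /*
      * Note (illustrative): an op recycled from free_ops keeps its
      * original allocation size -- "nargs = op->nargs" above restores
      * the larger count -- so a 4-arg slot freed by tcg_op_remove()
      * can later satisfy a 3-arg request without losing storage.
      */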
3219 
3220 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
3221 {
3222     TCGOp *op = tcg_op_alloc(opc, nargs);
3223 
3224     if (tcg_ctx->emit_before_op) {
3225         QTAILQ_INSERT_BEFORE(tcg_ctx->emit_before_op, op, link);
3226     } else {
3227         QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
3228     }
3229     return op;
3230 }
3231 
3232 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
3233                             TCGOpcode opc, unsigned nargs)
3234 {
3235     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3236     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
3237     return new_op;
3238 }
3239 
3240 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
3241                            TCGOpcode opc, unsigned nargs)
3242 {
3243     TCGOp *new_op = tcg_op_alloc(opc, nargs);
3244     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
3245     return new_op;
3246 }
3247 
3248 static void move_label_uses(TCGLabel *to, TCGLabel *from)
3249 {
3250     TCGLabelUse *u;
3251 
3252     QSIMPLEQ_FOREACH(u, &from->branches, next) {
3253         TCGOp *op = u->op;
3254         switch (op->opc) {
3255         case INDEX_op_br:
3256             op->args[0] = label_arg(to);
3257             break;
3258         case INDEX_op_brcond_i32:
3259         case INDEX_op_brcond_i64:
3260             op->args[3] = label_arg(to);
3261             break;
3262         case INDEX_op_brcond2_i32:
3263             op->args[5] = label_arg(to);
3264             break;
3265         default:
3266             g_assert_not_reached();
3267         }
3268     }
3269 
3270     QSIMPLEQ_CONCAT(&to->branches, &from->branches);
3271 }
3272 
3273 /* Reachability analysis: remove unreachable code.  */
3274 static void __attribute__((noinline))
3275 reachable_code_pass(TCGContext *s)
3276 {
3277     TCGOp *op, *op_next, *op_prev;
3278     bool dead = false;
3279 
3280     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3281         bool remove = dead;
3282         TCGLabel *label;
3283 
3284         switch (op->opc) {
3285         case INDEX_op_set_label:
3286             label = arg_label(op->args[0]);
3287 
3288             /*
3289              * Note that the first op in the TB is always a load,
3290              * so there is always something before a label.
3291              */
3292             op_prev = QTAILQ_PREV(op, link);
3293 
3294             /*
3295              * If we find two sequential labels, move all branches to
3296              * reference the second label and remove the first label.
3297              * Do this before branch to next optimization, so that the
3298              * middle label is out of the way.
3299              */
3300             if (op_prev->opc == INDEX_op_set_label) {
3301                 move_label_uses(label, arg_label(op_prev->args[0]));
3302                 tcg_op_remove(s, op_prev);
3303                 op_prev = QTAILQ_PREV(op, link);
3304             }
3305 
3306             /*
3307              * Optimization can fold conditional branches to unconditional.
3308              * If we find a label which is preceded by an unconditional
3309              * branch to next, remove the branch.  We couldn't do this when
3310              * processing the branch because any dead code between the branch
3311              * and label had not yet been removed.
3312              */
3313             if (op_prev->opc == INDEX_op_br &&
3314                 label == arg_label(op_prev->args[0])) {
3315                 tcg_op_remove(s, op_prev);
3316                 /* Fall through means insns become live again.  */
3317                 dead = false;
3318             }
3319 
3320             if (QSIMPLEQ_EMPTY(&label->branches)) {
3321                 /*
3322                  * While there is an occasional backward branch, virtually
3323                  * all branches generated by the translators are forward.
3324                  * This means that generally we will have already removed
3325                  * all references to this label that will ever exist, and
3326                  * there is little to be gained by iterating.
3327                  */
3328                 remove = true;
3329             } else {
3330                 /* Once we see a label, insns become live again.  */
3331                 dead = false;
3332                 remove = false;
3333             }
3334             break;
3335 
3336         case INDEX_op_br:
3337         case INDEX_op_exit_tb:
3338         case INDEX_op_goto_ptr:
3339             /* Unconditional branches; everything following is dead.  */
3340             dead = true;
3341             break;
3342 
3343         case INDEX_op_call:
3344             /* Notice noreturn helper calls, raising exceptions.  */
3345             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
3346                 dead = true;
3347             }
3348             break;
3349 
3350         case INDEX_op_insn_start:
3351             /* Never remove -- we need to keep these for unwind.  */
3352             remove = false;
3353             break;
3354 
3355         default:
3356             break;
3357         }
3358 
3359         if (remove) {
3360             tcg_op_remove(s, op);
3361         }
3362     }
3363 }
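
     /*
      * Worked example (illustrative): when earlier optimization folds a
      * brcond into an unconditional branch,
      *     br $L1
      *     mov_i32 t0, t1       <-- dead, removed by the pass above
      *     set_label $L1        <-- br-to-next, so the br goes too
      * the dead mov must be dropped before the branch-to-next test can
      * fire, which is why that test runs at the label, not the branch.
      */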
3364 
3365 #define TS_DEAD  1
3366 #define TS_MEM   2
3367 
3368 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
3369 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
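
     /*
      * Sketch (illustrative): arg_life packs one DEAD_ARG and one
      * SYNC_ARG bit per argument index, so IS_DEAD_ARG(1) asks "does
      * argument 1 die at this op?" and NEED_SYNC_ARG(0) asks "must
      * output 0 be written back to its canonical memory slot here?".
      */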
3370 
3371 /* For liveness_pass_1, the register preferences for a given temp.  */
3372 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
3373 {
3374     return ts->state_ptr;
3375 }
3376 
3377 /* For liveness_pass_1, reset the preferences for a given temp to the
3378  * maximal regset for its type.
3379  */
3380 static inline void la_reset_pref(TCGTemp *ts)
3381 {
3382     *la_temp_pref(ts)
3383         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
3384 }
3385 
3386 /* liveness analysis: end of function: all temps are dead, and globals
3387    should be in memory. */
3388 static void la_func_end(TCGContext *s, int ng, int nt)
3389 {
3390     int i;
3391 
3392     for (i = 0; i < ng; ++i) {
3393         s->temps[i].state = TS_DEAD | TS_MEM;
3394         la_reset_pref(&s->temps[i]);
3395     }
3396     for (i = ng; i < nt; ++i) {
3397         s->temps[i].state = TS_DEAD;
3398         la_reset_pref(&s->temps[i]);
3399     }
3400 }
3401 
3402 /* liveness analysis: end of basic block: all temps are dead; globals
3403    and local temps should be in memory. */
3404 static void la_bb_end(TCGContext *s, int ng, int nt)
3405 {
3406     int i;
3407 
3408     for (i = 0; i < nt; ++i) {
3409         TCGTemp *ts = &s->temps[i];
3410         int state;
3411 
3412         switch (ts->kind) {
3413         case TEMP_FIXED:
3414         case TEMP_GLOBAL:
3415         case TEMP_TB:
3416             state = TS_DEAD | TS_MEM;
3417             break;
3418         case TEMP_EBB:
3419         case TEMP_CONST:
3420             state = TS_DEAD;
3421             break;
3422         default:
3423             g_assert_not_reached();
3424         }
3425         ts->state = state;
3426         la_reset_pref(ts);
3427     }
3428 }
3429 
3430 /* liveness analysis: sync globals back to memory.  */
3431 static void la_global_sync(TCGContext *s, int ng)
3432 {
3433     int i;
3434 
3435     for (i = 0; i < ng; ++i) {
3436         int state = s->temps[i].state;
3437         s->temps[i].state = state | TS_MEM;
3438         if (state == TS_DEAD) {
3439             /* If the global was previously dead, reset prefs.  */
3440             la_reset_pref(&s->temps[i]);
3441         }
3442     }
3443 }
3444 
3445 /*
3446  * liveness analysis: conditional branch: all temps are dead unless
3447  * explicitly live-across-conditional-branch; globals and local temps
3448  * should be synced.
3449  */
3450 static void la_bb_sync(TCGContext *s, int ng, int nt)
3451 {
3452     la_global_sync(s, ng);
3453 
3454     for (int i = ng; i < nt; ++i) {
3455         TCGTemp *ts = &s->temps[i];
3456         int state;
3457 
3458         switch (ts->kind) {
3459         case TEMP_TB:
3460             state = ts->state;
3461             ts->state = state | TS_MEM;
3462             if (state != TS_DEAD) {
3463                 continue;
3464             }
3465             break;
3466         case TEMP_EBB:
3467         case TEMP_CONST:
3468             continue;
3469         default:
3470             g_assert_not_reached();
3471         }
3472         la_reset_pref(&s->temps[i]);
3473     }
3474 }
3475 
3476 /* liveness analysis: sync globals back to memory and kill.  */
3477 static void la_global_kill(TCGContext *s, int ng)
3478 {
3479     int i;
3480 
3481     for (i = 0; i < ng; i++) {
3482         s->temps[i].state = TS_DEAD | TS_MEM;
3483         la_reset_pref(&s->temps[i]);
3484     }
3485 }
3486 
3487 /* liveness analysis: note live globals crossing calls.  */
3488 static void la_cross_call(TCGContext *s, int nt)
3489 {
3490     TCGRegSet mask = ~tcg_target_call_clobber_regs;
3491     int i;
3492 
3493     for (i = 0; i < nt; i++) {
3494         TCGTemp *ts = &s->temps[i];
3495         if (!(ts->state & TS_DEAD)) {
3496             TCGRegSet *pset = la_temp_pref(ts);
3497             TCGRegSet set = *pset;
3498 
3499             set &= mask;
3500             /* If the combination is not possible, restart.  */
3501             if (set == 0) {
3502                 set = tcg_target_available_regs[ts->type] & mask;
3503             }
3504             *pset = set;
3505         }
3506     }
3507 }
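
     /*
      * Example (illustrative): if a live temp's preference set contains
      * only call-clobbered registers, the intersection with the
      * call-saved mask is empty, and the preference restarts from every
      * call-saved register available for the temp's type.
      */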
3508 
3509 /*
3510  * Liveness analysis: Verify the lifetime of TEMP_TB, and reduce
3511  * to TEMP_EBB, if possible.
3512  */
3513 static void __attribute__((noinline))
3514 liveness_pass_0(TCGContext *s)
3515 {
3516     void * const multiple_ebb = (void *)(uintptr_t)-1;
3517     int nb_temps = s->nb_temps;
3518     TCGOp *op, *ebb;
3519 
3520     for (int i = s->nb_globals; i < nb_temps; ++i) {
3521         s->temps[i].state_ptr = NULL;
3522     }
3523 
3524     /*
3525      * Represent each EBB by the op at which it begins.  In the case of
3526      * the first EBB, this is the first op, otherwise it is a label.
3527      * Collect the uses of each TEMP_TB: NULL for unused, EBB for use
3528      * within a single EBB, else MULTIPLE_EBB.
3529      */
3530     ebb = QTAILQ_FIRST(&s->ops);
3531     QTAILQ_FOREACH(op, &s->ops, link) {
3532         const TCGOpDef *def;
3533         int nb_oargs, nb_iargs;
3534 
3535         switch (op->opc) {
3536         case INDEX_op_set_label:
3537             ebb = op;
3538             continue;
3539         case INDEX_op_discard:
3540             continue;
3541         case INDEX_op_call:
3542             nb_oargs = TCGOP_CALLO(op);
3543             nb_iargs = TCGOP_CALLI(op);
3544             break;
3545         default:
3546             def = &tcg_op_defs[op->opc];
3547             nb_oargs = def->nb_oargs;
3548             nb_iargs = def->nb_iargs;
3549             break;
3550         }
3551 
3552         for (int i = 0; i < nb_oargs + nb_iargs; ++i) {
3553             TCGTemp *ts = arg_temp(op->args[i]);
3554 
3555             if (ts->kind != TEMP_TB) {
3556                 continue;
3557             }
3558             if (ts->state_ptr == NULL) {
3559                 ts->state_ptr = ebb;
3560             } else if (ts->state_ptr != ebb) {
3561                 ts->state_ptr = multiple_ebb;
3562             }
3563         }
3564     }
3565 
3566     /*
3567      * For TEMP_TB that turned out not to be used beyond one EBB,
3568      * reduce the liveness to TEMP_EBB.
3569      */
3570     for (int i = s->nb_globals; i < nb_temps; ++i) {
3571         TCGTemp *ts = &s->temps[i];
3572         if (ts->kind == TEMP_TB && ts->state_ptr != multiple_ebb) {
3573             ts->kind = TEMP_EBB;
3574         }
3575     }
3576 }
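
     /*
      * Illustrative: a TEMP_TB whose every use falls between one label
      * and the next records that single EBB's first op in state_ptr and
      * is demoted to TEMP_EBB above; a temp referenced on both sides of
      * a label ends as MULTIPLE_EBB and keeps its TEMP_TB lifetime.
      */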
3577 
3578 /* Liveness analysis: update the opc_arg_life array to tell if a
3579    given input argument is dead. Instructions updating dead
3580    temporaries are removed. */
3581 static void __attribute__((noinline))
3582 liveness_pass_1(TCGContext *s)
3583 {
3584     int nb_globals = s->nb_globals;
3585     int nb_temps = s->nb_temps;
3586     TCGOp *op, *op_prev;
3587     TCGRegSet *prefs;
3588     int i;
3589 
3590     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
3591     for (i = 0; i < nb_temps; ++i) {
3592         s->temps[i].state_ptr = prefs + i;
3593     }
3594 
3595     /* ??? Should be redundant with the exit_tb that ends the TB.  */
3596     la_func_end(s, nb_globals, nb_temps);
3597 
3598     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
3599         int nb_iargs, nb_oargs;
3600         TCGOpcode opc_new, opc_new2;
3601         bool have_opc_new2;
3602         TCGLifeData arg_life = 0;
3603         TCGTemp *ts;
3604         TCGOpcode opc = op->opc;
3605         const TCGOpDef *def = &tcg_op_defs[opc];
3606 
3607         switch (opc) {
3608         case INDEX_op_call:
3609             {
3610                 const TCGHelperInfo *info = tcg_call_info(op);
3611                 int call_flags = tcg_call_flags(op);
3612 
3613                 nb_oargs = TCGOP_CALLO(op);
3614                 nb_iargs = TCGOP_CALLI(op);
3615 
3616                 /* pure functions can be removed if their result is unused */
3617                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
3618                     for (i = 0; i < nb_oargs; i++) {
3619                         ts = arg_temp(op->args[i]);
3620                         if (ts->state != TS_DEAD) {
3621                             goto do_not_remove_call;
3622                         }
3623                     }
3624                     goto do_remove;
3625                 }
3626             do_not_remove_call:
3627 
3628                 /* Output args are dead.  */
3629                 for (i = 0; i < nb_oargs; i++) {
3630                     ts = arg_temp(op->args[i]);
3631                     if (ts->state & TS_DEAD) {
3632                         arg_life |= DEAD_ARG << i;
3633                     }
3634                     if (ts->state & TS_MEM) {
3635                         arg_life |= SYNC_ARG << i;
3636                     }
3637                     ts->state = TS_DEAD;
3638                     la_reset_pref(ts);
3639                 }
3640 
3641                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
3642                 memset(op->output_pref, 0, sizeof(op->output_pref));
3643 
3644                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
3645                                     TCG_CALL_NO_READ_GLOBALS))) {
3646                     la_global_kill(s, nb_globals);
3647                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
3648                     la_global_sync(s, nb_globals);
3649                 }
3650 
3651                 /* Record arguments that die in this helper.  */
3652                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3653                     ts = arg_temp(op->args[i]);
3654                     if (ts->state & TS_DEAD) {
3655                         arg_life |= DEAD_ARG << i;
3656                     }
3657                 }
3658 
3659                 /* For all live registers, remove call-clobbered prefs.  */
3660                 la_cross_call(s, nb_temps);
3661 
3662                 /*
3663                  * Input arguments are live for preceding opcodes.
3664                  *
3665                  * For those arguments that die, and will be allocated in
3666                  * registers, clear the register set for that arg, to be
3667                  * filled in below.  For args that will be on the stack,
3668                  * reset to any available reg.  Process arguments in reverse
3669                  * order so that if a temp is used more than once, the stack
3670                  * reset to max happens before the register reset to 0.
3671                  */
3672                 for (i = nb_iargs - 1; i >= 0; i--) {
3673                     const TCGCallArgumentLoc *loc = &info->in[i];
3674                     ts = arg_temp(op->args[nb_oargs + i]);
3675 
3676                     if (ts->state & TS_DEAD) {
3677                         switch (loc->kind) {
3678                         case TCG_CALL_ARG_NORMAL:
3679                         case TCG_CALL_ARG_EXTEND_U:
3680                         case TCG_CALL_ARG_EXTEND_S:
3681                             if (arg_slot_reg_p(loc->arg_slot)) {
3682                                 *la_temp_pref(ts) = 0;
3683                                 break;
3684                             }
3685                             /* fall through */
3686                         default:
3687                             *la_temp_pref(ts) =
3688                                 tcg_target_available_regs[ts->type];
3689                             break;
3690                         }
3691                         ts->state &= ~TS_DEAD;
3692                     }
3693                 }
3694 
3695                 /*
3696                  * For each input argument, add its input register to prefs.
3697                  * If a temp is used once, this produces a single set bit;
3698                  * if a temp is used multiple times, this produces a set.
3699                  */
3700                 for (i = 0; i < nb_iargs; i++) {
3701                     const TCGCallArgumentLoc *loc = &info->in[i];
3702                     ts = arg_temp(op->args[nb_oargs + i]);
3703 
3704                     switch (loc->kind) {
3705                     case TCG_CALL_ARG_NORMAL:
3706                     case TCG_CALL_ARG_EXTEND_U:
3707                     case TCG_CALL_ARG_EXTEND_S:
3708                         if (arg_slot_reg_p(loc->arg_slot)) {
3709                             tcg_regset_set_reg(*la_temp_pref(ts),
3710                                 tcg_target_call_iarg_regs[loc->arg_slot]);
3711                         }
3712                         break;
3713                     default:
3714                         break;
3715                     }
3716                 }
3717             }
3718             break;
3719         case INDEX_op_insn_start:
3720             break;
3721         case INDEX_op_discard:
3722             /* mark the temporary as dead */
3723             ts = arg_temp(op->args[0]);
3724             ts->state = TS_DEAD;
3725             la_reset_pref(ts);
3726             break;
3727 
3728         case INDEX_op_add2_i32:
3729             opc_new = INDEX_op_add_i32;
3730             goto do_addsub2;
3731         case INDEX_op_sub2_i32:
3732             opc_new = INDEX_op_sub_i32;
3733             goto do_addsub2;
3734         case INDEX_op_add2_i64:
3735             opc_new = INDEX_op_add_i64;
3736             goto do_addsub2;
3737         case INDEX_op_sub2_i64:
3738             opc_new = INDEX_op_sub_i64;
3739         do_addsub2:
3740             nb_iargs = 4;
3741             nb_oargs = 2;
3742             /* Test if the high part of the operation is dead, but not
3743                the low part.  The result can be optimized to a simple
3744                add or sub.  This often happens for x86_64 guests when
3745                the CPU mode is set to 32-bit.  */
3746             if (arg_temp(op->args[1])->state == TS_DEAD) {
3747                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3748                     goto do_remove;
3749                 }
3750                 /* Replace the opcode and adjust the args in place,
3751                    leaving 3 unused args at the end.  */
3752                 op->opc = opc = opc_new;
3753                 op->args[1] = op->args[2];
3754                 op->args[2] = op->args[4];
3755                 /* Fall through and mark the single-word operation live.  */
3756                 nb_iargs = 2;
3757                 nb_oargs = 1;
3758             }
3759             goto do_not_remove;
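
             /*
              * Sketch (illustrative): add2_i32 t0,t1,al,ah,bl,bh with t1
              * dead but t0 live is rewritten in place above as
              * add_i32 t0,al,bl: args[1] and args[2] take their values
              * from args[2] and args[4], and the last three arguments
              * simply go unused.
              */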
3760 
3761         case INDEX_op_mulu2_i32:
3762             opc_new = INDEX_op_mul_i32;
3763             opc_new2 = INDEX_op_muluh_i32;
3764             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3765             goto do_mul2;
3766         case INDEX_op_muls2_i32:
3767             opc_new = INDEX_op_mul_i32;
3768             opc_new2 = INDEX_op_mulsh_i32;
3769             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3770             goto do_mul2;
3771         case INDEX_op_mulu2_i64:
3772             opc_new = INDEX_op_mul_i64;
3773             opc_new2 = INDEX_op_muluh_i64;
3774             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3775             goto do_mul2;
3776         case INDEX_op_muls2_i64:
3777             opc_new = INDEX_op_mul_i64;
3778             opc_new2 = INDEX_op_mulsh_i64;
3779             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3780             goto do_mul2;
3781         do_mul2:
3782             nb_iargs = 2;
3783             nb_oargs = 2;
3784             if (arg_temp(op->args[1])->state == TS_DEAD) {
3785                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3786                     /* Both parts of the operation are dead.  */
3787                     goto do_remove;
3788                 }
3789                 /* The high part of the operation is dead; generate the low. */
3790                 op->opc = opc = opc_new;
3791                 op->args[1] = op->args[2];
3792                 op->args[2] = op->args[3];
3793             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3794                 /* The low part of the operation is dead; generate the high. */
3795                 op->opc = opc = opc_new2;
3796                 op->args[0] = op->args[1];
3797                 op->args[1] = op->args[2];
3798                 op->args[2] = op->args[3];
3799             } else {
3800                 goto do_not_remove;
3801             }
3802             /* Mark the single-word operation live.  */
3803             nb_oargs = 1;
3804             goto do_not_remove;
3805 
3806         default:
3807             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3808             nb_iargs = def->nb_iargs;
3809             nb_oargs = def->nb_oargs;
3810 
3811             /* Test if the operation can be removed because all
3812                its outputs are dead. We assume that nb_oargs == 0
3813                implies side effects.  */
3814             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3815                 for (i = 0; i < nb_oargs; i++) {
3816                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3817                         goto do_not_remove;
3818                     }
3819                 }
3820                 goto do_remove;
3821             }
3822             goto do_not_remove;
3823 
3824         do_remove:
3825             tcg_op_remove(s, op);
3826             break;
3827 
3828         do_not_remove:
3829             for (i = 0; i < nb_oargs; i++) {
3830                 ts = arg_temp(op->args[i]);
3831 
3832                 /* Remember the preference of the uses that followed.  */
3833                 if (i < ARRAY_SIZE(op->output_pref)) {
3834                     op->output_pref[i] = *la_temp_pref(ts);
3835                 }
3836 
3837                 /* Output args are dead.  */
3838                 if (ts->state & TS_DEAD) {
3839                     arg_life |= DEAD_ARG << i;
3840                 }
3841                 if (ts->state & TS_MEM) {
3842                     arg_life |= SYNC_ARG << i;
3843                 }
3844                 ts->state = TS_DEAD;
3845                 la_reset_pref(ts);
3846             }
3847 
3848             /* If end of basic block, update.  */
3849             if (def->flags & TCG_OPF_BB_EXIT) {
3850                 la_func_end(s, nb_globals, nb_temps);
3851             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3852                 la_bb_sync(s, nb_globals, nb_temps);
3853             } else if (def->flags & TCG_OPF_BB_END) {
3854                 la_bb_end(s, nb_globals, nb_temps);
3855             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3856                 la_global_sync(s, nb_globals);
3857                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3858                     la_cross_call(s, nb_temps);
3859                 }
3860             }
3861 
3862             /* Record arguments that die in this opcode.  */
3863             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3864                 ts = arg_temp(op->args[i]);
3865                 if (ts->state & TS_DEAD) {
3866                     arg_life |= DEAD_ARG << i;
3867                 }
3868             }
3869 
3870             /* Input arguments are live for preceding opcodes.  */
3871             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3872                 ts = arg_temp(op->args[i]);
3873                 if (ts->state & TS_DEAD) {
3874                     /* For operands that were dead, initially allow
3875                        all regs for the type.  */
3876                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3877                     ts->state &= ~TS_DEAD;
3878                 }
3879             }
3880 
3881             /* Incorporate constraints for this operand.  */
3882             switch (opc) {
3883             case INDEX_op_mov_i32:
3884             case INDEX_op_mov_i64:
3885                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3886                    have proper constraints.  That said, special case
3887                    moves to propagate preferences backward.  */
3888                 if (IS_DEAD_ARG(1)) {
3889                     *la_temp_pref(arg_temp(op->args[0]))
3890                         = *la_temp_pref(arg_temp(op->args[1]));
3891                 }
3892                 break;
3893 
3894             default:
3895                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3896                     const TCGArgConstraint *ct = &def->args_ct[i];
3897                     TCGRegSet set, *pset;
3898 
3899                     ts = arg_temp(op->args[i]);
3900                     pset = la_temp_pref(ts);
3901                     set = *pset;
3902 
3903                     set &= ct->regs;
3904                     if (ct->ialias) {
3905                         set &= output_pref(op, ct->alias_index);
3906                     }
3907                     /* If the combination is not possible, restart.  */
3908                     if (set == 0) {
3909                         set = ct->regs;
3910                     }
3911                     *pset = set;
3912                 }
3913                 break;
3914             }
3915             break;
3916         }
3917         op->life = arg_life;
3918     }
3919 }
3920 
3921 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3922 static bool __attribute__((noinline))
3923 liveness_pass_2(TCGContext *s)
3924 {
3925     int nb_globals = s->nb_globals;
3926     int nb_temps, i;
3927     bool changes = false;
3928     TCGOp *op, *op_next;
3929 
3930     /* Create a temporary for each indirect global.  */
3931     for (i = 0; i < nb_globals; ++i) {
3932         TCGTemp *its = &s->temps[i];
3933         if (its->indirect_reg) {
3934             TCGTemp *dts = tcg_temp_alloc(s);
3935             dts->type = its->type;
3936             dts->base_type = its->base_type;
3937             dts->temp_subindex = its->temp_subindex;
3938             dts->kind = TEMP_EBB;
3939             its->state_ptr = dts;
3940         } else {
3941             its->state_ptr = NULL;
3942         }
3943         /* All globals begin dead.  */
3944         its->state = TS_DEAD;
3945     }
3946     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3947         TCGTemp *its = &s->temps[i];
3948         its->state_ptr = NULL;
3949         its->state = TS_DEAD;
3950     }
3951 
3952     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3953         TCGOpcode opc = op->opc;
3954         const TCGOpDef *def = &tcg_op_defs[opc];
3955         TCGLifeData arg_life = op->life;
3956         int nb_iargs, nb_oargs, call_flags;
3957         TCGTemp *arg_ts, *dir_ts;
3958 
3959         if (opc == INDEX_op_call) {
3960             nb_oargs = TCGOP_CALLO(op);
3961             nb_iargs = TCGOP_CALLI(op);
3962             call_flags = tcg_call_flags(op);
3963         } else {
3964             nb_iargs = def->nb_iargs;
3965             nb_oargs = def->nb_oargs;
3966 
3967             /* Set flags similar to those that calls require.  */
3968             if (def->flags & TCG_OPF_COND_BRANCH) {
3969                 /* Like reading globals: sync_globals */
3970                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3971             } else if (def->flags & TCG_OPF_BB_END) {
3972                 /* Like writing globals: save_globals */
3973                 call_flags = 0;
3974             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3975                 /* Like reading globals: sync_globals */
3976                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3977             } else {
3978                 /* No effect on globals.  */
3979                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3980                               TCG_CALL_NO_WRITE_GLOBALS);
3981             }
3982         }
3983 
3984         /* Make sure that input arguments are available.  */
3985         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3986             arg_ts = arg_temp(op->args[i]);
3987             dir_ts = arg_ts->state_ptr;
3988             if (dir_ts && arg_ts->state == TS_DEAD) {
3989                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3990                                   ? INDEX_op_ld_i32
3991                                   : INDEX_op_ld_i64);
3992                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3993 
3994                 lop->args[0] = temp_arg(dir_ts);
3995                 lop->args[1] = temp_arg(arg_ts->mem_base);
3996                 lop->args[2] = arg_ts->mem_offset;
3997 
3998                 /* Loaded, but synced with memory.  */
3999                 arg_ts->state = TS_MEM;
4000             }
4001         }
4002 
4003         /* Perform input replacement, and mark inputs that became dead.
4004            No action is required except keeping temp_state up to date
4005            so that we reload when needed.  */
4006         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4007             arg_ts = arg_temp(op->args[i]);
4008             dir_ts = arg_ts->state_ptr;
4009             if (dir_ts) {
4010                 op->args[i] = temp_arg(dir_ts);
4011                 changes = true;
4012                 if (IS_DEAD_ARG(i)) {
4013                     arg_ts->state = TS_DEAD;
4014                 }
4015             }
4016         }
4017 
4018         /* Liveness analysis should ensure that the following are
4019            all correct, for call sites and basic block end points.  */
4020         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
4021             /* Nothing to do */
4022         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
4023             for (i = 0; i < nb_globals; ++i) {
4024                 /* Liveness should see that globals are synced back,
4025                    that is, either TS_DEAD or TS_MEM.  */
4026                 arg_ts = &s->temps[i];
4027                 tcg_debug_assert(arg_ts->state_ptr == 0
4028                                  || arg_ts->state != 0);
4029             }
4030         } else {
4031             for (i = 0; i < nb_globals; ++i) {
4032                 /* Liveness should see that globals are saved back,
4033                    that is, TS_DEAD, waiting to be reloaded.  */
4034                 arg_ts = &s->temps[i];
4035                 tcg_debug_assert(arg_ts->state_ptr == 0
4036                                  || arg_ts->state == TS_DEAD);
4037             }
4038         }
4039 
4040         /* Outputs become available.  */
4041         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
4042             arg_ts = arg_temp(op->args[0]);
4043             dir_ts = arg_ts->state_ptr;
4044             if (dir_ts) {
4045                 op->args[0] = temp_arg(dir_ts);
4046                 changes = true;
4047 
4048                 /* The output is now live and modified.  */
4049                 arg_ts->state = 0;
4050 
4051                 if (NEED_SYNC_ARG(0)) {
4052                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4053                                       ? INDEX_op_st_i32
4054                                       : INDEX_op_st_i64);
4055                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4056                     TCGTemp *out_ts = dir_ts;
4057 
4058                     if (IS_DEAD_ARG(0)) {
4059                         out_ts = arg_temp(op->args[1]);
4060                         arg_ts->state = TS_DEAD;
4061                         tcg_op_remove(s, op);
4062                     } else {
4063                         arg_ts->state = TS_MEM;
4064                     }
4065 
4066                     sop->args[0] = temp_arg(out_ts);
4067                     sop->args[1] = temp_arg(arg_ts->mem_base);
4068                     sop->args[2] = arg_ts->mem_offset;
4069                 } else {
4070                     tcg_debug_assert(!IS_DEAD_ARG(0));
4071                 }
4072             }
4073         } else {
4074             for (i = 0; i < nb_oargs; i++) {
4075                 arg_ts = arg_temp(op->args[i]);
4076                 dir_ts = arg_ts->state_ptr;
4077                 if (!dir_ts) {
4078                     continue;
4079                 }
4080                 op->args[i] = temp_arg(dir_ts);
4081                 changes = true;
4082 
4083                 /* The output is now live and modified.  */
4084                 arg_ts->state = 0;
4085 
4086                 /* Sync outputs upon their last write.  */
4087                 if (NEED_SYNC_ARG(i)) {
4088                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
4089                                       ? INDEX_op_st_i32
4090                                       : INDEX_op_st_i64);
4091                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
4092 
4093                     sop->args[0] = temp_arg(dir_ts);
4094                     sop->args[1] = temp_arg(arg_ts->mem_base);
4095                     sop->args[2] = arg_ts->mem_offset;
4096 
4097                     arg_ts->state = TS_MEM;
4098                 }
4099                 /* Drop outputs that are dead.  */
4100                 if (IS_DEAD_ARG(i)) {
4101                     arg_ts->state = TS_DEAD;
4102                 }
4103             }
4104         }
4105     }
4106 
4107     return changes;
4108 }
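
     /*
      * Illustrative effect of pass 2: for an indirect global G shadowed
      * by direct temp D, an op that reads and writes G such as
      *     add_i32 G, G, t0
      * becomes
      *     ld_i32  D, base, off    <-- inserted while G was TS_DEAD
      *     add_i32 D, D, t0
      *     st_i32  D, base, off    <-- inserted when NEED_SYNC_ARG(0)
      * where base/off stand for G's mem_base and mem_offset.
      */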
4109 
4110 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
4111 {
4112     intptr_t off;
4113     int size, align;
4114 
4115     /* When allocating an object, look at the full type. */
4116     size = tcg_type_size(ts->base_type);
4117     switch (ts->base_type) {
4118     case TCG_TYPE_I32:
4119         align = 4;
4120         break;
4121     case TCG_TYPE_I64:
4122     case TCG_TYPE_V64:
4123         align = 8;
4124         break;
4125     case TCG_TYPE_I128:
4126     case TCG_TYPE_V128:
4127     case TCG_TYPE_V256:
4128         /*
4129          * Note that we do not require aligned storage for V256,
4130          * and that we provide alignment for I128 to match V128,
4131          * even if that's above what the host ABI requires.
4132          */
4133         align = 16;
4134         break;
4135     default:
4136         g_assert_not_reached();
4137     }
4138 
4139     /*
4140      * Assume the stack is sufficiently aligned.
4141      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
4142      * and do not require 16 byte vector alignment.  This seems slightly
4143      * easier than fully parameterizing the above switch statement.
4144      */
4145     align = MIN(TCG_TARGET_STACK_ALIGN, align);
4146     off = ROUND_UP(s->current_frame_offset, align);
4147 
4148     /* If we've exhausted the stack frame, restart with a smaller TB. */
4149     if (off + size > s->frame_end) {
4150         tcg_raise_tb_overflow(s);
4151     }
4152     s->current_frame_offset = off + size;
4153 #if defined(__sparc__)
4154     off += TCG_TARGET_STACK_BIAS;
4155 #endif
4156 
4157     /* If the object was subdivided, assign memory to all the parts. */
4158     if (ts->base_type != ts->type) {
4159         int part_size = tcg_type_size(ts->type);
4160         int part_count = size / part_size;
4161 
4162         /*
4163          * Each part is allocated sequentially in tcg_temp_new_internal.
4164          * Jump back to the first part by subtracting the current index.
4165          */
4166         ts -= ts->temp_subindex;
4167         for (int i = 0; i < part_count; ++i) {
4168             ts[i].mem_offset = off + i * part_size;
4169             ts[i].mem_base = s->frame_temp;
4170             ts[i].mem_allocated = 1;
4171         }
4172     } else {
4173         ts->mem_offset = off;
4174         ts->mem_base = s->frame_temp;
4175         ts->mem_allocated = 1;
4176     }
4177 }
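
     /*
      * Example (illustrative, assuming a 16-byte-aligned stack): from
      * current_frame_offset == 12, an I64 temp would be placed at
      * offset 16, advancing the frame to 24; an I128 subdivided into
      * two I64 parts would also start at 16, parts at 16 and 24.
      */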
4178 
4179 /* Assign @reg to @ts, and update reg_to_temp[]. */
4180 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
4181 {
4182     if (ts->val_type == TEMP_VAL_REG) {
4183         TCGReg old = ts->reg;
4184         tcg_debug_assert(s->reg_to_temp[old] == ts);
4185         if (old == reg) {
4186             return;
4187         }
4188         s->reg_to_temp[old] = NULL;
4189     }
4190     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4191     s->reg_to_temp[reg] = ts;
4192     ts->val_type = TEMP_VAL_REG;
4193     ts->reg = reg;
4194 }
4195 
4196 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
4197 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
4198 {
4199     tcg_debug_assert(type != TEMP_VAL_REG);
4200     if (ts->val_type == TEMP_VAL_REG) {
4201         TCGReg reg = ts->reg;
4202         tcg_debug_assert(s->reg_to_temp[reg] == ts);
4203         s->reg_to_temp[reg] = NULL;
4204     }
4205     ts->val_type = type;
4206 }
4207 
4208 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
4209 
4210 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
4211    mark it free; otherwise mark it dead.  */
4212 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
4213 {
4214     TCGTempVal new_type;
4215 
4216     switch (ts->kind) {
4217     case TEMP_FIXED:
4218         return;
4219     case TEMP_GLOBAL:
4220     case TEMP_TB:
4221         new_type = TEMP_VAL_MEM;
4222         break;
4223     case TEMP_EBB:
4224         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
4225         break;
4226     case TEMP_CONST:
4227         new_type = TEMP_VAL_CONST;
4228         break;
4229     default:
4230         g_assert_not_reached();
4231     }
4232     set_temp_val_nonreg(s, ts, new_type);
4233 }
4234 
4235 /* Mark a temporary as dead.  */
4236 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
4237 {
4238     temp_free_or_dead(s, ts, 1);
4239 }
4240 
4241 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
4242    register needs to be allocated to store a constant.  If 'free_or_dead'
4243    is non-zero, subsequently release the temporary; if it is positive, the
4244    temp is dead; if it is negative, the temp is free.  */
4245 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
4246                       TCGRegSet preferred_regs, int free_or_dead)
4247 {
4248     if (!temp_readonly(ts) && !ts->mem_coherent) {
4249         if (!ts->mem_allocated) {
4250             temp_allocate_frame(s, ts);
4251         }
4252         switch (ts->val_type) {
4253         case TEMP_VAL_CONST:
4254             /* If we're going to free the temp immediately, then we won't
4255                require it later in a register, so attempt to store the
4256                constant to memory directly.  */
4257             if (free_or_dead
4258                 && tcg_out_sti(s, ts->type, ts->val,
4259                                ts->mem_base->reg, ts->mem_offset)) {
4260                 break;
4261             }
4262             temp_load(s, ts, tcg_target_available_regs[ts->type],
4263                       allocated_regs, preferred_regs);
4264             /* fallthrough */
4265 
4266         case TEMP_VAL_REG:
4267             tcg_out_st(s, ts->type, ts->reg,
4268                        ts->mem_base->reg, ts->mem_offset);
4269             break;
4270 
4271         case TEMP_VAL_MEM:
4272             break;
4273 
4274         case TEMP_VAL_DEAD:
4275         default:
4276             g_assert_not_reached();
4277         }
4278         ts->mem_coherent = 1;
4279     }
4280     if (free_or_dead) {
4281         temp_free_or_dead(s, ts, free_or_dead);
4282     }
4283 }
4284 
4285 /* free register 'reg' by spilling the corresponding temporary if necessary */
4286 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
4287 {
4288     TCGTemp *ts = s->reg_to_temp[reg];
4289     if (ts != NULL) {
4290         temp_sync(s, ts, allocated_regs, 0, -1);
4291     }
4292 }
4293 
4294 /**
4295  * tcg_reg_alloc:
4296  * @required_regs: Set of registers in which we must allocate.
4297  * @allocated_regs: Set of registers which must be avoided.
4298  * @preferred_regs: Set of registers we should prefer.
4299  * @rev: True if we search the registers in "indirect" order.
4300  *
4301  * The allocated register must be in @required_regs & ~@allocated_regs,
4302  * but if we can put it in @preferred_regs we may save a move later.
4303  */
4304 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
4305                             TCGRegSet allocated_regs,
4306                             TCGRegSet preferred_regs, bool rev)
4307 {
4308     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4309     TCGRegSet reg_ct[2];
4310     const int *order;
4311 
4312     reg_ct[1] = required_regs & ~allocated_regs;
4313     tcg_debug_assert(reg_ct[1] != 0);
4314     reg_ct[0] = reg_ct[1] & preferred_regs;
4315 
4316     /* Skip the preferred_regs option if it cannot be satisfied,
4317        or if the preference made no difference.  */
4318     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4319 
4320     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4321 
4322     /* Try free registers, preferences first.  */
4323     for (j = f; j < 2; j++) {
4324         TCGRegSet set = reg_ct[j];
4325 
4326         if (tcg_regset_single(set)) {
4327             /* One register in the set.  */
4328             TCGReg reg = tcg_regset_first(set);
4329             if (s->reg_to_temp[reg] == NULL) {
4330                 return reg;
4331             }
4332         } else {
4333             for (i = 0; i < n; i++) {
4334                 TCGReg reg = order[i];
4335                 if (s->reg_to_temp[reg] == NULL &&
4336                     tcg_regset_test_reg(set, reg)) {
4337                     return reg;
4338                 }
4339             }
4340         }
4341     }
4342 
4343     /* We must spill something.  */
4344     for (j = f; j < 2; j++) {
4345         TCGRegSet set = reg_ct[j];
4346 
4347         if (tcg_regset_single(set)) {
4348             /* One register in the set.  */
4349             TCGReg reg = tcg_regset_first(set);
4350             tcg_reg_free(s, reg, allocated_regs);
4351             return reg;
4352         } else {
4353             for (i = 0; i < n; i++) {
4354                 TCGReg reg = order[i];
4355                 if (tcg_regset_test_reg(set, reg)) {
4356                     tcg_reg_free(s, reg, allocated_regs);
4357                     return reg;
4358                 }
4359             }
4360         }
4361     }
4362 
4363     g_assert_not_reached();
4364 }
4365 
4366 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
4367                                  TCGRegSet allocated_regs,
4368                                  TCGRegSet preferred_regs, bool rev)
4369 {
4370     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
4371     TCGRegSet reg_ct[2];
4372     const int *order;
4373 
4374     /* Allow reg I only if neither I nor I+1 is in allocated_regs. */
4375     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
4376     tcg_debug_assert(reg_ct[1] != 0);
4377     reg_ct[0] = reg_ct[1] & preferred_regs;
4378 
4379     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
4380 
4381     /*
4382      * Skip the preferred_regs option if it cannot be satisfied,
4383      * or if the preference made no difference.
4384      */
4385     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
4386 
4387     /*
4388      * Minimize the number of flushes by looking for 2 free registers first,
4389      * then a single flush, then two flushes.
4390      */
4391     for (fmin = 2; fmin >= 0; fmin--) {
4392         for (j = k; j < 2; j++) {
4393             TCGRegSet set = reg_ct[j];
4394 
4395             for (i = 0; i < n; i++) {
4396                 TCGReg reg = order[i];
4397 
4398                 if (tcg_regset_test_reg(set, reg)) {
4399                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
4400                     if (f >= fmin) {
4401                         tcg_reg_free(s, reg, allocated_regs);
4402                         tcg_reg_free(s, reg + 1, allocated_regs);
4403                         return reg;
4404                     }
4405                 }
4406             }
4407         }
4408     }
4409     g_assert_not_reached();
4410 }
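
     /*
      * Note (illustrative): the fmin loop prefers an adjacent pair with
      * both registers already free (f == 2), then a pair costing one
      * spill, and only as a last resort a pair where both halves must
      * be flushed.
      */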
4411 
4412 /* Make sure the temporary is in a register.  If needed, allocate the register
4413    from DESIRED while avoiding ALLOCATED.  */
4414 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
4415                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
4416 {
4417     TCGReg reg;
4418 
4419     switch (ts->val_type) {
4420     case TEMP_VAL_REG:
4421         return;
4422     case TEMP_VAL_CONST:
4423         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4424                             preferred_regs, ts->indirect_base);
4425         if (ts->type <= TCG_TYPE_I64) {
4426             tcg_out_movi(s, ts->type, reg, ts->val);
4427         } else {
4428             uint64_t val = ts->val;
4429             MemOp vece = MO_64;
4430 
4431             /*
4432              * Find the minimal vector element that matches the constant.
4433              * The targets will, in general, have to do this search
4434              * anyway, so do it generically here.
4435              */
4436             if (val == dup_const(MO_8, val)) {
4437                 vece = MO_8;
4438             } else if (val == dup_const(MO_16, val)) {
4439                 vece = MO_16;
4440             } else if (val == dup_const(MO_32, val)) {
4441                 vece = MO_32;
4442             }
4443 
4444             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
4445         }
4446         ts->mem_coherent = 0;
4447         break;
4448     case TEMP_VAL_MEM:
4449         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
4450                             preferred_regs, ts->indirect_base);
4451         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
4452         ts->mem_coherent = 1;
4453         break;
4454     case TEMP_VAL_DEAD:
4455     default:
4456         g_assert_not_reached();
4457     }
4458     set_temp_val_reg(s, ts, reg);
4459 }
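
     /*
      * Example (illustrative): the vector constant 0x4242424242424242
      * equals dup_const(MO_8, 0x42), so vece narrows to MO_8 and the
      * backend may emit a byte broadcast rather than materializing the
      * full 64-bit immediate.
      */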
4460 
4461 /* Save a temporary to memory. 'allocated_regs' is used in case a
4462    temporary register needs to be allocated to store a constant.  */
4463 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
4464 {
4465     /* The liveness analysis already ensures that globals are back
4466        in memory. Keep a tcg_debug_assert for safety. */
4467     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
4468 }
4469 
4470 /* save globals to their canonical location and assume they can be
4471    modified by the following code. 'allocated_regs' is used in case a
4472    temporary register needs to be allocated to store a constant. */
4473 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
4474 {
4475     int i, n;
4476 
4477     for (i = 0, n = s->nb_globals; i < n; i++) {
4478         temp_save(s, &s->temps[i], allocated_regs);
4479     }
4480 }
4481 
4482 /* sync globals to their canonical location and assume they can be
4483    read by the following code. 'allocated_regs' is used in case a
4484    temporary register needs to be allocated to store a constant. */
4485 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
4486 {
4487     int i, n;
4488 
4489     for (i = 0, n = s->nb_globals; i < n; i++) {
4490         TCGTemp *ts = &s->temps[i];
4491         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
4492                          || ts->kind == TEMP_FIXED
4493                          || ts->mem_coherent);
4494     }
4495 }
4496 
4497 /* at the end of a basic block, we assume all temporaries are dead and
4498    all globals are stored at their canonical location. */
4499 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
4500 {
4501     int i;
4502 
4503     for (i = s->nb_globals; i < s->nb_temps; i++) {
4504         TCGTemp *ts = &s->temps[i];
4505 
4506         switch (ts->kind) {
4507         case TEMP_TB:
4508             temp_save(s, ts, allocated_regs);
4509             break;
4510         case TEMP_EBB:
4511             /* The liveness analysis already ensures that temps are dead.
4512                Keep a tcg_debug_assert for safety. */
4513             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
4514             break;
4515         case TEMP_CONST:
4516             /* Similarly, we should have freed any allocated register. */
4517             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
4518             break;
4519         default:
4520             g_assert_not_reached();
4521         }
4522     }
4523 
4524     save_globals(s, allocated_regs);
4525 }
4526 
4527 /*
4528  * At a conditional branch, we assume all temporaries are dead unless
4529  * explicitly live-across-conditional-branch; all globals and local
4530  * temps are synced to their location.
4531  */
4532 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
4533 {
4534     sync_globals(s, allocated_regs);
4535 
4536     for (int i = s->nb_globals; i < s->nb_temps; i++) {
4537         TCGTemp *ts = &s->temps[i];
4538         /*
4539          * The liveness analysis already ensures that temps are dead.
4540          * Keep tcg_debug_asserts for safety.
4541          */
4542         switch (ts->kind) {
4543         case TEMP_TB:
4544             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
4545             break;
4546         case TEMP_EBB:
4547         case TEMP_CONST:
4548             break;
4549         default:
4550             g_assert_not_reached();
4551         }
4552     }
4553 }
4554 
4555 /*
4556  * Specialized code generation for INDEX_op_mov_* with a constant.
4557  */
4558 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
4559                                   tcg_target_ulong val, TCGLifeData arg_life,
4560                                   TCGRegSet preferred_regs)
4561 {
4562     /* ENV should not be modified.  */
4563     tcg_debug_assert(!temp_readonly(ots));
4564 
4565     /* The movi is not explicitly generated here.  */
4566     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
4567     ots->val = val;
4568     ots->mem_coherent = 0;
4569     if (NEED_SYNC_ARG(0)) {
4570         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
4571     } else if (IS_DEAD_ARG(0)) {
4572         temp_dead(s, ots);
4573     }
4574 }
4575 
4576 /*
4577  * Specialized code generation for INDEX_op_mov_*.
4578  */
4579 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
4580 {
4581     const TCGLifeData arg_life = op->life;
4582     TCGRegSet allocated_regs, preferred_regs;
4583     TCGTemp *ts, *ots;
4584     TCGType otype, itype;
4585     TCGReg oreg, ireg;
4586 
4587     allocated_regs = s->reserved_regs;
4588     preferred_regs = output_pref(op, 0);
4589     ots = arg_temp(op->args[0]);
4590     ts = arg_temp(op->args[1]);
4591 
4592     /* ENV should not be modified.  */
4593     tcg_debug_assert(!temp_readonly(ots));
4594 
4595     /* Note that otype != itype for no-op truncation.  */
4596     otype = ots->type;
4597     itype = ts->type;
4598 
4599     if (ts->val_type == TEMP_VAL_CONST) {
4600         /* propagate constant or generate sti */
4601         tcg_target_ulong val = ts->val;
4602         if (IS_DEAD_ARG(1)) {
4603             temp_dead(s, ts);
4604         }
4605         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
4606         return;
4607     }
4608 
4609     /* If the source value is in memory we're going to be forced
4610        to have it in a register in order to perform the copy.  Copy
4611        the SOURCE value into its own register first, so that we
4612        don't have to reload SOURCE the next time it is used. */
4613     if (ts->val_type == TEMP_VAL_MEM) {
4614         temp_load(s, ts, tcg_target_available_regs[itype],
4615                   allocated_regs, preferred_regs);
4616     }
4617     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
4618     ireg = ts->reg;
4619 
4620     if (IS_DEAD_ARG(0)) {
4621         /* mov to a non-saved dead register makes no sense (even with
4622            liveness analysis disabled). */
4623         tcg_debug_assert(NEED_SYNC_ARG(0));
4624         if (!ots->mem_allocated) {
4625             temp_allocate_frame(s, ots);
4626         }
4627         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
4628         if (IS_DEAD_ARG(1)) {
4629             temp_dead(s, ts);
4630         }
4631         temp_dead(s, ots);
4632         return;
4633     }
4634 
4635     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
4636         /*
4637          * The mov can be suppressed.  Kill input first, so that it
4638          * is unlinked from reg_to_temp, then set the output to the
4639          * reg that we saved from the input.
4640          */
4641         temp_dead(s, ts);
4642         oreg = ireg;
4643     } else {
4644         if (ots->val_type == TEMP_VAL_REG) {
4645             oreg = ots->reg;
4646         } else {
4647             /* Make sure to not spill the input register during allocation. */
4648             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
4649                                  allocated_regs | ((TCGRegSet)1 << ireg),
4650                                  preferred_regs, ots->indirect_base);
4651         }
4652         if (!tcg_out_mov(s, otype, oreg, ireg)) {
4653             /*
4654              * Cross register class move not supported.
4655              * Store the source register into the destination slot
4656              * and leave the destination temp as TEMP_VAL_MEM.
4657              */
4658             assert(!temp_readonly(ots));
4659             if (!ots->mem_allocated) {
4660                 temp_allocate_frame(s, ots);
4661             }
4662             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
4663             set_temp_val_nonreg(s, ots, TEMP_VAL_MEM);
4664             ots->mem_coherent = 1;
4665             return;
4666         }
4667     }
4668     set_temp_val_reg(s, ots, oreg);
4669     ots->mem_coherent = 0;
4670 
4671     if (NEED_SYNC_ARG(0)) {
4672         temp_sync(s, ots, allocated_regs, 0, 0);
4673     }
4674 }
4675 
4676 /*
4677  * Specialized code generation for INDEX_op_dup_vec.
4678  */
4679 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
4680 {
4681     const TCGLifeData arg_life = op->life;
4682     TCGRegSet dup_out_regs, dup_in_regs;
4683     TCGTemp *its, *ots;
4684     TCGType itype, vtype;
4685     unsigned vece;
4686     int lowpart_ofs;
4687     bool ok;
4688 
4689     ots = arg_temp(op->args[0]);
4690     its = arg_temp(op->args[1]);
4691 
4692     /* ENV should not be modified.  */
4693     tcg_debug_assert(!temp_readonly(ots));
4694 
4695     itype = its->type;
4696     vece = TCGOP_VECE(op);
4697     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4698 
4699     if (its->val_type == TEMP_VAL_CONST) {
4700         /* Propagate constant via movi -> dupi.  */
4701         tcg_target_ulong val = its->val;
4702         if (IS_DEAD_ARG(1)) {
4703             temp_dead(s, its);
4704         }
4705         tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
4706         return;
4707     }
4708 
4709     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4710     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
4711 
4712     /* Allocate the output register now.  */
4713     if (ots->val_type != TEMP_VAL_REG) {
4714         TCGRegSet allocated_regs = s->reserved_regs;
4715         TCGReg oreg;
4716 
4717         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
4718             /* Make sure to not spill the input register. */
4719             tcg_regset_set_reg(allocated_regs, its->reg);
4720         }
4721         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4722                              output_pref(op, 0), ots->indirect_base);
4723         set_temp_val_reg(s, ots, oreg);
4724     }
4725 
4726     switch (its->val_type) {
4727     case TEMP_VAL_REG:
4728         /*
4729          * The dup constraints must be broad, covering all possible VECE.
4730          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
4731          * to fail, indicating that extra moves are required for that case.
4732          */
4733         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
4734             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
4735                 goto done;
4736             }
4737             /* Try again from memory or a vector input register.  */
4738         }
4739         if (!its->mem_coherent) {
4740             /*
4741              * The input register is not synced, and so an extra store
4742              * would be required to use memory.  Attempt an integer-vector
4743              * register move first.  We do not have a TCGRegSet for this.
4744              */
4745             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4746                 break;
4747             }
4748             /* Sync the temp back to its slot and load from there.  */
4749             temp_sync(s, its, s->reserved_regs, 0, 0);
4750         }
4751         /* fall through */
4752 
4753     case TEMP_VAL_MEM:
4754         lowpart_ofs = 0;
4755         if (HOST_BIG_ENDIAN) {
4756             lowpart_ofs = tcg_type_size(itype) - (1 << vece);
4757         }
4758         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4759                              its->mem_offset + lowpart_ofs)) {
4760             goto done;
4761         }
4762         /* Load the input into the destination vector register. */
4763         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4764         break;
4765 
4766     default:
4767         g_assert_not_reached();
4768     }
4769 
4770     /* We now have a vector input register, so dup must succeed. */
4771     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4772     tcg_debug_assert(ok);
4773 
4774  done:
4775     ots->mem_coherent = 0;
4776     if (IS_DEAD_ARG(1)) {
4777         temp_dead(s, its);
4778     }
4779     if (NEED_SYNC_ARG(0)) {
4780         temp_sync(s, ots, s->reserved_regs, 0, 0);
4781     }
4782     if (IS_DEAD_ARG(0)) {
4783         temp_dead(s, ots);
4784     }
4785 }
4786 
4787 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4788 {
4789     const TCGLifeData arg_life = op->life;
4790     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4791     TCGRegSet i_allocated_regs;
4792     TCGRegSet o_allocated_regs;
4793     int i, k, nb_iargs, nb_oargs;
4794     TCGReg reg;
4795     TCGArg arg;
4796     const TCGArgConstraint *arg_ct;
4797     TCGTemp *ts;
4798     TCGArg new_args[TCG_MAX_OP_ARGS];
4799     int const_args[TCG_MAX_OP_ARGS];
4800     TCGCond op_cond;
4801 
4802     nb_oargs = def->nb_oargs;
4803     nb_iargs = def->nb_iargs;
4804 
4805     /* copy constants */
4806     memcpy(new_args + nb_oargs + nb_iargs,
4807            op->args + nb_oargs + nb_iargs,
4808            sizeof(TCGArg) * def->nb_cargs);
4809 
4810     i_allocated_regs = s->reserved_regs;
4811     o_allocated_regs = s->reserved_regs;
4812 
4813     switch (op->opc) {
4814     case INDEX_op_brcond_i32:
4815     case INDEX_op_brcond_i64:
4816         op_cond = op->args[2];
4817         break;
4818     case INDEX_op_setcond_i32:
4819     case INDEX_op_setcond_i64:
4820     case INDEX_op_negsetcond_i32:
4821     case INDEX_op_negsetcond_i64:
4822     case INDEX_op_cmp_vec:
4823         op_cond = op->args[3];
4824         break;
4825     case INDEX_op_brcond2_i32:
4826         op_cond = op->args[4];
4827         break;
4828     case INDEX_op_movcond_i32:
4829     case INDEX_op_movcond_i64:
4830     case INDEX_op_setcond2_i32:
4831     case INDEX_op_cmpsel_vec:
4832         op_cond = op->args[5];
4833         break;
4834     default:
4835         /* No condition within opcode. */
4836         op_cond = TCG_COND_ALWAYS;
4837         break;
4838     }
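    /*
     * The condition extracted above lets tcg_target_const_match()
     * accept or reject a constant operand based on the comparison
     * being performed; a backend might, for instance, accept
     * different immediates for signed and unsigned conditions.
     */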
4839 
4840     /* satisfy input constraints */
4841     for (k = 0; k < nb_iargs; k++) {
4842         TCGRegSet i_preferred_regs, i_required_regs;
4843         bool allocate_new_reg, copyto_new_reg;
4844         TCGTemp *ts2;
4845         int i1, i2;
4846 
4847         i = def->args_ct[nb_oargs + k].sort_index;
4848         arg = op->args[i];
4849         arg_ct = &def->args_ct[i];
4850         ts = arg_temp(arg);
4851 
4852         if (ts->val_type == TEMP_VAL_CONST
4853             && tcg_target_const_match(ts->val, arg_ct->ct, ts->type,
4854                                       op_cond, TCGOP_VECE(op))) {
4855             /* constant is OK for instruction */
4856             const_args[i] = 1;
4857             new_args[i] = ts->val;
4858             continue;
4859         }
4860 
4861         reg = ts->reg;
4862         i_preferred_regs = 0;
4863         i_required_regs = arg_ct->regs;
4864         allocate_new_reg = false;
4865         copyto_new_reg = false;
4866 
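        /*
         * arg_ct->pair encodes the register-pair constraint cases
         * handled below:
         *   0: not paired
         *   1: first of a pair; pair_index names the second half
         *   2: second of a pair, fixed at the first half's reg + 1
         *   3: aliased to the second output, with no first input
         */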
4867         switch (arg_ct->pair) {
4868         case 0: /* not paired */
4869             if (arg_ct->ialias) {
4870                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4871 
4872                 /*
4873                  * If the input is readonly, then it cannot also be an
4874                  * output and aliased to itself.  If the input is not
4875                  * dead after the instruction, we must allocate a new
4876                  * register and move it.
4877                  */
4878                 if (temp_readonly(ts) || !IS_DEAD_ARG(i)
4879                     || def->args_ct[arg_ct->alias_index].newreg) {
4880                     allocate_new_reg = true;
4881                 } else if (ts->val_type == TEMP_VAL_REG) {
4882                     /*
4883                      * Check if the current register has already been
4884                      * allocated for another input.
4885                      */
4886                     allocate_new_reg =
4887                         tcg_regset_test_reg(i_allocated_regs, reg);
4888                 }
4889             }
4890             if (!allocate_new_reg) {
4891                 temp_load(s, ts, i_required_regs, i_allocated_regs,
4892                           i_preferred_regs);
4893                 reg = ts->reg;
4894                 allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
4895             }
4896             if (allocate_new_reg) {
4897                 /*
4898                  * Allocate a new register matching the constraint
4899                  * and move the temporary register into it.
4900                  */
4901                 temp_load(s, ts, tcg_target_available_regs[ts->type],
4902                           i_allocated_regs, 0);
4903                 reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
4904                                     i_preferred_regs, ts->indirect_base);
4905                 copyto_new_reg = true;
4906             }
4907             break;
4908 
4909         case 1:
4910             /* First of an input pair; if i1 == i2, the second is an output. */
4911             i1 = i;
4912             i2 = arg_ct->pair_index;
4913             ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;
4914 
4915             /*
4916              * It is easier to default to allocating a new pair
4917              * and to identify a few cases where it's not required.
4918              */
4919             if (arg_ct->ialias) {
4920                 i_preferred_regs = output_pref(op, arg_ct->alias_index);
4921                 if (IS_DEAD_ARG(i1) &&
4922                     IS_DEAD_ARG(i2) &&
4923                     !temp_readonly(ts) &&
4924                     ts->val_type == TEMP_VAL_REG &&
4925                     ts->reg < TCG_TARGET_NB_REGS - 1 &&
4926                     tcg_regset_test_reg(i_required_regs, reg) &&
4927                     !tcg_regset_test_reg(i_allocated_regs, reg) &&
4928                     !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
4929                     (ts2
4930                      ? ts2->val_type == TEMP_VAL_REG &&
4931                        ts2->reg == reg + 1 &&
4932                        !temp_readonly(ts2)
4933                      : s->reg_to_temp[reg + 1] == NULL)) {
4934                     break;
4935                 }
4936             } else {
4937                 /* Without aliasing, the pair must also be an input. */
4938                 tcg_debug_assert(ts2);
4939                 if (ts->val_type == TEMP_VAL_REG &&
4940                     ts2->val_type == TEMP_VAL_REG &&
4941                     ts2->reg == reg + 1 &&
4942                     tcg_regset_test_reg(i_required_regs, reg)) {
4943                     break;
4944                 }
4945             }
4946             reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
4947                                      0, ts->indirect_base);
4948             goto do_pair;
4949 
4950         case 2: /* pair second */
4951             reg = new_args[arg_ct->pair_index] + 1;
4952             goto do_pair;
4953 
4954         case 3: /* ialias with second output, no first input */
4955             tcg_debug_assert(arg_ct->ialias);
4956             i_preferred_regs = output_pref(op, arg_ct->alias_index);
4957 
4958             if (IS_DEAD_ARG(i) &&
4959                 !temp_readonly(ts) &&
4960                 ts->val_type == TEMP_VAL_REG &&
4961                 reg > 0 &&
4962                 s->reg_to_temp[reg - 1] == NULL &&
4963                 tcg_regset_test_reg(i_required_regs, reg) &&
4964                 !tcg_regset_test_reg(i_allocated_regs, reg) &&
4965                 !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
4966                 tcg_regset_set_reg(i_allocated_regs, reg - 1);
4967                 break;
4968             }
4969             reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
4970                                      i_allocated_regs, 0,
4971                                      ts->indirect_base);
4972             tcg_regset_set_reg(i_allocated_regs, reg);
4973             reg += 1;
4974             goto do_pair;
4975 
4976         do_pair:
4977             /*
4978              * If an aliased input is not dead after the instruction,
4979              * we must allocate a new register and move it.
4980              */
4981             if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
4982                 TCGRegSet t_allocated_regs = i_allocated_regs;
4983 
4984                 /*
4985                  * Because of the alias, and the continued life, make sure
4986                  * that the temp is somewhere *other* than the reg pair,
4987                  * and we get a copy in reg.
4988                  */
4989                 tcg_regset_set_reg(t_allocated_regs, reg);
4990                 tcg_regset_set_reg(t_allocated_regs, reg + 1);
4991                 if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
4992                     /* If ts was already in reg, copy it somewhere else. */
4993                     TCGReg nr;
4994                     bool ok;
4995 
4996                     tcg_debug_assert(ts->kind != TEMP_FIXED);
4997                     nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
4998                                        t_allocated_regs, 0, ts->indirect_base);
4999                     ok = tcg_out_mov(s, ts->type, nr, reg);
5000                     tcg_debug_assert(ok);
5001 
5002                     set_temp_val_reg(s, ts, nr);
5003                 } else {
5004                     temp_load(s, ts, tcg_target_available_regs[ts->type],
5005                               t_allocated_regs, 0);
5006                     copyto_new_reg = true;
5007                 }
5008             } else {
5009                 /* Preferably allocate to reg, otherwise copy. */
5010                 i_required_regs = (TCGRegSet)1 << reg;
5011                 temp_load(s, ts, i_required_regs, i_allocated_regs,
5012                           i_preferred_regs);
5013                 copyto_new_reg = ts->reg != reg;
5014             }
5015             break;
5016 
5017         default:
5018             g_assert_not_reached();
5019         }
5020 
5021         if (copyto_new_reg) {
5022             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5023                 /*
5024                  * Cross register class move not supported.  Sync the
5025                  * temp back to its slot and load from there.
5026                  */
5027                 temp_sync(s, ts, i_allocated_regs, 0, 0);
5028                 tcg_out_ld(s, ts->type, reg,
5029                            ts->mem_base->reg, ts->mem_offset);
5030             }
5031         }
5032         new_args[i] = reg;
5033         const_args[i] = 0;
5034         tcg_regset_set_reg(i_allocated_regs, reg);
5035     }
5036 
5037     /* mark dead temporaries and free the associated registers */
5038     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
5039         if (IS_DEAD_ARG(i)) {
5040             temp_dead(s, arg_temp(op->args[i]));
5041         }
5042     }
5043 
5044     if (def->flags & TCG_OPF_COND_BRANCH) {
5045         tcg_reg_alloc_cbranch(s, i_allocated_regs);
5046     } else if (def->flags & TCG_OPF_BB_END) {
5047         tcg_reg_alloc_bb_end(s, i_allocated_regs);
5048     } else {
5049         if (def->flags & TCG_OPF_CALL_CLOBBER) {
5050             /* XXX: permit generic clobber register list? */
5051             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5052                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5053                     tcg_reg_free(s, i, i_allocated_regs);
5054                 }
5055             }
5056         }
5057         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
5058             /* sync globals if the op has side effects and might trigger
5059                an exception. */
5060             sync_globals(s, i_allocated_regs);
5061         }
5062 
5063         /* satisfy the output constraints */
5064         for (k = 0; k < nb_oargs; k++) {
5065             i = def->args_ct[k].sort_index;
5066             arg = op->args[i];
5067             arg_ct = &def->args_ct[i];
5068             ts = arg_temp(arg);
5069 
5070             /* ENV should not be modified.  */
5071             tcg_debug_assert(!temp_readonly(ts));
5072 
5073             switch (arg_ct->pair) {
5074             case 0: /* not paired */
5075                 if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
5076                     reg = new_args[arg_ct->alias_index];
5077                 } else if (arg_ct->newreg) {
5078                     reg = tcg_reg_alloc(s, arg_ct->regs,
5079                                         i_allocated_regs | o_allocated_regs,
5080                                         output_pref(op, k), ts->indirect_base);
5081                 } else {
5082                     reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
5083                                         output_pref(op, k), ts->indirect_base);
5084                 }
5085                 break;
5086 
5087             case 1: /* first of pair */
5088                 if (arg_ct->oalias) {
5089                     reg = new_args[arg_ct->alias_index];
5090                 } else if (arg_ct->newreg) {
5091                     reg = tcg_reg_alloc_pair(s, arg_ct->regs,
5092                                              i_allocated_regs | o_allocated_regs,
5093                                              output_pref(op, k),
5094                                              ts->indirect_base);
5095                 } else {
5096                     reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
5097                                              output_pref(op, k),
5098                                              ts->indirect_base);
5099                 }
5100                 break;
5101 
5102             case 2: /* second of pair */
5103                 if (arg_ct->oalias) {
5104                     reg = new_args[arg_ct->alias_index];
5105                 } else {
5106                     reg = new_args[arg_ct->pair_index] + 1;
5107                 }
5108                 break;
5109 
5110             case 3: /* first of pair, aliasing with a second input */
5111                 tcg_debug_assert(!arg_ct->newreg);
5112                 reg = new_args[arg_ct->pair_index] - 1;
5113                 break;
5114 
5115             default:
5116                 g_assert_not_reached();
5117             }
5118             tcg_regset_set_reg(o_allocated_regs, reg);
5119             set_temp_val_reg(s, ts, reg);
5120             ts->mem_coherent = 0;
5121             new_args[i] = reg;
5122         }
5123     }
5124 
5125     /* emit instruction */
5126     switch (op->opc) {
5127     case INDEX_op_ext8s_i32:
5128         tcg_out_ext8s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5129         break;
5130     case INDEX_op_ext8s_i64:
5131         tcg_out_ext8s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5132         break;
5133     case INDEX_op_ext8u_i32:
5134     case INDEX_op_ext8u_i64:
5135         tcg_out_ext8u(s, new_args[0], new_args[1]);
5136         break;
5137     case INDEX_op_ext16s_i32:
5138         tcg_out_ext16s(s, TCG_TYPE_I32, new_args[0], new_args[1]);
5139         break;
5140     case INDEX_op_ext16s_i64:
5141         tcg_out_ext16s(s, TCG_TYPE_I64, new_args[0], new_args[1]);
5142         break;
5143     case INDEX_op_ext16u_i32:
5144     case INDEX_op_ext16u_i64:
5145         tcg_out_ext16u(s, new_args[0], new_args[1]);
5146         break;
5147     case INDEX_op_ext32s_i64:
5148         tcg_out_ext32s(s, new_args[0], new_args[1]);
5149         break;
5150     case INDEX_op_ext32u_i64:
5151         tcg_out_ext32u(s, new_args[0], new_args[1]);
5152         break;
5153     case INDEX_op_ext_i32_i64:
5154         tcg_out_exts_i32_i64(s, new_args[0], new_args[1]);
5155         break;
5156     case INDEX_op_extu_i32_i64:
5157         tcg_out_extu_i32_i64(s, new_args[0], new_args[1]);
5158         break;
5159     case INDEX_op_extrl_i64_i32:
5160         tcg_out_extrl_i64_i32(s, new_args[0], new_args[1]);
5161         break;
5162     default:
5163         if (def->flags & TCG_OPF_VECTOR) {
5164             tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
5165                            new_args, const_args);
5166         } else {
5167             tcg_out_op(s, op->opc, new_args, const_args);
5168         }
5169         break;
5170     }
5171 
5172     /* move the outputs in the correct register if needed */
5173     for (i = 0; i < nb_oargs; i++) {
5174         ts = arg_temp(op->args[i]);
5175 
5176         /* ENV should not be modified.  */
5177         tcg_debug_assert(!temp_readonly(ts));
5178 
5179         if (NEED_SYNC_ARG(i)) {
5180             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
5181         } else if (IS_DEAD_ARG(i)) {
5182             temp_dead(s, ts);
5183         }
5184     }
5185 }
5186 
5187 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
5188 {
5189     const TCGLifeData arg_life = op->life;
5190     TCGTemp *ots, *itsl, *itsh;
5191     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
5192 
5193     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
5194     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
5195     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
5196 
5197     ots = arg_temp(op->args[0]);
5198     itsl = arg_temp(op->args[1]);
5199     itsh = arg_temp(op->args[2]);
5200 
5201     /* ENV should not be modified.  */
5202     tcg_debug_assert(!temp_readonly(ots));
5203 
5204     /* Allocate the output register now.  */
5205     if (ots->val_type != TEMP_VAL_REG) {
5206         TCGRegSet allocated_regs = s->reserved_regs;
5207         TCGRegSet dup_out_regs =
5208             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
5209         TCGReg oreg;
5210 
5211         /* Make sure to not spill the input registers. */
5212         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
5213             tcg_regset_set_reg(allocated_regs, itsl->reg);
5214         }
5215         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
5216             tcg_regset_set_reg(allocated_regs, itsh->reg);
5217         }
5218 
5219         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
5220                              output_pref(op, 0), ots->indirect_base);
5221         set_temp_val_reg(s, ots, oreg);
5222     }
5223 
5224     /* Promote dup2 of immediates to dupi_vec. */
5225     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
5226         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
5227         MemOp vece = MO_64;
5228 
5229         if (val == dup_const(MO_8, val)) {
5230             vece = MO_8;
5231         } else if (val == dup_const(MO_16, val)) {
5232             vece = MO_16;
5233         } else if (val == dup_const(MO_32, val)) {
5234             vece = MO_32;
5235         }
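        /*
         * dup_const(vece, val) replicates the low (1 << vece)-byte
         * element of val across 64 bits; if that round-trips, the
         * constant is already a splat of the narrower element (e.g. a
         * val of 0x0101010101010101 narrows to an MO_8 splat of 0x01).
         */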
5236 
5237         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
5238         goto done;
5239     }
5240 
5241     /* If the two inputs form one 64-bit value, try dupm_vec. */
5242     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
5243         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
5244         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
5245         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
5246 
5247         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
5248         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
5249 
5250         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
5251                              its->mem_base->reg, its->mem_offset)) {
5252             goto done;
5253         }
5254     }
5255 
5256     /* Fall back to generic expansion. */
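    /* (The caller then routes the op through tcg_reg_alloc_op.) */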
5257     return false;
5258 
5259  done:
5260     ots->mem_coherent = 0;
5261     if (IS_DEAD_ARG(1)) {
5262         temp_dead(s, itsl);
5263     }
5264     if (IS_DEAD_ARG(2)) {
5265         temp_dead(s, itsh);
5266     }
5267     if (NEED_SYNC_ARG(0)) {
5268         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
5269     } else if (IS_DEAD_ARG(0)) {
5270         temp_dead(s, ots);
5271     }
5272     return true;
5273 }
5274 
5275 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
5276                          TCGRegSet allocated_regs)
5277 {
5278     if (ts->val_type == TEMP_VAL_REG) {
5279         if (ts->reg != reg) {
5280             tcg_reg_free(s, reg, allocated_regs);
5281             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
5282                 /*
5283                  * Cross register class move not supported.  Sync the
5284                  * temp back to its slot and load from there.
5285                  */
5286                 temp_sync(s, ts, allocated_regs, 0, 0);
5287                 tcg_out_ld(s, ts->type, reg,
5288                            ts->mem_base->reg, ts->mem_offset);
5289             }
5290         }
5291     } else {
5292         TCGRegSet arg_set = 0;
5293 
5294         tcg_reg_free(s, reg, allocated_regs);
5295         tcg_regset_set_reg(arg_set, reg);
5296         temp_load(s, ts, arg_set, allocated_regs, 0);
5297     }
5298 }
5299 
5300 static void load_arg_stk(TCGContext *s, unsigned arg_slot, TCGTemp *ts,
5301                          TCGRegSet allocated_regs)
5302 {
5303     /*
5304      * When the destination is on the stack, load up the temp and store.
5305      * If there are many call-saved registers, the temp might live to
5306      * see another use; otherwise it'll be discarded.
5307      */
5308     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
5309     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
5310                arg_slot_stk_ofs(arg_slot));
5311 }
5312 
5313 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
5314                             TCGTemp *ts, TCGRegSet *allocated_regs)
5315 {
5316     if (arg_slot_reg_p(l->arg_slot)) {
5317         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
5318         load_arg_reg(s, reg, ts, *allocated_regs);
5319         tcg_regset_set_reg(*allocated_regs, reg);
5320     } else {
5321         load_arg_stk(s, l->arg_slot, ts, *allocated_regs);
5322     }
5323 }
5324 
5325 static void load_arg_ref(TCGContext *s, unsigned arg_slot, TCGReg ref_base,
5326                          intptr_t ref_off, TCGRegSet *allocated_regs)
5327 {
5328     TCGReg reg;
5329 
5330     if (arg_slot_reg_p(arg_slot)) {
5331         reg = tcg_target_call_iarg_regs[arg_slot];
5332         tcg_reg_free(s, reg, *allocated_regs);
5333         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5334         tcg_regset_set_reg(*allocated_regs, reg);
5335     } else {
5336         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
5337                             *allocated_regs, 0, false);
5338         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
5339         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
5340                    arg_slot_stk_ofs(arg_slot));
5341     }
5342 }
5343 
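/*
 * Emit a helper call: place the inputs per the ABI (stacked arguments
 * first, then registers), kill dead inputs, free the call-clobbered
 * registers, save or sync globals as the helper's flags allow, then
 * emit the call itself and connect the return value(s) to the output
 * temps.
 */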
5344 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
5345 {
5346     const int nb_oargs = TCGOP_CALLO(op);
5347     const int nb_iargs = TCGOP_CALLI(op);
5348     const TCGLifeData arg_life = op->life;
5349     const TCGHelperInfo *info = tcg_call_info(op);
5350     TCGRegSet allocated_regs = s->reserved_regs;
5351     int i;
5352 
5353     /*
5354      * Move inputs into place in reverse order,
5355      * so that we place stacked arguments first.
5356      */
5357     for (i = nb_iargs - 1; i >= 0; --i) {
5358         const TCGCallArgumentLoc *loc = &info->in[i];
5359         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
5360 
5361         switch (loc->kind) {
5362         case TCG_CALL_ARG_NORMAL:
5363         case TCG_CALL_ARG_EXTEND_U:
5364         case TCG_CALL_ARG_EXTEND_S:
5365             load_arg_normal(s, loc, ts, &allocated_regs);
5366             break;
5367         case TCG_CALL_ARG_BY_REF:
5368             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5369             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
5370                          arg_slot_stk_ofs(loc->ref_slot),
5371                          &allocated_regs);
5372             break;
5373         case TCG_CALL_ARG_BY_REF_N:
5374             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
5375             break;
5376         default:
5377             g_assert_not_reached();
5378         }
5379     }
5380 
5381     /* Mark dead temporaries and free the associated registers.  */
5382     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
5383         if (IS_DEAD_ARG(i)) {
5384             temp_dead(s, arg_temp(op->args[i]));
5385         }
5386     }
5387 
5388     /* Clobber call registers.  */
5389     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
5390         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
5391             tcg_reg_free(s, i, allocated_regs);
5392         }
5393     }
5394 
5395     /*
5396      * Save globals if they might be written by the helper,
5397      * sync them if they might be read.
5398      */
5399     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
5400         /* Nothing to do */
5401     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
5402         sync_globals(s, allocated_regs);
5403     } else {
5404         save_globals(s, allocated_regs);
5405     }
5406 
5407     /*
5408      * If the ABI passes a pointer to the returned struct as the first
5409      * argument, load that now.  Pass a pointer to the output home slot.
5410      */
5411     if (info->out_kind == TCG_CALL_RET_BY_REF) {
5412         TCGTemp *ts = arg_temp(op->args[0]);
5413 
5414         if (!ts->mem_allocated) {
5415             temp_allocate_frame(s, ts);
5416         }
5417         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
5418     }
5419 
5420     tcg_out_call(s, tcg_call_func(op), info);
5421 
5422     /* Assign output registers and emit moves if needed.  */
5423     switch (info->out_kind) {
5424     case TCG_CALL_RET_NORMAL:
5425         for (i = 0; i < nb_oargs; i++) {
5426             TCGTemp *ts = arg_temp(op->args[i]);
5427             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
5428 
5429             /* ENV should not be modified.  */
5430             tcg_debug_assert(!temp_readonly(ts));
5431 
5432             set_temp_val_reg(s, ts, reg);
5433             ts->mem_coherent = 0;
5434         }
5435         break;
5436 
5437     case TCG_CALL_RET_BY_VEC:
5438         {
5439             TCGTemp *ts = arg_temp(op->args[0]);
5440 
5441             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
5442             tcg_debug_assert(ts->temp_subindex == 0);
5443             if (!ts->mem_allocated) {
5444                 temp_allocate_frame(s, ts);
5445             }
5446             tcg_out_st(s, TCG_TYPE_V128,
5447                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5448                        ts->mem_base->reg, ts->mem_offset);
5449         }
5450         /* fall through to mark all parts in memory */
5451 
5452     case TCG_CALL_RET_BY_REF:
5453         /* The callee has performed a write through the reference. */
5454         for (i = 0; i < nb_oargs; i++) {
5455             TCGTemp *ts = arg_temp(op->args[i]);
5456             ts->val_type = TEMP_VAL_MEM;
5457         }
5458         break;
5459 
5460     default:
5461         g_assert_not_reached();
5462     }
5463 
5464     /* Flush or discard output registers as needed. */
5465     for (i = 0; i < nb_oargs; i++) {
5466         TCGTemp *ts = arg_temp(op->args[i]);
5467         if (NEED_SYNC_ARG(i)) {
5468             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
5469         } else if (IS_DEAD_ARG(i)) {
5470             temp_dead(s, ts);
5471         }
5472     }
5473 }
5474 
5475 /**
5476  * atom_and_align_for_opc:
5477  * @s: tcg context
5478  * @opc: memory operation code
5479  * @host_atom: MO_ATOM_{IFALIGN,WITHIN16,SUBALIGN} for host operations
5480  * @allow_two_ops: true if we are prepared to issue two operations
5481  *
5482  * Return the alignment and atomicity to use for the inline fast path
5483  * for the given memory operation.  The alignment may be larger than
5484  * that specified in @opc, and the correct alignment will be diagnosed
5485  * by the slow path helper.
5486  *
5487  * If @allow_two_ops, the host is prepared to test for 2x alignment,
5488  * and issue two loads or stores for subalignment.
5489  */
5490 static TCGAtomAlign atom_and_align_for_opc(TCGContext *s, MemOp opc,
5491                                            MemOp host_atom, bool allow_two_ops)
5492 {
5493     MemOp align = get_alignment_bits(opc);
5494     MemOp size = opc & MO_SIZE;
5495     MemOp half = size ? size - 1 : 0;
5496     MemOp atom = opc & MO_ATOM_MASK;
5497     MemOp atmax;
5498 
5499     switch (atom) {
5500     case MO_ATOM_NONE:
5501         /* The operation requires no specific atomicity. */
5502         atmax = MO_8;
5503         break;
5504 
5505     case MO_ATOM_IFALIGN:
5506         atmax = size;
5507         break;
5508 
5509     case MO_ATOM_IFALIGN_PAIR:
5510         atmax = half;
5511         break;
5512 
5513     case MO_ATOM_WITHIN16:
5514         atmax = size;
5515         if (size == MO_128) {
5516             /* Misalignment implies !within16, and therefore no atomicity. */
5517         } else if (host_atom != MO_ATOM_WITHIN16) {
5518             /* The host does not implement within16, so require alignment. */
5519             align = MAX(align, size);
5520         }
5521         break;
5522 
5523     case MO_ATOM_WITHIN16_PAIR:
5524         atmax = size;
5525         /*
5526          * Misalignment implies !within16, and therefore half atomicity.
5527          * Any host prepared for two operations can implement this with
5528          * half alignment.
5529          */
5530         if (host_atom != MO_ATOM_WITHIN16 && allow_two_ops) {
5531             align = MAX(align, half);
5532         }
5533         break;
5534 
5535     case MO_ATOM_SUBALIGN:
5536         atmax = size;
5537         if (host_atom != MO_ATOM_SUBALIGN) {
5538             /* If unaligned but not odd, there are subobjects up to half. */
5539             if (allow_two_ops) {
5540                 align = MAX(align, half);
5541             } else {
5542                 align = MAX(align, size);
5543             }
5544         }
5545         break;
5546 
5547     default:
5548         g_assert_not_reached();
5549     }
5550 
5551     return (TCGAtomAlign){ .atom = atmax, .align = align };
5552 }
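/*
 * Worked example for the switch above: a 16-byte access with
 * MO_ATOM_WITHIN16_PAIR, on a host without MO_ATOM_WITHIN16 but with
 * @allow_two_ops, returns atom = MO_128 with align raised to MO_64,
 * i.e. two 8-byte operations, each of which is atomic.
 */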
5553 
5554 /*
5555  * Similarly for qemu_ld/st slow path helpers.
5556  * We must re-implement tcg_gen_callN and tcg_reg_alloc_call simultaneously,
5557  * using only the provided backend tcg_out_* functions.
5558  */
5559 
5560 static int tcg_out_helper_stk_ofs(TCGType type, unsigned slot)
5561 {
5562     int ofs = arg_slot_stk_ofs(slot);
5563 
5564     /*
5565      * Each stack slot is TCG_TARGET_LONG_BITS.  If the host does not
5566      * require extension to uint64_t, adjust the address for uint32_t.
5567      */
5568     if (HOST_BIG_ENDIAN &&
5569         TCG_TARGET_REG_BITS == 64 &&
5570         type == TCG_TYPE_I32) {
5571         ofs += 4;
5572     }
5573     return ofs;
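    /*
     * E.g. on a big-endian 64-bit host, the low 32 bits of the 8-byte
     * slot live at byte offset 4, which is where a callee reading a
     * 32-bit argument from that slot expects to find them.
     */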
5574 }
5575 
5576 static void tcg_out_helper_load_slots(TCGContext *s,
5577                                       unsigned nmov, TCGMovExtend *mov,
5578                                       const TCGLdstHelperParam *parm)
5579 {
5580     unsigned i;
5581     TCGReg dst3;
5582 
5583     /*
5584      * Start from the end, storing to the stack first.
5585      * This frees those registers, so we need not consider overlap.
5586      */
5587     for (i = nmov; i-- > 0; ) {
5588         unsigned slot = mov[i].dst;
5589 
5590         if (arg_slot_reg_p(slot)) {
5591             goto found_reg;
5592         }
5593 
5594         TCGReg src = mov[i].src;
5595         TCGType dst_type = mov[i].dst_type;
5596         MemOp dst_mo = dst_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5597 
5598         /* The argument is going onto the stack; extend into scratch. */
5599         if ((mov[i].src_ext & MO_SIZE) != dst_mo) {
5600             tcg_debug_assert(parm->ntmp != 0);
5601             mov[i].dst = src = parm->tmp[0];
5602             tcg_out_movext1(s, &mov[i]);
5603         }
5604 
5605         tcg_out_st(s, dst_type, src, TCG_REG_CALL_STACK,
5606                    tcg_out_helper_stk_ofs(dst_type, slot));
5607     }
5608     return;
5609 
5610  found_reg:
5611     /*
5612      * The remaining arguments are in registers.
5613      * Convert slot numbers to argument registers.
5614      */
5615     nmov = i + 1;
5616     for (i = 0; i < nmov; ++i) {
5617         mov[i].dst = tcg_target_call_iarg_regs[mov[i].dst];
5618     }
5619 
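    /*
     * These moves are now register-to-register and may overlap (one
     * move's destination can be another's source), so they are
     * resolved as a parallel move: tcg_out_movext2/3 order the copies
     * and fall back to a scratch register when a cycle exists.
     */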
5620     switch (nmov) {
5621     case 4:
5622         /* The backend must have provided enough temps for the worst case. */
5623         tcg_debug_assert(parm->ntmp >= 2);
5624 
5625         dst3 = mov[3].dst;
5626         for (unsigned j = 0; j < 3; ++j) {
5627             if (dst3 == mov[j].src) {
5628                 /*
5629                  * Conflict.  Copy the source to a temporary, perform
5630                  * the remaining moves, then do the extension from our
5631                  * scratch register on the way out.
5632                  */
5633                 TCGReg scratch = parm->tmp[1];
5634 
5635                 tcg_out_mov(s, mov[3].src_type, scratch, mov[3].src);
5636                 tcg_out_movext3(s, mov, mov + 1, mov + 2, parm->tmp[0]);
5637                 tcg_out_movext1_new_src(s, &mov[3], scratch);
5638                 return;
5639             }
5640         }
5641 
5642         /* No conflicts: perform this move and continue. */
5643         tcg_out_movext1(s, &mov[3]);
5644         /* fall through */
5645 
5646     case 3:
5647         tcg_out_movext3(s, mov, mov + 1, mov + 2,
5648                         parm->ntmp ? parm->tmp[0] : -1);
5649         break;
5650     case 2:
5651         tcg_out_movext2(s, mov, mov + 1,
5652                         parm->ntmp ? parm->tmp[0] : -1);
5653         break;
5654     case 1:
5655         tcg_out_movext1(s, mov);
5656         break;
5657     default:
5658         g_assert_not_reached();
5659     }
5660 }
5661 
5662 static void tcg_out_helper_load_imm(TCGContext *s, unsigned slot,
5663                                     TCGType type, tcg_target_long imm,
5664                                     const TCGLdstHelperParam *parm)
5665 {
5666     if (arg_slot_reg_p(slot)) {
5667         tcg_out_movi(s, type, tcg_target_call_iarg_regs[slot], imm);
5668     } else {
5669         int ofs = tcg_out_helper_stk_ofs(type, slot);
5670         if (!tcg_out_sti(s, type, imm, TCG_REG_CALL_STACK, ofs)) {
5671             tcg_debug_assert(parm->ntmp != 0);
5672             tcg_out_movi(s, type, parm->tmp[0], imm);
5673             tcg_out_st(s, type, parm->tmp[0], TCG_REG_CALL_STACK, ofs);
5674         }
5675     }
5676 }
5677 
5678 static void tcg_out_helper_load_common_args(TCGContext *s,
5679                                             const TCGLabelQemuLdst *ldst,
5680                                             const TCGLdstHelperParam *parm,
5681                                             const TCGHelperInfo *info,
5682                                             unsigned next_arg)
5683 {
5684     TCGMovExtend ptr_mov = {
5685         .dst_type = TCG_TYPE_PTR,
5686         .src_type = TCG_TYPE_PTR,
5687         .src_ext = sizeof(void *) == 4 ? MO_32 : MO_64
5688     };
5689     const TCGCallArgumentLoc *loc = &info->in[0];
5690     TCGType type;
5691     unsigned slot;
5692     tcg_target_ulong imm;
5693 
5694     /*
5695      * Handle env, which is always first.
5696      */
5697     ptr_mov.dst = loc->arg_slot;
5698     ptr_mov.src = TCG_AREG0;
5699     tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5700 
5701     /*
5702      * Handle oi.
5703      */
5704     imm = ldst->oi;
5705     loc = &info->in[next_arg];
5706     type = TCG_TYPE_I32;
5707     switch (loc->kind) {
5708     case TCG_CALL_ARG_NORMAL:
5709         break;
5710     case TCG_CALL_ARG_EXTEND_U:
5711     case TCG_CALL_ARG_EXTEND_S:
5712         /* No extension required for MemOpIdx. */
5713         tcg_debug_assert(imm <= INT32_MAX);
5714         type = TCG_TYPE_REG;
5715         break;
5716     default:
5717         g_assert_not_reached();
5718     }
5719     tcg_out_helper_load_imm(s, loc->arg_slot, type, imm, parm);
5720     next_arg++;
5721 
5722     /*
5723      * Handle ra.
5724      */
5725     loc = &info->in[next_arg];
5726     slot = loc->arg_slot;
5727     if (parm->ra_gen) {
5728         int arg_reg = -1;
5729         TCGReg ra_reg;
5730 
5731         if (arg_slot_reg_p(slot)) {
5732             arg_reg = tcg_target_call_iarg_regs[slot];
5733         }
5734         ra_reg = parm->ra_gen(s, ldst, arg_reg);
5735 
5736         ptr_mov.dst = slot;
5737         ptr_mov.src = ra_reg;
5738         tcg_out_helper_load_slots(s, 1, &ptr_mov, parm);
5739     } else {
5740         imm = (uintptr_t)ldst->raddr;
5741         tcg_out_helper_load_imm(s, slot, TCG_TYPE_PTR, imm, parm);
5742     }
5743 }
5744 
5745 static unsigned tcg_out_helper_add_mov(TCGMovExtend *mov,
5746                                        const TCGCallArgumentLoc *loc,
5747                                        TCGType dst_type, TCGType src_type,
5748                                        TCGReg lo, TCGReg hi)
5749 {
5750     MemOp reg_mo;
5751 
5752     if (dst_type <= TCG_TYPE_REG) {
5753         MemOp src_ext;
5754 
5755         switch (loc->kind) {
5756         case TCG_CALL_ARG_NORMAL:
5757             src_ext = src_type == TCG_TYPE_I32 ? MO_32 : MO_64;
5758             break;
5759         case TCG_CALL_ARG_EXTEND_U:
5760             dst_type = TCG_TYPE_REG;
5761             src_ext = MO_UL;
5762             break;
5763         case TCG_CALL_ARG_EXTEND_S:
5764             dst_type = TCG_TYPE_REG;
5765             src_ext = MO_SL;
5766             break;
5767         default:
5768             g_assert_not_reached();
5769         }
5770 
5771         mov[0].dst = loc->arg_slot;
5772         mov[0].dst_type = dst_type;
5773         mov[0].src = lo;
5774         mov[0].src_type = src_type;
5775         mov[0].src_ext = src_ext;
5776         return 1;
5777     }
5778 
5779     if (TCG_TARGET_REG_BITS == 32) {
5780         assert(dst_type == TCG_TYPE_I64);
5781         reg_mo = MO_32;
5782     } else {
5783         assert(dst_type == TCG_TYPE_I128);
5784         reg_mo = MO_64;
5785     }
5786 
5787     mov[0].dst = loc[HOST_BIG_ENDIAN].arg_slot;
5788     mov[0].src = lo;
5789     mov[0].dst_type = TCG_TYPE_REG;
5790     mov[0].src_type = TCG_TYPE_REG;
5791     mov[0].src_ext = reg_mo;
5792 
5793     mov[1].dst = loc[!HOST_BIG_ENDIAN].arg_slot;
5794     mov[1].src = hi;
5795     mov[1].dst_type = TCG_TYPE_REG;
5796     mov[1].src_type = TCG_TYPE_REG;
5797     mov[1].src_ext = reg_mo;
5798 
5799     return 2;
5800 }
5801 
5802 static void tcg_out_ld_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5803                                    const TCGLdstHelperParam *parm)
5804 {
5805     const TCGHelperInfo *info;
5806     const TCGCallArgumentLoc *loc;
5807     TCGMovExtend mov[2];
5808     unsigned next_arg, nmov;
5809     MemOp mop = get_memop(ldst->oi);
5810 
5811     switch (mop & MO_SIZE) {
5812     case MO_8:
5813     case MO_16:
5814     case MO_32:
5815         info = &info_helper_ld32_mmu;
5816         break;
5817     case MO_64:
5818         info = &info_helper_ld64_mmu;
5819         break;
5820     case MO_128:
5821         info = &info_helper_ld128_mmu;
5822         break;
5823     default:
5824         g_assert_not_reached();
5825     }
5826 
5827     /* Defer env argument. */
5828     next_arg = 1;
5829 
5830     loc = &info->in[next_arg];
5831     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
5832         /*
5833          * 32-bit host with 32-bit guest: zero-extend the guest address
5834          * to 64-bits for the helper by storing the low part, then
5835          * load a zero for the high part.
5836          */
5837         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
5838                                TCG_TYPE_I32, TCG_TYPE_I32,
5839                                ldst->addrlo_reg, -1);
5840         tcg_out_helper_load_slots(s, 1, mov, parm);
5841 
5842         tcg_out_helper_load_imm(s, loc[!HOST_BIG_ENDIAN].arg_slot,
5843                                 TCG_TYPE_I32, 0, parm);
5844         next_arg += 2;
5845     } else {
5846         nmov = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
5847                                       ldst->addrlo_reg, ldst->addrhi_reg);
5848         tcg_out_helper_load_slots(s, nmov, mov, parm);
5849         next_arg += nmov;
5850     }
5851 
5852     switch (info->out_kind) {
5853     case TCG_CALL_RET_NORMAL:
5854     case TCG_CALL_RET_BY_VEC:
5855         break;
5856     case TCG_CALL_RET_BY_REF:
5857         /*
5858          * The return reference is in the first argument slot.
5859          * We need memory in which to return: re-use the top of stack.
5860          */
5861         {
5862             int ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5863 
5864             if (arg_slot_reg_p(0)) {
5865                 tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[0],
5866                                  TCG_REG_CALL_STACK, ofs_slot0);
5867             } else {
5868                 tcg_debug_assert(parm->ntmp != 0);
5869                 tcg_out_addi_ptr(s, parm->tmp[0],
5870                                  TCG_REG_CALL_STACK, ofs_slot0);
5871                 tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
5872                            TCG_REG_CALL_STACK, ofs_slot0);
5873             }
5874         }
5875         break;
5876     default:
5877         g_assert_not_reached();
5878     }
5879 
5880     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
5881 }
5882 
5883 static void tcg_out_ld_helper_ret(TCGContext *s, const TCGLabelQemuLdst *ldst,
5884                                   bool load_sign,
5885                                   const TCGLdstHelperParam *parm)
5886 {
5887     MemOp mop = get_memop(ldst->oi);
5888     TCGMovExtend mov[2];
5889     int ofs_slot0;
5890 
5891     switch (ldst->type) {
5892     case TCG_TYPE_I64:
5893         if (TCG_TARGET_REG_BITS == 32) {
5894             break;
5895         }
5896         /* fall through */
5897 
5898     case TCG_TYPE_I32:
5899         mov[0].dst = ldst->datalo_reg;
5900         mov[0].src = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, 0);
5901         mov[0].dst_type = ldst->type;
5902         mov[0].src_type = TCG_TYPE_REG;
5903 
5904         /*
5905          * If load_sign, then we allowed the helper to perform the
5906          * appropriate sign extension to tcg_target_ulong, and all
5907          * we need now is a plain move.
5908          *
5909          * If not, then we expect the relevant extension
5910          * instruction to be no more expensive than a move, and
5911          * we thus save icache space etc. by using only one of the
5912          * two helper functions.
5913          */
5914         if (load_sign || !(mop & MO_SIGN)) {
5915             if (TCG_TARGET_REG_BITS == 32 || ldst->type == TCG_TYPE_I32) {
5916                 mov[0].src_ext = MO_32;
5917             } else {
5918                 mov[0].src_ext = MO_64;
5919             }
5920         } else {
5921             mov[0].src_ext = mop & MO_SSIZE;
5922         }
5923         tcg_out_movext1(s, mov);
5924         return;
5925 
5926     case TCG_TYPE_I128:
5927         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
5928         ofs_slot0 = TCG_TARGET_CALL_STACK_OFFSET;
5929         switch (TCG_TARGET_CALL_RET_I128) {
5930         case TCG_CALL_RET_NORMAL:
5931             break;
5932         case TCG_CALL_RET_BY_VEC:
5933             tcg_out_st(s, TCG_TYPE_V128,
5934                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
5935                        TCG_REG_CALL_STACK, ofs_slot0);
5936             /* fall through */
5937         case TCG_CALL_RET_BY_REF:
5938             tcg_out_ld(s, TCG_TYPE_I64, ldst->datalo_reg,
5939                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * HOST_BIG_ENDIAN);
5940             tcg_out_ld(s, TCG_TYPE_I64, ldst->datahi_reg,
5941                        TCG_REG_CALL_STACK, ofs_slot0 + 8 * !HOST_BIG_ENDIAN);
5942             return;
5943         default:
5944             g_assert_not_reached();
5945         }
5946         break;
5947 
5948     default:
5949         g_assert_not_reached();
5950     }
5951 
5952     mov[0].dst = ldst->datalo_reg;
5953     mov[0].src =
5954         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, HOST_BIG_ENDIAN);
5955     mov[0].dst_type = TCG_TYPE_REG;
5956     mov[0].src_type = TCG_TYPE_REG;
5957     mov[0].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5958 
5959     mov[1].dst = ldst->datahi_reg;
5960     mov[1].src =
5961         tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, !HOST_BIG_ENDIAN);
5962     mov[1].dst_type = TCG_TYPE_REG;
5963     mov[1].src_type = TCG_TYPE_REG;
5964     mov[1].src_ext = TCG_TARGET_REG_BITS == 32 ? MO_32 : MO_64;
5965 
5966     tcg_out_movext2(s, mov, mov + 1, parm->ntmp ? parm->tmp[0] : -1);
5967 }
5968 
5969 static void tcg_out_st_helper_args(TCGContext *s, const TCGLabelQemuLdst *ldst,
5970                                    const TCGLdstHelperParam *parm)
5971 {
5972     const TCGHelperInfo *info;
5973     const TCGCallArgumentLoc *loc;
5974     TCGMovExtend mov[4];
5975     TCGType data_type;
5976     unsigned next_arg, nmov, n;
5977     MemOp mop = get_memop(ldst->oi);
5978 
5979     switch (mop & MO_SIZE) {
5980     case MO_8:
5981     case MO_16:
5982     case MO_32:
5983         info = &info_helper_st32_mmu;
5984         data_type = TCG_TYPE_I32;
5985         break;
5986     case MO_64:
5987         info = &info_helper_st64_mmu;
5988         data_type = TCG_TYPE_I64;
5989         break;
5990     case MO_128:
5991         info = &info_helper_st128_mmu;
5992         data_type = TCG_TYPE_I128;
5993         break;
5994     default:
5995         g_assert_not_reached();
5996     }
5997 
5998     /* Defer env argument. */
5999     next_arg = 1;
6000     nmov = 0;
6001 
6002     /* Handle addr argument. */
6003     loc = &info->in[next_arg];
6004     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6005         /*
6006          * 32-bit host with 32-bit guest: zero-extend the guest address
6007          * to 64-bits for the helper by storing the low part.  Later,
6008          * after we have processed the register inputs, we will load a
6009          * zero for the high part.
6010          */
6011         tcg_out_helper_add_mov(mov, loc + HOST_BIG_ENDIAN,
6012                                TCG_TYPE_I32, TCG_TYPE_I32,
6013                                ldst->addrlo_reg, -1);
6014         next_arg += 2;
6015         nmov += 1;
6016     } else {
6017         n = tcg_out_helper_add_mov(mov, loc, TCG_TYPE_I64, s->addr_type,
6018                                    ldst->addrlo_reg, ldst->addrhi_reg);
6019         next_arg += n;
6020         nmov += n;
6021     }
6022 
6023     /* Handle data argument. */
6024     loc = &info->in[next_arg];
6025     switch (loc->kind) {
6026     case TCG_CALL_ARG_NORMAL:
6027     case TCG_CALL_ARG_EXTEND_U:
6028     case TCG_CALL_ARG_EXTEND_S:
6029         n = tcg_out_helper_add_mov(mov + nmov, loc, data_type, ldst->type,
6030                                    ldst->datalo_reg, ldst->datahi_reg);
6031         next_arg += n;
6032         nmov += n;
6033         tcg_out_helper_load_slots(s, nmov, mov, parm);
6034         break;
6035 
6036     case TCG_CALL_ARG_BY_REF:
6037         tcg_debug_assert(TCG_TARGET_REG_BITS == 64);
6038         tcg_debug_assert(data_type == TCG_TYPE_I128);
6039         tcg_out_st(s, TCG_TYPE_I64,
6040                    HOST_BIG_ENDIAN ? ldst->datahi_reg : ldst->datalo_reg,
6041                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[0].ref_slot));
6042         tcg_out_st(s, TCG_TYPE_I64,
6043                    HOST_BIG_ENDIAN ? ldst->datalo_reg : ldst->datahi_reg,
6044                    TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc[1].ref_slot));
6045 
6046         tcg_out_helper_load_slots(s, nmov, mov, parm);
6047 
6048         if (arg_slot_reg_p(loc->arg_slot)) {
6049             tcg_out_addi_ptr(s, tcg_target_call_iarg_regs[loc->arg_slot],
6050                              TCG_REG_CALL_STACK,
6051                              arg_slot_stk_ofs(loc->ref_slot));
6052         } else {
6053             tcg_debug_assert(parm->ntmp != 0);
6054             tcg_out_addi_ptr(s, parm->tmp[0], TCG_REG_CALL_STACK,
6055                              arg_slot_stk_ofs(loc->ref_slot));
6056             tcg_out_st(s, TCG_TYPE_PTR, parm->tmp[0],
6057                        TCG_REG_CALL_STACK, arg_slot_stk_ofs(loc->arg_slot));
6058         }
6059         next_arg += 2;
6060         break;
6061 
6062     default:
6063         g_assert_not_reached();
6064     }
6065 
6066     if (TCG_TARGET_REG_BITS == 32 && s->addr_type == TCG_TYPE_I32) {
6067         /* Zero extend the address by loading a zero for the high part. */
6068         loc = &info->in[1 + !HOST_BIG_ENDIAN];
6069         tcg_out_helper_load_imm(s, loc->arg_slot, TCG_TYPE_I32, 0, parm);
6070     }
6071 
6072     tcg_out_helper_load_common_args(s, ldst, parm, info, next_arg);
6073 }
6074 
6075 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, uint64_t pc_start)
6076 {
6077     int i, start_words, num_insns;
6078     TCGOp *op;
6079 
6080     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
6081                  && qemu_log_in_addr_range(pc_start))) {
6082         FILE *logfile = qemu_log_trylock();
6083         if (logfile) {
6084             fprintf(logfile, "OP:\n");
6085             tcg_dump_ops(s, logfile, false);
6086             fprintf(logfile, "\n");
6087             qemu_log_unlock(logfile);
6088         }
6089     }
6090 
6091 #ifdef CONFIG_DEBUG_TCG
6092     /* Ensure all labels referenced have been emitted.  */
6093     {
6094         TCGLabel *l;
6095         bool error = false;
6096 
6097         QSIMPLEQ_FOREACH(l, &s->labels, next) {
6098             if (unlikely(!l->present) && !QSIMPLEQ_EMPTY(&l->branches)) {
6099                 qemu_log_mask(CPU_LOG_TB_OP,
6100                               "$L%d referenced but not present.\n", l->id);
6101                 error = true;
6102             }
6103         }
6104         assert(!error);
6105     }
6106 #endif
6107 
6108     tcg_optimize(s);
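    /*
     * Pass pipeline: tcg_optimize performs constant folding and copy
     * propagation; reachable_code_pass drops ops that follow an
     * unconditional exit; the liveness passes compute the dead/sync
     * flags (op->life) consumed by the register allocator below.
     */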
6109 
6110     reachable_code_pass(s);
6111     liveness_pass_0(s);
6112     liveness_pass_1(s);
6113 
6114     if (s->nb_indirects > 0) {
6115         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
6116                      && qemu_log_in_addr_range(pc_start))) {
6117             FILE *logfile = qemu_log_trylock();
6118             if (logfile) {
6119                 fprintf(logfile, "OP before indirect lowering:\n");
6120                 tcg_dump_ops(s, logfile, false);
6121                 fprintf(logfile, "\n");
6122                 qemu_log_unlock(logfile);
6123             }
6124         }
6125 
6126         /* Replace indirect temps with direct temps.  */
6127         if (liveness_pass_2(s)) {
6128             /* If changes were made, re-run liveness.  */
6129             liveness_pass_1(s);
6130         }
6131     }
6132 
6133     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
6134                  && qemu_log_in_addr_range(pc_start))) {
6135         FILE *logfile = qemu_log_trylock();
6136         if (logfile) {
6137             fprintf(logfile, "OP after optimization and liveness analysis:\n");
6138             tcg_dump_ops(s, logfile, true);
6139             fprintf(logfile, "\n");
6140             qemu_log_unlock(logfile);
6141         }
6142     }
6143 
6144     /* Initialize goto_tb jump offsets. */
6145     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
6146     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
6147     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
6148     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
6149 
6150     tcg_reg_alloc_start(s);
6151 
6152     /*
6153      * Reset the buffer pointers when restarting after overflow.
6154      * TODO: Move this into translate-all.c with the rest of the
6155      * buffer management.  Having only this done here is confusing.
6156      */
6157     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
6158     s->code_ptr = s->code_buf;
6159 
6160 #ifdef TCG_TARGET_NEED_LDST_LABELS
6161     QSIMPLEQ_INIT(&s->ldst_labels);
6162 #endif
6163 #ifdef TCG_TARGET_NEED_POOL_LABELS
6164     s->pool_labels = NULL;
6165 #endif
6166 
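    /* One uint64_t of insn_start data per start word, per guest insn. */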
    start_words = s->insn_start_words;
    s->gen_insn_data =
        tcg_malloc(sizeof(uint64_t) * s->gen_tb->icount * start_words);

    tcg_out_tb_start(s);

    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < start_words; ++i) {
                s->gen_insn_data[num_insns * start_words + i] =
                    tcg_get_insn_start_param(op, i);
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_exit_tb:
            tcg_out_exit_tb(s, op->args[0]);
            break;
        case INDEX_op_goto_tb:
            tcg_out_goto_tb(s, op->args[0]);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: it would be much faster to have specialized register
               allocator functions for some common argument patterns.  */
            tcg_reg_alloc_op(s, op);
            break;
        }
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    tcg_debug_assert(num_insns + 1 == s->gen_tb->icount);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of the block.  */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
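
/*
 * A hedged sketch of how a caller consumes the result; the real logic
 * lives in tb_gen_code() in accel/tcg/translate-all.c and differs in
 * detail:
 *
 *     gen_code_size = tcg_gen_code(tcg_ctx, tb, pc);
 *     if (unlikely(gen_code_size < 0)) {
 *         if (gen_code_size == -1) {
 *             // code_gen_buffer overflow: flush all TBs, then retry
 *         } else {
 *             // TB too large: retranslate with fewer guest insns
 *         }
 *     }
 */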

#ifdef ELF_HOST_MACHINE
/* In order to use this feature, the backend needs to do three things:

   (1) Define ELF_HOST_MACHINE, which both supplies the e_machine value
       for the ELF image and indicates support for the feature.

   (2) Define tcg_register_jit.  This should create a buffer containing
       the contents of a .debug_frame section that describes the post-
       prologue unwind info for the tcg machine.

   (3) Call tcg_register_jit_int, with the constructed .debug_frame.
*/
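
/*
 * For illustration only: a minimal sketch of steps (2) and (3), loosely
 * modeled on an x86-64 host.  The DebugFrame layout, frame size, and
 * register numbers below are assumptions for the example, not taken
 * from any particular tcg-target.c.inc.
 */
#if 0
typedef struct {
    DebugFrameHeader h;
    uint8_t fde_def_cfa[4];
    uint8_t fde_reg_ofs[4];
} DebugFrame;

static const DebugFrame debug_frame = {
    .h.cie.len = sizeof(DebugFrameCIE) - 4, /* length after .len member */
    .h.cie.id = -1,
    .h.cie.version = 1,
    .h.cie.code_align = 1,
    .h.cie.data_align = 0x78,               /* sleb128 -8 */
    .h.cie.return_column = 16,              /* %rip */

    /* Total FDE size does not include the "len" member.  */
    .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset),

    .fde_def_cfa = {
        12, 7,                              /* DW_CFA_def_cfa %rsp, ... */
        0x80, 0x01,                         /* ... uleb128 128 (frame size) */
    },
    .fde_reg_ofs = {
        0x90, 1,                            /* DW_CFA_offset, %rip, -8 */
        0x86, 2,                            /* DW_CFA_offset, %rbp, -16 */
    },
};

void tcg_register_jit(const void *buf, size_t buf_size)
{
    tcg_register_jit_int(buf, buf_size, &debug_frame, sizeof(debug_frame));
}
#endif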

/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */

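/*
 * Return the offset of @str within @strtab.  Offset 0 holds the empty
 * string and is skipped; @str must be present in the table, as the
 * loop does not bound itself against the end of the section.
 */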
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        p += strlen(p) + 1;
    }
}

static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
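    /* A hand-rolled DWARF .debug_info section: a compile-unit DIE
       covering the buffer, containing one subprogram DIE named
       code_gen_buffer.  Must match the abbrev table in img->da below.  */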
    struct __attribute__((packed)) DebugInfo {
        uint32_t  len;
        uint16_t  version;
        uint32_t  abbrev;
        uint8_t   ptr_size;
        uint8_t   cu_die;
        uint16_t  cu_lang;
        uintptr_t cu_low_pc;
        uintptr_t cu_high_pc;
        uint8_t   fn_die;
        char      fn_name[16];
        uintptr_t fn_low_pc;
        uintptr_t fn_high_pc;
        uint8_t   cu_eoc;
    };

    struct ElfImage {
        ElfW(Ehdr) ehdr;
        ElfW(Phdr) phdr;
        ElfW(Shdr) shdr[7];
        ElfW(Sym)  sym[2];
        struct DebugInfo di;
        uint8_t    da[24];
        char       str[80];
    };

    struct ElfImage *img;

    static const struct ElfImage img_template = {
        .ehdr = {
            .e_ident[EI_MAG0] = ELFMAG0,
            .e_ident[EI_MAG1] = ELFMAG1,
            .e_ident[EI_MAG2] = ELFMAG2,
            .e_ident[EI_MAG3] = ELFMAG3,
            .e_ident[EI_CLASS] = ELF_CLASS,
            .e_ident[EI_DATA] = ELF_DATA,
            .e_ident[EI_VERSION] = EV_CURRENT,
            .e_type = ET_EXEC,
            .e_machine = ELF_HOST_MACHINE,
            .e_version = EV_CURRENT,
            .e_phoff = offsetof(struct ElfImage, phdr),
            .e_shoff = offsetof(struct ElfImage, shdr),
            .e_ehsize = sizeof(ElfW(Ehdr)),
            .e_phentsize = sizeof(ElfW(Phdr)),
            .e_phnum = 1,
            .e_shentsize = sizeof(ElfW(Shdr)),
            .e_shnum = ARRAY_SIZE(img->shdr),
            .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
#ifdef ELF_HOST_FLAGS
            .e_flags = ELF_HOST_FLAGS,
#endif
#ifdef ELF_OSABI
            .e_ident[EI_OSABI] = ELF_OSABI,
#endif
        },
        .phdr = {
            .p_type = PT_LOAD,
            .p_flags = PF_X,
        },
        .shdr = {
            [0] = { .sh_type = SHT_NULL },
            /* Trick: The contents of code_gen_buffer are not present in
               this fake ELF file; that got allocated elsewhere.  Therefore
               we mark .text as SHT_NOBITS (similar to .bss) so that readers
               will not look for contents.  We can record any address.  */
            [1] = { /* .text */
                .sh_type = SHT_NOBITS,
                .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
            },
            [2] = { /* .debug_info */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, di),
                .sh_size = sizeof(struct DebugInfo),
            },
            [3] = { /* .debug_abbrev */
                .sh_type = SHT_PROGBITS,
                .sh_offset = offsetof(struct ElfImage, da),
                .sh_size = sizeof(img->da),
            },
            [4] = { /* .debug_frame */
                .sh_type = SHT_PROGBITS,
                .sh_offset = sizeof(struct ElfImage),
            },
            [5] = { /* .symtab */
                .sh_type = SHT_SYMTAB,
                .sh_offset = offsetof(struct ElfImage, sym),
                .sh_size = sizeof(img->sym),
                .sh_info = 1,
                .sh_link = ARRAY_SIZE(img->shdr) - 1,
                .sh_entsize = sizeof(ElfW(Sym)),
            },
            [6] = { /* .strtab */
                .sh_type = SHT_STRTAB,
                .sh_offset = offsetof(struct ElfImage, str),
                .sh_size = sizeof(img->str),
            }
        },
        .sym = {
            [1] = { /* code_gen_buffer */
                .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
                .st_shndx = 1,
            }
        },
        .di = {
            .len = sizeof(struct DebugInfo) - 4,
            .version = 2,
            .ptr_size = sizeof(void *),
            .cu_die = 1,
            .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
            .fn_die = 2,
            .fn_name = "code_gen_buffer"
        },
        .da = {
            1,          /* abbrev number (the cu) */
            0x11, 1,    /* DW_TAG_compile_unit, has children */
            0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            2,          /* abbrev number (the fn) */
            0x2e, 0,    /* DW_TAG_subprogram, no children */
            0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
            0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
            0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
            0, 0,       /* end of abbrev */
            0           /* no more abbrev */
        },
        .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
               ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
    };

    /* We only need a single jit entry; statically allocate it.  */
    static struct jit_code_entry one_entry;

    uintptr_t buf = (uintptr_t)buf_ptr;
    size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
    DebugFrameHeader *dfh;

    img = g_malloc(img_size);
    *img = img_template;

    img->phdr.p_vaddr = buf;
    img->phdr.p_paddr = buf;
    img->phdr.p_memsz = buf_size;

    img->shdr[1].sh_name = find_string(img->str, ".text");
    img->shdr[1].sh_addr = buf;
    img->shdr[1].sh_size = buf_size;

    img->shdr[2].sh_name = find_string(img->str, ".debug_info");
    img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");

    img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
    img->shdr[4].sh_size = debug_frame_size;

    img->shdr[5].sh_name = find_string(img->str, ".symtab");
    img->shdr[6].sh_name = find_string(img->str, ".strtab");

    img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
    img->sym[1].st_value = buf;
    img->sym[1].st_size = buf_size;

    img->di.cu_low_pc = buf;
    img->di.cu_high_pc = buf + buf_size;
    img->di.fn_low_pc = buf;
    img->di.fn_high_pc = buf + buf_size;

    dfh = (DebugFrameHeader *)(img + 1);
    memcpy(dfh, debug_frame, debug_frame_size);
    dfh->fde.func_start = buf;
    dfh->fde.func_len = buf_size;

#ifdef DEBUG_JIT
    /* Enable this block to be able to debug the ELF image file creation.
       One can use readelf, objdump, or other inspection utilities.  */
    {
        g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
        FILE *f = fopen(jit, "w+b");
        if (f) {
            if (fwrite(img, img_size, 1, f) != 1) {
                /* Avoid stupid unused return value warning for fwrite.
                   With nmemb == 1, fwrite returns 1 on success.  */
            }
            fclose(f);
        }
    }
#endif

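    /* Publish the entry: GDB plants a breakpoint in
       __jit_debug_register_code and reads the descriptor when it fires.  */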
    one_entry.symfile_addr = img;
    one_entry.symfile_size = img_size;

    __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
    __jit_debug_descriptor.relevant_entry = &one_entry;
    __jit_debug_descriptor.first_entry = &one_entry;
    __jit_debug_register_code();
}
#else
/* No support for the feature.  Provide the entry point expected by exec.c,
   and implement the internal function we declared earlier.  */

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}

void tcg_register_jit(const void *buf, size_t buf_size)
{
}
#endif /* ELF_HOST_MACHINE */

#if !TCG_TARGET_MAYBE_vec
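/*
 * Backends that do not advertise TCG_TARGET_MAYBE_vec never see vector
 * opcodes, so this expansion hook must be unreachable.
 */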
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
#endif