xref: /qemu/tcg/tcg.c (revision bb509d94)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/cacheflush.h"
38 #include "qemu/cacheinfo.h"
39 
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41    CPU definitions. Currently they are used for qemu_ld/st
42    instructions */
43 #define NO_CPU_IO_DEFS
44 
45 #include "exec/exec-all.h"
46 #include "tcg/tcg-op.h"
47 
48 #if UINTPTR_MAX == UINT32_MAX
49 # define ELF_CLASS  ELFCLASS32
50 #else
51 # define ELF_CLASS  ELFCLASS64
52 #endif
53 #if HOST_BIG_ENDIAN
54 # define ELF_DATA   ELFDATA2MSB
55 #else
56 # define ELF_DATA   ELFDATA2LSB
57 #endif
58 
59 #include "elf.h"
60 #include "exec/log.h"
61 #include "tcg/tcg-ldst.h"
62 #include "tcg-internal.h"
63 #include "accel/tcg/perf.h"
64 
65 /* Forward declarations for functions declared in tcg-target.c.inc and
66    used here. */
67 static void tcg_target_init(TCGContext *s);
68 static void tcg_target_qemu_prologue(TCGContext *s);
69 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
70                         intptr_t value, intptr_t addend);
71 
72 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * CIE part of the debug frame header.  The leading 'len' field is forced
 * to pointer-size alignment; the struct itself is not packed.
 */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

/*
 * FDE part of the debug frame header (packed).  Going by the field names,
 * it holds the offset of the associated CIE and the start address and
 * length of the described function.
 */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

/* A CIE immediately followed by an FDE header, packed together. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
94 
95 static void tcg_register_jit_int(const void *buf, size_t size,
96                                  const void *debug_frame,
97                                  size_t debug_frame_size)
98     __attribute__((unused));
99 
100 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
101 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
102                        intptr_t arg2);
103 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
104 static void tcg_out_movi(TCGContext *s, TCGType type,
105                          TCGReg ret, tcg_target_long arg);
106 static void tcg_out_addi_ptr(TCGContext *s, TCGReg, TCGReg, tcg_target_long);
107 static void tcg_out_exit_tb(TCGContext *s, uintptr_t arg);
108 static void tcg_out_goto_tb(TCGContext *s, int which);
109 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
110                        const TCGArg args[TCG_MAX_OP_ARGS],
111                        const int const_args[TCG_MAX_OP_ARGS]);
112 #if TCG_TARGET_MAYBE_vec
113 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
114                             TCGReg dst, TCGReg src);
115 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
116                              TCGReg dst, TCGReg base, intptr_t offset);
117 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
118                              TCGReg dst, int64_t arg);
119 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
120                            unsigned vecl, unsigned vece,
121                            const TCGArg args[TCG_MAX_OP_ARGS],
122                            const int const_args[TCG_MAX_OP_ARGS]);
123 #else
/*
 * TCG_TARGET_MAYBE_vec is zero: the backend provides no vector emitters.
 * These stubs keep references in this file compiling; each one asserts
 * if it is ever reached.
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
/* Stub for dup-from-memory; must never be called without vector support. */
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
/* Stub for dup-immediate; must never be called without vector support. */
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
/* Stub for generic vector ops; must never be called without vector support. */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
146 #endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148                        intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150                         TCGReg base, intptr_t ofs);
151 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
152                          const TCGHelperInfo *info);
153 static TCGReg tcg_target_call_oarg_reg(TCGCallReturnKind kind, int slot);
154 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
155 #ifdef TCG_TARGET_NEED_LDST_LABELS
156 static int tcg_out_ldst_finalize(TCGContext *s);
157 #endif
158 
159 TCGContext tcg_init_ctx;
160 __thread TCGContext *tcg_ctx;
161 
162 TCGContext **tcg_ctxs;
163 unsigned int tcg_cur_ctxs;
164 unsigned int tcg_max_ctxs;
165 TCGv_env cpu_env = 0;
166 const void *tcg_code_gen_epilogue;
167 uintptr_t tcg_splitwx_diff;
168 
169 #ifndef CONFIG_TCG_INTERPRETER
170 tcg_prologue_fn *tcg_qemu_tb_exec;
171 #endif
172 
173 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
174 static TCGRegSet tcg_target_call_clobber_regs;
175 
176 #if TCG_TARGET_INSN_UNIT_SIZE == 1
177 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
178 {
179     *s->code_ptr++ = v;
180 }
181 
/* Overwrite the single insn unit at 'p' with the byte 'v'. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
187 #endif
188 
189 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
190 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
191 {
192     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
193         *s->code_ptr++ = v;
194     } else {
195         tcg_insn_unit *p = s->code_ptr;
196         memcpy(p, &v, sizeof(v));
197         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
198     }
199 }
200 
201 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
202                                                        uint16_t v)
203 {
204     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
205         *p = v;
206     } else {
207         memcpy(p, &v, sizeof(v));
208     }
209 }
210 #endif
211 
212 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
213 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
214 {
215     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
216         *s->code_ptr++ = v;
217     } else {
218         tcg_insn_unit *p = s->code_ptr;
219         memcpy(p, &v, sizeof(v));
220         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
221     }
222 }
223 
224 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
225                                                        uint32_t v)
226 {
227     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
228         *p = v;
229     } else {
230         memcpy(p, &v, sizeof(v));
231     }
232 }
233 #endif
234 
235 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
236 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
237 {
238     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
239         *s->code_ptr++ = v;
240     } else {
241         tcg_insn_unit *p = s->code_ptr;
242         memcpy(p, &v, sizeof(v));
243         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
244     }
245 }
246 
247 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
248                                                        uint64_t v)
249 {
250     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
251         *p = v;
252     } else {
253         memcpy(p, &v, sizeof(v));
254     }
255 }
256 #endif
257 
258 /* label relocation processing */
259 
260 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
261                           TCGLabel *l, intptr_t addend)
262 {
263     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
264 
265     r->type = type;
266     r->ptr = code_ptr;
267     r->addend = addend;
268     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
269 }
270 
271 static void tcg_out_label(TCGContext *s, TCGLabel *l)
272 {
273     tcg_debug_assert(!l->has_value);
274     l->has_value = 1;
275     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
276 }
277 
278 TCGLabel *gen_new_label(void)
279 {
280     TCGContext *s = tcg_ctx;
281     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
282 
283     memset(l, 0, sizeof(TCGLabel));
284     l->id = s->nb_labels++;
285     QSIMPLEQ_INIT(&l->relocs);
286 
287     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
288 
289     return l;
290 }
291 
292 static bool tcg_resolve_relocs(TCGContext *s)
293 {
294     TCGLabel *l;
295 
296     QSIMPLEQ_FOREACH(l, &s->labels, next) {
297         TCGRelocation *r;
298         uintptr_t value = l->u.value;
299 
300         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
301             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
302                 return false;
303             }
304         }
305     }
306     return true;
307 }
308 
/*
 * Record the current generated-code size as jmp_reset_offset[which]
 * of the translation block being generated (s->gen_tb).
 */
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_reset_offset[which] = tcg_current_code_size(s);
}
317 
/*
 * Record the current generated-code size as jmp_insn_offset[which]
 * of the translation block being generated (s->gen_tb).
 */
static void G_GNUC_UNUSED set_jmp_insn_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->gen_tb->jmp_insn_offset[which] = tcg_current_code_size(s);
}
326 
/*
 * Return, as an integer, the address of the jmp_target_addr[which] slot
 * of the translation block being generated.
 */
static uintptr_t G_GNUC_UNUSED get_jmp_target_addr(TCGContext *s, int which)
{
    /*
     * Return the read-execute version of the pointer, for the benefit
     * of any pc-relative addressing mode.
     */
    return (uintptr_t)tcg_splitwx_to_rx(&s->gen_tb->jmp_target_addr[which]);
}
335 
336 /* Signal overflow, starting over with fewer guest insns. */
static G_NORETURN
void tcg_raise_tb_overflow(TCGContext *s)
{
    /* Non-local exit through s->jmp_trans, delivering the value -2. */
    siglongjmp(s->jmp_trans, -2);
}
342 
/*
 * Token-pasting helpers: glue prefix P onto one to six constraint
 * names, joining the names with '_' to form a single identifier.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
349 
350 /* Define an enumeration for the various combinations. */
351 
/*
 * First expansion of the C_* macros: each use produces one enumerator
 * name followed by a comma, so that including tcg-target-con-set.h
 * inside the enum below lists one enumerator per constraint set.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

/* Enumerators are generated by the backend's constraint-set header. */
typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
372 
373 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
374 
375 #undef C_O0_I1
376 #undef C_O0_I2
377 #undef C_O0_I3
378 #undef C_O0_I4
379 #undef C_O1_I1
380 #undef C_O1_I2
381 #undef C_O1_I3
382 #undef C_O1_I4
383 #undef C_N1_I2
384 #undef C_O2_I1
385 #undef C_O2_I2
386 #undef C_O2_I3
387 #undef C_O2_I4
388 
389 /* Put all of the constraint sets into an array, indexed by the enum. */
390 
/*
 * Second expansion of the C_* macros: each use produces one
 * TCGTargetOpDef initializer whose args_ct_str holds the stringified
 * constraints, outputs first and then inputs.  The same header is
 * included again, so array order matches the enumerator order.
 */
#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

/* The N1 form prefixes the output constraint string with "&". */
#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
411 
412 
413 #undef C_O0_I1
414 #undef C_O0_I2
415 #undef C_O0_I3
416 #undef C_O0_I4
417 #undef C_O1_I1
418 #undef C_O1_I2
419 #undef C_O1_I3
420 #undef C_O1_I4
421 #undef C_N1_I2
422 #undef C_O2_I1
423 #undef C_O2_I2
424 #undef C_O2_I3
425 #undef C_O2_I4
426 
427 /* Expand the enumerator to be returned from tcg_target_op_def(). */
428 
/*
 * Third expansion of the C_* macros: the bare enumerator name with no
 * trailing comma, so a C_*(...) use inside tcg-target.c.inc (included
 * right after these definitions) is an expression of the enum type.
 */
#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
445 
446 #include "tcg-target.c.inc"
447 
448 static void alloc_tcg_plugin_context(TCGContext *s)
449 {
450 #ifdef CONFIG_PLUGIN
451     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
452     s->plugin_tb->insns =
453         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
454 #endif
455 }
456 
457 /*
458  * All TCG threads except the parent (i.e. the one that called tcg_context_init
459  * and registered the target's TCG globals) must register with this function
460  * before initiating translation.
461  *
462  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
463  * of tcg_region_init() for the reasoning behind this.
464  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
471  */
472 #ifdef CONFIG_USER_ONLY
/* User-mode variant: the calling thread simply uses tcg_init_ctx. */
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
477 #else
478 void tcg_register_thread(void)
479 {
480     TCGContext *s = g_malloc(sizeof(*s));
481     unsigned int i, n;
482 
483     *s = tcg_init_ctx;
484 
485     /* Relink mem_base.  */
486     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
487         if (tcg_init_ctx.temps[i].mem_base) {
488             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
489             tcg_debug_assert(b >= 0 && b < n);
490             s->temps[i].mem_base = &s->temps[b];
491         }
492     }
493 
494     /* Claim an entry in tcg_ctxs */
495     n = qatomic_fetch_inc(&tcg_cur_ctxs);
496     g_assert(n < tcg_max_ctxs);
497     qatomic_set(&tcg_ctxs[n], s);
498 
499     if (n > 0) {
500         alloc_tcg_plugin_context(s);
501         tcg_region_initial_alloc(s);
502     }
503 
504     tcg_ctx = s;
505 }
506 #endif /* !CONFIG_USER_ONLY */
507 
508 /* pool based memory allocation */
509 void *tcg_malloc_internal(TCGContext *s, int size)
510 {
511     TCGPool *p;
512     int pool_size;
513 
514     if (size > TCG_POOL_CHUNK_SIZE) {
515         /* big malloc: insert a new pool (XXX: could optimize) */
516         p = g_malloc(sizeof(TCGPool) + size);
517         p->size = size;
518         p->next = s->pool_first_large;
519         s->pool_first_large = p;
520         return p->data;
521     } else {
522         p = s->pool_current;
523         if (!p) {
524             p = s->pool_first;
525             if (!p)
526                 goto new_pool;
527         } else {
528             if (!p->next) {
529             new_pool:
530                 pool_size = TCG_POOL_CHUNK_SIZE;
531                 p = g_malloc(sizeof(TCGPool) + pool_size);
532                 p->size = pool_size;
533                 p->next = NULL;
534                 if (s->pool_current) {
535                     s->pool_current->next = p;
536                 } else {
537                     s->pool_first = p;
538                 }
539             } else {
540                 p = p->next;
541             }
542         }
543     }
544     s->pool_current = p;
545     s->pool_cur = p->data + size;
546     s->pool_end = p->data + p->size;
547     return p->data;
548 }
549 
550 void tcg_pool_reset(TCGContext *s)
551 {
552     TCGPool *p, *t;
553     for (p = s->pool_first_large; p; p = t) {
554         t = p->next;
555         g_free(p);
556     }
557     s->pool_first_large = NULL;
558     s->pool_cur = s->pool_end = NULL;
559     s->pool_current = NULL;
560 }
561 
562 #include "exec/helper-proto.h"
563 
/* One TCGHelperInfo per helper; entries come from exec/helper-tcg.h. */
static TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its all_helpers[] entry. */
static GHashTable *helper_table;
568 
569 #ifdef CONFIG_TCG_INTERPRETER
570 static ffi_type *typecode_to_ffi(int argmask)
571 {
572     /*
573      * libffi does not support __int128_t, so we have forced Int128
574      * to use the structure definition instead of the builtin type.
575      */
576     static ffi_type *ffi_type_i128_elements[3] = {
577         &ffi_type_uint64,
578         &ffi_type_uint64,
579         NULL
580     };
581     static ffi_type ffi_type_i128 = {
582         .size = 16,
583         .alignment = __alignof__(Int128),
584         .type = FFI_TYPE_STRUCT,
585         .elements = ffi_type_i128_elements,
586     };
587 
588     switch (argmask) {
589     case dh_typecode_void:
590         return &ffi_type_void;
591     case dh_typecode_i32:
592         return &ffi_type_uint32;
593     case dh_typecode_s32:
594         return &ffi_type_sint32;
595     case dh_typecode_i64:
596         return &ffi_type_uint64;
597     case dh_typecode_s64:
598         return &ffi_type_sint64;
599     case dh_typecode_ptr:
600         return &ffi_type_pointer;
601     case dh_typecode_i128:
602         return &ffi_type_i128;
603     }
604     g_assert_not_reached();
605 }
606 
607 static void init_ffi_layouts(void)
608 {
609     /* g_direct_hash/equal for direct comparisons on uint32_t.  */
610     GHashTable *ffi_table = g_hash_table_new(NULL, NULL);
611 
612     for (int i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
613         TCGHelperInfo *info = &all_helpers[i];
614         unsigned typemask = info->typemask;
615         gpointer hash = (gpointer)(uintptr_t)typemask;
616         struct {
617             ffi_cif cif;
618             ffi_type *args[];
619         } *ca;
620         ffi_status status;
621         int nargs;
622         ffi_cif *cif;
623 
624         cif = g_hash_table_lookup(ffi_table, hash);
625         if (cif) {
626             info->cif = cif;
627             continue;
628         }
629 
630         /* Ignoring the return type, find the last non-zero field. */
631         nargs = 32 - clz32(typemask >> 3);
632         nargs = DIV_ROUND_UP(nargs, 3);
633         assert(nargs <= MAX_CALL_IARGS);
634 
635         ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
636         ca->cif.rtype = typecode_to_ffi(typemask & 7);
637         ca->cif.nargs = nargs;
638 
639         if (nargs != 0) {
640             ca->cif.arg_types = ca->args;
641             for (int j = 0; j < nargs; ++j) {
642                 int typecode = extract32(typemask, (j + 1) * 3, 3);
643                 ca->args[j] = typecode_to_ffi(typecode);
644             }
645         }
646 
647         status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
648                               ca->cif.rtype, ca->cif.arg_types);
649         assert(status == FFI_OK);
650 
651         cif = &ca->cif;
652         info->cif = cif;
653         g_hash_table_insert(ffi_table, hash, (gpointer)cif);
654     }
655 
656     g_hash_table_destroy(ffi_table);
657 }
658 #endif /* CONFIG_TCG_INTERPRETER */
659 
/* Running cursors used while laying out one helper's call arguments. */
typedef struct TCGCumulativeArgs {
    int arg_idx;                /* tcg_gen_callN args[] */
    int info_in_idx;            /* TCGHelperInfo in[] */
    int arg_slot;               /* regs+stack slot */
    int ref_slot;               /* stack slots for references */
} TCGCumulativeArgs;
666 
667 static void layout_arg_even(TCGCumulativeArgs *cum)
668 {
669     cum->arg_slot += cum->arg_slot & 1;
670 }
671 
672 static void layout_arg_1(TCGCumulativeArgs *cum, TCGHelperInfo *info,
673                          TCGCallArgumentKind kind)
674 {
675     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
676 
677     *loc = (TCGCallArgumentLoc){
678         .kind = kind,
679         .arg_idx = cum->arg_idx,
680         .arg_slot = cum->arg_slot,
681     };
682     cum->info_in_idx++;
683     cum->arg_slot++;
684 }
685 
686 static void layout_arg_normal_n(TCGCumulativeArgs *cum,
687                                 TCGHelperInfo *info, int n)
688 {
689     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
690 
691     for (int i = 0; i < n; ++i) {
692         /* Layout all using the same arg_idx, adjusting the subindex. */
693         loc[i] = (TCGCallArgumentLoc){
694             .kind = TCG_CALL_ARG_NORMAL,
695             .arg_idx = cum->arg_idx,
696             .tmp_subindex = i,
697             .arg_slot = cum->arg_slot + i,
698         };
699     }
700     cum->info_in_idx += n;
701     cum->arg_slot += n;
702 }
703 
704 static void layout_arg_by_ref(TCGCumulativeArgs *cum, TCGHelperInfo *info)
705 {
706     TCGCallArgumentLoc *loc = &info->in[cum->info_in_idx];
707     int n = 128 / TCG_TARGET_REG_BITS;
708 
709     /* The first subindex carries the pointer. */
710     layout_arg_1(cum, info, TCG_CALL_ARG_BY_REF);
711 
712     /*
713      * The callee is allowed to clobber memory associated with
714      * structure pass by-reference.  Therefore we must make copies.
715      * Allocate space from "ref_slot", which will be adjusted to
716      * follow the parameters on the stack.
717      */
718     loc[0].ref_slot = cum->ref_slot;
719 
720     /*
721      * Subsequent words also go into the reference slot, but
722      * do not accumulate into the regular arguments.
723      */
724     for (int i = 1; i < n; ++i) {
725         loc[i] = (TCGCallArgumentLoc){
726             .kind = TCG_CALL_ARG_BY_REF_N,
727             .arg_idx = cum->arg_idx,
728             .tmp_subindex = i,
729             .ref_slot = cum->ref_slot + i,
730         };
731     }
732     cum->info_in_idx += n;
733     cum->ref_slot += n;
734 }
735 
/*
 * Compute the calling-convention layout for one helper from its typemask.
 *
 * The low 3 bits of info->typemask encode the return type; each following
 * 3-bit field encodes one argument, in order.  On return, info->nr_out and
 * info->out_kind describe the return value (out_kind is left untouched for
 * void), and info->in[0 .. nr_in-1] describe where each argument piece goes.
 *
 * The inner switches are on compile-time constants supplied by the backend
 * (TCG_TARGET_CALL_*); qemu_build_not_reached() marks the combinations
 * that are expected to be eliminated at build time.
 */
static void init_call_layout(TCGHelperInfo *info)
{
    int max_reg_slots = ARRAY_SIZE(tcg_target_call_iarg_regs);
    int max_stk_slots = TCG_STATIC_CALL_ARGS_SIZE / sizeof(tcg_target_long);
    unsigned typemask = info->typemask;
    unsigned typecode;
    TCGCumulativeArgs cum = { };

    /*
     * Parse and place any function return value.
     */
    typecode = typemask & 7;
    switch (typecode) {
    case dh_typecode_void:
        info->nr_out = 0;
        break;
    case dh_typecode_i32:
    case dh_typecode_s32:
    case dh_typecode_ptr:
        info->nr_out = 1;
        info->out_kind = TCG_CALL_RET_NORMAL;
        break;
    case dh_typecode_i64:
    case dh_typecode_s64:
        info->nr_out = 64 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_CALL_RET_NORMAL;
        /* Query the last register now to trigger any assert early. */
        tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
        break;
    case dh_typecode_i128:
        info->nr_out = 128 / TCG_TARGET_REG_BITS;
        info->out_kind = TCG_TARGET_CALL_RET_I128;
        switch (TCG_TARGET_CALL_RET_I128) {
        case TCG_CALL_RET_NORMAL:
            /* Query the last register now to trigger any assert early. */
            tcg_target_call_oarg_reg(info->out_kind, info->nr_out - 1);
            break;
        case TCG_CALL_RET_BY_VEC:
            /* Query the single register now to trigger any assert early. */
            tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0);
            break;
        case TCG_CALL_RET_BY_REF:
            /*
             * Allocate the first argument to the output.
             * We don't need to store this anywhere, just make it
             * unavailable for use in the input loop below.
             */
            cum.arg_slot = 1;
            break;
        default:
            qemu_build_not_reached();
        }
        break;
    default:
        g_assert_not_reached();
    }

    /*
     * Parse and place function arguments.
     */
    /* Consume one 3-bit field per argument until none remain set. */
    for (typemask >>= 3; typemask; typemask >>= 3, cum.arg_idx++) {
        TCGCallArgumentKind kind;
        TCGType type;

        typecode = typemask & 7;
        switch (typecode) {
        case dh_typecode_i32:
        case dh_typecode_s32:
            type = TCG_TYPE_I32;
            break;
        case dh_typecode_i64:
        case dh_typecode_s64:
            type = TCG_TYPE_I64;
            break;
        case dh_typecode_ptr:
            type = TCG_TYPE_PTR;
            break;
        case dh_typecode_i128:
            type = TCG_TYPE_I128;
            break;
        default:
            g_assert_not_reached();
        }

        switch (type) {
        case TCG_TYPE_I32:
            switch (TCG_TARGET_CALL_ARG_I32) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                break;
            case TCG_CALL_ARG_EXTEND:
                /*
                 * The low bit of the typecode picks EXTEND_U or the
                 * enumerator that follows it (presumably the signed
                 * variant; confirm against the enum definition).
                 */
                kind = TCG_CALL_ARG_EXTEND_U + (typecode & 1);
                layout_arg_1(&cum, info, kind);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I64:
            switch (TCG_TARGET_CALL_ARG_I64) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                if (TCG_TARGET_REG_BITS == 32) {
                    layout_arg_normal_n(&cum, info, 2);
                } else {
                    layout_arg_1(&cum, info, TCG_CALL_ARG_NORMAL);
                }
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        case TCG_TYPE_I128:
            switch (TCG_TARGET_CALL_ARG_I128) {
            case TCG_CALL_ARG_EVEN:
                layout_arg_even(&cum);
                /* fall through */
            case TCG_CALL_ARG_NORMAL:
                layout_arg_normal_n(&cum, info, 128 / TCG_TARGET_REG_BITS);
                break;
            case TCG_CALL_ARG_BY_REF:
                layout_arg_by_ref(&cum, info);
                break;
            default:
                qemu_build_not_reached();
            }
            break;

        default:
            g_assert_not_reached();
        }
    }
    info->nr_in = cum.info_in_idx;

    /* Validate that we didn't overrun the input array. */
    assert(cum.info_in_idx <= ARRAY_SIZE(info->in));
    /* Validate the backend has enough argument space. */
    assert(cum.arg_slot <= max_reg_slots + max_stk_slots);

    /*
     * Relocate the "ref_slot" area to the end of the parameters.
     * Minimizing this stack offset helps code size for x86,
     * which has a signed 8-bit offset encoding.
     */
    if (cum.ref_slot != 0) {
        int ref_base = 0;

        /* Only arguments that spilled past the registers use the stack. */
        if (cum.arg_slot > max_reg_slots) {
            int align = __alignof(Int128) / sizeof(tcg_target_long);

            ref_base = cum.arg_slot - max_reg_slots;
            if (align > 1) {
                ref_base = ROUND_UP(ref_base, align);
            }
        }
        assert(ref_base + cum.ref_slot <= max_stk_slots);

        if (ref_base != 0) {
            /* Shift every by-reference entry by the computed base. */
            for (int i = cum.info_in_idx - 1; i >= 0; --i) {
                TCGCallArgumentLoc *loc = &info->in[i];
                switch (loc->kind) {
                case TCG_CALL_ARG_BY_REF:
                case TCG_CALL_ARG_BY_REF_N:
                    loc->ref_slot += ref_base;
                    break;
                default:
                    break;
                }
            }
        }
    }
}
915 
/*
 * Register allocation order derived from tcg_target_reg_alloc_order.
 * It has the same number of entries and is filled in by
 * tcg_context_init().
 */
static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
/* Forward declarations; both functions are called from tcg_context_init(). */
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);
920 
921 static void tcg_context_init(unsigned max_cpus)
922 {
923     TCGContext *s = &tcg_init_ctx;
924     int op, total_args, n, i;
925     TCGOpDef *def;
926     TCGArgConstraint *args_ct;
927     TCGTemp *ts;
928 
929     memset(s, 0, sizeof(*s));
930     s->nb_globals = 0;
931 
932     /* Count total number of arguments and allocate the corresponding
933        space */
934     total_args = 0;
935     for(op = 0; op < NB_OPS; op++) {
936         def = &tcg_op_defs[op];
937         n = def->nb_iargs + def->nb_oargs;
938         total_args += n;
939     }
940 
941     args_ct = g_new0(TCGArgConstraint, total_args);
942 
943     for(op = 0; op < NB_OPS; op++) {
944         def = &tcg_op_defs[op];
945         def->args_ct = args_ct;
946         n = def->nb_iargs + def->nb_oargs;
947         args_ct += n;
948     }
949 
950     /* Register helpers.  */
951     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
952     helper_table = g_hash_table_new(NULL, NULL);
953 
954     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
955         init_call_layout(&all_helpers[i]);
956         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
957                             (gpointer)&all_helpers[i]);
958     }
959 
960 #ifdef CONFIG_TCG_INTERPRETER
961     init_ffi_layouts();
962 #endif
963 
964     tcg_target_init(s);
965     process_op_defs(s);
966 
967     /* Reverse the order of the saved registers, assuming they're all at
968        the start of tcg_target_reg_alloc_order.  */
969     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
970         int r = tcg_target_reg_alloc_order[n];
971         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
972             break;
973         }
974     }
975     for (i = 0; i < n; ++i) {
976         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
977     }
978     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
979         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
980     }
981 
982     alloc_tcg_plugin_context(s);
983 
984     tcg_ctx = s;
985     /*
986      * In user-mode we simply share the init context among threads, since we
987      * use a single region. See the documentation tcg_region_init() for the
988      * reasoning behind this.
989      * In softmmu we will have at most max_cpus TCG threads.
990      */
991 #ifdef CONFIG_USER_ONLY
992     tcg_ctxs = &tcg_ctx;
993     tcg_cur_ctxs = 1;
994     tcg_max_ctxs = 1;
995 #else
996     tcg_max_ctxs = max_cpus;
997     tcg_ctxs = g_new0(TCGContext *, max_cpus);
998 #endif
999 
1000     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1001     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1002     cpu_env = temp_tcgv_ptr(ts);
1003 }
1004 
/*
 * Initialize TCG: set up the initial translation context first, then the
 * code generation regions.
 *
 * @tb_size and @splitwx are forwarded unchanged to tcg_region_init();
 * @max_cpus is passed to both tcg_context_init() and tcg_region_init().
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
1010 
1011 /*
1012  * Allocate TBs right before their corresponding translated code, making
1013  * sure that TBs and code are on different cache lines.
1014  */
1015 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1016 {
1017     uintptr_t align = qemu_icache_linesize;
1018     TranslationBlock *tb;
1019     void *next;
1020 
1021  retry:
1022     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1023     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1024 
1025     if (unlikely(next > s->code_gen_highwater)) {
1026         if (tcg_region_alloc(s)) {
1027             return NULL;
1028         }
1029         goto retry;
1030     }
1031     qatomic_set(&s->code_gen_ptr, next);
1032     s->data_gen_ptr = NULL;
1033     return tb;
1034 }
1035 
1036 void tcg_prologue_init(TCGContext *s)
1037 {
1038     size_t prologue_size;
1039 
1040     s->code_ptr = s->code_gen_ptr;
1041     s->code_buf = s->code_gen_ptr;
1042     s->data_gen_ptr = NULL;
1043 
1044 #ifndef CONFIG_TCG_INTERPRETER
1045     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
1046 #endif
1047 
1048 #ifdef TCG_TARGET_NEED_POOL_LABELS
1049     s->pool_labels = NULL;
1050 #endif
1051 
1052     qemu_thread_jit_write();
1053     /* Generate the prologue.  */
1054     tcg_target_qemu_prologue(s);
1055 
1056 #ifdef TCG_TARGET_NEED_POOL_LABELS
1057     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1058     {
1059         int result = tcg_out_pool_finalize(s);
1060         tcg_debug_assert(result == 0);
1061     }
1062 #endif
1063 
1064     prologue_size = tcg_current_code_size(s);
1065     perf_report_prologue(s->code_gen_ptr, prologue_size);
1066 
1067 #ifndef CONFIG_TCG_INTERPRETER
1068     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
1069                         (uintptr_t)s->code_buf, prologue_size);
1070 #endif
1071 
1072 #ifdef DEBUG_DISAS
1073     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1074         FILE *logfile = qemu_log_trylock();
1075         if (logfile) {
1076             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
1077             if (s->data_gen_ptr) {
1078                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
1079                 size_t data_size = prologue_size - code_size;
1080                 size_t i;
1081 
1082                 disas(logfile, s->code_gen_ptr, code_size);
1083 
1084                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1085                     if (sizeof(tcg_target_ulong) == 8) {
1086                         fprintf(logfile,
1087                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1088                                 (uintptr_t)s->data_gen_ptr + i,
1089                                 *(uint64_t *)(s->data_gen_ptr + i));
1090                     } else {
1091                         fprintf(logfile,
1092                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
1093                                 (uintptr_t)s->data_gen_ptr + i,
1094                                 *(uint32_t *)(s->data_gen_ptr + i));
1095                     }
1096                 }
1097             } else {
1098                 disas(logfile, s->code_gen_ptr, prologue_size);
1099             }
1100             fprintf(logfile, "\n");
1101             qemu_log_unlock(logfile);
1102         }
1103     }
1104 #endif
1105 
1106 #ifndef CONFIG_TCG_INTERPRETER
1107     /*
1108      * Assert that goto_ptr is implemented completely, setting an epilogue.
1109      * For tci, we use NULL as the signal to return from the interpreter,
1110      * so skip this check.
1111      */
1112     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
1113 #endif
1114 
1115     tcg_region_prologue_set(s);
1116 }
1117 
1118 void tcg_func_start(TCGContext *s)
1119 {
1120     tcg_pool_reset(s);
1121     s->nb_temps = s->nb_globals;
1122 
1123     /* No temps have been previously allocated for size or locality.  */
1124     memset(s->free_temps, 0, sizeof(s->free_temps));
1125 
1126     /* No constant temps have been previously allocated. */
1127     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
1128         if (s->const_table[i]) {
1129             g_hash_table_remove_all(s->const_table[i]);
1130         }
1131     }
1132 
1133     s->nb_ops = 0;
1134     s->nb_labels = 0;
1135     s->current_frame_offset = s->frame_start;
1136 
1137 #ifdef CONFIG_DEBUG_TCG
1138     s->goto_tb_issue_mask = 0;
1139 #endif
1140 
1141     QTAILQ_INIT(&s->ops);
1142     QTAILQ_INIT(&s->free_ops);
1143     QSIMPLEQ_INIT(&s->labels);
1144 }
1145 
1146 static TCGTemp *tcg_temp_alloc(TCGContext *s)
1147 {
1148     int n = s->nb_temps++;
1149 
1150     if (n >= TCG_MAX_TEMPS) {
1151         tcg_raise_tb_overflow(s);
1152     }
1153     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1154 }
1155 
1156 static TCGTemp *tcg_global_alloc(TCGContext *s)
1157 {
1158     TCGTemp *ts;
1159 
1160     tcg_debug_assert(s->nb_globals == s->nb_temps);
1161     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
1162     s->nb_globals++;
1163     ts = tcg_temp_alloc(s);
1164     ts->kind = TEMP_GLOBAL;
1165 
1166     return ts;
1167 }
1168 
1169 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1170                                             TCGReg reg, const char *name)
1171 {
1172     TCGTemp *ts;
1173 
1174     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1175         tcg_abort();
1176     }
1177 
1178     ts = tcg_global_alloc(s);
1179     ts->base_type = type;
1180     ts->type = type;
1181     ts->kind = TEMP_FIXED;
1182     ts->reg = reg;
1183     ts->name = name;
1184     tcg_regset_set_reg(s->reserved_regs, reg);
1185 
1186     return ts;
1187 }
1188 
1189 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1190 {
1191     s->frame_start = start;
1192     s->frame_end = start + size;
1193     s->frame_temp
1194         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1195 }
1196 
1197 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1198                                      intptr_t offset, const char *name)
1199 {
1200     TCGContext *s = tcg_ctx;
1201     TCGTemp *base_ts = tcgv_ptr_temp(base);
1202     TCGTemp *ts = tcg_global_alloc(s);
1203     int indirect_reg = 0;
1204 
1205     switch (base_ts->kind) {
1206     case TEMP_FIXED:
1207         break;
1208     case TEMP_GLOBAL:
1209         /* We do not support double-indirect registers.  */
1210         tcg_debug_assert(!base_ts->indirect_reg);
1211         base_ts->indirect_base = 1;
1212         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1213                             ? 2 : 1);
1214         indirect_reg = 1;
1215         break;
1216     default:
1217         g_assert_not_reached();
1218     }
1219 
1220     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1221         TCGTemp *ts2 = tcg_global_alloc(s);
1222         char buf[64];
1223 
1224         ts->base_type = TCG_TYPE_I64;
1225         ts->type = TCG_TYPE_I32;
1226         ts->indirect_reg = indirect_reg;
1227         ts->mem_allocated = 1;
1228         ts->mem_base = base_ts;
1229         ts->mem_offset = offset;
1230         pstrcpy(buf, sizeof(buf), name);
1231         pstrcat(buf, sizeof(buf), "_0");
1232         ts->name = strdup(buf);
1233 
1234         tcg_debug_assert(ts2 == ts + 1);
1235         ts2->base_type = TCG_TYPE_I64;
1236         ts2->type = TCG_TYPE_I32;
1237         ts2->indirect_reg = indirect_reg;
1238         ts2->mem_allocated = 1;
1239         ts2->mem_base = base_ts;
1240         ts2->mem_offset = offset + 4;
1241         ts2->temp_subindex = 1;
1242         pstrcpy(buf, sizeof(buf), name);
1243         pstrcat(buf, sizeof(buf), "_1");
1244         ts2->name = strdup(buf);
1245     } else {
1246         ts->base_type = type;
1247         ts->type = type;
1248         ts->indirect_reg = indirect_reg;
1249         ts->mem_allocated = 1;
1250         ts->mem_base = base_ts;
1251         ts->mem_offset = offset;
1252         ts->name = name;
1253     }
1254     return ts;
1255 }
1256 
1257 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1258 {
1259     TCGContext *s = tcg_ctx;
1260     TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
1261     TCGTemp *ts;
1262     int idx, k;
1263 
1264     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1265     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1266     if (idx < TCG_MAX_TEMPS) {
1267         /* There is already an available temp with the right type.  */
1268         clear_bit(idx, s->free_temps[k].l);
1269 
1270         ts = &s->temps[idx];
1271         ts->temp_allocated = 1;
1272         tcg_debug_assert(ts->base_type == type);
1273         tcg_debug_assert(ts->kind == kind);
1274     } else {
1275         int i, n;
1276 
1277         switch (type) {
1278         case TCG_TYPE_I32:
1279         case TCG_TYPE_V64:
1280         case TCG_TYPE_V128:
1281         case TCG_TYPE_V256:
1282             n = 1;
1283             break;
1284         case TCG_TYPE_I64:
1285             n = 64 / TCG_TARGET_REG_BITS;
1286             break;
1287         case TCG_TYPE_I128:
1288             n = 128 / TCG_TARGET_REG_BITS;
1289             break;
1290         default:
1291             g_assert_not_reached();
1292         }
1293 
1294         ts = tcg_temp_alloc(s);
1295         ts->base_type = type;
1296         ts->temp_allocated = 1;
1297         ts->kind = kind;
1298 
1299         if (n == 1) {
1300             ts->type = type;
1301         } else {
1302             ts->type = TCG_TYPE_REG;
1303 
1304             for (i = 1; i < n; ++i) {
1305                 TCGTemp *ts2 = tcg_temp_alloc(s);
1306 
1307                 tcg_debug_assert(ts2 == ts + i);
1308                 ts2->base_type = type;
1309                 ts2->type = TCG_TYPE_REG;
1310                 ts2->temp_allocated = 1;
1311                 ts2->temp_subindex = i;
1312                 ts2->kind = kind;
1313             }
1314         }
1315     }
1316 
1317 #if defined(CONFIG_DEBUG_TCG)
1318     s->temps_in_use++;
1319 #endif
1320     return ts;
1321 }
1322 
1323 TCGv_vec tcg_temp_new_vec(TCGType type)
1324 {
1325     TCGTemp *t;
1326 
1327 #ifdef CONFIG_DEBUG_TCG
1328     switch (type) {
1329     case TCG_TYPE_V64:
1330         assert(TCG_TARGET_HAS_v64);
1331         break;
1332     case TCG_TYPE_V128:
1333         assert(TCG_TARGET_HAS_v128);
1334         break;
1335     case TCG_TYPE_V256:
1336         assert(TCG_TARGET_HAS_v256);
1337         break;
1338     default:
1339         g_assert_not_reached();
1340     }
1341 #endif
1342 
1343     t = tcg_temp_new_internal(type, 0);
1344     return temp_tcgv_vec(t);
1345 }
1346 
1347 /* Create a new temp of the same type as an existing temp.  */
1348 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1349 {
1350     TCGTemp *t = tcgv_vec_temp(match);
1351 
1352     tcg_debug_assert(t->temp_allocated != 0);
1353 
1354     t = tcg_temp_new_internal(t->base_type, 0);
1355     return temp_tcgv_vec(t);
1356 }
1357 
1358 void tcg_temp_free_internal(TCGTemp *ts)
1359 {
1360     TCGContext *s = tcg_ctx;
1361     int k, idx;
1362 
1363     switch (ts->kind) {
1364     case TEMP_CONST:
1365         /*
1366          * In order to simplify users of tcg_constant_*,
1367          * silently ignore free.
1368          */
1369         return;
1370     case TEMP_NORMAL:
1371     case TEMP_LOCAL:
1372         break;
1373     default:
1374         g_assert_not_reached();
1375     }
1376 
1377 #if defined(CONFIG_DEBUG_TCG)
1378     s->temps_in_use--;
1379     if (s->temps_in_use < 0) {
1380         fprintf(stderr, "More temporaries freed than allocated!\n");
1381     }
1382 #endif
1383 
1384     tcg_debug_assert(ts->temp_allocated != 0);
1385     ts->temp_allocated = 0;
1386 
1387     idx = temp_idx(ts);
1388     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1389     set_bit(idx, s->free_temps[k].l);
1390 }
1391 
1392 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1393 {
1394     TCGContext *s = tcg_ctx;
1395     GHashTable *h = s->const_table[type];
1396     TCGTemp *ts;
1397 
1398     if (h == NULL) {
1399         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1400         s->const_table[type] = h;
1401     }
1402 
1403     ts = g_hash_table_lookup(h, &val);
1404     if (ts == NULL) {
1405         int64_t *val_ptr;
1406 
1407         ts = tcg_temp_alloc(s);
1408 
1409         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1410             TCGTemp *ts2 = tcg_temp_alloc(s);
1411 
1412             tcg_debug_assert(ts2 == ts + 1);
1413 
1414             ts->base_type = TCG_TYPE_I64;
1415             ts->type = TCG_TYPE_I32;
1416             ts->kind = TEMP_CONST;
1417             ts->temp_allocated = 1;
1418 
1419             ts2->base_type = TCG_TYPE_I64;
1420             ts2->type = TCG_TYPE_I32;
1421             ts2->kind = TEMP_CONST;
1422             ts2->temp_allocated = 1;
1423             ts2->temp_subindex = 1;
1424 
1425             /*
1426              * Retain the full value of the 64-bit constant in the low
1427              * part, so that the hash table works.  Actual uses will
1428              * truncate the value to the low part.
1429              */
1430             ts[HOST_BIG_ENDIAN].val = val;
1431             ts[!HOST_BIG_ENDIAN].val = val >> 32;
1432             val_ptr = &ts[HOST_BIG_ENDIAN].val;
1433         } else {
1434             ts->base_type = type;
1435             ts->type = type;
1436             ts->kind = TEMP_CONST;
1437             ts->temp_allocated = 1;
1438             ts->val = val;
1439             val_ptr = &ts->val;
1440         }
1441         g_hash_table_insert(h, val_ptr, ts);
1442     }
1443 
1444     return ts;
1445 }
1446 
1447 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1448 {
1449     val = dup_const(vece, val);
1450     return temp_tcgv_vec(tcg_constant_internal(type, val));
1451 }
1452 
1453 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1454 {
1455     TCGTemp *t = tcgv_vec_temp(match);
1456 
1457     tcg_debug_assert(t->temp_allocated != 0);
1458     return tcg_constant_vec(t->base_type, vece, val);
1459 }
1460 
1461 TCGv_i32 tcg_const_i32(int32_t val)
1462 {
1463     TCGv_i32 t0;
1464     t0 = tcg_temp_new_i32();
1465     tcg_gen_movi_i32(t0, val);
1466     return t0;
1467 }
1468 
1469 TCGv_i64 tcg_const_i64(int64_t val)
1470 {
1471     TCGv_i64 t0;
1472     t0 = tcg_temp_new_i64();
1473     tcg_gen_movi_i64(t0, val);
1474     return t0;
1475 }
1476 
1477 TCGv_i32 tcg_const_local_i32(int32_t val)
1478 {
1479     TCGv_i32 t0;
1480     t0 = tcg_temp_local_new_i32();
1481     tcg_gen_movi_i32(t0, val);
1482     return t0;
1483 }
1484 
1485 TCGv_i64 tcg_const_local_i64(int64_t val)
1486 {
1487     TCGv_i64 t0;
1488     t0 = tcg_temp_local_new_i64();
1489     tcg_gen_movi_i64(t0, val);
1490     return t0;
1491 }
1492 
1493 #if defined(CONFIG_DEBUG_TCG)
1494 void tcg_clear_temp_count(void)
1495 {
1496     TCGContext *s = tcg_ctx;
1497     s->temps_in_use = 0;
1498 }
1499 
1500 int tcg_check_temp_count(void)
1501 {
1502     TCGContext *s = tcg_ctx;
1503     if (s->temps_in_use) {
1504         /* Clear the count so that we don't give another
1505          * warning immediately next time around.
1506          */
1507         s->temps_in_use = 0;
1508         return 1;
1509     }
1510     return 0;
1511 }
1512 #endif
1513 
/*
 * Return true if OP may appear in the opcode stream.
 *
 * Each opcode is mapped to the TCG_TARGET_HAS_* value (or register-width
 * test) that controls it; opcodes with no controlling value are reported
 * as supported unconditionally.  Vector opcodes additionally require
 * that at least one vector width is available.
 */
bool tcg_op_supported(TCGOpcode op)
{
    /* True when any of the three vector widths is available. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control flow, calls and guest memory accesses: always supported. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* Base 32-bit integer ops: always supported. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit integer ops, each gated by its own HAS_* value. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word comparisons: only when host registers are 32 bits. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Base 64-bit integer ops: only when host registers are 64 bits. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit integer ops, each gated by its own HAS_* value. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Base vector ops: supported whenever any vector width is. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    /* Optional vector ops: need a vector width and their own HAS_* value. */
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_nand_vec:
        return have_vec && TCG_TARGET_HAS_nand_vec;
    case INDEX_op_nor_vec:
        return have_vec && TCG_TARGET_HAS_nor_vec;
    case INDEX_op_eqv_vec:
        return have_vec && TCG_TARGET_HAS_eqv_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /*
         * Anything not listed above must lie strictly between
         * INDEX_op_last_generic and NB_OPS; such opcodes are reported
         * as supported.
         */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1808 
/* Forward declaration; called by tcg_gen_callN() below. */
static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs);
1810 
1811 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1812 {
1813     const TCGHelperInfo *info;
1814     TCGv_i64 extend_free[MAX_CALL_IARGS];
1815     int n_extend = 0;
1816     TCGOp *op;
1817     int i, n, pi = 0, total_args;
1818 
1819     info = g_hash_table_lookup(helper_table, (gpointer)func);
1820     total_args = info->nr_out + info->nr_in + 2;
1821     op = tcg_op_alloc(INDEX_op_call, total_args);
1822 
1823 #ifdef CONFIG_PLUGIN
1824     /* Flag helpers that may affect guest state */
1825     if (tcg_ctx->plugin_insn &&
1826         !(info->flags & TCG_CALL_PLUGIN) &&
1827         !(info->flags & TCG_CALL_NO_SIDE_EFFECTS)) {
1828         tcg_ctx->plugin_insn->calls_helpers = true;
1829     }
1830 #endif
1831 
1832     TCGOP_CALLO(op) = n = info->nr_out;
1833     switch (n) {
1834     case 0:
1835         tcg_debug_assert(ret == NULL);
1836         break;
1837     case 1:
1838         tcg_debug_assert(ret != NULL);
1839         op->args[pi++] = temp_arg(ret);
1840         break;
1841     case 2:
1842     case 4:
1843         tcg_debug_assert(ret != NULL);
1844         tcg_debug_assert(ret->base_type == ret->type + ctz32(n));
1845         tcg_debug_assert(ret->temp_subindex == 0);
1846         for (i = 0; i < n; ++i) {
1847             op->args[pi++] = temp_arg(ret + i);
1848         }
1849         break;
1850     default:
1851         g_assert_not_reached();
1852     }
1853 
1854     TCGOP_CALLI(op) = n = info->nr_in;
1855     for (i = 0; i < n; i++) {
1856         const TCGCallArgumentLoc *loc = &info->in[i];
1857         TCGTemp *ts = args[loc->arg_idx] + loc->tmp_subindex;
1858 
1859         switch (loc->kind) {
1860         case TCG_CALL_ARG_NORMAL:
1861         case TCG_CALL_ARG_BY_REF:
1862         case TCG_CALL_ARG_BY_REF_N:
1863             op->args[pi++] = temp_arg(ts);
1864             break;
1865 
1866         case TCG_CALL_ARG_EXTEND_U:
1867         case TCG_CALL_ARG_EXTEND_S:
1868             {
1869                 TCGv_i64 temp = tcg_temp_new_i64();
1870                 TCGv_i32 orig = temp_tcgv_i32(ts);
1871 
1872                 if (loc->kind == TCG_CALL_ARG_EXTEND_S) {
1873                     tcg_gen_ext_i32_i64(temp, orig);
1874                 } else {
1875                     tcg_gen_extu_i32_i64(temp, orig);
1876                 }
1877                 op->args[pi++] = tcgv_i64_arg(temp);
1878                 extend_free[n_extend++] = temp;
1879             }
1880             break;
1881 
1882         default:
1883             g_assert_not_reached();
1884         }
1885     }
1886     op->args[pi++] = (uintptr_t)func;
1887     op->args[pi++] = (uintptr_t)info;
1888     tcg_debug_assert(pi == total_args);
1889 
1890     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
1891 
1892     tcg_debug_assert(n_extend < ARRAY_SIZE(extend_free));
1893     for (i = 0; i < n_extend; ++i) {
1894         tcg_temp_free_i64(extend_free[i]);
1895     }
1896 }
1897 
1898 static void tcg_reg_alloc_start(TCGContext *s)
1899 {
1900     int i, n;
1901 
1902     for (i = 0, n = s->nb_temps; i < n; i++) {
1903         TCGTemp *ts = &s->temps[i];
1904         TCGTempVal val = TEMP_VAL_MEM;
1905 
1906         switch (ts->kind) {
1907         case TEMP_CONST:
1908             val = TEMP_VAL_CONST;
1909             break;
1910         case TEMP_FIXED:
1911             val = TEMP_VAL_REG;
1912             break;
1913         case TEMP_GLOBAL:
1914             break;
1915         case TEMP_NORMAL:
1916         case TEMP_EBB:
1917             val = TEMP_VAL_DEAD;
1918             /* fall through */
1919         case TEMP_LOCAL:
1920             ts->mem_allocated = 0;
1921             break;
1922         default:
1923             g_assert_not_reached();
1924         }
1925         ts->val_type = val;
1926     }
1927 
1928     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1929 }
1930 
1931 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1932                                  TCGTemp *ts)
1933 {
1934     int idx = temp_idx(ts);
1935 
1936     switch (ts->kind) {
1937     case TEMP_FIXED:
1938     case TEMP_GLOBAL:
1939         pstrcpy(buf, buf_size, ts->name);
1940         break;
1941     case TEMP_LOCAL:
1942         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1943         break;
1944     case TEMP_EBB:
1945         snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
1946         break;
1947     case TEMP_NORMAL:
1948         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1949         break;
1950     case TEMP_CONST:
1951         switch (ts->type) {
1952         case TCG_TYPE_I32:
1953             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1954             break;
1955 #if TCG_TARGET_REG_BITS > 32
1956         case TCG_TYPE_I64:
1957             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1958             break;
1959 #endif
1960         case TCG_TYPE_V64:
1961         case TCG_TYPE_V128:
1962         case TCG_TYPE_V256:
1963             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1964                      64 << (ts->type - TCG_TYPE_V64), ts->val);
1965             break;
1966         default:
1967             g_assert_not_reached();
1968         }
1969         break;
1970     }
1971     return buf;
1972 }
1973 
1974 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1975                              int buf_size, TCGArg arg)
1976 {
1977     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1978 }
1979 
1980 static const char * const cond_name[] =
1981 {
1982     [TCG_COND_NEVER] = "never",
1983     [TCG_COND_ALWAYS] = "always",
1984     [TCG_COND_EQ] = "eq",
1985     [TCG_COND_NE] = "ne",
1986     [TCG_COND_LT] = "lt",
1987     [TCG_COND_GE] = "ge",
1988     [TCG_COND_LE] = "le",
1989     [TCG_COND_GT] = "gt",
1990     [TCG_COND_LTU] = "ltu",
1991     [TCG_COND_GEU] = "geu",
1992     [TCG_COND_LEU] = "leu",
1993     [TCG_COND_GTU] = "gtu"
1994 };
1995 
1996 static const char * const ldst_name[] =
1997 {
1998     [MO_UB]   = "ub",
1999     [MO_SB]   = "sb",
2000     [MO_LEUW] = "leuw",
2001     [MO_LESW] = "lesw",
2002     [MO_LEUL] = "leul",
2003     [MO_LESL] = "lesl",
2004     [MO_LEUQ] = "leq",
2005     [MO_BEUW] = "beuw",
2006     [MO_BESW] = "besw",
2007     [MO_BEUL] = "beul",
2008     [MO_BESL] = "besl",
2009     [MO_BEUQ] = "beq",
2010 };
2011 
2012 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2013 #ifdef TARGET_ALIGNED_ONLY
2014     [MO_UNALN >> MO_ASHIFT]    = "un+",
2015     [MO_ALIGN >> MO_ASHIFT]    = "",
2016 #else
2017     [MO_UNALN >> MO_ASHIFT]    = "",
2018     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2019 #endif
2020     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2021     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2022     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2023     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2024     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2025     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2026 };
2027 
2028 static const char bswap_flag_name[][6] = {
2029     [TCG_BSWAP_IZ] = "iz",
2030     [TCG_BSWAP_OZ] = "oz",
2031     [TCG_BSWAP_OS] = "os",
2032     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
2033     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
2034 };
2035 
2036 static inline bool tcg_regset_single(TCGRegSet d)
2037 {
2038     return (d & (d - 1)) == 0;
2039 }
2040 
2041 static inline TCGReg tcg_regset_first(TCGRegSet d)
2042 {
2043     if (TCG_TARGET_NB_REGS <= 32) {
2044         return ctz32(d);
2045     } else {
2046         return ctz64(d);
2047     }
2048 }
2049 
/*
 * "No-error" fprintf: evaluates to the number of characters written, or
 * to 0 when fprintf reports failure, so the result can always be added
 * to a running column count.
 */
#define ne_fprintf(...) \
    ({ int n_ = fprintf(__VA_ARGS__); n_ < 0 ? 0 : n_; })
2053 
2054 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
2055 {
2056     char buf[128];
2057     TCGOp *op;
2058 
2059     QTAILQ_FOREACH(op, &s->ops, link) {
2060         int i, k, nb_oargs, nb_iargs, nb_cargs;
2061         const TCGOpDef *def;
2062         TCGOpcode c;
2063         int col = 0;
2064 
2065         c = op->opc;
2066         def = &tcg_op_defs[c];
2067 
2068         if (c == INDEX_op_insn_start) {
2069             nb_oargs = 0;
2070             col += ne_fprintf(f, "\n ----");
2071 
2072             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2073                 target_ulong a;
2074 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2075                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2076 #else
2077                 a = op->args[i];
2078 #endif
2079                 col += ne_fprintf(f, " " TARGET_FMT_lx, a);
2080             }
2081         } else if (c == INDEX_op_call) {
2082             const TCGHelperInfo *info = tcg_call_info(op);
2083             void *func = tcg_call_func(op);
2084 
2085             /* variable number of arguments */
2086             nb_oargs = TCGOP_CALLO(op);
2087             nb_iargs = TCGOP_CALLI(op);
2088             nb_cargs = def->nb_cargs;
2089 
2090             col += ne_fprintf(f, " %s ", def->name);
2091 
2092             /*
2093              * Print the function name from TCGHelperInfo, if available.
2094              * Note that plugins have a template function for the info,
2095              * but the actual function pointer comes from the plugin.
2096              */
2097             if (func == info->func) {
2098                 col += ne_fprintf(f, "%s", info->name);
2099             } else {
2100                 col += ne_fprintf(f, "plugin(%p)", func);
2101             }
2102 
2103             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
2104             for (i = 0; i < nb_oargs; i++) {
2105                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2106                                                             op->args[i]));
2107             }
2108             for (i = 0; i < nb_iargs; i++) {
2109                 TCGArg arg = op->args[nb_oargs + i];
2110                 const char *t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2111                 col += ne_fprintf(f, ",%s", t);
2112             }
2113         } else {
2114             col += ne_fprintf(f, " %s ", def->name);
2115 
2116             nb_oargs = def->nb_oargs;
2117             nb_iargs = def->nb_iargs;
2118             nb_cargs = def->nb_cargs;
2119 
2120             if (def->flags & TCG_OPF_VECTOR) {
2121                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
2122                                   8 << TCGOP_VECE(op));
2123             }
2124 
2125             k = 0;
2126             for (i = 0; i < nb_oargs; i++) {
2127                 const char *sep =  k ? "," : "";
2128                 col += ne_fprintf(f, "%s%s", sep,
2129                                   tcg_get_arg_str(s, buf, sizeof(buf),
2130                                                   op->args[k++]));
2131             }
2132             for (i = 0; i < nb_iargs; i++) {
2133                 const char *sep =  k ? "," : "";
2134                 col += ne_fprintf(f, "%s%s", sep,
2135                                   tcg_get_arg_str(s, buf, sizeof(buf),
2136                                                   op->args[k++]));
2137             }
2138             switch (c) {
2139             case INDEX_op_brcond_i32:
2140             case INDEX_op_setcond_i32:
2141             case INDEX_op_movcond_i32:
2142             case INDEX_op_brcond2_i32:
2143             case INDEX_op_setcond2_i32:
2144             case INDEX_op_brcond_i64:
2145             case INDEX_op_setcond_i64:
2146             case INDEX_op_movcond_i64:
2147             case INDEX_op_cmp_vec:
2148             case INDEX_op_cmpsel_vec:
2149                 if (op->args[k] < ARRAY_SIZE(cond_name)
2150                     && cond_name[op->args[k]]) {
2151                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
2152                 } else {
2153                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
2154                 }
2155                 i = 1;
2156                 break;
2157             case INDEX_op_qemu_ld_i32:
2158             case INDEX_op_qemu_st_i32:
2159             case INDEX_op_qemu_st8_i32:
2160             case INDEX_op_qemu_ld_i64:
2161             case INDEX_op_qemu_st_i64:
2162                 {
2163                     MemOpIdx oi = op->args[k++];
2164                     MemOp op = get_memop(oi);
2165                     unsigned ix = get_mmuidx(oi);
2166 
2167                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2168                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
2169                     } else {
2170                         const char *s_al, *s_op;
2171                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2172                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2173                         col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
2174                     }
2175                     i = 1;
2176                 }
2177                 break;
2178             case INDEX_op_bswap16_i32:
2179             case INDEX_op_bswap16_i64:
2180             case INDEX_op_bswap32_i32:
2181             case INDEX_op_bswap32_i64:
2182             case INDEX_op_bswap64_i64:
2183                 {
2184                     TCGArg flags = op->args[k];
2185                     const char *name = NULL;
2186 
2187                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
2188                         name = bswap_flag_name[flags];
2189                     }
2190                     if (name) {
2191                         col += ne_fprintf(f, ",%s", name);
2192                     } else {
2193                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
2194                     }
2195                     i = k = 1;
2196                 }
2197                 break;
2198             default:
2199                 i = 0;
2200                 break;
2201             }
2202             switch (c) {
2203             case INDEX_op_set_label:
2204             case INDEX_op_br:
2205             case INDEX_op_brcond_i32:
2206             case INDEX_op_brcond_i64:
2207             case INDEX_op_brcond2_i32:
2208                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
2209                                   arg_label(op->args[k])->id);
2210                 i++, k++;
2211                 break;
2212             default:
2213                 break;
2214             }
2215             for (; i < nb_cargs; i++, k++) {
2216                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
2217                                   op->args[k]);
2218             }
2219         }
2220 
2221         if (have_prefs || op->life) {
2222             for (; col < 40; ++col) {
2223                 putc(' ', f);
2224             }
2225         }
2226 
2227         if (op->life) {
2228             unsigned life = op->life;
2229 
2230             if (life & (SYNC_ARG * 3)) {
2231                 ne_fprintf(f, "  sync:");
2232                 for (i = 0; i < 2; ++i) {
2233                     if (life & (SYNC_ARG << i)) {
2234                         ne_fprintf(f, " %d", i);
2235                     }
2236                 }
2237             }
2238             life /= DEAD_ARG;
2239             if (life) {
2240                 ne_fprintf(f, "  dead:");
2241                 for (i = 0; life; ++i, life >>= 1) {
2242                     if (life & 1) {
2243                         ne_fprintf(f, " %d", i);
2244                     }
2245                 }
2246             }
2247         }
2248 
2249         if (have_prefs) {
2250             for (i = 0; i < nb_oargs; ++i) {
2251                 TCGRegSet set = output_pref(op, i);
2252 
2253                 if (i == 0) {
2254                     ne_fprintf(f, "  pref=");
2255                 } else {
2256                     ne_fprintf(f, ",");
2257                 }
2258                 if (set == 0) {
2259                     ne_fprintf(f, "none");
2260                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2261                     ne_fprintf(f, "all");
2262 #ifdef CONFIG_DEBUG_TCG
2263                 } else if (tcg_regset_single(set)) {
2264                     TCGReg reg = tcg_regset_first(set);
2265                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2266 #endif
2267                 } else if (TCG_TARGET_NB_REGS <= 32) {
2268                     ne_fprintf(f, "0x%x", (uint32_t)set);
2269                 } else {
2270                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2271                 }
2272             }
2273         }
2274 
2275         putc('\n', f);
2276     }
2277 }
2278 
2279 /* we give more priority to constraints with less registers */
2280 static int get_constraint_priority(const TCGOpDef *def, int k)
2281 {
2282     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2283     int n = ctpop64(arg_ct->regs);
2284 
2285     /*
2286      * Sort constraints of a single register first, which includes output
2287      * aliases (which must exactly match the input already allocated).
2288      */
2289     if (n == 1 || arg_ct->oalias) {
2290         return INT_MAX;
2291     }
2292 
2293     /*
2294      * Sort register pairs next, first then second immediately after.
2295      * Arbitrarily sort multiple pairs by the index of the first reg;
2296      * there shouldn't be many pairs.
2297      */
2298     switch (arg_ct->pair) {
2299     case 1:
2300     case 3:
2301         return (k + 1) * 2;
2302     case 2:
2303         return (arg_ct->pair_index + 1) * 2 - 1;
2304     }
2305 
2306     /* Finally, sort by decreasing register count. */
2307     assert(n > 1);
2308     return -n;
2309 }
2310 
2311 /* sort from highest priority to lowest */
2312 static void sort_constraints(TCGOpDef *def, int start, int n)
2313 {
2314     int i, j;
2315     TCGArgConstraint *a = def->args_ct;
2316 
2317     for (i = 0; i < n; i++) {
2318         a[start + i].sort_index = start + i;
2319     }
2320     if (n <= 1) {
2321         return;
2322     }
2323     for (i = 0; i < n - 1; i++) {
2324         for (j = i + 1; j < n; j++) {
2325             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2326             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2327             if (p1 < p2) {
2328                 int tmp = a[start + i].sort_index;
2329                 a[start + i].sort_index = a[start + j].sort_index;
2330                 a[start + j].sort_index = tmp;
2331             }
2332         }
2333     }
2334 }
2335 
2336 static void process_op_defs(TCGContext *s)
2337 {
2338     TCGOpcode op;
2339 
2340     for (op = 0; op < NB_OPS; op++) {
2341         TCGOpDef *def = &tcg_op_defs[op];
2342         const TCGTargetOpDef *tdefs;
2343         bool saw_alias_pair = false;
2344         int i, o, i2, o2, nb_args;
2345 
2346         if (def->flags & TCG_OPF_NOT_PRESENT) {
2347             continue;
2348         }
2349 
2350         nb_args = def->nb_iargs + def->nb_oargs;
2351         if (nb_args == 0) {
2352             continue;
2353         }
2354 
2355         /*
2356          * Macro magic should make it impossible, but double-check that
2357          * the array index is in range.  Since the signness of an enum
2358          * is implementation defined, force the result to unsigned.
2359          */
2360         unsigned con_set = tcg_target_op_def(op);
2361         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2362         tdefs = &constraint_sets[con_set];
2363 
2364         for (i = 0; i < nb_args; i++) {
2365             const char *ct_str = tdefs->args_ct_str[i];
2366             bool input_p = i >= def->nb_oargs;
2367 
2368             /* Incomplete TCGTargetOpDef entry. */
2369             tcg_debug_assert(ct_str != NULL);
2370 
2371             switch (*ct_str) {
2372             case '0' ... '9':
2373                 o = *ct_str - '0';
2374                 tcg_debug_assert(input_p);
2375                 tcg_debug_assert(o < def->nb_oargs);
2376                 tcg_debug_assert(def->args_ct[o].regs != 0);
2377                 tcg_debug_assert(!def->args_ct[o].oalias);
2378                 def->args_ct[i] = def->args_ct[o];
2379                 /* The output sets oalias.  */
2380                 def->args_ct[o].oalias = 1;
2381                 def->args_ct[o].alias_index = i;
2382                 /* The input sets ialias. */
2383                 def->args_ct[i].ialias = 1;
2384                 def->args_ct[i].alias_index = o;
2385                 if (def->args_ct[i].pair) {
2386                     saw_alias_pair = true;
2387                 }
2388                 tcg_debug_assert(ct_str[1] == '\0');
2389                 continue;
2390 
2391             case '&':
2392                 tcg_debug_assert(!input_p);
2393                 def->args_ct[i].newreg = true;
2394                 ct_str++;
2395                 break;
2396 
2397             case 'p': /* plus */
2398                 /* Allocate to the register after the previous. */
2399                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2400                 o = i - 1;
2401                 tcg_debug_assert(!def->args_ct[o].pair);
2402                 tcg_debug_assert(!def->args_ct[o].ct);
2403                 def->args_ct[i] = (TCGArgConstraint){
2404                     .pair = 2,
2405                     .pair_index = o,
2406                     .regs = def->args_ct[o].regs << 1,
2407                 };
2408                 def->args_ct[o].pair = 1;
2409                 def->args_ct[o].pair_index = i;
2410                 tcg_debug_assert(ct_str[1] == '\0');
2411                 continue;
2412 
2413             case 'm': /* minus */
2414                 /* Allocate to the register before the previous. */
2415                 tcg_debug_assert(i > (input_p ? def->nb_oargs : 0));
2416                 o = i - 1;
2417                 tcg_debug_assert(!def->args_ct[o].pair);
2418                 tcg_debug_assert(!def->args_ct[o].ct);
2419                 def->args_ct[i] = (TCGArgConstraint){
2420                     .pair = 1,
2421                     .pair_index = o,
2422                     .regs = def->args_ct[o].regs >> 1,
2423                 };
2424                 def->args_ct[o].pair = 2;
2425                 def->args_ct[o].pair_index = i;
2426                 tcg_debug_assert(ct_str[1] == '\0');
2427                 continue;
2428             }
2429 
2430             do {
2431                 switch (*ct_str) {
2432                 case 'i':
2433                     def->args_ct[i].ct |= TCG_CT_CONST;
2434                     break;
2435 
2436                 /* Include all of the target-specific constraints. */
2437 
2438 #undef CONST
2439 #define CONST(CASE, MASK) \
2440     case CASE: def->args_ct[i].ct |= MASK; break;
2441 #define REGS(CASE, MASK) \
2442     case CASE: def->args_ct[i].regs |= MASK; break;
2443 
2444 #include "tcg-target-con-str.h"
2445 
2446 #undef REGS
2447 #undef CONST
2448                 default:
2449                 case '0' ... '9':
2450                 case '&':
2451                 case 'p':
2452                 case 'm':
2453                     /* Typo in TCGTargetOpDef constraint. */
2454                     g_assert_not_reached();
2455                 }
2456             } while (*++ct_str != '\0');
2457         }
2458 
2459         /* TCGTargetOpDef entry with too much information? */
2460         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2461 
2462         /*
2463          * Fix up output pairs that are aliased with inputs.
2464          * When we created the alias, we copied pair from the output.
2465          * There are three cases:
2466          *    (1a) Pairs of inputs alias pairs of outputs.
2467          *    (1b) One input aliases the first of a pair of outputs.
2468          *    (2)  One input aliases the second of a pair of outputs.
2469          *
2470          * Case 1a is handled by making sure that the pair_index'es are
2471          * properly updated so that they appear the same as a pair of inputs.
2472          *
2473          * Case 1b is handled by setting the pair_index of the input to
2474          * itself, simply so it doesn't point to an unrelated argument.
2475          * Since we don't encounter the "second" during the input allocation
2476          * phase, nothing happens with the second half of the input pair.
2477          *
2478          * Case 2 is handled by setting the second input to pair=3, the
2479          * first output to pair=3, and the pair_index'es to match.
2480          */
2481         if (saw_alias_pair) {
2482             for (i = def->nb_oargs; i < nb_args; i++) {
2483                 /*
2484                  * Since [0-9pm] must be alone in the constraint string,
2485                  * the only way they can both be set is if the pair comes
2486                  * from the output alias.
2487                  */
2488                 if (!def->args_ct[i].ialias) {
2489                     continue;
2490                 }
2491                 switch (def->args_ct[i].pair) {
2492                 case 0:
2493                     break;
2494                 case 1:
2495                     o = def->args_ct[i].alias_index;
2496                     o2 = def->args_ct[o].pair_index;
2497                     tcg_debug_assert(def->args_ct[o].pair == 1);
2498                     tcg_debug_assert(def->args_ct[o2].pair == 2);
2499                     if (def->args_ct[o2].oalias) {
2500                         /* Case 1a */
2501                         i2 = def->args_ct[o2].alias_index;
2502                         tcg_debug_assert(def->args_ct[i2].pair == 2);
2503                         def->args_ct[i2].pair_index = i;
2504                         def->args_ct[i].pair_index = i2;
2505                     } else {
2506                         /* Case 1b */
2507                         def->args_ct[i].pair_index = i;
2508                     }
2509                     break;
2510                 case 2:
2511                     o = def->args_ct[i].alias_index;
2512                     o2 = def->args_ct[o].pair_index;
2513                     tcg_debug_assert(def->args_ct[o].pair == 2);
2514                     tcg_debug_assert(def->args_ct[o2].pair == 1);
2515                     if (def->args_ct[o2].oalias) {
2516                         /* Case 1a */
2517                         i2 = def->args_ct[o2].alias_index;
2518                         tcg_debug_assert(def->args_ct[i2].pair == 1);
2519                         def->args_ct[i2].pair_index = i;
2520                         def->args_ct[i].pair_index = i2;
2521                     } else {
2522                         /* Case 2 */
2523                         def->args_ct[i].pair = 3;
2524                         def->args_ct[o2].pair = 3;
2525                         def->args_ct[i].pair_index = o2;
2526                         def->args_ct[o2].pair_index = i;
2527                     }
2528                     break;
2529                 default:
2530                     g_assert_not_reached();
2531                 }
2532             }
2533         }
2534 
2535         /* sort the constraints (XXX: this is just an heuristic) */
2536         sort_constraints(def, 0, def->nb_oargs);
2537         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2538     }
2539 }
2540 
2541 void tcg_op_remove(TCGContext *s, TCGOp *op)
2542 {
2543     TCGLabel *label;
2544 
2545     switch (op->opc) {
2546     case INDEX_op_br:
2547         label = arg_label(op->args[0]);
2548         label->refs--;
2549         break;
2550     case INDEX_op_brcond_i32:
2551     case INDEX_op_brcond_i64:
2552         label = arg_label(op->args[3]);
2553         label->refs--;
2554         break;
2555     case INDEX_op_brcond2_i32:
2556         label = arg_label(op->args[5]);
2557         label->refs--;
2558         break;
2559     default:
2560         break;
2561     }
2562 
2563     QTAILQ_REMOVE(&s->ops, op, link);
2564     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2565     s->nb_ops--;
2566 
2567 #ifdef CONFIG_PROFILER
2568     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2569 #endif
2570 }
2571 
2572 void tcg_remove_ops_after(TCGOp *op)
2573 {
2574     TCGContext *s = tcg_ctx;
2575 
2576     while (true) {
2577         TCGOp *last = tcg_last_op();
2578         if (last == op) {
2579             return;
2580         }
2581         tcg_op_remove(s, last);
2582     }
2583 }
2584 
2585 static TCGOp *tcg_op_alloc(TCGOpcode opc, unsigned nargs)
2586 {
2587     TCGContext *s = tcg_ctx;
2588     TCGOp *op = NULL;
2589 
2590     if (unlikely(!QTAILQ_EMPTY(&s->free_ops))) {
2591         QTAILQ_FOREACH(op, &s->free_ops, link) {
2592             if (nargs <= op->nargs) {
2593                 QTAILQ_REMOVE(&s->free_ops, op, link);
2594                 nargs = op->nargs;
2595                 goto found;
2596             }
2597         }
2598     }
2599 
2600     /* Most opcodes have 3 or 4 operands: reduce fragmentation. */
2601     nargs = MAX(4, nargs);
2602     op = tcg_malloc(sizeof(TCGOp) + sizeof(TCGArg) * nargs);
2603 
2604  found:
2605     memset(op, 0, offsetof(TCGOp, link));
2606     op->opc = opc;
2607     op->nargs = nargs;
2608 
2609     /* Check for bitfield overflow. */
2610     tcg_debug_assert(op->nargs == nargs);
2611 
2612     s->nb_ops++;
2613     return op;
2614 }
2615 
2616 TCGOp *tcg_emit_op(TCGOpcode opc, unsigned nargs)
2617 {
2618     TCGOp *op = tcg_op_alloc(opc, nargs);
2619     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2620     return op;
2621 }
2622 
2623 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op,
2624                             TCGOpcode opc, unsigned nargs)
2625 {
2626     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2627     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2628     return new_op;
2629 }
2630 
2631 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op,
2632                            TCGOpcode opc, unsigned nargs)
2633 {
2634     TCGOp *new_op = tcg_op_alloc(opc, nargs);
2635     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2636     return new_op;
2637 }
2638 
2639 /* Reachable analysis : remove unreachable code.  */
2640 static void reachable_code_pass(TCGContext *s)
2641 {
2642     TCGOp *op, *op_next;
2643     bool dead = false;
2644 
2645     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2646         bool remove = dead;
2647         TCGLabel *label;
2648 
2649         switch (op->opc) {
2650         case INDEX_op_set_label:
2651             label = arg_label(op->args[0]);
2652             if (label->refs == 0) {
2653                 /*
2654                  * While there is an occasional backward branch, virtually
2655                  * all branches generated by the translators are forward.
2656                  * Which means that generally we will have already removed
2657                  * all references to the label that will be, and there is
2658                  * little to be gained by iterating.
2659                  */
2660                 remove = true;
2661             } else {
2662                 /* Once we see a label, insns become live again.  */
2663                 dead = false;
2664                 remove = false;
2665 
2666                 /*
2667                  * Optimization can fold conditional branches to unconditional.
2668                  * If we find a label with one reference which is preceded by
2669                  * an unconditional branch to it, remove both.  This needed to
2670                  * wait until the dead code in between them was removed.
2671                  */
2672                 if (label->refs == 1) {
2673                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2674                     if (op_prev->opc == INDEX_op_br &&
2675                         label == arg_label(op_prev->args[0])) {
2676                         tcg_op_remove(s, op_prev);
2677                         remove = true;
2678                     }
2679                 }
2680             }
2681             break;
2682 
2683         case INDEX_op_br:
2684         case INDEX_op_exit_tb:
2685         case INDEX_op_goto_ptr:
2686             /* Unconditional branches; everything following is dead.  */
2687             dead = true;
2688             break;
2689 
2690         case INDEX_op_call:
2691             /* Notice noreturn helper calls, raising exceptions.  */
2692             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2693                 dead = true;
2694             }
2695             break;
2696 
2697         case INDEX_op_insn_start:
2698             /* Never remove -- we need to keep these for unwind.  */
2699             remove = false;
2700             break;
2701 
2702         default:
2703             break;
2704         }
2705 
2706         if (remove) {
2707             tcg_op_remove(s, op);
2708         }
2709     }
2710 }
2711 
/* Liveness bits stored in a temp's state field during liveness analysis. */
#define TS_DEAD  0x1
#define TS_MEM   0x2

/*
 * Test the dead / needs-sync flag of argument n.  Both expect a variable
 * named arg_life to be in scope at the point of use.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2717 
2718 /* For liveness_pass_1, the register preferences for a given temp.  */
2719 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2720 {
2721     return ts->state_ptr;
2722 }
2723 
2724 /* For liveness_pass_1, reset the preferences for a given temp to the
2725  * maximal regset for its type.
2726  */
2727 static inline void la_reset_pref(TCGTemp *ts)
2728 {
2729     *la_temp_pref(ts)
2730         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2731 }
2732 
2733 /* liveness analysis: end of function: all temps are dead, and globals
2734    should be in memory. */
2735 static void la_func_end(TCGContext *s, int ng, int nt)
2736 {
2737     int i;
2738 
2739     for (i = 0; i < ng; ++i) {
2740         s->temps[i].state = TS_DEAD | TS_MEM;
2741         la_reset_pref(&s->temps[i]);
2742     }
2743     for (i = ng; i < nt; ++i) {
2744         s->temps[i].state = TS_DEAD;
2745         la_reset_pref(&s->temps[i]);
2746     }
2747 }
2748 
2749 /* liveness analysis: end of basic block: all temps are dead, globals
2750    and local temps should be in memory. */
2751 static void la_bb_end(TCGContext *s, int ng, int nt)
2752 {
2753     int i;
2754 
2755     for (i = 0; i < nt; ++i) {
2756         TCGTemp *ts = &s->temps[i];
2757         int state;
2758 
2759         switch (ts->kind) {
2760         case TEMP_FIXED:
2761         case TEMP_GLOBAL:
2762         case TEMP_LOCAL:
2763             state = TS_DEAD | TS_MEM;
2764             break;
2765         case TEMP_NORMAL:
2766         case TEMP_EBB:
2767         case TEMP_CONST:
2768             state = TS_DEAD;
2769             break;
2770         default:
2771             g_assert_not_reached();
2772         }
2773         ts->state = state;
2774         la_reset_pref(ts);
2775     }
2776 }
2777 
2778 /* liveness analysis: sync globals back to memory.  */
2779 static void la_global_sync(TCGContext *s, int ng)
2780 {
2781     int i;
2782 
2783     for (i = 0; i < ng; ++i) {
2784         int state = s->temps[i].state;
2785         s->temps[i].state = state | TS_MEM;
2786         if (state == TS_DEAD) {
2787             /* If the global was previously dead, reset prefs.  */
2788             la_reset_pref(&s->temps[i]);
2789         }
2790     }
2791 }
2792 
2793 /*
2794  * liveness analysis: conditional branch: all temps are dead unless
2795  * explicitly live-across-conditional-branch, globals and local temps
2796  * should be synced.
2797  */
2798 static void la_bb_sync(TCGContext *s, int ng, int nt)
2799 {
2800     la_global_sync(s, ng);
2801 
2802     for (int i = ng; i < nt; ++i) {
2803         TCGTemp *ts = &s->temps[i];
2804         int state;
2805 
2806         switch (ts->kind) {
2807         case TEMP_LOCAL:
2808             state = ts->state;
2809             ts->state = state | TS_MEM;
2810             if (state != TS_DEAD) {
2811                 continue;
2812             }
2813             break;
2814         case TEMP_NORMAL:
2815             s->temps[i].state = TS_DEAD;
2816             break;
2817         case TEMP_EBB:
2818         case TEMP_CONST:
2819             continue;
2820         default:
2821             g_assert_not_reached();
2822         }
2823         la_reset_pref(&s->temps[i]);
2824     }
2825 }
2826 
2827 /* liveness analysis: sync globals back to memory and kill.  */
2828 static void la_global_kill(TCGContext *s, int ng)
2829 {
2830     int i;
2831 
2832     for (i = 0; i < ng; i++) {
2833         s->temps[i].state = TS_DEAD | TS_MEM;
2834         la_reset_pref(&s->temps[i]);
2835     }
2836 }
2837 
2838 /* liveness analysis: note live globals crossing calls.  */
2839 static void la_cross_call(TCGContext *s, int nt)
2840 {
2841     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2842     int i;
2843 
2844     for (i = 0; i < nt; i++) {
2845         TCGTemp *ts = &s->temps[i];
2846         if (!(ts->state & TS_DEAD)) {
2847             TCGRegSet *pset = la_temp_pref(ts);
2848             TCGRegSet set = *pset;
2849 
2850             set &= mask;
2851             /* If the combination is not possible, restart.  */
2852             if (set == 0) {
2853                 set = tcg_target_available_regs[ts->type] & mask;
2854             }
2855             *pset = set;
2856         }
2857     }
2858 }
2859 
2860 /* Liveness analysis : update the opc_arg_life array to tell if a
2861    given input arguments is dead. Instructions updating dead
2862    temporaries are removed. */
2863 static void liveness_pass_1(TCGContext *s)
2864 {
2865     int nb_globals = s->nb_globals;
2866     int nb_temps = s->nb_temps;
2867     TCGOp *op, *op_prev;
2868     TCGRegSet *prefs;
2869     int i;
2870 
2871     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2872     for (i = 0; i < nb_temps; ++i) {
2873         s->temps[i].state_ptr = prefs + i;
2874     }
2875 
2876     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2877     la_func_end(s, nb_globals, nb_temps);
2878 
2879     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2880         int nb_iargs, nb_oargs;
2881         TCGOpcode opc_new, opc_new2;
2882         bool have_opc_new2;
2883         TCGLifeData arg_life = 0;
2884         TCGTemp *ts;
2885         TCGOpcode opc = op->opc;
2886         const TCGOpDef *def = &tcg_op_defs[opc];
2887 
2888         switch (opc) {
2889         case INDEX_op_call:
2890             {
2891                 const TCGHelperInfo *info = tcg_call_info(op);
2892                 int call_flags = tcg_call_flags(op);
2893 
2894                 nb_oargs = TCGOP_CALLO(op);
2895                 nb_iargs = TCGOP_CALLI(op);
2896 
2897                 /* pure functions can be removed if their result is unused */
2898                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2899                     for (i = 0; i < nb_oargs; i++) {
2900                         ts = arg_temp(op->args[i]);
2901                         if (ts->state != TS_DEAD) {
2902                             goto do_not_remove_call;
2903                         }
2904                     }
2905                     goto do_remove;
2906                 }
2907             do_not_remove_call:
2908 
2909                 /* Output args are dead.  */
2910                 for (i = 0; i < nb_oargs; i++) {
2911                     ts = arg_temp(op->args[i]);
2912                     if (ts->state & TS_DEAD) {
2913                         arg_life |= DEAD_ARG << i;
2914                     }
2915                     if (ts->state & TS_MEM) {
2916                         arg_life |= SYNC_ARG << i;
2917                     }
2918                     ts->state = TS_DEAD;
2919                     la_reset_pref(ts);
2920                 }
2921 
2922                 /* Not used -- it will be tcg_target_call_oarg_reg().  */
2923                 memset(op->output_pref, 0, sizeof(op->output_pref));
2924 
2925                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2926                                     TCG_CALL_NO_READ_GLOBALS))) {
2927                     la_global_kill(s, nb_globals);
2928                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2929                     la_global_sync(s, nb_globals);
2930                 }
2931 
2932                 /* Record arguments that die in this helper.  */
2933                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2934                     ts = arg_temp(op->args[i]);
2935                     if (ts->state & TS_DEAD) {
2936                         arg_life |= DEAD_ARG << i;
2937                     }
2938                 }
2939 
2940                 /* For all live registers, remove call-clobbered prefs.  */
2941                 la_cross_call(s, nb_temps);
2942 
2943                 /*
2944                  * Input arguments are live for preceding opcodes.
2945                  *
2946                  * For those arguments that die, and will be allocated in
2947                  * registers, clear the register set for that arg, to be
2948                  * filled in below.  For args that will be on the stack,
2949                  * reset to any available reg.  Process arguments in reverse
2950                  * order so that if a temp is used more than once, the stack
2951                  * reset to max happens before the register reset to 0.
2952                  */
2953                 for (i = nb_iargs - 1; i >= 0; i--) {
2954                     const TCGCallArgumentLoc *loc = &info->in[i];
2955                     ts = arg_temp(op->args[nb_oargs + i]);
2956 
2957                     if (ts->state & TS_DEAD) {
2958                         switch (loc->kind) {
2959                         case TCG_CALL_ARG_NORMAL:
2960                         case TCG_CALL_ARG_EXTEND_U:
2961                         case TCG_CALL_ARG_EXTEND_S:
2962                             if (REG_P(loc)) {
2963                                 *la_temp_pref(ts) = 0;
2964                                 break;
2965                             }
2966                             /* fall through */
2967                         default:
2968                             *la_temp_pref(ts) =
2969                                 tcg_target_available_regs[ts->type];
2970                             break;
2971                         }
2972                         ts->state &= ~TS_DEAD;
2973                     }
2974                 }
2975 
2976                 /*
2977                  * For each input argument, add its input register to prefs.
2978                  * If a temp is used once, this produces a single set bit;
2979                  * if a temp is used multiple times, this produces a set.
2980                  */
2981                 for (i = 0; i < nb_iargs; i++) {
2982                     const TCGCallArgumentLoc *loc = &info->in[i];
2983                     ts = arg_temp(op->args[nb_oargs + i]);
2984 
2985                     switch (loc->kind) {
2986                     case TCG_CALL_ARG_NORMAL:
2987                     case TCG_CALL_ARG_EXTEND_U:
2988                     case TCG_CALL_ARG_EXTEND_S:
2989                         if (REG_P(loc)) {
2990                             tcg_regset_set_reg(*la_temp_pref(ts),
2991                                 tcg_target_call_iarg_regs[loc->arg_slot]);
2992                         }
2993                         break;
2994                     default:
2995                         break;
2996                     }
2997                 }
2998             }
2999             break;
3000         case INDEX_op_insn_start:
3001             break;
3002         case INDEX_op_discard:
3003             /* mark the temporary as dead */
3004             ts = arg_temp(op->args[0]);
3005             ts->state = TS_DEAD;
3006             la_reset_pref(ts);
3007             break;
3008 
3009         case INDEX_op_add2_i32:
3010             opc_new = INDEX_op_add_i32;
3011             goto do_addsub2;
3012         case INDEX_op_sub2_i32:
3013             opc_new = INDEX_op_sub_i32;
3014             goto do_addsub2;
3015         case INDEX_op_add2_i64:
3016             opc_new = INDEX_op_add_i64;
3017             goto do_addsub2;
3018         case INDEX_op_sub2_i64:
3019             opc_new = INDEX_op_sub_i64;
3020         do_addsub2:
3021             nb_iargs = 4;
3022             nb_oargs = 2;
3023             /* Test if the high part of the operation is dead, but not
3024                the low part.  The result can be optimized to a simple
3025                add or sub.  This happens often for x86_64 guest when the
3026                cpu mode is set to 32 bit.  */
3027             if (arg_temp(op->args[1])->state == TS_DEAD) {
3028                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3029                     goto do_remove;
3030                 }
3031                 /* Replace the opcode and adjust the args in place,
3032                    leaving 3 unused args at the end.  */
3033                 op->opc = opc = opc_new;
3034                 op->args[1] = op->args[2];
3035                 op->args[2] = op->args[4];
3036                 /* Fall through and mark the single-word operation live.  */
3037                 nb_iargs = 2;
3038                 nb_oargs = 1;
3039             }
3040             goto do_not_remove;
3041 
3042         case INDEX_op_mulu2_i32:
3043             opc_new = INDEX_op_mul_i32;
3044             opc_new2 = INDEX_op_muluh_i32;
3045             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3046             goto do_mul2;
3047         case INDEX_op_muls2_i32:
3048             opc_new = INDEX_op_mul_i32;
3049             opc_new2 = INDEX_op_mulsh_i32;
3050             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3051             goto do_mul2;
3052         case INDEX_op_mulu2_i64:
3053             opc_new = INDEX_op_mul_i64;
3054             opc_new2 = INDEX_op_muluh_i64;
3055             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3056             goto do_mul2;
3057         case INDEX_op_muls2_i64:
3058             opc_new = INDEX_op_mul_i64;
3059             opc_new2 = INDEX_op_mulsh_i64;
3060             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3061             goto do_mul2;
3062         do_mul2:
3063             nb_iargs = 2;
3064             nb_oargs = 2;
3065             if (arg_temp(op->args[1])->state == TS_DEAD) {
3066                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3067                     /* Both parts of the operation are dead.  */
3068                     goto do_remove;
3069                 }
3070                 /* The high part of the operation is dead; generate the low. */
3071                 op->opc = opc = opc_new;
3072                 op->args[1] = op->args[2];
3073                 op->args[2] = op->args[3];
3074             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3075                 /* The low part of the operation is dead; generate the high. */
3076                 op->opc = opc = opc_new2;
3077                 op->args[0] = op->args[1];
3078                 op->args[1] = op->args[2];
3079                 op->args[2] = op->args[3];
3080             } else {
3081                 goto do_not_remove;
3082             }
3083             /* Mark the single-word operation live.  */
3084             nb_oargs = 1;
3085             goto do_not_remove;
3086 
3087         default:
3088             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3089             nb_iargs = def->nb_iargs;
3090             nb_oargs = def->nb_oargs;
3091 
3092             /* Test if the operation can be removed because all
3093                its outputs are dead. We assume that nb_oargs == 0
3094                implies side effects */
3095             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3096                 for (i = 0; i < nb_oargs; i++) {
3097                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3098                         goto do_not_remove;
3099                     }
3100                 }
3101                 goto do_remove;
3102             }
3103             goto do_not_remove;
3104 
3105         do_remove:
3106             tcg_op_remove(s, op);
3107             break;
3108 
3109         do_not_remove:
3110             for (i = 0; i < nb_oargs; i++) {
3111                 ts = arg_temp(op->args[i]);
3112 
3113                 /* Remember the preference of the uses that followed.  */
3114                 if (i < ARRAY_SIZE(op->output_pref)) {
3115                     op->output_pref[i] = *la_temp_pref(ts);
3116                 }
3117 
3118                 /* Output args are dead.  */
3119                 if (ts->state & TS_DEAD) {
3120                     arg_life |= DEAD_ARG << i;
3121                 }
3122                 if (ts->state & TS_MEM) {
3123                     arg_life |= SYNC_ARG << i;
3124                 }
3125                 ts->state = TS_DEAD;
3126                 la_reset_pref(ts);
3127             }
3128 
3129             /* If end of basic block, update.  */
3130             if (def->flags & TCG_OPF_BB_EXIT) {
3131                 la_func_end(s, nb_globals, nb_temps);
3132             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3133                 la_bb_sync(s, nb_globals, nb_temps);
3134             } else if (def->flags & TCG_OPF_BB_END) {
3135                 la_bb_end(s, nb_globals, nb_temps);
3136             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3137                 la_global_sync(s, nb_globals);
3138                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3139                     la_cross_call(s, nb_temps);
3140                 }
3141             }
3142 
3143             /* Record arguments that die in this opcode.  */
3144             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3145                 ts = arg_temp(op->args[i]);
3146                 if (ts->state & TS_DEAD) {
3147                     arg_life |= DEAD_ARG << i;
3148                 }
3149             }
3150 
3151             /* Input arguments are live for preceding opcodes.  */
3152             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3153                 ts = arg_temp(op->args[i]);
3154                 if (ts->state & TS_DEAD) {
3155                     /* For operands that were dead, initially allow
3156                        all regs for the type.  */
3157                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3158                     ts->state &= ~TS_DEAD;
3159                 }
3160             }
3161 
3162             /* Incorporate constraints for this operand.  */
3163             switch (opc) {
3164             case INDEX_op_mov_i32:
3165             case INDEX_op_mov_i64:
3166                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3167                    have proper constraints.  That said, special case
3168                    moves to propagate preferences backward.  */
3169                 if (IS_DEAD_ARG(1)) {
3170                     *la_temp_pref(arg_temp(op->args[0]))
3171                         = *la_temp_pref(arg_temp(op->args[1]));
3172                 }
3173                 break;
3174 
3175             default:
3176                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3177                     const TCGArgConstraint *ct = &def->args_ct[i];
3178                     TCGRegSet set, *pset;
3179 
3180                     ts = arg_temp(op->args[i]);
3181                     pset = la_temp_pref(ts);
3182                     set = *pset;
3183 
3184                     set &= ct->regs;
3185                     if (ct->ialias) {
3186                         set &= output_pref(op, ct->alias_index);
3187                     }
3188                     /* If the combination is not possible, restart.  */
3189                     if (set == 0) {
3190                         set = ct->regs;
3191                     }
3192                     *pset = set;
3193                 }
3194                 break;
3195             }
3196             break;
3197         }
3198         op->life = arg_life;
3199     }
3200 }
3201 
3202 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
3203 static bool liveness_pass_2(TCGContext *s)
3204 {
3205     int nb_globals = s->nb_globals;
3206     int nb_temps, i;
3207     bool changes = false;
3208     TCGOp *op, *op_next;
3209 
3210     /* Create a temporary for each indirect global.  */
3211     for (i = 0; i < nb_globals; ++i) {
3212         TCGTemp *its = &s->temps[i];
3213         if (its->indirect_reg) {
3214             TCGTemp *dts = tcg_temp_alloc(s);
3215             dts->type = its->type;
3216             dts->base_type = its->base_type;
3217             dts->temp_subindex = its->temp_subindex;
3218             dts->kind = TEMP_EBB;
3219             its->state_ptr = dts;
3220         } else {
3221             its->state_ptr = NULL;
3222         }
3223         /* All globals begin dead.  */
3224         its->state = TS_DEAD;
3225     }
3226     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3227         TCGTemp *its = &s->temps[i];
3228         its->state_ptr = NULL;
3229         its->state = TS_DEAD;
3230     }
3231 
3232     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3233         TCGOpcode opc = op->opc;
3234         const TCGOpDef *def = &tcg_op_defs[opc];
3235         TCGLifeData arg_life = op->life;
3236         int nb_iargs, nb_oargs, call_flags;
3237         TCGTemp *arg_ts, *dir_ts;
3238 
3239         if (opc == INDEX_op_call) {
3240             nb_oargs = TCGOP_CALLO(op);
3241             nb_iargs = TCGOP_CALLI(op);
3242             call_flags = tcg_call_flags(op);
3243         } else {
3244             nb_iargs = def->nb_iargs;
3245             nb_oargs = def->nb_oargs;
3246 
3247             /* Set flags similar to how calls require.  */
3248             if (def->flags & TCG_OPF_COND_BRANCH) {
3249                 /* Like reading globals: sync_globals */
3250                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3251             } else if (def->flags & TCG_OPF_BB_END) {
3252                 /* Like writing globals: save_globals */
3253                 call_flags = 0;
3254             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3255                 /* Like reading globals: sync_globals */
3256                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3257             } else {
3258                 /* No effect on globals.  */
3259                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3260                               TCG_CALL_NO_WRITE_GLOBALS);
3261             }
3262         }
3263 
3264         /* Make sure that input arguments are available.  */
3265         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3266             arg_ts = arg_temp(op->args[i]);
3267             dir_ts = arg_ts->state_ptr;
3268             if (dir_ts && arg_ts->state == TS_DEAD) {
3269                 TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3270                                   ? INDEX_op_ld_i32
3271                                   : INDEX_op_ld_i64);
3272                 TCGOp *lop = tcg_op_insert_before(s, op, lopc, 3);
3273 
3274                 lop->args[0] = temp_arg(dir_ts);
3275                 lop->args[1] = temp_arg(arg_ts->mem_base);
3276                 lop->args[2] = arg_ts->mem_offset;
3277 
3278                 /* Loaded, but synced with memory.  */
3279                 arg_ts->state = TS_MEM;
3280             }
3281         }
3282 
3283         /* Perform input replacement, and mark inputs that became dead.
3284            No action is required except keeping temp_state up to date
3285            so that we reload when needed.  */
3286         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3287             arg_ts = arg_temp(op->args[i]);
3288             dir_ts = arg_ts->state_ptr;
3289             if (dir_ts) {
3290                 op->args[i] = temp_arg(dir_ts);
3291                 changes = true;
3292                 if (IS_DEAD_ARG(i)) {
3293                     arg_ts->state = TS_DEAD;
3294                 }
3295             }
3296         }
3297 
3298         /* Liveness analysis should ensure that the following are
3299            all correct, for call sites and basic block end points.  */
3300         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3301             /* Nothing to do */
3302         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3303             for (i = 0; i < nb_globals; ++i) {
3304                 /* Liveness should see that globals are synced back,
3305                    that is, either TS_DEAD or TS_MEM.  */
3306                 arg_ts = &s->temps[i];
3307                 tcg_debug_assert(arg_ts->state_ptr == 0
3308                                  || arg_ts->state != 0);
3309             }
3310         } else {
3311             for (i = 0; i < nb_globals; ++i) {
3312                 /* Liveness should see that globals are saved back,
3313                    that is, TS_DEAD, waiting to be reloaded.  */
3314                 arg_ts = &s->temps[i];
3315                 tcg_debug_assert(arg_ts->state_ptr == 0
3316                                  || arg_ts->state == TS_DEAD);
3317             }
3318         }
3319 
3320         /* Outputs become available.  */
3321         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3322             arg_ts = arg_temp(op->args[0]);
3323             dir_ts = arg_ts->state_ptr;
3324             if (dir_ts) {
3325                 op->args[0] = temp_arg(dir_ts);
3326                 changes = true;
3327 
3328                 /* The output is now live and modified.  */
3329                 arg_ts->state = 0;
3330 
3331                 if (NEED_SYNC_ARG(0)) {
3332                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3333                                       ? INDEX_op_st_i32
3334                                       : INDEX_op_st_i64);
3335                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3336                     TCGTemp *out_ts = dir_ts;
3337 
3338                     if (IS_DEAD_ARG(0)) {
3339                         out_ts = arg_temp(op->args[1]);
3340                         arg_ts->state = TS_DEAD;
3341                         tcg_op_remove(s, op);
3342                     } else {
3343                         arg_ts->state = TS_MEM;
3344                     }
3345 
3346                     sop->args[0] = temp_arg(out_ts);
3347                     sop->args[1] = temp_arg(arg_ts->mem_base);
3348                     sop->args[2] = arg_ts->mem_offset;
3349                 } else {
3350                     tcg_debug_assert(!IS_DEAD_ARG(0));
3351                 }
3352             }
3353         } else {
3354             for (i = 0; i < nb_oargs; i++) {
3355                 arg_ts = arg_temp(op->args[i]);
3356                 dir_ts = arg_ts->state_ptr;
3357                 if (!dir_ts) {
3358                     continue;
3359                 }
3360                 op->args[i] = temp_arg(dir_ts);
3361                 changes = true;
3362 
3363                 /* The output is now live and modified.  */
3364                 arg_ts->state = 0;
3365 
3366                 /* Sync outputs upon their last write.  */
3367                 if (NEED_SYNC_ARG(i)) {
3368                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3369                                       ? INDEX_op_st_i32
3370                                       : INDEX_op_st_i64);
3371                     TCGOp *sop = tcg_op_insert_after(s, op, sopc, 3);
3372 
3373                     sop->args[0] = temp_arg(dir_ts);
3374                     sop->args[1] = temp_arg(arg_ts->mem_base);
3375                     sop->args[2] = arg_ts->mem_offset;
3376 
3377                     arg_ts->state = TS_MEM;
3378                 }
3379                 /* Drop outputs that are dead.  */
3380                 if (IS_DEAD_ARG(i)) {
3381                     arg_ts->state = TS_DEAD;
3382                 }
3383             }
3384         }
3385     }
3386 
3387     return changes;
3388 }
3389 
3390 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3391 {
3392     intptr_t off;
3393     int size, align;
3394 
3395     /* When allocating an object, look at the full type. */
3396     size = tcg_type_size(ts->base_type);
3397     switch (ts->base_type) {
3398     case TCG_TYPE_I32:
3399         align = 4;
3400         break;
3401     case TCG_TYPE_I64:
3402     case TCG_TYPE_V64:
3403         align = 8;
3404         break;
3405     case TCG_TYPE_I128:
3406     case TCG_TYPE_V128:
3407     case TCG_TYPE_V256:
3408         /*
3409          * Note that we do not require aligned storage for V256,
3410          * and that we provide alignment for I128 to match V128,
3411          * even if that's above what the host ABI requires.
3412          */
3413         align = 16;
3414         break;
3415     default:
3416         g_assert_not_reached();
3417     }
3418 
3419     /*
3420      * Assume the stack is sufficiently aligned.
3421      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3422      * and do not require 16 byte vector alignment.  This seems slightly
3423      * easier than fully parameterizing the above switch statement.
3424      */
3425     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3426     off = ROUND_UP(s->current_frame_offset, align);
3427 
3428     /* If we've exhausted the stack frame, restart with a smaller TB. */
3429     if (off + size > s->frame_end) {
3430         tcg_raise_tb_overflow(s);
3431     }
3432     s->current_frame_offset = off + size;
3433 #if defined(__sparc__)
3434     off += TCG_TARGET_STACK_BIAS;
3435 #endif
3436 
3437     /* If the object was subdivided, assign memory to all the parts. */
3438     if (ts->base_type != ts->type) {
3439         int part_size = tcg_type_size(ts->type);
3440         int part_count = size / part_size;
3441 
3442         /*
3443          * Each part is allocated sequentially in tcg_temp_new_internal.
3444          * Jump back to the first part by subtracting the current index.
3445          */
3446         ts -= ts->temp_subindex;
3447         for (int i = 0; i < part_count; ++i) {
3448             ts[i].mem_offset = off + i * part_size;
3449             ts[i].mem_base = s->frame_temp;
3450             ts[i].mem_allocated = 1;
3451         }
3452     } else {
3453         ts->mem_offset = off;
3454         ts->mem_base = s->frame_temp;
3455         ts->mem_allocated = 1;
3456     }
3457 }
3458 
3459 /* Assign @reg to @ts, and update reg_to_temp[]. */
3460 static void set_temp_val_reg(TCGContext *s, TCGTemp *ts, TCGReg reg)
3461 {
3462     if (ts->val_type == TEMP_VAL_REG) {
3463         TCGReg old = ts->reg;
3464         tcg_debug_assert(s->reg_to_temp[old] == ts);
3465         if (old == reg) {
3466             return;
3467         }
3468         s->reg_to_temp[old] = NULL;
3469     }
3470     tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3471     s->reg_to_temp[reg] = ts;
3472     ts->val_type = TEMP_VAL_REG;
3473     ts->reg = reg;
3474 }
3475 
3476 /* Assign a non-register value type to @ts, and update reg_to_temp[]. */
3477 static void set_temp_val_nonreg(TCGContext *s, TCGTemp *ts, TCGTempVal type)
3478 {
3479     tcg_debug_assert(type != TEMP_VAL_REG);
3480     if (ts->val_type == TEMP_VAL_REG) {
3481         TCGReg reg = ts->reg;
3482         tcg_debug_assert(s->reg_to_temp[reg] == ts);
3483         s->reg_to_temp[reg] = NULL;
3484     }
3485     ts->val_type = type;
3486 }
3487 
3488 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3489 
3490 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3491    mark it free; otherwise mark it dead.  */
3492 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3493 {
3494     TCGTempVal new_type;
3495 
3496     switch (ts->kind) {
3497     case TEMP_FIXED:
3498         return;
3499     case TEMP_GLOBAL:
3500     case TEMP_LOCAL:
3501         new_type = TEMP_VAL_MEM;
3502         break;
3503     case TEMP_NORMAL:
3504     case TEMP_EBB:
3505         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3506         break;
3507     case TEMP_CONST:
3508         new_type = TEMP_VAL_CONST;
3509         break;
3510     default:
3511         g_assert_not_reached();
3512     }
3513     set_temp_val_nonreg(s, ts, new_type);
3514 }
3515 
3516 /* Mark a temporary as dead.  */
3517 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3518 {
3519     temp_free_or_dead(s, ts, 1);
3520 }
3521 
3522 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3523    registers needs to be allocated to store a constant.  If 'free_or_dead'
3524    is non-zero, subsequently release the temporary; if it is positive, the
3525    temp is dead; if it is negative, the temp is free.  */
3526 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3527                       TCGRegSet preferred_regs, int free_or_dead)
3528 {
3529     if (!temp_readonly(ts) && !ts->mem_coherent) {
3530         if (!ts->mem_allocated) {
3531             temp_allocate_frame(s, ts);
3532         }
3533         switch (ts->val_type) {
3534         case TEMP_VAL_CONST:
3535             /* If we're going to free the temp immediately, then we won't
3536                require it later in a register, so attempt to store the
3537                constant to memory directly.  */
3538             if (free_or_dead
3539                 && tcg_out_sti(s, ts->type, ts->val,
3540                                ts->mem_base->reg, ts->mem_offset)) {
3541                 break;
3542             }
3543             temp_load(s, ts, tcg_target_available_regs[ts->type],
3544                       allocated_regs, preferred_regs);
3545             /* fallthrough */
3546 
3547         case TEMP_VAL_REG:
3548             tcg_out_st(s, ts->type, ts->reg,
3549                        ts->mem_base->reg, ts->mem_offset);
3550             break;
3551 
3552         case TEMP_VAL_MEM:
3553             break;
3554 
3555         case TEMP_VAL_DEAD:
3556         default:
3557             tcg_abort();
3558         }
3559         ts->mem_coherent = 1;
3560     }
3561     if (free_or_dead) {
3562         temp_free_or_dead(s, ts, free_or_dead);
3563     }
3564 }
3565 
3566 /* free register 'reg' by spilling the corresponding temporary if necessary */
3567 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3568 {
3569     TCGTemp *ts = s->reg_to_temp[reg];
3570     if (ts != NULL) {
3571         temp_sync(s, ts, allocated_regs, 0, -1);
3572     }
3573 }
3574 
3575 /**
3576  * tcg_reg_alloc:
3577  * @required_regs: Set of registers in which we must allocate.
3578  * @allocated_regs: Set of registers which must be avoided.
3579  * @preferred_regs: Set of registers we should prefer.
3580  * @rev: True if we search the registers in "indirect" order.
3581  *
3582  * The allocated register must be in @required_regs & ~@allocated_regs,
3583  * but if we can put it in @preferred_regs we may save a move later.
3584  */
3585 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3586                             TCGRegSet allocated_regs,
3587                             TCGRegSet preferred_regs, bool rev)
3588 {
3589     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3590     TCGRegSet reg_ct[2];
3591     const int *order;
3592 
3593     reg_ct[1] = required_regs & ~allocated_regs;
3594     tcg_debug_assert(reg_ct[1] != 0);
3595     reg_ct[0] = reg_ct[1] & preferred_regs;
3596 
3597     /* Skip the preferred_regs option if it cannot be satisfied,
3598        or if the preference made no difference.  */
3599     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3600 
3601     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3602 
3603     /* Try free registers, preferences first.  */
3604     for (j = f; j < 2; j++) {
3605         TCGRegSet set = reg_ct[j];
3606 
3607         if (tcg_regset_single(set)) {
3608             /* One register in the set.  */
3609             TCGReg reg = tcg_regset_first(set);
3610             if (s->reg_to_temp[reg] == NULL) {
3611                 return reg;
3612             }
3613         } else {
3614             for (i = 0; i < n; i++) {
3615                 TCGReg reg = order[i];
3616                 if (s->reg_to_temp[reg] == NULL &&
3617                     tcg_regset_test_reg(set, reg)) {
3618                     return reg;
3619                 }
3620             }
3621         }
3622     }
3623 
3624     /* We must spill something.  */
3625     for (j = f; j < 2; j++) {
3626         TCGRegSet set = reg_ct[j];
3627 
3628         if (tcg_regset_single(set)) {
3629             /* One register in the set.  */
3630             TCGReg reg = tcg_regset_first(set);
3631             tcg_reg_free(s, reg, allocated_regs);
3632             return reg;
3633         } else {
3634             for (i = 0; i < n; i++) {
3635                 TCGReg reg = order[i];
3636                 if (tcg_regset_test_reg(set, reg)) {
3637                     tcg_reg_free(s, reg, allocated_regs);
3638                     return reg;
3639                 }
3640             }
3641         }
3642     }
3643 
3644     tcg_abort();
3645 }
3646 
3647 static TCGReg tcg_reg_alloc_pair(TCGContext *s, TCGRegSet required_regs,
3648                                  TCGRegSet allocated_regs,
3649                                  TCGRegSet preferred_regs, bool rev)
3650 {
3651     int i, j, k, fmin, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3652     TCGRegSet reg_ct[2];
3653     const int *order;
3654 
3655     /* Ensure that if I is not in allocated_regs, I+1 is not either. */
3656     reg_ct[1] = required_regs & ~(allocated_regs | (allocated_regs >> 1));
3657     tcg_debug_assert(reg_ct[1] != 0);
3658     reg_ct[0] = reg_ct[1] & preferred_regs;
3659 
3660     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3661 
3662     /*
3663      * Skip the preferred_regs option if it cannot be satisfied,
3664      * or if the preference made no difference.
3665      */
3666     k = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3667 
3668     /*
3669      * Minimize the number of flushes by looking for 2 free registers first,
3670      * then a single flush, then two flushes.
3671      */
3672     for (fmin = 2; fmin >= 0; fmin--) {
3673         for (j = k; j < 2; j++) {
3674             TCGRegSet set = reg_ct[j];
3675 
3676             for (i = 0; i < n; i++) {
3677                 TCGReg reg = order[i];
3678 
3679                 if (tcg_regset_test_reg(set, reg)) {
3680                     int f = !s->reg_to_temp[reg] + !s->reg_to_temp[reg + 1];
3681                     if (f >= fmin) {
3682                         tcg_reg_free(s, reg, allocated_regs);
3683                         tcg_reg_free(s, reg + 1, allocated_regs);
3684                         return reg;
3685                     }
3686                 }
3687             }
3688         }
3689     }
3690     tcg_abort();
3691 }
3692 
3693 /* Make sure the temporary is in a register.  If needed, allocate the register
3694    from DESIRED while avoiding ALLOCATED.  */
3695 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3696                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3697 {
3698     TCGReg reg;
3699 
3700     switch (ts->val_type) {
3701     case TEMP_VAL_REG:
3702         return;
3703     case TEMP_VAL_CONST:
3704         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3705                             preferred_regs, ts->indirect_base);
3706         if (ts->type <= TCG_TYPE_I64) {
3707             tcg_out_movi(s, ts->type, reg, ts->val);
3708         } else {
3709             uint64_t val = ts->val;
3710             MemOp vece = MO_64;
3711 
3712             /*
3713              * Find the minimal vector element that matches the constant.
3714              * The targets will, in general, have to do this search anyway,
3715              * do this generically.
3716              */
3717             if (val == dup_const(MO_8, val)) {
3718                 vece = MO_8;
3719             } else if (val == dup_const(MO_16, val)) {
3720                 vece = MO_16;
3721             } else if (val == dup_const(MO_32, val)) {
3722                 vece = MO_32;
3723             }
3724 
3725             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3726         }
3727         ts->mem_coherent = 0;
3728         break;
3729     case TEMP_VAL_MEM:
3730         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3731                             preferred_regs, ts->indirect_base);
3732         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3733         ts->mem_coherent = 1;
3734         break;
3735     case TEMP_VAL_DEAD:
3736     default:
3737         tcg_abort();
3738     }
3739     set_temp_val_reg(s, ts, reg);
3740 }
3741 
3742 /* Save a temporary to memory. 'allocated_regs' is used in case a
3743    temporary registers needs to be allocated to store a constant.  */
3744 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3745 {
3746     /* The liveness analysis already ensures that globals are back
3747        in memory. Keep an tcg_debug_assert for safety. */
3748     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3749 }
3750 
3751 /* save globals to their canonical location and assume they can be
3752    modified be the following code. 'allocated_regs' is used in case a
3753    temporary registers needs to be allocated to store a constant. */
3754 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3755 {
3756     int i, n;
3757 
3758     for (i = 0, n = s->nb_globals; i < n; i++) {
3759         temp_save(s, &s->temps[i], allocated_regs);
3760     }
3761 }
3762 
3763 /* sync globals to their canonical location and assume they can be
3764    read by the following code. 'allocated_regs' is used in case a
3765    temporary registers needs to be allocated to store a constant. */
3766 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3767 {
3768     int i, n;
3769 
3770     for (i = 0, n = s->nb_globals; i < n; i++) {
3771         TCGTemp *ts = &s->temps[i];
3772         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3773                          || ts->kind == TEMP_FIXED
3774                          || ts->mem_coherent);
3775     }
3776 }
3777 
3778 /* at the end of a basic block, we assume all temporaries are dead and
3779    all globals are stored at their canonical location. */
3780 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3781 {
3782     int i;
3783 
3784     for (i = s->nb_globals; i < s->nb_temps; i++) {
3785         TCGTemp *ts = &s->temps[i];
3786 
3787         switch (ts->kind) {
3788         case TEMP_LOCAL:
3789             temp_save(s, ts, allocated_regs);
3790             break;
3791         case TEMP_NORMAL:
3792         case TEMP_EBB:
3793             /* The liveness analysis already ensures that temps are dead.
3794                Keep an tcg_debug_assert for safety. */
3795             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3796             break;
3797         case TEMP_CONST:
3798             /* Similarly, we should have freed any allocated register. */
3799             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3800             break;
3801         default:
3802             g_assert_not_reached();
3803         }
3804     }
3805 
3806     save_globals(s, allocated_regs);
3807 }
3808 
3809 /*
3810  * At a conditional branch, we assume all temporaries are dead unless
3811  * explicitly live-across-conditional-branch; all globals and local
3812  * temps are synced to their location.
3813  */
3814 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3815 {
3816     sync_globals(s, allocated_regs);
3817 
3818     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3819         TCGTemp *ts = &s->temps[i];
3820         /*
3821          * The liveness analysis already ensures that temps are dead.
3822          * Keep tcg_debug_asserts for safety.
3823          */
3824         switch (ts->kind) {
3825         case TEMP_LOCAL:
3826             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3827             break;
3828         case TEMP_NORMAL:
3829             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3830             break;
3831         case TEMP_EBB:
3832         case TEMP_CONST:
3833             break;
3834         default:
3835             g_assert_not_reached();
3836         }
3837     }
3838 }
3839 
3840 /*
3841  * Specialized code generation for INDEX_op_mov_* with a constant.
3842  */
3843 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3844                                   tcg_target_ulong val, TCGLifeData arg_life,
3845                                   TCGRegSet preferred_regs)
3846 {
3847     /* ENV should not be modified.  */
3848     tcg_debug_assert(!temp_readonly(ots));
3849 
3850     /* The movi is not explicitly generated here.  */
3851     set_temp_val_nonreg(s, ots, TEMP_VAL_CONST);
3852     ots->val = val;
3853     ots->mem_coherent = 0;
3854     if (NEED_SYNC_ARG(0)) {
3855         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3856     } else if (IS_DEAD_ARG(0)) {
3857         temp_dead(s, ots);
3858     }
3859 }
3860 
3861 /*
3862  * Specialized code generation for INDEX_op_mov_*.
3863  */
3864 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3865 {
3866     const TCGLifeData arg_life = op->life;
3867     TCGRegSet allocated_regs, preferred_regs;
3868     TCGTemp *ts, *ots;
3869     TCGType otype, itype;
3870     TCGReg oreg, ireg;
3871 
3872     allocated_regs = s->reserved_regs;
3873     preferred_regs = output_pref(op, 0);
3874     ots = arg_temp(op->args[0]);
3875     ts = arg_temp(op->args[1]);
3876 
3877     /* ENV should not be modified.  */
3878     tcg_debug_assert(!temp_readonly(ots));
3879 
3880     /* Note that otype != itype for no-op truncation.  */
3881     otype = ots->type;
3882     itype = ts->type;
3883 
3884     if (ts->val_type == TEMP_VAL_CONST) {
3885         /* propagate constant or generate sti */
3886         tcg_target_ulong val = ts->val;
3887         if (IS_DEAD_ARG(1)) {
3888             temp_dead(s, ts);
3889         }
3890         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3891         return;
3892     }
3893 
3894     /* If the source value is in memory we're going to be forced
3895        to have it in a register in order to perform the copy.  Copy
3896        the SOURCE value into its own register first, that way we
3897        don't have to reload SOURCE the next time it is used. */
3898     if (ts->val_type == TEMP_VAL_MEM) {
3899         temp_load(s, ts, tcg_target_available_regs[itype],
3900                   allocated_regs, preferred_regs);
3901     }
3902     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3903     ireg = ts->reg;
3904 
3905     if (IS_DEAD_ARG(0)) {
3906         /* mov to a non-saved dead register makes no sense (even with
3907            liveness analysis disabled). */
3908         tcg_debug_assert(NEED_SYNC_ARG(0));
3909         if (!ots->mem_allocated) {
3910             temp_allocate_frame(s, ots);
3911         }
3912         tcg_out_st(s, otype, ireg, ots->mem_base->reg, ots->mem_offset);
3913         if (IS_DEAD_ARG(1)) {
3914             temp_dead(s, ts);
3915         }
3916         temp_dead(s, ots);
3917         return;
3918     }
3919 
3920     if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3921         /*
3922          * The mov can be suppressed.  Kill input first, so that it
3923          * is unlinked from reg_to_temp, then set the output to the
3924          * reg that we saved from the input.
3925          */
3926         temp_dead(s, ts);
3927         oreg = ireg;
3928     } else {
3929         if (ots->val_type == TEMP_VAL_REG) {
3930             oreg = ots->reg;
3931         } else {
3932             /* Make sure to not spill the input register during allocation. */
3933             oreg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3934                                  allocated_regs | ((TCGRegSet)1 << ireg),
3935                                  preferred_regs, ots->indirect_base);
3936         }
3937         if (!tcg_out_mov(s, otype, oreg, ireg)) {
3938             /*
3939              * Cross register class move not supported.
3940              * Store the source register into the destination slot
3941              * and leave the destination temp as TEMP_VAL_MEM.
3942              */
3943             assert(!temp_readonly(ots));
3944             if (!ts->mem_allocated) {
3945                 temp_allocate_frame(s, ots);
3946             }
3947             tcg_out_st(s, ts->type, ireg, ots->mem_base->reg, ots->mem_offset);
3948             set_temp_val_nonreg(s, ts, TEMP_VAL_MEM);
3949             ots->mem_coherent = 1;
3950             return;
3951         }
3952     }
3953     set_temp_val_reg(s, ots, oreg);
3954     ots->mem_coherent = 0;
3955 
3956     if (NEED_SYNC_ARG(0)) {
3957         temp_sync(s, ots, allocated_regs, 0, 0);
3958     }
3959 }
3960 
/*
 * Specialized code generation for INDEX_op_dup_vec.
 *
 * op->args[0] is the vector output temp, op->args[1] the input temp.
 * A constant input is propagated via tcg_reg_alloc_do_movi().  Otherwise
 * the strategies are tried in this order:
 *  1. tcg_out_dup_vec() directly from the input register;
 *  2. a plain register move of the input into the output register;
 *  3. tcg_out_dupm_vec() from the input's memory slot;
 *  4. a load of the input into the output register.
 * After step 2 or 4 the value is in the output register, and a final
 * tcg_out_dup_vec() from the output register to itself must succeed.
 */
static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
{
    /* IS_DEAD_ARG() and NEED_SYNC_ARG() are used with this in scope. */
    const TCGLifeData arg_life = op->life;
    TCGRegSet dup_out_regs, dup_in_regs;
    TCGTemp *its, *ots;
    TCGType itype, vtype;
    unsigned vece;
    int lowpart_ofs;
    bool ok;

    ots = arg_temp(op->args[0]);
    its = arg_temp(op->args[1]);

    /* ENV should not be modified.  */
    tcg_debug_assert(!temp_readonly(ots));

    itype = its->type;
    vece = TCGOP_VECE(op);
    vtype = TCGOP_VECL(op) + TCG_TYPE_V64;

    if (its->val_type == TEMP_VAL_CONST) {
        /* Propagate constant via movi -> dupi.  */
        tcg_target_ulong val = its->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, its);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, output_pref(op, 0));
        return;
    }

    /* Register constraints of the dup_vec output (0) and input (1). */
    dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
    dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;

    /* Allocate the output register now.  */
    if (ots->val_type != TEMP_VAL_REG) {
        TCGRegSet allocated_regs = s->reserved_regs;
        TCGReg oreg;

        if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
            /* Make sure to not spill the input register. */
            tcg_regset_set_reg(allocated_regs, its->reg);
        }
        oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
                             output_pref(op, 0), ots->indirect_base);
        set_temp_val_reg(s, ots, oreg);
    }

    switch (its->val_type) {
    case TEMP_VAL_REG:
        /*
         * The dup constraints must be broad, covering all possible VECE.
         * However, tcg_out_dup_vec() gets to see the VECE and we allow it
         * to fail, indicating that extra moves are required for that case.
         */
        if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
            if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
                goto done;
            }
            /* Try again from memory or a vector input register.  */
        }
        if (!its->mem_coherent) {
            /*
             * The input register is not synced, and so an extra store
             * would be required to use memory.  Attempt an integer-vector
             * register move first.  We do not have a TCGRegSet for this.
             */
            if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
                /* Value is now in ots->reg; finish with the dup below. */
                break;
            }
            /* Sync the temp back to its slot and load from there.  */
            temp_sync(s, its, s->reserved_regs, 0, 0);
        }
        /* fall through */

    case TEMP_VAL_MEM:
        /*
         * On a big-endian host, step to the last (1 << vece) bytes of
         * the input's slot; on a little-endian host the offset is 0.
         */
        lowpart_ofs = 0;
        if (HOST_BIG_ENDIAN) {
            lowpart_ofs = tcg_type_size(itype) - (1 << vece);
        }
        if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
                             its->mem_offset + lowpart_ofs)) {
            goto done;
        }
        /* Load the input into the destination vector register. */
        tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
        break;

    default:
        g_assert_not_reached();
    }

    /* We now have a vector input register, so dup must succeed. */
    ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
    tcg_debug_assert(ok);

 done:
    /* The output register was just written; its memory slot is stale. */
    ots->mem_coherent = 0;
    if (IS_DEAD_ARG(1)) {
        temp_dead(s, its);
    }
    if (NEED_SYNC_ARG(0)) {
        temp_sync(s, ots, s->reserved_regs, 0, 0);
    }
    if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
4071 
/*
 * Generic register allocation and code emission for a single op.
 *
 * The steps, in order:
 *  1. copy the op's constant arguments into new_args[];
 *  2. satisfy each input constraint, in the order given by sort_index,
 *     recording either a constant (const_args[i] = 1) or a host register
 *     in new_args[i];
 *  3. mark dead inputs as dead;
 *  4. for conditional branches and basic-block ends, run the matching
 *     consistency checks; otherwise free call-clobbered registers if the
 *     op requires it, check globals if the op has side effects, and
 *     allocate the output registers;
 *  5. emit the host instruction via tcg_out_vec_op() or tcg_out_op();
 *  6. sync and/or release each output as the liveness data requires.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    /* IS_DEAD_ARG() and NEED_SYNC_ARG() are used with this in scope. */
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    /* Registers unavailable for inputs resp. outputs of this op. */
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    /* Arguments as passed to the backend: registers or constant values. */
    TCGArg new_args[TCG_MAX_OP_ARGS];
    /* Non-zero where new_args[] holds a constant rather than a register. */
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, i_required_regs;
        bool allocate_new_reg, copyto_new_reg;
        TCGTemp *ts2;
        int i1, i2;

        /* Inputs are visited in sort_index order, not argument order. */
        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        reg = ts->reg;
        i_preferred_regs = 0;
        i_required_regs = arg_ct->regs;
        allocate_new_reg = false;
        copyto_new_reg = false;

        switch (arg_ct->pair) {
        case 0: /* not paired */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);

                /*
                 * If the input is readonly, then it cannot also be an
                 * output and aliased to itself.  If the input is not
                 * dead after the instruction, we must allocate a new
                 * register and move it.
                 */
                if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
                    allocate_new_reg = true;
                } else if (ts->val_type == TEMP_VAL_REG) {
                    /*
                     * Check if the current register has already been
                     * allocated for another input.
                     */
                    allocate_new_reg =
                        tcg_regset_test_reg(i_allocated_regs, reg);
                }
            }
            if (!allocate_new_reg) {
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                reg = ts->reg;
                /*
                 * temp_load() leaves a temp that is already in a register
                 * where it is, so the constraint must be re-checked.
                 */
                allocate_new_reg = !tcg_regset_test_reg(i_required_regs, reg);
            }
            if (allocate_new_reg) {
                /*
                 * Allocate a new register matching the constraint
                 * and move the temporary register into it.
                 */
                temp_load(s, ts, tcg_target_available_regs[ts->type],
                          i_allocated_regs, 0);
                reg = tcg_reg_alloc(s, i_required_regs, i_allocated_regs,
                                    i_preferred_regs, ts->indirect_base);
                copyto_new_reg = true;
            }
            break;

        case 1:
            /* First of an input pair; if i1 == i2, the second is an output. */
            i1 = i;
            i2 = arg_ct->pair_index;
            ts2 = i1 != i2 ? arg_temp(op->args[i2]) : NULL;

            /*
             * It is easier to default to allocating a new pair
             * and to identify a few cases where it's not required.
             */
            if (arg_ct->ialias) {
                i_preferred_regs = output_pref(op, arg_ct->alias_index);
                if (IS_DEAD_ARG(i1) &&
                    IS_DEAD_ARG(i2) &&
                    !temp_readonly(ts) &&
                    ts->val_type == TEMP_VAL_REG &&
                    ts->reg < TCG_TARGET_NB_REGS - 1 &&
                    tcg_regset_test_reg(i_required_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg) &&
                    !tcg_regset_test_reg(i_allocated_regs, reg + 1) &&
                    (ts2
                     ? ts2->val_type == TEMP_VAL_REG &&
                       ts2->reg == reg + 1 &&
                       !temp_readonly(ts2)
                     : s->reg_to_temp[reg + 1] == NULL)) {
                    /* The existing registers already form a usable pair. */
                    break;
                }
            } else {
                /* Without aliasing, the pair must also be an input. */
                tcg_debug_assert(ts2);
                if (ts->val_type == TEMP_VAL_REG &&
                    ts2->val_type == TEMP_VAL_REG &&
                    ts2->reg == reg + 1 &&
                    tcg_regset_test_reg(i_required_regs, reg)) {
                    break;
                }
            }
            reg = tcg_reg_alloc_pair(s, i_required_regs, i_allocated_regs,
                                     0, ts->indirect_base);
            goto do_pair;

        case 2: /* pair second */
            /* The first of the pair was assigned earlier; take the next reg. */
            reg = new_args[arg_ct->pair_index] + 1;
            goto do_pair;

        case 3: /* ialias with second output, no first input */
            tcg_debug_assert(arg_ct->ialias);
            i_preferred_regs = output_pref(op, arg_ct->alias_index);

            if (IS_DEAD_ARG(i) &&
                !temp_readonly(ts) &&
                ts->val_type == TEMP_VAL_REG &&
                reg > 0 &&
                s->reg_to_temp[reg - 1] == NULL &&
                tcg_regset_test_reg(i_required_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg) &&
                !tcg_regset_test_reg(i_allocated_regs, reg - 1)) {
                /* Reserve the register below for the first output. */
                tcg_regset_set_reg(i_allocated_regs, reg - 1);
                break;
            }
            /*
             * The constraint applies to this (second) register, so shift
             * it down by one to express it in terms of the pair's first.
             */
            reg = tcg_reg_alloc_pair(s, i_required_regs >> 1,
                                     i_allocated_regs, 0,
                                     ts->indirect_base);
            tcg_regset_set_reg(i_allocated_regs, reg);
            reg += 1;
            goto do_pair;

        do_pair:
            /*
             * If an aliased input is not dead after the instruction,
             * we must allocate a new register and move it.
             */
            if (arg_ct->ialias && (!IS_DEAD_ARG(i) || temp_readonly(ts))) {
                TCGRegSet t_allocated_regs = i_allocated_regs;

                /*
                 * Because of the alias, and the continued life, make sure
                 * that the temp is somewhere *other* than the reg pair,
                 * and we get a copy in reg.
                 */
                tcg_regset_set_reg(t_allocated_regs, reg);
                tcg_regset_set_reg(t_allocated_regs, reg + 1);
                if (ts->val_type == TEMP_VAL_REG && ts->reg == reg) {
                    /* If ts was already in reg, copy it somewhere else. */
                    TCGReg nr;
                    bool ok;

                    tcg_debug_assert(ts->kind != TEMP_FIXED);
                    nr = tcg_reg_alloc(s, tcg_target_available_regs[ts->type],
                                       t_allocated_regs, 0, ts->indirect_base);
                    ok = tcg_out_mov(s, ts->type, nr, reg);
                    tcg_debug_assert(ok);

                    set_temp_val_reg(s, ts, nr);
                } else {
                    temp_load(s, ts, tcg_target_available_regs[ts->type],
                              t_allocated_regs, 0);
                    copyto_new_reg = true;
                }
            } else {
                /* Preferably allocate to reg, otherwise copy. */
                i_required_regs = (TCGRegSet)1 << reg;
                temp_load(s, ts, i_required_regs, i_allocated_regs,
                          i_preferred_regs);
                copyto_new_reg = ts->reg != reg;
            }
            break;

        default:
            g_assert_not_reached();
        }

        if (copyto_new_reg) {
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_COND_BRANCH) {
        tcg_reg_alloc_cbranch(s, i_allocated_regs);
    } else if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            /* Outputs are visited in sort_index order as well. */
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!temp_readonly(ts));

            switch (arg_ct->pair) {
            case 0: /* not paired */
                if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                    /* Reuse the register of the aliased input. */
                    reg = new_args[arg_ct->alias_index];
                } else if (arg_ct->newreg) {
                    /* Must not overlap any input register either. */
                    reg = tcg_reg_alloc(s, arg_ct->regs,
                                        i_allocated_regs | o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                } else {
                    reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                        output_pref(op, k), ts->indirect_base);
                }
                break;

            case 1: /* first of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                    break;
                }
                reg = tcg_reg_alloc_pair(s, arg_ct->regs, o_allocated_regs,
                                         output_pref(op, k), ts->indirect_base);
                break;

            case 2: /* second of pair */
                tcg_debug_assert(!arg_ct->newreg);
                if (arg_ct->oalias) {
                    reg = new_args[arg_ct->alias_index];
                } else {
                    reg = new_args[arg_ct->pair_index] + 1;
                }
                break;

            case 3: /* first of pair, aliasing with a second input */
                tcg_debug_assert(!arg_ct->newreg);
                reg = new_args[arg_ct->pair_index] - 1;
                break;

            default:
                g_assert_not_reached();
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            set_temp_val_reg(s, ts, reg);
            /* The register is about to be written; memory becomes stale. */
            ts->mem_coherent = 0;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* sync outputs to memory and/or release them, as liveness requires */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!temp_readonly(ts));

        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
4398 
4399 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4400 {
4401     const TCGLifeData arg_life = op->life;
4402     TCGTemp *ots, *itsl, *itsh;
4403     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4404 
4405     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4406     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4407     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4408 
4409     ots = arg_temp(op->args[0]);
4410     itsl = arg_temp(op->args[1]);
4411     itsh = arg_temp(op->args[2]);
4412 
4413     /* ENV should not be modified.  */
4414     tcg_debug_assert(!temp_readonly(ots));
4415 
4416     /* Allocate the output register now.  */
4417     if (ots->val_type != TEMP_VAL_REG) {
4418         TCGRegSet allocated_regs = s->reserved_regs;
4419         TCGRegSet dup_out_regs =
4420             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4421         TCGReg oreg;
4422 
4423         /* Make sure to not spill the input registers. */
4424         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4425             tcg_regset_set_reg(allocated_regs, itsl->reg);
4426         }
4427         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4428             tcg_regset_set_reg(allocated_regs, itsh->reg);
4429         }
4430 
4431         oreg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4432                              output_pref(op, 0), ots->indirect_base);
4433         set_temp_val_reg(s, ots, oreg);
4434     }
4435 
4436     /* Promote dup2 of immediates to dupi_vec. */
4437     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4438         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4439         MemOp vece = MO_64;
4440 
4441         if (val == dup_const(MO_8, val)) {
4442             vece = MO_8;
4443         } else if (val == dup_const(MO_16, val)) {
4444             vece = MO_16;
4445         } else if (val == dup_const(MO_32, val)) {
4446             vece = MO_32;
4447         }
4448 
4449         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4450         goto done;
4451     }
4452 
4453     /* If the two inputs form one 64-bit value, try dupm_vec. */
4454     if (itsl->temp_subindex == HOST_BIG_ENDIAN &&
4455         itsh->temp_subindex == !HOST_BIG_ENDIAN &&
4456         itsl == itsh + (HOST_BIG_ENDIAN ? 1 : -1)) {
4457         TCGTemp *its = itsl - HOST_BIG_ENDIAN;
4458 
4459         temp_sync(s, its + 0, s->reserved_regs, 0, 0);
4460         temp_sync(s, its + 1, s->reserved_regs, 0, 0);
4461 
4462         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4463                              its->mem_base->reg, its->mem_offset)) {
4464             goto done;
4465         }
4466     }
4467 
4468     /* Fall back to generic expansion. */
4469     return false;
4470 
4471  done:
4472     ots->mem_coherent = 0;
4473     if (IS_DEAD_ARG(1)) {
4474         temp_dead(s, itsl);
4475     }
4476     if (IS_DEAD_ARG(2)) {
4477         temp_dead(s, itsh);
4478     }
4479     if (NEED_SYNC_ARG(0)) {
4480         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4481     } else if (IS_DEAD_ARG(0)) {
4482         temp_dead(s, ots);
4483     }
4484     return true;
4485 }
4486 
4487 static void load_arg_reg(TCGContext *s, TCGReg reg, TCGTemp *ts,
4488                          TCGRegSet allocated_regs)
4489 {
4490     if (ts->val_type == TEMP_VAL_REG) {
4491         if (ts->reg != reg) {
4492             tcg_reg_free(s, reg, allocated_regs);
4493             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4494                 /*
4495                  * Cross register class move not supported.  Sync the
4496                  * temp back to its slot and load from there.
4497                  */
4498                 temp_sync(s, ts, allocated_regs, 0, 0);
4499                 tcg_out_ld(s, ts->type, reg,
4500                            ts->mem_base->reg, ts->mem_offset);
4501             }
4502         }
4503     } else {
4504         TCGRegSet arg_set = 0;
4505 
4506         tcg_reg_free(s, reg, allocated_regs);
4507         tcg_regset_set_reg(arg_set, reg);
4508         temp_load(s, ts, arg_set, allocated_regs, 0);
4509     }
4510 }
4511 
4512 static void load_arg_stk(TCGContext *s, int stk_slot, TCGTemp *ts,
4513                          TCGRegSet allocated_regs)
4514 {
4515     /*
4516      * When the destination is on the stack, load up the temp and store.
4517      * If there are many call-saved registers, the temp might live to
4518      * see another use; otherwise it'll be discarded.
4519      */
4520     temp_load(s, ts, tcg_target_available_regs[ts->type], allocated_regs, 0);
4521     tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK,
4522                TCG_TARGET_CALL_STACK_OFFSET +
4523                stk_slot * sizeof(tcg_target_long));
4524 }
4525 
4526 static void load_arg_normal(TCGContext *s, const TCGCallArgumentLoc *l,
4527                             TCGTemp *ts, TCGRegSet *allocated_regs)
4528 {
4529     if (REG_P(l)) {
4530         TCGReg reg = tcg_target_call_iarg_regs[l->arg_slot];
4531         load_arg_reg(s, reg, ts, *allocated_regs);
4532         tcg_regset_set_reg(*allocated_regs, reg);
4533     } else {
4534         load_arg_stk(s, l->arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs),
4535                      ts, *allocated_regs);
4536     }
4537 }
4538 
4539 static void load_arg_ref(TCGContext *s, int arg_slot, TCGReg ref_base,
4540                          intptr_t ref_off, TCGRegSet *allocated_regs)
4541 {
4542     TCGReg reg;
4543     int stk_slot = arg_slot - ARRAY_SIZE(tcg_target_call_iarg_regs);
4544 
4545     if (stk_slot < 0) {
4546         reg = tcg_target_call_iarg_regs[arg_slot];
4547         tcg_reg_free(s, reg, *allocated_regs);
4548         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4549         tcg_regset_set_reg(*allocated_regs, reg);
4550     } else {
4551         reg = tcg_reg_alloc(s, tcg_target_available_regs[TCG_TYPE_PTR],
4552                             *allocated_regs, 0, false);
4553         tcg_out_addi_ptr(s, reg, ref_base, ref_off);
4554         tcg_out_st(s, TCG_TYPE_PTR, reg, TCG_REG_CALL_STACK,
4555                    TCG_TARGET_CALL_STACK_OFFSET
4556                    + stk_slot * sizeof(tcg_target_long));
4557     }
4558 }
4559 
4560 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4561 {
4562     const int nb_oargs = TCGOP_CALLO(op);
4563     const int nb_iargs = TCGOP_CALLI(op);
4564     const TCGLifeData arg_life = op->life;
4565     const TCGHelperInfo *info = tcg_call_info(op);
4566     TCGRegSet allocated_regs = s->reserved_regs;
4567     int i;
4568 
4569     /*
4570      * Move inputs into place in reverse order,
4571      * so that we place stacked arguments first.
4572      */
4573     for (i = nb_iargs - 1; i >= 0; --i) {
4574         const TCGCallArgumentLoc *loc = &info->in[i];
4575         TCGTemp *ts = arg_temp(op->args[nb_oargs + i]);
4576 
4577         switch (loc->kind) {
4578         case TCG_CALL_ARG_NORMAL:
4579         case TCG_CALL_ARG_EXTEND_U:
4580         case TCG_CALL_ARG_EXTEND_S:
4581             load_arg_normal(s, loc, ts, &allocated_regs);
4582             break;
4583         case TCG_CALL_ARG_BY_REF:
4584             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
4585             load_arg_ref(s, loc->arg_slot, TCG_REG_CALL_STACK,
4586                          TCG_TARGET_CALL_STACK_OFFSET
4587                          + loc->ref_slot * sizeof(tcg_target_long),
4588                          &allocated_regs);
4589             break;
4590         case TCG_CALL_ARG_BY_REF_N:
4591             load_arg_stk(s, loc->ref_slot, ts, allocated_regs);
4592             break;
4593         default:
4594             g_assert_not_reached();
4595         }
4596     }
4597 
4598     /* Mark dead temporaries and free the associated registers.  */
4599     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4600         if (IS_DEAD_ARG(i)) {
4601             temp_dead(s, arg_temp(op->args[i]));
4602         }
4603     }
4604 
4605     /* Clobber call registers.  */
4606     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4607         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4608             tcg_reg_free(s, i, allocated_regs);
4609         }
4610     }
4611 
4612     /*
4613      * Save globals if they might be written by the helper,
4614      * sync them if they might be read.
4615      */
4616     if (info->flags & TCG_CALL_NO_READ_GLOBALS) {
4617         /* Nothing to do */
4618     } else if (info->flags & TCG_CALL_NO_WRITE_GLOBALS) {
4619         sync_globals(s, allocated_regs);
4620     } else {
4621         save_globals(s, allocated_regs);
4622     }
4623 
4624     /*
4625      * If the ABI passes a pointer to the returned struct as the first
4626      * argument, load that now.  Pass a pointer to the output home slot.
4627      */
4628     if (info->out_kind == TCG_CALL_RET_BY_REF) {
4629         TCGTemp *ts = arg_temp(op->args[0]);
4630 
4631         if (!ts->mem_allocated) {
4632             temp_allocate_frame(s, ts);
4633         }
4634         load_arg_ref(s, 0, ts->mem_base->reg, ts->mem_offset, &allocated_regs);
4635     }
4636 
4637     tcg_out_call(s, tcg_call_func(op), info);
4638 
4639     /* Assign output registers and emit moves if needed.  */
4640     switch (info->out_kind) {
4641     case TCG_CALL_RET_NORMAL:
4642         for (i = 0; i < nb_oargs; i++) {
4643             TCGTemp *ts = arg_temp(op->args[i]);
4644             TCGReg reg = tcg_target_call_oarg_reg(TCG_CALL_RET_NORMAL, i);
4645 
4646             /* ENV should not be modified.  */
4647             tcg_debug_assert(!temp_readonly(ts));
4648 
4649             set_temp_val_reg(s, ts, reg);
4650             ts->mem_coherent = 0;
4651         }
4652         break;
4653 
4654     case TCG_CALL_RET_BY_VEC:
4655         {
4656             TCGTemp *ts = arg_temp(op->args[0]);
4657 
4658             tcg_debug_assert(ts->base_type == TCG_TYPE_I128);
4659             tcg_debug_assert(ts->temp_subindex == 0);
4660             if (!ts->mem_allocated) {
4661                 temp_allocate_frame(s, ts);
4662             }
4663             tcg_out_st(s, TCG_TYPE_V128,
4664                        tcg_target_call_oarg_reg(TCG_CALL_RET_BY_VEC, 0),
4665                        ts->mem_base->reg, ts->mem_offset);
4666         }
4667         /* fall through to mark all parts in memory */
4668 
4669     case TCG_CALL_RET_BY_REF:
4670         /* The callee has performed a write through the reference. */
4671         for (i = 0; i < nb_oargs; i++) {
4672             TCGTemp *ts = arg_temp(op->args[i]);
4673             ts->val_type = TEMP_VAL_MEM;
4674         }
4675         break;
4676 
4677     default:
4678         g_assert_not_reached();
4679     }
4680 
4681     /* Flush or discard output registers as needed. */
4682     for (i = 0; i < nb_oargs; i++) {
4683         TCGTemp *ts = arg_temp(op->args[i]);
4684         if (NEED_SYNC_ARG(i)) {
4685             temp_sync(s, ts, s->reserved_regs, 0, IS_DEAD_ARG(i));
4686         } else if (IS_DEAD_ARG(i)) {
4687             temp_dead(s, ts);
4688         }
4689     }
4690 }
4691 
4692 #ifdef CONFIG_PROFILER
4693 
4694 /* avoid copy/paste errors */
4695 #define PROF_ADD(to, from, field)                       \
4696     do {                                                \
4697         (to)->field += qatomic_read(&((from)->field));  \
4698     } while (0)
4699 
4700 #define PROF_MAX(to, from, field)                                       \
4701     do {                                                                \
4702         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4703         if (val__ > (to)->field) {                                      \
4704             (to)->field = val__;                                        \
4705         }                                                               \
4706     } while (0)
4707 
4708 /* Pass in a zero'ed @prof */
4709 static inline
4710 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4711 {
4712     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4713     unsigned int i;
4714 
4715     for (i = 0; i < n_ctxs; i++) {
4716         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4717         const TCGProfile *orig = &s->prof;
4718 
4719         if (counters) {
4720             PROF_ADD(prof, orig, cpu_exec_time);
4721             PROF_ADD(prof, orig, tb_count1);
4722             PROF_ADD(prof, orig, tb_count);
4723             PROF_ADD(prof, orig, op_count);
4724             PROF_MAX(prof, orig, op_count_max);
4725             PROF_ADD(prof, orig, temp_count);
4726             PROF_MAX(prof, orig, temp_count_max);
4727             PROF_ADD(prof, orig, del_op_count);
4728             PROF_ADD(prof, orig, code_in_len);
4729             PROF_ADD(prof, orig, code_out_len);
4730             PROF_ADD(prof, orig, search_out_len);
4731             PROF_ADD(prof, orig, interm_time);
4732             PROF_ADD(prof, orig, code_time);
4733             PROF_ADD(prof, orig, la_time);
4734             PROF_ADD(prof, orig, opt_time);
4735             PROF_ADD(prof, orig, restore_count);
4736             PROF_ADD(prof, orig, restore_time);
4737         }
4738         if (table) {
4739             int i;
4740 
4741             for (i = 0; i < NB_OPS; i++) {
4742                 PROF_ADD(prof, orig, table_op_count[i]);
4743             }
4744         }
4745     }
4746 }
4747 
4748 #undef PROF_ADD
4749 #undef PROF_MAX
4750 
4751 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4752 {
4753     tcg_profile_snapshot(prof, true, false);
4754 }
4755 
4756 static void tcg_profile_snapshot_table(TCGProfile *prof)
4757 {
4758     tcg_profile_snapshot(prof, false, true);
4759 }
4760 
4761 void tcg_dump_op_count(GString *buf)
4762 {
4763     TCGProfile prof = {};
4764     int i;
4765 
4766     tcg_profile_snapshot_table(&prof);
4767     for (i = 0; i < NB_OPS; i++) {
4768         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4769                                prof.table_op_count[i]);
4770     }
4771 }
4772 
4773 int64_t tcg_cpu_exec_time(void)
4774 {
4775     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4776     unsigned int i;
4777     int64_t ret = 0;
4778 
4779     for (i = 0; i < n_ctxs; i++) {
4780         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4781         const TCGProfile *prof = &s->prof;
4782 
4783         ret += qatomic_read(&prof->cpu_exec_time);
4784     }
4785     return ret;
4786 }
4787 #else
4788 void tcg_dump_op_count(GString *buf)
4789 {
4790     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4791 }
4792 
/*
 * Profiler disabled at build time: there is no execution time to report.
 * Calling this is treated as fatal; the function reports the error and
 * terminates the process, so it never returns a value.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4798 #endif
4799 
4800 
4801 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
4802 {
4803 #ifdef CONFIG_PROFILER
4804     TCGProfile *prof = &s->prof;
4805 #endif
4806     int i, num_insns;
4807     TCGOp *op;
4808 
4809 #ifdef CONFIG_PROFILER
4810     {
4811         int n = 0;
4812 
4813         QTAILQ_FOREACH(op, &s->ops, link) {
4814             n++;
4815         }
4816         qatomic_set(&prof->op_count, prof->op_count + n);
4817         if (n > prof->op_count_max) {
4818             qatomic_set(&prof->op_count_max, n);
4819         }
4820 
4821         n = s->nb_temps;
4822         qatomic_set(&prof->temp_count, prof->temp_count + n);
4823         if (n > prof->temp_count_max) {
4824             qatomic_set(&prof->temp_count_max, n);
4825         }
4826     }
4827 #endif
4828 
4829 #ifdef DEBUG_DISAS
4830     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4831                  && qemu_log_in_addr_range(pc_start))) {
4832         FILE *logfile = qemu_log_trylock();
4833         if (logfile) {
4834             fprintf(logfile, "OP:\n");
4835             tcg_dump_ops(s, logfile, false);
4836             fprintf(logfile, "\n");
4837             qemu_log_unlock(logfile);
4838         }
4839     }
4840 #endif
4841 
4842 #ifdef CONFIG_DEBUG_TCG
4843     /* Ensure all labels referenced have been emitted.  */
4844     {
4845         TCGLabel *l;
4846         bool error = false;
4847 
4848         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4849             if (unlikely(!l->present) && l->refs) {
4850                 qemu_log_mask(CPU_LOG_TB_OP,
4851                               "$L%d referenced but not present.\n", l->id);
4852                 error = true;
4853             }
4854         }
4855         assert(!error);
4856     }
4857 #endif
4858 
4859 #ifdef CONFIG_PROFILER
4860     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4861 #endif
4862 
4863 #ifdef USE_TCG_OPTIMIZATIONS
4864     tcg_optimize(s);
4865 #endif
4866 
4867 #ifdef CONFIG_PROFILER
4868     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4869     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4870 #endif
4871 
4872     reachable_code_pass(s);
4873     liveness_pass_1(s);
4874 
4875     if (s->nb_indirects > 0) {
4876 #ifdef DEBUG_DISAS
4877         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4878                      && qemu_log_in_addr_range(pc_start))) {
4879             FILE *logfile = qemu_log_trylock();
4880             if (logfile) {
4881                 fprintf(logfile, "OP before indirect lowering:\n");
4882                 tcg_dump_ops(s, logfile, false);
4883                 fprintf(logfile, "\n");
4884                 qemu_log_unlock(logfile);
4885             }
4886         }
4887 #endif
4888         /* Replace indirect temps with direct temps.  */
4889         if (liveness_pass_2(s)) {
4890             /* If changes were made, re-run liveness.  */
4891             liveness_pass_1(s);
4892         }
4893     }
4894 
4895 #ifdef CONFIG_PROFILER
4896     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4897 #endif
4898 
4899 #ifdef DEBUG_DISAS
4900     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4901                  && qemu_log_in_addr_range(pc_start))) {
4902         FILE *logfile = qemu_log_trylock();
4903         if (logfile) {
4904             fprintf(logfile, "OP after optimization and liveness analysis:\n");
4905             tcg_dump_ops(s, logfile, true);
4906             fprintf(logfile, "\n");
4907             qemu_log_unlock(logfile);
4908         }
4909     }
4910 #endif
4911 
4912     /* Initialize goto_tb jump offsets. */
4913     tb->jmp_reset_offset[0] = TB_JMP_OFFSET_INVALID;
4914     tb->jmp_reset_offset[1] = TB_JMP_OFFSET_INVALID;
4915     tb->jmp_insn_offset[0] = TB_JMP_OFFSET_INVALID;
4916     tb->jmp_insn_offset[1] = TB_JMP_OFFSET_INVALID;
4917 
4918     tcg_reg_alloc_start(s);
4919 
4920     /*
4921      * Reset the buffer pointers when restarting after overflow.
4922      * TODO: Move this into translate-all.c with the rest of the
4923      * buffer management.  Having only this done here is confusing.
4924      */
4925     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4926     s->code_ptr = s->code_buf;
4927 
4928 #ifdef TCG_TARGET_NEED_LDST_LABELS
4929     QSIMPLEQ_INIT(&s->ldst_labels);
4930 #endif
4931 #ifdef TCG_TARGET_NEED_POOL_LABELS
4932     s->pool_labels = NULL;
4933 #endif
4934 
4935     num_insns = -1;
4936     QTAILQ_FOREACH(op, &s->ops, link) {
4937         TCGOpcode opc = op->opc;
4938 
4939 #ifdef CONFIG_PROFILER
4940         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4941 #endif
4942 
4943         switch (opc) {
4944         case INDEX_op_mov_i32:
4945         case INDEX_op_mov_i64:
4946         case INDEX_op_mov_vec:
4947             tcg_reg_alloc_mov(s, op);
4948             break;
4949         case INDEX_op_dup_vec:
4950             tcg_reg_alloc_dup(s, op);
4951             break;
4952         case INDEX_op_insn_start:
4953             if (num_insns >= 0) {
4954                 size_t off = tcg_current_code_size(s);
4955                 s->gen_insn_end_off[num_insns] = off;
4956                 /* Assert that we do not overflow our stored offset.  */
4957                 assert(s->gen_insn_end_off[num_insns] == off);
4958             }
4959             num_insns++;
4960             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4961                 target_ulong a;
4962 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4963                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4964 #else
4965                 a = op->args[i];
4966 #endif
4967                 s->gen_insn_data[num_insns][i] = a;
4968             }
4969             break;
4970         case INDEX_op_discard:
4971             temp_dead(s, arg_temp(op->args[0]));
4972             break;
4973         case INDEX_op_set_label:
4974             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4975             tcg_out_label(s, arg_label(op->args[0]));
4976             break;
4977         case INDEX_op_call:
4978             tcg_reg_alloc_call(s, op);
4979             break;
4980         case INDEX_op_exit_tb:
4981             tcg_out_exit_tb(s, op->args[0]);
4982             break;
4983         case INDEX_op_goto_tb:
4984             tcg_out_goto_tb(s, op->args[0]);
4985             break;
4986         case INDEX_op_dup2_vec:
4987             if (tcg_reg_alloc_dup2(s, op)) {
4988                 break;
4989             }
4990             /* fall through */
4991         default:
4992             /* Sanity check that we've not introduced any unhandled opcodes. */
4993             tcg_debug_assert(tcg_op_supported(opc));
4994             /* Note: in order to speed up the code, it would be much
4995                faster to have specialized register allocator functions for
4996                some common argument patterns */
4997             tcg_reg_alloc_op(s, op);
4998             break;
4999         }
5000         /* Test for (pending) buffer overflow.  The assumption is that any
5001            one operation beginning below the high water mark cannot overrun
5002            the buffer completely.  Thus we can test for overflow after
5003            generating code without having to check during generation.  */
5004         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
5005             return -1;
5006         }
5007         /* Test for TB overflow, as seen by gen_insn_end_off.  */
5008         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
5009             return -2;
5010         }
5011     }
5012     tcg_debug_assert(num_insns >= 0);
5013     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
5014 
5015     /* Generate TB finalization at the end of block */
5016 #ifdef TCG_TARGET_NEED_LDST_LABELS
5017     i = tcg_out_ldst_finalize(s);
5018     if (i < 0) {
5019         return i;
5020     }
5021 #endif
5022 #ifdef TCG_TARGET_NEED_POOL_LABELS
5023     i = tcg_out_pool_finalize(s);
5024     if (i < 0) {
5025         return i;
5026     }
5027 #endif
5028     if (!tcg_resolve_relocs(s)) {
5029         return -2;
5030     }
5031 
5032 #ifndef CONFIG_TCG_INTERPRETER
5033     /* flush instruction cache */
5034     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
5035                         (uintptr_t)s->code_buf,
5036                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
5037 #endif
5038 
5039     return tcg_current_code_size(s);
5040 }
5041 
5042 #ifdef CONFIG_PROFILER
5043 void tcg_dump_info(GString *buf)
5044 {
5045     TCGProfile prof = {};
5046     const TCGProfile *s;
5047     int64_t tb_count;
5048     int64_t tb_div_count;
5049     int64_t tot;
5050 
5051     tcg_profile_snapshot_counters(&prof);
5052     s = &prof;
5053     tb_count = s->tb_count;
5054     tb_div_count = tb_count ? tb_count : 1;
5055     tot = s->interm_time + s->code_time;
5056 
5057     g_string_append_printf(buf, "JIT cycles          %" PRId64
5058                            " (%0.3f s at 2.4 GHz)\n",
5059                            tot, tot / 2.4e9);
5060     g_string_append_printf(buf, "translated TBs      %" PRId64
5061                            " (aborted=%" PRId64 " %0.1f%%)\n",
5062                            tb_count, s->tb_count1 - tb_count,
5063                            (double)(s->tb_count1 - s->tb_count)
5064                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
5065     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
5066                            (double)s->op_count / tb_div_count, s->op_count_max);
5067     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
5068                            (double)s->del_op_count / tb_div_count);
5069     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
5070                            (double)s->temp_count / tb_div_count,
5071                            s->temp_count_max);
5072     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
5073                            (double)s->code_out_len / tb_div_count);
5074     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
5075                            (double)s->search_out_len / tb_div_count);
5076 
5077     g_string_append_printf(buf, "cycles/op           %0.1f\n",
5078                            s->op_count ? (double)tot / s->op_count : 0);
5079     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
5080                            s->code_in_len ? (double)tot / s->code_in_len : 0);
5081     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
5082                            s->code_out_len ? (double)tot / s->code_out_len : 0);
5083     g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
5084                            s->search_out_len ?
5085                            (double)tot / s->search_out_len : 0);
5086     if (tot == 0) {
5087         tot = 1;
5088     }
5089     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
5090                            (double)s->interm_time / tot * 100.0);
5091     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
5092                            (double)s->code_time / tot * 100.0);
5093     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
5094                            (double)s->opt_time / (s->code_time ?
5095                                                   s->code_time : 1)
5096                            * 100.0);
5097     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
5098                            (double)s->la_time / (s->code_time ?
5099                                                  s->code_time : 1) * 100.0);
5100     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
5101                            s->restore_count);
5102     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
5103                            s->restore_count ?
5104                            (double)s->restore_time / s->restore_count : 0);
5105 }
5106 #else
5107 void tcg_dump_info(GString *buf)
5108 {
5109     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
5110 }
5111 #endif
5112 
5113 #ifdef ELF_HOST_MACHINE
5114 /* In order to use this feature, the backend needs to do three things:
5115 
5116    (1) Define ELF_HOST_MACHINE to indicate both what value to
5117        put into the ELF image and to indicate support for the feature.
5118 
5119    (2) Define tcg_register_jit.  This should create a buffer containing
5120        the contents of a .debug_frame section that describes the post-
5121        prologue unwind info for the tcg machine.
5122 
5123    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
5124 */
5125 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */

/* Values stored in jit_descriptor.action_flag. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

/* One registered symbol file; entries form a doubly linked list. */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;       /* start of the in-memory symbol file */
    uint64_t symfile_size;          /* its size in bytes */
};

/* The descriptor the debugger locates by its symbol name. */
struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;                   /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/*
 * Deliberately empty and never inlined; the empty asm statement keeps
 * the body from being treated as having no effect.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    __asm__("");
}

/*
 * The version is set by static initialisation because GDB may inspect
 * it before any of our code has had a chance to run.
 */
struct jit_descriptor __jit_debug_descriptor = {
    .version = 1,
    .action_flag = JIT_NOACTION,
    .relevant_entry = NULL,
    .first_entry = NULL,
};

/* End GDB interface.  */
5158 
/*
 * Look up @str in the ELF-style string table @strtab and return its byte
 * offset from the start of the table.
 *
 * The table is expected to start with a NUL byte (the empty name at
 * offset 0), followed by NUL-terminated names, and to be terminated by
 * an additional NUL, i.e. an empty entry.  The search starts at offset 1.
 *
 * Returns the offset of the first matching entry.  If the terminating
 * empty entry is reached without a match, 0 is returned (the offset of
 * the empty name) instead of scanning beyond the end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    while (1) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        if (*p == '\0') {
            /* Empty entry: end of table reached, name is not present. */
            return 0;
        }
        p += strlen(p) + 1;
    }
}
5170 
5171 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
5172                                  const void *debug_frame,
5173                                  size_t debug_frame_size)
5174 {
5175     struct __attribute__((packed)) DebugInfo {
5176         uint32_t  len;
5177         uint16_t  version;
5178         uint32_t  abbrev;
5179         uint8_t   ptr_size;
5180         uint8_t   cu_die;
5181         uint16_t  cu_lang;
5182         uintptr_t cu_low_pc;
5183         uintptr_t cu_high_pc;
5184         uint8_t   fn_die;
5185         char      fn_name[16];
5186         uintptr_t fn_low_pc;
5187         uintptr_t fn_high_pc;
5188         uint8_t   cu_eoc;
5189     };
5190 
5191     struct ElfImage {
5192         ElfW(Ehdr) ehdr;
5193         ElfW(Phdr) phdr;
5194         ElfW(Shdr) shdr[7];
5195         ElfW(Sym)  sym[2];
5196         struct DebugInfo di;
5197         uint8_t    da[24];
5198         char       str[80];
5199     };
5200 
5201     struct ElfImage *img;
5202 
5203     static const struct ElfImage img_template = {
5204         .ehdr = {
5205             .e_ident[EI_MAG0] = ELFMAG0,
5206             .e_ident[EI_MAG1] = ELFMAG1,
5207             .e_ident[EI_MAG2] = ELFMAG2,
5208             .e_ident[EI_MAG3] = ELFMAG3,
5209             .e_ident[EI_CLASS] = ELF_CLASS,
5210             .e_ident[EI_DATA] = ELF_DATA,
5211             .e_ident[EI_VERSION] = EV_CURRENT,
5212             .e_type = ET_EXEC,
5213             .e_machine = ELF_HOST_MACHINE,
5214             .e_version = EV_CURRENT,
5215             .e_phoff = offsetof(struct ElfImage, phdr),
5216             .e_shoff = offsetof(struct ElfImage, shdr),
5217             .e_ehsize = sizeof(ElfW(Shdr)),
5218             .e_phentsize = sizeof(ElfW(Phdr)),
5219             .e_phnum = 1,
5220             .e_shentsize = sizeof(ElfW(Shdr)),
5221             .e_shnum = ARRAY_SIZE(img->shdr),
5222             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
5223 #ifdef ELF_HOST_FLAGS
5224             .e_flags = ELF_HOST_FLAGS,
5225 #endif
5226 #ifdef ELF_OSABI
5227             .e_ident[EI_OSABI] = ELF_OSABI,
5228 #endif
5229         },
5230         .phdr = {
5231             .p_type = PT_LOAD,
5232             .p_flags = PF_X,
5233         },
5234         .shdr = {
5235             [0] = { .sh_type = SHT_NULL },
5236             /* Trick: The contents of code_gen_buffer are not present in
5237                this fake ELF file; that got allocated elsewhere.  Therefore
5238                we mark .text as SHT_NOBITS (similar to .bss) so that readers
5239                will not look for contents.  We can record any address.  */
5240             [1] = { /* .text */
5241                 .sh_type = SHT_NOBITS,
5242                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
5243             },
5244             [2] = { /* .debug_info */
5245                 .sh_type = SHT_PROGBITS,
5246                 .sh_offset = offsetof(struct ElfImage, di),
5247                 .sh_size = sizeof(struct DebugInfo),
5248             },
5249             [3] = { /* .debug_abbrev */
5250                 .sh_type = SHT_PROGBITS,
5251                 .sh_offset = offsetof(struct ElfImage, da),
5252                 .sh_size = sizeof(img->da),
5253             },
5254             [4] = { /* .debug_frame */
5255                 .sh_type = SHT_PROGBITS,
5256                 .sh_offset = sizeof(struct ElfImage),
5257             },
5258             [5] = { /* .symtab */
5259                 .sh_type = SHT_SYMTAB,
5260                 .sh_offset = offsetof(struct ElfImage, sym),
5261                 .sh_size = sizeof(img->sym),
5262                 .sh_info = 1,
5263                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
5264                 .sh_entsize = sizeof(ElfW(Sym)),
5265             },
5266             [6] = { /* .strtab */
5267                 .sh_type = SHT_STRTAB,
5268                 .sh_offset = offsetof(struct ElfImage, str),
5269                 .sh_size = sizeof(img->str),
5270             }
5271         },
5272         .sym = {
5273             [1] = { /* code_gen_buffer */
5274                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5275                 .st_shndx = 1,
5276             }
5277         },
5278         .di = {
5279             .len = sizeof(struct DebugInfo) - 4,
5280             .version = 2,
5281             .ptr_size = sizeof(void *),
5282             .cu_die = 1,
5283             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5284             .fn_die = 2,
5285             .fn_name = "code_gen_buffer"
5286         },
5287         .da = {
5288             1,          /* abbrev number (the cu) */
5289             0x11, 1,    /* DW_TAG_compile_unit, has children */
5290             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5291             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5292             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5293             0, 0,       /* end of abbrev */
5294             2,          /* abbrev number (the fn) */
5295             0x2e, 0,    /* DW_TAG_subprogram, no children */
5296             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5297             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5298             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5299             0, 0,       /* end of abbrev */
5300             0           /* no more abbrev */
5301         },
5302         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5303                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5304     };
5305 
5306     /* We only need a single jit entry; statically allocate it.  */
5307     static struct jit_code_entry one_entry;
5308 
5309     uintptr_t buf = (uintptr_t)buf_ptr;
5310     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5311     DebugFrameHeader *dfh;
5312 
5313     img = g_malloc(img_size);
5314     *img = img_template;
5315 
5316     img->phdr.p_vaddr = buf;
5317     img->phdr.p_paddr = buf;
5318     img->phdr.p_memsz = buf_size;
5319 
5320     img->shdr[1].sh_name = find_string(img->str, ".text");
5321     img->shdr[1].sh_addr = buf;
5322     img->shdr[1].sh_size = buf_size;
5323 
5324     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5325     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5326 
5327     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5328     img->shdr[4].sh_size = debug_frame_size;
5329 
5330     img->shdr[5].sh_name = find_string(img->str, ".symtab");
5331     img->shdr[6].sh_name = find_string(img->str, ".strtab");
5332 
5333     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5334     img->sym[1].st_value = buf;
5335     img->sym[1].st_size = buf_size;
5336 
5337     img->di.cu_low_pc = buf;
5338     img->di.cu_high_pc = buf + buf_size;
5339     img->di.fn_low_pc = buf;
5340     img->di.fn_high_pc = buf + buf_size;
5341 
5342     dfh = (DebugFrameHeader *)(img + 1);
5343     memcpy(dfh, debug_frame, debug_frame_size);
5344     dfh->fde.func_start = buf;
5345     dfh->fde.func_len = buf_size;
5346 
5347 #ifdef DEBUG_JIT
5348     /* Enable this block to be able to debug the ELF image file creation.
5349        One can use readelf, objdump, or other inspection utilities.  */
5350     {
5351         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
5352         FILE *f = fopen(jit, "w+b");
5353         if (f) {
5354             if (fwrite(img, img_size, 1, f) != img_size) {
5355                 /* Avoid stupid unused return value warning for fwrite.  */
5356             }
5357             fclose(f);
5358         }
5359     }
5360 #endif
5361 
5362     one_entry.symfile_addr = img;
5363     one_entry.symfile_size = img_size;
5364 
5365     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5366     __jit_debug_descriptor.relevant_entry = &one_entry;
5367     __jit_debug_descriptor.first_entry = &one_entry;
5368     __jit_debug_register_code();
5369 }
5370 #else
5371 /* No support for the feature.  Provide the entry point expected by exec.c,
5372    and implement the internal function we declared earlier.  */
5373 
/*
 * Stub variant of the internal registration helper, compiled in the
 * #else branch of the ELF_HOST_MACHINE conditional.  It accepts the same
 * arguments as the real implementation and ignores all of them.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty: there is nothing to register.  */
}
5379 
/*
 * Stub public entry point, compiled in the #else branch of the
 * ELF_HOST_MACHINE conditional.  @buf and @buf_size are ignored.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* Intentionally empty: JIT registration is not supported here.  */
}
5383 #endif /* ELF_HOST_MACHINE */
5384 
5385 #if !TCG_TARGET_MAYBE_vec
5386 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5387 {
5388     g_assert_not_reached();
5389 }
5390 #endif
5391