xref: /qemu/tcg/tcg.c (revision 727385c4)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 
40 /* Note: the long term plan is to reduce the dependencies on the QEMU
41    CPU definitions. Currently they are used for qemu_ld/st
42    instructions */
43 #define NO_CPU_IO_DEFS
44 
45 #include "exec/exec-all.h"
46 #include "tcg/tcg-op.h"
47 
48 #if UINTPTR_MAX == UINT32_MAX
49 # define ELF_CLASS  ELFCLASS32
50 #else
51 # define ELF_CLASS  ELFCLASS64
52 #endif
53 #ifdef HOST_WORDS_BIGENDIAN
54 # define ELF_DATA   ELFDATA2MSB
55 #else
56 # define ELF_DATA   ELFDATA2LSB
57 #endif
58 
59 #include "elf.h"
60 #include "exec/log.h"
61 #include "tcg/tcg-ldst.h"
62 #include "tcg-internal.h"
63 
64 #ifdef CONFIG_TCG_INTERPRETER
65 #include <ffi.h>
66 #endif
67 
/* Forward declarations for functions defined in tcg-target.c.inc and
   used here. */
70 static void tcg_target_init(TCGContext *s);
71 static void tcg_target_qemu_prologue(TCGContext *s);
72 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
73                         intptr_t value, intptr_t addend);
74 
75 /* The CIE and FDE header definitions will be common to all hosts.  */
/*
 * CIE (Common Information Entry) header of the debug frame; this
 * definition is common to all hosts.
 * NOTE(review): the field names look like the DWARF .debug_frame CIE
 * fields -- confirm against the DWARF specification before editing.
 */
typedef struct {
    /* Forced to pointer alignment, which also aligns the whole struct. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
85 
/*
 * FDE (Frame Description Entry) header of the debug frame; packed, and
 * common to all hosts.
 * NOTE(review): func_start/func_len presumably describe the address range
 * of the generated code covered by this entry -- confirm with the user of
 * this struct.
 */
typedef struct QEMU_PACKED {
    /* Forced to pointer alignment despite the packed attribute. */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
92 
/* A CIE header immediately followed by an FDE header, packed together. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
97 
98 static void tcg_register_jit_int(const void *buf, size_t size,
99                                  const void *debug_frame,
100                                  size_t debug_frame_size)
101     __attribute__((unused));
102 
103 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
104 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
105                        intptr_t arg2);
106 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
107 static void tcg_out_movi(TCGContext *s, TCGType type,
108                          TCGReg ret, tcg_target_long arg);
109 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
110                        const TCGArg args[TCG_MAX_OP_ARGS],
111                        const int const_args[TCG_MAX_OP_ARGS]);
112 #if TCG_TARGET_MAYBE_vec
113 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
114                             TCGReg dst, TCGReg src);
115 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
116                              TCGReg dst, TCGReg base, intptr_t offset);
117 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
118                              TCGReg dst, int64_t arg);
119 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
120                            unsigned vecl, unsigned vece,
121                            const TCGArg args[TCG_MAX_OP_ARGS],
122                            const int const_args[TCG_MAX_OP_ARGS]);
123 #else
124 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
125                                    TCGReg dst, TCGReg src)
126 {
127     g_assert_not_reached();
128 }
129 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
130                                     TCGReg dst, TCGReg base, intptr_t offset)
131 {
132     g_assert_not_reached();
133 }
134 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
135                                     TCGReg dst, int64_t arg)
136 {
137     g_assert_not_reached();
138 }
139 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
140                                   unsigned vecl, unsigned vece,
141                                   const TCGArg args[TCG_MAX_OP_ARGS],
142                                   const int const_args[TCG_MAX_OP_ARGS])
143 {
144     g_assert_not_reached();
145 }
146 #endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148                        intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150                         TCGReg base, intptr_t ofs);
151 #ifdef CONFIG_TCG_INTERPRETER
152 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
153                          ffi_cif *cif);
154 #else
155 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
156 #endif
157 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
158 #ifdef TCG_TARGET_NEED_LDST_LABELS
159 static int tcg_out_ldst_finalize(TCGContext *s);
160 #endif
161 
/* Parent context; zeroed and filled in by tcg_context_init(). */
TCGContext tcg_init_ctx;
/* Per-thread pointer to the context the current thread translates with. */
__thread TCGContext *tcg_ctx;

/*
 * Table of registered contexts, the number of entries claimed so far and
 * the table capacity.  Entries are claimed in tcg_register_thread().
 */
TCGContext **tcg_ctxs;
unsigned int tcg_cur_ctxs;
unsigned int tcg_max_ctxs;
/* The "env" global bound to TCG_AREG0; assigned in tcg_context_init(). */
TCGv_env cpu_env = 0;
/* Asserted non-NULL at the end of tcg_prologue_init() for native hosts. */
const void *tcg_code_gen_epilogue;
/* NOTE(review): not referenced in this part of the file -- see its users. */
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
/* Executable address of the generated prologue; set in tcg_prologue_init(). */
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
/* Consulted by tcg_context_init() when building indirect_reg_alloc_order. */
static TCGRegSet tcg_target_call_clobber_regs;
178 
179 #if TCG_TARGET_INSN_UNIT_SIZE == 1
180 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
181 {
182     *s->code_ptr++ = v;
183 }
184 
185 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
186                                                       uint8_t v)
187 {
188     *p = v;
189 }
190 #endif
191 
192 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
193 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
194 {
195     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
196         *s->code_ptr++ = v;
197     } else {
198         tcg_insn_unit *p = s->code_ptr;
199         memcpy(p, &v, sizeof(v));
200         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
201     }
202 }
203 
204 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
205                                                        uint16_t v)
206 {
207     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
208         *p = v;
209     } else {
210         memcpy(p, &v, sizeof(v));
211     }
212 }
213 #endif
214 
215 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
216 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
217 {
218     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
219         *s->code_ptr++ = v;
220     } else {
221         tcg_insn_unit *p = s->code_ptr;
222         memcpy(p, &v, sizeof(v));
223         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
224     }
225 }
226 
227 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
228                                                        uint32_t v)
229 {
230     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
231         *p = v;
232     } else {
233         memcpy(p, &v, sizeof(v));
234     }
235 }
236 #endif
237 
238 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
239 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
240 {
241     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
242         *s->code_ptr++ = v;
243     } else {
244         tcg_insn_unit *p = s->code_ptr;
245         memcpy(p, &v, sizeof(v));
246         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
247     }
248 }
249 
250 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
251                                                        uint64_t v)
252 {
253     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
254         *p = v;
255     } else {
256         memcpy(p, &v, sizeof(v));
257     }
258 }
259 #endif
260 
261 /* label relocation processing */
262 
263 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
264                           TCGLabel *l, intptr_t addend)
265 {
266     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
267 
268     r->type = type;
269     r->ptr = code_ptr;
270     r->addend = addend;
271     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
272 }
273 
274 static void tcg_out_label(TCGContext *s, TCGLabel *l)
275 {
276     tcg_debug_assert(!l->has_value);
277     l->has_value = 1;
278     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
279 }
280 
281 TCGLabel *gen_new_label(void)
282 {
283     TCGContext *s = tcg_ctx;
284     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
285 
286     memset(l, 0, sizeof(TCGLabel));
287     l->id = s->nb_labels++;
288     QSIMPLEQ_INIT(&l->relocs);
289 
290     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
291 
292     return l;
293 }
294 
295 static bool tcg_resolve_relocs(TCGContext *s)
296 {
297     TCGLabel *l;
298 
299     QSIMPLEQ_FOREACH(l, &s->labels, next) {
300         TCGRelocation *r;
301         uintptr_t value = l->u.value;
302 
303         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
304             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
305                 return false;
306             }
307         }
308     }
309     return true;
310 }
311 
312 static void set_jmp_reset_offset(TCGContext *s, int which)
313 {
314     /*
315      * We will check for overflow at the end of the opcode loop in
316      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
317      */
318     s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
319 }
320 
321 /* Signal overflow, starting over with fewer guest insns. */
322 static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
323 {
324     siglongjmp(s->jmp_trans, -2);
325 }
326 
/*
 * Token-pasting helpers: glue prefix P onto one to six constraint names,
 * separating the names with '_', to form a single identifier.
 */
#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
333 
/*
 * Define an enumeration for the various combinations.
 *
 * First expansion of tcg-target-con-set.h: each C_Ox_Iy()/C_N1_I2() entry
 * becomes one enumerator (note the trailing comma in every macro).
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

/* One enumerator per constraint set listed by the target. */
typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
356 
357 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
358 
359 #undef C_O0_I1
360 #undef C_O0_I2
361 #undef C_O0_I3
362 #undef C_O0_I4
363 #undef C_O1_I1
364 #undef C_O1_I2
365 #undef C_O1_I3
366 #undef C_O1_I4
367 #undef C_N1_I2
368 #undef C_O2_I1
369 #undef C_O2_I2
370 #undef C_O2_I3
371 #undef C_O2_I4
372 
/*
 * Put all of the constraint sets into an array, indexed by the enum.
 *
 * Second expansion of tcg-target-con-set.h: each entry becomes an
 * initializer whose args_ct_str holds the stringified constraints, outputs
 * first.  C_N1_I2 prefixes its output constraint with "&".
 */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

/* Same order as TCGConstraintSetIndex, since both come from one header. */
static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};
395 
396 
397 #undef C_O0_I1
398 #undef C_O0_I2
399 #undef C_O0_I3
400 #undef C_O0_I4
401 #undef C_O1_I1
402 #undef C_O1_I2
403 #undef C_O1_I3
404 #undef C_O1_I4
405 #undef C_N1_I2
406 #undef C_O2_I1
407 #undef C_O2_I2
408 #undef C_O2_I3
409 #undef C_O2_I4
410 
/*
 * Expand the enumerator to be returned from tcg_target_op_def().
 *
 * Third definition of the macros: same identifiers as the enum expansion
 * but without the trailing comma, so they can be used as expressions in
 * the target code included below.
 */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
429 
430 #include "tcg-target.c.inc"
431 
432 static void alloc_tcg_plugin_context(TCGContext *s)
433 {
434 #ifdef CONFIG_PLUGIN
435     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
436     s->plugin_tb->insns =
437         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
438 #endif
439 }
440 
/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
455  */
456 #ifdef CONFIG_USER_ONLY
457 void tcg_register_thread(void)
458 {
459     tcg_ctx = &tcg_init_ctx;
460 }
461 #else
462 void tcg_register_thread(void)
463 {
464     TCGContext *s = g_malloc(sizeof(*s));
465     unsigned int i, n;
466 
467     *s = tcg_init_ctx;
468 
469     /* Relink mem_base.  */
470     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
471         if (tcg_init_ctx.temps[i].mem_base) {
472             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
473             tcg_debug_assert(b >= 0 && b < n);
474             s->temps[i].mem_base = &s->temps[b];
475         }
476     }
477 
478     /* Claim an entry in tcg_ctxs */
479     n = qatomic_fetch_inc(&tcg_cur_ctxs);
480     g_assert(n < tcg_max_ctxs);
481     qatomic_set(&tcg_ctxs[n], s);
482 
483     if (n > 0) {
484         alloc_tcg_plugin_context(s);
485         tcg_region_initial_alloc(s);
486     }
487 
488     tcg_ctx = s;
489 }
490 #endif /* !CONFIG_USER_ONLY */
491 
492 /* pool based memory allocation */
493 void *tcg_malloc_internal(TCGContext *s, int size)
494 {
495     TCGPool *p;
496     int pool_size;
497 
498     if (size > TCG_POOL_CHUNK_SIZE) {
499         /* big malloc: insert a new pool (XXX: could optimize) */
500         p = g_malloc(sizeof(TCGPool) + size);
501         p->size = size;
502         p->next = s->pool_first_large;
503         s->pool_first_large = p;
504         return p->data;
505     } else {
506         p = s->pool_current;
507         if (!p) {
508             p = s->pool_first;
509             if (!p)
510                 goto new_pool;
511         } else {
512             if (!p->next) {
513             new_pool:
514                 pool_size = TCG_POOL_CHUNK_SIZE;
515                 p = g_malloc(sizeof(TCGPool) + pool_size);
516                 p->size = pool_size;
517                 p->next = NULL;
518                 if (s->pool_current)
519                     s->pool_current->next = p;
520                 else
521                     s->pool_first = p;
522             } else {
523                 p = p->next;
524             }
525         }
526     }
527     s->pool_current = p;
528     s->pool_cur = p->data + size;
529     s->pool_end = p->data + p->size;
530     return p->data;
531 }
532 
533 void tcg_pool_reset(TCGContext *s)
534 {
535     TCGPool *p, *t;
536     for (p = s->pool_first_large; p; p = t) {
537         t = p->next;
538         g_free(p);
539     }
540     s->pool_first_large = NULL;
541     s->pool_cur = s->pool_end = NULL;
542     s->pool_current = NULL;
543 }
544 
545 #include "exec/helper-proto.h"
546 
/* Table of all helpers, generated by including exec/helper-tcg.h. */
static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
/* Maps a helper's function pointer to its all_helpers[] entry. */
static GHashTable *helper_table;
551 
552 #ifdef CONFIG_TCG_INTERPRETER
/* Maps a helper typemask to the ffi_cif prepared for that signature. */
static GHashTable *ffi_table;

/*
 * libffi type descriptor for each 3-bit dh_typecode_* value.  Slots with
 * no initializer stay NULL.
 */
static ffi_type * const typecode_to_ffi[8] = {
    [dh_typecode_void] = &ffi_type_void,
    [dh_typecode_i32]  = &ffi_type_uint32,
    [dh_typecode_s32]  = &ffi_type_sint32,
    [dh_typecode_i64]  = &ffi_type_uint64,
    [dh_typecode_s64]  = &ffi_type_sint64,
    [dh_typecode_ptr]  = &ffi_type_pointer,
};
563 #endif
564 
565 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
566 static void process_op_defs(TCGContext *s);
567 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
568                                             TCGReg reg, const char *name);
569 
570 static void tcg_context_init(unsigned max_cpus)
571 {
572     TCGContext *s = &tcg_init_ctx;
573     int op, total_args, n, i;
574     TCGOpDef *def;
575     TCGArgConstraint *args_ct;
576     TCGTemp *ts;
577 
578     memset(s, 0, sizeof(*s));
579     s->nb_globals = 0;
580 
581     /* Count total number of arguments and allocate the corresponding
582        space */
583     total_args = 0;
584     for(op = 0; op < NB_OPS; op++) {
585         def = &tcg_op_defs[op];
586         n = def->nb_iargs + def->nb_oargs;
587         total_args += n;
588     }
589 
590     args_ct = g_new0(TCGArgConstraint, total_args);
591 
592     for(op = 0; op < NB_OPS; op++) {
593         def = &tcg_op_defs[op];
594         def->args_ct = args_ct;
595         n = def->nb_iargs + def->nb_oargs;
596         args_ct += n;
597     }
598 
599     /* Register helpers.  */
600     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
601     helper_table = g_hash_table_new(NULL, NULL);
602 
603     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
604         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
605                             (gpointer)&all_helpers[i]);
606     }
607 
608 #ifdef CONFIG_TCG_INTERPRETER
609     /* g_direct_hash/equal for direct comparisons on uint32_t.  */
610     ffi_table = g_hash_table_new(NULL, NULL);
611     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
612         struct {
613             ffi_cif cif;
614             ffi_type *args[];
615         } *ca;
616         uint32_t typemask = all_helpers[i].typemask;
617         gpointer hash = (gpointer)(uintptr_t)typemask;
618         ffi_status status;
619         int nargs;
620 
621         if (g_hash_table_lookup(ffi_table, hash)) {
622             continue;
623         }
624 
625         /* Ignoring the return type, find the last non-zero field. */
626         nargs = 32 - clz32(typemask >> 3);
627         nargs = DIV_ROUND_UP(nargs, 3);
628 
629         ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
630         ca->cif.rtype = typecode_to_ffi[typemask & 7];
631         ca->cif.nargs = nargs;
632 
633         if (nargs != 0) {
634             ca->cif.arg_types = ca->args;
635             for (i = 0; i < nargs; ++i) {
636                 int typecode = extract32(typemask, (i + 1) * 3, 3);
637                 ca->args[i] = typecode_to_ffi[typecode];
638             }
639         }
640 
641         status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
642                               ca->cif.rtype, ca->cif.arg_types);
643         assert(status == FFI_OK);
644 
645         g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
646     }
647 #endif
648 
649     tcg_target_init(s);
650     process_op_defs(s);
651 
652     /* Reverse the order of the saved registers, assuming they're all at
653        the start of tcg_target_reg_alloc_order.  */
654     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
655         int r = tcg_target_reg_alloc_order[n];
656         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
657             break;
658         }
659     }
660     for (i = 0; i < n; ++i) {
661         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
662     }
663     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
664         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
665     }
666 
667     alloc_tcg_plugin_context(s);
668 
669     tcg_ctx = s;
670     /*
671      * In user-mode we simply share the init context among threads, since we
672      * use a single region. See the documentation tcg_region_init() for the
673      * reasoning behind this.
674      * In softmmu we will have at most max_cpus TCG threads.
675      */
676 #ifdef CONFIG_USER_ONLY
677     tcg_ctxs = &tcg_ctx;
678     tcg_cur_ctxs = 1;
679     tcg_max_ctxs = 1;
680 #else
681     tcg_max_ctxs = max_cpus;
682     tcg_ctxs = g_new0(TCGContext *, max_cpus);
683 #endif
684 
685     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
686     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
687     cpu_env = temp_tcgv_ptr(ts);
688 }
689 
/*
 * Public entry point: set up the initial TCG context first, then the
 * code-generation regions.  TB_SIZE and SPLITWX are passed through to
 * tcg_region_init() unchanged; MAX_CPUS is given to both steps.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);

    tcg_region_init(tb_size, splitwx, max_cpus);
}
695 
696 /*
697  * Allocate TBs right before their corresponding translated code, making
698  * sure that TBs and code are on different cache lines.
699  */
700 TranslationBlock *tcg_tb_alloc(TCGContext *s)
701 {
702     uintptr_t align = qemu_icache_linesize;
703     TranslationBlock *tb;
704     void *next;
705 
706  retry:
707     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
708     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
709 
710     if (unlikely(next > s->code_gen_highwater)) {
711         if (tcg_region_alloc(s)) {
712             return NULL;
713         }
714         goto retry;
715     }
716     qatomic_set(&s->code_gen_ptr, next);
717     s->data_gen_ptr = NULL;
718     return tb;
719 }
720 
/*
 * Generate the host prologue at the current code_gen_ptr of S.
 *
 * The target backend emits the prologue, any constant pool is finalized,
 * the instruction cache is flushed for native hosts, the result is
 * optionally logged, and finally tcg_region_prologue_set() is called.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size;

    /* Start emitting at the beginning of the free code area. */
    s->code_ptr = s->code_gen_ptr;
    s->code_buf = s->code_gen_ptr;
    s->data_gen_ptr = NULL;

#ifndef CONFIG_TCG_INTERPRETER
    /* The prologue's executable address is the entry point for TBs. */
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
#endif

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    prologue_size = tcg_current_code_size(s);

#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf, prologue_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            /* Code comes first; data follows from data_gen_ptr onwards. */
            size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(s->code_gen_ptr, code_size);

            /* Dump the data part one tcg_target_ulong at a time. */
            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(s->code_gen_ptr, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

#ifndef CONFIG_TCG_INTERPRETER
    /*
     * Assert that goto_ptr is implemented completely, setting an epilogue.
     * For tci, we use NULL as the signal to return from the interpreter,
     * so skip this check.
     */
    tcg_debug_assert(tcg_code_gen_epilogue != NULL);
#endif

    tcg_region_prologue_set(s);
}
798 
799 void tcg_func_start(TCGContext *s)
800 {
801     tcg_pool_reset(s);
802     s->nb_temps = s->nb_globals;
803 
804     /* No temps have been previously allocated for size or locality.  */
805     memset(s->free_temps, 0, sizeof(s->free_temps));
806 
807     /* No constant temps have been previously allocated. */
808     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
809         if (s->const_table[i]) {
810             g_hash_table_remove_all(s->const_table[i]);
811         }
812     }
813 
814     s->nb_ops = 0;
815     s->nb_labels = 0;
816     s->current_frame_offset = s->frame_start;
817 
818 #ifdef CONFIG_DEBUG_TCG
819     s->goto_tb_issue_mask = 0;
820 #endif
821 
822     QTAILQ_INIT(&s->ops);
823     QTAILQ_INIT(&s->free_ops);
824     QSIMPLEQ_INIT(&s->labels);
825 }
826 
827 static TCGTemp *tcg_temp_alloc(TCGContext *s)
828 {
829     int n = s->nb_temps++;
830 
831     if (n >= TCG_MAX_TEMPS) {
832         tcg_raise_tb_overflow(s);
833     }
834     return memset(&s->temps[n], 0, sizeof(TCGTemp));
835 }
836 
837 static TCGTemp *tcg_global_alloc(TCGContext *s)
838 {
839     TCGTemp *ts;
840 
841     tcg_debug_assert(s->nb_globals == s->nb_temps);
842     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
843     s->nb_globals++;
844     ts = tcg_temp_alloc(s);
845     ts->kind = TEMP_GLOBAL;
846 
847     return ts;
848 }
849 
850 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
851                                             TCGReg reg, const char *name)
852 {
853     TCGTemp *ts;
854 
855     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
856         tcg_abort();
857     }
858 
859     ts = tcg_global_alloc(s);
860     ts->base_type = type;
861     ts->type = type;
862     ts->kind = TEMP_FIXED;
863     ts->reg = reg;
864     ts->name = name;
865     tcg_regset_set_reg(s->reserved_regs, reg);
866 
867     return ts;
868 }
869 
870 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
871 {
872     s->frame_start = start;
873     s->frame_end = start + size;
874     s->frame_temp
875         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
876 }
877 
878 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
879                                      intptr_t offset, const char *name)
880 {
881     TCGContext *s = tcg_ctx;
882     TCGTemp *base_ts = tcgv_ptr_temp(base);
883     TCGTemp *ts = tcg_global_alloc(s);
884     int indirect_reg = 0, bigendian = 0;
885 #ifdef HOST_WORDS_BIGENDIAN
886     bigendian = 1;
887 #endif
888 
889     switch (base_ts->kind) {
890     case TEMP_FIXED:
891         break;
892     case TEMP_GLOBAL:
893         /* We do not support double-indirect registers.  */
894         tcg_debug_assert(!base_ts->indirect_reg);
895         base_ts->indirect_base = 1;
896         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
897                             ? 2 : 1);
898         indirect_reg = 1;
899         break;
900     default:
901         g_assert_not_reached();
902     }
903 
904     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
905         TCGTemp *ts2 = tcg_global_alloc(s);
906         char buf[64];
907 
908         ts->base_type = TCG_TYPE_I64;
909         ts->type = TCG_TYPE_I32;
910         ts->indirect_reg = indirect_reg;
911         ts->mem_allocated = 1;
912         ts->mem_base = base_ts;
913         ts->mem_offset = offset + bigendian * 4;
914         pstrcpy(buf, sizeof(buf), name);
915         pstrcat(buf, sizeof(buf), "_0");
916         ts->name = strdup(buf);
917 
918         tcg_debug_assert(ts2 == ts + 1);
919         ts2->base_type = TCG_TYPE_I64;
920         ts2->type = TCG_TYPE_I32;
921         ts2->indirect_reg = indirect_reg;
922         ts2->mem_allocated = 1;
923         ts2->mem_base = base_ts;
924         ts2->mem_offset = offset + (1 - bigendian) * 4;
925         pstrcpy(buf, sizeof(buf), name);
926         pstrcat(buf, sizeof(buf), "_1");
927         ts2->name = strdup(buf);
928     } else {
929         ts->base_type = type;
930         ts->type = type;
931         ts->indirect_reg = indirect_reg;
932         ts->mem_allocated = 1;
933         ts->mem_base = base_ts;
934         ts->mem_offset = offset;
935         ts->name = name;
936     }
937     return ts;
938 }
939 
940 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
941 {
942     TCGContext *s = tcg_ctx;
943     TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
944     TCGTemp *ts;
945     int idx, k;
946 
947     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
948     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
949     if (idx < TCG_MAX_TEMPS) {
950         /* There is already an available temp with the right type.  */
951         clear_bit(idx, s->free_temps[k].l);
952 
953         ts = &s->temps[idx];
954         ts->temp_allocated = 1;
955         tcg_debug_assert(ts->base_type == type);
956         tcg_debug_assert(ts->kind == kind);
957     } else {
958         ts = tcg_temp_alloc(s);
959         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
960             TCGTemp *ts2 = tcg_temp_alloc(s);
961 
962             ts->base_type = type;
963             ts->type = TCG_TYPE_I32;
964             ts->temp_allocated = 1;
965             ts->kind = kind;
966 
967             tcg_debug_assert(ts2 == ts + 1);
968             ts2->base_type = TCG_TYPE_I64;
969             ts2->type = TCG_TYPE_I32;
970             ts2->temp_allocated = 1;
971             ts2->kind = kind;
972         } else {
973             ts->base_type = type;
974             ts->type = type;
975             ts->temp_allocated = 1;
976             ts->kind = kind;
977         }
978     }
979 
980 #if defined(CONFIG_DEBUG_TCG)
981     s->temps_in_use++;
982 #endif
983     return ts;
984 }
985 
986 TCGv_vec tcg_temp_new_vec(TCGType type)
987 {
988     TCGTemp *t;
989 
990 #ifdef CONFIG_DEBUG_TCG
991     switch (type) {
992     case TCG_TYPE_V64:
993         assert(TCG_TARGET_HAS_v64);
994         break;
995     case TCG_TYPE_V128:
996         assert(TCG_TARGET_HAS_v128);
997         break;
998     case TCG_TYPE_V256:
999         assert(TCG_TARGET_HAS_v256);
1000         break;
1001     default:
1002         g_assert_not_reached();
1003     }
1004 #endif
1005 
1006     t = tcg_temp_new_internal(type, 0);
1007     return temp_tcgv_vec(t);
1008 }
1009 
1010 /* Create a new temp of the same type as an existing temp.  */
1011 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1012 {
1013     TCGTemp *t = tcgv_vec_temp(match);
1014 
1015     tcg_debug_assert(t->temp_allocated != 0);
1016 
1017     t = tcg_temp_new_internal(t->base_type, 0);
1018     return temp_tcgv_vec(t);
1019 }
1020 
1021 void tcg_temp_free_internal(TCGTemp *ts)
1022 {
1023     TCGContext *s = tcg_ctx;
1024     int k, idx;
1025 
1026     /* In order to simplify users of tcg_constant_*, silently ignore free. */
1027     if (ts->kind == TEMP_CONST) {
1028         return;
1029     }
1030 
1031 #if defined(CONFIG_DEBUG_TCG)
1032     s->temps_in_use--;
1033     if (s->temps_in_use < 0) {
1034         fprintf(stderr, "More temporaries freed than allocated!\n");
1035     }
1036 #endif
1037 
1038     tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1039     tcg_debug_assert(ts->temp_allocated != 0);
1040     ts->temp_allocated = 0;
1041 
1042     idx = temp_idx(ts);
1043     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1044     set_bit(idx, s->free_temps[k].l);
1045 }
1046 
1047 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1048 {
1049     TCGContext *s = tcg_ctx;
1050     GHashTable *h = s->const_table[type];
1051     TCGTemp *ts;
1052 
1053     if (h == NULL) {
1054         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1055         s->const_table[type] = h;
1056     }
1057 
1058     ts = g_hash_table_lookup(h, &val);
1059     if (ts == NULL) {
1060         ts = tcg_temp_alloc(s);
1061 
1062         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1063             TCGTemp *ts2 = tcg_temp_alloc(s);
1064 
1065             ts->base_type = TCG_TYPE_I64;
1066             ts->type = TCG_TYPE_I32;
1067             ts->kind = TEMP_CONST;
1068             ts->temp_allocated = 1;
1069             /*
1070              * Retain the full value of the 64-bit constant in the low
1071              * part, so that the hash table works.  Actual uses will
1072              * truncate the value to the low part.
1073              */
1074             ts->val = val;
1075 
1076             tcg_debug_assert(ts2 == ts + 1);
1077             ts2->base_type = TCG_TYPE_I64;
1078             ts2->type = TCG_TYPE_I32;
1079             ts2->kind = TEMP_CONST;
1080             ts2->temp_allocated = 1;
1081             ts2->val = val >> 32;
1082         } else {
1083             ts->base_type = type;
1084             ts->type = type;
1085             ts->kind = TEMP_CONST;
1086             ts->temp_allocated = 1;
1087             ts->val = val;
1088         }
1089         g_hash_table_insert(h, &ts->val, ts);
1090     }
1091 
1092     return ts;
1093 }
1094 
1095 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1096 {
1097     val = dup_const(vece, val);
1098     return temp_tcgv_vec(tcg_constant_internal(type, val));
1099 }
1100 
1101 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1102 {
1103     TCGTemp *t = tcgv_vec_temp(match);
1104 
1105     tcg_debug_assert(t->temp_allocated != 0);
1106     return tcg_constant_vec(t->base_type, vece, val);
1107 }
1108 
1109 TCGv_i32 tcg_const_i32(int32_t val)
1110 {
1111     TCGv_i32 t0;
1112     t0 = tcg_temp_new_i32();
1113     tcg_gen_movi_i32(t0, val);
1114     return t0;
1115 }
1116 
1117 TCGv_i64 tcg_const_i64(int64_t val)
1118 {
1119     TCGv_i64 t0;
1120     t0 = tcg_temp_new_i64();
1121     tcg_gen_movi_i64(t0, val);
1122     return t0;
1123 }
1124 
1125 TCGv_i32 tcg_const_local_i32(int32_t val)
1126 {
1127     TCGv_i32 t0;
1128     t0 = tcg_temp_local_new_i32();
1129     tcg_gen_movi_i32(t0, val);
1130     return t0;
1131 }
1132 
1133 TCGv_i64 tcg_const_local_i64(int64_t val)
1134 {
1135     TCGv_i64 t0;
1136     t0 = tcg_temp_local_new_i64();
1137     tcg_gen_movi_i64(t0, val);
1138     return t0;
1139 }
1140 
1141 #if defined(CONFIG_DEBUG_TCG)
1142 void tcg_clear_temp_count(void)
1143 {
1144     TCGContext *s = tcg_ctx;
1145     s->temps_in_use = 0;
1146 }
1147 
1148 int tcg_check_temp_count(void)
1149 {
1150     TCGContext *s = tcg_ctx;
1151     if (s->temps_in_use) {
1152         /* Clear the count so that we don't give another
1153          * warning immediately next time around.
1154          */
1155         s->temps_in_use = 0;
1156         return 1;
1157     }
1158     return 0;
1159 }
1160 #endif
1161 
/*
 * Return true if OP may appear in the opcode stream.
 * Test the runtime variable that controls each opcode.
 *
 * Opcodes fall into these groups:
 *  - control flow, calls and guest memory accesses: always supported;
 *  - basic 32-bit integer ops: always supported;
 *  - basic 64-bit integer ops: supported only when TCG_TARGET_REG_BITS
 *    is 64;
 *  - optional integer ops: governed by the matching TCG_TARGET_HAS_* flag;
 *  - vector ops: require at least one of the v64/v128/v256 flags, plus the
 *    matching TCG_TARGET_HAS_*_vec flag for the optional ones.
 * Any opcode not listed must lie strictly between INDEX_op_last_generic
 * and NB_OPS (checked in debug builds) and is reported as supported.
 */
bool tcg_op_supported(TCGOpcode op)
{
    /* True if the backend offers any vector width at all. */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Always supported, whatever the host. */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_goto_ptr:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    /* Basic 32-bit integer ops: always supported. */
    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit integer ops. */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word comparisons: only on 32-bit hosts. */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* Basic 64-bit integer ops: only on 64-bit hosts. */
    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit integer ops. */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extract2_i64:
        return TCG_TARGET_HAS_extract2_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Basic vector ops: supported whenever any vector width is. */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupm_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    /* Optional vector ops. */
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_abs_vec:
        return have_vec && TCG_TARGET_HAS_abs_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;
    case INDEX_op_rotli_vec:
        return have_vec && TCG_TARGET_HAS_roti_vec;
    case INDEX_op_rotls_vec:
        return have_vec && TCG_TARGET_HAS_rots_vec;
    case INDEX_op_rotlv_vec:
    case INDEX_op_rotrv_vec:
        return have_vec && TCG_TARGET_HAS_rotv_vec;
    case INDEX_op_ssadd_vec:
    case INDEX_op_usadd_vec:
    case INDEX_op_sssub_vec:
    case INDEX_op_ussub_vec:
        return have_vec && TCG_TARGET_HAS_sat_vec;
    case INDEX_op_smin_vec:
    case INDEX_op_umin_vec:
    case INDEX_op_smax_vec:
    case INDEX_op_umax_vec:
        return have_vec && TCG_TARGET_HAS_minmax_vec;
    case INDEX_op_bitsel_vec:
        return have_vec && TCG_TARGET_HAS_bitsel_vec;
    case INDEX_op_cmpsel_vec:
        return have_vec && TCG_TARGET_HAS_cmpsel_vec;

    default:
        /* Only opcodes beyond the generic range may reach this point. */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1450 
1451 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1452    and endian swap. Maybe it would be better to do the alignment
1453    and endian swap in tcg_reg_alloc_call(). */
1454 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1455 {
1456     int i, real_args, nb_rets, pi;
1457     unsigned typemask;
1458     const TCGHelperInfo *info;
1459     TCGOp *op;
1460 
1461     info = g_hash_table_lookup(helper_table, (gpointer)func);
1462     typemask = info->typemask;
1463 
1464 #ifdef CONFIG_PLUGIN
1465     /* detect non-plugin helpers */
1466     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1467         tcg_ctx->plugin_insn->calls_helpers = true;
1468     }
1469 #endif
1470 
1471 #if defined(__sparc__) && !defined(__arch64__) \
1472     && !defined(CONFIG_TCG_INTERPRETER)
1473     /* We have 64-bit values in one register, but need to pass as two
1474        separate parameters.  Split them.  */
1475     int orig_typemask = typemask;
1476     int orig_nargs = nargs;
1477     TCGv_i64 retl, reth;
1478     TCGTemp *split_args[MAX_OPC_PARAM];
1479 
1480     retl = NULL;
1481     reth = NULL;
1482     typemask = 0;
1483     for (i = real_args = 0; i < nargs; ++i) {
1484         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1485         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1486 
1487         if (is_64bit) {
1488             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1489             TCGv_i32 h = tcg_temp_new_i32();
1490             TCGv_i32 l = tcg_temp_new_i32();
1491             tcg_gen_extr_i64_i32(l, h, orig);
1492             split_args[real_args++] = tcgv_i32_temp(h);
1493             typemask |= dh_typecode_i32 << (real_args * 3);
1494             split_args[real_args++] = tcgv_i32_temp(l);
1495             typemask |= dh_typecode_i32 << (real_args * 3);
1496         } else {
1497             split_args[real_args++] = args[i];
1498             typemask |= argtype << (real_args * 3);
1499         }
1500     }
1501     nargs = real_args;
1502     args = split_args;
1503 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1504     for (i = 0; i < nargs; ++i) {
1505         int argtype = extract32(typemask, (i + 1) * 3, 3);
1506         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1507         bool is_signed = argtype & 1;
1508 
1509         if (is_32bit) {
1510             TCGv_i64 temp = tcg_temp_new_i64();
1511             TCGv_i32 orig = temp_tcgv_i32(args[i]);
1512             if (is_signed) {
1513                 tcg_gen_ext_i32_i64(temp, orig);
1514             } else {
1515                 tcg_gen_extu_i32_i64(temp, orig);
1516             }
1517             args[i] = tcgv_i64_temp(temp);
1518         }
1519     }
1520 #endif /* TCG_TARGET_EXTEND_ARGS */
1521 
1522     op = tcg_emit_op(INDEX_op_call);
1523 
1524     pi = 0;
1525     if (ret != NULL) {
1526 #if defined(__sparc__) && !defined(__arch64__) \
1527     && !defined(CONFIG_TCG_INTERPRETER)
1528         if ((typemask & 6) == dh_typecode_i64) {
1529             /* The 32-bit ABI is going to return the 64-bit value in
1530                the %o0/%o1 register pair.  Prepare for this by using
1531                two return temporaries, and reassemble below.  */
1532             retl = tcg_temp_new_i64();
1533             reth = tcg_temp_new_i64();
1534             op->args[pi++] = tcgv_i64_arg(reth);
1535             op->args[pi++] = tcgv_i64_arg(retl);
1536             nb_rets = 2;
1537         } else {
1538             op->args[pi++] = temp_arg(ret);
1539             nb_rets = 1;
1540         }
1541 #else
1542         if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1543 #ifdef HOST_WORDS_BIGENDIAN
1544             op->args[pi++] = temp_arg(ret + 1);
1545             op->args[pi++] = temp_arg(ret);
1546 #else
1547             op->args[pi++] = temp_arg(ret);
1548             op->args[pi++] = temp_arg(ret + 1);
1549 #endif
1550             nb_rets = 2;
1551         } else {
1552             op->args[pi++] = temp_arg(ret);
1553             nb_rets = 1;
1554         }
1555 #endif
1556     } else {
1557         nb_rets = 0;
1558     }
1559     TCGOP_CALLO(op) = nb_rets;
1560 
1561     real_args = 0;
1562     for (i = 0; i < nargs; i++) {
1563         int argtype = extract32(typemask, (i + 1) * 3, 3);
1564         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1565         bool want_align = false;
1566 
1567 #if defined(CONFIG_TCG_INTERPRETER)
1568         /*
1569          * Align all arguments, so that they land in predictable places
1570          * for passing off to ffi_call.
1571          */
1572         want_align = true;
1573 #elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1574         /* Some targets want aligned 64 bit args */
1575         want_align = is_64bit;
1576 #endif
1577 
1578         if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1579             op->args[pi++] = TCG_CALL_DUMMY_ARG;
1580             real_args++;
1581         }
1582 
1583         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1584             /*
1585              * If stack grows up, then we will be placing successive
1586              * arguments at lower addresses, which means we need to
1587              * reverse the order compared to how we would normally
1588              * treat either big or little-endian.  For those arguments
1589              * that will wind up in registers, this still works for
1590              * HPPA (the only current STACK_GROWSUP target) since the
1591              * argument registers are *also* allocated in decreasing
1592              * order.  If another such target is added, this logic may
1593              * have to get more complicated to differentiate between
1594              * stack arguments and register arguments.
1595              */
1596 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1597             op->args[pi++] = temp_arg(args[i] + 1);
1598             op->args[pi++] = temp_arg(args[i]);
1599 #else
1600             op->args[pi++] = temp_arg(args[i]);
1601             op->args[pi++] = temp_arg(args[i] + 1);
1602 #endif
1603             real_args += 2;
1604             continue;
1605         }
1606 
1607         op->args[pi++] = temp_arg(args[i]);
1608         real_args++;
1609     }
1610     op->args[pi++] = (uintptr_t)func;
1611     op->args[pi++] = (uintptr_t)info;
1612     TCGOP_CALLI(op) = real_args;
1613 
1614     /* Make sure the fields didn't overflow.  */
1615     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1616     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1617 
1618 #if defined(__sparc__) && !defined(__arch64__) \
1619     && !defined(CONFIG_TCG_INTERPRETER)
1620     /* Free all of the parts we allocated above.  */
1621     for (i = real_args = 0; i < orig_nargs; ++i) {
1622         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1623         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1624 
1625         if (is_64bit) {
1626             tcg_temp_free_internal(args[real_args++]);
1627             tcg_temp_free_internal(args[real_args++]);
1628         } else {
1629             real_args++;
1630         }
1631     }
1632     if ((orig_typemask & 6) == dh_typecode_i64) {
1633         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1634            Note that describing these as TCGv_i64 eliminates an unnecessary
1635            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1636         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1637         tcg_temp_free_i64(retl);
1638         tcg_temp_free_i64(reth);
1639     }
1640 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1641     for (i = 0; i < nargs; ++i) {
1642         int argtype = extract32(typemask, (i + 1) * 3, 3);
1643         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1644 
1645         if (is_32bit) {
1646             tcg_temp_free_internal(args[i]);
1647         }
1648     }
1649 #endif /* TCG_TARGET_EXTEND_ARGS */
1650 }
1651 
1652 static void tcg_reg_alloc_start(TCGContext *s)
1653 {
1654     int i, n;
1655 
1656     for (i = 0, n = s->nb_temps; i < n; i++) {
1657         TCGTemp *ts = &s->temps[i];
1658         TCGTempVal val = TEMP_VAL_MEM;
1659 
1660         switch (ts->kind) {
1661         case TEMP_CONST:
1662             val = TEMP_VAL_CONST;
1663             break;
1664         case TEMP_FIXED:
1665             val = TEMP_VAL_REG;
1666             break;
1667         case TEMP_GLOBAL:
1668             break;
1669         case TEMP_NORMAL:
1670             val = TEMP_VAL_DEAD;
1671             /* fall through */
1672         case TEMP_LOCAL:
1673             ts->mem_allocated = 0;
1674             break;
1675         default:
1676             g_assert_not_reached();
1677         }
1678         ts->val_type = val;
1679     }
1680 
1681     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1682 }
1683 
1684 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1685                                  TCGTemp *ts)
1686 {
1687     int idx = temp_idx(ts);
1688 
1689     switch (ts->kind) {
1690     case TEMP_FIXED:
1691     case TEMP_GLOBAL:
1692         pstrcpy(buf, buf_size, ts->name);
1693         break;
1694     case TEMP_LOCAL:
1695         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1696         break;
1697     case TEMP_NORMAL:
1698         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1699         break;
1700     case TEMP_CONST:
1701         switch (ts->type) {
1702         case TCG_TYPE_I32:
1703             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1704             break;
1705 #if TCG_TARGET_REG_BITS > 32
1706         case TCG_TYPE_I64:
1707             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1708             break;
1709 #endif
1710         case TCG_TYPE_V64:
1711         case TCG_TYPE_V128:
1712         case TCG_TYPE_V256:
1713             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1714                      64 << (ts->type - TCG_TYPE_V64), ts->val);
1715             break;
1716         default:
1717             g_assert_not_reached();
1718         }
1719         break;
1720     }
1721     return buf;
1722 }
1723 
1724 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1725                              int buf_size, TCGArg arg)
1726 {
1727     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1728 }
1729 
/* Printable names of the comparison conditions, indexed by TCG_COND_*. */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1745 
/*
 * Printable names of memory operations, indexed by the MO_* size, sign
 * and endianness combination.  Indices without an initializer are NULL.
 */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};
1761 
/*
 * Printable prefixes for the alignment field of a memory operation,
 * indexed by (bits & MO_AMASK) >> MO_ASHIFT.  The alignment that is the
 * default for the target (aligned if TARGET_ALIGNED_ONLY is defined,
 * unaligned otherwise) prints as the empty string.
 */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef TARGET_ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
1777 
/*
 * Printable names of TCG_BSWAP_* flag combinations, indexed by the flag
 * value.  Each entry is a 6-byte array, enough for the longest names
 * ("iz,oz" and "iz,os") plus the terminating NUL; combinations without
 * an initializer are the empty string.
 */
static const char bswap_flag_name[][6] = {
    [TCG_BSWAP_IZ] = "iz",
    [TCG_BSWAP_OZ] = "oz",
    [TCG_BSWAP_OS] = "os",
    [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
    [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
};
1785 
1786 static inline bool tcg_regset_single(TCGRegSet d)
1787 {
1788     return (d & (d - 1)) == 0;
1789 }
1790 
1791 static inline TCGReg tcg_regset_first(TCGRegSet d)
1792 {
1793     if (TCG_TARGET_NB_REGS <= 32) {
1794         return ctz32(d);
1795     } else {
1796         return ctz64(d);
1797     }
1798 }
1799 
1800 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1801 {
1802     char buf[128];
1803     TCGOp *op;
1804 
1805     QTAILQ_FOREACH(op, &s->ops, link) {
1806         int i, k, nb_oargs, nb_iargs, nb_cargs;
1807         const TCGOpDef *def;
1808         TCGOpcode c;
1809         int col = 0;
1810 
1811         c = op->opc;
1812         def = &tcg_op_defs[c];
1813 
1814         if (c == INDEX_op_insn_start) {
1815             nb_oargs = 0;
1816             col += qemu_log("\n ----");
1817 
1818             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1819                 target_ulong a;
1820 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1821                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1822 #else
1823                 a = op->args[i];
1824 #endif
1825                 col += qemu_log(" " TARGET_FMT_lx, a);
1826             }
1827         } else if (c == INDEX_op_call) {
1828             const TCGHelperInfo *info = tcg_call_info(op);
1829             void *func = tcg_call_func(op);
1830 
1831             /* variable number of arguments */
1832             nb_oargs = TCGOP_CALLO(op);
1833             nb_iargs = TCGOP_CALLI(op);
1834             nb_cargs = def->nb_cargs;
1835 
1836             col += qemu_log(" %s ", def->name);
1837 
1838             /*
1839              * Print the function name from TCGHelperInfo, if available.
1840              * Note that plugins have a template function for the info,
1841              * but the actual function pointer comes from the plugin.
1842              */
1843             if (func == info->func) {
1844                 col += qemu_log("%s", info->name);
1845             } else {
1846                 col += qemu_log("plugin(%p)", func);
1847             }
1848 
1849             col += qemu_log(",$0x%x,$%d", info->flags, nb_oargs);
1850             for (i = 0; i < nb_oargs; i++) {
1851                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1852                                                        op->args[i]));
1853             }
1854             for (i = 0; i < nb_iargs; i++) {
1855                 TCGArg arg = op->args[nb_oargs + i];
1856                 const char *t = "<dummy>";
1857                 if (arg != TCG_CALL_DUMMY_ARG) {
1858                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1859                 }
1860                 col += qemu_log(",%s", t);
1861             }
1862         } else {
1863             col += qemu_log(" %s ", def->name);
1864 
1865             nb_oargs = def->nb_oargs;
1866             nb_iargs = def->nb_iargs;
1867             nb_cargs = def->nb_cargs;
1868 
1869             if (def->flags & TCG_OPF_VECTOR) {
1870                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1871                                 8 << TCGOP_VECE(op));
1872             }
1873 
1874             k = 0;
1875             for (i = 0; i < nb_oargs; i++) {
1876                 if (k != 0) {
1877                     col += qemu_log(",");
1878                 }
1879                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1880                                                       op->args[k++]));
1881             }
1882             for (i = 0; i < nb_iargs; i++) {
1883                 if (k != 0) {
1884                     col += qemu_log(",");
1885                 }
1886                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1887                                                       op->args[k++]));
1888             }
1889             switch (c) {
1890             case INDEX_op_brcond_i32:
1891             case INDEX_op_setcond_i32:
1892             case INDEX_op_movcond_i32:
1893             case INDEX_op_brcond2_i32:
1894             case INDEX_op_setcond2_i32:
1895             case INDEX_op_brcond_i64:
1896             case INDEX_op_setcond_i64:
1897             case INDEX_op_movcond_i64:
1898             case INDEX_op_cmp_vec:
1899             case INDEX_op_cmpsel_vec:
1900                 if (op->args[k] < ARRAY_SIZE(cond_name)
1901                     && cond_name[op->args[k]]) {
1902                     col += qemu_log(",%s", cond_name[op->args[k++]]);
1903                 } else {
1904                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1905                 }
1906                 i = 1;
1907                 break;
1908             case INDEX_op_qemu_ld_i32:
1909             case INDEX_op_qemu_st_i32:
1910             case INDEX_op_qemu_st8_i32:
1911             case INDEX_op_qemu_ld_i64:
1912             case INDEX_op_qemu_st_i64:
1913                 {
1914                     MemOpIdx oi = op->args[k++];
1915                     MemOp op = get_memop(oi);
1916                     unsigned ix = get_mmuidx(oi);
1917 
1918                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1919                         col += qemu_log(",$0x%x,%u", op, ix);
1920                     } else {
1921                         const char *s_al, *s_op;
1922                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1923                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1924                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1925                     }
1926                     i = 1;
1927                 }
1928                 break;
1929             case INDEX_op_bswap16_i32:
1930             case INDEX_op_bswap16_i64:
1931             case INDEX_op_bswap32_i32:
1932             case INDEX_op_bswap32_i64:
1933             case INDEX_op_bswap64_i64:
1934                 {
1935                     TCGArg flags = op->args[k];
1936                     const char *name = NULL;
1937 
1938                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
1939                         name = bswap_flag_name[flags];
1940                     }
1941                     if (name) {
1942                         col += qemu_log(",%s", name);
1943                     } else {
1944                         col += qemu_log(",$0x%" TCG_PRIlx, flags);
1945                     }
1946                     i = k = 1;
1947                 }
1948                 break;
1949             default:
1950                 i = 0;
1951                 break;
1952             }
1953             switch (c) {
1954             case INDEX_op_set_label:
1955             case INDEX_op_br:
1956             case INDEX_op_brcond_i32:
1957             case INDEX_op_brcond_i64:
1958             case INDEX_op_brcond2_i32:
1959                 col += qemu_log("%s$L%d", k ? "," : "",
1960                                 arg_label(op->args[k])->id);
1961                 i++, k++;
1962                 break;
1963             default:
1964                 break;
1965             }
1966             for (; i < nb_cargs; i++, k++) {
1967                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1968             }
1969         }
1970 
1971         if (have_prefs || op->life) {
1972 
1973             QemuLogFile *logfile;
1974 
1975             rcu_read_lock();
1976             logfile = qatomic_rcu_read(&qemu_logfile);
1977             if (logfile) {
1978                 for (; col < 40; ++col) {
1979                     putc(' ', logfile->fd);
1980                 }
1981             }
1982             rcu_read_unlock();
1983         }
1984 
1985         if (op->life) {
1986             unsigned life = op->life;
1987 
1988             if (life & (SYNC_ARG * 3)) {
1989                 qemu_log("  sync:");
1990                 for (i = 0; i < 2; ++i) {
1991                     if (life & (SYNC_ARG << i)) {
1992                         qemu_log(" %d", i);
1993                     }
1994                 }
1995             }
1996             life /= DEAD_ARG;
1997             if (life) {
1998                 qemu_log("  dead:");
1999                 for (i = 0; life; ++i, life >>= 1) {
2000                     if (life & 1) {
2001                         qemu_log(" %d", i);
2002                     }
2003                 }
2004             }
2005         }
2006 
2007         if (have_prefs) {
2008             for (i = 0; i < nb_oargs; ++i) {
2009                 TCGRegSet set = op->output_pref[i];
2010 
2011                 if (i == 0) {
2012                     qemu_log("  pref=");
2013                 } else {
2014                     qemu_log(",");
2015                 }
2016                 if (set == 0) {
2017                     qemu_log("none");
2018                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2019                     qemu_log("all");
2020 #ifdef CONFIG_DEBUG_TCG
2021                 } else if (tcg_regset_single(set)) {
2022                     TCGReg reg = tcg_regset_first(set);
2023                     qemu_log("%s", tcg_target_reg_names[reg]);
2024 #endif
2025                 } else if (TCG_TARGET_NB_REGS <= 32) {
2026                     qemu_log("%#x", (uint32_t)set);
2027                 } else {
2028                     qemu_log("%#" PRIx64, (uint64_t)set);
2029                 }
2030             }
2031         }
2032 
2033         qemu_log("\n");
2034     }
2035 }
2036 
2037 /* we give more priority to constraints with less registers */
2038 static int get_constraint_priority(const TCGOpDef *def, int k)
2039 {
2040     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2041     int n;
2042 
2043     if (arg_ct->oalias) {
2044         /* an alias is equivalent to a single register */
2045         n = 1;
2046     } else {
2047         n = ctpop64(arg_ct->regs);
2048     }
2049     return TCG_TARGET_NB_REGS - n + 1;
2050 }
2051 
2052 /* sort from highest priority to lowest */
2053 static void sort_constraints(TCGOpDef *def, int start, int n)
2054 {
2055     int i, j;
2056     TCGArgConstraint *a = def->args_ct;
2057 
2058     for (i = 0; i < n; i++) {
2059         a[start + i].sort_index = start + i;
2060     }
2061     if (n <= 1) {
2062         return;
2063     }
2064     for (i = 0; i < n - 1; i++) {
2065         for (j = i + 1; j < n; j++) {
2066             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2067             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2068             if (p1 < p2) {
2069                 int tmp = a[start + i].sort_index;
2070                 a[start + i].sort_index = a[start + j].sort_index;
2071                 a[start + j].sort_index = tmp;
2072             }
2073         }
2074     }
2075 }
2076 
/*
 * Fill in def->args_ct[] for every opcode in tcg_op_defs[] by parsing the
 * constraint strings of the constraint set that tcg_target_op_def()
 * selects for it, then sort the output and input constraints by priority.
 *
 * Opcodes flagged TCG_OPF_NOT_PRESENT, and opcodes without any input or
 * output argument, are skipped.  @s is not referenced by the body.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        /* One constraint string per argument; parse it char by char. */
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    /*
                     * A digit names an output argument that this argument
                     * aliases.  It must be the first character of the
                     * string, and the referenced output must already have
                     * a non-empty register set.
                     */
                    {
                        int oarg = *ct_str - '0';
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                        /* Copy first, so the input does not inherit oalias. */
                        def->args_ct[i] = def->args_ct[oarg];
                        /* The output sets oalias.  */
                        def->args_ct[oarg].oalias = true;
                        def->args_ct[oarg].alias_index = i;
                        /* The input sets ialias. */
                        def->args_ct[i].ialias = true;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    /* Sets the newreg flag on this argument. */
                    def->args_ct[i].newreg = true;
                    ct_str++;
                    break;
                case 'i':
                    /* Adds TCG_CT_CONST to the constraint's ct flags. */
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;

                /* Include all of the target-specific constraints. */

                /*
                 * The included header is expanded with CONST() and REGS()
                 * defined so that each entry becomes a 'case' which ORs
                 * its mask into .ct or .regs respectively.
                 */
#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2163 
2164 void tcg_op_remove(TCGContext *s, TCGOp *op)
2165 {
2166     TCGLabel *label;
2167 
2168     switch (op->opc) {
2169     case INDEX_op_br:
2170         label = arg_label(op->args[0]);
2171         label->refs--;
2172         break;
2173     case INDEX_op_brcond_i32:
2174     case INDEX_op_brcond_i64:
2175         label = arg_label(op->args[3]);
2176         label->refs--;
2177         break;
2178     case INDEX_op_brcond2_i32:
2179         label = arg_label(op->args[5]);
2180         label->refs--;
2181         break;
2182     default:
2183         break;
2184     }
2185 
2186     QTAILQ_REMOVE(&s->ops, op, link);
2187     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2188     s->nb_ops--;
2189 
2190 #ifdef CONFIG_PROFILER
2191     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2192 #endif
2193 }
2194 
2195 void tcg_remove_ops_after(TCGOp *op)
2196 {
2197     TCGContext *s = tcg_ctx;
2198 
2199     while (true) {
2200         TCGOp *last = tcg_last_op();
2201         if (last == op) {
2202             return;
2203         }
2204         tcg_op_remove(s, last);
2205     }
2206 }
2207 
2208 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2209 {
2210     TCGContext *s = tcg_ctx;
2211     TCGOp *op;
2212 
2213     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2214         op = tcg_malloc(sizeof(TCGOp));
2215     } else {
2216         op = QTAILQ_FIRST(&s->free_ops);
2217         QTAILQ_REMOVE(&s->free_ops, op, link);
2218     }
2219     memset(op, 0, offsetof(TCGOp, link));
2220     op->opc = opc;
2221     s->nb_ops++;
2222 
2223     return op;
2224 }
2225 
2226 TCGOp *tcg_emit_op(TCGOpcode opc)
2227 {
2228     TCGOp *op = tcg_op_alloc(opc);
2229     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2230     return op;
2231 }
2232 
2233 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2234 {
2235     TCGOp *new_op = tcg_op_alloc(opc);
2236     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2237     return new_op;
2238 }
2239 
2240 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2241 {
2242     TCGOp *new_op = tcg_op_alloc(opc);
2243     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2244     return new_op;
2245 }
2246 
2247 /* Reachable analysis : remove unreachable code.  */
2248 static void reachable_code_pass(TCGContext *s)
2249 {
2250     TCGOp *op, *op_next;
2251     bool dead = false;
2252 
2253     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2254         bool remove = dead;
2255         TCGLabel *label;
2256 
2257         switch (op->opc) {
2258         case INDEX_op_set_label:
2259             label = arg_label(op->args[0]);
2260             if (label->refs == 0) {
2261                 /*
2262                  * While there is an occasional backward branch, virtually
2263                  * all branches generated by the translators are forward.
2264                  * Which means that generally we will have already removed
2265                  * all references to the label that will be, and there is
2266                  * little to be gained by iterating.
2267                  */
2268                 remove = true;
2269             } else {
2270                 /* Once we see a label, insns become live again.  */
2271                 dead = false;
2272                 remove = false;
2273 
2274                 /*
2275                  * Optimization can fold conditional branches to unconditional.
2276                  * If we find a label with one reference which is preceded by
2277                  * an unconditional branch to it, remove both.  This needed to
2278                  * wait until the dead code in between them was removed.
2279                  */
2280                 if (label->refs == 1) {
2281                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2282                     if (op_prev->opc == INDEX_op_br &&
2283                         label == arg_label(op_prev->args[0])) {
2284                         tcg_op_remove(s, op_prev);
2285                         remove = true;
2286                     }
2287                 }
2288             }
2289             break;
2290 
2291         case INDEX_op_br:
2292         case INDEX_op_exit_tb:
2293         case INDEX_op_goto_ptr:
2294             /* Unconditional branches; everything following is dead.  */
2295             dead = true;
2296             break;
2297 
2298         case INDEX_op_call:
2299             /* Notice noreturn helper calls, raising exceptions.  */
2300             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2301                 dead = true;
2302             }
2303             break;
2304 
2305         case INDEX_op_insn_start:
2306             /* Never remove -- we need to keep these for unwind.  */
2307             remove = false;
2308             break;
2309 
2310         default:
2311             break;
2312         }
2313 
2314         if (remove) {
2315             tcg_op_remove(s, op);
2316         }
2317     }
2318 }
2319 
/*
 * Bit flags kept in TCGTemp.state by the liveness helpers that follow:
 * TS_DEAD marks a temp as dead, TS_MEM marks one that should be in memory.
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the dead / sync bit of argument n.  Both macros read a variable
 * named 'arg_life', which must be in scope wherever they are used.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2325 
2326 /* For liveness_pass_1, the register preferences for a given temp.  */
2327 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2328 {
2329     return ts->state_ptr;
2330 }
2331 
2332 /* For liveness_pass_1, reset the preferences for a given temp to the
2333  * maximal regset for its type.
2334  */
2335 static inline void la_reset_pref(TCGTemp *ts)
2336 {
2337     *la_temp_pref(ts)
2338         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2339 }
2340 
2341 /* liveness analysis: end of function: all temps are dead, and globals
2342    should be in memory. */
2343 static void la_func_end(TCGContext *s, int ng, int nt)
2344 {
2345     int i;
2346 
2347     for (i = 0; i < ng; ++i) {
2348         s->temps[i].state = TS_DEAD | TS_MEM;
2349         la_reset_pref(&s->temps[i]);
2350     }
2351     for (i = ng; i < nt; ++i) {
2352         s->temps[i].state = TS_DEAD;
2353         la_reset_pref(&s->temps[i]);
2354     }
2355 }
2356 
2357 /* liveness analysis: end of basic block: all temps are dead, globals
2358    and local temps should be in memory. */
2359 static void la_bb_end(TCGContext *s, int ng, int nt)
2360 {
2361     int i;
2362 
2363     for (i = 0; i < nt; ++i) {
2364         TCGTemp *ts = &s->temps[i];
2365         int state;
2366 
2367         switch (ts->kind) {
2368         case TEMP_FIXED:
2369         case TEMP_GLOBAL:
2370         case TEMP_LOCAL:
2371             state = TS_DEAD | TS_MEM;
2372             break;
2373         case TEMP_NORMAL:
2374         case TEMP_CONST:
2375             state = TS_DEAD;
2376             break;
2377         default:
2378             g_assert_not_reached();
2379         }
2380         ts->state = state;
2381         la_reset_pref(ts);
2382     }
2383 }
2384 
2385 /* liveness analysis: sync globals back to memory.  */
2386 static void la_global_sync(TCGContext *s, int ng)
2387 {
2388     int i;
2389 
2390     for (i = 0; i < ng; ++i) {
2391         int state = s->temps[i].state;
2392         s->temps[i].state = state | TS_MEM;
2393         if (state == TS_DEAD) {
2394             /* If the global was previously dead, reset prefs.  */
2395             la_reset_pref(&s->temps[i]);
2396         }
2397     }
2398 }
2399 
2400 /*
2401  * liveness analysis: conditional branch: all temps are dead,
2402  * globals and local temps should be synced.
2403  */
2404 static void la_bb_sync(TCGContext *s, int ng, int nt)
2405 {
2406     la_global_sync(s, ng);
2407 
2408     for (int i = ng; i < nt; ++i) {
2409         TCGTemp *ts = &s->temps[i];
2410         int state;
2411 
2412         switch (ts->kind) {
2413         case TEMP_LOCAL:
2414             state = ts->state;
2415             ts->state = state | TS_MEM;
2416             if (state != TS_DEAD) {
2417                 continue;
2418             }
2419             break;
2420         case TEMP_NORMAL:
2421             s->temps[i].state = TS_DEAD;
2422             break;
2423         case TEMP_CONST:
2424             continue;
2425         default:
2426             g_assert_not_reached();
2427         }
2428         la_reset_pref(&s->temps[i]);
2429     }
2430 }
2431 
2432 /* liveness analysis: sync globals back to memory and kill.  */
2433 static void la_global_kill(TCGContext *s, int ng)
2434 {
2435     int i;
2436 
2437     for (i = 0; i < ng; i++) {
2438         s->temps[i].state = TS_DEAD | TS_MEM;
2439         la_reset_pref(&s->temps[i]);
2440     }
2441 }
2442 
2443 /* liveness analysis: note live globals crossing calls.  */
2444 static void la_cross_call(TCGContext *s, int nt)
2445 {
2446     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2447     int i;
2448 
2449     for (i = 0; i < nt; i++) {
2450         TCGTemp *ts = &s->temps[i];
2451         if (!(ts->state & TS_DEAD)) {
2452             TCGRegSet *pset = la_temp_pref(ts);
2453             TCGRegSet set = *pset;
2454 
2455             set &= mask;
2456             /* If the combination is not possible, restart.  */
2457             if (set == 0) {
2458                 set = tcg_target_available_regs[ts->type] & mask;
2459             }
2460             *pset = set;
2461         }
2462     }
2463 }
2464 
2465 /* Liveness analysis : update the opc_arg_life array to tell if a
2466    given input arguments is dead. Instructions updating dead
2467    temporaries are removed. */
2468 static void liveness_pass_1(TCGContext *s)
2469 {
2470     int nb_globals = s->nb_globals;
2471     int nb_temps = s->nb_temps;
2472     TCGOp *op, *op_prev;
2473     TCGRegSet *prefs;
2474     int i;
2475 
2476     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2477     for (i = 0; i < nb_temps; ++i) {
2478         s->temps[i].state_ptr = prefs + i;
2479     }
2480 
2481     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2482     la_func_end(s, nb_globals, nb_temps);
2483 
2484     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2485         int nb_iargs, nb_oargs;
2486         TCGOpcode opc_new, opc_new2;
2487         bool have_opc_new2;
2488         TCGLifeData arg_life = 0;
2489         TCGTemp *ts;
2490         TCGOpcode opc = op->opc;
2491         const TCGOpDef *def = &tcg_op_defs[opc];
2492 
2493         switch (opc) {
2494         case INDEX_op_call:
2495             {
2496                 int call_flags;
2497                 int nb_call_regs;
2498 
2499                 nb_oargs = TCGOP_CALLO(op);
2500                 nb_iargs = TCGOP_CALLI(op);
2501                 call_flags = tcg_call_flags(op);
2502 
2503                 /* pure functions can be removed if their result is unused */
2504                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2505                     for (i = 0; i < nb_oargs; i++) {
2506                         ts = arg_temp(op->args[i]);
2507                         if (ts->state != TS_DEAD) {
2508                             goto do_not_remove_call;
2509                         }
2510                     }
2511                     goto do_remove;
2512                 }
2513             do_not_remove_call:
2514 
2515                 /* Output args are dead.  */
2516                 for (i = 0; i < nb_oargs; i++) {
2517                     ts = arg_temp(op->args[i]);
2518                     if (ts->state & TS_DEAD) {
2519                         arg_life |= DEAD_ARG << i;
2520                     }
2521                     if (ts->state & TS_MEM) {
2522                         arg_life |= SYNC_ARG << i;
2523                     }
2524                     ts->state = TS_DEAD;
2525                     la_reset_pref(ts);
2526 
2527                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2528                     op->output_pref[i] = 0;
2529                 }
2530 
2531                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2532                                     TCG_CALL_NO_READ_GLOBALS))) {
2533                     la_global_kill(s, nb_globals);
2534                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2535                     la_global_sync(s, nb_globals);
2536                 }
2537 
2538                 /* Record arguments that die in this helper.  */
2539                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2540                     ts = arg_temp(op->args[i]);
2541                     if (ts && ts->state & TS_DEAD) {
2542                         arg_life |= DEAD_ARG << i;
2543                     }
2544                 }
2545 
2546                 /* For all live registers, remove call-clobbered prefs.  */
2547                 la_cross_call(s, nb_temps);
2548 
2549                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2550 
2551                 /* Input arguments are live for preceding opcodes.  */
2552                 for (i = 0; i < nb_iargs; i++) {
2553                     ts = arg_temp(op->args[i + nb_oargs]);
2554                     if (ts && ts->state & TS_DEAD) {
2555                         /* For those arguments that die, and will be allocated
2556                          * in registers, clear the register set for that arg,
2557                          * to be filled in below.  For args that will be on
2558                          * the stack, reset to any available reg.
2559                          */
2560                         *la_temp_pref(ts)
2561                             = (i < nb_call_regs ? 0 :
2562                                tcg_target_available_regs[ts->type]);
2563                         ts->state &= ~TS_DEAD;
2564                     }
2565                 }
2566 
2567                 /* For each input argument, add its input register to prefs.
2568                    If a temp is used once, this produces a single set bit.  */
2569                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2570                     ts = arg_temp(op->args[i + nb_oargs]);
2571                     if (ts) {
2572                         tcg_regset_set_reg(*la_temp_pref(ts),
2573                                            tcg_target_call_iarg_regs[i]);
2574                     }
2575                 }
2576             }
2577             break;
2578         case INDEX_op_insn_start:
2579             break;
2580         case INDEX_op_discard:
2581             /* mark the temporary as dead */
2582             ts = arg_temp(op->args[0]);
2583             ts->state = TS_DEAD;
2584             la_reset_pref(ts);
2585             break;
2586 
2587         case INDEX_op_add2_i32:
2588             opc_new = INDEX_op_add_i32;
2589             goto do_addsub2;
2590         case INDEX_op_sub2_i32:
2591             opc_new = INDEX_op_sub_i32;
2592             goto do_addsub2;
2593         case INDEX_op_add2_i64:
2594             opc_new = INDEX_op_add_i64;
2595             goto do_addsub2;
2596         case INDEX_op_sub2_i64:
2597             opc_new = INDEX_op_sub_i64;
2598         do_addsub2:
2599             nb_iargs = 4;
2600             nb_oargs = 2;
2601             /* Test if the high part of the operation is dead, but not
2602                the low part.  The result can be optimized to a simple
2603                add or sub.  This happens often for x86_64 guest when the
2604                cpu mode is set to 32 bit.  */
2605             if (arg_temp(op->args[1])->state == TS_DEAD) {
2606                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2607                     goto do_remove;
2608                 }
2609                 /* Replace the opcode and adjust the args in place,
2610                    leaving 3 unused args at the end.  */
2611                 op->opc = opc = opc_new;
2612                 op->args[1] = op->args[2];
2613                 op->args[2] = op->args[4];
2614                 /* Fall through and mark the single-word operation live.  */
2615                 nb_iargs = 2;
2616                 nb_oargs = 1;
2617             }
2618             goto do_not_remove;
2619 
2620         case INDEX_op_mulu2_i32:
2621             opc_new = INDEX_op_mul_i32;
2622             opc_new2 = INDEX_op_muluh_i32;
2623             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2624             goto do_mul2;
2625         case INDEX_op_muls2_i32:
2626             opc_new = INDEX_op_mul_i32;
2627             opc_new2 = INDEX_op_mulsh_i32;
2628             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2629             goto do_mul2;
2630         case INDEX_op_mulu2_i64:
2631             opc_new = INDEX_op_mul_i64;
2632             opc_new2 = INDEX_op_muluh_i64;
2633             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2634             goto do_mul2;
2635         case INDEX_op_muls2_i64:
2636             opc_new = INDEX_op_mul_i64;
2637             opc_new2 = INDEX_op_mulsh_i64;
2638             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2639             goto do_mul2;
2640         do_mul2:
2641             nb_iargs = 2;
2642             nb_oargs = 2;
2643             if (arg_temp(op->args[1])->state == TS_DEAD) {
2644                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2645                     /* Both parts of the operation are dead.  */
2646                     goto do_remove;
2647                 }
2648                 /* The high part of the operation is dead; generate the low. */
2649                 op->opc = opc = opc_new;
2650                 op->args[1] = op->args[2];
2651                 op->args[2] = op->args[3];
2652             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2653                 /* The low part of the operation is dead; generate the high. */
2654                 op->opc = opc = opc_new2;
2655                 op->args[0] = op->args[1];
2656                 op->args[1] = op->args[2];
2657                 op->args[2] = op->args[3];
2658             } else {
2659                 goto do_not_remove;
2660             }
2661             /* Mark the single-word operation live.  */
2662             nb_oargs = 1;
2663             goto do_not_remove;
2664 
2665         default:
2666             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2667             nb_iargs = def->nb_iargs;
2668             nb_oargs = def->nb_oargs;
2669 
2670             /* Test if the operation can be removed because all
2671                its outputs are dead. We assume that nb_oargs == 0
2672                implies side effects */
2673             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2674                 for (i = 0; i < nb_oargs; i++) {
2675                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2676                         goto do_not_remove;
2677                     }
2678                 }
2679                 goto do_remove;
2680             }
2681             goto do_not_remove;
2682 
2683         do_remove:
2684             tcg_op_remove(s, op);
2685             break;
2686 
2687         do_not_remove:
2688             for (i = 0; i < nb_oargs; i++) {
2689                 ts = arg_temp(op->args[i]);
2690 
2691                 /* Remember the preference of the uses that followed.  */
2692                 op->output_pref[i] = *la_temp_pref(ts);
2693 
2694                 /* Output args are dead.  */
2695                 if (ts->state & TS_DEAD) {
2696                     arg_life |= DEAD_ARG << i;
2697                 }
2698                 if (ts->state & TS_MEM) {
2699                     arg_life |= SYNC_ARG << i;
2700                 }
2701                 ts->state = TS_DEAD;
2702                 la_reset_pref(ts);
2703             }
2704 
2705             /* If end of basic block, update.  */
2706             if (def->flags & TCG_OPF_BB_EXIT) {
2707                 la_func_end(s, nb_globals, nb_temps);
2708             } else if (def->flags & TCG_OPF_COND_BRANCH) {
2709                 la_bb_sync(s, nb_globals, nb_temps);
2710             } else if (def->flags & TCG_OPF_BB_END) {
2711                 la_bb_end(s, nb_globals, nb_temps);
2712             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2713                 la_global_sync(s, nb_globals);
2714                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2715                     la_cross_call(s, nb_temps);
2716                 }
2717             }
2718 
2719             /* Record arguments that die in this opcode.  */
2720             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2721                 ts = arg_temp(op->args[i]);
2722                 if (ts->state & TS_DEAD) {
2723                     arg_life |= DEAD_ARG << i;
2724                 }
2725             }
2726 
2727             /* Input arguments are live for preceding opcodes.  */
2728             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2729                 ts = arg_temp(op->args[i]);
2730                 if (ts->state & TS_DEAD) {
2731                     /* For operands that were dead, initially allow
2732                        all regs for the type.  */
2733                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2734                     ts->state &= ~TS_DEAD;
2735                 }
2736             }
2737 
2738             /* Incorporate constraints for this operand.  */
2739             switch (opc) {
2740             case INDEX_op_mov_i32:
2741             case INDEX_op_mov_i64:
2742                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2743                    have proper constraints.  That said, special case
2744                    moves to propagate preferences backward.  */
2745                 if (IS_DEAD_ARG(1)) {
2746                     *la_temp_pref(arg_temp(op->args[0]))
2747                         = *la_temp_pref(arg_temp(op->args[1]));
2748                 }
2749                 break;
2750 
2751             default:
2752                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2753                     const TCGArgConstraint *ct = &def->args_ct[i];
2754                     TCGRegSet set, *pset;
2755 
2756                     ts = arg_temp(op->args[i]);
2757                     pset = la_temp_pref(ts);
2758                     set = *pset;
2759 
2760                     set &= ct->regs;
2761                     if (ct->ialias) {
2762                         set &= op->output_pref[ct->alias_index];
2763                     }
2764                     /* If the combination is not possible, restart.  */
2765                     if (set == 0) {
2766                         set = ct->regs;
2767                     }
2768                     *pset = set;
2769                 }
2770                 break;
2771             }
2772             break;
2773         }
2774         op->life = arg_life;
2775     }
2776 }
2777 
2778 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2779 static bool liveness_pass_2(TCGContext *s)
2780 {
2781     int nb_globals = s->nb_globals;
2782     int nb_temps, i;
2783     bool changes = false;
2784     TCGOp *op, *op_next;
2785 
2786     /* Create a temporary for each indirect global.  */
2787     for (i = 0; i < nb_globals; ++i) {
2788         TCGTemp *its = &s->temps[i];
2789         if (its->indirect_reg) {
2790             TCGTemp *dts = tcg_temp_alloc(s);
2791             dts->type = its->type;
2792             dts->base_type = its->base_type;
2793             its->state_ptr = dts;
2794         } else {
2795             its->state_ptr = NULL;
2796         }
2797         /* All globals begin dead.  */
2798         its->state = TS_DEAD;
2799     }
2800     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2801         TCGTemp *its = &s->temps[i];
2802         its->state_ptr = NULL;
2803         its->state = TS_DEAD;
2804     }
2805 
2806     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2807         TCGOpcode opc = op->opc;
2808         const TCGOpDef *def = &tcg_op_defs[opc];
2809         TCGLifeData arg_life = op->life;
2810         int nb_iargs, nb_oargs, call_flags;
2811         TCGTemp *arg_ts, *dir_ts;
2812 
2813         if (opc == INDEX_op_call) {
2814             nb_oargs = TCGOP_CALLO(op);
2815             nb_iargs = TCGOP_CALLI(op);
2816             call_flags = tcg_call_flags(op);
2817         } else {
2818             nb_iargs = def->nb_iargs;
2819             nb_oargs = def->nb_oargs;
2820 
2821             /* Set flags similar to how calls require.  */
2822             if (def->flags & TCG_OPF_COND_BRANCH) {
2823                 /* Like reading globals: sync_globals */
2824                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2825             } else if (def->flags & TCG_OPF_BB_END) {
2826                 /* Like writing globals: save_globals */
2827                 call_flags = 0;
2828             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2829                 /* Like reading globals: sync_globals */
2830                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2831             } else {
2832                 /* No effect on globals.  */
2833                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2834                               TCG_CALL_NO_WRITE_GLOBALS);
2835             }
2836         }
2837 
2838         /* Make sure that input arguments are available.  */
2839         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2840             arg_ts = arg_temp(op->args[i]);
2841             if (arg_ts) {
2842                 dir_ts = arg_ts->state_ptr;
2843                 if (dir_ts && arg_ts->state == TS_DEAD) {
2844                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2845                                       ? INDEX_op_ld_i32
2846                                       : INDEX_op_ld_i64);
2847                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2848 
2849                     lop->args[0] = temp_arg(dir_ts);
2850                     lop->args[1] = temp_arg(arg_ts->mem_base);
2851                     lop->args[2] = arg_ts->mem_offset;
2852 
2853                     /* Loaded, but synced with memory.  */
2854                     arg_ts->state = TS_MEM;
2855                 }
2856             }
2857         }
2858 
2859         /* Perform input replacement, and mark inputs that became dead.
2860            No action is required except keeping temp_state up to date
2861            so that we reload when needed.  */
2862         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2863             arg_ts = arg_temp(op->args[i]);
2864             if (arg_ts) {
2865                 dir_ts = arg_ts->state_ptr;
2866                 if (dir_ts) {
2867                     op->args[i] = temp_arg(dir_ts);
2868                     changes = true;
2869                     if (IS_DEAD_ARG(i)) {
2870                         arg_ts->state = TS_DEAD;
2871                     }
2872                 }
2873             }
2874         }
2875 
2876         /* Liveness analysis should ensure that the following are
2877            all correct, for call sites and basic block end points.  */
2878         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2879             /* Nothing to do */
2880         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2881             for (i = 0; i < nb_globals; ++i) {
2882                 /* Liveness should see that globals are synced back,
2883                    that is, either TS_DEAD or TS_MEM.  */
2884                 arg_ts = &s->temps[i];
2885                 tcg_debug_assert(arg_ts->state_ptr == 0
2886                                  || arg_ts->state != 0);
2887             }
2888         } else {
2889             for (i = 0; i < nb_globals; ++i) {
2890                 /* Liveness should see that globals are saved back,
2891                    that is, TS_DEAD, waiting to be reloaded.  */
2892                 arg_ts = &s->temps[i];
2893                 tcg_debug_assert(arg_ts->state_ptr == 0
2894                                  || arg_ts->state == TS_DEAD);
2895             }
2896         }
2897 
2898         /* Outputs become available.  */
2899         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2900             arg_ts = arg_temp(op->args[0]);
2901             dir_ts = arg_ts->state_ptr;
2902             if (dir_ts) {
2903                 op->args[0] = temp_arg(dir_ts);
2904                 changes = true;
2905 
2906                 /* The output is now live and modified.  */
2907                 arg_ts->state = 0;
2908 
2909                 if (NEED_SYNC_ARG(0)) {
2910                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2911                                       ? INDEX_op_st_i32
2912                                       : INDEX_op_st_i64);
2913                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2914                     TCGTemp *out_ts = dir_ts;
2915 
2916                     if (IS_DEAD_ARG(0)) {
2917                         out_ts = arg_temp(op->args[1]);
2918                         arg_ts->state = TS_DEAD;
2919                         tcg_op_remove(s, op);
2920                     } else {
2921                         arg_ts->state = TS_MEM;
2922                     }
2923 
2924                     sop->args[0] = temp_arg(out_ts);
2925                     sop->args[1] = temp_arg(arg_ts->mem_base);
2926                     sop->args[2] = arg_ts->mem_offset;
2927                 } else {
2928                     tcg_debug_assert(!IS_DEAD_ARG(0));
2929                 }
2930             }
2931         } else {
2932             for (i = 0; i < nb_oargs; i++) {
2933                 arg_ts = arg_temp(op->args[i]);
2934                 dir_ts = arg_ts->state_ptr;
2935                 if (!dir_ts) {
2936                     continue;
2937                 }
2938                 op->args[i] = temp_arg(dir_ts);
2939                 changes = true;
2940 
2941                 /* The output is now live and modified.  */
2942                 arg_ts->state = 0;
2943 
2944                 /* Sync outputs upon their last write.  */
2945                 if (NEED_SYNC_ARG(i)) {
2946                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2947                                       ? INDEX_op_st_i32
2948                                       : INDEX_op_st_i64);
2949                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2950 
2951                     sop->args[0] = temp_arg(dir_ts);
2952                     sop->args[1] = temp_arg(arg_ts->mem_base);
2953                     sop->args[2] = arg_ts->mem_offset;
2954 
2955                     arg_ts->state = TS_MEM;
2956                 }
2957                 /* Drop outputs that are dead.  */
2958                 if (IS_DEAD_ARG(i)) {
2959                     arg_ts->state = TS_DEAD;
2960                 }
2961             }
2962         }
2963     }
2964 
2965     return changes;
2966 }
2967 
2968 #ifdef CONFIG_DEBUG_TCG
2969 static void dump_regs(TCGContext *s)
2970 {
2971     TCGTemp *ts;
2972     int i;
2973     char buf[64];
2974 
2975     for(i = 0; i < s->nb_temps; i++) {
2976         ts = &s->temps[i];
2977         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2978         switch(ts->val_type) {
2979         case TEMP_VAL_REG:
2980             printf("%s", tcg_target_reg_names[ts->reg]);
2981             break;
2982         case TEMP_VAL_MEM:
2983             printf("%d(%s)", (int)ts->mem_offset,
2984                    tcg_target_reg_names[ts->mem_base->reg]);
2985             break;
2986         case TEMP_VAL_CONST:
2987             printf("$0x%" PRIx64, ts->val);
2988             break;
2989         case TEMP_VAL_DEAD:
2990             printf("D");
2991             break;
2992         default:
2993             printf("???");
2994             break;
2995         }
2996         printf("\n");
2997     }
2998 
2999     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3000         if (s->reg_to_temp[i] != NULL) {
3001             printf("%s: %s\n",
3002                    tcg_target_reg_names[i],
3003                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3004         }
3005     }
3006 }
3007 
3008 static void check_regs(TCGContext *s)
3009 {
3010     int reg;
3011     int k;
3012     TCGTemp *ts;
3013     char buf[64];
3014 
3015     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3016         ts = s->reg_to_temp[reg];
3017         if (ts != NULL) {
3018             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3019                 printf("Inconsistency for register %s:\n",
3020                        tcg_target_reg_names[reg]);
3021                 goto fail;
3022             }
3023         }
3024     }
3025     for (k = 0; k < s->nb_temps; k++) {
3026         ts = &s->temps[k];
3027         if (ts->val_type == TEMP_VAL_REG
3028             && ts->kind != TEMP_FIXED
3029             && s->reg_to_temp[ts->reg] != ts) {
3030             printf("Inconsistency for temp %s:\n",
3031                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3032         fail:
3033             printf("reg state:\n");
3034             dump_regs(s);
3035             tcg_abort();
3036         }
3037     }
3038 }
3039 #endif
3040 
3041 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3042 {
3043     intptr_t off, size, align;
3044 
3045     switch (ts->type) {
3046     case TCG_TYPE_I32:
3047         size = align = 4;
3048         break;
3049     case TCG_TYPE_I64:
3050     case TCG_TYPE_V64:
3051         size = align = 8;
3052         break;
3053     case TCG_TYPE_V128:
3054         size = align = 16;
3055         break;
3056     case TCG_TYPE_V256:
3057         /* Note that we do not require aligned storage for V256. */
3058         size = 32, align = 16;
3059         break;
3060     default:
3061         g_assert_not_reached();
3062     }
3063 
3064     assert(align <= TCG_TARGET_STACK_ALIGN);
3065     off = ROUND_UP(s->current_frame_offset, align);
3066 
3067     /* If we've exhausted the stack frame, restart with a smaller TB. */
3068     if (off + size > s->frame_end) {
3069         tcg_raise_tb_overflow(s);
3070     }
3071     s->current_frame_offset = off + size;
3072 
3073     ts->mem_offset = off;
3074 #if defined(__sparc__)
3075     ts->mem_offset += TCG_TARGET_STACK_BIAS;
3076 #endif
3077     ts->mem_base = s->frame_temp;
3078     ts->mem_allocated = 1;
3079 }
3080 
3081 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3082 
3083 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3084    mark it free; otherwise mark it dead.  */
3085 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3086 {
3087     TCGTempVal new_type;
3088 
3089     switch (ts->kind) {
3090     case TEMP_FIXED:
3091         return;
3092     case TEMP_GLOBAL:
3093     case TEMP_LOCAL:
3094         new_type = TEMP_VAL_MEM;
3095         break;
3096     case TEMP_NORMAL:
3097         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3098         break;
3099     case TEMP_CONST:
3100         new_type = TEMP_VAL_CONST;
3101         break;
3102     default:
3103         g_assert_not_reached();
3104     }
3105     if (ts->val_type == TEMP_VAL_REG) {
3106         s->reg_to_temp[ts->reg] = NULL;
3107     }
3108     ts->val_type = new_type;
3109 }
3110 
3111 /* Mark a temporary as dead.  */
3112 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3113 {
3114     temp_free_or_dead(s, ts, 1);
3115 }
3116 
3117 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3118    registers needs to be allocated to store a constant.  If 'free_or_dead'
3119    is non-zero, subsequently release the temporary; if it is positive, the
3120    temp is dead; if it is negative, the temp is free.  */
3121 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3122                       TCGRegSet preferred_regs, int free_or_dead)
3123 {
3124     if (!temp_readonly(ts) && !ts->mem_coherent) {
3125         if (!ts->mem_allocated) {
3126             temp_allocate_frame(s, ts);
3127         }
3128         switch (ts->val_type) {
3129         case TEMP_VAL_CONST:
3130             /* If we're going to free the temp immediately, then we won't
3131                require it later in a register, so attempt to store the
3132                constant to memory directly.  */
3133             if (free_or_dead
3134                 && tcg_out_sti(s, ts->type, ts->val,
3135                                ts->mem_base->reg, ts->mem_offset)) {
3136                 break;
3137             }
3138             temp_load(s, ts, tcg_target_available_regs[ts->type],
3139                       allocated_regs, preferred_regs);
3140             /* fallthrough */
3141 
3142         case TEMP_VAL_REG:
3143             tcg_out_st(s, ts->type, ts->reg,
3144                        ts->mem_base->reg, ts->mem_offset);
3145             break;
3146 
3147         case TEMP_VAL_MEM:
3148             break;
3149 
3150         case TEMP_VAL_DEAD:
3151         default:
3152             tcg_abort();
3153         }
3154         ts->mem_coherent = 1;
3155     }
3156     if (free_or_dead) {
3157         temp_free_or_dead(s, ts, free_or_dead);
3158     }
3159 }
3160 
3161 /* free register 'reg' by spilling the corresponding temporary if necessary */
3162 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3163 {
3164     TCGTemp *ts = s->reg_to_temp[reg];
3165     if (ts != NULL) {
3166         temp_sync(s, ts, allocated_regs, 0, -1);
3167     }
3168 }
3169 
3170 /**
3171  * tcg_reg_alloc:
3172  * @required_regs: Set of registers in which we must allocate.
3173  * @allocated_regs: Set of registers which must be avoided.
3174  * @preferred_regs: Set of registers we should prefer.
3175  * @rev: True if we search the registers in "indirect" order.
3176  *
3177  * The allocated register must be in @required_regs & ~@allocated_regs,
3178  * but if we can put it in @preferred_regs we may save a move later.
3179  */
3180 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3181                             TCGRegSet allocated_regs,
3182                             TCGRegSet preferred_regs, bool rev)
3183 {
3184     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3185     TCGRegSet reg_ct[2];
3186     const int *order;
3187 
3188     reg_ct[1] = required_regs & ~allocated_regs;
3189     tcg_debug_assert(reg_ct[1] != 0);
3190     reg_ct[0] = reg_ct[1] & preferred_regs;
3191 
3192     /* Skip the preferred_regs option if it cannot be satisfied,
3193        or if the preference made no difference.  */
3194     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3195 
3196     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3197 
3198     /* Try free registers, preferences first.  */
3199     for (j = f; j < 2; j++) {
3200         TCGRegSet set = reg_ct[j];
3201 
3202         if (tcg_regset_single(set)) {
3203             /* One register in the set.  */
3204             TCGReg reg = tcg_regset_first(set);
3205             if (s->reg_to_temp[reg] == NULL) {
3206                 return reg;
3207             }
3208         } else {
3209             for (i = 0; i < n; i++) {
3210                 TCGReg reg = order[i];
3211                 if (s->reg_to_temp[reg] == NULL &&
3212                     tcg_regset_test_reg(set, reg)) {
3213                     return reg;
3214                 }
3215             }
3216         }
3217     }
3218 
3219     /* We must spill something.  */
3220     for (j = f; j < 2; j++) {
3221         TCGRegSet set = reg_ct[j];
3222 
3223         if (tcg_regset_single(set)) {
3224             /* One register in the set.  */
3225             TCGReg reg = tcg_regset_first(set);
3226             tcg_reg_free(s, reg, allocated_regs);
3227             return reg;
3228         } else {
3229             for (i = 0; i < n; i++) {
3230                 TCGReg reg = order[i];
3231                 if (tcg_regset_test_reg(set, reg)) {
3232                     tcg_reg_free(s, reg, allocated_regs);
3233                     return reg;
3234                 }
3235             }
3236         }
3237     }
3238 
3239     tcg_abort();
3240 }
3241 
3242 /* Make sure the temporary is in a register.  If needed, allocate the register
3243    from DESIRED while avoiding ALLOCATED.  */
3244 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3245                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3246 {
3247     TCGReg reg;
3248 
3249     switch (ts->val_type) {
3250     case TEMP_VAL_REG:
3251         return;
3252     case TEMP_VAL_CONST:
3253         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3254                             preferred_regs, ts->indirect_base);
3255         if (ts->type <= TCG_TYPE_I64) {
3256             tcg_out_movi(s, ts->type, reg, ts->val);
3257         } else {
3258             uint64_t val = ts->val;
3259             MemOp vece = MO_64;
3260 
3261             /*
3262              * Find the minimal vector element that matches the constant.
3263              * The targets will, in general, have to do this search anyway,
3264              * do this generically.
3265              */
3266             if (val == dup_const(MO_8, val)) {
3267                 vece = MO_8;
3268             } else if (val == dup_const(MO_16, val)) {
3269                 vece = MO_16;
3270             } else if (val == dup_const(MO_32, val)) {
3271                 vece = MO_32;
3272             }
3273 
3274             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3275         }
3276         ts->mem_coherent = 0;
3277         break;
3278     case TEMP_VAL_MEM:
3279         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3280                             preferred_regs, ts->indirect_base);
3281         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3282         ts->mem_coherent = 1;
3283         break;
3284     case TEMP_VAL_DEAD:
3285     default:
3286         tcg_abort();
3287     }
3288     ts->reg = reg;
3289     ts->val_type = TEMP_VAL_REG;
3290     s->reg_to_temp[reg] = ts;
3291 }
3292 
3293 /* Save a temporary to memory. 'allocated_regs' is used in case a
3294    temporary registers needs to be allocated to store a constant.  */
3295 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3296 {
3297     /* The liveness analysis already ensures that globals are back
3298        in memory. Keep an tcg_debug_assert for safety. */
3299     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3300 }
3301 
3302 /* save globals to their canonical location and assume they can be
3303    modified be the following code. 'allocated_regs' is used in case a
3304    temporary registers needs to be allocated to store a constant. */
3305 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3306 {
3307     int i, n;
3308 
3309     for (i = 0, n = s->nb_globals; i < n; i++) {
3310         temp_save(s, &s->temps[i], allocated_regs);
3311     }
3312 }
3313 
3314 /* sync globals to their canonical location and assume they can be
3315    read by the following code. 'allocated_regs' is used in case a
3316    temporary registers needs to be allocated to store a constant. */
3317 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3318 {
3319     int i, n;
3320 
3321     for (i = 0, n = s->nb_globals; i < n; i++) {
3322         TCGTemp *ts = &s->temps[i];
3323         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3324                          || ts->kind == TEMP_FIXED
3325                          || ts->mem_coherent);
3326     }
3327 }
3328 
3329 /* at the end of a basic block, we assume all temporaries are dead and
3330    all globals are stored at their canonical location. */
3331 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3332 {
3333     int i;
3334 
3335     for (i = s->nb_globals; i < s->nb_temps; i++) {
3336         TCGTemp *ts = &s->temps[i];
3337 
3338         switch (ts->kind) {
3339         case TEMP_LOCAL:
3340             temp_save(s, ts, allocated_regs);
3341             break;
3342         case TEMP_NORMAL:
3343             /* The liveness analysis already ensures that temps are dead.
3344                Keep an tcg_debug_assert for safety. */
3345             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3346             break;
3347         case TEMP_CONST:
3348             /* Similarly, we should have freed any allocated register. */
3349             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3350             break;
3351         default:
3352             g_assert_not_reached();
3353         }
3354     }
3355 
3356     save_globals(s, allocated_regs);
3357 }
3358 
3359 /*
3360  * At a conditional branch, we assume all temporaries are dead and
3361  * all globals and local temps are synced to their location.
3362  */
3363 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3364 {
3365     sync_globals(s, allocated_regs);
3366 
3367     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3368         TCGTemp *ts = &s->temps[i];
3369         /*
3370          * The liveness analysis already ensures that temps are dead.
3371          * Keep tcg_debug_asserts for safety.
3372          */
3373         switch (ts->kind) {
3374         case TEMP_LOCAL:
3375             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3376             break;
3377         case TEMP_NORMAL:
3378             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3379             break;
3380         case TEMP_CONST:
3381             break;
3382         default:
3383             g_assert_not_reached();
3384         }
3385     }
3386 }
3387 
3388 /*
3389  * Specialized code generation for INDEX_op_mov_* with a constant.
3390  */
3391 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3392                                   tcg_target_ulong val, TCGLifeData arg_life,
3393                                   TCGRegSet preferred_regs)
3394 {
3395     /* ENV should not be modified.  */
3396     tcg_debug_assert(!temp_readonly(ots));
3397 
3398     /* The movi is not explicitly generated here.  */
3399     if (ots->val_type == TEMP_VAL_REG) {
3400         s->reg_to_temp[ots->reg] = NULL;
3401     }
3402     ots->val_type = TEMP_VAL_CONST;
3403     ots->val = val;
3404     ots->mem_coherent = 0;
3405     if (NEED_SYNC_ARG(0)) {
3406         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3407     } else if (IS_DEAD_ARG(0)) {
3408         temp_dead(s, ots);
3409     }
3410 }
3411 
3412 /*
3413  * Specialized code generation for INDEX_op_mov_*.
3414  */
3415 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3416 {
3417     const TCGLifeData arg_life = op->life;
3418     TCGRegSet allocated_regs, preferred_regs;
3419     TCGTemp *ts, *ots;
3420     TCGType otype, itype;
3421 
3422     allocated_regs = s->reserved_regs;
3423     preferred_regs = op->output_pref[0];
3424     ots = arg_temp(op->args[0]);
3425     ts = arg_temp(op->args[1]);
3426 
3427     /* ENV should not be modified.  */
3428     tcg_debug_assert(!temp_readonly(ots));
3429 
3430     /* Note that otype != itype for no-op truncation.  */
3431     otype = ots->type;
3432     itype = ts->type;
3433 
3434     if (ts->val_type == TEMP_VAL_CONST) {
3435         /* propagate constant or generate sti */
3436         tcg_target_ulong val = ts->val;
3437         if (IS_DEAD_ARG(1)) {
3438             temp_dead(s, ts);
3439         }
3440         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3441         return;
3442     }
3443 
3444     /* If the source value is in memory we're going to be forced
3445        to have it in a register in order to perform the copy.  Copy
3446        the SOURCE value into its own register first, that way we
3447        don't have to reload SOURCE the next time it is used. */
3448     if (ts->val_type == TEMP_VAL_MEM) {
3449         temp_load(s, ts, tcg_target_available_regs[itype],
3450                   allocated_regs, preferred_regs);
3451     }
3452 
3453     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3454     if (IS_DEAD_ARG(0)) {
3455         /* mov to a non-saved dead register makes no sense (even with
3456            liveness analysis disabled). */
3457         tcg_debug_assert(NEED_SYNC_ARG(0));
3458         if (!ots->mem_allocated) {
3459             temp_allocate_frame(s, ots);
3460         }
3461         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3462         if (IS_DEAD_ARG(1)) {
3463             temp_dead(s, ts);
3464         }
3465         temp_dead(s, ots);
3466     } else {
3467         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3468             /* the mov can be suppressed */
3469             if (ots->val_type == TEMP_VAL_REG) {
3470                 s->reg_to_temp[ots->reg] = NULL;
3471             }
3472             ots->reg = ts->reg;
3473             temp_dead(s, ts);
3474         } else {
3475             if (ots->val_type != TEMP_VAL_REG) {
3476                 /* When allocating a new register, make sure to not spill the
3477                    input one. */
3478                 tcg_regset_set_reg(allocated_regs, ts->reg);
3479                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3480                                          allocated_regs, preferred_regs,
3481                                          ots->indirect_base);
3482             }
3483             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3484                 /*
3485                  * Cross register class move not supported.
3486                  * Store the source register into the destination slot
3487                  * and leave the destination temp as TEMP_VAL_MEM.
3488                  */
3489                 assert(!temp_readonly(ots));
3490                 if (!ts->mem_allocated) {
3491                     temp_allocate_frame(s, ots);
3492                 }
3493                 tcg_out_st(s, ts->type, ts->reg,
3494                            ots->mem_base->reg, ots->mem_offset);
3495                 ots->mem_coherent = 1;
3496                 temp_free_or_dead(s, ots, -1);
3497                 return;
3498             }
3499         }
3500         ots->val_type = TEMP_VAL_REG;
3501         ots->mem_coherent = 0;
3502         s->reg_to_temp[ots->reg] = ots;
3503         if (NEED_SYNC_ARG(0)) {
3504             temp_sync(s, ots, allocated_regs, 0, 0);
3505         }
3506     }
3507 }
3508 
3509 /*
3510  * Specialized code generation for INDEX_op_dup_vec.
3511  */
3512 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3513 {
3514     const TCGLifeData arg_life = op->life;
3515     TCGRegSet dup_out_regs, dup_in_regs;
3516     TCGTemp *its, *ots;
3517     TCGType itype, vtype;
3518     intptr_t endian_fixup;
3519     unsigned vece;
3520     bool ok;
3521 
3522     ots = arg_temp(op->args[0]);
3523     its = arg_temp(op->args[1]);
3524 
3525     /* ENV should not be modified.  */
3526     tcg_debug_assert(!temp_readonly(ots));
3527 
3528     itype = its->type;
3529     vece = TCGOP_VECE(op);
3530     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3531 
3532     if (its->val_type == TEMP_VAL_CONST) {
3533         /* Propagate constant via movi -> dupi.  */
3534         tcg_target_ulong val = its->val;
3535         if (IS_DEAD_ARG(1)) {
3536             temp_dead(s, its);
3537         }
3538         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3539         return;
3540     }
3541 
3542     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3543     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3544 
3545     /* Allocate the output register now.  */
3546     if (ots->val_type != TEMP_VAL_REG) {
3547         TCGRegSet allocated_regs = s->reserved_regs;
3548 
3549         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3550             /* Make sure to not spill the input register. */
3551             tcg_regset_set_reg(allocated_regs, its->reg);
3552         }
3553         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3554                                  op->output_pref[0], ots->indirect_base);
3555         ots->val_type = TEMP_VAL_REG;
3556         ots->mem_coherent = 0;
3557         s->reg_to_temp[ots->reg] = ots;
3558     }
3559 
3560     switch (its->val_type) {
3561     case TEMP_VAL_REG:
3562         /*
3563          * The dup constriaints must be broad, covering all possible VECE.
3564          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3565          * to fail, indicating that extra moves are required for that case.
3566          */
3567         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3568             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3569                 goto done;
3570             }
3571             /* Try again from memory or a vector input register.  */
3572         }
3573         if (!its->mem_coherent) {
3574             /*
3575              * The input register is not synced, and so an extra store
3576              * would be required to use memory.  Attempt an integer-vector
3577              * register move first.  We do not have a TCGRegSet for this.
3578              */
3579             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3580                 break;
3581             }
3582             /* Sync the temp back to its slot and load from there.  */
3583             temp_sync(s, its, s->reserved_regs, 0, 0);
3584         }
3585         /* fall through */
3586 
3587     case TEMP_VAL_MEM:
3588 #ifdef HOST_WORDS_BIGENDIAN
3589         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3590         endian_fixup -= 1 << vece;
3591 #else
3592         endian_fixup = 0;
3593 #endif
3594         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3595                              its->mem_offset + endian_fixup)) {
3596             goto done;
3597         }
3598         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3599         break;
3600 
3601     default:
3602         g_assert_not_reached();
3603     }
3604 
3605     /* We now have a vector input register, so dup must succeed. */
3606     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3607     tcg_debug_assert(ok);
3608 
3609  done:
3610     if (IS_DEAD_ARG(1)) {
3611         temp_dead(s, its);
3612     }
3613     if (NEED_SYNC_ARG(0)) {
3614         temp_sync(s, ots, s->reserved_regs, 0, 0);
3615     }
3616     if (IS_DEAD_ARG(0)) {
3617         temp_dead(s, ots);
3618     }
3619 }
3620 
3621 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3622 {
3623     const TCGLifeData arg_life = op->life;
3624     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3625     TCGRegSet i_allocated_regs;
3626     TCGRegSet o_allocated_regs;
3627     int i, k, nb_iargs, nb_oargs;
3628     TCGReg reg;
3629     TCGArg arg;
3630     const TCGArgConstraint *arg_ct;
3631     TCGTemp *ts;
3632     TCGArg new_args[TCG_MAX_OP_ARGS];
3633     int const_args[TCG_MAX_OP_ARGS];
3634 
3635     nb_oargs = def->nb_oargs;
3636     nb_iargs = def->nb_iargs;
3637 
3638     /* copy constants */
3639     memcpy(new_args + nb_oargs + nb_iargs,
3640            op->args + nb_oargs + nb_iargs,
3641            sizeof(TCGArg) * def->nb_cargs);
3642 
3643     i_allocated_regs = s->reserved_regs;
3644     o_allocated_regs = s->reserved_regs;
3645 
3646     /* satisfy input constraints */
3647     for (k = 0; k < nb_iargs; k++) {
3648         TCGRegSet i_preferred_regs, o_preferred_regs;
3649 
3650         i = def->args_ct[nb_oargs + k].sort_index;
3651         arg = op->args[i];
3652         arg_ct = &def->args_ct[i];
3653         ts = arg_temp(arg);
3654 
3655         if (ts->val_type == TEMP_VAL_CONST
3656             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3657             /* constant is OK for instruction */
3658             const_args[i] = 1;
3659             new_args[i] = ts->val;
3660             continue;
3661         }
3662 
3663         i_preferred_regs = o_preferred_regs = 0;
3664         if (arg_ct->ialias) {
3665             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3666 
3667             /*
3668              * If the input is readonly, then it cannot also be an
3669              * output and aliased to itself.  If the input is not
3670              * dead after the instruction, we must allocate a new
3671              * register and move it.
3672              */
3673             if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3674                 goto allocate_in_reg;
3675             }
3676 
3677             /*
3678              * Check if the current register has already been allocated
3679              * for another input aliased to an output.
3680              */
3681             if (ts->val_type == TEMP_VAL_REG) {
3682                 reg = ts->reg;
3683                 for (int k2 = 0; k2 < k; k2++) {
3684                     int i2 = def->args_ct[nb_oargs + k2].sort_index;
3685                     if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3686                         goto allocate_in_reg;
3687                     }
3688                 }
3689             }
3690             i_preferred_regs = o_preferred_regs;
3691         }
3692 
3693         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3694         reg = ts->reg;
3695 
3696         if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3697  allocate_in_reg:
3698             /*
3699              * Allocate a new register matching the constraint
3700              * and move the temporary register into it.
3701              */
3702             temp_load(s, ts, tcg_target_available_regs[ts->type],
3703                       i_allocated_regs, 0);
3704             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3705                                 o_preferred_regs, ts->indirect_base);
3706             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3707                 /*
3708                  * Cross register class move not supported.  Sync the
3709                  * temp back to its slot and load from there.
3710                  */
3711                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3712                 tcg_out_ld(s, ts->type, reg,
3713                            ts->mem_base->reg, ts->mem_offset);
3714             }
3715         }
3716         new_args[i] = reg;
3717         const_args[i] = 0;
3718         tcg_regset_set_reg(i_allocated_regs, reg);
3719     }
3720 
3721     /* mark dead temporaries and free the associated registers */
3722     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3723         if (IS_DEAD_ARG(i)) {
3724             temp_dead(s, arg_temp(op->args[i]));
3725         }
3726     }
3727 
3728     if (def->flags & TCG_OPF_COND_BRANCH) {
3729         tcg_reg_alloc_cbranch(s, i_allocated_regs);
3730     } else if (def->flags & TCG_OPF_BB_END) {
3731         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3732     } else {
3733         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3734             /* XXX: permit generic clobber register list ? */
3735             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3736                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3737                     tcg_reg_free(s, i, i_allocated_regs);
3738                 }
3739             }
3740         }
3741         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3742             /* sync globals if the op has side effects and might trigger
3743                an exception. */
3744             sync_globals(s, i_allocated_regs);
3745         }
3746 
3747         /* satisfy the output constraints */
3748         for(k = 0; k < nb_oargs; k++) {
3749             i = def->args_ct[k].sort_index;
3750             arg = op->args[i];
3751             arg_ct = &def->args_ct[i];
3752             ts = arg_temp(arg);
3753 
3754             /* ENV should not be modified.  */
3755             tcg_debug_assert(!temp_readonly(ts));
3756 
3757             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3758                 reg = new_args[arg_ct->alias_index];
3759             } else if (arg_ct->newreg) {
3760                 reg = tcg_reg_alloc(s, arg_ct->regs,
3761                                     i_allocated_regs | o_allocated_regs,
3762                                     op->output_pref[k], ts->indirect_base);
3763             } else {
3764                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3765                                     op->output_pref[k], ts->indirect_base);
3766             }
3767             tcg_regset_set_reg(o_allocated_regs, reg);
3768             if (ts->val_type == TEMP_VAL_REG) {
3769                 s->reg_to_temp[ts->reg] = NULL;
3770             }
3771             ts->val_type = TEMP_VAL_REG;
3772             ts->reg = reg;
3773             /*
3774              * Temp value is modified, so the value kept in memory is
3775              * potentially not the same.
3776              */
3777             ts->mem_coherent = 0;
3778             s->reg_to_temp[reg] = ts;
3779             new_args[i] = reg;
3780         }
3781     }
3782 
3783     /* emit instruction */
3784     if (def->flags & TCG_OPF_VECTOR) {
3785         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3786                        new_args, const_args);
3787     } else {
3788         tcg_out_op(s, op->opc, new_args, const_args);
3789     }
3790 
3791     /* move the outputs in the correct register if needed */
3792     for(i = 0; i < nb_oargs; i++) {
3793         ts = arg_temp(op->args[i]);
3794 
3795         /* ENV should not be modified.  */
3796         tcg_debug_assert(!temp_readonly(ts));
3797 
3798         if (NEED_SYNC_ARG(i)) {
3799             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3800         } else if (IS_DEAD_ARG(i)) {
3801             temp_dead(s, ts);
3802         }
3803     }
3804 }
3805 
3806 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3807 {
3808     const TCGLifeData arg_life = op->life;
3809     TCGTemp *ots, *itsl, *itsh;
3810     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3811 
3812     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3813     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3814     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3815 
3816     ots = arg_temp(op->args[0]);
3817     itsl = arg_temp(op->args[1]);
3818     itsh = arg_temp(op->args[2]);
3819 
3820     /* ENV should not be modified.  */
3821     tcg_debug_assert(!temp_readonly(ots));
3822 
3823     /* Allocate the output register now.  */
3824     if (ots->val_type != TEMP_VAL_REG) {
3825         TCGRegSet allocated_regs = s->reserved_regs;
3826         TCGRegSet dup_out_regs =
3827             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3828 
3829         /* Make sure to not spill the input registers. */
3830         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3831             tcg_regset_set_reg(allocated_regs, itsl->reg);
3832         }
3833         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3834             tcg_regset_set_reg(allocated_regs, itsh->reg);
3835         }
3836 
3837         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3838                                  op->output_pref[0], ots->indirect_base);
3839         ots->val_type = TEMP_VAL_REG;
3840         ots->mem_coherent = 0;
3841         s->reg_to_temp[ots->reg] = ots;
3842     }
3843 
3844     /* Promote dup2 of immediates to dupi_vec. */
3845     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3846         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3847         MemOp vece = MO_64;
3848 
3849         if (val == dup_const(MO_8, val)) {
3850             vece = MO_8;
3851         } else if (val == dup_const(MO_16, val)) {
3852             vece = MO_16;
3853         } else if (val == dup_const(MO_32, val)) {
3854             vece = MO_32;
3855         }
3856 
3857         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3858         goto done;
3859     }
3860 
3861     /* If the two inputs form one 64-bit value, try dupm_vec. */
3862     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3863         if (!itsl->mem_coherent) {
3864             temp_sync(s, itsl, s->reserved_regs, 0, 0);
3865         }
3866         if (!itsh->mem_coherent) {
3867             temp_sync(s, itsh, s->reserved_regs, 0, 0);
3868         }
3869 #ifdef HOST_WORDS_BIGENDIAN
3870         TCGTemp *its = itsh;
3871 #else
3872         TCGTemp *its = itsl;
3873 #endif
3874         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3875                              its->mem_base->reg, its->mem_offset)) {
3876             goto done;
3877         }
3878     }
3879 
3880     /* Fall back to generic expansion. */
3881     return false;
3882 
3883  done:
3884     if (IS_DEAD_ARG(1)) {
3885         temp_dead(s, itsl);
3886     }
3887     if (IS_DEAD_ARG(2)) {
3888         temp_dead(s, itsh);
3889     }
3890     if (NEED_SYNC_ARG(0)) {
3891         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3892     } else if (IS_DEAD_ARG(0)) {
3893         temp_dead(s, ots);
3894     }
3895     return true;
3896 }
3897 
/*
 * STACK_DIR(x): x itself, or -x when TCG_TARGET_STACK_GROWSUP is defined.
 */
#ifndef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (x)
#else
#define STACK_DIR(x) (-(x))
#endif
3903 
3904 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3905 {
3906     const int nb_oargs = TCGOP_CALLO(op);
3907     const int nb_iargs = TCGOP_CALLI(op);
3908     const TCGLifeData arg_life = op->life;
3909     const TCGHelperInfo *info;
3910     int flags, nb_regs, i;
3911     TCGReg reg;
3912     TCGArg arg;
3913     TCGTemp *ts;
3914     intptr_t stack_offset;
3915     size_t call_stack_size;
3916     tcg_insn_unit *func_addr;
3917     int allocate_args;
3918     TCGRegSet allocated_regs;
3919 
3920     func_addr = tcg_call_func(op);
3921     info = tcg_call_info(op);
3922     flags = info->flags;
3923 
3924     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3925     if (nb_regs > nb_iargs) {
3926         nb_regs = nb_iargs;
3927     }
3928 
3929     /* assign stack slots first */
3930     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3931     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3932         ~(TCG_TARGET_STACK_ALIGN - 1);
3933     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3934     if (allocate_args) {
3935         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3936            preallocate call stack */
3937         tcg_abort();
3938     }
3939 
3940     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3941     for (i = nb_regs; i < nb_iargs; i++) {
3942         arg = op->args[nb_oargs + i];
3943 #ifdef TCG_TARGET_STACK_GROWSUP
3944         stack_offset -= sizeof(tcg_target_long);
3945 #endif
3946         if (arg != TCG_CALL_DUMMY_ARG) {
3947             ts = arg_temp(arg);
3948             temp_load(s, ts, tcg_target_available_regs[ts->type],
3949                       s->reserved_regs, 0);
3950             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3951         }
3952 #ifndef TCG_TARGET_STACK_GROWSUP
3953         stack_offset += sizeof(tcg_target_long);
3954 #endif
3955     }
3956 
3957     /* assign input registers */
3958     allocated_regs = s->reserved_regs;
3959     for (i = 0; i < nb_regs; i++) {
3960         arg = op->args[nb_oargs + i];
3961         if (arg != TCG_CALL_DUMMY_ARG) {
3962             ts = arg_temp(arg);
3963             reg = tcg_target_call_iarg_regs[i];
3964 
3965             if (ts->val_type == TEMP_VAL_REG) {
3966                 if (ts->reg != reg) {
3967                     tcg_reg_free(s, reg, allocated_regs);
3968                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3969                         /*
3970                          * Cross register class move not supported.  Sync the
3971                          * temp back to its slot and load from there.
3972                          */
3973                         temp_sync(s, ts, allocated_regs, 0, 0);
3974                         tcg_out_ld(s, ts->type, reg,
3975                                    ts->mem_base->reg, ts->mem_offset);
3976                     }
3977                 }
3978             } else {
3979                 TCGRegSet arg_set = 0;
3980 
3981                 tcg_reg_free(s, reg, allocated_regs);
3982                 tcg_regset_set_reg(arg_set, reg);
3983                 temp_load(s, ts, arg_set, allocated_regs, 0);
3984             }
3985 
3986             tcg_regset_set_reg(allocated_regs, reg);
3987         }
3988     }
3989 
3990     /* mark dead temporaries and free the associated registers */
3991     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3992         if (IS_DEAD_ARG(i)) {
3993             temp_dead(s, arg_temp(op->args[i]));
3994         }
3995     }
3996 
3997     /* clobber call registers */
3998     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3999         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4000             tcg_reg_free(s, i, allocated_regs);
4001         }
4002     }
4003 
4004     /* Save globals if they might be written by the helper, sync them if
4005        they might be read. */
4006     if (flags & TCG_CALL_NO_READ_GLOBALS) {
4007         /* Nothing to do */
4008     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4009         sync_globals(s, allocated_regs);
4010     } else {
4011         save_globals(s, allocated_regs);
4012     }
4013 
4014 #ifdef CONFIG_TCG_INTERPRETER
4015     {
4016         gpointer hash = (gpointer)(uintptr_t)info->typemask;
4017         ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
4018         assert(cif != NULL);
4019         tcg_out_call(s, func_addr, cif);
4020     }
4021 #else
4022     tcg_out_call(s, func_addr);
4023 #endif
4024 
4025     /* assign output registers and emit moves if needed */
4026     for(i = 0; i < nb_oargs; i++) {
4027         arg = op->args[i];
4028         ts = arg_temp(arg);
4029 
4030         /* ENV should not be modified.  */
4031         tcg_debug_assert(!temp_readonly(ts));
4032 
4033         reg = tcg_target_call_oarg_regs[i];
4034         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4035         if (ts->val_type == TEMP_VAL_REG) {
4036             s->reg_to_temp[ts->reg] = NULL;
4037         }
4038         ts->val_type = TEMP_VAL_REG;
4039         ts->reg = reg;
4040         ts->mem_coherent = 0;
4041         s->reg_to_temp[reg] = ts;
4042         if (NEED_SYNC_ARG(i)) {
4043             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4044         } else if (IS_DEAD_ARG(i)) {
4045             temp_dead(s, ts);
4046         }
4047     }
4048 }
4049 
4050 #ifdef CONFIG_PROFILER
4051 
4052 /* avoid copy/paste errors */
4053 #define PROF_ADD(to, from, field)                       \
4054     do {                                                \
4055         (to)->field += qatomic_read(&((from)->field));  \
4056     } while (0)
4057 
4058 #define PROF_MAX(to, from, field)                                       \
4059     do {                                                                \
4060         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4061         if (val__ > (to)->field) {                                      \
4062             (to)->field = val__;                                        \
4063         }                                                               \
4064     } while (0)
4065 
4066 /* Pass in a zero'ed @prof */
4067 static inline
4068 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4069 {
4070     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4071     unsigned int i;
4072 
4073     for (i = 0; i < n_ctxs; i++) {
4074         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4075         const TCGProfile *orig = &s->prof;
4076 
4077         if (counters) {
4078             PROF_ADD(prof, orig, cpu_exec_time);
4079             PROF_ADD(prof, orig, tb_count1);
4080             PROF_ADD(prof, orig, tb_count);
4081             PROF_ADD(prof, orig, op_count);
4082             PROF_MAX(prof, orig, op_count_max);
4083             PROF_ADD(prof, orig, temp_count);
4084             PROF_MAX(prof, orig, temp_count_max);
4085             PROF_ADD(prof, orig, del_op_count);
4086             PROF_ADD(prof, orig, code_in_len);
4087             PROF_ADD(prof, orig, code_out_len);
4088             PROF_ADD(prof, orig, search_out_len);
4089             PROF_ADD(prof, orig, interm_time);
4090             PROF_ADD(prof, orig, code_time);
4091             PROF_ADD(prof, orig, la_time);
4092             PROF_ADD(prof, orig, opt_time);
4093             PROF_ADD(prof, orig, restore_count);
4094             PROF_ADD(prof, orig, restore_time);
4095         }
4096         if (table) {
4097             int i;
4098 
4099             for (i = 0; i < NB_OPS; i++) {
4100                 PROF_ADD(prof, orig, table_op_count[i]);
4101             }
4102         }
4103     }
4104 }
4105 
4106 #undef PROF_ADD
4107 #undef PROF_MAX
4108 
4109 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4110 {
4111     tcg_profile_snapshot(prof, true, false);
4112 }
4113 
4114 static void tcg_profile_snapshot_table(TCGProfile *prof)
4115 {
4116     tcg_profile_snapshot(prof, false, true);
4117 }
4118 
4119 void tcg_dump_op_count(GString *buf)
4120 {
4121     TCGProfile prof = {};
4122     int i;
4123 
4124     tcg_profile_snapshot_table(&prof);
4125     for (i = 0; i < NB_OPS; i++) {
4126         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4127                                prof.table_op_count[i]);
4128     }
4129 }
4130 
4131 int64_t tcg_cpu_exec_time(void)
4132 {
4133     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4134     unsigned int i;
4135     int64_t ret = 0;
4136 
4137     for (i = 0; i < n_ctxs; i++) {
4138         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4139         const TCGProfile *prof = &s->prof;
4140 
4141         ret += qatomic_read(&prof->cpu_exec_time);
4142     }
4143     return ret;
4144 }
4145 #else
4146 void tcg_dump_op_count(GString *buf)
4147 {
4148     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4149 }
4150 
/*
 * Variant for builds without CONFIG_PROFILER: there is no execution time
 * to report, so log an error naming this function and terminate the
 * process with EXIT_FAILURE.  No value is returned on any path.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4156 #endif
4157 
4158 
4159 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4160 {
4161 #ifdef CONFIG_PROFILER
4162     TCGProfile *prof = &s->prof;
4163 #endif
4164     int i, num_insns;
4165     TCGOp *op;
4166 
4167 #ifdef CONFIG_PROFILER
4168     {
4169         int n = 0;
4170 
4171         QTAILQ_FOREACH(op, &s->ops, link) {
4172             n++;
4173         }
4174         qatomic_set(&prof->op_count, prof->op_count + n);
4175         if (n > prof->op_count_max) {
4176             qatomic_set(&prof->op_count_max, n);
4177         }
4178 
4179         n = s->nb_temps;
4180         qatomic_set(&prof->temp_count, prof->temp_count + n);
4181         if (n > prof->temp_count_max) {
4182             qatomic_set(&prof->temp_count_max, n);
4183         }
4184     }
4185 #endif
4186 
4187 #ifdef DEBUG_DISAS
4188     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4189                  && qemu_log_in_addr_range(tb->pc))) {
4190         FILE *logfile = qemu_log_lock();
4191         qemu_log("OP:\n");
4192         tcg_dump_ops(s, false);
4193         qemu_log("\n");
4194         qemu_log_unlock(logfile);
4195     }
4196 #endif
4197 
4198 #ifdef CONFIG_DEBUG_TCG
4199     /* Ensure all labels referenced have been emitted.  */
4200     {
4201         TCGLabel *l;
4202         bool error = false;
4203 
4204         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4205             if (unlikely(!l->present) && l->refs) {
4206                 qemu_log_mask(CPU_LOG_TB_OP,
4207                               "$L%d referenced but not present.\n", l->id);
4208                 error = true;
4209             }
4210         }
4211         assert(!error);
4212     }
4213 #endif
4214 
4215 #ifdef CONFIG_PROFILER
4216     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4217 #endif
4218 
4219 #ifdef USE_TCG_OPTIMIZATIONS
4220     tcg_optimize(s);
4221 #endif
4222 
4223 #ifdef CONFIG_PROFILER
4224     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4225     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4226 #endif
4227 
4228     reachable_code_pass(s);
4229     liveness_pass_1(s);
4230 
4231     if (s->nb_indirects > 0) {
4232 #ifdef DEBUG_DISAS
4233         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4234                      && qemu_log_in_addr_range(tb->pc))) {
4235             FILE *logfile = qemu_log_lock();
4236             qemu_log("OP before indirect lowering:\n");
4237             tcg_dump_ops(s, false);
4238             qemu_log("\n");
4239             qemu_log_unlock(logfile);
4240         }
4241 #endif
4242         /* Replace indirect temps with direct temps.  */
4243         if (liveness_pass_2(s)) {
4244             /* If changes were made, re-run liveness.  */
4245             liveness_pass_1(s);
4246         }
4247     }
4248 
4249 #ifdef CONFIG_PROFILER
4250     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4251 #endif
4252 
4253 #ifdef DEBUG_DISAS
4254     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4255                  && qemu_log_in_addr_range(tb->pc))) {
4256         FILE *logfile = qemu_log_lock();
4257         qemu_log("OP after optimization and liveness analysis:\n");
4258         tcg_dump_ops(s, true);
4259         qemu_log("\n");
4260         qemu_log_unlock(logfile);
4261     }
4262 #endif
4263 
4264     tcg_reg_alloc_start(s);
4265 
4266     /*
4267      * Reset the buffer pointers when restarting after overflow.
4268      * TODO: Move this into translate-all.c with the rest of the
4269      * buffer management.  Having only this done here is confusing.
4270      */
4271     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4272     s->code_ptr = s->code_buf;
4273 
4274 #ifdef TCG_TARGET_NEED_LDST_LABELS
4275     QSIMPLEQ_INIT(&s->ldst_labels);
4276 #endif
4277 #ifdef TCG_TARGET_NEED_POOL_LABELS
4278     s->pool_labels = NULL;
4279 #endif
4280 
4281     num_insns = -1;
4282     QTAILQ_FOREACH(op, &s->ops, link) {
4283         TCGOpcode opc = op->opc;
4284 
4285 #ifdef CONFIG_PROFILER
4286         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4287 #endif
4288 
4289         switch (opc) {
4290         case INDEX_op_mov_i32:
4291         case INDEX_op_mov_i64:
4292         case INDEX_op_mov_vec:
4293             tcg_reg_alloc_mov(s, op);
4294             break;
4295         case INDEX_op_dup_vec:
4296             tcg_reg_alloc_dup(s, op);
4297             break;
4298         case INDEX_op_insn_start:
4299             if (num_insns >= 0) {
4300                 size_t off = tcg_current_code_size(s);
4301                 s->gen_insn_end_off[num_insns] = off;
4302                 /* Assert that we do not overflow our stored offset.  */
4303                 assert(s->gen_insn_end_off[num_insns] == off);
4304             }
4305             num_insns++;
4306             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4307                 target_ulong a;
4308 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4309                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4310 #else
4311                 a = op->args[i];
4312 #endif
4313                 s->gen_insn_data[num_insns][i] = a;
4314             }
4315             break;
4316         case INDEX_op_discard:
4317             temp_dead(s, arg_temp(op->args[0]));
4318             break;
4319         case INDEX_op_set_label:
4320             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4321             tcg_out_label(s, arg_label(op->args[0]));
4322             break;
4323         case INDEX_op_call:
4324             tcg_reg_alloc_call(s, op);
4325             break;
4326         case INDEX_op_dup2_vec:
4327             if (tcg_reg_alloc_dup2(s, op)) {
4328                 break;
4329             }
4330             /* fall through */
4331         default:
4332             /* Sanity check that we've not introduced any unhandled opcodes. */
4333             tcg_debug_assert(tcg_op_supported(opc));
4334             /* Note: in order to speed up the code, it would be much
4335                faster to have specialized register allocator functions for
4336                some common argument patterns */
4337             tcg_reg_alloc_op(s, op);
4338             break;
4339         }
4340 #ifdef CONFIG_DEBUG_TCG
4341         check_regs(s);
4342 #endif
4343         /* Test for (pending) buffer overflow.  The assumption is that any
4344            one operation beginning below the high water mark cannot overrun
4345            the buffer completely.  Thus we can test for overflow after
4346            generating code without having to check during generation.  */
4347         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4348             return -1;
4349         }
4350         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4351         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4352             return -2;
4353         }
4354     }
4355     tcg_debug_assert(num_insns >= 0);
4356     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4357 
4358     /* Generate TB finalization at the end of block */
4359 #ifdef TCG_TARGET_NEED_LDST_LABELS
4360     i = tcg_out_ldst_finalize(s);
4361     if (i < 0) {
4362         return i;
4363     }
4364 #endif
4365 #ifdef TCG_TARGET_NEED_POOL_LABELS
4366     i = tcg_out_pool_finalize(s);
4367     if (i < 0) {
4368         return i;
4369     }
4370 #endif
4371     if (!tcg_resolve_relocs(s)) {
4372         return -2;
4373     }
4374 
4375 #ifndef CONFIG_TCG_INTERPRETER
4376     /* flush instruction cache */
4377     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4378                         (uintptr_t)s->code_buf,
4379                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4380 #endif
4381 
4382     return tcg_current_code_size(s);
4383 }
4384 
4385 #ifdef CONFIG_PROFILER
4386 void tcg_dump_info(GString *buf)
4387 {
4388     TCGProfile prof = {};
4389     const TCGProfile *s;
4390     int64_t tb_count;
4391     int64_t tb_div_count;
4392     int64_t tot;
4393 
4394     tcg_profile_snapshot_counters(&prof);
4395     s = &prof;
4396     tb_count = s->tb_count;
4397     tb_div_count = tb_count ? tb_count : 1;
4398     tot = s->interm_time + s->code_time;
4399 
4400     g_string_append_printf(buf, "JIT cycles          %" PRId64
4401                            " (%0.3f s at 2.4 GHz)\n",
4402                            tot, tot / 2.4e9);
4403     g_string_append_printf(buf, "translated TBs      %" PRId64
4404                            " (aborted=%" PRId64 " %0.1f%%)\n",
4405                            tb_count, s->tb_count1 - tb_count,
4406                            (double)(s->tb_count1 - s->tb_count)
4407                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4408     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
4409                            (double)s->op_count / tb_div_count, s->op_count_max);
4410     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
4411                            (double)s->del_op_count / tb_div_count);
4412     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
4413                            (double)s->temp_count / tb_div_count,
4414                            s->temp_count_max);
4415     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
4416                            (double)s->code_out_len / tb_div_count);
4417     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
4418                            (double)s->search_out_len / tb_div_count);
4419 
4420     g_string_append_printf(buf, "cycles/op           %0.1f\n",
4421                            s->op_count ? (double)tot / s->op_count : 0);
4422     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
4423                            s->code_in_len ? (double)tot / s->code_in_len : 0);
4424     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
4425                            s->code_out_len ? (double)tot / s->code_out_len : 0);
4426     g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
4427                            s->search_out_len ?
4428                            (double)tot / s->search_out_len : 0);
4429     if (tot == 0) {
4430         tot = 1;
4431     }
4432     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
4433                            (double)s->interm_time / tot * 100.0);
4434     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
4435                            (double)s->code_time / tot * 100.0);
4436     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
4437                            (double)s->opt_time / (s->code_time ?
4438                                                   s->code_time : 1)
4439                            * 100.0);
4440     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
4441                            (double)s->la_time / (s->code_time ?
4442                                                  s->code_time : 1) * 100.0);
4443     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
4444                            s->restore_count);
4445     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
4446                            s->restore_count ?
4447                            (double)s->restore_time / s->restore_count : 0);
4448 }
4449 #else
4450 void tcg_dump_info(GString *buf)
4451 {
4452     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4453 }
4454 #endif
4455 
4456 #ifdef ELF_HOST_MACHINE
4457 /* In order to use this feature, the backend needs to do three things:
4458 
4459    (1) Define ELF_HOST_MACHINE to indicate both what value to
4460        put into the ELF image and to indicate support for the feature.
4461 
4462    (2) Define tcg_register_jit.  This should create a buffer containing
4463        the contents of a .debug_frame section that describes the post-
4464        prologue unwind info for the tcg machine.
4465 
4466    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4467 */
4468 
4469 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Action codes stored in jit_descriptor.action_flag.  The numeric values
 * are part of the debugger-facing interface and must not change.
 */
typedef enum {
    JIT_NOACTION      = 0,  /* nothing pending */
    JIT_REGISTER_FN   = 1,  /* relevant_entry is being registered */
    JIT_UNREGISTER_FN = 2   /* counterpart of JIT_REGISTER_FN */
} jit_actions_t;
4475 
/*
 * One registered symbol file, as seen by the debugger.  Field order and
 * types are dictated by the debugger interface; do not rearrange.
 */
struct jit_code_entry {
    /* Links to neighbouring entries in the descriptor's list. */
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    /* Start of the in-memory symbol file (an ELF image in this file). */
    const void *symfile_addr;
    /* Size of that symbol file in bytes. */
    uint64_t symfile_size;
};
4482 
/*
 * Root object the debugger looks up by the name __jit_debug_descriptor.
 * Field order and types are dictated by the debugger interface; do not
 * rearrange.
 */
struct jit_descriptor {
    /* Interface version; this file uses 1. */
    uint32_t version;
    /* One of the jit_actions_t values. */
    uint32_t action_flag;
    /* Entry that the current action_flag refers to. */
    struct jit_code_entry *relevant_entry;
    /* Head of the list of registered entries. */
    struct jit_code_entry *first_entry;
};
4489 
/*
 * Hook called after __jit_debug_descriptor has been updated.  It does no
 * work of its own; it only has to exist as a real, out-of-line function,
 * hence the noinline attribute on the prototype.
 */
void __jit_debug_register_code(void) __attribute__((noinline));

void __jit_debug_register_code(void)
{
    /* Empty asm statement so the body is not seen as completely empty. */
    __asm__("");
}
4495 
4496 /* Must statically initialize the version, because GDB may check
4497    the version before we can set it.  */
4498 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4499 
4500 /* End GDB interface.  */
4501 
/*
 * Look up @str in the ELF-style string table @strtab and return its byte
 * offset from the start of the table.
 *
 * @strtab is a sequence of NUL-terminated strings whose first byte (index
 * 0) is the reserved empty name; the search therefore starts at index 1.
 * The table is expected to end with an extra NUL after the last string
 * (true for a zero-padded char array), which is seen here as an empty
 * entry and marks the end of the table.
 *
 * Returns the offset of the match, or 0 (the reserved "no name" index)
 * if @str is not present, instead of scanning past the end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    for (;;) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        if (*p == '\0') {
            /* Empty entry: reached the terminator without a match. */
            return 0;
        }
        /* Skip this string and its terminating NUL. */
        p += strlen(p) + 1;
    }
}
4513 
4514 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4515                                  const void *debug_frame,
4516                                  size_t debug_frame_size)
4517 {
4518     struct __attribute__((packed)) DebugInfo {
4519         uint32_t  len;
4520         uint16_t  version;
4521         uint32_t  abbrev;
4522         uint8_t   ptr_size;
4523         uint8_t   cu_die;
4524         uint16_t  cu_lang;
4525         uintptr_t cu_low_pc;
4526         uintptr_t cu_high_pc;
4527         uint8_t   fn_die;
4528         char      fn_name[16];
4529         uintptr_t fn_low_pc;
4530         uintptr_t fn_high_pc;
4531         uint8_t   cu_eoc;
4532     };
4533 
4534     struct ElfImage {
4535         ElfW(Ehdr) ehdr;
4536         ElfW(Phdr) phdr;
4537         ElfW(Shdr) shdr[7];
4538         ElfW(Sym)  sym[2];
4539         struct DebugInfo di;
4540         uint8_t    da[24];
4541         char       str[80];
4542     };
4543 
4544     struct ElfImage *img;
4545 
4546     static const struct ElfImage img_template = {
4547         .ehdr = {
4548             .e_ident[EI_MAG0] = ELFMAG0,
4549             .e_ident[EI_MAG1] = ELFMAG1,
4550             .e_ident[EI_MAG2] = ELFMAG2,
4551             .e_ident[EI_MAG3] = ELFMAG3,
4552             .e_ident[EI_CLASS] = ELF_CLASS,
4553             .e_ident[EI_DATA] = ELF_DATA,
4554             .e_ident[EI_VERSION] = EV_CURRENT,
4555             .e_type = ET_EXEC,
4556             .e_machine = ELF_HOST_MACHINE,
4557             .e_version = EV_CURRENT,
4558             .e_phoff = offsetof(struct ElfImage, phdr),
4559             .e_shoff = offsetof(struct ElfImage, shdr),
4560             .e_ehsize = sizeof(ElfW(Shdr)),
4561             .e_phentsize = sizeof(ElfW(Phdr)),
4562             .e_phnum = 1,
4563             .e_shentsize = sizeof(ElfW(Shdr)),
4564             .e_shnum = ARRAY_SIZE(img->shdr),
4565             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4566 #ifdef ELF_HOST_FLAGS
4567             .e_flags = ELF_HOST_FLAGS,
4568 #endif
4569 #ifdef ELF_OSABI
4570             .e_ident[EI_OSABI] = ELF_OSABI,
4571 #endif
4572         },
4573         .phdr = {
4574             .p_type = PT_LOAD,
4575             .p_flags = PF_X,
4576         },
4577         .shdr = {
4578             [0] = { .sh_type = SHT_NULL },
4579             /* Trick: The contents of code_gen_buffer are not present in
4580                this fake ELF file; that got allocated elsewhere.  Therefore
4581                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4582                will not look for contents.  We can record any address.  */
4583             [1] = { /* .text */
4584                 .sh_type = SHT_NOBITS,
4585                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4586             },
4587             [2] = { /* .debug_info */
4588                 .sh_type = SHT_PROGBITS,
4589                 .sh_offset = offsetof(struct ElfImage, di),
4590                 .sh_size = sizeof(struct DebugInfo),
4591             },
4592             [3] = { /* .debug_abbrev */
4593                 .sh_type = SHT_PROGBITS,
4594                 .sh_offset = offsetof(struct ElfImage, da),
4595                 .sh_size = sizeof(img->da),
4596             },
4597             [4] = { /* .debug_frame */
4598                 .sh_type = SHT_PROGBITS,
4599                 .sh_offset = sizeof(struct ElfImage),
4600             },
4601             [5] = { /* .symtab */
4602                 .sh_type = SHT_SYMTAB,
4603                 .sh_offset = offsetof(struct ElfImage, sym),
4604                 .sh_size = sizeof(img->sym),
4605                 .sh_info = 1,
4606                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4607                 .sh_entsize = sizeof(ElfW(Sym)),
4608             },
4609             [6] = { /* .strtab */
4610                 .sh_type = SHT_STRTAB,
4611                 .sh_offset = offsetof(struct ElfImage, str),
4612                 .sh_size = sizeof(img->str),
4613             }
4614         },
4615         .sym = {
4616             [1] = { /* code_gen_buffer */
4617                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4618                 .st_shndx = 1,
4619             }
4620         },
4621         .di = {
4622             .len = sizeof(struct DebugInfo) - 4,
4623             .version = 2,
4624             .ptr_size = sizeof(void *),
4625             .cu_die = 1,
4626             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4627             .fn_die = 2,
4628             .fn_name = "code_gen_buffer"
4629         },
4630         .da = {
4631             1,          /* abbrev number (the cu) */
4632             0x11, 1,    /* DW_TAG_compile_unit, has children */
4633             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4634             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4635             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4636             0, 0,       /* end of abbrev */
4637             2,          /* abbrev number (the fn) */
4638             0x2e, 0,    /* DW_TAG_subprogram, no children */
4639             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4640             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4641             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4642             0, 0,       /* end of abbrev */
4643             0           /* no more abbrev */
4644         },
4645         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4646                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4647     };
4648 
4649     /* We only need a single jit entry; statically allocate it.  */
4650     static struct jit_code_entry one_entry;
4651 
4652     uintptr_t buf = (uintptr_t)buf_ptr;
4653     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4654     DebugFrameHeader *dfh;
4655 
4656     img = g_malloc(img_size);
4657     *img = img_template;
4658 
4659     img->phdr.p_vaddr = buf;
4660     img->phdr.p_paddr = buf;
4661     img->phdr.p_memsz = buf_size;
4662 
4663     img->shdr[1].sh_name = find_string(img->str, ".text");
4664     img->shdr[1].sh_addr = buf;
4665     img->shdr[1].sh_size = buf_size;
4666 
4667     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4668     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4669 
4670     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4671     img->shdr[4].sh_size = debug_frame_size;
4672 
4673     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4674     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4675 
4676     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4677     img->sym[1].st_value = buf;
4678     img->sym[1].st_size = buf_size;
4679 
4680     img->di.cu_low_pc = buf;
4681     img->di.cu_high_pc = buf + buf_size;
4682     img->di.fn_low_pc = buf;
4683     img->di.fn_high_pc = buf + buf_size;
4684 
4685     dfh = (DebugFrameHeader *)(img + 1);
4686     memcpy(dfh, debug_frame, debug_frame_size);
4687     dfh->fde.func_start = buf;
4688     dfh->fde.func_len = buf_size;
4689 
4690 #ifdef DEBUG_JIT
4691     /* Enable this block to be able to debug the ELF image file creation.
4692        One can use readelf, objdump, or other inspection utilities.  */
4693     {
4694         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4695         if (f) {
4696             if (fwrite(img, img_size, 1, f) != img_size) {
4697                 /* Avoid stupid unused return value warning for fwrite.  */
4698             }
4699             fclose(f);
4700         }
4701     }
4702 #endif
4703 
4704     one_entry.symfile_addr = img;
4705     one_entry.symfile_size = img_size;
4706 
4707     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4708     __jit_debug_descriptor.relevant_entry = &one_entry;
4709     __jit_debug_descriptor.first_entry = &one_entry;
4710     __jit_debug_register_code();
4711 }
4712 #else
4713 /* No support for the feature.  Provide the entry point expected by exec.c,
4714    and implement the internal function we declared earlier.  */
4715 
/*
 * Variant used when ELF_HOST_MACHINE is not defined: there is no image
 * to build, so all four arguments are ignored.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty. */
}
4721 
/*
 * Entry point for builds where ELF_HOST_MACHINE is not defined: nothing
 * is registered and both arguments are ignored.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    /* Intentionally empty. */
}
4725 #endif /* ELF_HOST_MACHINE */
4726 
4727 #if !TCG_TARGET_MAYBE_vec
4728 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4729 {
4730     g_assert_not_reached();
4731 }
4732 #endif
4733