xref: /qemu/tcg/tcg.c (revision b4ad82aa)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* Define this to use liveness analysis (better code). */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 #include "qemu/cacheinfo.h"
40 
41 /* Note: the long-term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #ifdef HOST_WORDS_BIGENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg-internal.h"
64 
65 #ifdef CONFIG_TCG_INTERPRETER
66 #include <ffi.h>
67 #endif
68 
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70    used here. */
71 static void tcg_target_init(TCGContext *s);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
77 typedef struct {
78     uint32_t len __attribute__((aligned((sizeof(void *)))));
79     uint32_t id;
80     uint8_t version;
81     char augmentation[1];
82     uint8_t code_align;
83     uint8_t data_align;
84     uint8_t return_column;
85 } DebugFrameCIE;
86 
87 typedef struct QEMU_PACKED {
88     uint32_t len __attribute__((aligned((sizeof(void *)))));
89     uint32_t cie_offset;
90     uintptr_t func_start;
91     uintptr_t func_len;
92 } DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98 
99 static void tcg_register_jit_int(const void *buf, size_t size,
100                                  const void *debug_frame,
101                                  size_t debug_frame_size)
102     __attribute__((unused));
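
/*
 * Editor's sketch (hypothetical values, not QEMU's actual tables): each
 * backend extends these headers with its own register-save rules and
 * hands the result to tcg_register_jit_int() so GDB can unwind through
 * generated code.  A minimal CIE might look like this; the alignment
 * factors and return column shown are per-host assumptions.
 */
static const DebugFrameCIE example_debug_cie __attribute__((unused)) = {
    .id = -1,               /* CIE marker, distinguishes it from an FDE */
    .version = 1,
    .augmentation = "",     /* no augmentation data */
    .code_align = 1,
    .data_align = 0x78,     /* sleb128 -8: stack slots grow downward */
    .return_column = 16,    /* e.g. x86-64 %rip */
};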
103 
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
106                        intptr_t arg2);
107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
108 static void tcg_out_movi(TCGContext *s, TCGType type,
109                          TCGReg ret, tcg_target_long arg);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111                        const TCGArg args[TCG_MAX_OP_ARGS],
112                        const int const_args[TCG_MAX_OP_ARGS]);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115                             TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117                              TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119                              TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
121                            unsigned vecl, unsigned vece,
122                            const TCGArg args[TCG_MAX_OP_ARGS],
123                            const int const_args[TCG_MAX_OP_ARGS]);
124 #else
125 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
126                                    TCGReg dst, TCGReg src)
127 {
128     g_assert_not_reached();
129 }
130 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
131                                     TCGReg dst, TCGReg base, intptr_t offset)
132 {
133     g_assert_not_reached();
134 }
135 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
136                                     TCGReg dst, int64_t arg)
137 {
138     g_assert_not_reached();
139 }
140 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
141                                   unsigned vecl, unsigned vece,
142                                   const TCGArg args[TCG_MAX_OP_ARGS],
143                                   const int const_args[TCG_MAX_OP_ARGS])
144 {
145     g_assert_not_reached();
146 }
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149                        intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151                         TCGReg base, intptr_t ofs);
152 #ifdef CONFIG_TCG_INTERPRETER
153 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
154                          ffi_cif *cif);
155 #else
156 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
157 #endif
158 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
159 #ifdef TCG_TARGET_NEED_LDST_LABELS
160 static int tcg_out_ldst_finalize(TCGContext *s);
161 #endif
162 
163 TCGContext tcg_init_ctx;
164 __thread TCGContext *tcg_ctx;
165 
166 TCGContext **tcg_ctxs;
167 unsigned int tcg_cur_ctxs;
168 unsigned int tcg_max_ctxs;
169 TCGv_env cpu_env = 0;
170 const void *tcg_code_gen_epilogue;
171 uintptr_t tcg_splitwx_diff;
172 
173 #ifndef CONFIG_TCG_INTERPRETER
174 tcg_prologue_fn *tcg_qemu_tb_exec;
175 #endif
176 
177 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
178 static TCGRegSet tcg_target_call_clobber_regs;
179 
180 #if TCG_TARGET_INSN_UNIT_SIZE == 1
181 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
182 {
183     *s->code_ptr++ = v;
184 }
185 
186 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
187                                                       uint8_t v)
188 {
189     *p = v;
190 }
191 #endif
192 
193 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
194 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
195 {
196     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
197         *s->code_ptr++ = v;
198     } else {
199         tcg_insn_unit *p = s->code_ptr;
200         memcpy(p, &v, sizeof(v));
201         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
202     }
203 }
204 
205 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
206                                                        uint16_t v)
207 {
208     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
209         *p = v;
210     } else {
211         memcpy(p, &v, sizeof(v));
212     }
213 }
214 #endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
217 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
229                                                        uint32_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
240 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
252                                                        uint64_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
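
/*
 * Editor's example (hypothetical, not part of QEMU): on a host whose
 * insn units are single bytes (TCG_TARGET_INSN_UNIT_SIZE == 1, e.g.
 * x86), a backend composes instructions from the helpers above; the
 * memcpy() paths keep multi-unit stores safe on hosts that require
 * aligned accesses.
 */
#if TCG_TARGET_INSN_UNIT_SIZE == 1
static void __attribute__((unused))
example_out_mov_eax_imm(TCGContext *s, uint32_t imm)
{
    tcg_out8(s, 0xb8);   /* mov $imm, %eax: one opcode byte ... */
    tcg_out32(s, imm);   /* ... followed by a 32-bit immediate */
}
#endif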
261 
262 /* label relocation processing */
263 
264 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
265                           TCGLabel *l, intptr_t addend)
266 {
267     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
268 
269     r->type = type;
270     r->ptr = code_ptr;
271     r->addend = addend;
272     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
273 }
274 
275 static void tcg_out_label(TCGContext *s, TCGLabel *l)
276 {
277     tcg_debug_assert(!l->has_value);
278     l->has_value = 1;
279     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
280 }
281 
282 TCGLabel *gen_new_label(void)
283 {
284     TCGContext *s = tcg_ctx;
285     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
286 
287     memset(l, 0, sizeof(TCGLabel));
288     l->id = s->nb_labels++;
289     QSIMPLEQ_INIT(&l->relocs);
290 
291     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
292 
293     return l;
294 }
295 
296 static bool tcg_resolve_relocs(TCGContext *s)
297 {
298     TCGLabel *l;
299 
300     QSIMPLEQ_FOREACH(l, &s->labels, next) {
301         TCGRelocation *r;
302         uintptr_t value = l->u.value;
303 
304         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
305             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
306                 return false;
307             }
308         }
309     }
310     return true;
311 }
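
/*
 * Editor's sketch (hypothetical relocation type): the usual backend
 * pattern for a forward branch.  While the label is unbound, the branch
 * is emitted with a placeholder and a relocation is queued; once
 * tcg_out_label() binds the label, tcg_resolve_relocs() rewrites the
 * placeholder through the backend's patch_reloc().
 */
static void __attribute__((unused))
example_out_branch(TCGContext *s, TCGLabel *l)
{
    if (!l->has_value) {
        /* 0 stands in for a real R_<host> relocation type. */
        tcg_out_reloc(s, s->code_ptr, 0 /* R_EXAMPLE_PCREL */, l, 0);
    }
    /* ...emit the branch insn here; its offset is patched later if queued... */
}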
312 
313 static void set_jmp_reset_offset(TCGContext *s, int which)
314 {
315     /*
316      * We will check for overflow at the end of the opcode loop in
317      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
318      */
319     s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
320 }
321 
322 /* Signal overflow, starting over with fewer guest insns. */
323 static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
324 {
325     siglongjmp(s->jmp_trans, -2);
326 }
327 
328 #define C_PFX1(P, A)                    P##A
329 #define C_PFX2(P, A, B)                 P##A##_##B
330 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
331 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
332 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
333 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
334 
335 /* Define an enumeration for the various combinations. */
336 
337 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
338 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
339 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
340 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
341 
342 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
343 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
344 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
345 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
346 
347 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
348 
349 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
350 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
351 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
352 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
353 
354 typedef enum {
355 #include "tcg-target-con-set.h"
356 } TCGConstraintSetIndex;
357 
358 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
359 
360 #undef C_O0_I1
361 #undef C_O0_I2
362 #undef C_O0_I3
363 #undef C_O0_I4
364 #undef C_O1_I1
365 #undef C_O1_I2
366 #undef C_O1_I3
367 #undef C_O1_I4
368 #undef C_N1_I2
369 #undef C_O2_I1
370 #undef C_O2_I2
371 #undef C_O2_I3
372 #undef C_O2_I4
373 
374 /* Put all of the constraint sets into an array, indexed by the enum. */
375 
376 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
377 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
378 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
379 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
380 
381 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
382 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
383 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
384 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
385 
386 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
387 
388 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
389 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
390 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
391 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
392 
393 static const TCGTargetOpDef constraint_sets[] = {
394 #include "tcg-target-con-set.h"
395 };
396 
397 
398 #undef C_O0_I1
399 #undef C_O0_I2
400 #undef C_O0_I3
401 #undef C_O0_I4
402 #undef C_O1_I1
403 #undef C_O1_I2
404 #undef C_O1_I3
405 #undef C_O1_I4
406 #undef C_N1_I2
407 #undef C_O2_I1
408 #undef C_O2_I2
409 #undef C_O2_I3
410 #undef C_O2_I4
411 
412 /* Expand the enumerator to be returned from tcg_target_op_def(). */
413 
414 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
415 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
416 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
417 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
418 
419 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
420 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
421 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
422 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
423 
424 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
425 
426 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
427 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
428 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
429 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
430 
431 #include "tcg-target.c.inc"
432 
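/*
 * Editor's toy model (hypothetical EX_* names): the C_O*_I* machinery
 * above is an X-macro.  Each line of tcg-target-con-set.h, e.g.
 * C_O1_I2(r, r, ri), is expanded three times: once into an enumerator
 * (c_o1_i2_r_r_ri), once into a constraint-string table entry, and once
 * into the value returned by tcg_target_op_def().  The same trick in
 * miniature:
 */
#define EX_PFX2(P, A, B)  P##A##_##B
#define EX_SET(A, B)      EX_PFX2(ex_, A, B),
enum { EX_SET(r, ri) };                   /* expands to: ex_r_ri, */
#undef EX_SET
#define EX_SET(A, B)      { #A, #B },
static const char * const example_con_strs[][2] __attribute__((unused)) = {
    EX_SET(r, ri)                         /* expands to: { "r", "ri" }, */
};
#undef EX_SET
#undef EX_PFX2
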
433 static void alloc_tcg_plugin_context(TCGContext *s)
434 {
435 #ifdef CONFIG_PLUGIN
436     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
437     s->plugin_tb->insns =
438         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
439 #endif
440 }
441 
442 /*
443  * All TCG threads except the parent (i.e. the one that called tcg_context_init
444  * and registered the target's TCG globals) must register with this function
445  * before initiating translation.
446  *
447  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
448  * of tcg_region_init() for the reasoning behind this.
449  *
450  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
451  * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
452  * is not used anymore for translation once this function is called.
453  *
454  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
455  * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
456  */
457 #ifdef CONFIG_USER_ONLY
458 void tcg_register_thread(void)
459 {
460     tcg_ctx = &tcg_init_ctx;
461 }
462 #else
463 void tcg_register_thread(void)
464 {
465     TCGContext *s = g_malloc(sizeof(*s));
466     unsigned int i, n;
467 
468     *s = tcg_init_ctx;
469 
470     /* Relink mem_base.  */
471     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
472         if (tcg_init_ctx.temps[i].mem_base) {
473             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
474             tcg_debug_assert(b >= 0 && b < n);
475             s->temps[i].mem_base = &s->temps[b];
476         }
477     }
478 
479     /* Claim an entry in tcg_ctxs */
480     n = qatomic_fetch_inc(&tcg_cur_ctxs);
481     g_assert(n < tcg_max_ctxs);
482     qatomic_set(&tcg_ctxs[n], s);
483 
484     if (n > 0) {
485         alloc_tcg_plugin_context(s);
486         tcg_region_initial_alloc(s);
487     }
488 
489     tcg_ctx = s;
490 }
491 #endif /* !CONFIG_USER_ONLY */
492 
493 /* pool based memory allocation */
494 void *tcg_malloc_internal(TCGContext *s, int size)
495 {
496     TCGPool *p;
497     int pool_size;
498 
499     if (size > TCG_POOL_CHUNK_SIZE) {
500         /* big malloc: insert a new pool (XXX: could optimize) */
501         p = g_malloc(sizeof(TCGPool) + size);
502         p->size = size;
503         p->next = s->pool_first_large;
504         s->pool_first_large = p;
505         return p->data;
506     } else {
507         p = s->pool_current;
508         if (!p) {
509             p = s->pool_first;
510             if (!p)
511                 goto new_pool;
512         } else {
513             if (!p->next) {
514             new_pool:
515                 pool_size = TCG_POOL_CHUNK_SIZE;
516                 p = g_malloc(sizeof(TCGPool) + pool_size);
517                 p->size = pool_size;
518                 p->next = NULL;
519                 if (s->pool_current)
520                     s->pool_current->next = p;
521                 else
522                     s->pool_first = p;
523             } else {
524                 p = p->next;
525             }
526         }
527     }
528     s->pool_current = p;
529     s->pool_cur = p->data + size;
530     s->pool_end = p->data + p->size;
531     return p->data;
532 }
533 
534 void tcg_pool_reset(TCGContext *s)
535 {
536     TCGPool *p, *t;
537     for (p = s->pool_first_large; p; p = t) {
538         t = p->next;
539         g_free(p);
540     }
541     s->pool_first_large = NULL;
542     s->pool_cur = s->pool_end = NULL;
543     s->pool_current = NULL;
544 }
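
/*
 * Editor's sketch of the fast path (assumed shape; the real inline,
 * tcg_malloc(), lives in tcg/tcg.h): allocation just bumps pool_cur,
 * and only falls back to tcg_malloc_internal() above when the current
 * chunk is exhausted or the request is large.
 */
static inline void *example_tcg_malloc(TCGContext *s, int size)
{
    uint8_t *ptr = s->pool_cur;
    uint8_t *ptr_end = ptr + ROUND_UP(size, sizeof(long));

    if (unlikely(ptr_end > s->pool_end)) {
        return tcg_malloc_internal(s, size);
    }
    s->pool_cur = ptr_end;
    return ptr;
}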
545 
546 #include "exec/helper-proto.h"
547 
548 static const TCGHelperInfo all_helpers[] = {
549 #include "exec/helper-tcg.h"
550 };
551 static GHashTable *helper_table;
552 
553 #ifdef CONFIG_TCG_INTERPRETER
554 static GHashTable *ffi_table;
555 
556 static ffi_type * const typecode_to_ffi[8] = {
557     [dh_typecode_void] = &ffi_type_void,
558     [dh_typecode_i32]  = &ffi_type_uint32,
559     [dh_typecode_s32]  = &ffi_type_sint32,
560     [dh_typecode_i64]  = &ffi_type_uint64,
561     [dh_typecode_s64]  = &ffi_type_sint64,
562     [dh_typecode_ptr]  = &ffi_type_pointer,
563 };
564 #endif
565 
566 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
567 static void process_op_defs(TCGContext *s);
568 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
569                                             TCGReg reg, const char *name);
570 
571 static void tcg_context_init(unsigned max_cpus)
572 {
573     TCGContext *s = &tcg_init_ctx;
574     int op, total_args, n, i;
575     TCGOpDef *def;
576     TCGArgConstraint *args_ct;
577     TCGTemp *ts;
578 
579     memset(s, 0, sizeof(*s));
580     s->nb_globals = 0;
581 
582     /* Count the total number of arguments and allocate the
583        corresponding space.  */
584     total_args = 0;
585     for (op = 0; op < NB_OPS; op++) {
586         def = &tcg_op_defs[op];
587         n = def->nb_iargs + def->nb_oargs;
588         total_args += n;
589     }
590 
591     args_ct = g_new0(TCGArgConstraint, total_args);
592 
593     for (op = 0; op < NB_OPS; op++) {
594         def = &tcg_op_defs[op];
595         def->args_ct = args_ct;
596         n = def->nb_iargs + def->nb_oargs;
597         args_ct += n;
598     }
599 
600     /* Register helpers.  */
601     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
602     helper_table = g_hash_table_new(NULL, NULL);
603 
604     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
605         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
606                             (gpointer)&all_helpers[i]);
607     }
608 
609 #ifdef CONFIG_TCG_INTERPRETER
610     /* g_direct_hash/equal for direct comparisons on uint32_t.  */
611     ffi_table = g_hash_table_new(NULL, NULL);
612     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
613         struct {
614             ffi_cif cif;
615             ffi_type *args[];
616         } *ca;
617         uint32_t typemask = all_helpers[i].typemask;
618         gpointer hash = (gpointer)(uintptr_t)typemask;
619         ffi_status status;
620         int nargs;
621 
622         if (g_hash_table_lookup(ffi_table, hash)) {
623             continue;
624         }
625 
626         /* Ignoring the return type, find the last non-zero field. */
627         nargs = 32 - clz32(typemask >> 3);
628         nargs = DIV_ROUND_UP(nargs, 3);
629 
630         ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
631         ca->cif.rtype = typecode_to_ffi[typemask & 7];
632         ca->cif.nargs = nargs;
633 
634         if (nargs != 0) {
635             ca->cif.arg_types = ca->args;
636             for (int j = 0; j < nargs; ++j) { /* 'i' indexes all_helpers */
637                 int typecode = extract32(typemask, (j + 1) * 3, 3);
638                 ca->args[j] = typecode_to_ffi[typecode];
639             }
640         }
641 
642         status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
643                               ca->cif.rtype, ca->cif.arg_types);
644         assert(status == FFI_OK);
645 
646         g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
647     }
648 #endif
649 
650     tcg_target_init(s);
651     process_op_defs(s);
652 
653     /* Reverse the order of the saved registers, assuming they're all at
654        the start of tcg_target_reg_alloc_order.  */
655     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
656         int r = tcg_target_reg_alloc_order[n];
657         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
658             break;
659         }
660     }
661     for (i = 0; i < n; ++i) {
662         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
663     }
664     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
665         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
666     }
667 
668     alloc_tcg_plugin_context(s);
669 
670     tcg_ctx = s;
671     /*
672      * In user-mode we simply share the init context among threads, since we
673      * use a single region. See the documentation of tcg_region_init() for the
674      * reasoning behind this.
675      * In softmmu we will have at most max_cpus TCG threads.
676      */
677 #ifdef CONFIG_USER_ONLY
678     tcg_ctxs = &tcg_ctx;
679     tcg_cur_ctxs = 1;
680     tcg_max_ctxs = 1;
681 #else
682     tcg_max_ctxs = max_cpus;
683     tcg_ctxs = g_new0(TCGContext *, max_cpus);
684 #endif
685 
686     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
687     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
688     cpu_env = temp_tcgv_ptr(ts);
689 }
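
/*
 * Editor's worked example: the typemask scanned above packs one 3-bit
 * dh_typecode_* field per slot, the return type in bits [2:0] and
 * argument i in bits [3*(i+1) + 2 : 3*(i+1)].  Recovering the argument
 * count mirrors the arithmetic in tcg_context_init():
 */
static int __attribute__((unused))
example_typemask_nargs(uint32_t typemask)
{
    /* Drop the return type, then count up to the last non-zero field. */
    int bits = 32 - clz32(typemask >> 3);
    return DIV_ROUND_UP(bits, 3);
}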
690 
691 void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
692 {
693     tcg_context_init(max_cpus);
694     tcg_region_init(tb_size, splitwx, max_cpus);
695 }
696 
697 /*
698  * Allocate TBs right before their corresponding translated code, making
699  * sure that TBs and code are on different cache lines.
700  */
701 TranslationBlock *tcg_tb_alloc(TCGContext *s)
702 {
703     uintptr_t align = qemu_icache_linesize;
704     TranslationBlock *tb;
705     void *next;
706 
707  retry:
708     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
709     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
710 
711     if (unlikely(next > s->code_gen_highwater)) {
712         if (tcg_region_alloc(s)) {
713             return NULL;
714         }
715         goto retry;
716     }
717     qatomic_set(&s->code_gen_ptr, next);
718     s->data_gen_ptr = NULL;
719     return tb;
720 }
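
/*
 * Editor's arithmetic example (hypothetical addresses): with a 64-byte
 * icache line and code_gen_ptr at 0x1010, the TB lands at 0x1040 and the
 * translated code at the next 64-byte boundary after the TB, so the two
 * never share a cache line.
 */
static uintptr_t __attribute__((unused))
example_tb_code_start(uintptr_t code_gen_ptr, uintptr_t align)
{
    uintptr_t tb = ROUND_UP(code_gen_ptr, align);
    return ROUND_UP(tb + sizeof(TranslationBlock), align);
}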
721 
722 void tcg_prologue_init(TCGContext *s)
723 {
724     size_t prologue_size;
725 
726     s->code_ptr = s->code_gen_ptr;
727     s->code_buf = s->code_gen_ptr;
728     s->data_gen_ptr = NULL;
729 
730 #ifndef CONFIG_TCG_INTERPRETER
731     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
732 #endif
733 
734 #ifdef TCG_TARGET_NEED_POOL_LABELS
735     s->pool_labels = NULL;
736 #endif
737 
738     qemu_thread_jit_write();
739     /* Generate the prologue.  */
740     tcg_target_qemu_prologue(s);
741 
742 #ifdef TCG_TARGET_NEED_POOL_LABELS
743     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
744     {
745         int result = tcg_out_pool_finalize(s);
746         tcg_debug_assert(result == 0);
747     }
748 #endif
749 
750     prologue_size = tcg_current_code_size(s);
751 
752 #ifndef CONFIG_TCG_INTERPRETER
753     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
754                         (uintptr_t)s->code_buf, prologue_size);
755 #endif
756 
757 #ifdef DEBUG_DISAS
758     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
759         FILE *logfile = qemu_log_lock();
760         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
761         if (s->data_gen_ptr) {
762             size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
763             size_t data_size = prologue_size - code_size;
764             size_t i;
765 
766             log_disas(s->code_gen_ptr, code_size);
767 
768             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
769                 if (sizeof(tcg_target_ulong) == 8) {
770                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
771                              (uintptr_t)s->data_gen_ptr + i,
772                              *(uint64_t *)(s->data_gen_ptr + i));
773                 } else {
774                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
775                              (uintptr_t)s->data_gen_ptr + i,
776                              *(uint32_t *)(s->data_gen_ptr + i));
777                 }
778             }
779         } else {
780             log_disas(s->code_gen_ptr, prologue_size);
781         }
782         qemu_log("\n");
783         qemu_log_flush();
784         qemu_log_unlock(logfile);
785     }
786 #endif
787 
788 #ifndef CONFIG_TCG_INTERPRETER
789     /*
790      * Assert that goto_ptr is implemented completely, setting an epilogue.
791      * For tci, we use NULL as the signal to return from the interpreter,
792      * so skip this check.
793      */
794     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
795 #endif
796 
797     tcg_region_prologue_set(s);
798 }
799 
800 void tcg_func_start(TCGContext *s)
801 {
802     tcg_pool_reset(s);
803     s->nb_temps = s->nb_globals;
804 
805     /* No temps have been previously allocated for size or locality.  */
806     memset(s->free_temps, 0, sizeof(s->free_temps));
807 
808     /* No constant temps have been previously allocated. */
809     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
810         if (s->const_table[i]) {
811             g_hash_table_remove_all(s->const_table[i]);
812         }
813     }
814 
815     s->nb_ops = 0;
816     s->nb_labels = 0;
817     s->current_frame_offset = s->frame_start;
818 
819 #ifdef CONFIG_DEBUG_TCG
820     s->goto_tb_issue_mask = 0;
821 #endif
822 
823     QTAILQ_INIT(&s->ops);
824     QTAILQ_INIT(&s->free_ops);
825     QSIMPLEQ_INIT(&s->labels);
826 }
827 
828 static TCGTemp *tcg_temp_alloc(TCGContext *s)
829 {
830     int n = s->nb_temps++;
831 
832     if (n >= TCG_MAX_TEMPS) {
833         tcg_raise_tb_overflow(s);
834     }
835     return memset(&s->temps[n], 0, sizeof(TCGTemp));
836 }
837 
838 static TCGTemp *tcg_global_alloc(TCGContext *s)
839 {
840     TCGTemp *ts;
841 
842     tcg_debug_assert(s->nb_globals == s->nb_temps);
843     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
844     s->nb_globals++;
845     ts = tcg_temp_alloc(s);
846     ts->kind = TEMP_GLOBAL;
847 
848     return ts;
849 }
850 
851 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
852                                             TCGReg reg, const char *name)
853 {
854     TCGTemp *ts;
855 
856     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
857         tcg_abort();
858     }
859 
860     ts = tcg_global_alloc(s);
861     ts->base_type = type;
862     ts->type = type;
863     ts->kind = TEMP_FIXED;
864     ts->reg = reg;
865     ts->name = name;
866     tcg_regset_set_reg(s->reserved_regs, reg);
867 
868     return ts;
869 }
870 
871 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
872 {
873     s->frame_start = start;
874     s->frame_end = start + size;
875     s->frame_temp
876         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
877 }
878 
879 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
880                                      intptr_t offset, const char *name)
881 {
882     TCGContext *s = tcg_ctx;
883     TCGTemp *base_ts = tcgv_ptr_temp(base);
884     TCGTemp *ts = tcg_global_alloc(s);
885     int indirect_reg = 0, bigendian = 0;
886 #ifdef HOST_WORDS_BIGENDIAN
887     bigendian = 1;
888 #endif
889 
890     switch (base_ts->kind) {
891     case TEMP_FIXED:
892         break;
893     case TEMP_GLOBAL:
894         /* We do not support double-indirect registers.  */
895         tcg_debug_assert(!base_ts->indirect_reg);
896         base_ts->indirect_base = 1;
897         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
898                             ? 2 : 1);
899         indirect_reg = 1;
900         break;
901     default:
902         g_assert_not_reached();
903     }
904 
905     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
906         TCGTemp *ts2 = tcg_global_alloc(s);
907         char buf[64];
908 
909         ts->base_type = TCG_TYPE_I64;
910         ts->type = TCG_TYPE_I32;
911         ts->indirect_reg = indirect_reg;
912         ts->mem_allocated = 1;
913         ts->mem_base = base_ts;
914         ts->mem_offset = offset + bigendian * 4;
915         pstrcpy(buf, sizeof(buf), name);
916         pstrcat(buf, sizeof(buf), "_0");
917         ts->name = strdup(buf);
918 
919         tcg_debug_assert(ts2 == ts + 1);
920         ts2->base_type = TCG_TYPE_I64;
921         ts2->type = TCG_TYPE_I32;
922         ts2->indirect_reg = indirect_reg;
923         ts2->mem_allocated = 1;
924         ts2->mem_base = base_ts;
925         ts2->mem_offset = offset + (1 - bigendian) * 4;
926         pstrcpy(buf, sizeof(buf), name);
927         pstrcat(buf, sizeof(buf), "_1");
928         ts2->name = strdup(buf);
929     } else {
930         ts->base_type = type;
931         ts->type = type;
932         ts->indirect_reg = indirect_reg;
933         ts->mem_allocated = 1;
934         ts->mem_base = base_ts;
935         ts->mem_offset = offset;
936         ts->name = name;
937     }
938     return ts;
939 }
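
/*
 * Editor's worked example: a 64-bit guest global at env offset 0x30,
 * split for a 32-bit host as above.  The "_0" temp is the low half and
 * "_1" the high half; big-endian hosts store the low word at the higher
 * address.
 */
static void __attribute__((unused))
example_split_offsets(intptr_t offset, bool bigendian,
                      intptr_t *lo_off, intptr_t *hi_off)
{
    /* Mirrors the mem_offset arithmetic in the function above:
       offset = 0x30, BE: lo at 0x34, hi at 0x30
       offset = 0x30, LE: lo at 0x30, hi at 0x34 */
    *lo_off = offset + (bigendian ? 4 : 0);
    *hi_off = offset + (bigendian ? 0 : 4);
}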
940 
941 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
942 {
943     TCGContext *s = tcg_ctx;
944     TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
945     TCGTemp *ts;
946     int idx, k;
947 
948     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
949     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
950     if (idx < TCG_MAX_TEMPS) {
951         /* There is already an available temp with the right type.  */
952         clear_bit(idx, s->free_temps[k].l);
953 
954         ts = &s->temps[idx];
955         ts->temp_allocated = 1;
956         tcg_debug_assert(ts->base_type == type);
957         tcg_debug_assert(ts->kind == kind);
958     } else {
959         ts = tcg_temp_alloc(s);
960         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
961             TCGTemp *ts2 = tcg_temp_alloc(s);
962 
963             ts->base_type = type;
964             ts->type = TCG_TYPE_I32;
965             ts->temp_allocated = 1;
966             ts->kind = kind;
967 
968             tcg_debug_assert(ts2 == ts + 1);
969             ts2->base_type = TCG_TYPE_I64;
970             ts2->type = TCG_TYPE_I32;
971             ts2->temp_allocated = 1;
972             ts2->kind = kind;
973         } else {
974             ts->base_type = type;
975             ts->type = type;
976             ts->temp_allocated = 1;
977             ts->kind = kind;
978         }
979     }
980 
981 #if defined(CONFIG_DEBUG_TCG)
982     s->temps_in_use++;
983 #endif
984     return ts;
985 }
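
/*
 * Editor's note (mirrors the indexing above): free temps are binned by
 * base type, with TEMP_LOCAL temps offset by TCG_TYPE_COUNT so the two
 * kinds never share a free list.
 */
static int __attribute__((unused))
example_free_temps_index(TCGType type, bool temp_local)
{
    return type + (temp_local ? TCG_TYPE_COUNT : 0);
}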
986 
987 TCGv_vec tcg_temp_new_vec(TCGType type)
988 {
989     TCGTemp *t;
990 
991 #ifdef CONFIG_DEBUG_TCG
992     switch (type) {
993     case TCG_TYPE_V64:
994         assert(TCG_TARGET_HAS_v64);
995         break;
996     case TCG_TYPE_V128:
997         assert(TCG_TARGET_HAS_v128);
998         break;
999     case TCG_TYPE_V256:
1000         assert(TCG_TARGET_HAS_v256);
1001         break;
1002     default:
1003         g_assert_not_reached();
1004     }
1005 #endif
1006 
1007     t = tcg_temp_new_internal(type, 0);
1008     return temp_tcgv_vec(t);
1009 }
1010 
1011 /* Create a new temp of the same type as an existing temp.  */
1012 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1013 {
1014     TCGTemp *t = tcgv_vec_temp(match);
1015 
1016     tcg_debug_assert(t->temp_allocated != 0);
1017 
1018     t = tcg_temp_new_internal(t->base_type, 0);
1019     return temp_tcgv_vec(t);
1020 }
1021 
1022 void tcg_temp_free_internal(TCGTemp *ts)
1023 {
1024     TCGContext *s = tcg_ctx;
1025     int k, idx;
1026 
1027     /* In order to simplify users of tcg_constant_*, silently ignore free. */
1028     if (ts->kind == TEMP_CONST) {
1029         return;
1030     }
1031 
1032 #if defined(CONFIG_DEBUG_TCG)
1033     s->temps_in_use--;
1034     if (s->temps_in_use < 0) {
1035         fprintf(stderr, "More temporaries freed than allocated!\n");
1036     }
1037 #endif
1038 
1039     tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1040     tcg_debug_assert(ts->temp_allocated != 0);
1041     ts->temp_allocated = 0;
1042 
1043     idx = temp_idx(ts);
1044     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1045     set_bit(idx, s->free_temps[k].l);
1046 }
1047 
1048 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1049 {
1050     TCGContext *s = tcg_ctx;
1051     GHashTable *h = s->const_table[type];
1052     TCGTemp *ts;
1053 
1054     if (h == NULL) {
1055         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1056         s->const_table[type] = h;
1057     }
1058 
1059     ts = g_hash_table_lookup(h, &val);
1060     if (ts == NULL) {
1061         ts = tcg_temp_alloc(s);
1062 
1063         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1064             TCGTemp *ts2 = tcg_temp_alloc(s);
1065 
1066             ts->base_type = TCG_TYPE_I64;
1067             ts->type = TCG_TYPE_I32;
1068             ts->kind = TEMP_CONST;
1069             ts->temp_allocated = 1;
1070             /*
1071              * Retain the full value of the 64-bit constant in the low
1072              * part, so that the hash table works.  Actual uses will
1073              * truncate the value to the low part.
1074              */
1075             ts->val = val;
1076 
1077             tcg_debug_assert(ts2 == ts + 1);
1078             ts2->base_type = TCG_TYPE_I64;
1079             ts2->type = TCG_TYPE_I32;
1080             ts2->kind = TEMP_CONST;
1081             ts2->temp_allocated = 1;
1082             ts2->val = val >> 32;
1083         } else {
1084             ts->base_type = type;
1085             ts->type = type;
1086             ts->kind = TEMP_CONST;
1087             ts->temp_allocated = 1;
1088             ts->val = val;
1089         }
1090         g_hash_table_insert(h, &ts->val, ts);
1091     }
1092 
1093     return ts;
1094 }
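
/*
 * Editor's sketch (assumed shape; the real typed wrappers live in
 * tcg/tcg.h): constants are interned, so two requests for the same value
 * share one TEMP_CONST temp, which is why tcg_temp_free_internal() above
 * silently ignores frees of TEMP_CONST.
 */
static inline TCGv_i32 example_constant_i32(int32_t val)
{
    return temp_tcgv_i32(tcg_constant_internal(TCG_TYPE_I32, val));
}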
1095 
1096 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1097 {
1098     val = dup_const(vece, val);
1099     return temp_tcgv_vec(tcg_constant_internal(type, val));
1100 }
1101 
1102 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1103 {
1104     TCGTemp *t = tcgv_vec_temp(match);
1105 
1106     tcg_debug_assert(t->temp_allocated != 0);
1107     return tcg_constant_vec(t->base_type, vece, val);
1108 }
1109 
1110 TCGv_i32 tcg_const_i32(int32_t val)
1111 {
1112     TCGv_i32 t0;
1113     t0 = tcg_temp_new_i32();
1114     tcg_gen_movi_i32(t0, val);
1115     return t0;
1116 }
1117 
1118 TCGv_i64 tcg_const_i64(int64_t val)
1119 {
1120     TCGv_i64 t0;
1121     t0 = tcg_temp_new_i64();
1122     tcg_gen_movi_i64(t0, val);
1123     return t0;
1124 }
1125 
1126 TCGv_i32 tcg_const_local_i32(int32_t val)
1127 {
1128     TCGv_i32 t0;
1129     t0 = tcg_temp_local_new_i32();
1130     tcg_gen_movi_i32(t0, val);
1131     return t0;
1132 }
1133 
1134 TCGv_i64 tcg_const_local_i64(int64_t val)
1135 {
1136     TCGv_i64 t0;
1137     t0 = tcg_temp_local_new_i64();
1138     tcg_gen_movi_i64(t0, val);
1139     return t0;
1140 }
1141 
1142 #if defined(CONFIG_DEBUG_TCG)
1143 void tcg_clear_temp_count(void)
1144 {
1145     TCGContext *s = tcg_ctx;
1146     s->temps_in_use = 0;
1147 }
1148 
1149 int tcg_check_temp_count(void)
1150 {
1151     TCGContext *s = tcg_ctx;
1152     if (s->temps_in_use) {
1153         /* Clear the count so that we don't give another
1154          * warning immediately next time around.
1155          */
1156         s->temps_in_use = 0;
1157         return 1;
1158     }
1159     return 0;
1160 }
1161 #endif
1162 
1163 /* Return true if OP may appear in the opcode stream.
1164    Test the runtime variable that controls each opcode.  */
1165 bool tcg_op_supported(TCGOpcode op)
1166 {
1167     const bool have_vec
1168         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1169 
1170     switch (op) {
1171     case INDEX_op_discard:
1172     case INDEX_op_set_label:
1173     case INDEX_op_call:
1174     case INDEX_op_br:
1175     case INDEX_op_mb:
1176     case INDEX_op_insn_start:
1177     case INDEX_op_exit_tb:
1178     case INDEX_op_goto_tb:
1179     case INDEX_op_goto_ptr:
1180     case INDEX_op_qemu_ld_i32:
1181     case INDEX_op_qemu_st_i32:
1182     case INDEX_op_qemu_ld_i64:
1183     case INDEX_op_qemu_st_i64:
1184         return true;
1185 
1186     case INDEX_op_qemu_st8_i32:
1187         return TCG_TARGET_HAS_qemu_st8_i32;
1188 
1189     case INDEX_op_mov_i32:
1190     case INDEX_op_setcond_i32:
1191     case INDEX_op_brcond_i32:
1192     case INDEX_op_ld8u_i32:
1193     case INDEX_op_ld8s_i32:
1194     case INDEX_op_ld16u_i32:
1195     case INDEX_op_ld16s_i32:
1196     case INDEX_op_ld_i32:
1197     case INDEX_op_st8_i32:
1198     case INDEX_op_st16_i32:
1199     case INDEX_op_st_i32:
1200     case INDEX_op_add_i32:
1201     case INDEX_op_sub_i32:
1202     case INDEX_op_mul_i32:
1203     case INDEX_op_and_i32:
1204     case INDEX_op_or_i32:
1205     case INDEX_op_xor_i32:
1206     case INDEX_op_shl_i32:
1207     case INDEX_op_shr_i32:
1208     case INDEX_op_sar_i32:
1209         return true;
1210 
1211     case INDEX_op_movcond_i32:
1212         return TCG_TARGET_HAS_movcond_i32;
1213     case INDEX_op_div_i32:
1214     case INDEX_op_divu_i32:
1215         return TCG_TARGET_HAS_div_i32;
1216     case INDEX_op_rem_i32:
1217     case INDEX_op_remu_i32:
1218         return TCG_TARGET_HAS_rem_i32;
1219     case INDEX_op_div2_i32:
1220     case INDEX_op_divu2_i32:
1221         return TCG_TARGET_HAS_div2_i32;
1222     case INDEX_op_rotl_i32:
1223     case INDEX_op_rotr_i32:
1224         return TCG_TARGET_HAS_rot_i32;
1225     case INDEX_op_deposit_i32:
1226         return TCG_TARGET_HAS_deposit_i32;
1227     case INDEX_op_extract_i32:
1228         return TCG_TARGET_HAS_extract_i32;
1229     case INDEX_op_sextract_i32:
1230         return TCG_TARGET_HAS_sextract_i32;
1231     case INDEX_op_extract2_i32:
1232         return TCG_TARGET_HAS_extract2_i32;
1233     case INDEX_op_add2_i32:
1234         return TCG_TARGET_HAS_add2_i32;
1235     case INDEX_op_sub2_i32:
1236         return TCG_TARGET_HAS_sub2_i32;
1237     case INDEX_op_mulu2_i32:
1238         return TCG_TARGET_HAS_mulu2_i32;
1239     case INDEX_op_muls2_i32:
1240         return TCG_TARGET_HAS_muls2_i32;
1241     case INDEX_op_muluh_i32:
1242         return TCG_TARGET_HAS_muluh_i32;
1243     case INDEX_op_mulsh_i32:
1244         return TCG_TARGET_HAS_mulsh_i32;
1245     case INDEX_op_ext8s_i32:
1246         return TCG_TARGET_HAS_ext8s_i32;
1247     case INDEX_op_ext16s_i32:
1248         return TCG_TARGET_HAS_ext16s_i32;
1249     case INDEX_op_ext8u_i32:
1250         return TCG_TARGET_HAS_ext8u_i32;
1251     case INDEX_op_ext16u_i32:
1252         return TCG_TARGET_HAS_ext16u_i32;
1253     case INDEX_op_bswap16_i32:
1254         return TCG_TARGET_HAS_bswap16_i32;
1255     case INDEX_op_bswap32_i32:
1256         return TCG_TARGET_HAS_bswap32_i32;
1257     case INDEX_op_not_i32:
1258         return TCG_TARGET_HAS_not_i32;
1259     case INDEX_op_neg_i32:
1260         return TCG_TARGET_HAS_neg_i32;
1261     case INDEX_op_andc_i32:
1262         return TCG_TARGET_HAS_andc_i32;
1263     case INDEX_op_orc_i32:
1264         return TCG_TARGET_HAS_orc_i32;
1265     case INDEX_op_eqv_i32:
1266         return TCG_TARGET_HAS_eqv_i32;
1267     case INDEX_op_nand_i32:
1268         return TCG_TARGET_HAS_nand_i32;
1269     case INDEX_op_nor_i32:
1270         return TCG_TARGET_HAS_nor_i32;
1271     case INDEX_op_clz_i32:
1272         return TCG_TARGET_HAS_clz_i32;
1273     case INDEX_op_ctz_i32:
1274         return TCG_TARGET_HAS_ctz_i32;
1275     case INDEX_op_ctpop_i32:
1276         return TCG_TARGET_HAS_ctpop_i32;
1277 
1278     case INDEX_op_brcond2_i32:
1279     case INDEX_op_setcond2_i32:
1280         return TCG_TARGET_REG_BITS == 32;
1281 
1282     case INDEX_op_mov_i64:
1283     case INDEX_op_setcond_i64:
1284     case INDEX_op_brcond_i64:
1285     case INDEX_op_ld8u_i64:
1286     case INDEX_op_ld8s_i64:
1287     case INDEX_op_ld16u_i64:
1288     case INDEX_op_ld16s_i64:
1289     case INDEX_op_ld32u_i64:
1290     case INDEX_op_ld32s_i64:
1291     case INDEX_op_ld_i64:
1292     case INDEX_op_st8_i64:
1293     case INDEX_op_st16_i64:
1294     case INDEX_op_st32_i64:
1295     case INDEX_op_st_i64:
1296     case INDEX_op_add_i64:
1297     case INDEX_op_sub_i64:
1298     case INDEX_op_mul_i64:
1299     case INDEX_op_and_i64:
1300     case INDEX_op_or_i64:
1301     case INDEX_op_xor_i64:
1302     case INDEX_op_shl_i64:
1303     case INDEX_op_shr_i64:
1304     case INDEX_op_sar_i64:
1305     case INDEX_op_ext_i32_i64:
1306     case INDEX_op_extu_i32_i64:
1307         return TCG_TARGET_REG_BITS == 64;
1308 
1309     case INDEX_op_movcond_i64:
1310         return TCG_TARGET_HAS_movcond_i64;
1311     case INDEX_op_div_i64:
1312     case INDEX_op_divu_i64:
1313         return TCG_TARGET_HAS_div_i64;
1314     case INDEX_op_rem_i64:
1315     case INDEX_op_remu_i64:
1316         return TCG_TARGET_HAS_rem_i64;
1317     case INDEX_op_div2_i64:
1318     case INDEX_op_divu2_i64:
1319         return TCG_TARGET_HAS_div2_i64;
1320     case INDEX_op_rotl_i64:
1321     case INDEX_op_rotr_i64:
1322         return TCG_TARGET_HAS_rot_i64;
1323     case INDEX_op_deposit_i64:
1324         return TCG_TARGET_HAS_deposit_i64;
1325     case INDEX_op_extract_i64:
1326         return TCG_TARGET_HAS_extract_i64;
1327     case INDEX_op_sextract_i64:
1328         return TCG_TARGET_HAS_sextract_i64;
1329     case INDEX_op_extract2_i64:
1330         return TCG_TARGET_HAS_extract2_i64;
1331     case INDEX_op_extrl_i64_i32:
1332         return TCG_TARGET_HAS_extrl_i64_i32;
1333     case INDEX_op_extrh_i64_i32:
1334         return TCG_TARGET_HAS_extrh_i64_i32;
1335     case INDEX_op_ext8s_i64:
1336         return TCG_TARGET_HAS_ext8s_i64;
1337     case INDEX_op_ext16s_i64:
1338         return TCG_TARGET_HAS_ext16s_i64;
1339     case INDEX_op_ext32s_i64:
1340         return TCG_TARGET_HAS_ext32s_i64;
1341     case INDEX_op_ext8u_i64:
1342         return TCG_TARGET_HAS_ext8u_i64;
1343     case INDEX_op_ext16u_i64:
1344         return TCG_TARGET_HAS_ext16u_i64;
1345     case INDEX_op_ext32u_i64:
1346         return TCG_TARGET_HAS_ext32u_i64;
1347     case INDEX_op_bswap16_i64:
1348         return TCG_TARGET_HAS_bswap16_i64;
1349     case INDEX_op_bswap32_i64:
1350         return TCG_TARGET_HAS_bswap32_i64;
1351     case INDEX_op_bswap64_i64:
1352         return TCG_TARGET_HAS_bswap64_i64;
1353     case INDEX_op_not_i64:
1354         return TCG_TARGET_HAS_not_i64;
1355     case INDEX_op_neg_i64:
1356         return TCG_TARGET_HAS_neg_i64;
1357     case INDEX_op_andc_i64:
1358         return TCG_TARGET_HAS_andc_i64;
1359     case INDEX_op_orc_i64:
1360         return TCG_TARGET_HAS_orc_i64;
1361     case INDEX_op_eqv_i64:
1362         return TCG_TARGET_HAS_eqv_i64;
1363     case INDEX_op_nand_i64:
1364         return TCG_TARGET_HAS_nand_i64;
1365     case INDEX_op_nor_i64:
1366         return TCG_TARGET_HAS_nor_i64;
1367     case INDEX_op_clz_i64:
1368         return TCG_TARGET_HAS_clz_i64;
1369     case INDEX_op_ctz_i64:
1370         return TCG_TARGET_HAS_ctz_i64;
1371     case INDEX_op_ctpop_i64:
1372         return TCG_TARGET_HAS_ctpop_i64;
1373     case INDEX_op_add2_i64:
1374         return TCG_TARGET_HAS_add2_i64;
1375     case INDEX_op_sub2_i64:
1376         return TCG_TARGET_HAS_sub2_i64;
1377     case INDEX_op_mulu2_i64:
1378         return TCG_TARGET_HAS_mulu2_i64;
1379     case INDEX_op_muls2_i64:
1380         return TCG_TARGET_HAS_muls2_i64;
1381     case INDEX_op_muluh_i64:
1382         return TCG_TARGET_HAS_muluh_i64;
1383     case INDEX_op_mulsh_i64:
1384         return TCG_TARGET_HAS_mulsh_i64;
1385 
1386     case INDEX_op_mov_vec:
1387     case INDEX_op_dup_vec:
1388     case INDEX_op_dupm_vec:
1389     case INDEX_op_ld_vec:
1390     case INDEX_op_st_vec:
1391     case INDEX_op_add_vec:
1392     case INDEX_op_sub_vec:
1393     case INDEX_op_and_vec:
1394     case INDEX_op_or_vec:
1395     case INDEX_op_xor_vec:
1396     case INDEX_op_cmp_vec:
1397         return have_vec;
1398     case INDEX_op_dup2_vec:
1399         return have_vec && TCG_TARGET_REG_BITS == 32;
1400     case INDEX_op_not_vec:
1401         return have_vec && TCG_TARGET_HAS_not_vec;
1402     case INDEX_op_neg_vec:
1403         return have_vec && TCG_TARGET_HAS_neg_vec;
1404     case INDEX_op_abs_vec:
1405         return have_vec && TCG_TARGET_HAS_abs_vec;
1406     case INDEX_op_andc_vec:
1407         return have_vec && TCG_TARGET_HAS_andc_vec;
1408     case INDEX_op_orc_vec:
1409         return have_vec && TCG_TARGET_HAS_orc_vec;
1410     case INDEX_op_mul_vec:
1411         return have_vec && TCG_TARGET_HAS_mul_vec;
1412     case INDEX_op_shli_vec:
1413     case INDEX_op_shri_vec:
1414     case INDEX_op_sari_vec:
1415         return have_vec && TCG_TARGET_HAS_shi_vec;
1416     case INDEX_op_shls_vec:
1417     case INDEX_op_shrs_vec:
1418     case INDEX_op_sars_vec:
1419         return have_vec && TCG_TARGET_HAS_shs_vec;
1420     case INDEX_op_shlv_vec:
1421     case INDEX_op_shrv_vec:
1422     case INDEX_op_sarv_vec:
1423         return have_vec && TCG_TARGET_HAS_shv_vec;
1424     case INDEX_op_rotli_vec:
1425         return have_vec && TCG_TARGET_HAS_roti_vec;
1426     case INDEX_op_rotls_vec:
1427         return have_vec && TCG_TARGET_HAS_rots_vec;
1428     case INDEX_op_rotlv_vec:
1429     case INDEX_op_rotrv_vec:
1430         return have_vec && TCG_TARGET_HAS_rotv_vec;
1431     case INDEX_op_ssadd_vec:
1432     case INDEX_op_usadd_vec:
1433     case INDEX_op_sssub_vec:
1434     case INDEX_op_ussub_vec:
1435         return have_vec && TCG_TARGET_HAS_sat_vec;
1436     case INDEX_op_smin_vec:
1437     case INDEX_op_umin_vec:
1438     case INDEX_op_smax_vec:
1439     case INDEX_op_umax_vec:
1440         return have_vec && TCG_TARGET_HAS_minmax_vec;
1441     case INDEX_op_bitsel_vec:
1442         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1443     case INDEX_op_cmpsel_vec:
1444         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1445 
1446     default:
1447         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1448         return true;
1449     }
1450 }
1451 
1452 /* Note: we convert the 64-bit args to 32-bit and do some alignment
1453    and endian swap. Maybe it would be better to do the alignment
1454    and endian swap in tcg_reg_alloc_call(). */
1455 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1456 {
1457     int i, real_args, nb_rets, pi;
1458     unsigned typemask;
1459     const TCGHelperInfo *info;
1460     TCGOp *op;
1461 
1462     info = g_hash_table_lookup(helper_table, (gpointer)func);
1463     typemask = info->typemask;
1464 
1465 #ifdef CONFIG_PLUGIN
1466     /* detect non-plugin helpers */
1467     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1468         tcg_ctx->plugin_insn->calls_helpers = true;
1469     }
1470 #endif
1471 
1472 #if defined(__sparc__) && !defined(__arch64__) \
1473     && !defined(CONFIG_TCG_INTERPRETER)
1474     /* We have 64-bit values in one register, but need to pass them as two
1475        separate parameters.  Split them.  */
1476     int orig_typemask = typemask;
1477     int orig_nargs = nargs;
1478     TCGv_i64 retl, reth;
1479     TCGTemp *split_args[MAX_OPC_PARAM];
1480 
1481     retl = NULL;
1482     reth = NULL;
1483     typemask = 0;
1484     for (i = real_args = 0; i < nargs; ++i) {
1485         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1486         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1487 
1488         if (is_64bit) {
1489             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1490             TCGv_i32 h = tcg_temp_new_i32();
1491             TCGv_i32 l = tcg_temp_new_i32();
1492             tcg_gen_extr_i64_i32(l, h, orig);
1493             split_args[real_args++] = tcgv_i32_temp(h);
1494             typemask |= dh_typecode_i32 << (real_args * 3);
1495             split_args[real_args++] = tcgv_i32_temp(l);
1496             typemask |= dh_typecode_i32 << (real_args * 3);
1497         } else {
1498             split_args[real_args++] = args[i];
1499             typemask |= argtype << (real_args * 3);
1500         }
1501     }
1502     nargs = real_args;
1503     args = split_args;
1504 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1505     for (i = 0; i < nargs; ++i) {
1506         int argtype = extract32(typemask, (i + 1) * 3, 3);
1507         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1508         bool is_signed = argtype & 1;
1509 
1510         if (is_32bit) {
1511             TCGv_i64 temp = tcg_temp_new_i64();
1512             TCGv_i32 orig = temp_tcgv_i32(args[i]);
1513             if (is_signed) {
1514                 tcg_gen_ext_i32_i64(temp, orig);
1515             } else {
1516                 tcg_gen_extu_i32_i64(temp, orig);
1517             }
1518             args[i] = tcgv_i64_temp(temp);
1519         }
1520     }
1521 #endif /* TCG_TARGET_EXTEND_ARGS */
1522 
1523     op = tcg_emit_op(INDEX_op_call);
1524 
1525     pi = 0;
1526     if (ret != NULL) {
1527 #if defined(__sparc__) && !defined(__arch64__) \
1528     && !defined(CONFIG_TCG_INTERPRETER)
1529         if ((typemask & 6) == dh_typecode_i64) {
1530             /* The 32-bit ABI is going to return the 64-bit value in
1531                the %o0/%o1 register pair.  Prepare for this by using
1532                two return temporaries, and reassemble below.  */
1533             retl = tcg_temp_new_i64();
1534             reth = tcg_temp_new_i64();
1535             op->args[pi++] = tcgv_i64_arg(reth);
1536             op->args[pi++] = tcgv_i64_arg(retl);
1537             nb_rets = 2;
1538         } else {
1539             op->args[pi++] = temp_arg(ret);
1540             nb_rets = 1;
1541         }
1542 #else
1543         if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1544 #ifdef HOST_WORDS_BIGENDIAN
1545             op->args[pi++] = temp_arg(ret + 1);
1546             op->args[pi++] = temp_arg(ret);
1547 #else
1548             op->args[pi++] = temp_arg(ret);
1549             op->args[pi++] = temp_arg(ret + 1);
1550 #endif
1551             nb_rets = 2;
1552         } else {
1553             op->args[pi++] = temp_arg(ret);
1554             nb_rets = 1;
1555         }
1556 #endif
1557     } else {
1558         nb_rets = 0;
1559     }
1560     TCGOP_CALLO(op) = nb_rets;
1561 
1562     real_args = 0;
1563     for (i = 0; i < nargs; i++) {
1564         int argtype = extract32(typemask, (i + 1) * 3, 3);
1565         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1566         bool want_align = false;
1567 
1568 #if defined(CONFIG_TCG_INTERPRETER)
1569         /*
1570          * Align all arguments, so that they land in predictable places
1571          * for passing off to ffi_call.
1572          */
1573         want_align = true;
1574 #elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1575         /* Some targets want aligned 64-bit args. */
1576         want_align = is_64bit;
1577 #endif
1578 
1579         if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1580             op->args[pi++] = TCG_CALL_DUMMY_ARG;
1581             real_args++;
1582         }
1583 
1584         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1585             /*
1586              * If stack grows up, then we will be placing successive
1587              * arguments at lower addresses, which means we need to
1588              * reverse the order compared to how we would normally
1589              * treat either big or little-endian.  For those arguments
1590              * that will wind up in registers, this still works for
1591              * HPPA (the only current STACK_GROWSUP target) since the
1592              * argument registers are *also* allocated in decreasing
1593              * order.  If another such target is added, this logic may
1594              * have to get more complicated to differentiate between
1595              * stack arguments and register arguments.
1596              */
1597 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1598             op->args[pi++] = temp_arg(args[i] + 1);
1599             op->args[pi++] = temp_arg(args[i]);
1600 #else
1601             op->args[pi++] = temp_arg(args[i]);
1602             op->args[pi++] = temp_arg(args[i] + 1);
1603 #endif
1604             real_args += 2;
1605             continue;
1606         }
1607 
1608         op->args[pi++] = temp_arg(args[i]);
1609         real_args++;
1610     }
1611     op->args[pi++] = (uintptr_t)func;
1612     op->args[pi++] = (uintptr_t)info;
1613     TCGOP_CALLI(op) = real_args;
1614 
1615     /* Make sure the fields didn't overflow.  */
1616     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1617     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1618 
1619 #if defined(__sparc__) && !defined(__arch64__) \
1620     && !defined(CONFIG_TCG_INTERPRETER)
1621     /* Free all of the parts we allocated above.  */
1622     for (i = real_args = 0; i < orig_nargs; ++i) {
1623         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1624         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1625 
1626         if (is_64bit) {
1627             tcg_temp_free_internal(args[real_args++]);
1628             tcg_temp_free_internal(args[real_args++]);
1629         } else {
1630             real_args++;
1631         }
1632     }
1633     if ((orig_typemask & 6) == dh_typecode_i64) {
1634         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1635            Note that describing these as TCGv_i64 eliminates an unnecessary
1636            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1637         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1638         tcg_temp_free_i64(retl);
1639         tcg_temp_free_i64(reth);
1640     }
1641 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1642     for (i = 0; i < nargs; ++i) {
1643         int argtype = extract32(typemask, (i + 1) * 3, 3);
1644         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1645 
1646         if (is_32bit) {
1647             tcg_temp_free_internal(args[i]);
1648         }
1649     }
1650 #endif /* TCG_TARGET_EXTEND_ARGS */
1651 }
1652 
1653 static void tcg_reg_alloc_start(TCGContext *s)
1654 {
1655     int i, n;
1656 
1657     for (i = 0, n = s->nb_temps; i < n; i++) {
1658         TCGTemp *ts = &s->temps[i];
1659         TCGTempVal val = TEMP_VAL_MEM;
1660 
1661         switch (ts->kind) {
1662         case TEMP_CONST:
1663             val = TEMP_VAL_CONST;
1664             break;
1665         case TEMP_FIXED:
1666             val = TEMP_VAL_REG;
1667             break;
1668         case TEMP_GLOBAL:
1669             break;
1670         case TEMP_NORMAL:
1671             val = TEMP_VAL_DEAD;
1672             /* fall through */
1673         case TEMP_LOCAL:
1674             ts->mem_allocated = 0;
1675             break;
1676         default:
1677             g_assert_not_reached();
1678         }
1679         ts->val_type = val;
1680     }
1681 
1682     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1683 }
1684 
1685 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1686                                  TCGTemp *ts)
1687 {
1688     int idx = temp_idx(ts);
1689 
1690     switch (ts->kind) {
1691     case TEMP_FIXED:
1692     case TEMP_GLOBAL:
1693         pstrcpy(buf, buf_size, ts->name);
1694         break;
1695     case TEMP_LOCAL:
1696         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1697         break;
1698     case TEMP_NORMAL:
1699         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1700         break;
1701     case TEMP_CONST:
1702         switch (ts->type) {
1703         case TCG_TYPE_I32:
1704             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1705             break;
1706 #if TCG_TARGET_REG_BITS > 32
1707         case TCG_TYPE_I64:
1708             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1709             break;
1710 #endif
1711         case TCG_TYPE_V64:
1712         case TCG_TYPE_V128:
1713         case TCG_TYPE_V256:
1714             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1715                      64 << (ts->type - TCG_TYPE_V64), ts->val);
1716             break;
1717         default:
1718             g_assert_not_reached();
1719         }
1720         break;
1721     }
1722     return buf;
1723 }
1724 
1725 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1726                              int buf_size, TCGArg arg)
1727 {
1728     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1729 }
1730 
1731 static const char * const cond_name[] =
1732 {
1733     [TCG_COND_NEVER] = "never",
1734     [TCG_COND_ALWAYS] = "always",
1735     [TCG_COND_EQ] = "eq",
1736     [TCG_COND_NE] = "ne",
1737     [TCG_COND_LT] = "lt",
1738     [TCG_COND_GE] = "ge",
1739     [TCG_COND_LE] = "le",
1740     [TCG_COND_GT] = "gt",
1741     [TCG_COND_LTU] = "ltu",
1742     [TCG_COND_GEU] = "geu",
1743     [TCG_COND_LEU] = "leu",
1744     [TCG_COND_GTU] = "gtu"
1745 };
1746 
1747 static const char * const ldst_name[] =
1748 {
1749     [MO_UB]   = "ub",
1750     [MO_SB]   = "sb",
1751     [MO_LEUW] = "leuw",
1752     [MO_LESW] = "lesw",
1753     [MO_LEUL] = "leul",
1754     [MO_LESL] = "lesl",
1755     [MO_LEUQ] = "leq",
1756     [MO_BEUW] = "beuw",
1757     [MO_BESW] = "besw",
1758     [MO_BEUL] = "beul",
1759     [MO_BESL] = "besl",
1760     [MO_BEUQ] = "beq",
1761 };
1762 
1763 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1764 #ifdef TARGET_ALIGNED_ONLY
1765     [MO_UNALN >> MO_ASHIFT]    = "un+",
1766     [MO_ALIGN >> MO_ASHIFT]    = "",
1767 #else
1768     [MO_UNALN >> MO_ASHIFT]    = "",
1769     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1770 #endif
1771     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1772     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1773     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1774     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1775     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1776     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1777 };
1778 
1779 static const char bswap_flag_name[][6] = {
1780     [TCG_BSWAP_IZ] = "iz",
1781     [TCG_BSWAP_OZ] = "oz",
1782     [TCG_BSWAP_OS] = "os",
1783     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
1784     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
1785 };
1786 
1787 static inline bool tcg_regset_single(TCGRegSet d)
1788 {
1789     return (d & (d - 1)) == 0;
1790 }
1791 
1792 static inline TCGReg tcg_regset_first(TCGRegSet d)
1793 {
1794     if (TCG_TARGET_NB_REGS <= 32) {
1795         return ctz32(d);
1796     } else {
1797         return ctz64(d);
1798     }
1799 }
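
/*
 * A minimal illustrative self-check of the two helpers above (not part
 * of the original file; the register indexes are hypothetical).
 * tcg_regset_single() uses the classic bit trick: d & (d - 1) clears
 * the lowest set bit, so the result is zero iff at most one bit is set.
 */
static void tcg_regset_helpers_demo(void) __attribute__((unused));
static void tcg_regset_helpers_demo(void)
{
    TCGRegSet one = (TCGRegSet)1 << 3;  /* only register 3 */
    TCGRegSet two = one | 1;            /* registers 0 and 3 */

    tcg_debug_assert(tcg_regset_single(one));
    tcg_debug_assert(!tcg_regset_single(two));
    tcg_debug_assert(tcg_regset_first(one) == 3);
}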
1800 
1801 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1802 {
1803     char buf[128];
1804     TCGOp *op;
1805 
1806     QTAILQ_FOREACH(op, &s->ops, link) {
1807         int i, k, nb_oargs, nb_iargs, nb_cargs;
1808         const TCGOpDef *def;
1809         TCGOpcode c;
1810         int col = 0;
1811 
1812         c = op->opc;
1813         def = &tcg_op_defs[c];
1814 
1815         if (c == INDEX_op_insn_start) {
1816             nb_oargs = 0;
1817             col += qemu_log("\n ----");
1818 
1819             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1820                 target_ulong a;
1821 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1822                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1823 #else
1824                 a = op->args[i];
1825 #endif
1826                 col += qemu_log(" " TARGET_FMT_lx, a);
1827             }
1828         } else if (c == INDEX_op_call) {
1829             const TCGHelperInfo *info = tcg_call_info(op);
1830             void *func = tcg_call_func(op);
1831 
1832             /* variable number of arguments */
1833             nb_oargs = TCGOP_CALLO(op);
1834             nb_iargs = TCGOP_CALLI(op);
1835             nb_cargs = def->nb_cargs;
1836 
1837             col += qemu_log(" %s ", def->name);
1838 
1839             /*
1840              * Print the function name from TCGHelperInfo, if available.
1841              * Note that plugins have a template function for the info,
1842              * but the actual function pointer comes from the plugin.
1843              */
1844             if (func == info->func) {
1845                 col += qemu_log("%s", info->name);
1846             } else {
1847                 col += qemu_log("plugin(%p)", func);
1848             }
1849 
1850             col += qemu_log(",$0x%x,$%d", info->flags, nb_oargs);
1851             for (i = 0; i < nb_oargs; i++) {
1852                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1853                                                        op->args[i]));
1854             }
1855             for (i = 0; i < nb_iargs; i++) {
1856                 TCGArg arg = op->args[nb_oargs + i];
1857                 const char *t = "<dummy>";
1858                 if (arg != TCG_CALL_DUMMY_ARG) {
1859                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1860                 }
1861                 col += qemu_log(",%s", t);
1862             }
1863         } else {
1864             col += qemu_log(" %s ", def->name);
1865 
1866             nb_oargs = def->nb_oargs;
1867             nb_iargs = def->nb_iargs;
1868             nb_cargs = def->nb_cargs;
1869 
1870             if (def->flags & TCG_OPF_VECTOR) {
1871                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1872                                 8 << TCGOP_VECE(op));
1873             }
1874 
1875             k = 0;
1876             for (i = 0; i < nb_oargs; i++) {
1877                 if (k != 0) {
1878                     col += qemu_log(",");
1879                 }
1880                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1881                                                       op->args[k++]));
1882             }
1883             for (i = 0; i < nb_iargs; i++) {
1884                 if (k != 0) {
1885                     col += qemu_log(",");
1886                 }
1887                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1888                                                       op->args[k++]));
1889             }
1890             switch (c) {
1891             case INDEX_op_brcond_i32:
1892             case INDEX_op_setcond_i32:
1893             case INDEX_op_movcond_i32:
1894             case INDEX_op_brcond2_i32:
1895             case INDEX_op_setcond2_i32:
1896             case INDEX_op_brcond_i64:
1897             case INDEX_op_setcond_i64:
1898             case INDEX_op_movcond_i64:
1899             case INDEX_op_cmp_vec:
1900             case INDEX_op_cmpsel_vec:
1901                 if (op->args[k] < ARRAY_SIZE(cond_name)
1902                     && cond_name[op->args[k]]) {
1903                     col += qemu_log(",%s", cond_name[op->args[k++]]);
1904                 } else {
1905                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1906                 }
1907                 i = 1;
1908                 break;
1909             case INDEX_op_qemu_ld_i32:
1910             case INDEX_op_qemu_st_i32:
1911             case INDEX_op_qemu_st8_i32:
1912             case INDEX_op_qemu_ld_i64:
1913             case INDEX_op_qemu_st_i64:
1914                 {
1915                     MemOpIdx oi = op->args[k++];
1916                     MemOp op = get_memop(oi);
1917                     unsigned ix = get_mmuidx(oi);
1918 
1919                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1920                         col += qemu_log(",$0x%x,%u", op, ix);
1921                     } else {
1922                         const char *s_al, *s_op;
1923                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1924                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1925                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1926                     }
1927                     i = 1;
1928                 }
1929                 break;
1930             case INDEX_op_bswap16_i32:
1931             case INDEX_op_bswap16_i64:
1932             case INDEX_op_bswap32_i32:
1933             case INDEX_op_bswap32_i64:
1934             case INDEX_op_bswap64_i64:
1935                 {
1936                     TCGArg flags = op->args[k];
1937                     const char *name = NULL;
1938 
1939                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
1940                         name = bswap_flag_name[flags];
1941                     }
1942                     if (name) {
1943                         col += qemu_log(",%s", name);
1944                     } else {
1945                         col += qemu_log(",$0x%" TCG_PRIlx, flags);
1946                     }
1947                     i = k = 1;
1948                 }
1949                 break;
1950             default:
1951                 i = 0;
1952                 break;
1953             }
1954             switch (c) {
1955             case INDEX_op_set_label:
1956             case INDEX_op_br:
1957             case INDEX_op_brcond_i32:
1958             case INDEX_op_brcond_i64:
1959             case INDEX_op_brcond2_i32:
1960                 col += qemu_log("%s$L%d", k ? "," : "",
1961                                 arg_label(op->args[k])->id);
1962                 i++, k++;
1963                 break;
1964             default:
1965                 break;
1966             }
1967             for (; i < nb_cargs; i++, k++) {
1968                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1969             }
1970         }
1971 
1972         if (have_prefs || op->life) {
1973 
1974             QemuLogFile *logfile;
1975 
1976             rcu_read_lock();
1977             logfile = qatomic_rcu_read(&qemu_logfile);
1978             if (logfile) {
1979                 for (; col < 40; ++col) {
1980                     putc(' ', logfile->fd);
1981                 }
1982             }
1983             rcu_read_unlock();
1984         }
1985 
1986         if (op->life) {
1987             unsigned life = op->life;
1988 
1989             if (life & (SYNC_ARG * 3)) {
1990                 qemu_log("  sync:");
1991                 for (i = 0; i < 2; ++i) {
1992                     if (life & (SYNC_ARG << i)) {
1993                         qemu_log(" %d", i);
1994                     }
1995                 }
1996             }
1997             life /= DEAD_ARG;
1998             if (life) {
1999                 qemu_log("  dead:");
2000                 for (i = 0; life; ++i, life >>= 1) {
2001                     if (life & 1) {
2002                         qemu_log(" %d", i);
2003                     }
2004                 }
2005             }
2006         }
2007 
2008         if (have_prefs) {
2009             for (i = 0; i < nb_oargs; ++i) {
2010                 TCGRegSet set = op->output_pref[i];
2011 
2012                 if (i == 0) {
2013                     qemu_log("  pref=");
2014                 } else {
2015                     qemu_log(",");
2016                 }
2017                 if (set == 0) {
2018                     qemu_log("none");
2019                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2020                     qemu_log("all");
2021 #ifdef CONFIG_DEBUG_TCG
2022                 } else if (tcg_regset_single(set)) {
2023                     TCGReg reg = tcg_regset_first(set);
2024                     qemu_log("%s", tcg_target_reg_names[reg]);
2025 #endif
2026                 } else if (TCG_TARGET_NB_REGS <= 32) {
2027                     qemu_log("%#x", (uint32_t)set);
2028                 } else {
2029                     qemu_log("%#" PRIx64, (uint64_t)set);
2030                 }
2031             }
2032         }
2033 
2034         qemu_log("\n");
2035     }
2036 }
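
/*
 * For reference, a line printed by the loop above has the shape
 * (temp names and liveness data here are invented for illustration):
 *
 *      add_i32 tmp3,tmp1,tmp2                  dead: 1 2
 *
 * i.e. the opcode name, comma-separated output/input/const args,
 * padding to column 40, then the optional liveness annotations.
 */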
2037 
2038 /* we give more priority to constraints with fewer registers */
2039 static int get_constraint_priority(const TCGOpDef *def, int k)
2040 {
2041     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2042     int n;
2043 
2044     if (arg_ct->oalias) {
2045         /* an alias is equivalent to a single register */
2046         n = 1;
2047     } else {
2048         n = ctpop64(arg_ct->regs);
2049     }
2050     return TCG_TARGET_NB_REGS - n + 1;
2051 }
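
/*
 * Worked example: with a hypothetical TCG_TARGET_NB_REGS of 16, a
 * constraint allowing a single register (n == 1) gets priority 16,
 * while one allowing all 16 registers gets priority 1, so the most
 * constrained operands are sorted to the front by sort_constraints().
 */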
2052 
2053 /* sort from highest priority to lowest */
2054 static void sort_constraints(TCGOpDef *def, int start, int n)
2055 {
2056     int i, j;
2057     TCGArgConstraint *a = def->args_ct;
2058 
2059     for (i = 0; i < n; i++) {
2060         a[start + i].sort_index = start + i;
2061     }
2062     if (n <= 1) {
2063         return;
2064     }
2065     for (i = 0; i < n - 1; i++) {
2066         for (j = i + 1; j < n; j++) {
2067             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2068             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2069             if (p1 < p2) {
2070                 int tmp = a[start + i].sort_index;
2071                 a[start + i].sort_index = a[start + j].sort_index;
2072                 a[start + j].sort_index = tmp;
2073             }
2074         }
2075     }
2076 }
2077 
2078 static void process_op_defs(TCGContext *s)
2079 {
2080     TCGOpcode op;
2081 
2082     for (op = 0; op < NB_OPS; op++) {
2083         TCGOpDef *def = &tcg_op_defs[op];
2084         const TCGTargetOpDef *tdefs;
2085         int i, nb_args;
2086 
2087         if (def->flags & TCG_OPF_NOT_PRESENT) {
2088             continue;
2089         }
2090 
2091         nb_args = def->nb_iargs + def->nb_oargs;
2092         if (nb_args == 0) {
2093             continue;
2094         }
2095 
2096         /*
2097          * Macro magic should make it impossible, but double-check that
2098          * the array index is in range.  Since the signedness of an enum
2099          * is implementation defined, force the result to unsigned.
2100          */
2101         unsigned con_set = tcg_target_op_def(op);
2102         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2103         tdefs = &constraint_sets[con_set];
2104 
2105         for (i = 0; i < nb_args; i++) {
2106             const char *ct_str = tdefs->args_ct_str[i];
2107             /* Incomplete TCGTargetOpDef entry. */
2108             tcg_debug_assert(ct_str != NULL);
2109 
2110             while (*ct_str != '\0') {
2111                 switch (*ct_str) {
2112                 case '0' ... '9':
2113                     {
2114                         int oarg = *ct_str - '0';
2115                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2116                         tcg_debug_assert(oarg < def->nb_oargs);
2117                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2118                         def->args_ct[i] = def->args_ct[oarg];
2119                         /* The output sets oalias.  */
2120                         def->args_ct[oarg].oalias = true;
2121                         def->args_ct[oarg].alias_index = i;
2122                         /* The input sets ialias. */
2123                         def->args_ct[i].ialias = true;
2124                         def->args_ct[i].alias_index = oarg;
2125                     }
2126                     ct_str++;
2127                     break;
2128                 case '&':
2129                     def->args_ct[i].newreg = true;
2130                     ct_str++;
2131                     break;
2132                 case 'i':
2133                     def->args_ct[i].ct |= TCG_CT_CONST;
2134                     ct_str++;
2135                     break;
2136 
2137                 /* Include all of the target-specific constraints. */
2138 
2139 #undef CONST
2140 #define CONST(CASE, MASK) \
2141     case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2142 #define REGS(CASE, MASK) \
2143     case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2144 
2145 #include "tcg-target-con-str.h"
2146 
2147 #undef REGS
2148 #undef CONST
2149                 default:
2150                     /* Typo in TCGTargetOpDef constraint. */
2151                     g_assert_not_reached();
2152                 }
2153             }
2154         }
2155 
2156         /* TCGTargetOpDef entry with too much information? */
2157         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2158 
2159         /* sort the constraints (XXX: this is just a heuristic) */
2160         sort_constraints(def, 0, def->nb_oargs);
2161         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2162     }
2163 }
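
/*
 * Illustrative (hypothetical) constraint set for a two-address binary
 * op, args_ct_str = { "r", "0", "ri" }, as parsed by the loop above:
 *   - output 0 may live in any register ("r", a target REGS letter);
 *   - input 1 must alias output 0 ("0", setting oalias/ialias);
 *   - input 2 may be a register or an immediate ("ri").
 */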
2164 
2165 void tcg_op_remove(TCGContext *s, TCGOp *op)
2166 {
2167     TCGLabel *label;
2168 
2169     switch (op->opc) {
2170     case INDEX_op_br:
2171         label = arg_label(op->args[0]);
2172         label->refs--;
2173         break;
2174     case INDEX_op_brcond_i32:
2175     case INDEX_op_brcond_i64:
2176         label = arg_label(op->args[3]);
2177         label->refs--;
2178         break;
2179     case INDEX_op_brcond2_i32:
2180         label = arg_label(op->args[5]);
2181         label->refs--;
2182         break;
2183     default:
2184         break;
2185     }
2186 
2187     QTAILQ_REMOVE(&s->ops, op, link);
2188     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2189     s->nb_ops--;
2190 
2191 #ifdef CONFIG_PROFILER
2192     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2193 #endif
2194 }
2195 
2196 void tcg_remove_ops_after(TCGOp *op)
2197 {
2198     TCGContext *s = tcg_ctx;
2199 
2200     while (true) {
2201         TCGOp *last = tcg_last_op();
2202         if (last == op) {
2203             return;
2204         }
2205         tcg_op_remove(s, last);
2206     }
2207 }
2208 
2209 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2210 {
2211     TCGContext *s = tcg_ctx;
2212     TCGOp *op;
2213 
2214     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2215         op = tcg_malloc(sizeof(TCGOp));
2216     } else {
2217         op = QTAILQ_FIRST(&s->free_ops);
2218         QTAILQ_REMOVE(&s->free_ops, op, link);
2219     }
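    /*
     * Zero only the fields that precede 'link'; the list linkage is
     * (re)initialized when the caller inserts the op into a list, so
     * clearing it here would be wasted work.
     */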
2220     memset(op, 0, offsetof(TCGOp, link));
2221     op->opc = opc;
2222     s->nb_ops++;
2223 
2224     return op;
2225 }
2226 
2227 TCGOp *tcg_emit_op(TCGOpcode opc)
2228 {
2229     TCGOp *op = tcg_op_alloc(opc);
2230     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2231     return op;
2232 }
2233 
2234 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2235 {
2236     TCGOp *new_op = tcg_op_alloc(opc);
2237     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2238     return new_op;
2239 }
2240 
2241 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2242 {
2243     TCGOp *new_op = tcg_op_alloc(opc);
2244     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2245     return new_op;
2246 }
2247 
2248 /* Reachability analysis: remove unreachable code.  */
2249 static void reachable_code_pass(TCGContext *s)
2250 {
2251     TCGOp *op, *op_next;
2252     bool dead = false;
2253 
2254     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2255         bool remove = dead;
2256         TCGLabel *label;
2257 
2258         switch (op->opc) {
2259         case INDEX_op_set_label:
2260             label = arg_label(op->args[0]);
2261             if (label->refs == 0) {
2262                 /*
2263                  * While there is an occasional backward branch, virtually
2264                  * all branches generated by the translators are forward.
2265                  * This means that generally we will have already removed
2266                  * all references to the label by the time we see it, and
2267                  * there is little to be gained by iterating.
2268                  */
2269                 remove = true;
2270             } else {
2271                 /* Once we see a label, insns become live again.  */
2272                 dead = false;
2273                 remove = false;
2274 
2275                 /*
2276                  * Optimization can fold conditional branches to unconditional.
2277                  * If we find a label with one reference which is preceded by
2278                  * an unconditional branch to it, remove both.  This needed to
2279                  * wait until the dead code in between them was removed.
2280                  */
2281                 if (label->refs == 1) {
2282                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2283                     if (op_prev->opc == INDEX_op_br &&
2284                         label == arg_label(op_prev->args[0])) {
2285                         tcg_op_remove(s, op_prev);
2286                         remove = true;
2287                     }
2288                 }
2289             }
2290             break;
2291 
2292         case INDEX_op_br:
2293         case INDEX_op_exit_tb:
2294         case INDEX_op_goto_ptr:
2295             /* Unconditional branches; everything following is dead.  */
2296             dead = true;
2297             break;
2298 
2299         case INDEX_op_call:
2300             /* Notice noreturn helper calls, raising exceptions.  */
2301             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2302                 dead = true;
2303             }
2304             break;
2305 
2306         case INDEX_op_insn_start:
2307             /* Never remove -- we need to keep these for unwind.  */
2308             remove = false;
2309             break;
2310 
2311         default:
2312             break;
2313         }
2314 
2315         if (remove) {
2316             tcg_op_remove(s, op);
2317         }
2318     }
2319 }
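
/*
 * Illustrative sequence (hypothetical label id): after the optimizer
 * folds a conditional branch to an unconditional one,
 *
 *     br $L1
 *     ...            <- unreachable, removed as 'dead' is set
 *     set_label $L1  <- now one ref, immediately preceded by the br
 *
 * the pass deletes the intervening ops as it scans, then removes both
 * the br and the label through the label->refs == 1 case above.
 */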
2320 
2321 #define TS_DEAD  1
2322 #define TS_MEM   2
2323 
2324 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2325 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
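
/*
 * As decoded by tcg_dump_ops() above: op->life packs sync bits in the
 * low positions (SYNC_ARG << n flags output n as needing a sync back
 * to memory) and death bits above them (DEAD_ARG << n flags argument n
 * as dying at this op).
 */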
2326 
2327 /* For liveness_pass_1, the register preferences for a given temp.  */
2328 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2329 {
2330     return ts->state_ptr;
2331 }
2332 
2333 /* For liveness_pass_1, reset the preferences for a given temp to the
2334  * maximal regset for its type.
2335  */
2336 static inline void la_reset_pref(TCGTemp *ts)
2337 {
2338     *la_temp_pref(ts)
2339         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2340 }
2341 
2342 /* liveness analysis: end of function: all temps are dead, and globals
2343    should be in memory. */
2344 static void la_func_end(TCGContext *s, int ng, int nt)
2345 {
2346     int i;
2347 
2348     for (i = 0; i < ng; ++i) {
2349         s->temps[i].state = TS_DEAD | TS_MEM;
2350         la_reset_pref(&s->temps[i]);
2351     }
2352     for (i = ng; i < nt; ++i) {
2353         s->temps[i].state = TS_DEAD;
2354         la_reset_pref(&s->temps[i]);
2355     }
2356 }
2357 
2358 /* liveness analysis: end of basic block: all temps are dead, globals
2359    and local temps should be in memory. */
2360 static void la_bb_end(TCGContext *s, int ng, int nt)
2361 {
2362     int i;
2363 
2364     for (i = 0; i < nt; ++i) {
2365         TCGTemp *ts = &s->temps[i];
2366         int state;
2367 
2368         switch (ts->kind) {
2369         case TEMP_FIXED:
2370         case TEMP_GLOBAL:
2371         case TEMP_LOCAL:
2372             state = TS_DEAD | TS_MEM;
2373             break;
2374         case TEMP_NORMAL:
2375         case TEMP_CONST:
2376             state = TS_DEAD;
2377             break;
2378         default:
2379             g_assert_not_reached();
2380         }
2381         ts->state = state;
2382         la_reset_pref(ts);
2383     }
2384 }
2385 
2386 /* liveness analysis: sync globals back to memory.  */
2387 static void la_global_sync(TCGContext *s, int ng)
2388 {
2389     int i;
2390 
2391     for (i = 0; i < ng; ++i) {
2392         int state = s->temps[i].state;
2393         s->temps[i].state = state | TS_MEM;
2394         if (state == TS_DEAD) {
2395             /* If the global was previously dead, reset prefs.  */
2396             la_reset_pref(&s->temps[i]);
2397         }
2398     }
2399 }
2400 
2401 /*
2402  * liveness analysis: conditional branch: all temps are dead,
2403  * globals and local temps should be synced.
2404  */
2405 static void la_bb_sync(TCGContext *s, int ng, int nt)
2406 {
2407     la_global_sync(s, ng);
2408 
2409     for (int i = ng; i < nt; ++i) {
2410         TCGTemp *ts = &s->temps[i];
2411         int state;
2412 
2413         switch (ts->kind) {
2414         case TEMP_LOCAL:
2415             state = ts->state;
2416             ts->state = state | TS_MEM;
2417             if (state != TS_DEAD) {
2418                 continue;
2419             }
2420             break;
2421         case TEMP_NORMAL:
2422             s->temps[i].state = TS_DEAD;
2423             break;
2424         case TEMP_CONST:
2425             continue;
2426         default:
2427             g_assert_not_reached();
2428         }
2429         la_reset_pref(&s->temps[i]);
2430     }
2431 }
2432 
2433 /* liveness analysis: sync globals back to memory and kill.  */
2434 static void la_global_kill(TCGContext *s, int ng)
2435 {
2436     int i;
2437 
2438     for (i = 0; i < ng; i++) {
2439         s->temps[i].state = TS_DEAD | TS_MEM;
2440         la_reset_pref(&s->temps[i]);
2441     }
2442 }
2443 
2444 /* liveness analysis: drop call-clobbered prefs for temps live across calls.  */
2445 static void la_cross_call(TCGContext *s, int nt)
2446 {
2447     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2448     int i;
2449 
2450     for (i = 0; i < nt; i++) {
2451         TCGTemp *ts = &s->temps[i];
2452         if (!(ts->state & TS_DEAD)) {
2453             TCGRegSet *pset = la_temp_pref(ts);
2454             TCGRegSet set = *pset;
2455 
2456             set &= mask;
2457             /* If the combination is not possible, restart.  */
2458             if (set == 0) {
2459                 set = tcg_target_available_regs[ts->type] & mask;
2460             }
2461             *pset = set;
2462         }
2463     }
2464 }
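
/*
 * Example (hypothetical registers): if a live temp preferred only a
 * call-clobbered register, masking leaves the empty set, so its
 * preference restarts as "any call-saved register valid for the type",
 * steering the allocator away from a spill around the call.
 */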
2465 
2466 /* Liveness analysis: update the opc_arg_life array to tell whether a
2467    given input argument is dead. Instructions updating dead
2468    temporaries are removed. */
2469 static void liveness_pass_1(TCGContext *s)
2470 {
2471     int nb_globals = s->nb_globals;
2472     int nb_temps = s->nb_temps;
2473     TCGOp *op, *op_prev;
2474     TCGRegSet *prefs;
2475     int i;
2476 
2477     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2478     for (i = 0; i < nb_temps; ++i) {
2479         s->temps[i].state_ptr = prefs + i;
2480     }
2481 
2482     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2483     la_func_end(s, nb_globals, nb_temps);
2484 
2485     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2486         int nb_iargs, nb_oargs;
2487         TCGOpcode opc_new, opc_new2;
2488         bool have_opc_new2;
2489         TCGLifeData arg_life = 0;
2490         TCGTemp *ts;
2491         TCGOpcode opc = op->opc;
2492         const TCGOpDef *def = &tcg_op_defs[opc];
2493 
2494         switch (opc) {
2495         case INDEX_op_call:
2496             {
2497                 int call_flags;
2498                 int nb_call_regs;
2499 
2500                 nb_oargs = TCGOP_CALLO(op);
2501                 nb_iargs = TCGOP_CALLI(op);
2502                 call_flags = tcg_call_flags(op);
2503 
2504                 /* pure functions can be removed if their result is unused */
2505                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2506                     for (i = 0; i < nb_oargs; i++) {
2507                         ts = arg_temp(op->args[i]);
2508                         if (ts->state != TS_DEAD) {
2509                             goto do_not_remove_call;
2510                         }
2511                     }
2512                     goto do_remove;
2513                 }
2514             do_not_remove_call:
2515 
2516                 /* Output args are dead.  */
2517                 for (i = 0; i < nb_oargs; i++) {
2518                     ts = arg_temp(op->args[i]);
2519                     if (ts->state & TS_DEAD) {
2520                         arg_life |= DEAD_ARG << i;
2521                     }
2522                     if (ts->state & TS_MEM) {
2523                         arg_life |= SYNC_ARG << i;
2524                     }
2525                     ts->state = TS_DEAD;
2526                     la_reset_pref(ts);
2527 
2528                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2529                     op->output_pref[i] = 0;
2530                 }
2531 
2532                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2533                                     TCG_CALL_NO_READ_GLOBALS))) {
2534                     la_global_kill(s, nb_globals);
2535                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2536                     la_global_sync(s, nb_globals);
2537                 }
2538 
2539                 /* Record arguments that die in this helper.  */
2540                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2541                     ts = arg_temp(op->args[i]);
2542                     if (ts && ts->state & TS_DEAD) {
2543                         arg_life |= DEAD_ARG << i;
2544                     }
2545                 }
2546 
2547                 /* For all live registers, remove call-clobbered prefs.  */
2548                 la_cross_call(s, nb_temps);
2549 
2550                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2551 
2552                 /* Input arguments are live for preceding opcodes.  */
2553                 for (i = 0; i < nb_iargs; i++) {
2554                     ts = arg_temp(op->args[i + nb_oargs]);
2555                     if (ts && ts->state & TS_DEAD) {
2556                         /* For those arguments that die, and will be allocated
2557                          * in registers, clear the register set for that arg,
2558                          * to be filled in below.  For args that will be on
2559                          * the stack, reset to any available reg.
2560                          */
2561                         *la_temp_pref(ts)
2562                             = (i < nb_call_regs ? 0 :
2563                                tcg_target_available_regs[ts->type]);
2564                         ts->state &= ~TS_DEAD;
2565                     }
2566                 }
2567 
2568                 /* For each input argument, add its input register to prefs.
2569                    If a temp is used once, this produces a single set bit.  */
2570                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2571                     ts = arg_temp(op->args[i + nb_oargs]);
2572                     if (ts) {
2573                         tcg_regset_set_reg(*la_temp_pref(ts),
2574                                            tcg_target_call_iarg_regs[i]);
2575                     }
2576                 }
2577             }
2578             break;
2579         case INDEX_op_insn_start:
2580             break;
2581         case INDEX_op_discard:
2582             /* mark the temporary as dead */
2583             ts = arg_temp(op->args[0]);
2584             ts->state = TS_DEAD;
2585             la_reset_pref(ts);
2586             break;
2587 
2588         case INDEX_op_add2_i32:
2589             opc_new = INDEX_op_add_i32;
2590             goto do_addsub2;
2591         case INDEX_op_sub2_i32:
2592             opc_new = INDEX_op_sub_i32;
2593             goto do_addsub2;
2594         case INDEX_op_add2_i64:
2595             opc_new = INDEX_op_add_i64;
2596             goto do_addsub2;
2597         case INDEX_op_sub2_i64:
2598             opc_new = INDEX_op_sub_i64;
2599         do_addsub2:
2600             nb_iargs = 4;
2601             nb_oargs = 2;
2602             /* Test if the high part of the operation is dead, but not
2603                the low part.  The result can be optimized to a simple
2604                add or sub.  This happens often for an x86_64 guest when
2605                the CPU mode is set to 32 bit.  */
2606             if (arg_temp(op->args[1])->state == TS_DEAD) {
2607                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2608                     goto do_remove;
2609                 }
2610                 /* Replace the opcode and adjust the args in place,
2611                    leaving 3 unused args at the end.  */
2612                 op->opc = opc = opc_new;
2613                 op->args[1] = op->args[2];
2614                 op->args[2] = op->args[4];
2615                 /* Fall through and mark the single-word operation live.  */
2616                 nb_iargs = 2;
2617                 nb_oargs = 1;
2618             }
2619             goto do_not_remove;
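
            /*
             * Worked example (hypothetical temps): for
             *     add2_i32 lo, hi, al, ah, bl, bh
             * with 'hi' dead but 'lo' still live, the rewrite above
             * leaves
             *     add_i32 lo, al, bl
             * and the carry computation disappears entirely.
             */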
2620 
2621         case INDEX_op_mulu2_i32:
2622             opc_new = INDEX_op_mul_i32;
2623             opc_new2 = INDEX_op_muluh_i32;
2624             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2625             goto do_mul2;
2626         case INDEX_op_muls2_i32:
2627             opc_new = INDEX_op_mul_i32;
2628             opc_new2 = INDEX_op_mulsh_i32;
2629             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2630             goto do_mul2;
2631         case INDEX_op_mulu2_i64:
2632             opc_new = INDEX_op_mul_i64;
2633             opc_new2 = INDEX_op_muluh_i64;
2634             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2635             goto do_mul2;
2636         case INDEX_op_muls2_i64:
2637             opc_new = INDEX_op_mul_i64;
2638             opc_new2 = INDEX_op_mulsh_i64;
2639             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2640             goto do_mul2;
2641         do_mul2:
2642             nb_iargs = 2;
2643             nb_oargs = 2;
2644             if (arg_temp(op->args[1])->state == TS_DEAD) {
2645                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2646                     /* Both parts of the operation are dead.  */
2647                     goto do_remove;
2648                 }
2649                 /* The high part of the operation is dead; generate the low. */
2650                 op->opc = opc = opc_new;
2651                 op->args[1] = op->args[2];
2652                 op->args[2] = op->args[3];
2653             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2654                 /* The low part of the operation is dead; generate the high. */
2655                 op->opc = opc = opc_new2;
2656                 op->args[0] = op->args[1];
2657                 op->args[1] = op->args[2];
2658                 op->args[2] = op->args[3];
2659             } else {
2660                 goto do_not_remove;
2661             }
2662             /* Mark the single-word operation live.  */
2663             nb_oargs = 1;
2664             goto do_not_remove;
2665 
2666         default:
2667             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2668             nb_iargs = def->nb_iargs;
2669             nb_oargs = def->nb_oargs;
2670 
2671             /* Test if the operation can be removed because all
2672                its outputs are dead. We assume that nb_oargs == 0
2673                implies side effects.  */
2674             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2675                 for (i = 0; i < nb_oargs; i++) {
2676                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2677                         goto do_not_remove;
2678                     }
2679                 }
2680                 goto do_remove;
2681             }
2682             goto do_not_remove;
2683 
2684         do_remove:
2685             tcg_op_remove(s, op);
2686             break;
2687 
2688         do_not_remove:
2689             for (i = 0; i < nb_oargs; i++) {
2690                 ts = arg_temp(op->args[i]);
2691 
2692                 /* Remember the preference of the uses that followed.  */
2693                 op->output_pref[i] = *la_temp_pref(ts);
2694 
2695                 /* Output args are dead.  */
2696                 if (ts->state & TS_DEAD) {
2697                     arg_life |= DEAD_ARG << i;
2698                 }
2699                 if (ts->state & TS_MEM) {
2700                     arg_life |= SYNC_ARG << i;
2701                 }
2702                 ts->state = TS_DEAD;
2703                 la_reset_pref(ts);
2704             }
2705 
2706             /* If end of basic block, update.  */
2707             if (def->flags & TCG_OPF_BB_EXIT) {
2708                 la_func_end(s, nb_globals, nb_temps);
2709             } else if (def->flags & TCG_OPF_COND_BRANCH) {
2710                 la_bb_sync(s, nb_globals, nb_temps);
2711             } else if (def->flags & TCG_OPF_BB_END) {
2712                 la_bb_end(s, nb_globals, nb_temps);
2713             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2714                 la_global_sync(s, nb_globals);
2715                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2716                     la_cross_call(s, nb_temps);
2717                 }
2718             }
2719 
2720             /* Record arguments that die in this opcode.  */
2721             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2722                 ts = arg_temp(op->args[i]);
2723                 if (ts->state & TS_DEAD) {
2724                     arg_life |= DEAD_ARG << i;
2725                 }
2726             }
2727 
2728             /* Input arguments are live for preceding opcodes.  */
2729             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2730                 ts = arg_temp(op->args[i]);
2731                 if (ts->state & TS_DEAD) {
2732                     /* For operands that were dead, initially allow
2733                        all regs for the type.  */
2734                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2735                     ts->state &= ~TS_DEAD;
2736                 }
2737             }
2738 
2739             /* Incorporate constraints for this operand.  */
2740             switch (opc) {
2741             case INDEX_op_mov_i32:
2742             case INDEX_op_mov_i64:
2743                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2744                    have proper constraints.  That said, special case
2745                    moves to propagate preferences backward.  */
2746                 if (IS_DEAD_ARG(1)) {
2747                     *la_temp_pref(arg_temp(op->args[0]))
2748                         = *la_temp_pref(arg_temp(op->args[1]));
2749                 }
2750                 break;
2751 
2752             default:
2753                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2754                     const TCGArgConstraint *ct = &def->args_ct[i];
2755                     TCGRegSet set, *pset;
2756 
2757                     ts = arg_temp(op->args[i]);
2758                     pset = la_temp_pref(ts);
2759                     set = *pset;
2760 
2761                     set &= ct->regs;
2762                     if (ct->ialias) {
2763                         set &= op->output_pref[ct->alias_index];
2764                     }
2765                     /* If the combination is not possible, restart.  */
2766                     if (set == 0) {
2767                         set = ct->regs;
2768                     }
2769                     *pset = set;
2770                 }
2771                 break;
2772             }
2773             break;
2774         }
2775         op->life = arg_life;
2776     }
2777 }
2778 
2779 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2780 static bool liveness_pass_2(TCGContext *s)
2781 {
2782     int nb_globals = s->nb_globals;
2783     int nb_temps, i;
2784     bool changes = false;
2785     TCGOp *op, *op_next;
2786 
2787     /* Create a temporary for each indirect global.  */
2788     for (i = 0; i < nb_globals; ++i) {
2789         TCGTemp *its = &s->temps[i];
2790         if (its->indirect_reg) {
2791             TCGTemp *dts = tcg_temp_alloc(s);
2792             dts->type = its->type;
2793             dts->base_type = its->base_type;
2794             its->state_ptr = dts;
2795         } else {
2796             its->state_ptr = NULL;
2797         }
2798         /* All globals begin dead.  */
2799         its->state = TS_DEAD;
2800     }
2801     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2802         TCGTemp *its = &s->temps[i];
2803         its->state_ptr = NULL;
2804         its->state = TS_DEAD;
2805     }
2806 
2807     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2808         TCGOpcode opc = op->opc;
2809         const TCGOpDef *def = &tcg_op_defs[opc];
2810         TCGLifeData arg_life = op->life;
2811         int nb_iargs, nb_oargs, call_flags;
2812         TCGTemp *arg_ts, *dir_ts;
2813 
2814         if (opc == INDEX_op_call) {
2815             nb_oargs = TCGOP_CALLO(op);
2816             nb_iargs = TCGOP_CALLI(op);
2817             call_flags = tcg_call_flags(op);
2818         } else {
2819             nb_iargs = def->nb_iargs;
2820             nb_oargs = def->nb_oargs;
2821 
2822             /* Set flags similar to those a call would require.  */
2823             if (def->flags & TCG_OPF_COND_BRANCH) {
2824                 /* Like reading globals: sync_globals */
2825                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2826             } else if (def->flags & TCG_OPF_BB_END) {
2827                 /* Like writing globals: save_globals */
2828                 call_flags = 0;
2829             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2830                 /* Like reading globals: sync_globals */
2831                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2832             } else {
2833                 /* No effect on globals.  */
2834                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2835                               TCG_CALL_NO_WRITE_GLOBALS);
2836             }
2837         }
2838 
2839         /* Make sure that input arguments are available.  */
2840         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2841             arg_ts = arg_temp(op->args[i]);
2842             if (arg_ts) {
2843                 dir_ts = arg_ts->state_ptr;
2844                 if (dir_ts && arg_ts->state == TS_DEAD) {
2845                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2846                                       ? INDEX_op_ld_i32
2847                                       : INDEX_op_ld_i64);
2848                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2849 
2850                     lop->args[0] = temp_arg(dir_ts);
2851                     lop->args[1] = temp_arg(arg_ts->mem_base);
2852                     lop->args[2] = arg_ts->mem_offset;
2853 
2854                     /* Loaded, but synced with memory.  */
2855                     arg_ts->state = TS_MEM;
2856                 }
2857             }
2858         }
2859 
2860         /* Perform input replacement, and mark inputs that became dead.
2861            No action is required except keeping temp_state up to date
2862            so that we reload when needed.  */
2863         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2864             arg_ts = arg_temp(op->args[i]);
2865             if (arg_ts) {
2866                 dir_ts = arg_ts->state_ptr;
2867                 if (dir_ts) {
2868                     op->args[i] = temp_arg(dir_ts);
2869                     changes = true;
2870                     if (IS_DEAD_ARG(i)) {
2871                         arg_ts->state = TS_DEAD;
2872                     }
2873                 }
2874             }
2875         }
2876 
2877         /* Liveness analysis should ensure that the following are
2878            all correct, for call sites and basic block end points.  */
2879         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2880             /* Nothing to do */
2881         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2882             for (i = 0; i < nb_globals; ++i) {
2883                 /* Liveness should see that globals are synced back,
2884                    that is, either TS_DEAD or TS_MEM.  */
2885                 arg_ts = &s->temps[i];
2886                 tcg_debug_assert(arg_ts->state_ptr == 0
2887                                  || arg_ts->state != 0);
2888             }
2889         } else {
2890             for (i = 0; i < nb_globals; ++i) {
2891                 /* Liveness should see that globals are saved back,
2892                    that is, TS_DEAD, waiting to be reloaded.  */
2893                 arg_ts = &s->temps[i];
2894                 tcg_debug_assert(arg_ts->state_ptr == 0
2895                                  || arg_ts->state == TS_DEAD);
2896             }
2897         }
2898 
2899         /* Outputs become available.  */
2900         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2901             arg_ts = arg_temp(op->args[0]);
2902             dir_ts = arg_ts->state_ptr;
2903             if (dir_ts) {
2904                 op->args[0] = temp_arg(dir_ts);
2905                 changes = true;
2906 
2907                 /* The output is now live and modified.  */
2908                 arg_ts->state = 0;
2909 
2910                 if (NEED_SYNC_ARG(0)) {
2911                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2912                                       ? INDEX_op_st_i32
2913                                       : INDEX_op_st_i64);
2914                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2915                     TCGTemp *out_ts = dir_ts;
2916 
2917                     if (IS_DEAD_ARG(0)) {
2918                         out_ts = arg_temp(op->args[1]);
2919                         arg_ts->state = TS_DEAD;
2920                         tcg_op_remove(s, op);
2921                     } else {
2922                         arg_ts->state = TS_MEM;
2923                     }
2924 
2925                     sop->args[0] = temp_arg(out_ts);
2926                     sop->args[1] = temp_arg(arg_ts->mem_base);
2927                     sop->args[2] = arg_ts->mem_offset;
2928                 } else {
2929                     tcg_debug_assert(!IS_DEAD_ARG(0));
2930                 }
2931             }
2932         } else {
2933             for (i = 0; i < nb_oargs; i++) {
2934                 arg_ts = arg_temp(op->args[i]);
2935                 dir_ts = arg_ts->state_ptr;
2936                 if (!dir_ts) {
2937                     continue;
2938                 }
2939                 op->args[i] = temp_arg(dir_ts);
2940                 changes = true;
2941 
2942                 /* The output is now live and modified.  */
2943                 arg_ts->state = 0;
2944 
2945                 /* Sync outputs upon their last write.  */
2946                 if (NEED_SYNC_ARG(i)) {
2947                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2948                                       ? INDEX_op_st_i32
2949                                       : INDEX_op_st_i64);
2950                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2951 
2952                     sop->args[0] = temp_arg(dir_ts);
2953                     sop->args[1] = temp_arg(arg_ts->mem_base);
2954                     sop->args[2] = arg_ts->mem_offset;
2955 
2956                     arg_ts->state = TS_MEM;
2957                 }
2958                 /* Drop outputs that are dead.  */
2959                 if (IS_DEAD_ARG(i)) {
2960                     arg_ts->state = TS_DEAD;
2961                 }
2962             }
2963         }
2964     }
2965 
2966     return changes;
2967 }
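
/*
 * Illustrative effect of the pass above on a hypothetical indirect
 * global 'g' shadowed by direct temp 'g2' (base/offset invented):
 *
 *     add_i32 g, g, t0
 *
 * becomes, at the points dictated by the liveness flags,
 *
 *     ld_i32  g2, base, off
 *     add_i32 g2, g2, t0
 *     st_i32  g2, base, off
 *
 * with the load elided while 'g2' is already live and the store
 * emitted only where NEED_SYNC_ARG was set.
 */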
2968 
2969 #ifdef CONFIG_DEBUG_TCG
2970 static void dump_regs(TCGContext *s)
2971 {
2972     TCGTemp *ts;
2973     int i;
2974     char buf[64];
2975 
2976     for (i = 0; i < s->nb_temps; i++) {
2977         ts = &s->temps[i];
2978         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2979         switch (ts->val_type) {
2980         case TEMP_VAL_REG:
2981             printf("%s", tcg_target_reg_names[ts->reg]);
2982             break;
2983         case TEMP_VAL_MEM:
2984             printf("%d(%s)", (int)ts->mem_offset,
2985                    tcg_target_reg_names[ts->mem_base->reg]);
2986             break;
2987         case TEMP_VAL_CONST:
2988             printf("$0x%" PRIx64, ts->val);
2989             break;
2990         case TEMP_VAL_DEAD:
2991             printf("D");
2992             break;
2993         default:
2994             printf("???");
2995             break;
2996         }
2997         printf("\n");
2998     }
2999 
3000     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3001         if (s->reg_to_temp[i] != NULL) {
3002             printf("%s: %s\n",
3003                    tcg_target_reg_names[i],
3004                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3005         }
3006     }
3007 }
3008 
3009 static void check_regs(TCGContext *s)
3010 {
3011     int reg;
3012     int k;
3013     TCGTemp *ts;
3014     char buf[64];
3015 
3016     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3017         ts = s->reg_to_temp[reg];
3018         if (ts != NULL) {
3019             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3020                 printf("Inconsistency for register %s:\n",
3021                        tcg_target_reg_names[reg]);
3022                 goto fail;
3023             }
3024         }
3025     }
3026     for (k = 0; k < s->nb_temps; k++) {
3027         ts = &s->temps[k];
3028         if (ts->val_type == TEMP_VAL_REG
3029             && ts->kind != TEMP_FIXED
3030             && s->reg_to_temp[ts->reg] != ts) {
3031             printf("Inconsistency for temp %s:\n",
3032                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3033         fail:
3034             printf("reg state:\n");
3035             dump_regs(s);
3036             tcg_abort();
3037         }
3038     }
3039 }
3040 #endif
3041 
3042 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3043 {
3044     intptr_t off, size, align;
3045 
3046     switch (ts->type) {
3047     case TCG_TYPE_I32:
3048         size = align = 4;
3049         break;
3050     case TCG_TYPE_I64:
3051     case TCG_TYPE_V64:
3052         size = align = 8;
3053         break;
3054     case TCG_TYPE_V128:
3055         size = align = 16;
3056         break;
3057     case TCG_TYPE_V256:
3058         /* Note that we do not require aligned storage for V256. */
3059         size = 32, align = 16;
3060         break;
3061     default:
3062         g_assert_not_reached();
3063     }
3064 
3065     /*
3066      * Assume the stack is sufficiently aligned.
3067      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3068      * and do not require 16 byte vector alignment.  This seems slightly
3069      * easier than fully parameterizing the above switch statement.
3070      */
3071     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3072     off = ROUND_UP(s->current_frame_offset, align);
3073 
3074     /* If we've exhausted the stack frame, restart with a smaller TB. */
3075     if (off + size > s->frame_end) {
3076         tcg_raise_tb_overflow(s);
3077     }
3078     s->current_frame_offset = off + size;
3079 
3080     ts->mem_offset = off;
3081 #if defined(__sparc__)
3082     ts->mem_offset += TCG_TARGET_STACK_BIAS;
3083 #endif
3084     ts->mem_base = s->frame_temp;
3085     ts->mem_allocated = 1;
3086 }
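
/*
 * Worked example (hypothetical offsets): with current_frame_offset == 4,
 * a TCG_TYPE_I64 slot rounds the offset up to 8 and advances it to 16;
 * a following TCG_TYPE_V128 slot then starts at 16 if the host stack
 * alignment permits (the MIN() above may reduce the requested align).
 */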
3087 
3088 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3089 
3090 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3091    mark it free; otherwise mark it dead.  */
3092 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3093 {
3094     TCGTempVal new_type;
3095 
3096     switch (ts->kind) {
3097     case TEMP_FIXED:
3098         return;
3099     case TEMP_GLOBAL:
3100     case TEMP_LOCAL:
3101         new_type = TEMP_VAL_MEM;
3102         break;
3103     case TEMP_NORMAL:
3104         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3105         break;
3106     case TEMP_CONST:
3107         new_type = TEMP_VAL_CONST;
3108         break;
3109     default:
3110         g_assert_not_reached();
3111     }
3112     if (ts->val_type == TEMP_VAL_REG) {
3113         s->reg_to_temp[ts->reg] = NULL;
3114     }
3115     ts->val_type = new_type;
3116 }
3117 
3118 /* Mark a temporary as dead.  */
3119 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3120 {
3121     temp_free_or_dead(s, ts, 1);
3122 }
3123 
3124 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3125    register needs to be allocated to store a constant.  If 'free_or_dead'
3126    is non-zero, subsequently release the temporary; if it is positive, the
3127    temp is dead; if it is negative, the temp is free.  */
3128 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3129                       TCGRegSet preferred_regs, int free_or_dead)
3130 {
3131     if (!temp_readonly(ts) && !ts->mem_coherent) {
3132         if (!ts->mem_allocated) {
3133             temp_allocate_frame(s, ts);
3134         }
3135         switch (ts->val_type) {
3136         case TEMP_VAL_CONST:
3137             /* If we're going to free the temp immediately, then we won't
3138                require it later in a register, so attempt to store the
3139                constant to memory directly.  */
3140             if (free_or_dead
3141                 && tcg_out_sti(s, ts->type, ts->val,
3142                                ts->mem_base->reg, ts->mem_offset)) {
3143                 break;
3144             }
3145             temp_load(s, ts, tcg_target_available_regs[ts->type],
3146                       allocated_regs, preferred_regs);
3147             /* fallthrough */
3148 
3149         case TEMP_VAL_REG:
3150             tcg_out_st(s, ts->type, ts->reg,
3151                        ts->mem_base->reg, ts->mem_offset);
3152             break;
3153 
3154         case TEMP_VAL_MEM:
3155             break;
3156 
3157         case TEMP_VAL_DEAD:
3158         default:
3159             tcg_abort();
3160         }
3161         ts->mem_coherent = 1;
3162     }
3163     if (free_or_dead) {
3164         temp_free_or_dead(s, ts, free_or_dead);
3165     }
3166 }
3167 
3168 /* free register 'reg' by spilling the corresponding temporary if necessary */
3169 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3170 {
3171     TCGTemp *ts = s->reg_to_temp[reg];
3172     if (ts != NULL) {
3173         temp_sync(s, ts, allocated_regs, 0, -1);
3174     }
3175 }
3176 
3177 /**
3178  * tcg_reg_alloc:
3179  * @required_regs: Set of registers in which we must allocate.
3180  * @allocated_regs: Set of registers which must be avoided.
3181  * @preferred_regs: Set of registers we should prefer.
3182  * @rev: True if we search the registers in "indirect" order.
3183  *
3184  * The allocated register must be in @required_regs & ~@allocated_regs,
3185  * but if we can put it in @preferred_regs we may save a move later.
3186  */
3187 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3188                             TCGRegSet allocated_regs,
3189                             TCGRegSet preferred_regs, bool rev)
3190 {
3191     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3192     TCGRegSet reg_ct[2];
3193     const int *order;
3194 
3195     reg_ct[1] = required_regs & ~allocated_regs;
3196     tcg_debug_assert(reg_ct[1] != 0);
3197     reg_ct[0] = reg_ct[1] & preferred_regs;
3198 
3199     /* Skip the preferred_regs option if it cannot be satisfied,
3200        or if the preference made no difference.  */
3201     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3202 
3203     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3204 
3205     /* Try free registers, preferences first.  */
3206     for (j = f; j < 2; j++) {
3207         TCGRegSet set = reg_ct[j];
3208 
3209         if (tcg_regset_single(set)) {
3210             /* One register in the set.  */
3211             TCGReg reg = tcg_regset_first(set);
3212             if (s->reg_to_temp[reg] == NULL) {
3213                 return reg;
3214             }
3215         } else {
3216             for (i = 0; i < n; i++) {
3217                 TCGReg reg = order[i];
3218                 if (s->reg_to_temp[reg] == NULL &&
3219                     tcg_regset_test_reg(set, reg)) {
3220                     return reg;
3221                 }
3222             }
3223         }
3224     }
3225 
3226     /* We must spill something.  */
3227     for (j = f; j < 2; j++) {
3228         TCGRegSet set = reg_ct[j];
3229 
3230         if (tcg_regset_single(set)) {
3231             /* One register in the set.  */
3232             TCGReg reg = tcg_regset_first(set);
3233             tcg_reg_free(s, reg, allocated_regs);
3234             return reg;
3235         } else {
3236             for (i = 0; i < n; i++) {
3237                 TCGReg reg = order[i];
3238                 if (tcg_regset_test_reg(set, reg)) {
3239                     tcg_reg_free(s, reg, allocated_regs);
3240                     return reg;
3241                 }
3242             }
3243         }
3244     }
3245 
3246     tcg_abort();
3247 }
3248 
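/*
 * A worked example of the two-pass search above, with hypothetical
 * register numbers: suppose required_regs = {r0,r1,r2},
 * allocated_regs = {r0} and preferred_regs = {r2}.  Then
 *
 *     reg_ct[1] = {r1,r2}    candidates minus allocated registers
 *     reg_ct[0] = {r2}       candidates that are also preferred
 *     f = 0                  the preference is satisfiable and narrows
 *
 * The first loop scans {r2} and then {r1,r2} looking for a register
 * with no resident temp; only if both passes fail does the second
 * loop evict a temp with tcg_reg_free() and return its register.
 */
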
3249 /* Make sure the temporary is in a register.  If needed, allocate the register
3250    from DESIRED while avoiding ALLOCATED.  */
3251 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3252                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3253 {
3254     TCGReg reg;
3255 
3256     switch (ts->val_type) {
3257     case TEMP_VAL_REG:
3258         return;
3259     case TEMP_VAL_CONST:
3260         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3261                             preferred_regs, ts->indirect_base);
3262         if (ts->type <= TCG_TYPE_I64) {
3263             tcg_out_movi(s, ts->type, reg, ts->val);
3264         } else {
3265             uint64_t val = ts->val;
3266             MemOp vece = MO_64;
3267 
3268             /*
3269              * Find the minimal vector element that matches the constant.
3270              * The targets will, in general, have to do this search anyway,
3271              * do this generically.
3272              */
3273             if (val == dup_const(MO_8, val)) {
3274                 vece = MO_8;
3275             } else if (val == dup_const(MO_16, val)) {
3276                 vece = MO_16;
3277             } else if (val == dup_const(MO_32, val)) {
3278                 vece = MO_32;
3279             }
3280 
3281             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3282         }
3283         ts->mem_coherent = 0;
3284         break;
3285     case TEMP_VAL_MEM:
3286         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3287                             preferred_regs, ts->indirect_base);
3288         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3289         ts->mem_coherent = 1;
3290         break;
3291     case TEMP_VAL_DEAD:
3292     default:
3293         tcg_abort();
3294     }
3295     ts->reg = reg;
3296     ts->val_type = TEMP_VAL_REG;
3297     s->reg_to_temp[reg] = ts;
3298 }
3299 
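/*
 * The minimal-VECE search above relies on dup_const() replicating a
 * value across 64 bits.  Some illustrative inputs and results:
 *
 *     val = 0xabababababababab  ->  vece = MO_8
 *     val = 0x00ff00ff00ff00ff  ->  vece = MO_16
 *     val = 0xdeadbeefdeadbeef  ->  vece = MO_32
 *     val = 0x0123456789abcdef  ->  vece = MO_64
 *
 * e.g. dup_const(MO_8, 0xab) == 0xabababababababab, so the first
 * value can be dup'ed from a single byte.
 */
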
3300 /* Save a temporary to memory. 'allocated_regs' is used in case a
3301    temporary register needs to be allocated to store a constant.  */
3302 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3303 {
3304     /* The liveness analysis already ensures that globals are back
3305        in memory. Keep an tcg_debug_assert for safety. */
3306     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3307 }
3308 
3309 /* save globals to their canonical location and assume they can be
3310    modified by the following code. 'allocated_regs' is used in case a
3311    temporary register needs to be allocated to store a constant. */
3312 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3313 {
3314     int i, n;
3315 
3316     for (i = 0, n = s->nb_globals; i < n; i++) {
3317         temp_save(s, &s->temps[i], allocated_regs);
3318     }
3319 }
3320 
3321 /* sync globals to their canonical location and assume they can be
3322    read by the following code. 'allocated_regs' is used in case a
3323    temporary register needs to be allocated to store a constant. */
3324 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3325 {
3326     int i, n;
3327 
3328     for (i = 0, n = s->nb_globals; i < n; i++) {
3329         TCGTemp *ts = &s->temps[i];
3330         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3331                          || ts->kind == TEMP_FIXED
3332                          || ts->mem_coherent);
3333     }
3334 }
3335 
3336 /* at the end of a basic block, we assume all temporaries are dead and
3337    all globals are stored at their canonical location. */
3338 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3339 {
3340     int i;
3341 
3342     for (i = s->nb_globals; i < s->nb_temps; i++) {
3343         TCGTemp *ts = &s->temps[i];
3344 
3345         switch (ts->kind) {
3346         case TEMP_LOCAL:
3347             temp_save(s, ts, allocated_regs);
3348             break;
3349         case TEMP_NORMAL:
3350             /* The liveness analysis already ensures that temps are dead.
3351                Keep a tcg_debug_assert for safety. */
3352             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3353             break;
3354         case TEMP_CONST:
3355             /* Similarly, we should have freed any allocated register. */
3356             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3357             break;
3358         default:
3359             g_assert_not_reached();
3360         }
3361     }
3362 
3363     save_globals(s, allocated_regs);
3364 }
3365 
3366 /*
3367  * At a conditional branch, we assume all temporaries are dead and
3368  * all globals and local temps are synced to their location.
3369  */
3370 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3371 {
3372     sync_globals(s, allocated_regs);
3373 
3374     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3375         TCGTemp *ts = &s->temps[i];
3376         /*
3377          * The liveness analysis already ensures that temps are dead.
3378          * Keep tcg_debug_asserts for safety.
3379          */
3380         switch (ts->kind) {
3381         case TEMP_LOCAL:
3382             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3383             break;
3384         case TEMP_NORMAL:
3385             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3386             break;
3387         case TEMP_CONST:
3388             break;
3389         default:
3390             g_assert_not_reached();
3391         }
3392     }
3393 }
3394 
3395 /*
3396  * Specialized code generation for INDEX_op_mov_* with a constant.
3397  */
3398 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3399                                   tcg_target_ulong val, TCGLifeData arg_life,
3400                                   TCGRegSet preferred_regs)
3401 {
3402     /* ENV should not be modified.  */
3403     tcg_debug_assert(!temp_readonly(ots));
3404 
3405     /* The movi is not explicitly generated here; the constant is only recorded, to be materialized on demand.  */
3406     if (ots->val_type == TEMP_VAL_REG) {
3407         s->reg_to_temp[ots->reg] = NULL;
3408     }
3409     ots->val_type = TEMP_VAL_CONST;
3410     ots->val = val;
3411     ots->mem_coherent = 0;
3412     if (NEED_SYNC_ARG(0)) {
3413         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3414     } else if (IS_DEAD_ARG(0)) {
3415         temp_dead(s, ots);
3416     }
3417 }
3418 
3419 /*
3420  * Specialized code generation for INDEX_op_mov_*.
3421  */
3422 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3423 {
3424     const TCGLifeData arg_life = op->life;
3425     TCGRegSet allocated_regs, preferred_regs;
3426     TCGTemp *ts, *ots;
3427     TCGType otype, itype;
3428 
3429     allocated_regs = s->reserved_regs;
3430     preferred_regs = op->output_pref[0];
3431     ots = arg_temp(op->args[0]);
3432     ts = arg_temp(op->args[1]);
3433 
3434     /* ENV should not be modified.  */
3435     tcg_debug_assert(!temp_readonly(ots));
3436 
3437     /* Note that otype != itype for no-op truncation.  */
3438     otype = ots->type;
3439     itype = ts->type;
3440 
3441     if (ts->val_type == TEMP_VAL_CONST) {
3442         /* propagate constant or generate sti */
3443         tcg_target_ulong val = ts->val;
3444         if (IS_DEAD_ARG(1)) {
3445             temp_dead(s, ts);
3446         }
3447         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3448         return;
3449     }
3450 
3451     /* If the source value is in memory we're going to be forced
3452        to have it in a register in order to perform the copy.  Copy
3453        the SOURCE value into its own register first, that way we
3454        don't have to reload SOURCE the next time it is used. */
3455     if (ts->val_type == TEMP_VAL_MEM) {
3456         temp_load(s, ts, tcg_target_available_regs[itype],
3457                   allocated_regs, preferred_regs);
3458     }
3459 
3460     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3461     if (IS_DEAD_ARG(0)) {
3462         /* mov to a non-saved dead register makes no sense (even with
3463            liveness analysis disabled). */
3464         tcg_debug_assert(NEED_SYNC_ARG(0));
3465         if (!ots->mem_allocated) {
3466             temp_allocate_frame(s, ots);
3467         }
3468         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3469         if (IS_DEAD_ARG(1)) {
3470             temp_dead(s, ts);
3471         }
3472         temp_dead(s, ots);
3473     } else {
3474         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3475             /* the mov can be suppressed */
3476             if (ots->val_type == TEMP_VAL_REG) {
3477                 s->reg_to_temp[ots->reg] = NULL;
3478             }
3479             ots->reg = ts->reg;
3480             temp_dead(s, ts);
3481         } else {
3482             if (ots->val_type != TEMP_VAL_REG) {
3483                 /* When allocating a new register, make sure to not spill the
3484                    input one. */
3485                 tcg_regset_set_reg(allocated_regs, ts->reg);
3486                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3487                                          allocated_regs, preferred_regs,
3488                                          ots->indirect_base);
3489             }
3490             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3491                 /*
3492                  * Cross register class move not supported.
3493                  * Store the source register into the destination slot
3494                  * and leave the destination temp as TEMP_VAL_MEM.
3495                  */
3496                 assert(!temp_readonly(ots));
3497                 if (!ots->mem_allocated) {
3498                     temp_allocate_frame(s, ots);
3499                 }
3500                 tcg_out_st(s, ts->type, ts->reg,
3501                            ots->mem_base->reg, ots->mem_offset);
3502                 ots->mem_coherent = 1;
3503                 temp_free_or_dead(s, ots, -1);
3504                 return;
3505             }
3506         }
3507         ots->val_type = TEMP_VAL_REG;
3508         ots->mem_coherent = 0;
3509         s->reg_to_temp[ots->reg] = ots;
3510         if (NEED_SYNC_ARG(0)) {
3511             temp_sync(s, ots, allocated_regs, 0, 0);
3512         }
3513     }
3514 }
3515 
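/*
 * To illustrate the register-renaming path above: for a sequence like
 *
 *     mov_i32 t1, t0       with t0 dead after this op
 *
 * no host instruction is emitted; t1 simply inherits t0's register
 * and t0 is marked dead.  Only when the source remains live, or is a
 * fixed register, is a real host move emitted (or, for an
 * unsupported cross-class move, a store through t1's stack slot).
 */
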
3516 /*
3517  * Specialized code generation for INDEX_op_dup_vec.
3518  */
3519 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3520 {
3521     const TCGLifeData arg_life = op->life;
3522     TCGRegSet dup_out_regs, dup_in_regs;
3523     TCGTemp *its, *ots;
3524     TCGType itype, vtype;
3525     intptr_t endian_fixup;
3526     unsigned vece;
3527     bool ok;
3528 
3529     ots = arg_temp(op->args[0]);
3530     its = arg_temp(op->args[1]);
3531 
3532     /* ENV should not be modified.  */
3533     tcg_debug_assert(!temp_readonly(ots));
3534 
3535     itype = its->type;
3536     vece = TCGOP_VECE(op);
3537     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3538 
3539     if (its->val_type == TEMP_VAL_CONST) {
3540         /* Propagate constant via movi -> dupi.  */
3541         tcg_target_ulong val = its->val;
3542         if (IS_DEAD_ARG(1)) {
3543             temp_dead(s, its);
3544         }
3545         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3546         return;
3547     }
3548 
3549     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3550     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3551 
3552     /* Allocate the output register now.  */
3553     if (ots->val_type != TEMP_VAL_REG) {
3554         TCGRegSet allocated_regs = s->reserved_regs;
3555 
3556         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3557             /* Make sure to not spill the input register. */
3558             tcg_regset_set_reg(allocated_regs, its->reg);
3559         }
3560         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3561                                  op->output_pref[0], ots->indirect_base);
3562         ots->val_type = TEMP_VAL_REG;
3563         ots->mem_coherent = 0;
3564         s->reg_to_temp[ots->reg] = ots;
3565     }
3566 
3567     switch (its->val_type) {
3568     case TEMP_VAL_REG:
3569         /*
3570          * The dup constraints must be broad, covering all possible VECE.
3571          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3572          * to fail, indicating that extra moves are required for that case.
3573          */
3574         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3575             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3576                 goto done;
3577             }
3578             /* Try again from memory or a vector input register.  */
3579         }
3580         if (!its->mem_coherent) {
3581             /*
3582              * The input register is not synced, and so an extra store
3583              * would be required to use memory.  Attempt an integer-vector
3584              * register move first.  We do not have a TCGRegSet for this.
3585              */
3586             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3587                 break;
3588             }
3589             /* Sync the temp back to its slot and load from there.  */
3590             temp_sync(s, its, s->reserved_regs, 0, 0);
3591         }
3592         /* fall through */
3593 
3594     case TEMP_VAL_MEM:
3595 #ifdef HOST_WORDS_BIGENDIAN
3596         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3597         endian_fixup -= 1 << vece;
3598 #else
3599         endian_fixup = 0;
3600 #endif
3601         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3602                              its->mem_offset + endian_fixup)) {
3603             goto done;
3604         }
3605         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3606         break;
3607 
3608     default:
3609         g_assert_not_reached();
3610     }
3611 
3612     /* We now have a vector input register, so dup must succeed. */
3613     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3614     tcg_debug_assert(ok);
3615 
3616  done:
3617     if (IS_DEAD_ARG(1)) {
3618         temp_dead(s, its);
3619     }
3620     if (NEED_SYNC_ARG(0)) {
3621         temp_sync(s, ots, s->reserved_regs, 0, 0);
3622     }
3623     if (IS_DEAD_ARG(0)) {
3624         temp_dead(s, ots);
3625     }
3626 }
3627 
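/*
 * The endian_fixup above compensates for big-endian hosts, where the
 * least significant element of an integer in memory lives at the
 * highest address.  For example, a 32-bit input with vece == MO_8:
 *
 *     endian_fixup = 4 - (1 << MO_8) = 4 - 1 = 3
 *
 * so the dup loads the byte at mem_offset + 3, the low-order byte of
 * the big-endian 32-bit value.
 */
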
3628 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3629 {
3630     const TCGLifeData arg_life = op->life;
3631     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3632     TCGRegSet i_allocated_regs;
3633     TCGRegSet o_allocated_regs;
3634     int i, k, nb_iargs, nb_oargs;
3635     TCGReg reg;
3636     TCGArg arg;
3637     const TCGArgConstraint *arg_ct;
3638     TCGTemp *ts;
3639     TCGArg new_args[TCG_MAX_OP_ARGS];
3640     int const_args[TCG_MAX_OP_ARGS];
3641 
3642     nb_oargs = def->nb_oargs;
3643     nb_iargs = def->nb_iargs;
3644 
3645     /* copy constants */
3646     memcpy(new_args + nb_oargs + nb_iargs,
3647            op->args + nb_oargs + nb_iargs,
3648            sizeof(TCGArg) * def->nb_cargs);
3649 
3650     i_allocated_regs = s->reserved_regs;
3651     o_allocated_regs = s->reserved_regs;
3652 
3653     /* satisfy input constraints */
3654     for (k = 0; k < nb_iargs; k++) {
3655         TCGRegSet i_preferred_regs, o_preferred_regs;
3656 
3657         i = def->args_ct[nb_oargs + k].sort_index;
3658         arg = op->args[i];
3659         arg_ct = &def->args_ct[i];
3660         ts = arg_temp(arg);
3661 
3662         if (ts->val_type == TEMP_VAL_CONST
3663             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3664             /* constant is OK for instruction */
3665             const_args[i] = 1;
3666             new_args[i] = ts->val;
3667             continue;
3668         }
3669 
3670         i_preferred_regs = o_preferred_regs = 0;
3671         if (arg_ct->ialias) {
3672             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3673 
3674             /*
3675              * If the input is readonly, then it cannot also be an
3676              * output and aliased to itself.  If the input is not
3677              * dead after the instruction, we must allocate a new
3678              * register and move it.
3679              */
3680             if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3681                 goto allocate_in_reg;
3682             }
3683 
3684             /*
3685              * Check if the current register has already been allocated
3686              * for another input aliased to an output.
3687              */
3688             if (ts->val_type == TEMP_VAL_REG) {
3689                 reg = ts->reg;
3690                 for (int k2 = 0; k2 < k; k2++) {
3691                     int i2 = def->args_ct[nb_oargs + k2].sort_index;
3692                     if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3693                         goto allocate_in_reg;
3694                     }
3695                 }
3696             }
3697             i_preferred_regs = o_preferred_regs;
3698         }
3699 
3700         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3701         reg = ts->reg;
3702 
3703         if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3704  allocate_in_reg:
3705             /*
3706              * Allocate a new register matching the constraint
3707              * and move the temporary register into it.
3708              */
3709             temp_load(s, ts, tcg_target_available_regs[ts->type],
3710                       i_allocated_regs, 0);
3711             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3712                                 o_preferred_regs, ts->indirect_base);
3713             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3714                 /*
3715                  * Cross register class move not supported.  Sync the
3716                  * temp back to its slot and load from there.
3717                  */
3718                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3719                 tcg_out_ld(s, ts->type, reg,
3720                            ts->mem_base->reg, ts->mem_offset);
3721             }
3722         }
3723         new_args[i] = reg;
3724         const_args[i] = 0;
3725         tcg_regset_set_reg(i_allocated_regs, reg);
3726     }
3727 
3728     /* mark dead temporaries and free the associated registers */
3729     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3730         if (IS_DEAD_ARG(i)) {
3731             temp_dead(s, arg_temp(op->args[i]));
3732         }
3733     }
3734 
3735     if (def->flags & TCG_OPF_COND_BRANCH) {
3736         tcg_reg_alloc_cbranch(s, i_allocated_regs);
3737     } else if (def->flags & TCG_OPF_BB_END) {
3738         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3739     } else {
3740         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3741             /* XXX: permit generic clobber register list ? */
3742             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3743                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3744                     tcg_reg_free(s, i, i_allocated_regs);
3745                 }
3746             }
3747         }
3748         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3749             /* sync globals if the op has side effects and might trigger
3750                an exception. */
3751             sync_globals(s, i_allocated_regs);
3752         }
3753 
3754         /* satisfy the output constraints */
3755         for (k = 0; k < nb_oargs; k++) {
3756             i = def->args_ct[k].sort_index;
3757             arg = op->args[i];
3758             arg_ct = &def->args_ct[i];
3759             ts = arg_temp(arg);
3760 
3761             /* ENV should not be modified.  */
3762             tcg_debug_assert(!temp_readonly(ts));
3763 
3764             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3765                 reg = new_args[arg_ct->alias_index];
3766             } else if (arg_ct->newreg) {
3767                 reg = tcg_reg_alloc(s, arg_ct->regs,
3768                                     i_allocated_regs | o_allocated_regs,
3769                                     op->output_pref[k], ts->indirect_base);
3770             } else {
3771                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3772                                     op->output_pref[k], ts->indirect_base);
3773             }
3774             tcg_regset_set_reg(o_allocated_regs, reg);
3775             if (ts->val_type == TEMP_VAL_REG) {
3776                 s->reg_to_temp[ts->reg] = NULL;
3777             }
3778             ts->val_type = TEMP_VAL_REG;
3779             ts->reg = reg;
3780             /*
3781              * Temp value is modified, so the value kept in memory is
3782              * potentially not the same.
3783              */
3784             ts->mem_coherent = 0;
3785             s->reg_to_temp[reg] = ts;
3786             new_args[i] = reg;
3787         }
3788     }
3789 
3790     /* emit instruction */
3791     if (def->flags & TCG_OPF_VECTOR) {
3792         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3793                        new_args, const_args);
3794     } else {
3795         tcg_out_op(s, op->opc, new_args, const_args);
3796     }
3797 
3798     /* move the outputs into the correct registers if needed */
3799     for (i = 0; i < nb_oargs; i++) {
3800         ts = arg_temp(op->args[i]);
3801 
3802         /* ENV should not be modified.  */
3803         tcg_debug_assert(!temp_readonly(ts));
3804 
3805         if (NEED_SYNC_ARG(i)) {
3806             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3807         } else if (IS_DEAD_ARG(i)) {
3808             temp_dead(s, ts);
3809         }
3810     }
3811 }
3812 
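/*
 * A sketch of how the ialias/oalias handling above plays out for a
 * two-address host, e.g. an x86-style constraint "0" tying the first
 * input of an add to output 0:
 *
 *   - The aliased input inherits the output's preferred registers.
 *   - If that input dies here and its register is not already used
 *     by another aliased input, the output reuses the register and
 *     no move is needed.
 *   - Otherwise the input is first copied into a fresh register
 *     (allocate_in_reg), so the instruction may safely overwrite it.
 */
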
3813 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3814 {
3815     const TCGLifeData arg_life = op->life;
3816     TCGTemp *ots, *itsl, *itsh;
3817     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3818 
3819     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3820     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3821     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3822 
3823     ots = arg_temp(op->args[0]);
3824     itsl = arg_temp(op->args[1]);
3825     itsh = arg_temp(op->args[2]);
3826 
3827     /* ENV should not be modified.  */
3828     tcg_debug_assert(!temp_readonly(ots));
3829 
3830     /* Allocate the output register now.  */
3831     if (ots->val_type != TEMP_VAL_REG) {
3832         TCGRegSet allocated_regs = s->reserved_regs;
3833         TCGRegSet dup_out_regs =
3834             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3835 
3836         /* Make sure to not spill the input registers. */
3837         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3838             tcg_regset_set_reg(allocated_regs, itsl->reg);
3839         }
3840         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3841             tcg_regset_set_reg(allocated_regs, itsh->reg);
3842         }
3843 
3844         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3845                                  op->output_pref[0], ots->indirect_base);
3846         ots->val_type = TEMP_VAL_REG;
3847         ots->mem_coherent = 0;
3848         s->reg_to_temp[ots->reg] = ots;
3849     }
3850 
3851     /* Promote dup2 of immediates to dupi_vec. */
3852     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3853         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3854         MemOp vece = MO_64;
3855 
3856         if (val == dup_const(MO_8, val)) {
3857             vece = MO_8;
3858         } else if (val == dup_const(MO_16, val)) {
3859             vece = MO_16;
3860         } else if (val == dup_const(MO_32, val)) {
3861             vece = MO_32;
3862         }
3863 
3864         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3865         goto done;
3866     }
3867 
3868     /* If the two inputs form one 64-bit value, try dupm_vec. */
3869     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3870         if (!itsl->mem_coherent) {
3871             temp_sync(s, itsl, s->reserved_regs, 0, 0);
3872         }
3873         if (!itsh->mem_coherent) {
3874             temp_sync(s, itsh, s->reserved_regs, 0, 0);
3875         }
3876 #ifdef HOST_WORDS_BIGENDIAN
3877         TCGTemp *its = itsh;
3878 #else
3879         TCGTemp *its = itsl;
3880 #endif
3881         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3882                              its->mem_base->reg, its->mem_offset)) {
3883             goto done;
3884         }
3885     }
3886 
3887     /* Fall back to generic expansion. */
3888     return false;
3889 
3890  done:
3891     if (IS_DEAD_ARG(1)) {
3892         temp_dead(s, itsl);
3893     }
3894     if (IS_DEAD_ARG(2)) {
3895         temp_dead(s, itsh);
3896     }
3897     if (NEED_SYNC_ARG(0)) {
3898         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3899     } else if (IS_DEAD_ARG(0)) {
3900         temp_dead(s, ots);
3901     }
3902     return true;
3903 }
3904 
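/*
 * In the constant case above, deposit64() merges the two 32-bit
 * halves into one 64-bit value, itsl supplying bits [31:0] and itsh
 * bits [63:32].  For example:
 *
 *     deposit64(0x00c0ffee, 32, 32, 0x12345678) == 0x1234567800c0ffee
 *
 * after which the same minimal-VECE search as in temp_load() applies.
 */
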
3905 #ifdef TCG_TARGET_STACK_GROWSUP
3906 #define STACK_DIR(x) (-(x))
3907 #else
3908 #define STACK_DIR(x) (x)
3909 #endif
3910 
3911 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3912 {
3913     const int nb_oargs = TCGOP_CALLO(op);
3914     const int nb_iargs = TCGOP_CALLI(op);
3915     const TCGLifeData arg_life = op->life;
3916     const TCGHelperInfo *info;
3917     int flags, nb_regs, i;
3918     TCGReg reg;
3919     TCGArg arg;
3920     TCGTemp *ts;
3921     intptr_t stack_offset;
3922     size_t call_stack_size;
3923     tcg_insn_unit *func_addr;
3924     int allocate_args;
3925     TCGRegSet allocated_regs;
3926 
3927     func_addr = tcg_call_func(op);
3928     info = tcg_call_info(op);
3929     flags = info->flags;
3930 
3931     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3932     if (nb_regs > nb_iargs) {
3933         nb_regs = nb_iargs;
3934     }
3935 
3936     /* assign stack slots first */
3937     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3938     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3939         ~(TCG_TARGET_STACK_ALIGN - 1);
3940     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3941     if (allocate_args) {
3942         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3943            preallocate call stack */
3944         tcg_abort();
3945     }
3946 
3947     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3948     for (i = nb_regs; i < nb_iargs; i++) {
3949         arg = op->args[nb_oargs + i];
3950 #ifdef TCG_TARGET_STACK_GROWSUP
3951         stack_offset -= sizeof(tcg_target_long);
3952 #endif
3953         if (arg != TCG_CALL_DUMMY_ARG) {
3954             ts = arg_temp(arg);
3955             temp_load(s, ts, tcg_target_available_regs[ts->type],
3956                       s->reserved_regs, 0);
3957             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3958         }
3959 #ifndef TCG_TARGET_STACK_GROWSUP
3960         stack_offset += sizeof(tcg_target_long);
3961 #endif
3962     }
3963 
3964     /* assign input registers */
3965     allocated_regs = s->reserved_regs;
3966     for (i = 0; i < nb_regs; i++) {
3967         arg = op->args[nb_oargs + i];
3968         if (arg != TCG_CALL_DUMMY_ARG) {
3969             ts = arg_temp(arg);
3970             reg = tcg_target_call_iarg_regs[i];
3971 
3972             if (ts->val_type == TEMP_VAL_REG) {
3973                 if (ts->reg != reg) {
3974                     tcg_reg_free(s, reg, allocated_regs);
3975                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3976                         /*
3977                          * Cross register class move not supported.  Sync the
3978                          * temp back to its slot and load from there.
3979                          */
3980                         temp_sync(s, ts, allocated_regs, 0, 0);
3981                         tcg_out_ld(s, ts->type, reg,
3982                                    ts->mem_base->reg, ts->mem_offset);
3983                     }
3984                 }
3985             } else {
3986                 TCGRegSet arg_set = 0;
3987 
3988                 tcg_reg_free(s, reg, allocated_regs);
3989                 tcg_regset_set_reg(arg_set, reg);
3990                 temp_load(s, ts, arg_set, allocated_regs, 0);
3991             }
3992 
3993             tcg_regset_set_reg(allocated_regs, reg);
3994         }
3995     }
3996 
3997     /* mark dead temporaries and free the associated registers */
3998     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3999         if (IS_DEAD_ARG(i)) {
4000             temp_dead(s, arg_temp(op->args[i]));
4001         }
4002     }
4003 
4004     /* clobber call registers */
4005     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4006         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4007             tcg_reg_free(s, i, allocated_regs);
4008         }
4009     }
4010 
4011     /* Save globals if they might be written by the helper; sync them if
4012        they might be read. */
4013     if (flags & TCG_CALL_NO_READ_GLOBALS) {
4014         /* Nothing to do */
4015     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4016         sync_globals(s, allocated_regs);
4017     } else {
4018         save_globals(s, allocated_regs);
4019     }
4020 
4021 #ifdef CONFIG_TCG_INTERPRETER
4022     {
4023         gpointer hash = (gpointer)(uintptr_t)info->typemask;
4024         ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
4025         assert(cif != NULL);
4026         tcg_out_call(s, func_addr, cif);
4027     }
4028 #else
4029     tcg_out_call(s, func_addr);
4030 #endif
4031 
4032     /* assign output registers and emit moves if needed */
4033     for (i = 0; i < nb_oargs; i++) {
4034         arg = op->args[i];
4035         ts = arg_temp(arg);
4036 
4037         /* ENV should not be modified.  */
4038         tcg_debug_assert(!temp_readonly(ts));
4039 
4040         reg = tcg_target_call_oarg_regs[i];
4041         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4042         if (ts->val_type == TEMP_VAL_REG) {
4043             s->reg_to_temp[ts->reg] = NULL;
4044         }
4045         ts->val_type = TEMP_VAL_REG;
4046         ts->reg = reg;
4047         ts->mem_coherent = 0;
4048         s->reg_to_temp[reg] = ts;
4049         if (NEED_SYNC_ARG(i)) {
4050             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4051         } else if (IS_DEAD_ARG(i)) {
4052             temp_dead(s, ts);
4053         }
4054     }
4055 }
4056 
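/*
 * The call_stack_size computation above is the usual round-up idiom,
 * (size + align - 1) & ~(align - 1).  For example, three excess
 * arguments on a 64-bit host with TCG_TARGET_STACK_ALIGN == 16:
 *
 *     call_stack_size = 3 * 8 = 24
 *     (24 + 15) & ~15  = 32
 *
 * Anything beyond TCG_STATIC_CALL_ARGS_SIZE would need stack space
 * allocated in the prologue, which is unimplemented, hence the
 * tcg_abort() above.
 */
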
4057 #ifdef CONFIG_PROFILER
4058 
4059 /* avoid copy/paste errors */
4060 #define PROF_ADD(to, from, field)                       \
4061     do {                                                \
4062         (to)->field += qatomic_read(&((from)->field));  \
4063     } while (0)
4064 
4065 #define PROF_MAX(to, from, field)                                       \
4066     do {                                                                \
4067         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4068         if (val__ > (to)->field) {                                      \
4069             (to)->field = val__;                                        \
4070         }                                                               \
4071     } while (0)
4072 
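/*
 * For instance, PROF_ADD(prof, orig, tb_count) expands to a single
 * qatomic_read() of orig->tb_count accumulated into prof->tb_count;
 * PROF_MAX keeps the larger of the two values instead.  Naming the
 * field once avoids the copy/paste errors mentioned above.
 */
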
4073 /* Pass in a zeroed @prof */
4074 static inline
4075 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4076 {
4077     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4078     unsigned int i;
4079 
4080     for (i = 0; i < n_ctxs; i++) {
4081         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4082         const TCGProfile *orig = &s->prof;
4083 
4084         if (counters) {
4085             PROF_ADD(prof, orig, cpu_exec_time);
4086             PROF_ADD(prof, orig, tb_count1);
4087             PROF_ADD(prof, orig, tb_count);
4088             PROF_ADD(prof, orig, op_count);
4089             PROF_MAX(prof, orig, op_count_max);
4090             PROF_ADD(prof, orig, temp_count);
4091             PROF_MAX(prof, orig, temp_count_max);
4092             PROF_ADD(prof, orig, del_op_count);
4093             PROF_ADD(prof, orig, code_in_len);
4094             PROF_ADD(prof, orig, code_out_len);
4095             PROF_ADD(prof, orig, search_out_len);
4096             PROF_ADD(prof, orig, interm_time);
4097             PROF_ADD(prof, orig, code_time);
4098             PROF_ADD(prof, orig, la_time);
4099             PROF_ADD(prof, orig, opt_time);
4100             PROF_ADD(prof, orig, restore_count);
4101             PROF_ADD(prof, orig, restore_time);
4102         }
4103         if (table) {
4104             int i;
4105 
4106             for (i = 0; i < NB_OPS; i++) {
4107                 PROF_ADD(prof, orig, table_op_count[i]);
4108             }
4109         }
4110     }
4111 }
4112 
4113 #undef PROF_ADD
4114 #undef PROF_MAX
4115 
4116 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4117 {
4118     tcg_profile_snapshot(prof, true, false);
4119 }
4120 
4121 static void tcg_profile_snapshot_table(TCGProfile *prof)
4122 {
4123     tcg_profile_snapshot(prof, false, true);
4124 }
4125 
4126 void tcg_dump_op_count(GString *buf)
4127 {
4128     TCGProfile prof = {};
4129     int i;
4130 
4131     tcg_profile_snapshot_table(&prof);
4132     for (i = 0; i < NB_OPS; i++) {
4133         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4134                                prof.table_op_count[i]);
4135     }
4136 }
4137 
4138 int64_t tcg_cpu_exec_time(void)
4139 {
4140     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4141     unsigned int i;
4142     int64_t ret = 0;
4143 
4144     for (i = 0; i < n_ctxs; i++) {
4145         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4146         const TCGProfile *prof = &s->prof;
4147 
4148         ret += qatomic_read(&prof->cpu_exec_time);
4149     }
4150     return ret;
4151 }
4152 #else
4153 void tcg_dump_op_count(GString *buf)
4154 {
4155     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4156 }
4157 
4158 int64_t tcg_cpu_exec_time(void)
4159 {
4160     error_report("%s: TCG profiler not compiled", __func__);
4161     exit(EXIT_FAILURE);
4162 }
4163 #endif
4164 
4165 
4166 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4167 {
4168 #ifdef CONFIG_PROFILER
4169     TCGProfile *prof = &s->prof;
4170 #endif
4171     int i, num_insns;
4172     TCGOp *op;
4173 
4174 #ifdef CONFIG_PROFILER
4175     {
4176         int n = 0;
4177 
4178         QTAILQ_FOREACH(op, &s->ops, link) {
4179             n++;
4180         }
4181         qatomic_set(&prof->op_count, prof->op_count + n);
4182         if (n > prof->op_count_max) {
4183             qatomic_set(&prof->op_count_max, n);
4184         }
4185 
4186         n = s->nb_temps;
4187         qatomic_set(&prof->temp_count, prof->temp_count + n);
4188         if (n > prof->temp_count_max) {
4189             qatomic_set(&prof->temp_count_max, n);
4190         }
4191     }
4192 #endif
4193 
4194 #ifdef DEBUG_DISAS
4195     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4196                  && qemu_log_in_addr_range(tb->pc))) {
4197         FILE *logfile = qemu_log_lock();
4198         qemu_log("OP:\n");
4199         tcg_dump_ops(s, false);
4200         qemu_log("\n");
4201         qemu_log_unlock(logfile);
4202     }
4203 #endif
4204 
4205 #ifdef CONFIG_DEBUG_TCG
4206     /* Ensure all labels referenced have been emitted.  */
4207     {
4208         TCGLabel *l;
4209         bool error = false;
4210 
4211         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4212             if (unlikely(!l->present) && l->refs) {
4213                 qemu_log_mask(CPU_LOG_TB_OP,
4214                               "$L%d referenced but not present.\n", l->id);
4215                 error = true;
4216             }
4217         }
4218         assert(!error);
4219     }
4220 #endif
4221 
4222 #ifdef CONFIG_PROFILER
4223     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4224 #endif
4225 
4226 #ifdef USE_TCG_OPTIMIZATIONS
4227     tcg_optimize(s);
4228 #endif
4229 
4230 #ifdef CONFIG_PROFILER
4231     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4232     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4233 #endif
4234 
4235     reachable_code_pass(s);
4236     liveness_pass_1(s);
4237 
4238     if (s->nb_indirects > 0) {
4239 #ifdef DEBUG_DISAS
4240         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4241                      && qemu_log_in_addr_range(tb->pc))) {
4242             FILE *logfile = qemu_log_lock();
4243             qemu_log("OP before indirect lowering:\n");
4244             tcg_dump_ops(s, false);
4245             qemu_log("\n");
4246             qemu_log_unlock(logfile);
4247         }
4248 #endif
4249         /* Replace indirect temps with direct temps.  */
4250         if (liveness_pass_2(s)) {
4251             /* If changes were made, re-run liveness.  */
4252             liveness_pass_1(s);
4253         }
4254     }
4255 
4256 #ifdef CONFIG_PROFILER
4257     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4258 #endif
4259 
4260 #ifdef DEBUG_DISAS
4261     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4262                  && qemu_log_in_addr_range(tb->pc))) {
4263         FILE *logfile = qemu_log_lock();
4264         qemu_log("OP after optimization and liveness analysis:\n");
4265         tcg_dump_ops(s, true);
4266         qemu_log("\n");
4267         qemu_log_unlock(logfile);
4268     }
4269 #endif
4270 
4271     tcg_reg_alloc_start(s);
4272 
4273     /*
4274      * Reset the buffer pointers when restarting after overflow.
4275      * TODO: Move this into translate-all.c with the rest of the
4276      * buffer management.  Having only this done here is confusing.
4277      */
4278     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4279     s->code_ptr = s->code_buf;
4280 
4281 #ifdef TCG_TARGET_NEED_LDST_LABELS
4282     QSIMPLEQ_INIT(&s->ldst_labels);
4283 #endif
4284 #ifdef TCG_TARGET_NEED_POOL_LABELS
4285     s->pool_labels = NULL;
4286 #endif
4287 
4288     num_insns = -1;
4289     QTAILQ_FOREACH(op, &s->ops, link) {
4290         TCGOpcode opc = op->opc;
4291 
4292 #ifdef CONFIG_PROFILER
4293         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4294 #endif
4295 
4296         switch (opc) {
4297         case INDEX_op_mov_i32:
4298         case INDEX_op_mov_i64:
4299         case INDEX_op_mov_vec:
4300             tcg_reg_alloc_mov(s, op);
4301             break;
4302         case INDEX_op_dup_vec:
4303             tcg_reg_alloc_dup(s, op);
4304             break;
4305         case INDEX_op_insn_start:
4306             if (num_insns >= 0) {
4307                 size_t off = tcg_current_code_size(s);
4308                 s->gen_insn_end_off[num_insns] = off;
4309                 /* Assert that we do not overflow our stored offset.  */
4310                 assert(s->gen_insn_end_off[num_insns] == off);
4311             }
4312             num_insns++;
4313             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4314                 target_ulong a;
4315 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4316                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4317 #else
4318                 a = op->args[i];
4319 #endif
4320                 s->gen_insn_data[num_insns][i] = a;
4321             }
4322             break;
4323         case INDEX_op_discard:
4324             temp_dead(s, arg_temp(op->args[0]));
4325             break;
4326         case INDEX_op_set_label:
4327             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4328             tcg_out_label(s, arg_label(op->args[0]));
4329             break;
4330         case INDEX_op_call:
4331             tcg_reg_alloc_call(s, op);
4332             break;
4333         case INDEX_op_dup2_vec:
4334             if (tcg_reg_alloc_dup2(s, op)) {
4335                 break;
4336             }
4337             /* fall through */
4338         default:
4339             /* Sanity check that we've not introduced any unhandled opcodes. */
4340             tcg_debug_assert(tcg_op_supported(opc));
4341             /* Note: it would be much faster to have specialized
4342                register allocator functions for some common argument
4343                patterns. */
4344             tcg_reg_alloc_op(s, op);
4345             break;
4346         }
4347 #ifdef CONFIG_DEBUG_TCG
4348         check_regs(s);
4349 #endif
4350         /* Test for (pending) buffer overflow.  The assumption is that any
4351            one operation beginning below the high water mark cannot overrun
4352            the buffer completely.  Thus we can test for overflow after
4353            generating code without having to check during generation.  */
4354         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4355             return -1;
4356         }
4357         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4358         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4359             return -2;
4360         }
4361     }
4362     tcg_debug_assert(num_insns >= 0);
4363     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4364 
4365     /* Generate TB finalization at the end of block */
4366 #ifdef TCG_TARGET_NEED_LDST_LABELS
4367     i = tcg_out_ldst_finalize(s);
4368     if (i < 0) {
4369         return i;
4370     }
4371 #endif
4372 #ifdef TCG_TARGET_NEED_POOL_LABELS
4373     i = tcg_out_pool_finalize(s);
4374     if (i < 0) {
4375         return i;
4376     }
4377 #endif
4378     if (!tcg_resolve_relocs(s)) {
4379         return -2;
4380     }
4381 
4382 #ifndef CONFIG_TCG_INTERPRETER
4383     /* flush instruction cache */
4384     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4385                         (uintptr_t)s->code_buf,
4386                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4387 #endif
4388 
4389     return tcg_current_code_size(s);
4390 }
4391 
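/*
 * A sketch of the contract with the caller (tb_gen_code() in
 * translate-all.c, per the TODO above); names are illustrative:
 *
 *     gen_code_size = tcg_gen_code(tcg_ctx, tb);
 *     if (unlikely(gen_code_size < 0)) {
 *         // -1: code_gen_buffer high-water mark exceeded;
 *         // -2: relocation or gen_insn_end_off range exceeded.
 *         // In either case, restart translation rather than fail.
 *     }
 *
 * A negative return is thus a request to retry, not a hard error.
 */
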
4392 #ifdef CONFIG_PROFILER
4393 void tcg_dump_info(GString *buf)
4394 {
4395     TCGProfile prof = {};
4396     const TCGProfile *s;
4397     int64_t tb_count;
4398     int64_t tb_div_count;
4399     int64_t tot;
4400 
4401     tcg_profile_snapshot_counters(&prof);
4402     s = &prof;
4403     tb_count = s->tb_count;
4404     tb_div_count = tb_count ? tb_count : 1;
4405     tot = s->interm_time + s->code_time;
4406 
4407     g_string_append_printf(buf, "JIT cycles          %" PRId64
4408                            " (%0.3f s at 2.4 GHz)\n",
4409                            tot, tot / 2.4e9);
4410     g_string_append_printf(buf, "translated TBs      %" PRId64
4411                            " (aborted=%" PRId64 " %0.1f%%)\n",
4412                            tb_count, s->tb_count1 - tb_count,
4413                            (double)(s->tb_count1 - s->tb_count)
4414                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4415     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
4416                            (double)s->op_count / tb_div_count, s->op_count_max);
4417     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
4418                            (double)s->del_op_count / tb_div_count);
4419     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
4420                            (double)s->temp_count / tb_div_count,
4421                            s->temp_count_max);
4422     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
4423                            (double)s->code_out_len / tb_div_count);
4424     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
4425                            (double)s->search_out_len / tb_div_count);
4426 
4427     g_string_append_printf(buf, "cycles/op           %0.1f\n",
4428                            s->op_count ? (double)tot / s->op_count : 0);
4429     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
4430                            s->code_in_len ? (double)tot / s->code_in_len : 0);
4431     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
4432                            s->code_out_len ? (double)tot / s->code_out_len : 0);
4433     g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
4434                            s->search_out_len ?
4435                            (double)tot / s->search_out_len : 0);
4436     if (tot == 0) {
4437         tot = 1;
4438     }
4439     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
4440                            (double)s->interm_time / tot * 100.0);
4441     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
4442                            (double)s->code_time / tot * 100.0);
4443     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
4444                            (double)s->opt_time / (s->code_time ?
4445                                                   s->code_time : 1)
4446                            * 100.0);
4447     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
4448                            (double)s->la_time / (s->code_time ?
4449                                                  s->code_time : 1) * 100.0);
4450     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
4451                            s->restore_count);
4452     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
4453                            s->restore_count ?
4454                            (double)s->restore_time / s->restore_count : 0);
4455 }
4456 #else
4457 void tcg_dump_info(GString *buf)
4458 {
4459     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4460 }
4461 #endif
4462 
4463 #ifdef ELF_HOST_MACHINE
4464 /* In order to use this feature, the backend needs to do three things:
4465 
4466    (1) Define ELF_HOST_MACHINE to indicate both what value to
4467        put into the ELF image and to indicate support for the feature.
4468 
4469    (2) Define tcg_register_jit.  This should create a buffer containing
4470        the contents of a .debug_frame section that describes the post-
4471        prologue unwind info for the tcg machine.
4472 
4473    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4474 */
4475 
4476 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4477 typedef enum {
4478     JIT_NOACTION = 0,
4479     JIT_REGISTER_FN,
4480     JIT_UNREGISTER_FN
4481 } jit_actions_t;
4482 
4483 struct jit_code_entry {
4484     struct jit_code_entry *next_entry;
4485     struct jit_code_entry *prev_entry;
4486     const void *symfile_addr;
4487     uint64_t symfile_size;
4488 };
4489 
4490 struct jit_descriptor {
4491     uint32_t version;
4492     uint32_t action_flag;
4493     struct jit_code_entry *relevant_entry;
4494     struct jit_code_entry *first_entry;
4495 };
4496 
4497 void __jit_debug_register_code(void) __attribute__((noinline));
4498 void __jit_debug_register_code(void)
4499 {
4500     asm("");
4501 }
4502 
4503 /* Must statically initialize the version, because GDB may check
4504    the version before we can set it.  */
4505 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4506 
4507 /* End GDB interface.  */
4508 
4509 static int find_string(const char *strtab, const char *str)
4510 {
4511     const char *p = strtab + 1;
4512 
4513     while (1) {
4514         if (strcmp(p, str) == 0) {
4515             return p - strtab;
4516         }
4517         p += strlen(p) + 1;
4518     }
4519 }
4520 
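/*
 * With the .str table defined in img_template below
 * ("\0" ".text\0" ".debug_info\0" ...), for example:
 *
 *     find_string(img->str, ".text")       == 1
 *     find_string(img->str, ".debug_info") == 7
 *
 * Note there is no miss case: the loop assumes the string is present,
 * which holds for this fixed table.
 */
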
4521 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4522                                  const void *debug_frame,
4523                                  size_t debug_frame_size)
4524 {
4525     struct __attribute__((packed)) DebugInfo {
4526         uint32_t  len;
4527         uint16_t  version;
4528         uint32_t  abbrev;
4529         uint8_t   ptr_size;
4530         uint8_t   cu_die;
4531         uint16_t  cu_lang;
4532         uintptr_t cu_low_pc;
4533         uintptr_t cu_high_pc;
4534         uint8_t   fn_die;
4535         char      fn_name[16];
4536         uintptr_t fn_low_pc;
4537         uintptr_t fn_high_pc;
4538         uint8_t   cu_eoc;
4539     };
4540 
4541     struct ElfImage {
4542         ElfW(Ehdr) ehdr;
4543         ElfW(Phdr) phdr;
4544         ElfW(Shdr) shdr[7];
4545         ElfW(Sym)  sym[2];
4546         struct DebugInfo di;
4547         uint8_t    da[24];
4548         char       str[80];
4549     };
4550 
4551     struct ElfImage *img;
4552 
4553     static const struct ElfImage img_template = {
4554         .ehdr = {
4555             .e_ident[EI_MAG0] = ELFMAG0,
4556             .e_ident[EI_MAG1] = ELFMAG1,
4557             .e_ident[EI_MAG2] = ELFMAG2,
4558             .e_ident[EI_MAG3] = ELFMAG3,
4559             .e_ident[EI_CLASS] = ELF_CLASS,
4560             .e_ident[EI_DATA] = ELF_DATA,
4561             .e_ident[EI_VERSION] = EV_CURRENT,
4562             .e_type = ET_EXEC,
4563             .e_machine = ELF_HOST_MACHINE,
4564             .e_version = EV_CURRENT,
4565             .e_phoff = offsetof(struct ElfImage, phdr),
4566             .e_shoff = offsetof(struct ElfImage, shdr),
4567             .e_ehsize = sizeof(ElfW(Ehdr)),
4568             .e_phentsize = sizeof(ElfW(Phdr)),
4569             .e_phnum = 1,
4570             .e_shentsize = sizeof(ElfW(Shdr)),
4571             .e_shnum = ARRAY_SIZE(img->shdr),
4572             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4573 #ifdef ELF_HOST_FLAGS
4574             .e_flags = ELF_HOST_FLAGS,
4575 #endif
4576 #ifdef ELF_OSABI
4577             .e_ident[EI_OSABI] = ELF_OSABI,
4578 #endif
4579         },
4580         .phdr = {
4581             .p_type = PT_LOAD,
4582             .p_flags = PF_X,
4583         },
4584         .shdr = {
4585             [0] = { .sh_type = SHT_NULL },
4586             /* Trick: The contents of code_gen_buffer are not present in
4587                this fake ELF file; that got allocated elsewhere.  Therefore
4588                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4589                will not look for contents.  We can record any address.  */
4590             [1] = { /* .text */
4591                 .sh_type = SHT_NOBITS,
4592                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4593             },
4594             [2] = { /* .debug_info */
4595                 .sh_type = SHT_PROGBITS,
4596                 .sh_offset = offsetof(struct ElfImage, di),
4597                 .sh_size = sizeof(struct DebugInfo),
4598             },
4599             [3] = { /* .debug_abbrev */
4600                 .sh_type = SHT_PROGBITS,
4601                 .sh_offset = offsetof(struct ElfImage, da),
4602                 .sh_size = sizeof(img->da),
4603             },
4604             [4] = { /* .debug_frame */
4605                 .sh_type = SHT_PROGBITS,
4606                 .sh_offset = sizeof(struct ElfImage),
4607             },
4608             [5] = { /* .symtab */
4609                 .sh_type = SHT_SYMTAB,
4610                 .sh_offset = offsetof(struct ElfImage, sym),
4611                 .sh_size = sizeof(img->sym),
4612                 .sh_info = 1,
4613                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4614                 .sh_entsize = sizeof(ElfW(Sym)),
4615             },
4616             [6] = { /* .strtab */
4617                 .sh_type = SHT_STRTAB,
4618                 .sh_offset = offsetof(struct ElfImage, str),
4619                 .sh_size = sizeof(img->str),
4620             }
4621         },
4622         .sym = {
4623             [1] = { /* code_gen_buffer */
4624                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4625                 .st_shndx = 1,
4626             }
4627         },
4628         .di = {
4629             .len = sizeof(struct DebugInfo) - 4,
4630             .version = 2,
4631             .ptr_size = sizeof(void *),
4632             .cu_die = 1,
4633             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4634             .fn_die = 2,
4635             .fn_name = "code_gen_buffer"
4636         },
4637         .da = {
4638             1,          /* abbrev number (the cu) */
4639             0x11, 1,    /* DW_TAG_compile_unit, has children */
4640             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4641             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4642             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4643             0, 0,       /* end of abbrev */
4644             2,          /* abbrev number (the fn) */
4645             0x2e, 0,    /* DW_TAG_subprogram, no children */
4646             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4647             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4648             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4649             0, 0,       /* end of abbrev */
4650             0           /* no more abbrev */
4651         },
4652         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4653                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4654     };
4655 
4656     /* We only need a single jit entry; statically allocate it.  */
4657     static struct jit_code_entry one_entry;
4658 
4659     uintptr_t buf = (uintptr_t)buf_ptr;
4660     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4661     DebugFrameHeader *dfh;
4662 
4663     img = g_malloc(img_size);
4664     *img = img_template;
4665 
4666     img->phdr.p_vaddr = buf;
4667     img->phdr.p_paddr = buf;
4668     img->phdr.p_memsz = buf_size;
4669 
4670     img->shdr[1].sh_name = find_string(img->str, ".text");
4671     img->shdr[1].sh_addr = buf;
4672     img->shdr[1].sh_size = buf_size;
4673 
4674     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4675     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4676 
4677     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4678     img->shdr[4].sh_size = debug_frame_size;
4679 
4680     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4681     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4682 
4683     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4684     img->sym[1].st_value = buf;
4685     img->sym[1].st_size = buf_size;
4686 
4687     img->di.cu_low_pc = buf;
4688     img->di.cu_high_pc = buf + buf_size;
4689     img->di.fn_low_pc = buf;
4690     img->di.fn_high_pc = buf + buf_size;
4691 
4692     dfh = (DebugFrameHeader *)(img + 1);
4693     memcpy(dfh, debug_frame, debug_frame_size);
4694     dfh->fde.func_start = buf;
4695     dfh->fde.func_len = buf_size;
4696 
4697 #ifdef DEBUG_JIT
4698     /* Enable this block to be able to debug the ELF image file creation.
4699        One can use readelf, objdump, or other inspection utilities.  */
4700     {
4701         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4702         if (f) {
4703             if (fwrite(img, img_size, 1, f) != 1) {
4704                 /* Avoid stupid unused return value warning for fwrite.  */
4705             }
4706             fclose(f);
4707         }
4708     }
4709 #endif
4710 
4711     one_entry.symfile_addr = img;
4712     one_entry.symfile_size = img_size;
4713 
4714     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4715     __jit_debug_descriptor.relevant_entry = &one_entry;
4716     __jit_debug_descriptor.first_entry = &one_entry;
4717     __jit_debug_register_code();
4718 }
4719 #else
4720 /* No support for the feature.  Provide the entry point expected by exec.c,
4721    and implement the internal function we declared earlier.  */
4722 
4723 static void tcg_register_jit_int(const void *buf, size_t size,
4724                                  const void *debug_frame,
4725                                  size_t debug_frame_size)
4726 {
4727 }
4728 
4729 void tcg_register_jit(const void *buf, size_t buf_size)
4730 {
4731 }
4732 #endif /* ELF_HOST_MACHINE */
4733 
4734 #if !TCG_TARGET_MAYBE_vec
4735 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4736 {
4737     g_assert_not_reached();
4738 }
4739 #endif
4740