xref: /qemu/tcg/tcg.c (revision 75ac231c)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 #include "qemu/cacheinfo.h"
40 
41 /* Note: the long term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg-internal.h"
64 
65 #ifdef CONFIG_TCG_INTERPRETER
66 #include <ffi.h>
67 #endif
68 
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70    used here. */
71 static void tcg_target_init(TCGContext *s);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    /* Leading field; its alignment is raised to that of a pointer. */
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
86 
typedef struct QEMU_PACKED {
    /* Leading field; its alignment is raised to that of a pointer. */
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t cie_offset;
    /* Start address and length are stored as pointer-sized integers. */
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98 
/*
 * Declared ahead of use; tagged unused so that a configuration which never
 * references it does not trigger an unused-function warning.
 */
static __attribute__((unused))
void tcg_register_jit_int(const void *buf, size_t size,
                          const void *debug_frame, size_t debug_frame_size);
103 
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
106                        intptr_t arg2);
107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
108 static void tcg_out_movi(TCGContext *s, TCGType type,
109                          TCGReg ret, tcg_target_long arg);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111                        const TCGArg args[TCG_MAX_OP_ARGS],
112                        const int const_args[TCG_MAX_OP_ARGS]);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115                             TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117                              TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119                              TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
121                            unsigned vecl, unsigned vece,
122                            const TCGArg args[TCG_MAX_OP_ARGS],
123                            const int const_args[TCG_MAX_OP_ARGS]);
124 #else
125 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
126                                    TCGReg dst, TCGReg src)
127 {
128     g_assert_not_reached();
129 }
130 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
131                                     TCGReg dst, TCGReg base, intptr_t offset)
132 {
133     g_assert_not_reached();
134 }
135 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
136                                     TCGReg dst, int64_t arg)
137 {
138     g_assert_not_reached();
139 }
140 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
141                                   unsigned vecl, unsigned vece,
142                                   const TCGArg args[TCG_MAX_OP_ARGS],
143                                   const int const_args[TCG_MAX_OP_ARGS])
144 {
145     g_assert_not_reached();
146 }
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149                        intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151                         TCGReg base, intptr_t ofs);
152 #ifdef CONFIG_TCG_INTERPRETER
153 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
154                          ffi_cif *cif);
155 #else
156 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
157 #endif
158 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
159 #ifdef TCG_TARGET_NEED_LDST_LABELS
160 static int tcg_out_ldst_finalize(TCGContext *s);
161 #endif
162 
163 TCGContext tcg_init_ctx;
164 __thread TCGContext *tcg_ctx;
165 
166 TCGContext **tcg_ctxs;
167 unsigned int tcg_cur_ctxs;
168 unsigned int tcg_max_ctxs;
169 TCGv_env cpu_env = 0;
170 const void *tcg_code_gen_epilogue;
171 uintptr_t tcg_splitwx_diff;
172 
173 #ifndef CONFIG_TCG_INTERPRETER
174 tcg_prologue_fn *tcg_qemu_tb_exec;
175 #endif
176 
177 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
178 static TCGRegSet tcg_target_call_clobber_regs;
179 
180 #if TCG_TARGET_INSN_UNIT_SIZE == 1
181 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
182 {
183     *s->code_ptr++ = v;
184 }
185 
186 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
187                                                       uint8_t v)
188 {
189     *p = v;
190 }
191 #endif
192 
193 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
194 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
195 {
196     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
197         *s->code_ptr++ = v;
198     } else {
199         tcg_insn_unit *p = s->code_ptr;
200         memcpy(p, &v, sizeof(v));
201         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
202     }
203 }
204 
205 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
206                                                        uint16_t v)
207 {
208     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
209         *p = v;
210     } else {
211         memcpy(p, &v, sizeof(v));
212     }
213 }
214 #endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
217 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
229                                                        uint32_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
240 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
252                                                        uint64_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
261 
262 /* label relocation processing */
263 
264 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
265                           TCGLabel *l, intptr_t addend)
266 {
267     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
268 
269     r->type = type;
270     r->ptr = code_ptr;
271     r->addend = addend;
272     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
273 }
274 
275 static void tcg_out_label(TCGContext *s, TCGLabel *l)
276 {
277     tcg_debug_assert(!l->has_value);
278     l->has_value = 1;
279     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
280 }
281 
282 TCGLabel *gen_new_label(void)
283 {
284     TCGContext *s = tcg_ctx;
285     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
286 
287     memset(l, 0, sizeof(TCGLabel));
288     l->id = s->nb_labels++;
289     QSIMPLEQ_INIT(&l->relocs);
290 
291     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
292 
293     return l;
294 }
295 
296 static bool tcg_resolve_relocs(TCGContext *s)
297 {
298     TCGLabel *l;
299 
300     QSIMPLEQ_FOREACH(l, &s->labels, next) {
301         TCGRelocation *r;
302         uintptr_t value = l->u.value;
303 
304         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
305             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
306                 return false;
307             }
308         }
309     }
310     return true;
311 }
312 
313 static void set_jmp_reset_offset(TCGContext *s, int which)
314 {
315     /*
316      * We will check for overflow at the end of the opcode loop in
317      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
318      */
319     s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
320 }
321 
322 /* Signal overflow, starting over with fewer guest insns. */
323 static G_NORETURN
324 void tcg_raise_tb_overflow(TCGContext *s)
325 {
326     siglongjmp(s->jmp_trans, -2);
327 }
328 
/*
 * Token-pasting helpers: glue a prefix onto 1..6 arguments, with an
 * underscore between consecutive arguments.
 */
#define C_PFX1(pfx, a)                  pfx##a
#define C_PFX2(pfx, a, b)               pfx##a##_##b
#define C_PFX3(pfx, a, b, c)            pfx##a##_##b##_##c
#define C_PFX4(pfx, a, b, c, d)         pfx##a##_##b##_##c##_##d
#define C_PFX5(pfx, a, b, c, d, e)      pfx##a##_##b##_##c##_##d##_##e
#define C_PFX6(pfx, a, b, c, d, e, f)   pfx##a##_##b##_##c##_##d##_##e##_##f
335 
336 /* Define an enumeration for the various combinations. */
337 
338 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
339 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
340 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
341 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
342 
343 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
344 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
345 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
346 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
347 
348 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
349 
350 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
351 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
352 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
353 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
354 
355 typedef enum {
356 #include "tcg-target-con-set.h"
357 } TCGConstraintSetIndex;
358 
359 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
360 
361 #undef C_O0_I1
362 #undef C_O0_I2
363 #undef C_O0_I3
364 #undef C_O0_I4
365 #undef C_O1_I1
366 #undef C_O1_I2
367 #undef C_O1_I3
368 #undef C_O1_I4
369 #undef C_N1_I2
370 #undef C_O2_I1
371 #undef C_O2_I2
372 #undef C_O2_I3
373 #undef C_O2_I4
374 
375 /* Put all of the constraint sets into an array, indexed by the enum. */
376 
377 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
378 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
379 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
380 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
381 
382 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
383 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
384 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
385 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
386 
387 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
388 
389 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
390 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
391 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
392 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
393 
394 static const TCGTargetOpDef constraint_sets[] = {
395 #include "tcg-target-con-set.h"
396 };
397 
398 
399 #undef C_O0_I1
400 #undef C_O0_I2
401 #undef C_O0_I3
402 #undef C_O0_I4
403 #undef C_O1_I1
404 #undef C_O1_I2
405 #undef C_O1_I3
406 #undef C_O1_I4
407 #undef C_N1_I2
408 #undef C_O2_I1
409 #undef C_O2_I2
410 #undef C_O2_I3
411 #undef C_O2_I4
412 
/*
 * Final definitions, left in place for tcg-target.c.inc: each C_Ox_Iy(...)
 * now expands to the bare enumerator (no trailing comma), which is what
 * tcg_target_op_def() returns.
 */

#define C_O0_I1(in1)                    C_PFX1(c_o0_i1_, in1)
#define C_O0_I2(in1, in2)               C_PFX2(c_o0_i2_, in1, in2)
#define C_O0_I3(in1, in2, in3)          C_PFX3(c_o0_i3_, in1, in2, in3)
#define C_O0_I4(in1, in2, in3, in4)     C_PFX4(c_o0_i4_, in1, in2, in3, in4)

#define C_O1_I1(out1, in1)              C_PFX2(c_o1_i1_, out1, in1)
#define C_O1_I2(out1, in1, in2)         C_PFX3(c_o1_i2_, out1, in1, in2)
#define C_O1_I3(out1, in1, in2, in3) \
    C_PFX4(c_o1_i3_, out1, in1, in2, in3)
#define C_O1_I4(out1, in1, in2, in3, in4) \
    C_PFX5(c_o1_i4_, out1, in1, in2, in3, in4)

#define C_N1_I2(out1, in1, in2)         C_PFX3(c_n1_i2_, out1, in1, in2)

#define C_O2_I1(out1, out2, in1)        C_PFX3(c_o2_i1_, out1, out2, in1)
#define C_O2_I2(out1, out2, in1, in2) \
    C_PFX4(c_o2_i2_, out1, out2, in1, in2)
#define C_O2_I3(out1, out2, in1, in2, in3) \
    C_PFX5(c_o2_i3_, out1, out2, in1, in2, in3)
#define C_O2_I4(out1, out2, in1, in2, in3, in4) \
    C_PFX6(c_o2_i4_, out1, out2, in1, in2, in3, in4)
431 
432 #include "tcg-target.c.inc"
433 
434 static void alloc_tcg_plugin_context(TCGContext *s)
435 {
436 #ifdef CONFIG_PLUGIN
437     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
438     s->plugin_tb->insns =
439         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
440 #endif
441 }
442 
/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
458 #ifdef CONFIG_USER_ONLY
459 void tcg_register_thread(void)
460 {
461     tcg_ctx = &tcg_init_ctx;
462 }
463 #else
464 void tcg_register_thread(void)
465 {
466     TCGContext *s = g_malloc(sizeof(*s));
467     unsigned int i, n;
468 
469     *s = tcg_init_ctx;
470 
471     /* Relink mem_base.  */
472     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
473         if (tcg_init_ctx.temps[i].mem_base) {
474             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
475             tcg_debug_assert(b >= 0 && b < n);
476             s->temps[i].mem_base = &s->temps[b];
477         }
478     }
479 
480     /* Claim an entry in tcg_ctxs */
481     n = qatomic_fetch_inc(&tcg_cur_ctxs);
482     g_assert(n < tcg_max_ctxs);
483     qatomic_set(&tcg_ctxs[n], s);
484 
485     if (n > 0) {
486         alloc_tcg_plugin_context(s);
487         tcg_region_initial_alloc(s);
488     }
489 
490     tcg_ctx = s;
491 }
492 #endif /* !CONFIG_USER_ONLY */
493 
494 /* pool based memory allocation */
495 void *tcg_malloc_internal(TCGContext *s, int size)
496 {
497     TCGPool *p;
498     int pool_size;
499 
500     if (size > TCG_POOL_CHUNK_SIZE) {
501         /* big malloc: insert a new pool (XXX: could optimize) */
502         p = g_malloc(sizeof(TCGPool) + size);
503         p->size = size;
504         p->next = s->pool_first_large;
505         s->pool_first_large = p;
506         return p->data;
507     } else {
508         p = s->pool_current;
509         if (!p) {
510             p = s->pool_first;
511             if (!p)
512                 goto new_pool;
513         } else {
514             if (!p->next) {
515             new_pool:
516                 pool_size = TCG_POOL_CHUNK_SIZE;
517                 p = g_malloc(sizeof(TCGPool) + pool_size);
518                 p->size = pool_size;
519                 p->next = NULL;
520                 if (s->pool_current)
521                     s->pool_current->next = p;
522                 else
523                     s->pool_first = p;
524             } else {
525                 p = p->next;
526             }
527         }
528     }
529     s->pool_current = p;
530     s->pool_cur = p->data + size;
531     s->pool_end = p->data + p->size;
532     return p->data;
533 }
534 
535 void tcg_pool_reset(TCGContext *s)
536 {
537     TCGPool *p, *t;
538     for (p = s->pool_first_large; p; p = t) {
539         t = p->next;
540         g_free(p);
541     }
542     s->pool_first_large = NULL;
543     s->pool_cur = s->pool_end = NULL;
544     s->pool_current = NULL;
545 }
546 
547 #include "exec/helper-proto.h"
548 
549 static const TCGHelperInfo all_helpers[] = {
550 #include "exec/helper-tcg.h"
551 };
552 static GHashTable *helper_table;
553 
554 #ifdef CONFIG_TCG_INTERPRETER
555 static GHashTable *ffi_table;
556 
557 static ffi_type * const typecode_to_ffi[8] = {
558     [dh_typecode_void] = &ffi_type_void,
559     [dh_typecode_i32]  = &ffi_type_uint32,
560     [dh_typecode_s32]  = &ffi_type_sint32,
561     [dh_typecode_i64]  = &ffi_type_uint64,
562     [dh_typecode_s64]  = &ffi_type_sint64,
563     [dh_typecode_ptr]  = &ffi_type_pointer,
564 };
565 #endif
566 
567 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
568 static void process_op_defs(TCGContext *s);
569 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
570                                             TCGReg reg, const char *name);
571 
572 static void tcg_context_init(unsigned max_cpus)
573 {
574     TCGContext *s = &tcg_init_ctx;
575     int op, total_args, n, i;
576     TCGOpDef *def;
577     TCGArgConstraint *args_ct;
578     TCGTemp *ts;
579 
580     memset(s, 0, sizeof(*s));
581     s->nb_globals = 0;
582 
583     /* Count total number of arguments and allocate the corresponding
584        space */
585     total_args = 0;
586     for(op = 0; op < NB_OPS; op++) {
587         def = &tcg_op_defs[op];
588         n = def->nb_iargs + def->nb_oargs;
589         total_args += n;
590     }
591 
592     args_ct = g_new0(TCGArgConstraint, total_args);
593 
594     for(op = 0; op < NB_OPS; op++) {
595         def = &tcg_op_defs[op];
596         def->args_ct = args_ct;
597         n = def->nb_iargs + def->nb_oargs;
598         args_ct += n;
599     }
600 
601     /* Register helpers.  */
602     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
603     helper_table = g_hash_table_new(NULL, NULL);
604 
605     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
606         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
607                             (gpointer)&all_helpers[i]);
608     }
609 
610 #ifdef CONFIG_TCG_INTERPRETER
611     /* g_direct_hash/equal for direct comparisons on uint32_t.  */
612     ffi_table = g_hash_table_new(NULL, NULL);
613     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
614         struct {
615             ffi_cif cif;
616             ffi_type *args[];
617         } *ca;
618         uint32_t typemask = all_helpers[i].typemask;
619         gpointer hash = (gpointer)(uintptr_t)typemask;
620         ffi_status status;
621         int nargs;
622 
623         if (g_hash_table_lookup(ffi_table, hash)) {
624             continue;
625         }
626 
627         /* Ignoring the return type, find the last non-zero field. */
628         nargs = 32 - clz32(typemask >> 3);
629         nargs = DIV_ROUND_UP(nargs, 3);
630 
631         ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
632         ca->cif.rtype = typecode_to_ffi[typemask & 7];
633         ca->cif.nargs = nargs;
634 
635         if (nargs != 0) {
636             ca->cif.arg_types = ca->args;
637             for (int j = 0; j < nargs; ++j) {
638                 int typecode = extract32(typemask, (j + 1) * 3, 3);
639                 ca->args[j] = typecode_to_ffi[typecode];
640             }
641         }
642 
643         status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
644                               ca->cif.rtype, ca->cif.arg_types);
645         assert(status == FFI_OK);
646 
647         g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
648     }
649 #endif
650 
651     tcg_target_init(s);
652     process_op_defs(s);
653 
654     /* Reverse the order of the saved registers, assuming they're all at
655        the start of tcg_target_reg_alloc_order.  */
656     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
657         int r = tcg_target_reg_alloc_order[n];
658         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
659             break;
660         }
661     }
662     for (i = 0; i < n; ++i) {
663         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
664     }
665     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
666         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
667     }
668 
669     alloc_tcg_plugin_context(s);
670 
671     tcg_ctx = s;
672     /*
673      * In user-mode we simply share the init context among threads, since we
674      * use a single region. See the documentation tcg_region_init() for the
675      * reasoning behind this.
676      * In softmmu we will have at most max_cpus TCG threads.
677      */
678 #ifdef CONFIG_USER_ONLY
679     tcg_ctxs = &tcg_ctx;
680     tcg_cur_ctxs = 1;
681     tcg_max_ctxs = 1;
682 #else
683     tcg_max_ctxs = max_cpus;
684     tcg_ctxs = g_new0(TCGContext *, max_cpus);
685 #endif
686 
687     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
688     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
689     cpu_env = temp_tcgv_ptr(ts);
690 }
691 
/*
 * Public initialisation entry point: set up the initial context first,
 * then the code regions.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
697 
698 /*
699  * Allocate TBs right before their corresponding translated code, making
700  * sure that TBs and code are on different cache lines.
701  */
702 TranslationBlock *tcg_tb_alloc(TCGContext *s)
703 {
704     uintptr_t align = qemu_icache_linesize;
705     TranslationBlock *tb;
706     void *next;
707 
708  retry:
709     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
710     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
711 
712     if (unlikely(next > s->code_gen_highwater)) {
713         if (tcg_region_alloc(s)) {
714             return NULL;
715         }
716         goto retry;
717     }
718     qatomic_set(&s->code_gen_ptr, next);
719     s->data_gen_ptr = NULL;
720     return tb;
721 }
722 
723 void tcg_prologue_init(TCGContext *s)
724 {
725     size_t prologue_size;
726 
727     s->code_ptr = s->code_gen_ptr;
728     s->code_buf = s->code_gen_ptr;
729     s->data_gen_ptr = NULL;
730 
731 #ifndef CONFIG_TCG_INTERPRETER
732     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
733 #endif
734 
735 #ifdef TCG_TARGET_NEED_POOL_LABELS
736     s->pool_labels = NULL;
737 #endif
738 
739     qemu_thread_jit_write();
740     /* Generate the prologue.  */
741     tcg_target_qemu_prologue(s);
742 
743 #ifdef TCG_TARGET_NEED_POOL_LABELS
744     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
745     {
746         int result = tcg_out_pool_finalize(s);
747         tcg_debug_assert(result == 0);
748     }
749 #endif
750 
751     prologue_size = tcg_current_code_size(s);
752 
753 #ifndef CONFIG_TCG_INTERPRETER
754     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
755                         (uintptr_t)s->code_buf, prologue_size);
756 #endif
757 
758 #ifdef DEBUG_DISAS
759     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
760         FILE *logfile = qemu_log_trylock();
761         if (logfile) {
762             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
763             if (s->data_gen_ptr) {
764                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
765                 size_t data_size = prologue_size - code_size;
766                 size_t i;
767 
768                 disas(logfile, s->code_gen_ptr, code_size);
769 
770                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
771                     if (sizeof(tcg_target_ulong) == 8) {
772                         fprintf(logfile,
773                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
774                                 (uintptr_t)s->data_gen_ptr + i,
775                                 *(uint64_t *)(s->data_gen_ptr + i));
776                     } else {
777                         fprintf(logfile,
778                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
779                                 (uintptr_t)s->data_gen_ptr + i,
780                                 *(uint32_t *)(s->data_gen_ptr + i));
781                     }
782                 }
783             } else {
784                 disas(logfile, s->code_gen_ptr, prologue_size);
785             }
786             fprintf(logfile, "\n");
787             qemu_log_unlock(logfile);
788         }
789     }
790 #endif
791 
792 #ifndef CONFIG_TCG_INTERPRETER
793     /*
794      * Assert that goto_ptr is implemented completely, setting an epilogue.
795      * For tci, we use NULL as the signal to return from the interpreter,
796      * so skip this check.
797      */
798     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
799 #endif
800 
801     tcg_region_prologue_set(s);
802 }
803 
804 void tcg_func_start(TCGContext *s)
805 {
806     tcg_pool_reset(s);
807     s->nb_temps = s->nb_globals;
808 
809     /* No temps have been previously allocated for size or locality.  */
810     memset(s->free_temps, 0, sizeof(s->free_temps));
811 
812     /* No constant temps have been previously allocated. */
813     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
814         if (s->const_table[i]) {
815             g_hash_table_remove_all(s->const_table[i]);
816         }
817     }
818 
819     s->nb_ops = 0;
820     s->nb_labels = 0;
821     s->current_frame_offset = s->frame_start;
822 
823 #ifdef CONFIG_DEBUG_TCG
824     s->goto_tb_issue_mask = 0;
825 #endif
826 
827     QTAILQ_INIT(&s->ops);
828     QTAILQ_INIT(&s->free_ops);
829     QSIMPLEQ_INIT(&s->labels);
830 }
831 
832 static TCGTemp *tcg_temp_alloc(TCGContext *s)
833 {
834     int n = s->nb_temps++;
835 
836     if (n >= TCG_MAX_TEMPS) {
837         tcg_raise_tb_overflow(s);
838     }
839     return memset(&s->temps[n], 0, sizeof(TCGTemp));
840 }
841 
842 static TCGTemp *tcg_global_alloc(TCGContext *s)
843 {
844     TCGTemp *ts;
845 
846     tcg_debug_assert(s->nb_globals == s->nb_temps);
847     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
848     s->nb_globals++;
849     ts = tcg_temp_alloc(s);
850     ts->kind = TEMP_GLOBAL;
851 
852     return ts;
853 }
854 
855 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
856                                             TCGReg reg, const char *name)
857 {
858     TCGTemp *ts;
859 
860     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
861         tcg_abort();
862     }
863 
864     ts = tcg_global_alloc(s);
865     ts->base_type = type;
866     ts->type = type;
867     ts->kind = TEMP_FIXED;
868     ts->reg = reg;
869     ts->name = name;
870     tcg_regset_set_reg(s->reserved_regs, reg);
871 
872     return ts;
873 }
874 
875 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
876 {
877     s->frame_start = start;
878     s->frame_end = start + size;
879     s->frame_temp
880         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
881 }
882 
883 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
884                                      intptr_t offset, const char *name)
885 {
886     TCGContext *s = tcg_ctx;
887     TCGTemp *base_ts = tcgv_ptr_temp(base);
888     TCGTemp *ts = tcg_global_alloc(s);
889     int indirect_reg = 0, bigendian = 0;
890 #if HOST_BIG_ENDIAN
891     bigendian = 1;
892 #endif
893 
894     switch (base_ts->kind) {
895     case TEMP_FIXED:
896         break;
897     case TEMP_GLOBAL:
898         /* We do not support double-indirect registers.  */
899         tcg_debug_assert(!base_ts->indirect_reg);
900         base_ts->indirect_base = 1;
901         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
902                             ? 2 : 1);
903         indirect_reg = 1;
904         break;
905     default:
906         g_assert_not_reached();
907     }
908 
909     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
910         TCGTemp *ts2 = tcg_global_alloc(s);
911         char buf[64];
912 
913         ts->base_type = TCG_TYPE_I64;
914         ts->type = TCG_TYPE_I32;
915         ts->indirect_reg = indirect_reg;
916         ts->mem_allocated = 1;
917         ts->mem_base = base_ts;
918         ts->mem_offset = offset + bigendian * 4;
919         pstrcpy(buf, sizeof(buf), name);
920         pstrcat(buf, sizeof(buf), "_0");
921         ts->name = strdup(buf);
922 
923         tcg_debug_assert(ts2 == ts + 1);
924         ts2->base_type = TCG_TYPE_I64;
925         ts2->type = TCG_TYPE_I32;
926         ts2->indirect_reg = indirect_reg;
927         ts2->mem_allocated = 1;
928         ts2->mem_base = base_ts;
929         ts2->mem_offset = offset + (1 - bigendian) * 4;
930         pstrcpy(buf, sizeof(buf), name);
931         pstrcat(buf, sizeof(buf), "_1");
932         ts2->name = strdup(buf);
933     } else {
934         ts->base_type = type;
935         ts->type = type;
936         ts->indirect_reg = indirect_reg;
937         ts->mem_allocated = 1;
938         ts->mem_base = base_ts;
939         ts->mem_offset = offset;
940         ts->name = name;
941     }
942     return ts;
943 }
944 
945 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
946 {
947     TCGContext *s = tcg_ctx;
948     TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
949     TCGTemp *ts;
950     int idx, k;
951 
952     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
953     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
954     if (idx < TCG_MAX_TEMPS) {
955         /* There is already an available temp with the right type.  */
956         clear_bit(idx, s->free_temps[k].l);
957 
958         ts = &s->temps[idx];
959         ts->temp_allocated = 1;
960         tcg_debug_assert(ts->base_type == type);
961         tcg_debug_assert(ts->kind == kind);
962     } else {
963         ts = tcg_temp_alloc(s);
964         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
965             TCGTemp *ts2 = tcg_temp_alloc(s);
966 
967             ts->base_type = type;
968             ts->type = TCG_TYPE_I32;
969             ts->temp_allocated = 1;
970             ts->kind = kind;
971 
972             tcg_debug_assert(ts2 == ts + 1);
973             ts2->base_type = TCG_TYPE_I64;
974             ts2->type = TCG_TYPE_I32;
975             ts2->temp_allocated = 1;
976             ts2->kind = kind;
977         } else {
978             ts->base_type = type;
979             ts->type = type;
980             ts->temp_allocated = 1;
981             ts->kind = kind;
982         }
983     }
984 
985 #if defined(CONFIG_DEBUG_TCG)
986     s->temps_in_use++;
987 #endif
988     return ts;
989 }
990 
991 TCGv_vec tcg_temp_new_vec(TCGType type)
992 {
993     TCGTemp *t;
994 
995 #ifdef CONFIG_DEBUG_TCG
996     switch (type) {
997     case TCG_TYPE_V64:
998         assert(TCG_TARGET_HAS_v64);
999         break;
1000     case TCG_TYPE_V128:
1001         assert(TCG_TARGET_HAS_v128);
1002         break;
1003     case TCG_TYPE_V256:
1004         assert(TCG_TARGET_HAS_v256);
1005         break;
1006     default:
1007         g_assert_not_reached();
1008     }
1009 #endif
1010 
1011     t = tcg_temp_new_internal(type, 0);
1012     return temp_tcgv_vec(t);
1013 }
1014 
1015 /* Create a new temp of the same type as an existing temp.  */
1016 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1017 {
1018     TCGTemp *t = tcgv_vec_temp(match);
1019 
1020     tcg_debug_assert(t->temp_allocated != 0);
1021 
1022     t = tcg_temp_new_internal(t->base_type, 0);
1023     return temp_tcgv_vec(t);
1024 }
1025 
1026 void tcg_temp_free_internal(TCGTemp *ts)
1027 {
1028     TCGContext *s = tcg_ctx;
1029     int k, idx;
1030 
1031     switch (ts->kind) {
1032     case TEMP_CONST:
1033         /*
1034          * In order to simplify users of tcg_constant_*,
1035          * silently ignore free.
1036          */
1037         return;
1038     case TEMP_NORMAL:
1039     case TEMP_LOCAL:
1040         break;
1041     default:
1042         g_assert_not_reached();
1043     }
1044 
1045 #if defined(CONFIG_DEBUG_TCG)
1046     s->temps_in_use--;
1047     if (s->temps_in_use < 0) {
1048         fprintf(stderr, "More temporaries freed than allocated!\n");
1049     }
1050 #endif
1051 
1052     tcg_debug_assert(ts->temp_allocated != 0);
1053     ts->temp_allocated = 0;
1054 
1055     idx = temp_idx(ts);
1056     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1057     set_bit(idx, s->free_temps[k].l);
1058 }
1059 
1060 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1061 {
1062     TCGContext *s = tcg_ctx;
1063     GHashTable *h = s->const_table[type];
1064     TCGTemp *ts;
1065 
1066     if (h == NULL) {
1067         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1068         s->const_table[type] = h;
1069     }
1070 
1071     ts = g_hash_table_lookup(h, &val);
1072     if (ts == NULL) {
1073         ts = tcg_temp_alloc(s);
1074 
1075         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1076             TCGTemp *ts2 = tcg_temp_alloc(s);
1077 
1078             ts->base_type = TCG_TYPE_I64;
1079             ts->type = TCG_TYPE_I32;
1080             ts->kind = TEMP_CONST;
1081             ts->temp_allocated = 1;
1082             /*
1083              * Retain the full value of the 64-bit constant in the low
1084              * part, so that the hash table works.  Actual uses will
1085              * truncate the value to the low part.
1086              */
1087             ts->val = val;
1088 
1089             tcg_debug_assert(ts2 == ts + 1);
1090             ts2->base_type = TCG_TYPE_I64;
1091             ts2->type = TCG_TYPE_I32;
1092             ts2->kind = TEMP_CONST;
1093             ts2->temp_allocated = 1;
1094             ts2->val = val >> 32;
1095         } else {
1096             ts->base_type = type;
1097             ts->type = type;
1098             ts->kind = TEMP_CONST;
1099             ts->temp_allocated = 1;
1100             ts->val = val;
1101         }
1102         g_hash_table_insert(h, &ts->val, ts);
1103     }
1104 
1105     return ts;
1106 }
1107 
1108 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1109 {
1110     val = dup_const(vece, val);
1111     return temp_tcgv_vec(tcg_constant_internal(type, val));
1112 }
1113 
1114 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1115 {
1116     TCGTemp *t = tcgv_vec_temp(match);
1117 
1118     tcg_debug_assert(t->temp_allocated != 0);
1119     return tcg_constant_vec(t->base_type, vece, val);
1120 }
1121 
1122 TCGv_i32 tcg_const_i32(int32_t val)
1123 {
1124     TCGv_i32 t0;
1125     t0 = tcg_temp_new_i32();
1126     tcg_gen_movi_i32(t0, val);
1127     return t0;
1128 }
1129 
1130 TCGv_i64 tcg_const_i64(int64_t val)
1131 {
1132     TCGv_i64 t0;
1133     t0 = tcg_temp_new_i64();
1134     tcg_gen_movi_i64(t0, val);
1135     return t0;
1136 }
1137 
1138 TCGv_i32 tcg_const_local_i32(int32_t val)
1139 {
1140     TCGv_i32 t0;
1141     t0 = tcg_temp_local_new_i32();
1142     tcg_gen_movi_i32(t0, val);
1143     return t0;
1144 }
1145 
1146 TCGv_i64 tcg_const_local_i64(int64_t val)
1147 {
1148     TCGv_i64 t0;
1149     t0 = tcg_temp_local_new_i64();
1150     tcg_gen_movi_i64(t0, val);
1151     return t0;
1152 }
1153 
1154 #if defined(CONFIG_DEBUG_TCG)
1155 void tcg_clear_temp_count(void)
1156 {
1157     TCGContext *s = tcg_ctx;
1158     s->temps_in_use = 0;
1159 }
1160 
1161 int tcg_check_temp_count(void)
1162 {
1163     TCGContext *s = tcg_ctx;
1164     if (s->temps_in_use) {
1165         /* Clear the count so that we don't give another
1166          * warning immediately next time around.
1167          */
1168         s->temps_in_use = 0;
1169         return 1;
1170     }
1171     return 0;
1172 }
1173 #endif
1174 
1175 /* Return true if OP may appear in the opcode stream.
1176    Test the runtime variable that controls each opcode.  */
1177 bool tcg_op_supported(TCGOpcode op)
1178 {
1179     const bool have_vec
1180         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1181 
1182     switch (op) {
1183     case INDEX_op_discard:
1184     case INDEX_op_set_label:
1185     case INDEX_op_call:
1186     case INDEX_op_br:
1187     case INDEX_op_mb:
1188     case INDEX_op_insn_start:
1189     case INDEX_op_exit_tb:
1190     case INDEX_op_goto_tb:
1191     case INDEX_op_goto_ptr:
1192     case INDEX_op_qemu_ld_i32:
1193     case INDEX_op_qemu_st_i32:
1194     case INDEX_op_qemu_ld_i64:
1195     case INDEX_op_qemu_st_i64:
1196         return true;
1197 
1198     case INDEX_op_qemu_st8_i32:
1199         return TCG_TARGET_HAS_qemu_st8_i32;
1200 
1201     case INDEX_op_mov_i32:
1202     case INDEX_op_setcond_i32:
1203     case INDEX_op_brcond_i32:
1204     case INDEX_op_ld8u_i32:
1205     case INDEX_op_ld8s_i32:
1206     case INDEX_op_ld16u_i32:
1207     case INDEX_op_ld16s_i32:
1208     case INDEX_op_ld_i32:
1209     case INDEX_op_st8_i32:
1210     case INDEX_op_st16_i32:
1211     case INDEX_op_st_i32:
1212     case INDEX_op_add_i32:
1213     case INDEX_op_sub_i32:
1214     case INDEX_op_mul_i32:
1215     case INDEX_op_and_i32:
1216     case INDEX_op_or_i32:
1217     case INDEX_op_xor_i32:
1218     case INDEX_op_shl_i32:
1219     case INDEX_op_shr_i32:
1220     case INDEX_op_sar_i32:
1221         return true;
1222 
1223     case INDEX_op_movcond_i32:
1224         return TCG_TARGET_HAS_movcond_i32;
1225     case INDEX_op_div_i32:
1226     case INDEX_op_divu_i32:
1227         return TCG_TARGET_HAS_div_i32;
1228     case INDEX_op_rem_i32:
1229     case INDEX_op_remu_i32:
1230         return TCG_TARGET_HAS_rem_i32;
1231     case INDEX_op_div2_i32:
1232     case INDEX_op_divu2_i32:
1233         return TCG_TARGET_HAS_div2_i32;
1234     case INDEX_op_rotl_i32:
1235     case INDEX_op_rotr_i32:
1236         return TCG_TARGET_HAS_rot_i32;
1237     case INDEX_op_deposit_i32:
1238         return TCG_TARGET_HAS_deposit_i32;
1239     case INDEX_op_extract_i32:
1240         return TCG_TARGET_HAS_extract_i32;
1241     case INDEX_op_sextract_i32:
1242         return TCG_TARGET_HAS_sextract_i32;
1243     case INDEX_op_extract2_i32:
1244         return TCG_TARGET_HAS_extract2_i32;
1245     case INDEX_op_add2_i32:
1246         return TCG_TARGET_HAS_add2_i32;
1247     case INDEX_op_sub2_i32:
1248         return TCG_TARGET_HAS_sub2_i32;
1249     case INDEX_op_mulu2_i32:
1250         return TCG_TARGET_HAS_mulu2_i32;
1251     case INDEX_op_muls2_i32:
1252         return TCG_TARGET_HAS_muls2_i32;
1253     case INDEX_op_muluh_i32:
1254         return TCG_TARGET_HAS_muluh_i32;
1255     case INDEX_op_mulsh_i32:
1256         return TCG_TARGET_HAS_mulsh_i32;
1257     case INDEX_op_ext8s_i32:
1258         return TCG_TARGET_HAS_ext8s_i32;
1259     case INDEX_op_ext16s_i32:
1260         return TCG_TARGET_HAS_ext16s_i32;
1261     case INDEX_op_ext8u_i32:
1262         return TCG_TARGET_HAS_ext8u_i32;
1263     case INDEX_op_ext16u_i32:
1264         return TCG_TARGET_HAS_ext16u_i32;
1265     case INDEX_op_bswap16_i32:
1266         return TCG_TARGET_HAS_bswap16_i32;
1267     case INDEX_op_bswap32_i32:
1268         return TCG_TARGET_HAS_bswap32_i32;
1269     case INDEX_op_not_i32:
1270         return TCG_TARGET_HAS_not_i32;
1271     case INDEX_op_neg_i32:
1272         return TCG_TARGET_HAS_neg_i32;
1273     case INDEX_op_andc_i32:
1274         return TCG_TARGET_HAS_andc_i32;
1275     case INDEX_op_orc_i32:
1276         return TCG_TARGET_HAS_orc_i32;
1277     case INDEX_op_eqv_i32:
1278         return TCG_TARGET_HAS_eqv_i32;
1279     case INDEX_op_nand_i32:
1280         return TCG_TARGET_HAS_nand_i32;
1281     case INDEX_op_nor_i32:
1282         return TCG_TARGET_HAS_nor_i32;
1283     case INDEX_op_clz_i32:
1284         return TCG_TARGET_HAS_clz_i32;
1285     case INDEX_op_ctz_i32:
1286         return TCG_TARGET_HAS_ctz_i32;
1287     case INDEX_op_ctpop_i32:
1288         return TCG_TARGET_HAS_ctpop_i32;
1289 
1290     case INDEX_op_brcond2_i32:
1291     case INDEX_op_setcond2_i32:
1292         return TCG_TARGET_REG_BITS == 32;
1293 
1294     case INDEX_op_mov_i64:
1295     case INDEX_op_setcond_i64:
1296     case INDEX_op_brcond_i64:
1297     case INDEX_op_ld8u_i64:
1298     case INDEX_op_ld8s_i64:
1299     case INDEX_op_ld16u_i64:
1300     case INDEX_op_ld16s_i64:
1301     case INDEX_op_ld32u_i64:
1302     case INDEX_op_ld32s_i64:
1303     case INDEX_op_ld_i64:
1304     case INDEX_op_st8_i64:
1305     case INDEX_op_st16_i64:
1306     case INDEX_op_st32_i64:
1307     case INDEX_op_st_i64:
1308     case INDEX_op_add_i64:
1309     case INDEX_op_sub_i64:
1310     case INDEX_op_mul_i64:
1311     case INDEX_op_and_i64:
1312     case INDEX_op_or_i64:
1313     case INDEX_op_xor_i64:
1314     case INDEX_op_shl_i64:
1315     case INDEX_op_shr_i64:
1316     case INDEX_op_sar_i64:
1317     case INDEX_op_ext_i32_i64:
1318     case INDEX_op_extu_i32_i64:
1319         return TCG_TARGET_REG_BITS == 64;
1320 
1321     case INDEX_op_movcond_i64:
1322         return TCG_TARGET_HAS_movcond_i64;
1323     case INDEX_op_div_i64:
1324     case INDEX_op_divu_i64:
1325         return TCG_TARGET_HAS_div_i64;
1326     case INDEX_op_rem_i64:
1327     case INDEX_op_remu_i64:
1328         return TCG_TARGET_HAS_rem_i64;
1329     case INDEX_op_div2_i64:
1330     case INDEX_op_divu2_i64:
1331         return TCG_TARGET_HAS_div2_i64;
1332     case INDEX_op_rotl_i64:
1333     case INDEX_op_rotr_i64:
1334         return TCG_TARGET_HAS_rot_i64;
1335     case INDEX_op_deposit_i64:
1336         return TCG_TARGET_HAS_deposit_i64;
1337     case INDEX_op_extract_i64:
1338         return TCG_TARGET_HAS_extract_i64;
1339     case INDEX_op_sextract_i64:
1340         return TCG_TARGET_HAS_sextract_i64;
1341     case INDEX_op_extract2_i64:
1342         return TCG_TARGET_HAS_extract2_i64;
1343     case INDEX_op_extrl_i64_i32:
1344         return TCG_TARGET_HAS_extrl_i64_i32;
1345     case INDEX_op_extrh_i64_i32:
1346         return TCG_TARGET_HAS_extrh_i64_i32;
1347     case INDEX_op_ext8s_i64:
1348         return TCG_TARGET_HAS_ext8s_i64;
1349     case INDEX_op_ext16s_i64:
1350         return TCG_TARGET_HAS_ext16s_i64;
1351     case INDEX_op_ext32s_i64:
1352         return TCG_TARGET_HAS_ext32s_i64;
1353     case INDEX_op_ext8u_i64:
1354         return TCG_TARGET_HAS_ext8u_i64;
1355     case INDEX_op_ext16u_i64:
1356         return TCG_TARGET_HAS_ext16u_i64;
1357     case INDEX_op_ext32u_i64:
1358         return TCG_TARGET_HAS_ext32u_i64;
1359     case INDEX_op_bswap16_i64:
1360         return TCG_TARGET_HAS_bswap16_i64;
1361     case INDEX_op_bswap32_i64:
1362         return TCG_TARGET_HAS_bswap32_i64;
1363     case INDEX_op_bswap64_i64:
1364         return TCG_TARGET_HAS_bswap64_i64;
1365     case INDEX_op_not_i64:
1366         return TCG_TARGET_HAS_not_i64;
1367     case INDEX_op_neg_i64:
1368         return TCG_TARGET_HAS_neg_i64;
1369     case INDEX_op_andc_i64:
1370         return TCG_TARGET_HAS_andc_i64;
1371     case INDEX_op_orc_i64:
1372         return TCG_TARGET_HAS_orc_i64;
1373     case INDEX_op_eqv_i64:
1374         return TCG_TARGET_HAS_eqv_i64;
1375     case INDEX_op_nand_i64:
1376         return TCG_TARGET_HAS_nand_i64;
1377     case INDEX_op_nor_i64:
1378         return TCG_TARGET_HAS_nor_i64;
1379     case INDEX_op_clz_i64:
1380         return TCG_TARGET_HAS_clz_i64;
1381     case INDEX_op_ctz_i64:
1382         return TCG_TARGET_HAS_ctz_i64;
1383     case INDEX_op_ctpop_i64:
1384         return TCG_TARGET_HAS_ctpop_i64;
1385     case INDEX_op_add2_i64:
1386         return TCG_TARGET_HAS_add2_i64;
1387     case INDEX_op_sub2_i64:
1388         return TCG_TARGET_HAS_sub2_i64;
1389     case INDEX_op_mulu2_i64:
1390         return TCG_TARGET_HAS_mulu2_i64;
1391     case INDEX_op_muls2_i64:
1392         return TCG_TARGET_HAS_muls2_i64;
1393     case INDEX_op_muluh_i64:
1394         return TCG_TARGET_HAS_muluh_i64;
1395     case INDEX_op_mulsh_i64:
1396         return TCG_TARGET_HAS_mulsh_i64;
1397 
1398     case INDEX_op_mov_vec:
1399     case INDEX_op_dup_vec:
1400     case INDEX_op_dupm_vec:
1401     case INDEX_op_ld_vec:
1402     case INDEX_op_st_vec:
1403     case INDEX_op_add_vec:
1404     case INDEX_op_sub_vec:
1405     case INDEX_op_and_vec:
1406     case INDEX_op_or_vec:
1407     case INDEX_op_xor_vec:
1408     case INDEX_op_cmp_vec:
1409         return have_vec;
1410     case INDEX_op_dup2_vec:
1411         return have_vec && TCG_TARGET_REG_BITS == 32;
1412     case INDEX_op_not_vec:
1413         return have_vec && TCG_TARGET_HAS_not_vec;
1414     case INDEX_op_neg_vec:
1415         return have_vec && TCG_TARGET_HAS_neg_vec;
1416     case INDEX_op_abs_vec:
1417         return have_vec && TCG_TARGET_HAS_abs_vec;
1418     case INDEX_op_andc_vec:
1419         return have_vec && TCG_TARGET_HAS_andc_vec;
1420     case INDEX_op_orc_vec:
1421         return have_vec && TCG_TARGET_HAS_orc_vec;
1422     case INDEX_op_nand_vec:
1423         return have_vec && TCG_TARGET_HAS_nand_vec;
1424     case INDEX_op_nor_vec:
1425         return have_vec && TCG_TARGET_HAS_nor_vec;
1426     case INDEX_op_eqv_vec:
1427         return have_vec && TCG_TARGET_HAS_eqv_vec;
1428     case INDEX_op_mul_vec:
1429         return have_vec && TCG_TARGET_HAS_mul_vec;
1430     case INDEX_op_shli_vec:
1431     case INDEX_op_shri_vec:
1432     case INDEX_op_sari_vec:
1433         return have_vec && TCG_TARGET_HAS_shi_vec;
1434     case INDEX_op_shls_vec:
1435     case INDEX_op_shrs_vec:
1436     case INDEX_op_sars_vec:
1437         return have_vec && TCG_TARGET_HAS_shs_vec;
1438     case INDEX_op_shlv_vec:
1439     case INDEX_op_shrv_vec:
1440     case INDEX_op_sarv_vec:
1441         return have_vec && TCG_TARGET_HAS_shv_vec;
1442     case INDEX_op_rotli_vec:
1443         return have_vec && TCG_TARGET_HAS_roti_vec;
1444     case INDEX_op_rotls_vec:
1445         return have_vec && TCG_TARGET_HAS_rots_vec;
1446     case INDEX_op_rotlv_vec:
1447     case INDEX_op_rotrv_vec:
1448         return have_vec && TCG_TARGET_HAS_rotv_vec;
1449     case INDEX_op_ssadd_vec:
1450     case INDEX_op_usadd_vec:
1451     case INDEX_op_sssub_vec:
1452     case INDEX_op_ussub_vec:
1453         return have_vec && TCG_TARGET_HAS_sat_vec;
1454     case INDEX_op_smin_vec:
1455     case INDEX_op_umin_vec:
1456     case INDEX_op_smax_vec:
1457     case INDEX_op_umax_vec:
1458         return have_vec && TCG_TARGET_HAS_minmax_vec;
1459     case INDEX_op_bitsel_vec:
1460         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1461     case INDEX_op_cmpsel_vec:
1462         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1463 
1464     default:
1465         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1466         return true;
1467     }
1468 }
1469 
1470 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1471    and endian swap. Maybe it would be better to do the alignment
1472    and endian swap in tcg_reg_alloc_call(). */
1473 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1474 {
1475     int i, real_args, nb_rets, pi;
1476     unsigned typemask;
1477     const TCGHelperInfo *info;
1478     TCGOp *op;
1479 
1480     info = g_hash_table_lookup(helper_table, (gpointer)func);
1481     typemask = info->typemask;
1482 
1483 #ifdef CONFIG_PLUGIN
1484     /* detect non-plugin helpers */
1485     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1486         tcg_ctx->plugin_insn->calls_helpers = true;
1487     }
1488 #endif
1489 
1490 #if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1491     for (i = 0; i < nargs; ++i) {
1492         int argtype = extract32(typemask, (i + 1) * 3, 3);
1493         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1494         bool is_signed = argtype & 1;
1495 
1496         if (is_32bit) {
1497             TCGv_i64 temp = tcg_temp_new_i64();
1498             TCGv_i32 orig = temp_tcgv_i32(args[i]);
1499             if (is_signed) {
1500                 tcg_gen_ext_i32_i64(temp, orig);
1501             } else {
1502                 tcg_gen_extu_i32_i64(temp, orig);
1503             }
1504             args[i] = tcgv_i64_temp(temp);
1505         }
1506     }
1507 #endif /* TCG_TARGET_EXTEND_ARGS */
1508 
1509     op = tcg_emit_op(INDEX_op_call);
1510 
1511     pi = 0;
1512     if (ret != NULL) {
1513         if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1514 #if HOST_BIG_ENDIAN
1515             op->args[pi++] = temp_arg(ret + 1);
1516             op->args[pi++] = temp_arg(ret);
1517 #else
1518             op->args[pi++] = temp_arg(ret);
1519             op->args[pi++] = temp_arg(ret + 1);
1520 #endif
1521             nb_rets = 2;
1522         } else {
1523             op->args[pi++] = temp_arg(ret);
1524             nb_rets = 1;
1525         }
1526     } else {
1527         nb_rets = 0;
1528     }
1529     TCGOP_CALLO(op) = nb_rets;
1530 
1531     real_args = 0;
1532     for (i = 0; i < nargs; i++) {
1533         int argtype = extract32(typemask, (i + 1) * 3, 3);
1534         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1535         bool want_align = false;
1536 
1537 #if defined(CONFIG_TCG_INTERPRETER)
1538         /*
1539          * Align all arguments, so that they land in predictable places
1540          * for passing off to ffi_call.
1541          */
1542         want_align = true;
1543 #elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1544         /* Some targets want aligned 64 bit args */
1545         want_align = is_64bit;
1546 #endif
1547 
1548         if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1549             op->args[pi++] = TCG_CALL_DUMMY_ARG;
1550             real_args++;
1551         }
1552 
1553         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1554             /*
1555              * If stack grows up, then we will be placing successive
1556              * arguments at lower addresses, which means we need to
1557              * reverse the order compared to how we would normally
1558              * treat either big or little-endian.  For those arguments
1559              * that will wind up in registers, this still works for
1560              * HPPA (the only current STACK_GROWSUP target) since the
1561              * argument registers are *also* allocated in decreasing
1562              * order.  If another such target is added, this logic may
1563              * have to get more complicated to differentiate between
1564              * stack arguments and register arguments.
1565              */
1566 #if HOST_BIG_ENDIAN != defined(TCG_TARGET_STACK_GROWSUP)
1567             op->args[pi++] = temp_arg(args[i] + 1);
1568             op->args[pi++] = temp_arg(args[i]);
1569 #else
1570             op->args[pi++] = temp_arg(args[i]);
1571             op->args[pi++] = temp_arg(args[i] + 1);
1572 #endif
1573             real_args += 2;
1574             continue;
1575         }
1576 
1577         op->args[pi++] = temp_arg(args[i]);
1578         real_args++;
1579     }
1580     op->args[pi++] = (uintptr_t)func;
1581     op->args[pi++] = (uintptr_t)info;
1582     TCGOP_CALLI(op) = real_args;
1583 
1584     /* Make sure the fields didn't overflow.  */
1585     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1586     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1587 
1588 #if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1589     for (i = 0; i < nargs; ++i) {
1590         int argtype = extract32(typemask, (i + 1) * 3, 3);
1591         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1592 
1593         if (is_32bit) {
1594             tcg_temp_free_internal(args[i]);
1595         }
1596     }
1597 #endif /* TCG_TARGET_EXTEND_ARGS */
1598 }
1599 
1600 static void tcg_reg_alloc_start(TCGContext *s)
1601 {
1602     int i, n;
1603 
1604     for (i = 0, n = s->nb_temps; i < n; i++) {
1605         TCGTemp *ts = &s->temps[i];
1606         TCGTempVal val = TEMP_VAL_MEM;
1607 
1608         switch (ts->kind) {
1609         case TEMP_CONST:
1610             val = TEMP_VAL_CONST;
1611             break;
1612         case TEMP_FIXED:
1613             val = TEMP_VAL_REG;
1614             break;
1615         case TEMP_GLOBAL:
1616             break;
1617         case TEMP_NORMAL:
1618         case TEMP_EBB:
1619             val = TEMP_VAL_DEAD;
1620             /* fall through */
1621         case TEMP_LOCAL:
1622             ts->mem_allocated = 0;
1623             break;
1624         default:
1625             g_assert_not_reached();
1626         }
1627         ts->val_type = val;
1628     }
1629 
1630     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1631 }
1632 
1633 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1634                                  TCGTemp *ts)
1635 {
1636     int idx = temp_idx(ts);
1637 
1638     switch (ts->kind) {
1639     case TEMP_FIXED:
1640     case TEMP_GLOBAL:
1641         pstrcpy(buf, buf_size, ts->name);
1642         break;
1643     case TEMP_LOCAL:
1644         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1645         break;
1646     case TEMP_EBB:
1647         snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
1648         break;
1649     case TEMP_NORMAL:
1650         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1651         break;
1652     case TEMP_CONST:
1653         switch (ts->type) {
1654         case TCG_TYPE_I32:
1655             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1656             break;
1657 #if TCG_TARGET_REG_BITS > 32
1658         case TCG_TYPE_I64:
1659             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1660             break;
1661 #endif
1662         case TCG_TYPE_V64:
1663         case TCG_TYPE_V128:
1664         case TCG_TYPE_V256:
1665             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1666                      64 << (ts->type - TCG_TYPE_V64), ts->val);
1667             break;
1668         default:
1669             g_assert_not_reached();
1670         }
1671         break;
1672     }
1673     return buf;
1674 }
1675 
1676 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1677                              int buf_size, TCGArg arg)
1678 {
1679     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1680 }
1681 
1682 static const char * const cond_name[] =
1683 {
1684     [TCG_COND_NEVER] = "never",
1685     [TCG_COND_ALWAYS] = "always",
1686     [TCG_COND_EQ] = "eq",
1687     [TCG_COND_NE] = "ne",
1688     [TCG_COND_LT] = "lt",
1689     [TCG_COND_GE] = "ge",
1690     [TCG_COND_LE] = "le",
1691     [TCG_COND_GT] = "gt",
1692     [TCG_COND_LTU] = "ltu",
1693     [TCG_COND_GEU] = "geu",
1694     [TCG_COND_LEU] = "leu",
1695     [TCG_COND_GTU] = "gtu"
1696 };
1697 
1698 static const char * const ldst_name[] =
1699 {
1700     [MO_UB]   = "ub",
1701     [MO_SB]   = "sb",
1702     [MO_LEUW] = "leuw",
1703     [MO_LESW] = "lesw",
1704     [MO_LEUL] = "leul",
1705     [MO_LESL] = "lesl",
1706     [MO_LEUQ] = "leq",
1707     [MO_BEUW] = "beuw",
1708     [MO_BESW] = "besw",
1709     [MO_BEUL] = "beul",
1710     [MO_BESL] = "besl",
1711     [MO_BEUQ] = "beq",
1712 };
1713 
1714 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1715 #ifdef TARGET_ALIGNED_ONLY
1716     [MO_UNALN >> MO_ASHIFT]    = "un+",
1717     [MO_ALIGN >> MO_ASHIFT]    = "",
1718 #else
1719     [MO_UNALN >> MO_ASHIFT]    = "",
1720     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1721 #endif
1722     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1723     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1724     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1725     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1726     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1727     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1728 };
1729 
1730 static const char bswap_flag_name[][6] = {
1731     [TCG_BSWAP_IZ] = "iz",
1732     [TCG_BSWAP_OZ] = "oz",
1733     [TCG_BSWAP_OS] = "os",
1734     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
1735     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
1736 };
1737 
1738 static inline bool tcg_regset_single(TCGRegSet d)
1739 {
1740     return (d & (d - 1)) == 0;
1741 }
1742 
1743 static inline TCGReg tcg_regset_first(TCGRegSet d)
1744 {
1745     if (TCG_TARGET_NB_REGS <= 32) {
1746         return ctz32(d);
1747     } else {
1748         return ctz64(d);
1749     }
1750 }
1751 
/*
 * fprintf() wrapper that yields the number of characters written;
 * a negative (error) result from fprintf() is mapped to 0.
 */
#define ne_fprintf(...) \
    ({ int n_ = fprintf(__VA_ARGS__); n_ < 0 ? 0 : n_; })
1755 
1756 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
1757 {
1758     char buf[128];
1759     TCGOp *op;
1760 
1761     QTAILQ_FOREACH(op, &s->ops, link) {
1762         int i, k, nb_oargs, nb_iargs, nb_cargs;
1763         const TCGOpDef *def;
1764         TCGOpcode c;
1765         int col = 0;
1766 
1767         c = op->opc;
1768         def = &tcg_op_defs[c];
1769 
1770         if (c == INDEX_op_insn_start) {
1771             nb_oargs = 0;
1772             col += ne_fprintf(f, "\n ----");
1773 
1774             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1775                 target_ulong a;
1776 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1777                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1778 #else
1779                 a = op->args[i];
1780 #endif
1781                 col += ne_fprintf(f, " " TARGET_FMT_lx, a);
1782             }
1783         } else if (c == INDEX_op_call) {
1784             const TCGHelperInfo *info = tcg_call_info(op);
1785             void *func = tcg_call_func(op);
1786 
1787             /* variable number of arguments */
1788             nb_oargs = TCGOP_CALLO(op);
1789             nb_iargs = TCGOP_CALLI(op);
1790             nb_cargs = def->nb_cargs;
1791 
1792             col += ne_fprintf(f, " %s ", def->name);
1793 
1794             /*
1795              * Print the function name from TCGHelperInfo, if available.
1796              * Note that plugins have a template function for the info,
1797              * but the actual function pointer comes from the plugin.
1798              */
1799             if (func == info->func) {
1800                 col += ne_fprintf(f, "%s", info->name);
1801             } else {
1802                 col += ne_fprintf(f, "plugin(%p)", func);
1803             }
1804 
1805             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
1806             for (i = 0; i < nb_oargs; i++) {
1807                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1808                                                             op->args[i]));
1809             }
1810             for (i = 0; i < nb_iargs; i++) {
1811                 TCGArg arg = op->args[nb_oargs + i];
1812                 const char *t = "<dummy>";
1813                 if (arg != TCG_CALL_DUMMY_ARG) {
1814                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1815                 }
1816                 col += ne_fprintf(f, ",%s", t);
1817             }
1818         } else {
1819             col += ne_fprintf(f, " %s ", def->name);
1820 
1821             nb_oargs = def->nb_oargs;
1822             nb_iargs = def->nb_iargs;
1823             nb_cargs = def->nb_cargs;
1824 
1825             if (def->flags & TCG_OPF_VECTOR) {
1826                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
1827                                   8 << TCGOP_VECE(op));
1828             }
1829 
1830             k = 0;
1831             for (i = 0; i < nb_oargs; i++) {
1832                 const char *sep =  k ? "," : "";
1833                 col += ne_fprintf(f, "%s%s", sep,
1834                                   tcg_get_arg_str(s, buf, sizeof(buf),
1835                                                   op->args[k++]));
1836             }
1837             for (i = 0; i < nb_iargs; i++) {
1838                 const char *sep =  k ? "," : "";
1839                 col += ne_fprintf(f, "%s%s", sep,
1840                                   tcg_get_arg_str(s, buf, sizeof(buf),
1841                                                   op->args[k++]));
1842             }
1843             switch (c) {
1844             case INDEX_op_brcond_i32:
1845             case INDEX_op_setcond_i32:
1846             case INDEX_op_movcond_i32:
1847             case INDEX_op_brcond2_i32:
1848             case INDEX_op_setcond2_i32:
1849             case INDEX_op_brcond_i64:
1850             case INDEX_op_setcond_i64:
1851             case INDEX_op_movcond_i64:
1852             case INDEX_op_cmp_vec:
1853             case INDEX_op_cmpsel_vec:
1854                 if (op->args[k] < ARRAY_SIZE(cond_name)
1855                     && cond_name[op->args[k]]) {
1856                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
1857                 } else {
1858                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
1859                 }
1860                 i = 1;
1861                 break;
1862             case INDEX_op_qemu_ld_i32:
1863             case INDEX_op_qemu_st_i32:
1864             case INDEX_op_qemu_st8_i32:
1865             case INDEX_op_qemu_ld_i64:
1866             case INDEX_op_qemu_st_i64:
1867                 {
1868                     MemOpIdx oi = op->args[k++];
1869                     MemOp op = get_memop(oi);
1870                     unsigned ix = get_mmuidx(oi);
1871 
1872                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1873                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
1874                     } else {
1875                         const char *s_al, *s_op;
1876                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1877                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1878                         col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
1879                     }
1880                     i = 1;
1881                 }
1882                 break;
1883             case INDEX_op_bswap16_i32:
1884             case INDEX_op_bswap16_i64:
1885             case INDEX_op_bswap32_i32:
1886             case INDEX_op_bswap32_i64:
1887             case INDEX_op_bswap64_i64:
1888                 {
1889                     TCGArg flags = op->args[k];
1890                     const char *name = NULL;
1891 
1892                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
1893                         name = bswap_flag_name[flags];
1894                     }
1895                     if (name) {
1896                         col += ne_fprintf(f, ",%s", name);
1897                     } else {
1898                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
1899                     }
1900                     i = k = 1;
1901                 }
1902                 break;
1903             default:
1904                 i = 0;
1905                 break;
1906             }
1907             switch (c) {
1908             case INDEX_op_set_label:
1909             case INDEX_op_br:
1910             case INDEX_op_brcond_i32:
1911             case INDEX_op_brcond_i64:
1912             case INDEX_op_brcond2_i32:
1913                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
1914                                   arg_label(op->args[k])->id);
1915                 i++, k++;
1916                 break;
1917             default:
1918                 break;
1919             }
1920             for (; i < nb_cargs; i++, k++) {
1921                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
1922                                   op->args[k]);
1923             }
1924         }
1925 
1926         if (have_prefs || op->life) {
1927             for (; col < 40; ++col) {
1928                 putc(' ', f);
1929             }
1930         }
1931 
1932         if (op->life) {
1933             unsigned life = op->life;
1934 
1935             if (life & (SYNC_ARG * 3)) {
1936                 ne_fprintf(f, "  sync:");
1937                 for (i = 0; i < 2; ++i) {
1938                     if (life & (SYNC_ARG << i)) {
1939                         ne_fprintf(f, " %d", i);
1940                     }
1941                 }
1942             }
1943             life /= DEAD_ARG;
1944             if (life) {
1945                 ne_fprintf(f, "  dead:");
1946                 for (i = 0; life; ++i, life >>= 1) {
1947                     if (life & 1) {
1948                         ne_fprintf(f, " %d", i);
1949                     }
1950                 }
1951             }
1952         }
1953 
1954         if (have_prefs) {
1955             for (i = 0; i < nb_oargs; ++i) {
1956                 TCGRegSet set = op->output_pref[i];
1957 
1958                 if (i == 0) {
1959                     ne_fprintf(f, "  pref=");
1960                 } else {
1961                     ne_fprintf(f, ",");
1962                 }
1963                 if (set == 0) {
1964                     ne_fprintf(f, "none");
1965                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
1966                     ne_fprintf(f, "all");
1967 #ifdef CONFIG_DEBUG_TCG
1968                 } else if (tcg_regset_single(set)) {
1969                     TCGReg reg = tcg_regset_first(set);
1970                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
1971 #endif
1972                 } else if (TCG_TARGET_NB_REGS <= 32) {
1973                     ne_fprintf(f, "0x%x", (uint32_t)set);
1974                 } else {
1975                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
1976                 }
1977             }
1978         }
1979 
1980         putc('\n', f);
1981     }
1982 }
1983 
1984 /* we give more priority to constraints with less registers */
1985 static int get_constraint_priority(const TCGOpDef *def, int k)
1986 {
1987     const TCGArgConstraint *arg_ct = &def->args_ct[k];
1988     int n;
1989 
1990     if (arg_ct->oalias) {
1991         /* an alias is equivalent to a single register */
1992         n = 1;
1993     } else {
1994         n = ctpop64(arg_ct->regs);
1995     }
1996     return TCG_TARGET_NB_REGS - n + 1;
1997 }
1998 
1999 /* sort from highest priority to lowest */
2000 static void sort_constraints(TCGOpDef *def, int start, int n)
2001 {
2002     int i, j;
2003     TCGArgConstraint *a = def->args_ct;
2004 
2005     for (i = 0; i < n; i++) {
2006         a[start + i].sort_index = start + i;
2007     }
2008     if (n <= 1) {
2009         return;
2010     }
2011     for (i = 0; i < n - 1; i++) {
2012         for (j = i + 1; j < n; j++) {
2013             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2014             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2015             if (p1 < p2) {
2016                 int tmp = a[start + i].sort_index;
2017                 a[start + i].sort_index = a[start + j].sort_index;
2018                 a[start + j].sort_index = tmp;
2019             }
2020         }
2021     }
2022 }
2023 
2024 static void process_op_defs(TCGContext *s)
2025 {
2026     TCGOpcode op;
2027 
2028     for (op = 0; op < NB_OPS; op++) {
2029         TCGOpDef *def = &tcg_op_defs[op];
2030         const TCGTargetOpDef *tdefs;
2031         int i, nb_args;
2032 
2033         if (def->flags & TCG_OPF_NOT_PRESENT) {
2034             continue;
2035         }
2036 
2037         nb_args = def->nb_iargs + def->nb_oargs;
2038         if (nb_args == 0) {
2039             continue;
2040         }
2041 
2042         /*
2043          * Macro magic should make it impossible, but double-check that
2044          * the array index is in range.  Since the signness of an enum
2045          * is implementation defined, force the result to unsigned.
2046          */
2047         unsigned con_set = tcg_target_op_def(op);
2048         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2049         tdefs = &constraint_sets[con_set];
2050 
2051         for (i = 0; i < nb_args; i++) {
2052             const char *ct_str = tdefs->args_ct_str[i];
2053             /* Incomplete TCGTargetOpDef entry. */
2054             tcg_debug_assert(ct_str != NULL);
2055 
2056             while (*ct_str != '\0') {
2057                 switch(*ct_str) {
2058                 case '0' ... '9':
2059                     {
2060                         int oarg = *ct_str - '0';
2061                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2062                         tcg_debug_assert(oarg < def->nb_oargs);
2063                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2064                         def->args_ct[i] = def->args_ct[oarg];
2065                         /* The output sets oalias.  */
2066                         def->args_ct[oarg].oalias = true;
2067                         def->args_ct[oarg].alias_index = i;
2068                         /* The input sets ialias. */
2069                         def->args_ct[i].ialias = true;
2070                         def->args_ct[i].alias_index = oarg;
2071                     }
2072                     ct_str++;
2073                     break;
2074                 case '&':
2075                     def->args_ct[i].newreg = true;
2076                     ct_str++;
2077                     break;
2078                 case 'i':
2079                     def->args_ct[i].ct |= TCG_CT_CONST;
2080                     ct_str++;
2081                     break;
2082 
2083                 /* Include all of the target-specific constraints. */
2084 
2085 #undef CONST
2086 #define CONST(CASE, MASK) \
2087     case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2088 #define REGS(CASE, MASK) \
2089     case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2090 
2091 #include "tcg-target-con-str.h"
2092 
2093 #undef REGS
2094 #undef CONST
2095                 default:
2096                     /* Typo in TCGTargetOpDef constraint. */
2097                     g_assert_not_reached();
2098                 }
2099             }
2100         }
2101 
2102         /* TCGTargetOpDef entry with too much information? */
2103         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2104 
2105         /* sort the constraints (XXX: this is just an heuristic) */
2106         sort_constraints(def, 0, def->nb_oargs);
2107         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2108     }
2109 }
2110 
2111 void tcg_op_remove(TCGContext *s, TCGOp *op)
2112 {
2113     TCGLabel *label;
2114 
2115     switch (op->opc) {
2116     case INDEX_op_br:
2117         label = arg_label(op->args[0]);
2118         label->refs--;
2119         break;
2120     case INDEX_op_brcond_i32:
2121     case INDEX_op_brcond_i64:
2122         label = arg_label(op->args[3]);
2123         label->refs--;
2124         break;
2125     case INDEX_op_brcond2_i32:
2126         label = arg_label(op->args[5]);
2127         label->refs--;
2128         break;
2129     default:
2130         break;
2131     }
2132 
2133     QTAILQ_REMOVE(&s->ops, op, link);
2134     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2135     s->nb_ops--;
2136 
2137 #ifdef CONFIG_PROFILER
2138     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2139 #endif
2140 }
2141 
2142 void tcg_remove_ops_after(TCGOp *op)
2143 {
2144     TCGContext *s = tcg_ctx;
2145 
2146     while (true) {
2147         TCGOp *last = tcg_last_op();
2148         if (last == op) {
2149             return;
2150         }
2151         tcg_op_remove(s, last);
2152     }
2153 }
2154 
2155 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2156 {
2157     TCGContext *s = tcg_ctx;
2158     TCGOp *op;
2159 
2160     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2161         op = tcg_malloc(sizeof(TCGOp));
2162     } else {
2163         op = QTAILQ_FIRST(&s->free_ops);
2164         QTAILQ_REMOVE(&s->free_ops, op, link);
2165     }
2166     memset(op, 0, offsetof(TCGOp, link));
2167     op->opc = opc;
2168     s->nb_ops++;
2169 
2170     return op;
2171 }
2172 
2173 TCGOp *tcg_emit_op(TCGOpcode opc)
2174 {
2175     TCGOp *op = tcg_op_alloc(opc);
2176     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2177     return op;
2178 }
2179 
2180 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2181 {
2182     TCGOp *new_op = tcg_op_alloc(opc);
2183     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2184     return new_op;
2185 }
2186 
2187 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2188 {
2189     TCGOp *new_op = tcg_op_alloc(opc);
2190     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2191     return new_op;
2192 }
2193 
2194 /* Reachable analysis : remove unreachable code.  */
2195 static void reachable_code_pass(TCGContext *s)
2196 {
2197     TCGOp *op, *op_next;
2198     bool dead = false;
2199 
2200     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2201         bool remove = dead;
2202         TCGLabel *label;
2203 
2204         switch (op->opc) {
2205         case INDEX_op_set_label:
2206             label = arg_label(op->args[0]);
2207             if (label->refs == 0) {
2208                 /*
2209                  * While there is an occasional backward branch, virtually
2210                  * all branches generated by the translators are forward.
2211                  * Which means that generally we will have already removed
2212                  * all references to the label that will be, and there is
2213                  * little to be gained by iterating.
2214                  */
2215                 remove = true;
2216             } else {
2217                 /* Once we see a label, insns become live again.  */
2218                 dead = false;
2219                 remove = false;
2220 
2221                 /*
2222                  * Optimization can fold conditional branches to unconditional.
2223                  * If we find a label with one reference which is preceded by
2224                  * an unconditional branch to it, remove both.  This needed to
2225                  * wait until the dead code in between them was removed.
2226                  */
2227                 if (label->refs == 1) {
2228                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2229                     if (op_prev->opc == INDEX_op_br &&
2230                         label == arg_label(op_prev->args[0])) {
2231                         tcg_op_remove(s, op_prev);
2232                         remove = true;
2233                     }
2234                 }
2235             }
2236             break;
2237 
2238         case INDEX_op_br:
2239         case INDEX_op_exit_tb:
2240         case INDEX_op_goto_ptr:
2241             /* Unconditional branches; everything following is dead.  */
2242             dead = true;
2243             break;
2244 
2245         case INDEX_op_call:
2246             /* Notice noreturn helper calls, raising exceptions.  */
2247             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2248                 dead = true;
2249             }
2250             break;
2251 
2252         case INDEX_op_insn_start:
2253             /* Never remove -- we need to keep these for unwind.  */
2254             remove = false;
2255             break;
2256 
2257         default:
2258             break;
2259         }
2260 
2261         if (remove) {
2262             tcg_op_remove(s, op);
2263         }
2264     }
2265 }
2266 
/*
 * Bits kept in TCGTemp.state by the liveness passes below:
 * TS_DEAD - the temp is dead at the current point of the analysis;
 * TS_MEM  - the temp should be in memory at that point.
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the DEAD_ARG / SYNC_ARG bit for argument N.  Both expect a
 * variable named 'arg_life' to be in scope at the point of use.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2272 
2273 /* For liveness_pass_1, the register preferences for a given temp.  */
2274 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2275 {
2276     return ts->state_ptr;
2277 }
2278 
2279 /* For liveness_pass_1, reset the preferences for a given temp to the
2280  * maximal regset for its type.
2281  */
2282 static inline void la_reset_pref(TCGTemp *ts)
2283 {
2284     *la_temp_pref(ts)
2285         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2286 }
2287 
2288 /* liveness analysis: end of function: all temps are dead, and globals
2289    should be in memory. */
2290 static void la_func_end(TCGContext *s, int ng, int nt)
2291 {
2292     int i;
2293 
2294     for (i = 0; i < ng; ++i) {
2295         s->temps[i].state = TS_DEAD | TS_MEM;
2296         la_reset_pref(&s->temps[i]);
2297     }
2298     for (i = ng; i < nt; ++i) {
2299         s->temps[i].state = TS_DEAD;
2300         la_reset_pref(&s->temps[i]);
2301     }
2302 }
2303 
2304 /* liveness analysis: end of basic block: all temps are dead, globals
2305    and local temps should be in memory. */
2306 static void la_bb_end(TCGContext *s, int ng, int nt)
2307 {
2308     int i;
2309 
2310     for (i = 0; i < nt; ++i) {
2311         TCGTemp *ts = &s->temps[i];
2312         int state;
2313 
2314         switch (ts->kind) {
2315         case TEMP_FIXED:
2316         case TEMP_GLOBAL:
2317         case TEMP_LOCAL:
2318             state = TS_DEAD | TS_MEM;
2319             break;
2320         case TEMP_NORMAL:
2321         case TEMP_EBB:
2322         case TEMP_CONST:
2323             state = TS_DEAD;
2324             break;
2325         default:
2326             g_assert_not_reached();
2327         }
2328         ts->state = state;
2329         la_reset_pref(ts);
2330     }
2331 }
2332 
2333 /* liveness analysis: sync globals back to memory.  */
2334 static void la_global_sync(TCGContext *s, int ng)
2335 {
2336     int i;
2337 
2338     for (i = 0; i < ng; ++i) {
2339         int state = s->temps[i].state;
2340         s->temps[i].state = state | TS_MEM;
2341         if (state == TS_DEAD) {
2342             /* If the global was previously dead, reset prefs.  */
2343             la_reset_pref(&s->temps[i]);
2344         }
2345     }
2346 }
2347 
2348 /*
2349  * liveness analysis: conditional branch: all temps are dead unless
2350  * explicitly live-across-conditional-branch, globals and local temps
2351  * should be synced.
2352  */
2353 static void la_bb_sync(TCGContext *s, int ng, int nt)
2354 {
2355     la_global_sync(s, ng);
2356 
2357     for (int i = ng; i < nt; ++i) {
2358         TCGTemp *ts = &s->temps[i];
2359         int state;
2360 
2361         switch (ts->kind) {
2362         case TEMP_LOCAL:
2363             state = ts->state;
2364             ts->state = state | TS_MEM;
2365             if (state != TS_DEAD) {
2366                 continue;
2367             }
2368             break;
2369         case TEMP_NORMAL:
2370             s->temps[i].state = TS_DEAD;
2371             break;
2372         case TEMP_EBB:
2373         case TEMP_CONST:
2374             continue;
2375         default:
2376             g_assert_not_reached();
2377         }
2378         la_reset_pref(&s->temps[i]);
2379     }
2380 }
2381 
2382 /* liveness analysis: sync globals back to memory and kill.  */
2383 static void la_global_kill(TCGContext *s, int ng)
2384 {
2385     int i;
2386 
2387     for (i = 0; i < ng; i++) {
2388         s->temps[i].state = TS_DEAD | TS_MEM;
2389         la_reset_pref(&s->temps[i]);
2390     }
2391 }
2392 
2393 /* liveness analysis: note live globals crossing calls.  */
2394 static void la_cross_call(TCGContext *s, int nt)
2395 {
2396     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2397     int i;
2398 
2399     for (i = 0; i < nt; i++) {
2400         TCGTemp *ts = &s->temps[i];
2401         if (!(ts->state & TS_DEAD)) {
2402             TCGRegSet *pset = la_temp_pref(ts);
2403             TCGRegSet set = *pset;
2404 
2405             set &= mask;
2406             /* If the combination is not possible, restart.  */
2407             if (set == 0) {
2408                 set = tcg_target_available_regs[ts->type] & mask;
2409             }
2410             *pset = set;
2411         }
2412     }
2413 }
2414 
/*
 * Liveness analysis, pass 1.
 *
 * Walk the op list of S from the last op to the first.  While walking,
 * each temp's 'state' holds TS_DEAD and/or TS_MEM as seen from the ops
 * that follow, and each temp's state_ptr points at a TCGRegSet holding
 * the register preferences accumulated from those later uses.
 *
 * For every op that is kept, op->life receives the DEAD_ARG / SYNC_ARG
 * bits computed for its arguments and op->output_pref[] receives the
 * preferences of its outputs.  Ops without side effects whose outputs
 * are all dead are removed, and add2/sub2/mul2 ops with a dead half are
 * rewritten in place to the corresponding single-word opcode.
 */
static void liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* One preference set per temp, reachable through la_temp_pref().  */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        /* Must keep this name: IS_DEAD_ARG() refers to 'arg_life'.  */
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        /*
         * Note that 'def' is not refreshed when the opcode is rewritten
         * by the add2/sub2/mul2 cases below.
         */
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                int call_flags;
                int nb_call_regs;

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);
                call_flags = tcg_call_flags(op);

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);

                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
                    op->output_pref[i] = 0;
                }

                /*
                 * With neither NO_WRITE_GLOBALS nor NO_READ_GLOBALS set,
                 * globals are killed; with only NO_READ_GLOBALS clear,
                 * they are synced.
                 */
                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /*
                 * Record arguments that die in this helper.  A call
                 * argument may have no temp (ts == NULL); skip those.
                 */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts && ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);

                /* Input arguments are live for preceding opcodes.  */
                for (i = 0; i < nb_iargs; i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts && ts->state & TS_DEAD) {
                        /* For those arguments that die, and will be allocated
                         * in registers, clear the register set for that arg,
                         * to be filled in below.  For args that will be on
                         * the stack, reset to any available reg.
                         */
                        *la_temp_pref(ts)
                            = (i < nb_call_regs ? 0 :
                               tcg_target_available_regs[ts->type]);
                        ts->state &= ~TS_DEAD;
                    }
                }

                /* For each input argument, add its input register to prefs.
                   If a temp is used once, this produces a single set bit.  */
                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts) {
                        tcg_regset_set_reg(*la_temp_pref(ts),
                                           tcg_target_call_iarg_regs[i]);
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            /* Nothing to track; arg_life stays 0 for this op.  */
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both halves are dead.  */
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                /* Keep the double-word op unchanged.  */
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            /*
             * tcg_op_remove() moves the op to the free list; the store
             * to op->life after the switch still executes for it.
             */
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                op->output_pref[i] = *la_temp_pref(ts);

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_COND_BRANCH) {
                la_bb_sync(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /*
             * Record arguments that die in this opcode.  This must run
             * before the next loop, which clears TS_DEAD on the inputs.
             */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    /* Narrow to what the constraint allows ...  */
                    set &= ct->regs;
                    /* ... and, for an aliased input, to the output's prefs. */
                    if (ct->ialias) {
                        set &= op->output_pref[ct->alias_index];
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
2727 
2728 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2729 static bool liveness_pass_2(TCGContext *s)
2730 {
2731     int nb_globals = s->nb_globals;
2732     int nb_temps, i;
2733     bool changes = false;
2734     TCGOp *op, *op_next;
2735 
2736     /* Create a temporary for each indirect global.  */
2737     for (i = 0; i < nb_globals; ++i) {
2738         TCGTemp *its = &s->temps[i];
2739         if (its->indirect_reg) {
2740             TCGTemp *dts = tcg_temp_alloc(s);
2741             dts->type = its->type;
2742             dts->base_type = its->base_type;
2743             dts->kind = TEMP_EBB;
2744             its->state_ptr = dts;
2745         } else {
2746             its->state_ptr = NULL;
2747         }
2748         /* All globals begin dead.  */
2749         its->state = TS_DEAD;
2750     }
2751     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2752         TCGTemp *its = &s->temps[i];
2753         its->state_ptr = NULL;
2754         its->state = TS_DEAD;
2755     }
2756 
2757     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2758         TCGOpcode opc = op->opc;
2759         const TCGOpDef *def = &tcg_op_defs[opc];
2760         TCGLifeData arg_life = op->life;
2761         int nb_iargs, nb_oargs, call_flags;
2762         TCGTemp *arg_ts, *dir_ts;
2763 
2764         if (opc == INDEX_op_call) {
2765             nb_oargs = TCGOP_CALLO(op);
2766             nb_iargs = TCGOP_CALLI(op);
2767             call_flags = tcg_call_flags(op);
2768         } else {
2769             nb_iargs = def->nb_iargs;
2770             nb_oargs = def->nb_oargs;
2771 
2772             /* Set flags similar to how calls require.  */
2773             if (def->flags & TCG_OPF_COND_BRANCH) {
2774                 /* Like reading globals: sync_globals */
2775                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2776             } else if (def->flags & TCG_OPF_BB_END) {
2777                 /* Like writing globals: save_globals */
2778                 call_flags = 0;
2779             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2780                 /* Like reading globals: sync_globals */
2781                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2782             } else {
2783                 /* No effect on globals.  */
2784                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2785                               TCG_CALL_NO_WRITE_GLOBALS);
2786             }
2787         }
2788 
2789         /* Make sure that input arguments are available.  */
2790         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2791             arg_ts = arg_temp(op->args[i]);
2792             if (arg_ts) {
2793                 dir_ts = arg_ts->state_ptr;
2794                 if (dir_ts && arg_ts->state == TS_DEAD) {
2795                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2796                                       ? INDEX_op_ld_i32
2797                                       : INDEX_op_ld_i64);
2798                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2799 
2800                     lop->args[0] = temp_arg(dir_ts);
2801                     lop->args[1] = temp_arg(arg_ts->mem_base);
2802                     lop->args[2] = arg_ts->mem_offset;
2803 
2804                     /* Loaded, but synced with memory.  */
2805                     arg_ts->state = TS_MEM;
2806                 }
2807             }
2808         }
2809 
2810         /* Perform input replacement, and mark inputs that became dead.
2811            No action is required except keeping temp_state up to date
2812            so that we reload when needed.  */
2813         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2814             arg_ts = arg_temp(op->args[i]);
2815             if (arg_ts) {
2816                 dir_ts = arg_ts->state_ptr;
2817                 if (dir_ts) {
2818                     op->args[i] = temp_arg(dir_ts);
2819                     changes = true;
2820                     if (IS_DEAD_ARG(i)) {
2821                         arg_ts->state = TS_DEAD;
2822                     }
2823                 }
2824             }
2825         }
2826 
2827         /* Liveness analysis should ensure that the following are
2828            all correct, for call sites and basic block end points.  */
2829         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2830             /* Nothing to do */
2831         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2832             for (i = 0; i < nb_globals; ++i) {
2833                 /* Liveness should see that globals are synced back,
2834                    that is, either TS_DEAD or TS_MEM.  */
2835                 arg_ts = &s->temps[i];
2836                 tcg_debug_assert(arg_ts->state_ptr == 0
2837                                  || arg_ts->state != 0);
2838             }
2839         } else {
2840             for (i = 0; i < nb_globals; ++i) {
2841                 /* Liveness should see that globals are saved back,
2842                    that is, TS_DEAD, waiting to be reloaded.  */
2843                 arg_ts = &s->temps[i];
2844                 tcg_debug_assert(arg_ts->state_ptr == 0
2845                                  || arg_ts->state == TS_DEAD);
2846             }
2847         }
2848 
2849         /* Outputs become available.  */
2850         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2851             arg_ts = arg_temp(op->args[0]);
2852             dir_ts = arg_ts->state_ptr;
2853             if (dir_ts) {
2854                 op->args[0] = temp_arg(dir_ts);
2855                 changes = true;
2856 
2857                 /* The output is now live and modified.  */
2858                 arg_ts->state = 0;
2859 
2860                 if (NEED_SYNC_ARG(0)) {
2861                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2862                                       ? INDEX_op_st_i32
2863                                       : INDEX_op_st_i64);
2864                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2865                     TCGTemp *out_ts = dir_ts;
2866 
2867                     if (IS_DEAD_ARG(0)) {
2868                         out_ts = arg_temp(op->args[1]);
2869                         arg_ts->state = TS_DEAD;
2870                         tcg_op_remove(s, op);
2871                     } else {
2872                         arg_ts->state = TS_MEM;
2873                     }
2874 
2875                     sop->args[0] = temp_arg(out_ts);
2876                     sop->args[1] = temp_arg(arg_ts->mem_base);
2877                     sop->args[2] = arg_ts->mem_offset;
2878                 } else {
2879                     tcg_debug_assert(!IS_DEAD_ARG(0));
2880                 }
2881             }
2882         } else {
2883             for (i = 0; i < nb_oargs; i++) {
2884                 arg_ts = arg_temp(op->args[i]);
2885                 dir_ts = arg_ts->state_ptr;
2886                 if (!dir_ts) {
2887                     continue;
2888                 }
2889                 op->args[i] = temp_arg(dir_ts);
2890                 changes = true;
2891 
2892                 /* The output is now live and modified.  */
2893                 arg_ts->state = 0;
2894 
2895                 /* Sync outputs upon their last write.  */
2896                 if (NEED_SYNC_ARG(i)) {
2897                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2898                                       ? INDEX_op_st_i32
2899                                       : INDEX_op_st_i64);
2900                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2901 
2902                     sop->args[0] = temp_arg(dir_ts);
2903                     sop->args[1] = temp_arg(arg_ts->mem_base);
2904                     sop->args[2] = arg_ts->mem_offset;
2905 
2906                     arg_ts->state = TS_MEM;
2907                 }
2908                 /* Drop outputs that are dead.  */
2909                 if (IS_DEAD_ARG(i)) {
2910                     arg_ts->state = TS_DEAD;
2911                 }
2912             }
2913         }
2914     }
2915 
2916     return changes;
2917 }
2918 
2919 #ifdef CONFIG_DEBUG_TCG
2920 static void dump_regs(TCGContext *s)
2921 {
2922     TCGTemp *ts;
2923     int i;
2924     char buf[64];
2925 
2926     for(i = 0; i < s->nb_temps; i++) {
2927         ts = &s->temps[i];
2928         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2929         switch(ts->val_type) {
2930         case TEMP_VAL_REG:
2931             printf("%s", tcg_target_reg_names[ts->reg]);
2932             break;
2933         case TEMP_VAL_MEM:
2934             printf("%d(%s)", (int)ts->mem_offset,
2935                    tcg_target_reg_names[ts->mem_base->reg]);
2936             break;
2937         case TEMP_VAL_CONST:
2938             printf("$0x%" PRIx64, ts->val);
2939             break;
2940         case TEMP_VAL_DEAD:
2941             printf("D");
2942             break;
2943         default:
2944             printf("???");
2945             break;
2946         }
2947         printf("\n");
2948     }
2949 
2950     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2951         if (s->reg_to_temp[i] != NULL) {
2952             printf("%s: %s\n",
2953                    tcg_target_reg_names[i],
2954                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2955         }
2956     }
2957 }
2958 
2959 static void check_regs(TCGContext *s)
2960 {
2961     int reg;
2962     int k;
2963     TCGTemp *ts;
2964     char buf[64];
2965 
2966     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2967         ts = s->reg_to_temp[reg];
2968         if (ts != NULL) {
2969             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2970                 printf("Inconsistency for register %s:\n",
2971                        tcg_target_reg_names[reg]);
2972                 goto fail;
2973             }
2974         }
2975     }
2976     for (k = 0; k < s->nb_temps; k++) {
2977         ts = &s->temps[k];
2978         if (ts->val_type == TEMP_VAL_REG
2979             && ts->kind != TEMP_FIXED
2980             && s->reg_to_temp[ts->reg] != ts) {
2981             printf("Inconsistency for temp %s:\n",
2982                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2983         fail:
2984             printf("reg state:\n");
2985             dump_regs(s);
2986             tcg_abort();
2987         }
2988     }
2989 }
2990 #endif
2991 
2992 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
2993 {
2994     intptr_t off, size, align;
2995 
2996     switch (ts->type) {
2997     case TCG_TYPE_I32:
2998         size = align = 4;
2999         break;
3000     case TCG_TYPE_I64:
3001     case TCG_TYPE_V64:
3002         size = align = 8;
3003         break;
3004     case TCG_TYPE_V128:
3005         size = align = 16;
3006         break;
3007     case TCG_TYPE_V256:
3008         /* Note that we do not require aligned storage for V256. */
3009         size = 32, align = 16;
3010         break;
3011     default:
3012         g_assert_not_reached();
3013     }
3014 
3015     /*
3016      * Assume the stack is sufficiently aligned.
3017      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3018      * and do not require 16 byte vector alignment.  This seems slightly
3019      * easier than fully parameterizing the above switch statement.
3020      */
3021     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3022     off = ROUND_UP(s->current_frame_offset, align);
3023 
3024     /* If we've exhausted the stack frame, restart with a smaller TB. */
3025     if (off + size > s->frame_end) {
3026         tcg_raise_tb_overflow(s);
3027     }
3028     s->current_frame_offset = off + size;
3029 
3030     ts->mem_offset = off;
3031 #if defined(__sparc__)
3032     ts->mem_offset += TCG_TARGET_STACK_BIAS;
3033 #endif
3034     ts->mem_base = s->frame_temp;
3035     ts->mem_allocated = 1;
3036 }
3037 
3038 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3039 
3040 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3041    mark it free; otherwise mark it dead.  */
3042 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3043 {
3044     TCGTempVal new_type;
3045 
3046     switch (ts->kind) {
3047     case TEMP_FIXED:
3048         return;
3049     case TEMP_GLOBAL:
3050     case TEMP_LOCAL:
3051         new_type = TEMP_VAL_MEM;
3052         break;
3053     case TEMP_NORMAL:
3054     case TEMP_EBB:
3055         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3056         break;
3057     case TEMP_CONST:
3058         new_type = TEMP_VAL_CONST;
3059         break;
3060     default:
3061         g_assert_not_reached();
3062     }
3063     if (ts->val_type == TEMP_VAL_REG) {
3064         s->reg_to_temp[ts->reg] = NULL;
3065     }
3066     ts->val_type = new_type;
3067 }
3068 
3069 /* Mark a temporary as dead.  */
3070 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3071 {
3072     temp_free_or_dead(s, ts, 1);
3073 }
3074 
3075 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3076    registers needs to be allocated to store a constant.  If 'free_or_dead'
3077    is non-zero, subsequently release the temporary; if it is positive, the
3078    temp is dead; if it is negative, the temp is free.  */
3079 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3080                       TCGRegSet preferred_regs, int free_or_dead)
3081 {
3082     if (!temp_readonly(ts) && !ts->mem_coherent) {
3083         if (!ts->mem_allocated) {
3084             temp_allocate_frame(s, ts);
3085         }
3086         switch (ts->val_type) {
3087         case TEMP_VAL_CONST:
3088             /* If we're going to free the temp immediately, then we won't
3089                require it later in a register, so attempt to store the
3090                constant to memory directly.  */
3091             if (free_or_dead
3092                 && tcg_out_sti(s, ts->type, ts->val,
3093                                ts->mem_base->reg, ts->mem_offset)) {
3094                 break;
3095             }
3096             temp_load(s, ts, tcg_target_available_regs[ts->type],
3097                       allocated_regs, preferred_regs);
3098             /* fallthrough */
3099 
3100         case TEMP_VAL_REG:
3101             tcg_out_st(s, ts->type, ts->reg,
3102                        ts->mem_base->reg, ts->mem_offset);
3103             break;
3104 
3105         case TEMP_VAL_MEM:
3106             break;
3107 
3108         case TEMP_VAL_DEAD:
3109         default:
3110             tcg_abort();
3111         }
3112         ts->mem_coherent = 1;
3113     }
3114     if (free_or_dead) {
3115         temp_free_or_dead(s, ts, free_or_dead);
3116     }
3117 }
3118 
3119 /* free register 'reg' by spilling the corresponding temporary if necessary */
3120 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3121 {
3122     TCGTemp *ts = s->reg_to_temp[reg];
3123     if (ts != NULL) {
3124         temp_sync(s, ts, allocated_regs, 0, -1);
3125     }
3126 }
3127 
3128 /**
3129  * tcg_reg_alloc:
3130  * @required_regs: Set of registers in which we must allocate.
3131  * @allocated_regs: Set of registers which must be avoided.
3132  * @preferred_regs: Set of registers we should prefer.
3133  * @rev: True if we search the registers in "indirect" order.
3134  *
3135  * The allocated register must be in @required_regs & ~@allocated_regs,
3136  * but if we can put it in @preferred_regs we may save a move later.
3137  */
3138 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3139                             TCGRegSet allocated_regs,
3140                             TCGRegSet preferred_regs, bool rev)
3141 {
3142     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3143     TCGRegSet reg_ct[2];
3144     const int *order;
3145 
3146     reg_ct[1] = required_regs & ~allocated_regs;
3147     tcg_debug_assert(reg_ct[1] != 0);
3148     reg_ct[0] = reg_ct[1] & preferred_regs;
3149 
3150     /* Skip the preferred_regs option if it cannot be satisfied,
3151        or if the preference made no difference.  */
3152     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3153 
3154     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3155 
3156     /* Try free registers, preferences first.  */
3157     for (j = f; j < 2; j++) {
3158         TCGRegSet set = reg_ct[j];
3159 
3160         if (tcg_regset_single(set)) {
3161             /* One register in the set.  */
3162             TCGReg reg = tcg_regset_first(set);
3163             if (s->reg_to_temp[reg] == NULL) {
3164                 return reg;
3165             }
3166         } else {
3167             for (i = 0; i < n; i++) {
3168                 TCGReg reg = order[i];
3169                 if (s->reg_to_temp[reg] == NULL &&
3170                     tcg_regset_test_reg(set, reg)) {
3171                     return reg;
3172                 }
3173             }
3174         }
3175     }
3176 
3177     /* We must spill something.  */
3178     for (j = f; j < 2; j++) {
3179         TCGRegSet set = reg_ct[j];
3180 
3181         if (tcg_regset_single(set)) {
3182             /* One register in the set.  */
3183             TCGReg reg = tcg_regset_first(set);
3184             tcg_reg_free(s, reg, allocated_regs);
3185             return reg;
3186         } else {
3187             for (i = 0; i < n; i++) {
3188                 TCGReg reg = order[i];
3189                 if (tcg_regset_test_reg(set, reg)) {
3190                     tcg_reg_free(s, reg, allocated_regs);
3191                     return reg;
3192                 }
3193             }
3194         }
3195     }
3196 
3197     tcg_abort();
3198 }
3199 
3200 /* Make sure the temporary is in a register.  If needed, allocate the register
3201    from DESIRED while avoiding ALLOCATED.  */
3202 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3203                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3204 {
3205     TCGReg reg;
3206 
3207     switch (ts->val_type) {
3208     case TEMP_VAL_REG:
3209         return;
3210     case TEMP_VAL_CONST:
3211         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3212                             preferred_regs, ts->indirect_base);
3213         if (ts->type <= TCG_TYPE_I64) {
3214             tcg_out_movi(s, ts->type, reg, ts->val);
3215         } else {
3216             uint64_t val = ts->val;
3217             MemOp vece = MO_64;
3218 
3219             /*
3220              * Find the minimal vector element that matches the constant.
3221              * The targets will, in general, have to do this search anyway,
3222              * do this generically.
3223              */
3224             if (val == dup_const(MO_8, val)) {
3225                 vece = MO_8;
3226             } else if (val == dup_const(MO_16, val)) {
3227                 vece = MO_16;
3228             } else if (val == dup_const(MO_32, val)) {
3229                 vece = MO_32;
3230             }
3231 
3232             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3233         }
3234         ts->mem_coherent = 0;
3235         break;
3236     case TEMP_VAL_MEM:
3237         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3238                             preferred_regs, ts->indirect_base);
3239         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3240         ts->mem_coherent = 1;
3241         break;
3242     case TEMP_VAL_DEAD:
3243     default:
3244         tcg_abort();
3245     }
3246     ts->reg = reg;
3247     ts->val_type = TEMP_VAL_REG;
3248     s->reg_to_temp[reg] = ts;
3249 }
3250 
3251 /* Save a temporary to memory. 'allocated_regs' is used in case a
3252    temporary registers needs to be allocated to store a constant.  */
3253 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3254 {
3255     /* The liveness analysis already ensures that globals are back
3256        in memory. Keep an tcg_debug_assert for safety. */
3257     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3258 }
3259 
3260 /* save globals to their canonical location and assume they can be
3261    modified be the following code. 'allocated_regs' is used in case a
3262    temporary registers needs to be allocated to store a constant. */
3263 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3264 {
3265     int i, n;
3266 
3267     for (i = 0, n = s->nb_globals; i < n; i++) {
3268         temp_save(s, &s->temps[i], allocated_regs);
3269     }
3270 }
3271 
3272 /* sync globals to their canonical location and assume they can be
3273    read by the following code. 'allocated_regs' is used in case a
3274    temporary registers needs to be allocated to store a constant. */
3275 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3276 {
3277     int i, n;
3278 
3279     for (i = 0, n = s->nb_globals; i < n; i++) {
3280         TCGTemp *ts = &s->temps[i];
3281         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3282                          || ts->kind == TEMP_FIXED
3283                          || ts->mem_coherent);
3284     }
3285 }
3286 
3287 /* at the end of a basic block, we assume all temporaries are dead and
3288    all globals are stored at their canonical location. */
3289 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3290 {
3291     int i;
3292 
3293     for (i = s->nb_globals; i < s->nb_temps; i++) {
3294         TCGTemp *ts = &s->temps[i];
3295 
3296         switch (ts->kind) {
3297         case TEMP_LOCAL:
3298             temp_save(s, ts, allocated_regs);
3299             break;
3300         case TEMP_NORMAL:
3301         case TEMP_EBB:
3302             /* The liveness analysis already ensures that temps are dead.
3303                Keep an tcg_debug_assert for safety. */
3304             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3305             break;
3306         case TEMP_CONST:
3307             /* Similarly, we should have freed any allocated register. */
3308             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3309             break;
3310         default:
3311             g_assert_not_reached();
3312         }
3313     }
3314 
3315     save_globals(s, allocated_regs);
3316 }
3317 
3318 /*
3319  * At a conditional branch, we assume all temporaries are dead unless
3320  * explicitly live-across-conditional-branch; all globals and local
3321  * temps are synced to their location.
3322  */
3323 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3324 {
3325     sync_globals(s, allocated_regs);
3326 
3327     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3328         TCGTemp *ts = &s->temps[i];
3329         /*
3330          * The liveness analysis already ensures that temps are dead.
3331          * Keep tcg_debug_asserts for safety.
3332          */
3333         switch (ts->kind) {
3334         case TEMP_LOCAL:
3335             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3336             break;
3337         case TEMP_NORMAL:
3338             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3339             break;
3340         case TEMP_EBB:
3341         case TEMP_CONST:
3342             break;
3343         default:
3344             g_assert_not_reached();
3345         }
3346     }
3347 }
3348 
3349 /*
3350  * Specialized code generation for INDEX_op_mov_* with a constant.
3351  */
3352 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3353                                   tcg_target_ulong val, TCGLifeData arg_life,
3354                                   TCGRegSet preferred_regs)
3355 {
3356     /* ENV should not be modified.  */
3357     tcg_debug_assert(!temp_readonly(ots));
3358 
3359     /* The movi is not explicitly generated here.  */
3360     if (ots->val_type == TEMP_VAL_REG) {
3361         s->reg_to_temp[ots->reg] = NULL;
3362     }
3363     ots->val_type = TEMP_VAL_CONST;
3364     ots->val = val;
3365     ots->mem_coherent = 0;
3366     if (NEED_SYNC_ARG(0)) {
3367         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3368     } else if (IS_DEAD_ARG(0)) {
3369         temp_dead(s, ots);
3370     }
3371 }
3372 
3373 /*
3374  * Specialized code generation for INDEX_op_mov_*.
3375  */
3376 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3377 {
3378     const TCGLifeData arg_life = op->life;
3379     TCGRegSet allocated_regs, preferred_regs;
3380     TCGTemp *ts, *ots;
3381     TCGType otype, itype;
3382 
3383     allocated_regs = s->reserved_regs;
3384     preferred_regs = op->output_pref[0];
3385     ots = arg_temp(op->args[0]);
3386     ts = arg_temp(op->args[1]);
3387 
3388     /* ENV should not be modified.  */
3389     tcg_debug_assert(!temp_readonly(ots));
3390 
3391     /* Note that otype != itype for no-op truncation.  */
3392     otype = ots->type;
3393     itype = ts->type;
3394 
3395     if (ts->val_type == TEMP_VAL_CONST) {
3396         /* propagate constant or generate sti */
3397         tcg_target_ulong val = ts->val;
3398         if (IS_DEAD_ARG(1)) {
3399             temp_dead(s, ts);
3400         }
3401         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3402         return;
3403     }
3404 
3405     /* If the source value is in memory we're going to be forced
3406        to have it in a register in order to perform the copy.  Copy
3407        the SOURCE value into its own register first, that way we
3408        don't have to reload SOURCE the next time it is used. */
3409     if (ts->val_type == TEMP_VAL_MEM) {
3410         temp_load(s, ts, tcg_target_available_regs[itype],
3411                   allocated_regs, preferred_regs);
3412     }
3413 
3414     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3415     if (IS_DEAD_ARG(0)) {
3416         /* mov to a non-saved dead register makes no sense (even with
3417            liveness analysis disabled). */
3418         tcg_debug_assert(NEED_SYNC_ARG(0));
3419         if (!ots->mem_allocated) {
3420             temp_allocate_frame(s, ots);
3421         }
3422         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3423         if (IS_DEAD_ARG(1)) {
3424             temp_dead(s, ts);
3425         }
3426         temp_dead(s, ots);
3427     } else {
3428         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3429             /* the mov can be suppressed */
3430             if (ots->val_type == TEMP_VAL_REG) {
3431                 s->reg_to_temp[ots->reg] = NULL;
3432             }
3433             ots->reg = ts->reg;
3434             temp_dead(s, ts);
3435         } else {
3436             if (ots->val_type != TEMP_VAL_REG) {
3437                 /* When allocating a new register, make sure to not spill the
3438                    input one. */
3439                 tcg_regset_set_reg(allocated_regs, ts->reg);
3440                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3441                                          allocated_regs, preferred_regs,
3442                                          ots->indirect_base);
3443             }
3444             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3445                 /*
3446                  * Cross register class move not supported.
3447                  * Store the source register into the destination slot
3448                  * and leave the destination temp as TEMP_VAL_MEM.
3449                  */
3450                 assert(!temp_readonly(ots));
3451                 if (!ts->mem_allocated) {
3452                     temp_allocate_frame(s, ots);
3453                 }
3454                 tcg_out_st(s, ts->type, ts->reg,
3455                            ots->mem_base->reg, ots->mem_offset);
3456                 ots->mem_coherent = 1;
3457                 temp_free_or_dead(s, ots, -1);
3458                 return;
3459             }
3460         }
3461         ots->val_type = TEMP_VAL_REG;
3462         ots->mem_coherent = 0;
3463         s->reg_to_temp[ots->reg] = ots;
3464         if (NEED_SYNC_ARG(0)) {
3465             temp_sync(s, ots, allocated_regs, 0, 0);
3466         }
3467     }
3468 }
3469 
3470 /*
3471  * Specialized code generation for INDEX_op_dup_vec.
3472  */
3473 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3474 {
3475     const TCGLifeData arg_life = op->life;
3476     TCGRegSet dup_out_regs, dup_in_regs;
3477     TCGTemp *its, *ots;
3478     TCGType itype, vtype;
3479     intptr_t endian_fixup;
3480     unsigned vece;
3481     bool ok;
3482 
3483     ots = arg_temp(op->args[0]);
3484     its = arg_temp(op->args[1]);
3485 
3486     /* ENV should not be modified.  */
3487     tcg_debug_assert(!temp_readonly(ots));
3488 
3489     itype = its->type;
3490     vece = TCGOP_VECE(op);
3491     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3492 
3493     if (its->val_type == TEMP_VAL_CONST) {
3494         /* Propagate constant via movi -> dupi.  */
3495         tcg_target_ulong val = its->val;
3496         if (IS_DEAD_ARG(1)) {
3497             temp_dead(s, its);
3498         }
3499         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3500         return;
3501     }
3502 
3503     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3504     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3505 
3506     /* Allocate the output register now.  */
3507     if (ots->val_type != TEMP_VAL_REG) {
3508         TCGRegSet allocated_regs = s->reserved_regs;
3509 
3510         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3511             /* Make sure to not spill the input register. */
3512             tcg_regset_set_reg(allocated_regs, its->reg);
3513         }
3514         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3515                                  op->output_pref[0], ots->indirect_base);
3516         ots->val_type = TEMP_VAL_REG;
3517         ots->mem_coherent = 0;
3518         s->reg_to_temp[ots->reg] = ots;
3519     }
3520 
3521     switch (its->val_type) {
3522     case TEMP_VAL_REG:
3523         /*
3524          * The dup constriaints must be broad, covering all possible VECE.
3525          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3526          * to fail, indicating that extra moves are required for that case.
3527          */
3528         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3529             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3530                 goto done;
3531             }
3532             /* Try again from memory or a vector input register.  */
3533         }
3534         if (!its->mem_coherent) {
3535             /*
3536              * The input register is not synced, and so an extra store
3537              * would be required to use memory.  Attempt an integer-vector
3538              * register move first.  We do not have a TCGRegSet for this.
3539              */
3540             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3541                 break;
3542             }
3543             /* Sync the temp back to its slot and load from there.  */
3544             temp_sync(s, its, s->reserved_regs, 0, 0);
3545         }
3546         /* fall through */
3547 
3548     case TEMP_VAL_MEM:
3549 #if HOST_BIG_ENDIAN
3550         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3551         endian_fixup -= 1 << vece;
3552 #else
3553         endian_fixup = 0;
3554 #endif
3555         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3556                              its->mem_offset + endian_fixup)) {
3557             goto done;
3558         }
3559         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3560         break;
3561 
3562     default:
3563         g_assert_not_reached();
3564     }
3565 
3566     /* We now have a vector input register, so dup must succeed. */
3567     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3568     tcg_debug_assert(ok);
3569 
3570  done:
3571     if (IS_DEAD_ARG(1)) {
3572         temp_dead(s, its);
3573     }
3574     if (NEED_SYNC_ARG(0)) {
3575         temp_sync(s, ots, s->reserved_regs, 0, 0);
3576     }
3577     if (IS_DEAD_ARG(0)) {
3578         temp_dead(s, ots);
3579     }
3580 }
3581 
/*
 * Generic register allocation and host code emission for one TCG op.
 *
 * Steps, in order:
 *   1. copy the constant arguments straight into new_args[];
 *   2. place every input: keep it as a constant when the backend accepts
 *      it, otherwise load it into a register satisfying its constraint;
 *   3. release inputs that are dead after this op;
 *   4. handle conditional-branch / basic-block-end ops, or otherwise
 *      free call-clobbered registers, sync globals for side-effecting
 *      ops, and pick a register for every output;
 *   5. emit the instruction (vector or scalar flavour);
 *   6. sync and/or kill each output according to the op's life data.
 *
 * @s:  translation context whose register state is updated.
 * @op: the op being allocated; it is only read.
 */
3582 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3583 {
         /*
          * NOTE(review): arg_life is never named directly below; presumably
          * the IS_DEAD_ARG()/NEED_SYNC_ARG() macros (defined outside this
          * chunk) test it -- confirm before renaming.
          */
3584     const TCGLifeData arg_life = op->life;
3585     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3586     TCGRegSet i_allocated_regs;
3587     TCGRegSet o_allocated_regs;
3588     int i, k, nb_iargs, nb_oargs;
3589     TCGReg reg;
3590     TCGArg arg;
3591     const TCGArgConstraint *arg_ct;
3592     TCGTemp *ts;
         /* Values handed to the backend: a register number or a constant. */
3593     TCGArg new_args[TCG_MAX_OP_ARGS];
         /* Per-argument flag: non-zero when new_args[] holds a constant. */
3594     int const_args[TCG_MAX_OP_ARGS];
3595
3596     nb_oargs = def->nb_oargs;
3597     nb_iargs = def->nb_iargs;
3598
3599     /* copy constants */
3600     memcpy(new_args + nb_oargs + nb_iargs,
3601            op->args + nb_oargs + nb_iargs,
3602            sizeof(TCGArg) * def->nb_cargs);
3603
         /* Both sets start out containing only the reserved registers. */
3604     i_allocated_regs = s->reserved_regs;
3605     o_allocated_regs = s->reserved_regs;
3606
3607     /* satisfy input constraints */
3608     for (k = 0; k < nb_iargs; k++) {
3609         TCGRegSet i_preferred_regs, o_preferred_regs;
3610
             /*
              * k walks the inputs in constraint sort order; i is the real
              * argument index obtained through sort_index.
              */
3611         i = def->args_ct[nb_oargs + k].sort_index;
3612         arg = op->args[i];
3613         arg_ct = &def->args_ct[i];
3614         ts = arg_temp(arg);
3615
3616         if (ts->val_type == TEMP_VAL_CONST
3617             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3618             /* constant is OK for instruction */
3619             const_args[i] = 1;
3620             new_args[i] = ts->val;
3621             continue;
3622         }
3623
3624         i_preferred_regs = o_preferred_regs = 0;
3625         if (arg_ct->ialias) {
                 /* This input shares its register with an output. */
3626             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3627
3628             /*
3629              * If the input is readonly, then it cannot also be an
3630              * output and aliased to itself.  If the input is not
3631              * dead after the instruction, we must allocate a new
3632              * register and move it.
3633              */
3634             if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3635                 goto allocate_in_reg;
3636             }
3637
3638             /*
3639              * Check if the current register has already been allocated
3640              * for another input aliased to an output.
3641              */
3642             if (ts->val_type == TEMP_VAL_REG) {
3643                 reg = ts->reg;
3644                 for (int k2 = 0; k2 < k; k2++) {
3645                     int i2 = def->args_ct[nb_oargs + k2].sort_index;
3646                     if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3647                         goto allocate_in_reg;
3648                     }
3649                 }
3650             }
3651             i_preferred_regs = o_preferred_regs;
3652         }
3653
3654         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3655         reg = ts->reg;
3656
             /*
              * The label below is also reached by goto from the alias
              * handling above, i.e. it is entered even when the register
              * test itself was never evaluated.
              */
3657         if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3658  allocate_in_reg:
3659             /*
3660              * Allocate a new register matching the constraint
3661              * and move the temporary register into it.
3662              */
3663             temp_load(s, ts, tcg_target_available_regs[ts->type],
3664                       i_allocated_regs, 0);
3665             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3666                                 o_preferred_regs, ts->indirect_base);
3667             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3668                 /*
3669                  * Cross register class move not supported.  Sync the
3670                  * temp back to its slot and load from there.
3671                  */
3672                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3673                 tcg_out_ld(s, ts->type, reg,
3674                            ts->mem_base->reg, ts->mem_offset);
3675             }
3676         }
3677         new_args[i] = reg;
3678         const_args[i] = 0;
3679         tcg_regset_set_reg(i_allocated_regs, reg);
3680     }
3681
3682     /* mark dead temporaries and free the associated registers */
3683     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3684         if (IS_DEAD_ARG(i)) {
3685             temp_dead(s, arg_temp(op->args[i]));
3686         }
3687     }
3688
         /*
          * Branch and basic-block-end ops take their own paths and skip the
          * output allocation loop in the final else branch.
          */
3689     if (def->flags & TCG_OPF_COND_BRANCH) {
3690         tcg_reg_alloc_cbranch(s, i_allocated_regs);
3691     } else if (def->flags & TCG_OPF_BB_END) {
3692         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3693     } else {
3694         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3695             /* XXX: permit generic clobber register list ? */
3696             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3697                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3698                     tcg_reg_free(s, i, i_allocated_regs);
3699                 }
3700             }
3701         }
3702         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3703             /* sync globals if the op has side effects and might trigger
3704                an exception. */
3705             sync_globals(s, i_allocated_regs);
3706         }
3707
3708         /* satisfy the output constraints */
3709         for(k = 0; k < nb_oargs; k++) {
3710             i = def->args_ct[k].sort_index;
3711             arg = op->args[i];
3712             arg_ct = &def->args_ct[i];
3713             ts = arg_temp(arg);
3714
3715             /* ENV should not be modified.  */
3716             tcg_debug_assert(!temp_readonly(ts));
3717
                 /*
                  * Three cases: reuse the register of the aliased input
                  * (unless that input was passed as a constant); allocate a
                  * register distinct from all inputs and earlier outputs
                  * (newreg); or allocate one distinct from earlier outputs
                  * only.
                  */
3718             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3719                 reg = new_args[arg_ct->alias_index];
3720             } else if (arg_ct->newreg) {
3721                 reg = tcg_reg_alloc(s, arg_ct->regs,
3722                                     i_allocated_regs | o_allocated_regs,
3723                                     op->output_pref[k], ts->indirect_base);
3724             } else {
3725                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3726                                     op->output_pref[k], ts->indirect_base);
3727             }
3728             tcg_regset_set_reg(o_allocated_regs, reg);
                 /* Drop the temp's previous register binding, if any. */
3729             if (ts->val_type == TEMP_VAL_REG) {
3730                 s->reg_to_temp[ts->reg] = NULL;
3731             }
3732             ts->val_type = TEMP_VAL_REG;
3733             ts->reg = reg;
3734             /*
3735              * Temp value is modified, so the value kept in memory is
3736              * potentially not the same.
3737              */
3738             ts->mem_coherent = 0;
3739             s->reg_to_temp[reg] = ts;
3740             new_args[i] = reg;
3741         }
3742     }
3743
3744     /* emit instruction */
3745     if (def->flags & TCG_OPF_VECTOR) {
3746         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3747                        new_args, const_args);
3748     } else {
3749         tcg_out_op(s, op->opc, new_args, const_args);
3750     }
3751
3752     /* move the outputs in the correct register if needed */
3753     for(i = 0; i < nb_oargs; i++) {
3754         ts = arg_temp(op->args[i]);
3755
3756         /* ENV should not be modified.  */
3757         tcg_debug_assert(!temp_readonly(ts));
3758
3759         if (NEED_SYNC_ARG(i)) {
3760             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3761         } else if (IS_DEAD_ARG(i)) {
3762             temp_dead(s, ts);
3763         }
3764     }
3765 }
3766 
3767 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3768 {
3769     const TCGLifeData arg_life = op->life;
3770     TCGTemp *ots, *itsl, *itsh;
3771     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3772 
3773     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3774     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3775     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3776 
3777     ots = arg_temp(op->args[0]);
3778     itsl = arg_temp(op->args[1]);
3779     itsh = arg_temp(op->args[2]);
3780 
3781     /* ENV should not be modified.  */
3782     tcg_debug_assert(!temp_readonly(ots));
3783 
3784     /* Allocate the output register now.  */
3785     if (ots->val_type != TEMP_VAL_REG) {
3786         TCGRegSet allocated_regs = s->reserved_regs;
3787         TCGRegSet dup_out_regs =
3788             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3789 
3790         /* Make sure to not spill the input registers. */
3791         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3792             tcg_regset_set_reg(allocated_regs, itsl->reg);
3793         }
3794         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3795             tcg_regset_set_reg(allocated_regs, itsh->reg);
3796         }
3797 
3798         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3799                                  op->output_pref[0], ots->indirect_base);
3800         ots->val_type = TEMP_VAL_REG;
3801         ots->mem_coherent = 0;
3802         s->reg_to_temp[ots->reg] = ots;
3803     }
3804 
3805     /* Promote dup2 of immediates to dupi_vec. */
3806     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3807         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3808         MemOp vece = MO_64;
3809 
3810         if (val == dup_const(MO_8, val)) {
3811             vece = MO_8;
3812         } else if (val == dup_const(MO_16, val)) {
3813             vece = MO_16;
3814         } else if (val == dup_const(MO_32, val)) {
3815             vece = MO_32;
3816         }
3817 
3818         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3819         goto done;
3820     }
3821 
3822     /* If the two inputs form one 64-bit value, try dupm_vec. */
3823     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3824         if (!itsl->mem_coherent) {
3825             temp_sync(s, itsl, s->reserved_regs, 0, 0);
3826         }
3827         if (!itsh->mem_coherent) {
3828             temp_sync(s, itsh, s->reserved_regs, 0, 0);
3829         }
3830 #if HOST_BIG_ENDIAN
3831         TCGTemp *its = itsh;
3832 #else
3833         TCGTemp *its = itsl;
3834 #endif
3835         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3836                              its->mem_base->reg, its->mem_offset)) {
3837             goto done;
3838         }
3839     }
3840 
3841     /* Fall back to generic expansion. */
3842     return false;
3843 
3844  done:
3845     if (IS_DEAD_ARG(1)) {
3846         temp_dead(s, itsl);
3847     }
3848     if (IS_DEAD_ARG(2)) {
3849         temp_dead(s, itsh);
3850     }
3851     if (NEED_SYNC_ARG(0)) {
3852         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3853     } else if (IS_DEAD_ARG(0)) {
3854         temp_dead(s, ots);
3855     }
3856     return true;
3857 }
3858 
/*
 * STACK_DIR(x): negate x when TCG_TARGET_STACK_GROWSUP is defined,
 * otherwise yield x unchanged.
 */
3859 #ifdef TCG_TARGET_STACK_GROWSUP
3860 #define STACK_DIR(x) (-(x))
3861 #else
3862 #define STACK_DIR(x) (x)
3863 #endif
3864 
/*
 * Register allocation and code emission for a helper call op.
 *
 * Arguments beyond the number of host argument registers are stored to
 * the call stack first; the remaining ones are moved or loaded into
 * tcg_target_call_iarg_regs[].  Dead inputs are then released, the
 * call-clobbered registers are freed, globals are synced or saved
 * depending on the helper's flags, the call is emitted, and finally
 * each output temp is bound to tcg_target_call_oarg_regs[].
 *
 * Aborts (tcg_abort) when the stack arguments need more than
 * TCG_STATIC_CALL_ARGS_SIZE.
 */
3865 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3866 {
3867     const int nb_oargs = TCGOP_CALLO(op);
3868     const int nb_iargs = TCGOP_CALLI(op);
         /*
          * NOTE(review): arg_life is not named directly below; presumably the
          * IS_DEAD_ARG()/NEED_SYNC_ARG() macros (defined outside this chunk)
          * test it -- confirm before renaming.
          */
3869     const TCGLifeData arg_life = op->life;
3870     const TCGHelperInfo *info;
3871     int flags, nb_regs, i;
3872     TCGReg reg;
3873     TCGArg arg;
3874     TCGTemp *ts;
3875     intptr_t stack_offset;
3876     size_t call_stack_size;
3877     tcg_insn_unit *func_addr;
3878     int allocate_args;
3879     TCGRegSet allocated_regs;
3880
3881     func_addr = tcg_call_func(op);
3882     info = tcg_call_info(op);
3883     flags = info->flags;
3884
         /* nb_regs = number of inputs passed in registers (at most nb_iargs). */
3885     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3886     if (nb_regs > nb_iargs) {
3887         nb_regs = nb_iargs;
3888     }
3889
3890     /* assign stack slots first */
3891     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
         /* Round up to a multiple of TCG_TARGET_STACK_ALIGN. */
3892     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3893         ~(TCG_TARGET_STACK_ALIGN - 1);
3894     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3895     if (allocate_args) {
3896         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3897            preallocate call stack */
3898         tcg_abort();
3899     }
3900
3901     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3902     for (i = nb_regs; i < nb_iargs; i++) {
3903         arg = op->args[nb_oargs + i];
             /*
              * The offset is adjusted before the store when the stack grows
              * up and after it otherwise; dummy arguments still consume a
              * slot.
              */
3904 #ifdef TCG_TARGET_STACK_GROWSUP
3905         stack_offset -= sizeof(tcg_target_long);
3906 #endif
3907         if (arg != TCG_CALL_DUMMY_ARG) {
3908             ts = arg_temp(arg);
3909             temp_load(s, ts, tcg_target_available_regs[ts->type],
3910                       s->reserved_regs, 0);
3911             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3912         }
3913 #ifndef TCG_TARGET_STACK_GROWSUP
3914         stack_offset += sizeof(tcg_target_long);
3915 #endif
3916     }
3917
3918     /* assign input registers */
3919     allocated_regs = s->reserved_regs;
3920     for (i = 0; i < nb_regs; i++) {
3921         arg = op->args[nb_oargs + i];
3922         if (arg != TCG_CALL_DUMMY_ARG) {
3923             ts = arg_temp(arg);
3924             reg = tcg_target_call_iarg_regs[i];
3925
3926             if (ts->val_type == TEMP_VAL_REG) {
                     /* Already in a register: move only if it is the wrong one. */
3927                 if (ts->reg != reg) {
3928                     tcg_reg_free(s, reg, allocated_regs);
3929                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3930                         /*
3931                          * Cross register class move not supported.  Sync the
3932                          * temp back to its slot and load from there.
3933                          */
3934                         temp_sync(s, ts, allocated_regs, 0, 0);
3935                         tcg_out_ld(s, ts->type, reg,
3936                                    ts->mem_base->reg, ts->mem_offset);
3937                     }
3938                 }
3939             } else {
                     /* Not in a register: load with the argument register as
                        the only permitted destination. */
3940                 TCGRegSet arg_set = 0;
3941
3942                 tcg_reg_free(s, reg, allocated_regs);
3943                 tcg_regset_set_reg(arg_set, reg);
3944                 temp_load(s, ts, arg_set, allocated_regs, 0);
3945             }
3946
3947             tcg_regset_set_reg(allocated_regs, reg);
3948         }
3949     }
3950
3951     /* mark dead temporaries and free the associated registers */
3952     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3953         if (IS_DEAD_ARG(i)) {
3954             temp_dead(s, arg_temp(op->args[i]));
3955         }
3956     }
3957
3958     /* clobber call registers */
3959     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3960         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3961             tcg_reg_free(s, i, allocated_regs);
3962         }
3963     }
3964
3965     /* Save globals if they might be written by the helper, sync them if
3966        they might be read. */
3967     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3968         /* Nothing to do */
3969     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3970         sync_globals(s, allocated_regs);
3971     } else {
3972         save_globals(s, allocated_regs);
3973     }
3974
3975 #ifdef CONFIG_TCG_INTERPRETER
3976     {
             /* The interpreter build looks up a libffi call descriptor keyed
                by the helper's typemask and passes it to tcg_out_call(). */
3977         gpointer hash = (gpointer)(uintptr_t)info->typemask;
3978         ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
3979         assert(cif != NULL);
3980         tcg_out_call(s, func_addr, cif);
3981     }
3982 #else
3983     tcg_out_call(s, func_addr);
3984 #endif
3985
3986     /* assign output registers and emit moves if needed */
3987     for(i = 0; i < nb_oargs; i++) {
3988         arg = op->args[i];
3989         ts = arg_temp(arg);
3990
3991         /* ENV should not be modified.  */
3992         tcg_debug_assert(!temp_readonly(ts));
3993
3994         reg = tcg_target_call_oarg_regs[i];
             /* The return register must be free at this point. */
3995         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3996         if (ts->val_type == TEMP_VAL_REG) {
3997             s->reg_to_temp[ts->reg] = NULL;
3998         }
3999         ts->val_type = TEMP_VAL_REG;
4000         ts->reg = reg;
4001         ts->mem_coherent = 0;
4002         s->reg_to_temp[reg] = ts;
4003         if (NEED_SYNC_ARG(i)) {
4004             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4005         } else if (IS_DEAD_ARG(i)) {
4006             temp_dead(s, ts);
4007         }
4008     }
4009 }
4010 
4011 #ifdef CONFIG_PROFILER
4012 
4013 /* avoid copy/paste errors */
4014 #define PROF_ADD(to, from, field)                       \
4015     do {                                                \
4016         (to)->field += qatomic_read(&((from)->field));  \
4017     } while (0)
4018 
4019 #define PROF_MAX(to, from, field)                                       \
4020     do {                                                                \
4021         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4022         if (val__ > (to)->field) {                                      \
4023             (to)->field = val__;                                        \
4024         }                                                               \
4025     } while (0)
4026 
4027 /* Pass in a zero'ed @prof */
4028 static inline
4029 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4030 {
4031     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4032     unsigned int i;
4033 
4034     for (i = 0; i < n_ctxs; i++) {
4035         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4036         const TCGProfile *orig = &s->prof;
4037 
4038         if (counters) {
4039             PROF_ADD(prof, orig, cpu_exec_time);
4040             PROF_ADD(prof, orig, tb_count1);
4041             PROF_ADD(prof, orig, tb_count);
4042             PROF_ADD(prof, orig, op_count);
4043             PROF_MAX(prof, orig, op_count_max);
4044             PROF_ADD(prof, orig, temp_count);
4045             PROF_MAX(prof, orig, temp_count_max);
4046             PROF_ADD(prof, orig, del_op_count);
4047             PROF_ADD(prof, orig, code_in_len);
4048             PROF_ADD(prof, orig, code_out_len);
4049             PROF_ADD(prof, orig, search_out_len);
4050             PROF_ADD(prof, orig, interm_time);
4051             PROF_ADD(prof, orig, code_time);
4052             PROF_ADD(prof, orig, la_time);
4053             PROF_ADD(prof, orig, opt_time);
4054             PROF_ADD(prof, orig, restore_count);
4055             PROF_ADD(prof, orig, restore_time);
4056         }
4057         if (table) {
4058             int i;
4059 
4060             for (i = 0; i < NB_OPS; i++) {
4061                 PROF_ADD(prof, orig, table_op_count[i]);
4062             }
4063         }
4064     }
4065 }
4066 
4067 #undef PROF_ADD
4068 #undef PROF_MAX
4069 
/*
 * Snapshot only the scalar profiling counters into @prof
 * (counters = true, table = false).
 */
4070 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4071 {
4072     tcg_profile_snapshot(prof, true, false);
4073 }
4074 
/*
 * Snapshot only the per-opcode count table into @prof
 * (counters = false, table = true).
 */
4075 static void tcg_profile_snapshot_table(TCGProfile *prof)
4076 {
4077     tcg_profile_snapshot(prof, false, true);
4078 }
4079 
4080 void tcg_dump_op_count(GString *buf)
4081 {
4082     TCGProfile prof = {};
4083     int i;
4084 
4085     tcg_profile_snapshot_table(&prof);
4086     for (i = 0; i < NB_OPS; i++) {
4087         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4088                                prof.table_op_count[i]);
4089     }
4090 }
4091 
4092 int64_t tcg_cpu_exec_time(void)
4093 {
4094     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4095     unsigned int i;
4096     int64_t ret = 0;
4097 
4098     for (i = 0; i < n_ctxs; i++) {
4099         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4100         const TCGProfile *prof = &s->prof;
4101 
4102         ret += qatomic_read(&prof->cpu_exec_time);
4103     }
4104     return ret;
4105 }
4106 #else
4107 void tcg_dump_op_count(GString *buf)
4108 {
4109     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4110 }
4111 
/*
 * Stub used when the TCG profiler is not built in.  There is no value
 * to return: report an error naming this function and terminate the
 * process with EXIT_FAILURE.  This function does not return.
 */
4112 int64_t tcg_cpu_exec_time(void)
4113 {
4114     error_report("%s: TCG profiler not compiled", __func__);
4115     exit(EXIT_FAILURE);
4116 }
4117 #endif
4118 
4119 
/*
 * Generate host code for the ops queued in @s into the buffer of @tb.
 *
 * Runs the optimizer (when USE_TCG_OPTIMIZATIONS is defined), the
 * reachable-code and liveness passes, then walks the op list doing
 * register allocation and emission op by op, and finally finalizes
 * ldst/pool labels (where the target needs them), resolves relocations
 * and flushes the instruction cache.
 *
 * @pc_start is only used here to filter debug logging by address range.
 *
 * Returns the generated code size on success; -1 when the code pointer
 * passed code_gen_highwater; -2 when the TB grew beyond UINT16_MAX or
 * relocations could not be resolved; or the negative value reported by
 * tcg_out_ldst_finalize()/tcg_out_pool_finalize().
 */
4120 int tcg_gen_code(TCGContext *s, TranslationBlock *tb, target_ulong pc_start)
4121 {
4122 #ifdef CONFIG_PROFILER
4123     TCGProfile *prof = &s->prof;
4124 #endif
4125     int i, num_insns;
4126     TCGOp *op;
4127
4128 #ifdef CONFIG_PROFILER
4129     {
             /* Count the ops of this TB and update the op/temp statistics. */
4130         int n = 0;
4131
4132         QTAILQ_FOREACH(op, &s->ops, link) {
4133             n++;
4134         }
4135         qatomic_set(&prof->op_count, prof->op_count + n);
4136         if (n > prof->op_count_max) {
4137             qatomic_set(&prof->op_count_max, n);
4138         }
4139
4140         n = s->nb_temps;
4141         qatomic_set(&prof->temp_count, prof->temp_count + n);
4142         if (n > prof->temp_count_max) {
4143             qatomic_set(&prof->temp_count_max, n);
4144         }
4145     }
4146 #endif
4147
4148 #ifdef DEBUG_DISAS
4149     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4150                  && qemu_log_in_addr_range(pc_start))) {
4151         FILE *logfile = qemu_log_trylock();
4152         if (logfile) {
4153             fprintf(logfile, "OP:\n");
4154             tcg_dump_ops(s, logfile, false);
4155             fprintf(logfile, "\n");
4156             qemu_log_unlock(logfile);
4157         }
4158     }
4159 #endif
4160
4161 #ifdef CONFIG_DEBUG_TCG
4162     /* Ensure all labels referenced have been emitted.  */
4163     {
4164         TCGLabel *l;
4165         bool error = false;
4166
4167         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4168             if (unlikely(!l->present) && l->refs) {
4169                 qemu_log_mask(CPU_LOG_TB_OP,
4170                               "$L%d referenced but not present.\n", l->id);
4171                 error = true;
4172             }
4173         }
4174         assert(!error);
4175     }
4176 #endif
4177
         /*
          * Timers are accumulated as "-start ... +end": subtract the clock
          * before a phase and add it back after, leaving the elapsed time.
          */
4178 #ifdef CONFIG_PROFILER
4179     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4180 #endif
4181
4182 #ifdef USE_TCG_OPTIMIZATIONS
4183     tcg_optimize(s);
4184 #endif
4185
4186 #ifdef CONFIG_PROFILER
4187     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4188     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4189 #endif
4190
4191     reachable_code_pass(s);
4192     liveness_pass_1(s);
4193
4194     if (s->nb_indirects > 0) {
4195 #ifdef DEBUG_DISAS
4196         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4197                      && qemu_log_in_addr_range(pc_start))) {
4198             FILE *logfile = qemu_log_trylock();
4199             if (logfile) {
4200                 fprintf(logfile, "OP before indirect lowering:\n");
4201                 tcg_dump_ops(s, logfile, false);
4202                 fprintf(logfile, "\n");
4203                 qemu_log_unlock(logfile);
4204             }
4205         }
4206 #endif
4207         /* Replace indirect temps with direct temps.  */
4208         if (liveness_pass_2(s)) {
4209             /* If changes were made, re-run liveness.  */
4210             liveness_pass_1(s);
4211         }
4212     }
4213
4214 #ifdef CONFIG_PROFILER
4215     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4216 #endif
4217
4218 #ifdef DEBUG_DISAS
4219     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4220                  && qemu_log_in_addr_range(pc_start))) {
4221         FILE *logfile = qemu_log_trylock();
4222         if (logfile) {
4223             fprintf(logfile, "OP after optimization and liveness analysis:\n");
4224             tcg_dump_ops(s, logfile, true);
4225             fprintf(logfile, "\n");
4226             qemu_log_unlock(logfile);
4227         }
4228     }
4229 #endif
4230
4231     tcg_reg_alloc_start(s);
4232
4233     /*
4234      * Reset the buffer pointers when restarting after overflow.
4235      * TODO: Move this into translate-all.c with the rest of the
4236      * buffer management.  Having only this done here is confusing.
4237      */
4238     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4239     s->code_ptr = s->code_buf;
4240
4241 #ifdef TCG_TARGET_NEED_LDST_LABELS
4242     QSIMPLEQ_INIT(&s->ldst_labels);
4243 #endif
4244 #ifdef TCG_TARGET_NEED_POOL_LABELS
4245     s->pool_labels = NULL;
4246 #endif
4247
         /*
          * num_insns is the index of the guest instruction currently being
          * emitted; it starts at -1 and is bumped by every insn_start op.
          */
4248     num_insns = -1;
4249     QTAILQ_FOREACH(op, &s->ops, link) {
4250         TCGOpcode opc = op->opc;
4251
4252 #ifdef CONFIG_PROFILER
4253         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4254 #endif
4255
4256         switch (opc) {
4257         case INDEX_op_mov_i32:
4258         case INDEX_op_mov_i64:
4259         case INDEX_op_mov_vec:
4260             tcg_reg_alloc_mov(s, op);
4261             break;
4262         case INDEX_op_dup_vec:
4263             tcg_reg_alloc_dup(s, op);
4264             break;
4265         case INDEX_op_insn_start:
                 /* Close the previous instruction by recording where its
                    host code ends. */
4266             if (num_insns >= 0) {
4267                 size_t off = tcg_current_code_size(s);
4268                 s->gen_insn_end_off[num_insns] = off;
4269                 /* Assert that we do not overflow our stored offset.  */
4270                 assert(s->gen_insn_end_off[num_insns] == off);
4271             }
4272             num_insns++;
4273             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4274                 target_ulong a;
                     /* A guest word wider than a host register is carried in
                        two consecutive op arguments (low part first). */
4275 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4276                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4277 #else
4278                 a = op->args[i];
4279 #endif
4280                 s->gen_insn_data[num_insns][i] = a;
4281             }
4282             break;
4283         case INDEX_op_discard:
4284             temp_dead(s, arg_temp(op->args[0]));
4285             break;
4286         case INDEX_op_set_label:
4287             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4288             tcg_out_label(s, arg_label(op->args[0]));
4289             break;
4290         case INDEX_op_call:
4291             tcg_reg_alloc_call(s, op);
4292             break;
4293         case INDEX_op_dup2_vec:
                 /* A false return means nothing was emitted: use the generic
                    path below. */
4294             if (tcg_reg_alloc_dup2(s, op)) {
4295                 break;
4296             }
4297             /* fall through */
4298         default:
4299             /* Sanity check that we've not introduced any unhandled opcodes. */
4300             tcg_debug_assert(tcg_op_supported(opc));
4301             /* Note: in order to speed up the code, it would be much
4302                faster to have specialized register allocator functions for
4303                some common argument patterns */
4304             tcg_reg_alloc_op(s, op);
4305             break;
4306         }
4307 #ifdef CONFIG_DEBUG_TCG
4308         check_regs(s);
4309 #endif
4310         /* Test for (pending) buffer overflow.  The assumption is that any
4311            one operation beginning below the high water mark cannot overrun
4312            the buffer completely.  Thus we can test for overflow after
4313            generating code without having to check during generation.  */
4314         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4315             return -1;
4316         }
4317         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4318         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4319             return -2;
4320         }
4321     }
         /* At least one insn_start op must have been seen. */
4322     tcg_debug_assert(num_insns >= 0);
4323     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4324
4325     /* Generate TB finalization at the end of block */
4326 #ifdef TCG_TARGET_NEED_LDST_LABELS
4327     i = tcg_out_ldst_finalize(s);
4328     if (i < 0) {
4329         return i;
4330     }
4331 #endif
4332 #ifdef TCG_TARGET_NEED_POOL_LABELS
4333     i = tcg_out_pool_finalize(s);
4334     if (i < 0) {
4335         return i;
4336     }
4337 #endif
4338     if (!tcg_resolve_relocs(s)) {
4339         return -2;
4340     }
4341
4342 #ifndef CONFIG_TCG_INTERPRETER
4343     /* flush instruction cache */
4344     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4345                         (uintptr_t)s->code_buf,
4346                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4347 #endif
4348
4349     return tcg_current_code_size(s);
4350 }
4351 
4352 #ifdef CONFIG_PROFILER
4353 void tcg_dump_info(GString *buf)
4354 {
4355     TCGProfile prof = {};
4356     const TCGProfile *s;
4357     int64_t tb_count;
4358     int64_t tb_div_count;
4359     int64_t tot;
4360 
4361     tcg_profile_snapshot_counters(&prof);
4362     s = &prof;
4363     tb_count = s->tb_count;
4364     tb_div_count = tb_count ? tb_count : 1;
4365     tot = s->interm_time + s->code_time;
4366 
4367     g_string_append_printf(buf, "JIT cycles          %" PRId64
4368                            " (%0.3f s at 2.4 GHz)\n",
4369                            tot, tot / 2.4e9);
4370     g_string_append_printf(buf, "translated TBs      %" PRId64
4371                            " (aborted=%" PRId64 " %0.1f%%)\n",
4372                            tb_count, s->tb_count1 - tb_count,
4373                            (double)(s->tb_count1 - s->tb_count)
4374                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4375     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
4376                            (double)s->op_count / tb_div_count, s->op_count_max);
4377     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
4378                            (double)s->del_op_count / tb_div_count);
4379     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
4380                            (double)s->temp_count / tb_div_count,
4381                            s->temp_count_max);
4382     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
4383                            (double)s->code_out_len / tb_div_count);
4384     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
4385                            (double)s->search_out_len / tb_div_count);
4386 
4387     g_string_append_printf(buf, "cycles/op           %0.1f\n",
4388                            s->op_count ? (double)tot / s->op_count : 0);
4389     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
4390                            s->code_in_len ? (double)tot / s->code_in_len : 0);
4391     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
4392                            s->code_out_len ? (double)tot / s->code_out_len : 0);
4393     g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
4394                            s->search_out_len ?
4395                            (double)tot / s->search_out_len : 0);
4396     if (tot == 0) {
4397         tot = 1;
4398     }
4399     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
4400                            (double)s->interm_time / tot * 100.0);
4401     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
4402                            (double)s->code_time / tot * 100.0);
4403     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
4404                            (double)s->opt_time / (s->code_time ?
4405                                                   s->code_time : 1)
4406                            * 100.0);
4407     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
4408                            (double)s->la_time / (s->code_time ?
4409                                                  s->code_time : 1) * 100.0);
4410     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
4411                            s->restore_count);
4412     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
4413                            s->restore_count ?
4414                            (double)s->restore_time / s->restore_count : 0);
4415 }
4416 #else
4417 void tcg_dump_info(GString *buf)
4418 {
4419     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4420 }
4421 #endif
4422 
4423 #ifdef ELF_HOST_MACHINE
4424 /* In order to use this feature, the backend needs to do three things:
4425 
4426    (1) Define ELF_HOST_MACHINE to indicate both what value to
4427        put into the ELF image and to indicate support for the feature.
4428 
4429    (2) Define tcg_register_jit.  This should create a buffer containing
4430        the contents of a .debug_frame section that describes the post-
4431        prologue unwind info for the tcg machine.
4432 
4433    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4434 */
4435 
4436 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/*
 * Values stored in jit_descriptor.action_flag.  The numeric values are
 * spelled out because they are part of the interface shared with GDB.
 */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;
4442 
/*
 * One node of the doubly linked list of symbol files exposed to GDB.
 * Field order and types are part of the GDB interface; do not change.
 */
struct jit_code_entry {
    struct jit_code_entry *next_entry, *prev_entry; /* list links */
    const void *symfile_addr;   /* start of the in-memory symbol file */
    uint64_t symfile_size;      /* size of that symbol file, in bytes */
};
4449 
/*
 * Root object of the JIT debug interface.
 * Field order and types are part of the GDB interface; do not change.
 */
struct jit_descriptor {
    uint32_t version, action_flag;          /* action_flag: a jit_actions_t */
    struct jit_code_entry *relevant_entry;  /* entry the action refers to */
    struct jit_code_entry *first_entry;     /* head of the entry list */
};
4456 
/*
 * Notification hook of the GDB JIT interface: it is called after
 * __jit_debug_descriptor has been updated.  The function is deliberately
 * empty; it is kept out of line, and given an empty asm body, so that the
 * call is not optimized away.
 */
__attribute__((noinline)) void __jit_debug_register_code(void);

void __jit_debug_register_code(void)
{
    __asm__ __volatile__("");
}
4462 
4463 /* Must statically initialize the version, because GDB may check
4464    the version before we can set it.  */
4465 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4466 
4467 /* End GDB interface.  */
4468 
/*
 * Look up @str in the ELF-style string table @strtab and return its byte
 * offset from the start of the table.
 *
 * The table is expected to begin with a NUL byte (the empty name at
 * offset 0), followed by NUL-terminated names, and to end with at least
 * one extra NUL byte.  That extra byte reads as an empty entry and marks
 * the end of the table, so the scan stops there instead of running past
 * the end of the buffer when @str is absent.
 *
 * Returns the offset of the first matching name, or 0 (the empty name)
 * if @str is empty or not present.
 */
static int find_string(const char *strtab, const char *str)
{
    /* Offset 0 holds the mandatory empty name; real names start at 1. */
    const char *p = strtab + 1;

    while (*p != '\0') {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        /* Step over this name and its terminating NUL. */
        p += strlen(p) + 1;
    }

    /* Reached the terminating empty entry without a match. */
    return 0;
}
4480 
4481 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4482                                  const void *debug_frame,
4483                                  size_t debug_frame_size)
4484 {
4485     struct __attribute__((packed)) DebugInfo {
4486         uint32_t  len;
4487         uint16_t  version;
4488         uint32_t  abbrev;
4489         uint8_t   ptr_size;
4490         uint8_t   cu_die;
4491         uint16_t  cu_lang;
4492         uintptr_t cu_low_pc;
4493         uintptr_t cu_high_pc;
4494         uint8_t   fn_die;
4495         char      fn_name[16];
4496         uintptr_t fn_low_pc;
4497         uintptr_t fn_high_pc;
4498         uint8_t   cu_eoc;
4499     };
4500 
4501     struct ElfImage {
4502         ElfW(Ehdr) ehdr;
4503         ElfW(Phdr) phdr;
4504         ElfW(Shdr) shdr[7];
4505         ElfW(Sym)  sym[2];
4506         struct DebugInfo di;
4507         uint8_t    da[24];
4508         char       str[80];
4509     };
4510 
4511     struct ElfImage *img;
4512 
4513     static const struct ElfImage img_template = {
4514         .ehdr = {
4515             .e_ident[EI_MAG0] = ELFMAG0,
4516             .e_ident[EI_MAG1] = ELFMAG1,
4517             .e_ident[EI_MAG2] = ELFMAG2,
4518             .e_ident[EI_MAG3] = ELFMAG3,
4519             .e_ident[EI_CLASS] = ELF_CLASS,
4520             .e_ident[EI_DATA] = ELF_DATA,
4521             .e_ident[EI_VERSION] = EV_CURRENT,
4522             .e_type = ET_EXEC,
4523             .e_machine = ELF_HOST_MACHINE,
4524             .e_version = EV_CURRENT,
4525             .e_phoff = offsetof(struct ElfImage, phdr),
4526             .e_shoff = offsetof(struct ElfImage, shdr),
4527             .e_ehsize = sizeof(ElfW(Shdr)),
4528             .e_phentsize = sizeof(ElfW(Phdr)),
4529             .e_phnum = 1,
4530             .e_shentsize = sizeof(ElfW(Shdr)),
4531             .e_shnum = ARRAY_SIZE(img->shdr),
4532             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4533 #ifdef ELF_HOST_FLAGS
4534             .e_flags = ELF_HOST_FLAGS,
4535 #endif
4536 #ifdef ELF_OSABI
4537             .e_ident[EI_OSABI] = ELF_OSABI,
4538 #endif
4539         },
4540         .phdr = {
4541             .p_type = PT_LOAD,
4542             .p_flags = PF_X,
4543         },
4544         .shdr = {
4545             [0] = { .sh_type = SHT_NULL },
4546             /* Trick: The contents of code_gen_buffer are not present in
4547                this fake ELF file; that got allocated elsewhere.  Therefore
4548                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4549                will not look for contents.  We can record any address.  */
4550             [1] = { /* .text */
4551                 .sh_type = SHT_NOBITS,
4552                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4553             },
4554             [2] = { /* .debug_info */
4555                 .sh_type = SHT_PROGBITS,
4556                 .sh_offset = offsetof(struct ElfImage, di),
4557                 .sh_size = sizeof(struct DebugInfo),
4558             },
4559             [3] = { /* .debug_abbrev */
4560                 .sh_type = SHT_PROGBITS,
4561                 .sh_offset = offsetof(struct ElfImage, da),
4562                 .sh_size = sizeof(img->da),
4563             },
4564             [4] = { /* .debug_frame */
4565                 .sh_type = SHT_PROGBITS,
4566                 .sh_offset = sizeof(struct ElfImage),
4567             },
4568             [5] = { /* .symtab */
4569                 .sh_type = SHT_SYMTAB,
4570                 .sh_offset = offsetof(struct ElfImage, sym),
4571                 .sh_size = sizeof(img->sym),
4572                 .sh_info = 1,
4573                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4574                 .sh_entsize = sizeof(ElfW(Sym)),
4575             },
4576             [6] = { /* .strtab */
4577                 .sh_type = SHT_STRTAB,
4578                 .sh_offset = offsetof(struct ElfImage, str),
4579                 .sh_size = sizeof(img->str),
4580             }
4581         },
4582         .sym = {
4583             [1] = { /* code_gen_buffer */
4584                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4585                 .st_shndx = 1,
4586             }
4587         },
4588         .di = {
4589             .len = sizeof(struct DebugInfo) - 4,
4590             .version = 2,
4591             .ptr_size = sizeof(void *),
4592             .cu_die = 1,
4593             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4594             .fn_die = 2,
4595             .fn_name = "code_gen_buffer"
4596         },
4597         .da = {
4598             1,          /* abbrev number (the cu) */
4599             0x11, 1,    /* DW_TAG_compile_unit, has children */
4600             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4601             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4602             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4603             0, 0,       /* end of abbrev */
4604             2,          /* abbrev number (the fn) */
4605             0x2e, 0,    /* DW_TAG_subprogram, no children */
4606             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4607             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4608             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4609             0, 0,       /* end of abbrev */
4610             0           /* no more abbrev */
4611         },
4612         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4613                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4614     };
4615 
4616     /* We only need a single jit entry; statically allocate it.  */
4617     static struct jit_code_entry one_entry;
4618 
4619     uintptr_t buf = (uintptr_t)buf_ptr;
4620     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4621     DebugFrameHeader *dfh;
4622 
4623     img = g_malloc(img_size);
4624     *img = img_template;
4625 
4626     img->phdr.p_vaddr = buf;
4627     img->phdr.p_paddr = buf;
4628     img->phdr.p_memsz = buf_size;
4629 
4630     img->shdr[1].sh_name = find_string(img->str, ".text");
4631     img->shdr[1].sh_addr = buf;
4632     img->shdr[1].sh_size = buf_size;
4633 
4634     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4635     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4636 
4637     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4638     img->shdr[4].sh_size = debug_frame_size;
4639 
4640     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4641     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4642 
4643     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4644     img->sym[1].st_value = buf;
4645     img->sym[1].st_size = buf_size;
4646 
4647     img->di.cu_low_pc = buf;
4648     img->di.cu_high_pc = buf + buf_size;
4649     img->di.fn_low_pc = buf;
4650     img->di.fn_high_pc = buf + buf_size;
4651 
4652     dfh = (DebugFrameHeader *)(img + 1);
4653     memcpy(dfh, debug_frame, debug_frame_size);
4654     dfh->fde.func_start = buf;
4655     dfh->fde.func_len = buf_size;
4656 
4657 #ifdef DEBUG_JIT
4658     /* Enable this block to be able to debug the ELF image file creation.
4659        One can use readelf, objdump, or other inspection utilities.  */
4660     {
4661         g_autofree char *jit = g_strdup_printf("%s/qemu.jit", g_get_tmp_dir());
4662         FILE *f = fopen(jit, "w+b");
4663         if (f) {
4664             if (fwrite(img, img_size, 1, f) != img_size) {
4665                 /* Avoid stupid unused return value warning for fwrite.  */
4666             }
4667             fclose(f);
4668         }
4669     }
4670 #endif
4671 
4672     one_entry.symfile_addr = img;
4673     one_entry.symfile_size = img_size;
4674 
4675     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4676     __jit_debug_descriptor.relevant_entry = &one_entry;
4677     __jit_debug_descriptor.first_entry = &one_entry;
4678     __jit_debug_register_code();
4679 }
4680 #else
4681 /* No support for the feature.  Provide the entry point expected by exec.c,
4682    and implement the internal function we declared earlier.  */
4683 
/*
 * Variant used when ELF_HOST_MACHINE is not defined: there is no JIT
 * debug image to build, so every argument is ignored.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
4689 
/*
 * Variant used when ELF_HOST_MACHINE is not defined: the entry point
 * still exists for callers, but it does nothing.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
    (void)buf;
    (void)buf_size;
}
4693 #endif /* ELF_HOST_MACHINE */
4694 
4695 #if !TCG_TARGET_MAYBE_vec
4696 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4697 {
4698     g_assert_not_reached();
4699 }
4700 #endif
4701