xref: /qemu/tcg/tcg.c (revision 336d354b)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 #include "qemu/cacheinfo.h"
40 
41 /* Note: the long-term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #ifdef HOST_WORDS_BIGENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg-internal.h"
64 
65 #ifdef CONFIG_TCG_INTERPRETER
66 #include <ffi.h>
67 #endif
68 
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70    used here. */
71 static void tcg_target_init(TCGContext *s);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
77 typedef struct {
78     uint32_t len __attribute__((aligned((sizeof(void *)))));
79     uint32_t id;
80     uint8_t version;
81     char augmentation[1];
82     uint8_t code_align;
83     uint8_t data_align;
84     uint8_t return_column;
85 } DebugFrameCIE;
86 
87 typedef struct QEMU_PACKED {
88     uint32_t len __attribute__((aligned((sizeof(void *)))));
89     uint32_t cie_offset;
90     uintptr_t func_start;
91     uintptr_t func_len;
92 } DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98 
99 static void tcg_register_jit_int(const void *buf, size_t size,
100                                  const void *debug_frame,
101                                  size_t debug_frame_size)
102     __attribute__((unused));
103 
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
106                        intptr_t arg2);
107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
108 static void tcg_out_movi(TCGContext *s, TCGType type,
109                          TCGReg ret, tcg_target_long arg);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111                        const TCGArg args[TCG_MAX_OP_ARGS],
112                        const int const_args[TCG_MAX_OP_ARGS]);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115                             TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117                              TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119                              TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
121                            unsigned vecl, unsigned vece,
122                            const TCGArg args[TCG_MAX_OP_ARGS],
123                            const int const_args[TCG_MAX_OP_ARGS]);
124 #else
125 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
126                                    TCGReg dst, TCGReg src)
127 {
128     g_assert_not_reached();
129 }
130 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
131                                     TCGReg dst, TCGReg base, intptr_t offset)
132 {
133     g_assert_not_reached();
134 }
135 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
136                                     TCGReg dst, int64_t arg)
137 {
138     g_assert_not_reached();
139 }
140 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
141                                   unsigned vecl, unsigned vece,
142                                   const TCGArg args[TCG_MAX_OP_ARGS],
143                                   const int const_args[TCG_MAX_OP_ARGS])
144 {
145     g_assert_not_reached();
146 }
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149                        intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151                         TCGReg base, intptr_t ofs);
152 #ifdef CONFIG_TCG_INTERPRETER
153 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
154                          ffi_cif *cif);
155 #else
156 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
157 #endif
158 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
159 #ifdef TCG_TARGET_NEED_LDST_LABELS
160 static int tcg_out_ldst_finalize(TCGContext *s);
161 #endif
162 
163 TCGContext tcg_init_ctx;
164 __thread TCGContext *tcg_ctx;
165 
166 TCGContext **tcg_ctxs;
167 unsigned int tcg_cur_ctxs;
168 unsigned int tcg_max_ctxs;
169 TCGv_env cpu_env = 0;
170 const void *tcg_code_gen_epilogue;
171 uintptr_t tcg_splitwx_diff;
172 
173 #ifndef CONFIG_TCG_INTERPRETER
174 tcg_prologue_fn *tcg_qemu_tb_exec;
175 #endif
176 
177 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
178 static TCGRegSet tcg_target_call_clobber_regs;
179 
180 #if TCG_TARGET_INSN_UNIT_SIZE == 1
181 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
182 {
183     *s->code_ptr++ = v;
184 }
185 
186 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
187                                                       uint8_t v)
188 {
189     *p = v;
190 }
191 #endif
192 
193 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
194 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
195 {
196     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
197         *s->code_ptr++ = v;
198     } else {
199         tcg_insn_unit *p = s->code_ptr;
200         memcpy(p, &v, sizeof(v));
201         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
202     }
203 }
204 
205 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
206                                                        uint16_t v)
207 {
208     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
209         *p = v;
210     } else {
211         memcpy(p, &v, sizeof(v));
212     }
213 }
214 #endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
217 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
229                                                        uint32_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
240 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
252                                                        uint64_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
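
/*
 * Example (illustrative, not part of the original file): on a host with
 * TCG_TARGET_INSN_UNIT_SIZE == 1 (e.g. x86), tcg_out32() above takes the
 * memcpy path and advances s->code_ptr by four one-byte units, while a
 * host with 4-byte units (e.g. aarch64) performs a single store.
 */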
261 
262 /* label relocation processing */
263 
264 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
265                           TCGLabel *l, intptr_t addend)
266 {
267     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
268 
269     r->type = type;
270     r->ptr = code_ptr;
271     r->addend = addend;
272     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
273 }
274 
275 static void tcg_out_label(TCGContext *s, TCGLabel *l)
276 {
277     tcg_debug_assert(!l->has_value);
278     l->has_value = 1;
279     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
280 }
281 
282 TCGLabel *gen_new_label(void)
283 {
284     TCGContext *s = tcg_ctx;
285     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
286 
287     memset(l, 0, sizeof(TCGLabel));
288     l->id = s->nb_labels++;
289     QSIMPLEQ_INIT(&l->relocs);
290 
291     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
292 
293     return l;
294 }
295 
296 static bool tcg_resolve_relocs(TCGContext *s)
297 {
298     TCGLabel *l;
299 
300     QSIMPLEQ_FOREACH(l, &s->labels, next) {
301         TCGRelocation *r;
302         uintptr_t value = l->u.value;
303 
304         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
305             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
306                 return false;
307             }
308         }
309     }
310     return true;
311 }
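
/*
 * Lifecycle sketch (illustrative): a backend emitting a forward branch to
 * a not-yet-emitted label records the site with tcg_out_reloc(); once the
 * label is reached, tcg_out_label() supplies its value, and at the end of
 * the TB tcg_resolve_relocs() patches every recorded site.  R_EXAMPLE
 * below is a made-up relocation type:
 *
 *     TCGLabel *l = gen_new_label();
 *     tcg_out_reloc(s, s->code_ptr, R_EXAMPLE, l, 0);
 *     ...
 *     tcg_out_label(s, l);
 *     ...tcg_resolve_relocs(s) runs from tcg_gen_code()...
 */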
312 
313 static void set_jmp_reset_offset(TCGContext *s, int which)
314 {
315     /*
316      * We will check for overflow at the end of the opcode loop in
317      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
318      */
319     s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
320 }
321 
322 /* Signal overflow, starting over with fewer guest insns. */
323 static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
324 {
325     siglongjmp(s->jmp_trans, -2);
326 }
327 
328 #define C_PFX1(P, A)                    P##A
329 #define C_PFX2(P, A, B)                 P##A##_##B
330 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
331 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
332 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
333 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
334 
335 /* Define an enumeration for the various combinations. */
336 
337 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
338 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
339 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
340 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
341 
342 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
343 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
344 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
345 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
346 
347 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
348 
349 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
350 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
351 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
352 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
353 
354 typedef enum {
355 #include "tcg-target-con-set.h"
356 } TCGConstraintSetIndex;
357 
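/*
 * For example (illustrative), an entry C_O1_I2(r, r, ri) in
 * tcg-target-con-set.h expands via C_PFX3 to the enumerator
 * c_o1_i2_r_r_ri above, to { .args_ct_str = { "r", "r", "ri" } } in
 * constraint_sets[] below, and to the matching value returned by
 * tcg_target_op_def(), keeping all three views in lockstep.
 */
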
358 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
359 
360 #undef C_O0_I1
361 #undef C_O0_I2
362 #undef C_O0_I3
363 #undef C_O0_I4
364 #undef C_O1_I1
365 #undef C_O1_I2
366 #undef C_O1_I3
367 #undef C_O1_I4
368 #undef C_N1_I2
369 #undef C_O2_I1
370 #undef C_O2_I2
371 #undef C_O2_I3
372 #undef C_O2_I4
373 
374 /* Put all of the constraint sets into an array, indexed by the enum. */
375 
376 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
377 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
378 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
379 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
380 
381 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
382 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
383 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
384 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
385 
386 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
387 
388 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
389 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
390 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
391 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
392 
393 static const TCGTargetOpDef constraint_sets[] = {
394 #include "tcg-target-con-set.h"
395 };
396 
397 
398 #undef C_O0_I1
399 #undef C_O0_I2
400 #undef C_O0_I3
401 #undef C_O0_I4
402 #undef C_O1_I1
403 #undef C_O1_I2
404 #undef C_O1_I3
405 #undef C_O1_I4
406 #undef C_N1_I2
407 #undef C_O2_I1
408 #undef C_O2_I2
409 #undef C_O2_I3
410 #undef C_O2_I4
411 
412 /* Expand the enumerator to be returned from tcg_target_op_def(). */
413 
414 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
415 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
416 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
417 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
418 
419 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
420 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
421 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
422 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
423 
424 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
425 
426 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
427 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
428 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
429 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
430 
431 #include "tcg-target.c.inc"
432 
433 static void alloc_tcg_plugin_context(TCGContext *s)
434 {
435 #ifdef CONFIG_PLUGIN
436     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
437     s->plugin_tb->insns =
438         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
439 #endif
440 }
441 
442 /*
443  * All TCG threads except the parent (i.e. the one that called tcg_context_init
444  * and registered the target's TCG globals) must register with this function
445  * before initiating translation.
446  *
447  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
448  * of tcg_region_init() for the reasoning behind this.
449  *
450  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
451  * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
452  * is not used anymore for translation once this function is called.
453  *
454  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
455  * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
456  */
457 #ifdef CONFIG_USER_ONLY
458 void tcg_register_thread(void)
459 {
460     tcg_ctx = &tcg_init_ctx;
461 }
462 #else
463 void tcg_register_thread(void)
464 {
465     TCGContext *s = g_malloc(sizeof(*s));
466     unsigned int i, n;
467 
468     *s = tcg_init_ctx;
469 
470     /* Relink mem_base.  */
471     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
472         if (tcg_init_ctx.temps[i].mem_base) {
473             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
474             tcg_debug_assert(b >= 0 && b < n);
475             s->temps[i].mem_base = &s->temps[b];
476         }
477     }
478 
479     /* Claim an entry in tcg_ctxs */
480     n = qatomic_fetch_inc(&tcg_cur_ctxs);
481     g_assert(n < tcg_max_ctxs);
482     qatomic_set(&tcg_ctxs[n], s);
483 
484     if (n > 0) {
485         alloc_tcg_plugin_context(s);
486         tcg_region_initial_alloc(s);
487     }
488 
489     tcg_ctx = s;
490 }
491 #endif /* !CONFIG_USER_ONLY */
492 
493 /* pool-based memory allocation */
494 void *tcg_malloc_internal(TCGContext *s, int size)
495 {
496     TCGPool *p;
497     int pool_size;
498 
499     if (size > TCG_POOL_CHUNK_SIZE) {
500         /* big malloc: insert a new pool (XXX: could optimize) */
501         p = g_malloc(sizeof(TCGPool) + size);
502         p->size = size;
503         p->next = s->pool_first_large;
504         s->pool_first_large = p;
505         return p->data;
506     } else {
507         p = s->pool_current;
508         if (!p) {
509             p = s->pool_first;
510             if (!p)
511                 goto new_pool;
512         } else {
513             if (!p->next) {
514             new_pool:
515                 pool_size = TCG_POOL_CHUNK_SIZE;
516                 p = g_malloc(sizeof(TCGPool) + pool_size);
517                 p->size = pool_size;
518                 p->next = NULL;
519                 if (s->pool_current)
520                     s->pool_current->next = p;
521                 else
522                     s->pool_first = p;
523             } else {
524                 p = p->next;
525             }
526         }
527     }
528     s->pool_current = p;
529     s->pool_cur = p->data + size;
530     s->pool_end = p->data + p->size;
531     return p->data;
532 }
533 
534 void tcg_pool_reset(TCGContext *s)
535 {
536     TCGPool *p, *t;
537     for (p = s->pool_first_large; p; p = t) {
538         t = p->next;
539         g_free(p);
540     }
541     s->pool_first_large = NULL;
542     s->pool_cur = s->pool_end = NULL;
543     s->pool_current = NULL;
544 }
545 
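/*
 * Usage sketch (illustrative): tcg_malloc() in tcg.h inlines the fast path
 * and only calls tcg_malloc_internal() when the current chunk is exhausted.
 * Nothing is freed individually; allocations live until the next
 * tcg_pool_reset(), which suits per-TB data such as TCGRelocation:
 *
 *     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
 *     ...use r while translating the current TB...
 *     tcg_pool_reset(s);   // reclaims r and all other pool allocations
 */
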
546 #include "exec/helper-proto.h"
547 
548 static const TCGHelperInfo all_helpers[] = {
549 #include "exec/helper-tcg.h"
550 };
551 static GHashTable *helper_table;
552 
553 #ifdef CONFIG_TCG_INTERPRETER
554 static GHashTable *ffi_table;
555 
556 static ffi_type * const typecode_to_ffi[8] = {
557     [dh_typecode_void] = &ffi_type_void,
558     [dh_typecode_i32]  = &ffi_type_uint32,
559     [dh_typecode_s32]  = &ffi_type_sint32,
560     [dh_typecode_i64]  = &ffi_type_uint64,
561     [dh_typecode_s64]  = &ffi_type_sint64,
562     [dh_typecode_ptr]  = &ffi_type_pointer,
563 };
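
/*
 * The helper typemask packs one 3-bit dh_typecode per slot: bits [2:0]
 * hold the return type and bits [3n+5:3n+3] hold argument n, which is why
 * argument types are extracted with extract32(typemask, (i + 1) * 3, 3)
 * below.  For example (illustrative), a helper returning i32 and taking
 * one i64 argument has typemask dh_typecode_i32 | (dh_typecode_i64 << 3).
 */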
564 #endif
565 
566 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
567 static void process_op_defs(TCGContext *s);
568 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
569                                             TCGReg reg, const char *name);
570 
571 static void tcg_context_init(unsigned max_cpus)
572 {
573     TCGContext *s = &tcg_init_ctx;
574     int op, total_args, n, i;
575     TCGOpDef *def;
576     TCGArgConstraint *args_ct;
577     TCGTemp *ts;
578 
579     memset(s, 0, sizeof(*s));
580     s->nb_globals = 0;
581 
582     /* Count total number of arguments and allocate the corresponding
583        space */
584     total_args = 0;
585     for (op = 0; op < NB_OPS; op++) {
586         def = &tcg_op_defs[op];
587         n = def->nb_iargs + def->nb_oargs;
588         total_args += n;
589     }
590 
591     args_ct = g_new0(TCGArgConstraint, total_args);
592 
593     for (op = 0; op < NB_OPS; op++) {
594         def = &tcg_op_defs[op];
595         def->args_ct = args_ct;
596         n = def->nb_iargs + def->nb_oargs;
597         args_ct += n;
598     }
599 
600     /* Register helpers.  */
601     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
602     helper_table = g_hash_table_new(NULL, NULL);
603 
604     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
605         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
606                             (gpointer)&all_helpers[i]);
607     }
608 
609 #ifdef CONFIG_TCG_INTERPRETER
610     /* g_direct_hash/equal for direct comparisons on uint32_t.  */
611     ffi_table = g_hash_table_new(NULL, NULL);
612     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
613         struct {
614             ffi_cif cif;
615             ffi_type *args[];
616         } *ca;
617         uint32_t typemask = all_helpers[i].typemask;
618         gpointer hash = (gpointer)(uintptr_t)typemask;
619         ffi_status status;
620         int nargs;
621 
622         if (g_hash_table_lookup(ffi_table, hash)) {
623             continue;
624         }
625 
626         /* Ignoring the return type, find the last non-zero field. */
627         nargs = 32 - clz32(typemask >> 3);
628         nargs = DIV_ROUND_UP(nargs, 3);
629 
630         ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
631         ca->cif.rtype = typecode_to_ffi[typemask & 7];
632         ca->cif.nargs = nargs;
633 
634         if (nargs != 0) {
635             ca->cif.arg_types = ca->args;
636             for (int j = 0; j < nargs; ++j) {
637                 int typecode = extract32(typemask, (j + 1) * 3, 3);
638                 ca->args[j] = typecode_to_ffi[typecode];
639             }
640         }
641 
642         status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
643                               ca->cif.rtype, ca->cif.arg_types);
644         assert(status == FFI_OK);
645 
646         g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
647     }
648 #endif
649 
650     tcg_target_init(s);
651     process_op_defs(s);
652 
653     /* Reverse the order of the saved registers, assuming they're all at
654        the start of tcg_target_reg_alloc_order.  */
655     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
656         int r = tcg_target_reg_alloc_order[n];
657         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
658             break;
659         }
660     }
661     for (i = 0; i < n; ++i) {
662         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
663     }
664     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
665         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
666     }
667 
668     alloc_tcg_plugin_context(s);
669 
670     tcg_ctx = s;
671     /*
672      * In user-mode we simply share the init context among threads, since we
673      * use a single region. See the documentation of tcg_region_init() for the
674      * reasoning behind this.
675      * In softmmu we will have at most max_cpus TCG threads.
676      */
677 #ifdef CONFIG_USER_ONLY
678     tcg_ctxs = &tcg_ctx;
679     tcg_cur_ctxs = 1;
680     tcg_max_ctxs = 1;
681 #else
682     tcg_max_ctxs = max_cpus;
683     tcg_ctxs = g_new0(TCGContext *, max_cpus);
684 #endif
685 
686     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
687     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
688     cpu_env = temp_tcgv_ptr(ts);
689 }
690 
691 void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
692 {
693     tcg_context_init(max_cpus);
694     tcg_region_init(tb_size, splitwx, max_cpus);
695 }
696 
697 /*
698  * Allocate TBs right before their corresponding translated code, making
699  * sure that TBs and code are on different cache lines.
700  */
701 TranslationBlock *tcg_tb_alloc(TCGContext *s)
702 {
703     uintptr_t align = qemu_icache_linesize;
704     TranslationBlock *tb;
705     void *next;
706 
707  retry:
708     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
709     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
710 
711     if (unlikely(next > s->code_gen_highwater)) {
712         if (tcg_region_alloc(s)) {
713             return NULL;
714         }
715         goto retry;
716     }
717     qatomic_set(&s->code_gen_ptr, next);
718     s->data_gen_ptr = NULL;
719     return tb;
720 }
721 
722 void tcg_prologue_init(TCGContext *s)
723 {
724     size_t prologue_size;
725 
726     s->code_ptr = s->code_gen_ptr;
727     s->code_buf = s->code_gen_ptr;
728     s->data_gen_ptr = NULL;
729 
730 #ifndef CONFIG_TCG_INTERPRETER
731     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
732 #endif
733 
734 #ifdef TCG_TARGET_NEED_POOL_LABELS
735     s->pool_labels = NULL;
736 #endif
737 
738     qemu_thread_jit_write();
739     /* Generate the prologue.  */
740     tcg_target_qemu_prologue(s);
741 
742 #ifdef TCG_TARGET_NEED_POOL_LABELS
743     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
744     {
745         int result = tcg_out_pool_finalize(s);
746         tcg_debug_assert(result == 0);
747     }
748 #endif
749 
750     prologue_size = tcg_current_code_size(s);
751 
752 #ifndef CONFIG_TCG_INTERPRETER
753     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
754                         (uintptr_t)s->code_buf, prologue_size);
755 #endif
756 
757 #ifdef DEBUG_DISAS
758     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
759         FILE *logfile = qemu_log_lock();
760         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
761         if (s->data_gen_ptr) {
762             size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
763             size_t data_size = prologue_size - code_size;
764             size_t i;
765 
766             log_disas(s->code_gen_ptr, code_size);
767 
768             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
769                 if (sizeof(tcg_target_ulong) == 8) {
770                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
771                              (uintptr_t)s->data_gen_ptr + i,
772                              *(uint64_t *)(s->data_gen_ptr + i));
773                 } else {
774                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
775                              (uintptr_t)s->data_gen_ptr + i,
776                              *(uint32_t *)(s->data_gen_ptr + i));
777                 }
778             }
779         } else {
780             log_disas(s->code_gen_ptr, prologue_size);
781         }
782         qemu_log("\n");
783         qemu_log_flush();
784         qemu_log_unlock(logfile);
785     }
786 #endif
787 
788 #ifndef CONFIG_TCG_INTERPRETER
789     /*
790      * Assert that goto_ptr is implemented completely, setting an epilogue.
791      * For tci, we use NULL as the signal to return from the interpreter,
792      * so skip this check.
793      */
794     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
795 #endif
796 
797     tcg_region_prologue_set(s);
798 }
799 
800 void tcg_func_start(TCGContext *s)
801 {
802     tcg_pool_reset(s);
803     s->nb_temps = s->nb_globals;
804 
805     /* No temps have been previously allocated for size or locality.  */
806     memset(s->free_temps, 0, sizeof(s->free_temps));
807 
808     /* No constant temps have been previously allocated. */
809     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
810         if (s->const_table[i]) {
811             g_hash_table_remove_all(s->const_table[i]);
812         }
813     }
814 
815     s->nb_ops = 0;
816     s->nb_labels = 0;
817     s->current_frame_offset = s->frame_start;
818 
819 #ifdef CONFIG_DEBUG_TCG
820     s->goto_tb_issue_mask = 0;
821 #endif
822 
823     QTAILQ_INIT(&s->ops);
824     QTAILQ_INIT(&s->free_ops);
825     QSIMPLEQ_INIT(&s->labels);
826 }
827 
828 static TCGTemp *tcg_temp_alloc(TCGContext *s)
829 {
830     int n = s->nb_temps++;
831 
832     if (n >= TCG_MAX_TEMPS) {
833         tcg_raise_tb_overflow(s);
834     }
835     return memset(&s->temps[n], 0, sizeof(TCGTemp));
836 }
837 
838 static TCGTemp *tcg_global_alloc(TCGContext *s)
839 {
840     TCGTemp *ts;
841 
842     tcg_debug_assert(s->nb_globals == s->nb_temps);
843     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
844     s->nb_globals++;
845     ts = tcg_temp_alloc(s);
846     ts->kind = TEMP_GLOBAL;
847 
848     return ts;
849 }
850 
851 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
852                                             TCGReg reg, const char *name)
853 {
854     TCGTemp *ts;
855 
856     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
857         tcg_abort();
858     }
859 
860     ts = tcg_global_alloc(s);
861     ts->base_type = type;
862     ts->type = type;
863     ts->kind = TEMP_FIXED;
864     ts->reg = reg;
865     ts->name = name;
866     tcg_regset_set_reg(s->reserved_regs, reg);
867 
868     return ts;
869 }
870 
871 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
872 {
873     s->frame_start = start;
874     s->frame_end = start + size;
875     s->frame_temp
876         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
877 }
878 
879 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
880                                      intptr_t offset, const char *name)
881 {
882     TCGContext *s = tcg_ctx;
883     TCGTemp *base_ts = tcgv_ptr_temp(base);
884     TCGTemp *ts = tcg_global_alloc(s);
885     int indirect_reg = 0, bigendian = 0;
886 #ifdef HOST_WORDS_BIGENDIAN
887     bigendian = 1;
888 #endif
889 
890     switch (base_ts->kind) {
891     case TEMP_FIXED:
892         break;
893     case TEMP_GLOBAL:
894         /* We do not support double-indirect registers.  */
895         tcg_debug_assert(!base_ts->indirect_reg);
896         base_ts->indirect_base = 1;
897         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
898                             ? 2 : 1);
899         indirect_reg = 1;
900         break;
901     default:
902         g_assert_not_reached();
903     }
904 
905     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
906         TCGTemp *ts2 = tcg_global_alloc(s);
907         char buf[64];
908 
909         ts->base_type = TCG_TYPE_I64;
910         ts->type = TCG_TYPE_I32;
911         ts->indirect_reg = indirect_reg;
912         ts->mem_allocated = 1;
913         ts->mem_base = base_ts;
914         ts->mem_offset = offset + bigendian * 4;
915         pstrcpy(buf, sizeof(buf), name);
916         pstrcat(buf, sizeof(buf), "_0");
917         ts->name = strdup(buf);
918 
919         tcg_debug_assert(ts2 == ts + 1);
920         ts2->base_type = TCG_TYPE_I64;
921         ts2->type = TCG_TYPE_I32;
922         ts2->indirect_reg = indirect_reg;
923         ts2->mem_allocated = 1;
924         ts2->mem_base = base_ts;
925         ts2->mem_offset = offset + (1 - bigendian) * 4;
926         pstrcpy(buf, sizeof(buf), name);
927         pstrcat(buf, sizeof(buf), "_1");
928         ts2->name = strdup(buf);
929     } else {
930         ts->base_type = type;
931         ts->type = type;
932         ts->indirect_reg = indirect_reg;
933         ts->mem_allocated = 1;
934         ts->mem_base = base_ts;
935         ts->mem_offset = offset;
936         ts->name = name;
937     }
938     return ts;
939 }
940 
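/*
 * Usage sketch (illustrative): front ends register guest CPU state as TCG
 * globals through the typed tcg_global_mem_new_* wrappers.  CPUMyState and
 * cpu_pc below are made-up names:
 *
 *     cpu_pc = tcg_global_mem_new(cpu_env,
 *                                 offsetof(CPUMyState, pc), "pc");
 */
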
941 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
942 {
943     TCGContext *s = tcg_ctx;
944     TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
945     TCGTemp *ts;
946     int idx, k;
947 
948     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
949     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
950     if (idx < TCG_MAX_TEMPS) {
951         /* There is already an available temp with the right type.  */
952         clear_bit(idx, s->free_temps[k].l);
953 
954         ts = &s->temps[idx];
955         ts->temp_allocated = 1;
956         tcg_debug_assert(ts->base_type == type);
957         tcg_debug_assert(ts->kind == kind);
958     } else {
959         ts = tcg_temp_alloc(s);
960         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
961             TCGTemp *ts2 = tcg_temp_alloc(s);
962 
963             ts->base_type = type;
964             ts->type = TCG_TYPE_I32;
965             ts->temp_allocated = 1;
966             ts->kind = kind;
967 
968             tcg_debug_assert(ts2 == ts + 1);
969             ts2->base_type = TCG_TYPE_I64;
970             ts2->type = TCG_TYPE_I32;
971             ts2->temp_allocated = 1;
972             ts2->kind = kind;
973         } else {
974             ts->base_type = type;
975             ts->type = type;
976             ts->temp_allocated = 1;
977             ts->kind = kind;
978         }
979     }
980 
981 #if defined(CONFIG_DEBUG_TCG)
982     s->temps_in_use++;
983 #endif
984     return ts;
985 }
986 
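/*
 * Note (illustrative): the free-list key above pairs type with locality,
 * so a freed I32 TEMP_NORMAL lands in free_temps[TCG_TYPE_I32] while a
 * freed I32 TEMP_LOCAL lands in free_temps[TCG_TYPE_I32 + TCG_TYPE_COUNT];
 * tcg_temp_free_internal() below rebuilds the same key when recycling.
 */
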
987 TCGv_vec tcg_temp_new_vec(TCGType type)
988 {
989     TCGTemp *t;
990 
991 #ifdef CONFIG_DEBUG_TCG
992     switch (type) {
993     case TCG_TYPE_V64:
994         assert(TCG_TARGET_HAS_v64);
995         break;
996     case TCG_TYPE_V128:
997         assert(TCG_TARGET_HAS_v128);
998         break;
999     case TCG_TYPE_V256:
1000         assert(TCG_TARGET_HAS_v256);
1001         break;
1002     default:
1003         g_assert_not_reached();
1004     }
1005 #endif
1006 
1007     t = tcg_temp_new_internal(type, 0);
1008     return temp_tcgv_vec(t);
1009 }
1010 
1011 /* Create a new temp of the same type as an existing temp.  */
1012 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1013 {
1014     TCGTemp *t = tcgv_vec_temp(match);
1015 
1016     tcg_debug_assert(t->temp_allocated != 0);
1017 
1018     t = tcg_temp_new_internal(t->base_type, 0);
1019     return temp_tcgv_vec(t);
1020 }
1021 
1022 void tcg_temp_free_internal(TCGTemp *ts)
1023 {
1024     TCGContext *s = tcg_ctx;
1025     int k, idx;
1026 
1027     /* In order to simplify users of tcg_constant_*, silently ignore free. */
1028     if (ts->kind == TEMP_CONST) {
1029         return;
1030     }
1031 
1032 #if defined(CONFIG_DEBUG_TCG)
1033     s->temps_in_use--;
1034     if (s->temps_in_use < 0) {
1035         fprintf(stderr, "More temporaries freed than allocated!\n");
1036     }
1037 #endif
1038 
1039     tcg_debug_assert(ts->kind < TEMP_GLOBAL);
1040     tcg_debug_assert(ts->temp_allocated != 0);
1041     ts->temp_allocated = 0;
1042 
1043     idx = temp_idx(ts);
1044     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1045     set_bit(idx, s->free_temps[k].l);
1046 }
1047 
1048 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1049 {
1050     TCGContext *s = tcg_ctx;
1051     GHashTable *h = s->const_table[type];
1052     TCGTemp *ts;
1053 
1054     if (h == NULL) {
1055         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1056         s->const_table[type] = h;
1057     }
1058 
1059     ts = g_hash_table_lookup(h, &val);
1060     if (ts == NULL) {
1061         ts = tcg_temp_alloc(s);
1062 
1063         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1064             TCGTemp *ts2 = tcg_temp_alloc(s);
1065 
1066             ts->base_type = TCG_TYPE_I64;
1067             ts->type = TCG_TYPE_I32;
1068             ts->kind = TEMP_CONST;
1069             ts->temp_allocated = 1;
1070             /*
1071              * Retain the full value of the 64-bit constant in the low
1072              * part, so that the hash table works.  Actual uses will
1073              * truncate the value to the low part.
1074              */
1075             ts->val = val;
1076 
1077             tcg_debug_assert(ts2 == ts + 1);
1078             ts2->base_type = TCG_TYPE_I64;
1079             ts2->type = TCG_TYPE_I32;
1080             ts2->kind = TEMP_CONST;
1081             ts2->temp_allocated = 1;
1082             ts2->val = val >> 32;
1083         } else {
1084             ts->base_type = type;
1085             ts->type = type;
1086             ts->kind = TEMP_CONST;
1087             ts->temp_allocated = 1;
1088             ts->val = val;
1089         }
1090         g_hash_table_insert(h, &ts->val, ts);
1091     }
1092 
1093     return ts;
1094 }
1095 
1096 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1097 {
1098     val = dup_const(vece, val);
1099     return temp_tcgv_vec(tcg_constant_internal(type, val));
1100 }
1101 
1102 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1103 {
1104     TCGTemp *t = tcgv_vec_temp(match);
1105 
1106     tcg_debug_assert(t->temp_allocated != 0);
1107     return tcg_constant_vec(t->base_type, vece, val);
1108 }
1109 
1110 TCGv_i32 tcg_const_i32(int32_t val)
1111 {
1112     TCGv_i32 t0;
1113     t0 = tcg_temp_new_i32();
1114     tcg_gen_movi_i32(t0, val);
1115     return t0;
1116 }
1117 
1118 TCGv_i64 tcg_const_i64(int64_t val)
1119 {
1120     TCGv_i64 t0;
1121     t0 = tcg_temp_new_i64();
1122     tcg_gen_movi_i64(t0, val);
1123     return t0;
1124 }
1125 
1126 TCGv_i32 tcg_const_local_i32(int32_t val)
1127 {
1128     TCGv_i32 t0;
1129     t0 = tcg_temp_local_new_i32();
1130     tcg_gen_movi_i32(t0, val);
1131     return t0;
1132 }
1133 
1134 TCGv_i64 tcg_const_local_i64(int64_t val)
1135 {
1136     TCGv_i64 t0;
1137     t0 = tcg_temp_local_new_i64();
1138     tcg_gen_movi_i64(t0, val);
1139     return t0;
1140 }
1141 
1142 #if defined(CONFIG_DEBUG_TCG)
1143 void tcg_clear_temp_count(void)
1144 {
1145     TCGContext *s = tcg_ctx;
1146     s->temps_in_use = 0;
1147 }
1148 
1149 int tcg_check_temp_count(void)
1150 {
1151     TCGContext *s = tcg_ctx;
1152     if (s->temps_in_use) {
1153         /* Clear the count so that we don't give another
1154          * warning immediately next time around.
1155          */
1156         s->temps_in_use = 0;
1157         return 1;
1158     }
1159     return 0;
1160 }
1161 #endif
1162 
1163 /* Return true if OP may appear in the opcode stream.
1164    Test the runtime variable that controls each opcode.  */
1165 bool tcg_op_supported(TCGOpcode op)
1166 {
1167     const bool have_vec
1168         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1169 
1170     switch (op) {
1171     case INDEX_op_discard:
1172     case INDEX_op_set_label:
1173     case INDEX_op_call:
1174     case INDEX_op_br:
1175     case INDEX_op_mb:
1176     case INDEX_op_insn_start:
1177     case INDEX_op_exit_tb:
1178     case INDEX_op_goto_tb:
1179     case INDEX_op_goto_ptr:
1180     case INDEX_op_qemu_ld_i32:
1181     case INDEX_op_qemu_st_i32:
1182     case INDEX_op_qemu_ld_i64:
1183     case INDEX_op_qemu_st_i64:
1184         return true;
1185 
1186     case INDEX_op_qemu_st8_i32:
1187         return TCG_TARGET_HAS_qemu_st8_i32;
1188 
1189     case INDEX_op_mov_i32:
1190     case INDEX_op_setcond_i32:
1191     case INDEX_op_brcond_i32:
1192     case INDEX_op_ld8u_i32:
1193     case INDEX_op_ld8s_i32:
1194     case INDEX_op_ld16u_i32:
1195     case INDEX_op_ld16s_i32:
1196     case INDEX_op_ld_i32:
1197     case INDEX_op_st8_i32:
1198     case INDEX_op_st16_i32:
1199     case INDEX_op_st_i32:
1200     case INDEX_op_add_i32:
1201     case INDEX_op_sub_i32:
1202     case INDEX_op_mul_i32:
1203     case INDEX_op_and_i32:
1204     case INDEX_op_or_i32:
1205     case INDEX_op_xor_i32:
1206     case INDEX_op_shl_i32:
1207     case INDEX_op_shr_i32:
1208     case INDEX_op_sar_i32:
1209         return true;
1210 
1211     case INDEX_op_movcond_i32:
1212         return TCG_TARGET_HAS_movcond_i32;
1213     case INDEX_op_div_i32:
1214     case INDEX_op_divu_i32:
1215         return TCG_TARGET_HAS_div_i32;
1216     case INDEX_op_rem_i32:
1217     case INDEX_op_remu_i32:
1218         return TCG_TARGET_HAS_rem_i32;
1219     case INDEX_op_div2_i32:
1220     case INDEX_op_divu2_i32:
1221         return TCG_TARGET_HAS_div2_i32;
1222     case INDEX_op_rotl_i32:
1223     case INDEX_op_rotr_i32:
1224         return TCG_TARGET_HAS_rot_i32;
1225     case INDEX_op_deposit_i32:
1226         return TCG_TARGET_HAS_deposit_i32;
1227     case INDEX_op_extract_i32:
1228         return TCG_TARGET_HAS_extract_i32;
1229     case INDEX_op_sextract_i32:
1230         return TCG_TARGET_HAS_sextract_i32;
1231     case INDEX_op_extract2_i32:
1232         return TCG_TARGET_HAS_extract2_i32;
1233     case INDEX_op_add2_i32:
1234         return TCG_TARGET_HAS_add2_i32;
1235     case INDEX_op_sub2_i32:
1236         return TCG_TARGET_HAS_sub2_i32;
1237     case INDEX_op_mulu2_i32:
1238         return TCG_TARGET_HAS_mulu2_i32;
1239     case INDEX_op_muls2_i32:
1240         return TCG_TARGET_HAS_muls2_i32;
1241     case INDEX_op_muluh_i32:
1242         return TCG_TARGET_HAS_muluh_i32;
1243     case INDEX_op_mulsh_i32:
1244         return TCG_TARGET_HAS_mulsh_i32;
1245     case INDEX_op_ext8s_i32:
1246         return TCG_TARGET_HAS_ext8s_i32;
1247     case INDEX_op_ext16s_i32:
1248         return TCG_TARGET_HAS_ext16s_i32;
1249     case INDEX_op_ext8u_i32:
1250         return TCG_TARGET_HAS_ext8u_i32;
1251     case INDEX_op_ext16u_i32:
1252         return TCG_TARGET_HAS_ext16u_i32;
1253     case INDEX_op_bswap16_i32:
1254         return TCG_TARGET_HAS_bswap16_i32;
1255     case INDEX_op_bswap32_i32:
1256         return TCG_TARGET_HAS_bswap32_i32;
1257     case INDEX_op_not_i32:
1258         return TCG_TARGET_HAS_not_i32;
1259     case INDEX_op_neg_i32:
1260         return TCG_TARGET_HAS_neg_i32;
1261     case INDEX_op_andc_i32:
1262         return TCG_TARGET_HAS_andc_i32;
1263     case INDEX_op_orc_i32:
1264         return TCG_TARGET_HAS_orc_i32;
1265     case INDEX_op_eqv_i32:
1266         return TCG_TARGET_HAS_eqv_i32;
1267     case INDEX_op_nand_i32:
1268         return TCG_TARGET_HAS_nand_i32;
1269     case INDEX_op_nor_i32:
1270         return TCG_TARGET_HAS_nor_i32;
1271     case INDEX_op_clz_i32:
1272         return TCG_TARGET_HAS_clz_i32;
1273     case INDEX_op_ctz_i32:
1274         return TCG_TARGET_HAS_ctz_i32;
1275     case INDEX_op_ctpop_i32:
1276         return TCG_TARGET_HAS_ctpop_i32;
1277 
1278     case INDEX_op_brcond2_i32:
1279     case INDEX_op_setcond2_i32:
1280         return TCG_TARGET_REG_BITS == 32;
1281 
1282     case INDEX_op_mov_i64:
1283     case INDEX_op_setcond_i64:
1284     case INDEX_op_brcond_i64:
1285     case INDEX_op_ld8u_i64:
1286     case INDEX_op_ld8s_i64:
1287     case INDEX_op_ld16u_i64:
1288     case INDEX_op_ld16s_i64:
1289     case INDEX_op_ld32u_i64:
1290     case INDEX_op_ld32s_i64:
1291     case INDEX_op_ld_i64:
1292     case INDEX_op_st8_i64:
1293     case INDEX_op_st16_i64:
1294     case INDEX_op_st32_i64:
1295     case INDEX_op_st_i64:
1296     case INDEX_op_add_i64:
1297     case INDEX_op_sub_i64:
1298     case INDEX_op_mul_i64:
1299     case INDEX_op_and_i64:
1300     case INDEX_op_or_i64:
1301     case INDEX_op_xor_i64:
1302     case INDEX_op_shl_i64:
1303     case INDEX_op_shr_i64:
1304     case INDEX_op_sar_i64:
1305     case INDEX_op_ext_i32_i64:
1306     case INDEX_op_extu_i32_i64:
1307         return TCG_TARGET_REG_BITS == 64;
1308 
1309     case INDEX_op_movcond_i64:
1310         return TCG_TARGET_HAS_movcond_i64;
1311     case INDEX_op_div_i64:
1312     case INDEX_op_divu_i64:
1313         return TCG_TARGET_HAS_div_i64;
1314     case INDEX_op_rem_i64:
1315     case INDEX_op_remu_i64:
1316         return TCG_TARGET_HAS_rem_i64;
1317     case INDEX_op_div2_i64:
1318     case INDEX_op_divu2_i64:
1319         return TCG_TARGET_HAS_div2_i64;
1320     case INDEX_op_rotl_i64:
1321     case INDEX_op_rotr_i64:
1322         return TCG_TARGET_HAS_rot_i64;
1323     case INDEX_op_deposit_i64:
1324         return TCG_TARGET_HAS_deposit_i64;
1325     case INDEX_op_extract_i64:
1326         return TCG_TARGET_HAS_extract_i64;
1327     case INDEX_op_sextract_i64:
1328         return TCG_TARGET_HAS_sextract_i64;
1329     case INDEX_op_extract2_i64:
1330         return TCG_TARGET_HAS_extract2_i64;
1331     case INDEX_op_extrl_i64_i32:
1332         return TCG_TARGET_HAS_extrl_i64_i32;
1333     case INDEX_op_extrh_i64_i32:
1334         return TCG_TARGET_HAS_extrh_i64_i32;
1335     case INDEX_op_ext8s_i64:
1336         return TCG_TARGET_HAS_ext8s_i64;
1337     case INDEX_op_ext16s_i64:
1338         return TCG_TARGET_HAS_ext16s_i64;
1339     case INDEX_op_ext32s_i64:
1340         return TCG_TARGET_HAS_ext32s_i64;
1341     case INDEX_op_ext8u_i64:
1342         return TCG_TARGET_HAS_ext8u_i64;
1343     case INDEX_op_ext16u_i64:
1344         return TCG_TARGET_HAS_ext16u_i64;
1345     case INDEX_op_ext32u_i64:
1346         return TCG_TARGET_HAS_ext32u_i64;
1347     case INDEX_op_bswap16_i64:
1348         return TCG_TARGET_HAS_bswap16_i64;
1349     case INDEX_op_bswap32_i64:
1350         return TCG_TARGET_HAS_bswap32_i64;
1351     case INDEX_op_bswap64_i64:
1352         return TCG_TARGET_HAS_bswap64_i64;
1353     case INDEX_op_not_i64:
1354         return TCG_TARGET_HAS_not_i64;
1355     case INDEX_op_neg_i64:
1356         return TCG_TARGET_HAS_neg_i64;
1357     case INDEX_op_andc_i64:
1358         return TCG_TARGET_HAS_andc_i64;
1359     case INDEX_op_orc_i64:
1360         return TCG_TARGET_HAS_orc_i64;
1361     case INDEX_op_eqv_i64:
1362         return TCG_TARGET_HAS_eqv_i64;
1363     case INDEX_op_nand_i64:
1364         return TCG_TARGET_HAS_nand_i64;
1365     case INDEX_op_nor_i64:
1366         return TCG_TARGET_HAS_nor_i64;
1367     case INDEX_op_clz_i64:
1368         return TCG_TARGET_HAS_clz_i64;
1369     case INDEX_op_ctz_i64:
1370         return TCG_TARGET_HAS_ctz_i64;
1371     case INDEX_op_ctpop_i64:
1372         return TCG_TARGET_HAS_ctpop_i64;
1373     case INDEX_op_add2_i64:
1374         return TCG_TARGET_HAS_add2_i64;
1375     case INDEX_op_sub2_i64:
1376         return TCG_TARGET_HAS_sub2_i64;
1377     case INDEX_op_mulu2_i64:
1378         return TCG_TARGET_HAS_mulu2_i64;
1379     case INDEX_op_muls2_i64:
1380         return TCG_TARGET_HAS_muls2_i64;
1381     case INDEX_op_muluh_i64:
1382         return TCG_TARGET_HAS_muluh_i64;
1383     case INDEX_op_mulsh_i64:
1384         return TCG_TARGET_HAS_mulsh_i64;
1385 
1386     case INDEX_op_mov_vec:
1387     case INDEX_op_dup_vec:
1388     case INDEX_op_dupm_vec:
1389     case INDEX_op_ld_vec:
1390     case INDEX_op_st_vec:
1391     case INDEX_op_add_vec:
1392     case INDEX_op_sub_vec:
1393     case INDEX_op_and_vec:
1394     case INDEX_op_or_vec:
1395     case INDEX_op_xor_vec:
1396     case INDEX_op_cmp_vec:
1397         return have_vec;
1398     case INDEX_op_dup2_vec:
1399         return have_vec && TCG_TARGET_REG_BITS == 32;
1400     case INDEX_op_not_vec:
1401         return have_vec && TCG_TARGET_HAS_not_vec;
1402     case INDEX_op_neg_vec:
1403         return have_vec && TCG_TARGET_HAS_neg_vec;
1404     case INDEX_op_abs_vec:
1405         return have_vec && TCG_TARGET_HAS_abs_vec;
1406     case INDEX_op_andc_vec:
1407         return have_vec && TCG_TARGET_HAS_andc_vec;
1408     case INDEX_op_orc_vec:
1409         return have_vec && TCG_TARGET_HAS_orc_vec;
1410     case INDEX_op_nand_vec:
1411         return have_vec && TCG_TARGET_HAS_nand_vec;
1412     case INDEX_op_nor_vec:
1413         return have_vec && TCG_TARGET_HAS_nor_vec;
1414     case INDEX_op_eqv_vec:
1415         return have_vec && TCG_TARGET_HAS_eqv_vec;
1416     case INDEX_op_mul_vec:
1417         return have_vec && TCG_TARGET_HAS_mul_vec;
1418     case INDEX_op_shli_vec:
1419     case INDEX_op_shri_vec:
1420     case INDEX_op_sari_vec:
1421         return have_vec && TCG_TARGET_HAS_shi_vec;
1422     case INDEX_op_shls_vec:
1423     case INDEX_op_shrs_vec:
1424     case INDEX_op_sars_vec:
1425         return have_vec && TCG_TARGET_HAS_shs_vec;
1426     case INDEX_op_shlv_vec:
1427     case INDEX_op_shrv_vec:
1428     case INDEX_op_sarv_vec:
1429         return have_vec && TCG_TARGET_HAS_shv_vec;
1430     case INDEX_op_rotli_vec:
1431         return have_vec && TCG_TARGET_HAS_roti_vec;
1432     case INDEX_op_rotls_vec:
1433         return have_vec && TCG_TARGET_HAS_rots_vec;
1434     case INDEX_op_rotlv_vec:
1435     case INDEX_op_rotrv_vec:
1436         return have_vec && TCG_TARGET_HAS_rotv_vec;
1437     case INDEX_op_ssadd_vec:
1438     case INDEX_op_usadd_vec:
1439     case INDEX_op_sssub_vec:
1440     case INDEX_op_ussub_vec:
1441         return have_vec && TCG_TARGET_HAS_sat_vec;
1442     case INDEX_op_smin_vec:
1443     case INDEX_op_umin_vec:
1444     case INDEX_op_smax_vec:
1445     case INDEX_op_umax_vec:
1446         return have_vec && TCG_TARGET_HAS_minmax_vec;
1447     case INDEX_op_bitsel_vec:
1448         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1449     case INDEX_op_cmpsel_vec:
1450         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1451 
1452     default:
1453         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1454         return true;
1455     }
1456 }
1457 
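/*
 * Example (illustrative): expansion code can use this to guard optional
 * opcodes and fall back to an equivalent instruction sequence:
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         ...emit ctpop directly...
 *     } else {
 *         ...expand via shifts, masks and adds...
 *     }
 */
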
1458 /* Note: we convert the 64-bit args to 32-bit and do some alignment
1459    and endian swap. Maybe it would be better to do the alignment
1460    and endian swap in tcg_reg_alloc_call(). */
1461 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1462 {
1463     int i, real_args, nb_rets, pi;
1464     unsigned typemask;
1465     const TCGHelperInfo *info;
1466     TCGOp *op;
1467 
1468     info = g_hash_table_lookup(helper_table, (gpointer)func);
1469     typemask = info->typemask;
1470 
1471 #ifdef CONFIG_PLUGIN
1472     /* detect non-plugin helpers */
1473     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1474         tcg_ctx->plugin_insn->calls_helpers = true;
1475     }
1476 #endif
1477 
1478 #if defined(__sparc__) && !defined(__arch64__) \
1479     && !defined(CONFIG_TCG_INTERPRETER)
1480     /* We have 64-bit values in one register, but need to pass as two
1481        separate parameters.  Split them.  */
1482     int orig_typemask = typemask;
1483     int orig_nargs = nargs;
1484     TCGv_i64 retl, reth;
1485     TCGTemp *split_args[MAX_OPC_PARAM];
1486 
1487     retl = NULL;
1488     reth = NULL;
1489     typemask = 0;
1490     for (i = real_args = 0; i < nargs; ++i) {
1491         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1492         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1493 
1494         if (is_64bit) {
1495             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1496             TCGv_i32 h = tcg_temp_new_i32();
1497             TCGv_i32 l = tcg_temp_new_i32();
1498             tcg_gen_extr_i64_i32(l, h, orig);
1499             split_args[real_args++] = tcgv_i32_temp(h);
1500             typemask |= dh_typecode_i32 << (real_args * 3);
1501             split_args[real_args++] = tcgv_i32_temp(l);
1502             typemask |= dh_typecode_i32 << (real_args * 3);
1503         } else {
1504             split_args[real_args++] = args[i];
1505             typemask |= argtype << (real_args * 3);
1506         }
1507     }
1508     nargs = real_args;
1509     args = split_args;
1510 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1511     for (i = 0; i < nargs; ++i) {
1512         int argtype = extract32(typemask, (i + 1) * 3, 3);
1513         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1514         bool is_signed = argtype & 1;
1515 
1516         if (is_32bit) {
1517             TCGv_i64 temp = tcg_temp_new_i64();
1518             TCGv_i32 orig = temp_tcgv_i32(args[i]);
1519             if (is_signed) {
1520                 tcg_gen_ext_i32_i64(temp, orig);
1521             } else {
1522                 tcg_gen_extu_i32_i64(temp, orig);
1523             }
1524             args[i] = tcgv_i64_temp(temp);
1525         }
1526     }
1527 #endif /* TCG_TARGET_EXTEND_ARGS */
1528 
1529     op = tcg_emit_op(INDEX_op_call);
1530 
1531     pi = 0;
1532     if (ret != NULL) {
1533 #if defined(__sparc__) && !defined(__arch64__) \
1534     && !defined(CONFIG_TCG_INTERPRETER)
1535         if ((typemask & 6) == dh_typecode_i64) {
1536             /* The 32-bit ABI is going to return the 64-bit value in
1537                the %o0/%o1 register pair.  Prepare for this by using
1538                two return temporaries, and reassemble below.  */
1539             retl = tcg_temp_new_i64();
1540             reth = tcg_temp_new_i64();
1541             op->args[pi++] = tcgv_i64_arg(reth);
1542             op->args[pi++] = tcgv_i64_arg(retl);
1543             nb_rets = 2;
1544         } else {
1545             op->args[pi++] = temp_arg(ret);
1546             nb_rets = 1;
1547         }
1548 #else
1549         if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1550 #ifdef HOST_WORDS_BIGENDIAN
1551             op->args[pi++] = temp_arg(ret + 1);
1552             op->args[pi++] = temp_arg(ret);
1553 #else
1554             op->args[pi++] = temp_arg(ret);
1555             op->args[pi++] = temp_arg(ret + 1);
1556 #endif
1557             nb_rets = 2;
1558         } else {
1559             op->args[pi++] = temp_arg(ret);
1560             nb_rets = 1;
1561         }
1562 #endif
1563     } else {
1564         nb_rets = 0;
1565     }
1566     TCGOP_CALLO(op) = nb_rets;
1567 
1568     real_args = 0;
1569     for (i = 0; i < nargs; i++) {
1570         int argtype = extract32(typemask, (i + 1) * 3, 3);
1571         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1572         bool want_align = false;
1573 
1574 #if defined(CONFIG_TCG_INTERPRETER)
1575         /*
1576          * Align all arguments, so that they land in predictable places
1577          * for passing off to ffi_call.
1578          */
1579         want_align = true;
1580 #elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1581         /* Some targets want aligned 64-bit args */
1582         want_align = is_64bit;
1583 #endif
1584 
1585         if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1586             op->args[pi++] = TCG_CALL_DUMMY_ARG;
1587             real_args++;
1588         }
1589 
1590         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1591             /*
1592              * If stack grows up, then we will be placing successive
1593              * arguments at lower addresses, which means we need to
1594              * reverse the order compared to how we would normally
1595              * treat either big or little-endian.  For those arguments
1596              * that will wind up in registers, this still works for
1597              * HPPA (the only current STACK_GROWSUP target) since the
1598              * argument registers are *also* allocated in decreasing
1599              * order.  If another such target is added, this logic may
1600              * have to get more complicated to differentiate between
1601              * stack arguments and register arguments.
1602              */
1603 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1604             op->args[pi++] = temp_arg(args[i] + 1);
1605             op->args[pi++] = temp_arg(args[i]);
1606 #else
1607             op->args[pi++] = temp_arg(args[i]);
1608             op->args[pi++] = temp_arg(args[i] + 1);
1609 #endif
1610             real_args += 2;
1611             continue;
1612         }
1613 
1614         op->args[pi++] = temp_arg(args[i]);
1615         real_args++;
1616     }
1617     op->args[pi++] = (uintptr_t)func;
1618     op->args[pi++] = (uintptr_t)info;
1619     TCGOP_CALLI(op) = real_args;
1620 
1621     /* Make sure the fields didn't overflow.  */
1622     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1623     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1624 
1625 #if defined(__sparc__) && !defined(__arch64__) \
1626     && !defined(CONFIG_TCG_INTERPRETER)
1627     /* Free all of the parts we allocated above.  */
1628     for (i = real_args = 0; i < orig_nargs; ++i) {
1629         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1630         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1631 
1632         if (is_64bit) {
1633             tcg_temp_free_internal(args[real_args++]);
1634             tcg_temp_free_internal(args[real_args++]);
1635         } else {
1636             real_args++;
1637         }
1638     }
1639     if ((orig_typemask & 6) == dh_typecode_i64) {
1640         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1641            Note that describing these as TCGv_i64 eliminates an unnecessary
1642            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1643         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1644         tcg_temp_free_i64(retl);
1645         tcg_temp_free_i64(reth);
1646     }
1647 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1648     for (i = 0; i < nargs; ++i) {
1649         int argtype = extract32(typemask, (i + 1) * 3, 3);
1650         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1651 
1652         if (is_32bit) {
1653             tcg_temp_free_internal(args[i]);
1654         }
1655     }
1656 #endif /* TCG_TARGET_EXTEND_ARGS */
1657 }
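/*
 * A sketch of the 64-bit splitting above (32-bit host): an i64 value
 * occupies two consecutive temps, low half first.  On a big-endian
 * host the halves are emitted high-half-first, so a returned i64
 * becomes op->args[] = { temp_arg(ret + 1), temp_arg(ret) }, matching
 * the host's register-pair convention.
 */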
1658 
1659 static void tcg_reg_alloc_start(TCGContext *s)
1660 {
1661     int i, n;
1662 
1663     for (i = 0, n = s->nb_temps; i < n; i++) {
1664         TCGTemp *ts = &s->temps[i];
1665         TCGTempVal val = TEMP_VAL_MEM;
1666 
1667         switch (ts->kind) {
1668         case TEMP_CONST:
1669             val = TEMP_VAL_CONST;
1670             break;
1671         case TEMP_FIXED:
1672             val = TEMP_VAL_REG;
1673             break;
1674         case TEMP_GLOBAL:
1675             break;
1676         case TEMP_NORMAL:
1677             val = TEMP_VAL_DEAD;
1678             /* fall through */
1679         case TEMP_LOCAL:
1680             ts->mem_allocated = 0;
1681             break;
1682         default:
1683             g_assert_not_reached();
1684         }
1685         ts->val_type = val;
1686     }
1687 
1688     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1689 }
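/*
 * Summary of the initial mapping established above:
 *   TEMP_CONST  -> TEMP_VAL_CONST  value materialized on demand
 *   TEMP_FIXED  -> TEMP_VAL_REG    permanently lives in its register
 *   TEMP_GLOBAL -> TEMP_VAL_MEM    starts in its canonical memory slot
 *   TEMP_LOCAL  -> TEMP_VAL_MEM    frame slot not yet allocated
 *   TEMP_NORMAL -> TEMP_VAL_DEAD   no value until first written
 */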
1690 
1691 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1692                                  TCGTemp *ts)
1693 {
1694     int idx = temp_idx(ts);
1695 
1696     switch (ts->kind) {
1697     case TEMP_FIXED:
1698     case TEMP_GLOBAL:
1699         pstrcpy(buf, buf_size, ts->name);
1700         break;
1701     case TEMP_LOCAL:
1702         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1703         break;
1704     case TEMP_NORMAL:
1705         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1706         break;
1707     case TEMP_CONST:
1708         switch (ts->type) {
1709         case TCG_TYPE_I32:
1710             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1711             break;
1712 #if TCG_TARGET_REG_BITS > 32
1713         case TCG_TYPE_I64:
1714             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1715             break;
1716 #endif
1717         case TCG_TYPE_V64:
1718         case TCG_TYPE_V128:
1719         case TCG_TYPE_V256:
1720             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1721                      64 << (ts->type - TCG_TYPE_V64), ts->val);
1722             break;
1723         default:
1724             g_assert_not_reached();
1725         }
1726         break;
1727     }
1728     return buf;
1729 }
1730 
1731 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1732                              int buf_size, TCGArg arg)
1733 {
1734     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1735 }
1736 
1737 static const char * const cond_name[] =
1738 {
1739     [TCG_COND_NEVER] = "never",
1740     [TCG_COND_ALWAYS] = "always",
1741     [TCG_COND_EQ] = "eq",
1742     [TCG_COND_NE] = "ne",
1743     [TCG_COND_LT] = "lt",
1744     [TCG_COND_GE] = "ge",
1745     [TCG_COND_LE] = "le",
1746     [TCG_COND_GT] = "gt",
1747     [TCG_COND_LTU] = "ltu",
1748     [TCG_COND_GEU] = "geu",
1749     [TCG_COND_LEU] = "leu",
1750     [TCG_COND_GTU] = "gtu"
1751 };
1752 
1753 static const char * const ldst_name[] =
1754 {
1755     [MO_UB]   = "ub",
1756     [MO_SB]   = "sb",
1757     [MO_LEUW] = "leuw",
1758     [MO_LESW] = "lesw",
1759     [MO_LEUL] = "leul",
1760     [MO_LESL] = "lesl",
1761     [MO_LEUQ] = "leq",
1762     [MO_BEUW] = "beuw",
1763     [MO_BESW] = "besw",
1764     [MO_BEUL] = "beul",
1765     [MO_BESL] = "besl",
1766     [MO_BEUQ] = "beq",
1767 };
1768 
1769 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1770 #ifdef TARGET_ALIGNED_ONLY
1771     [MO_UNALN >> MO_ASHIFT]    = "un+",
1772     [MO_ALIGN >> MO_ASHIFT]    = "",
1773 #else
1774     [MO_UNALN >> MO_ASHIFT]    = "",
1775     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1776 #endif
1777     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1778     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1779     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1780     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1781     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1782     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1783 };
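/*
 * In dumps these compose as "<align><op>", e.g. "al4+leul" for a
 * 4-byte-aligned little-endian unsigned 32-bit access; the mmu index
 * is printed separately.
 */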
1784 
1785 static const char bswap_flag_name[][6] = {
1786     [TCG_BSWAP_IZ] = "iz",
1787     [TCG_BSWAP_OZ] = "oz",
1788     [TCG_BSWAP_OS] = "os",
1789     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
1790     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
1791 };
1792 
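/*
 * d & (d - 1) clears the lowest set bit, so the test below is true
 * exactly when at most one register is in the set (including none);
 * callers only apply it to non-empty sets.
 */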
1793 static inline bool tcg_regset_single(TCGRegSet d)
1794 {
1795     return (d & (d - 1)) == 0;
1796 }
1797 
1798 static inline TCGReg tcg_regset_first(TCGRegSet d)
1799 {
1800     if (TCG_TARGET_NB_REGS <= 32) {
1801         return ctz32(d);
1802     } else {
1803         return ctz64(d);
1804     }
1805 }
1806 
1807 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1808 {
1809     char buf[128];
1810     TCGOp *op;
1811 
1812     QTAILQ_FOREACH(op, &s->ops, link) {
1813         int i, k, nb_oargs, nb_iargs, nb_cargs;
1814         const TCGOpDef *def;
1815         TCGOpcode c;
1816         int col = 0;
1817 
1818         c = op->opc;
1819         def = &tcg_op_defs[c];
1820 
1821         if (c == INDEX_op_insn_start) {
1822             nb_oargs = 0;
1823             col += qemu_log("\n ----");
1824 
1825             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1826                 target_ulong a;
1827 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1828                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1829 #else
1830                 a = op->args[i];
1831 #endif
1832                 col += qemu_log(" " TARGET_FMT_lx, a);
1833             }
1834         } else if (c == INDEX_op_call) {
1835             const TCGHelperInfo *info = tcg_call_info(op);
1836             void *func = tcg_call_func(op);
1837 
1838             /* variable number of arguments */
1839             nb_oargs = TCGOP_CALLO(op);
1840             nb_iargs = TCGOP_CALLI(op);
1841             nb_cargs = def->nb_cargs;
1842 
1843             col += qemu_log(" %s ", def->name);
1844 
1845             /*
1846              * Print the function name from TCGHelperInfo, if available.
1847              * Note that plugins have a template function for the info,
1848              * but the actual function pointer comes from the plugin.
1849              */
1850             if (func == info->func) {
1851                 col += qemu_log("%s", info->name);
1852             } else {
1853                 col += qemu_log("plugin(%p)", func);
1854             }
1855 
1856             col += qemu_log(",$0x%x,$%d", info->flags, nb_oargs);
1857             for (i = 0; i < nb_oargs; i++) {
1858                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1859                                                        op->args[i]));
1860             }
1861             for (i = 0; i < nb_iargs; i++) {
1862                 TCGArg arg = op->args[nb_oargs + i];
1863                 const char *t = "<dummy>";
1864                 if (arg != TCG_CALL_DUMMY_ARG) {
1865                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1866                 }
1867                 col += qemu_log(",%s", t);
1868             }
1869         } else {
1870             col += qemu_log(" %s ", def->name);
1871 
1872             nb_oargs = def->nb_oargs;
1873             nb_iargs = def->nb_iargs;
1874             nb_cargs = def->nb_cargs;
1875 
1876             if (def->flags & TCG_OPF_VECTOR) {
1877                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1878                                 8 << TCGOP_VECE(op));
1879             }
1880 
1881             k = 0;
1882             for (i = 0; i < nb_oargs; i++) {
1883                 if (k != 0) {
1884                     col += qemu_log(",");
1885                 }
1886                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1887                                                       op->args[k++]));
1888             }
1889             for (i = 0; i < nb_iargs; i++) {
1890                 if (k != 0) {
1891                     col += qemu_log(",");
1892                 }
1893                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1894                                                       op->args[k++]));
1895             }
1896             switch (c) {
1897             case INDEX_op_brcond_i32:
1898             case INDEX_op_setcond_i32:
1899             case INDEX_op_movcond_i32:
1900             case INDEX_op_brcond2_i32:
1901             case INDEX_op_setcond2_i32:
1902             case INDEX_op_brcond_i64:
1903             case INDEX_op_setcond_i64:
1904             case INDEX_op_movcond_i64:
1905             case INDEX_op_cmp_vec:
1906             case INDEX_op_cmpsel_vec:
1907                 if (op->args[k] < ARRAY_SIZE(cond_name)
1908                     && cond_name[op->args[k]]) {
1909                     col += qemu_log(",%s", cond_name[op->args[k++]]);
1910                 } else {
1911                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1912                 }
1913                 i = 1;
1914                 break;
1915             case INDEX_op_qemu_ld_i32:
1916             case INDEX_op_qemu_st_i32:
1917             case INDEX_op_qemu_st8_i32:
1918             case INDEX_op_qemu_ld_i64:
1919             case INDEX_op_qemu_st_i64:
1920                 {
1921                     MemOpIdx oi = op->args[k++];
1922                     MemOp op = get_memop(oi);
1923                     unsigned ix = get_mmuidx(oi);
1924 
1925                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1926                         col += qemu_log(",$0x%x,%u", op, ix);
1927                     } else {
1928                         const char *s_al, *s_op;
1929                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1930                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1931                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
1932                     }
1933                     i = 1;
1934                 }
1935                 break;
1936             case INDEX_op_bswap16_i32:
1937             case INDEX_op_bswap16_i64:
1938             case INDEX_op_bswap32_i32:
1939             case INDEX_op_bswap32_i64:
1940             case INDEX_op_bswap64_i64:
1941                 {
1942                     TCGArg flags = op->args[k];
1943                     const char *name = NULL;
1944 
1945                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
1946                         name = bswap_flag_name[flags];
1947                     }
1948                     if (name) {
1949                         col += qemu_log(",%s", name);
1950                     } else {
1951                         col += qemu_log(",$0x%" TCG_PRIlx, flags);
1952                     }
1953                     i = k = 1;
1954                 }
1955                 break;
1956             default:
1957                 i = 0;
1958                 break;
1959             }
1960             switch (c) {
1961             case INDEX_op_set_label:
1962             case INDEX_op_br:
1963             case INDEX_op_brcond_i32:
1964             case INDEX_op_brcond_i64:
1965             case INDEX_op_brcond2_i32:
1966                 col += qemu_log("%s$L%d", k ? "," : "",
1967                                 arg_label(op->args[k])->id);
1968                 i++, k++;
1969                 break;
1970             default:
1971                 break;
1972             }
1973             for (; i < nb_cargs; i++, k++) {
1974                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
1975             }
1976         }
1977 
1978         if (have_prefs || op->life) {
1979 
1980             QemuLogFile *logfile;
1981 
1982             rcu_read_lock();
1983             logfile = qatomic_rcu_read(&qemu_logfile);
1984             if (logfile) {
1985                 for (; col < 40; ++col) {
1986                     putc(' ', logfile->fd);
1987                 }
1988             }
1989             rcu_read_unlock();
1990         }
1991 
1992         if (op->life) {
1993             unsigned life = op->life;
1994 
1995             if (life & (SYNC_ARG * 3)) {
1996                 qemu_log("  sync:");
1997                 for (i = 0; i < 2; ++i) {
1998                     if (life & (SYNC_ARG << i)) {
1999                         qemu_log(" %d", i);
2000                     }
2001                 }
2002             }
2003             life /= DEAD_ARG;
2004             if (life) {
2005                 qemu_log("  dead:");
2006                 for (i = 0; life; ++i, life >>= 1) {
2007                     if (life & 1) {
2008                         qemu_log(" %d", i);
2009                     }
2010                 }
2011             }
2012         }
2013 
2014         if (have_prefs) {
2015             for (i = 0; i < nb_oargs; ++i) {
2016                 TCGRegSet set = op->output_pref[i];
2017 
2018                 if (i == 0) {
2019                     qemu_log("  pref=");
2020                 } else {
2021                     qemu_log(",");
2022                 }
2023                 if (set == 0) {
2024                     qemu_log("none");
2025                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2026                     qemu_log("all");
2027 #ifdef CONFIG_DEBUG_TCG
2028                 } else if (tcg_regset_single(set)) {
2029                     TCGReg reg = tcg_regset_first(set);
2030                     qemu_log("%s", tcg_target_reg_names[reg]);
2031 #endif
2032                 } else if (TCG_TARGET_NB_REGS <= 32) {
2033                     qemu_log("%#x", (uint32_t)set);
2034                 } else {
2035                     qemu_log("%#" PRIx64, (uint64_t)set);
2036                 }
2037             }
2038         }
2039 
2040         qemu_log("\n");
2041     }
2042 }
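/*
 * A sketch of the resulting dump (names and columns illustrative):
 *
 *  ---- 0000000000401000
 *  mov_i32 tmp0,loc1                        dead: 1  pref=all
 *  add_i32 tmp0,tmp0,$0x4                   sync: 0
 *  brcond_i32 tmp0,$0x0,eq,$L1
 */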
2043 
2044 /* we give more priority to constraints with fewer registers */
2045 static int get_constraint_priority(const TCGOpDef *def, int k)
2046 {
2047     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2048     int n;
2049 
2050     if (arg_ct->oalias) {
2051         /* an alias is equivalent to a single register */
2052         n = 1;
2053     } else {
2054         n = ctpop64(arg_ct->regs);
2055     }
2056     return TCG_TARGET_NB_REGS - n + 1;
2057 }
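/*
 * Example (assuming a 16-register target): a constraint naming a
 * single register yields priority 16, one accepting any register
 * yields 1, so the tightest constraints are considered first.
 */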
2058 
2059 /* sort from highest priority to lowest */
2060 static void sort_constraints(TCGOpDef *def, int start, int n)
2061 {
2062     int i, j;
2063     TCGArgConstraint *a = def->args_ct;
2064 
2065     for (i = 0; i < n; i++) {
2066         a[start + i].sort_index = start + i;
2067     }
2068     if (n <= 1) {
2069         return;
2070     }
2071     for (i = 0; i < n - 1; i++) {
2072         for (j = i + 1; j < n; j++) {
2073             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2074             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2075             if (p1 < p2) {
2076                 int tmp = a[start + i].sort_index;
2077                 a[start + i].sort_index = a[start + j].sort_index;
2078                 a[start + j].sort_index = tmp;
2079             }
2080         }
2081     }
2082 }
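/*
 * Note: n above is bounded by TCG_MAX_OP_ARGS, so this simple
 * quadratic selection is more than fast enough.
 */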
2083 
2084 static void process_op_defs(TCGContext *s)
2085 {
2086     TCGOpcode op;
2087 
2088     for (op = 0; op < NB_OPS; op++) {
2089         TCGOpDef *def = &tcg_op_defs[op];
2090         const TCGTargetOpDef *tdefs;
2091         int i, nb_args;
2092 
2093         if (def->flags & TCG_OPF_NOT_PRESENT) {
2094             continue;
2095         }
2096 
2097         nb_args = def->nb_iargs + def->nb_oargs;
2098         if (nb_args == 0) {
2099             continue;
2100         }
2101 
2102         /*
2103          * Macro magic should make it impossible, but double-check that
2104          * the array index is in range.  Since the signedness of an enum
2105          * is implementation defined, force the result to unsigned.
2106          */
2107         unsigned con_set = tcg_target_op_def(op);
2108         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2109         tdefs = &constraint_sets[con_set];
2110 
2111         for (i = 0; i < nb_args; i++) {
2112             const char *ct_str = tdefs->args_ct_str[i];
2113             /* Incomplete TCGTargetOpDef entry. */
2114             tcg_debug_assert(ct_str != NULL);
2115 
2116             while (*ct_str != '\0') {
2117                 switch (*ct_str) {
2118                 case '0' ... '9':
2119                     {
2120                         int oarg = *ct_str - '0';
2121                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2122                         tcg_debug_assert(oarg < def->nb_oargs);
2123                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2124                         def->args_ct[i] = def->args_ct[oarg];
2125                         /* The output sets oalias.  */
2126                         def->args_ct[oarg].oalias = true;
2127                         def->args_ct[oarg].alias_index = i;
2128                         /* The input sets ialias. */
2129                         def->args_ct[i].ialias = true;
2130                         def->args_ct[i].alias_index = oarg;
2131                     }
2132                     ct_str++;
2133                     break;
2134                 case '&':
2135                     def->args_ct[i].newreg = true;
2136                     ct_str++;
2137                     break;
2138                 case 'i':
2139                     def->args_ct[i].ct |= TCG_CT_CONST;
2140                     ct_str++;
2141                     break;
2142 
2143                 /* Include all of the target-specific constraints. */
2144 
2145 #undef CONST
2146 #define CONST(CASE, MASK) \
2147     case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2148 #define REGS(CASE, MASK) \
2149     case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2150 
2151 #include "tcg-target-con-str.h"
2152 
2153 #undef REGS
2154 #undef CONST
2155                 default:
2156                     /* Typo in TCGTargetOpDef constraint. */
2157                     g_assert_not_reached();
2158                 }
2159             }
2160         }
2161 
2162         /* TCGTargetOpDef entry with too much information? */
2163         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2164 
2165         /* sort the constraints (XXX: this is just a heuristic) */
2166         sort_constraints(def, 0, def->nb_oargs);
2167         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2168     }
2169 }
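/*
 * For illustration: a backend constraint set such as C_O1_I2(r, r, ri)
 * describes one output in any register and two inputs, the second of
 * which may instead be a constant ('i').  A digit in an input slot,
 * as in C_O1_I2(r, 0, r), aliases that input to the numbered output.
 */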
2170 
2171 void tcg_op_remove(TCGContext *s, TCGOp *op)
2172 {
2173     TCGLabel *label;
2174 
2175     switch (op->opc) {
2176     case INDEX_op_br:
2177         label = arg_label(op->args[0]);
2178         label->refs--;
2179         break;
2180     case INDEX_op_brcond_i32:
2181     case INDEX_op_brcond_i64:
2182         label = arg_label(op->args[3]);
2183         label->refs--;
2184         break;
2185     case INDEX_op_brcond2_i32:
2186         label = arg_label(op->args[5]);
2187         label->refs--;
2188         break;
2189     default:
2190         break;
2191     }
2192 
2193     QTAILQ_REMOVE(&s->ops, op, link);
2194     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2195     s->nb_ops--;
2196 
2197 #ifdef CONFIG_PROFILER
2198     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2199 #endif
2200 }
2201 
2202 void tcg_remove_ops_after(TCGOp *op)
2203 {
2204     TCGContext *s = tcg_ctx;
2205 
2206     while (true) {
2207         TCGOp *last = tcg_last_op();
2208         if (last == op) {
2209             return;
2210         }
2211         tcg_op_remove(s, last);
2212     }
2213 }
2214 
2215 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2216 {
2217     TCGContext *s = tcg_ctx;
2218     TCGOp *op;
2219 
2220     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2221         op = tcg_malloc(sizeof(TCGOp));
2222     } else {
2223         op = QTAILQ_FIRST(&s->free_ops);
2224         QTAILQ_REMOVE(&s->free_ops, op, link);
2225     }
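    /* Reset all fields up to, but not including, the list linkage;
       the link itself is (re)written when the op is inserted.  */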
2226     memset(op, 0, offsetof(TCGOp, link));
2227     op->opc = opc;
2228     s->nb_ops++;
2229 
2230     return op;
2231 }
2232 
2233 TCGOp *tcg_emit_op(TCGOpcode opc)
2234 {
2235     TCGOp *op = tcg_op_alloc(opc);
2236     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2237     return op;
2238 }
2239 
2240 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2241 {
2242     TCGOp *new_op = tcg_op_alloc(opc);
2243     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2244     return new_op;
2245 }
2246 
2247 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2248 {
2249     TCGOp *new_op = tcg_op_alloc(opc);
2250     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2251     return new_op;
2252 }
2253 
2254 /* Reachability analysis: remove unreachable code.  */
2255 static void reachable_code_pass(TCGContext *s)
2256 {
2257     TCGOp *op, *op_next;
2258     bool dead = false;
2259 
2260     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2261         bool remove = dead;
2262         TCGLabel *label;
2263 
2264         switch (op->opc) {
2265         case INDEX_op_set_label:
2266             label = arg_label(op->args[0]);
2267             if (label->refs == 0) {
2268                 /*
2269                  * While there is an occasional backward branch, virtually
2270                  * all branches generated by the translators are forward.
2271                  * This means that generally we will already have removed
2272                  * all references to the label, and there is
2273                  * little to be gained by iterating.
2274                  */
2275                 remove = true;
2276             } else {
2277                 /* Once we see a label, insns become live again.  */
2278                 dead = false;
2279                 remove = false;
2280 
2281                 /*
2282                  * Optimization can fold conditional branches to unconditional.
2283                  * If we find a label with one reference which is preceded by
2284                  * an unconditional branch to it, remove both.  This had to
2285                  * wait until the dead code between them was removed.
2286                  */
2287                 if (label->refs == 1) {
2288                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2289                     if (op_prev->opc == INDEX_op_br &&
2290                         label == arg_label(op_prev->args[0])) {
2291                         tcg_op_remove(s, op_prev);
2292                         remove = true;
2293                     }
2294                 }
2295             }
2296             break;
2297 
2298         case INDEX_op_br:
2299         case INDEX_op_exit_tb:
2300         case INDEX_op_goto_ptr:
2301             /* Unconditional branches; everything following is dead.  */
2302             dead = true;
2303             break;
2304 
2305         case INDEX_op_call:
2306             /* Notice noreturn helper calls, raising exceptions.  */
2307             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2308                 dead = true;
2309             }
2310             break;
2311 
2312         case INDEX_op_insn_start:
2313             /* Never remove -- we need to keep these for unwind.  */
2314             remove = false;
2315             break;
2316 
2317         default:
2318             break;
2319         }
2320 
2321         if (remove) {
2322             tcg_op_remove(s, op);
2323         }
2324     }
2325 }
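/*
 * Sketch of the effect: once the optimizer has folded a conditional
 * branch into an unconditional "br $L", every op between that br and
 * the next referenced set_label is dropped, and a set_label whose
 * refcount has fallen to zero is dropped as well.
 */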
2326 
2327 #define TS_DEAD  1
2328 #define TS_MEM   2
2329 
2330 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2331 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2332 
2333 /* For liveness_pass_1, the register preferences for a given temp.  */
2334 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2335 {
2336     return ts->state_ptr;
2337 }
2338 
2339 /* For liveness_pass_1, reset the preferences for a given temp to the
2340  * maximal regset for its type.
2341  */
2342 static inline void la_reset_pref(TCGTemp *ts)
2343 {
2344     *la_temp_pref(ts)
2345         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2346 }
2347 
2348 /* liveness analysis: end of function: all temps are dead, and globals
2349    should be in memory. */
2350 static void la_func_end(TCGContext *s, int ng, int nt)
2351 {
2352     int i;
2353 
2354     for (i = 0; i < ng; ++i) {
2355         s->temps[i].state = TS_DEAD | TS_MEM;
2356         la_reset_pref(&s->temps[i]);
2357     }
2358     for (i = ng; i < nt; ++i) {
2359         s->temps[i].state = TS_DEAD;
2360         la_reset_pref(&s->temps[i]);
2361     }
2362 }
2363 
2364 /* liveness analysis: end of basic block: all temps are dead, globals
2365    and local temps should be in memory. */
2366 static void la_bb_end(TCGContext *s, int ng, int nt)
2367 {
2368     int i;
2369 
2370     for (i = 0; i < nt; ++i) {
2371         TCGTemp *ts = &s->temps[i];
2372         int state;
2373 
2374         switch (ts->kind) {
2375         case TEMP_FIXED:
2376         case TEMP_GLOBAL:
2377         case TEMP_LOCAL:
2378             state = TS_DEAD | TS_MEM;
2379             break;
2380         case TEMP_NORMAL:
2381         case TEMP_CONST:
2382             state = TS_DEAD;
2383             break;
2384         default:
2385             g_assert_not_reached();
2386         }
2387         ts->state = state;
2388         la_reset_pref(ts);
2389     }
2390 }
2391 
2392 /* liveness analysis: sync globals back to memory.  */
2393 static void la_global_sync(TCGContext *s, int ng)
2394 {
2395     int i;
2396 
2397     for (i = 0; i < ng; ++i) {
2398         int state = s->temps[i].state;
2399         s->temps[i].state = state | TS_MEM;
2400         if (state == TS_DEAD) {
2401             /* If the global was previously dead, reset prefs.  */
2402             la_reset_pref(&s->temps[i]);
2403         }
2404     }
2405 }
2406 
2407 /*
2408  * liveness analysis: conditional branch: all temps are dead,
2409  * globals and local temps should be synced.
2410  */
2411 static void la_bb_sync(TCGContext *s, int ng, int nt)
2412 {
2413     la_global_sync(s, ng);
2414 
2415     for (int i = ng; i < nt; ++i) {
2416         TCGTemp *ts = &s->temps[i];
2417         int state;
2418 
2419         switch (ts->kind) {
2420         case TEMP_LOCAL:
2421             state = ts->state;
2422             ts->state = state | TS_MEM;
2423             if (state != TS_DEAD) {
2424                 continue;
2425             }
2426             break;
2427         case TEMP_NORMAL:
2428             s->temps[i].state = TS_DEAD;
2429             break;
2430         case TEMP_CONST:
2431             continue;
2432         default:
2433             g_assert_not_reached();
2434         }
2435         la_reset_pref(&s->temps[i]);
2436     }
2437 }
2438 
2439 /* liveness analysis: sync globals back to memory and kill.  */
2440 static void la_global_kill(TCGContext *s, int ng)
2441 {
2442     int i;
2443 
2444     for (i = 0; i < ng; i++) {
2445         s->temps[i].state = TS_DEAD | TS_MEM;
2446         la_reset_pref(&s->temps[i]);
2447     }
2448 }
2449 
2450 /* liveness analysis: note live globals crossing calls.  */
2451 static void la_cross_call(TCGContext *s, int nt)
2452 {
2453     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2454     int i;
2455 
2456     for (i = 0; i < nt; i++) {
2457         TCGTemp *ts = &s->temps[i];
2458         if (!(ts->state & TS_DEAD)) {
2459             TCGRegSet *pset = la_temp_pref(ts);
2460             TCGRegSet set = *pset;
2461 
2462             set &= mask;
2463             /* If the combination is not possible, restart.  */
2464             if (set == 0) {
2465                 set = tcg_target_available_regs[ts->type] & mask;
2466             }
2467             *pset = set;
2468         }
2469     }
2470 }
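/*
 * Example (register names illustrative): if a live temp prefers
 * {rax, rcx} and both are call-clobbered, its preference is restarted
 * from the call-saved subset of the registers available for its type.
 */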
2471 
2472 /* Liveness analysis: update the opc_arg_life array to tell whether a
2473    given input argument is dead. Instructions updating dead
2474    temporaries are removed. */
2475 static void liveness_pass_1(TCGContext *s)
2476 {
2477     int nb_globals = s->nb_globals;
2478     int nb_temps = s->nb_temps;
2479     TCGOp *op, *op_prev;
2480     TCGRegSet *prefs;
2481     int i;
2482 
2483     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2484     for (i = 0; i < nb_temps; ++i) {
2485         s->temps[i].state_ptr = prefs + i;
2486     }
2487 
2488     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2489     la_func_end(s, nb_globals, nb_temps);
2490 
2491     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2492         int nb_iargs, nb_oargs;
2493         TCGOpcode opc_new, opc_new2;
2494         bool have_opc_new2;
2495         TCGLifeData arg_life = 0;
2496         TCGTemp *ts;
2497         TCGOpcode opc = op->opc;
2498         const TCGOpDef *def = &tcg_op_defs[opc];
2499 
2500         switch (opc) {
2501         case INDEX_op_call:
2502             {
2503                 int call_flags;
2504                 int nb_call_regs;
2505 
2506                 nb_oargs = TCGOP_CALLO(op);
2507                 nb_iargs = TCGOP_CALLI(op);
2508                 call_flags = tcg_call_flags(op);
2509 
2510                 /* pure functions can be removed if their result is unused */
2511                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2512                     for (i = 0; i < nb_oargs; i++) {
2513                         ts = arg_temp(op->args[i]);
2514                         if (ts->state != TS_DEAD) {
2515                             goto do_not_remove_call;
2516                         }
2517                     }
2518                     goto do_remove;
2519                 }
2520             do_not_remove_call:
2521 
2522                 /* Output args are dead.  */
2523                 for (i = 0; i < nb_oargs; i++) {
2524                     ts = arg_temp(op->args[i]);
2525                     if (ts->state & TS_DEAD) {
2526                         arg_life |= DEAD_ARG << i;
2527                     }
2528                     if (ts->state & TS_MEM) {
2529                         arg_life |= SYNC_ARG << i;
2530                     }
2531                     ts->state = TS_DEAD;
2532                     la_reset_pref(ts);
2533 
2534                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2535                     op->output_pref[i] = 0;
2536                 }
2537 
2538                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2539                                     TCG_CALL_NO_READ_GLOBALS))) {
2540                     la_global_kill(s, nb_globals);
2541                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2542                     la_global_sync(s, nb_globals);
2543                 }
2544 
2545                 /* Record arguments that die in this helper.  */
2546                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2547                     ts = arg_temp(op->args[i]);
2548                     if (ts && ts->state & TS_DEAD) {
2549                         arg_life |= DEAD_ARG << i;
2550                     }
2551                 }
2552 
2553                 /* For all live registers, remove call-clobbered prefs.  */
2554                 la_cross_call(s, nb_temps);
2555 
2556                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2557 
2558                 /* Input arguments are live for preceding opcodes.  */
2559                 for (i = 0; i < nb_iargs; i++) {
2560                     ts = arg_temp(op->args[i + nb_oargs]);
2561                     if (ts && ts->state & TS_DEAD) {
2562                         /* For those arguments that die, and will be allocated
2563                          * in registers, clear the register set for that arg,
2564                          * to be filled in below.  For args that will be on
2565                          * the stack, reset to any available reg.
2566                          */
2567                         *la_temp_pref(ts)
2568                             = (i < nb_call_regs ? 0 :
2569                                tcg_target_available_regs[ts->type]);
2570                         ts->state &= ~TS_DEAD;
2571                     }
2572                 }
2573 
2574                 /* For each input argument, add its input register to prefs.
2575                    If a temp is used once, this produces a single set bit.  */
2576                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2577                     ts = arg_temp(op->args[i + nb_oargs]);
2578                     if (ts) {
2579                         tcg_regset_set_reg(*la_temp_pref(ts),
2580                                            tcg_target_call_iarg_regs[i]);
2581                     }
2582                 }
2583             }
2584             break;
2585         case INDEX_op_insn_start:
2586             break;
2587         case INDEX_op_discard:
2588             /* mark the temporary as dead */
2589             ts = arg_temp(op->args[0]);
2590             ts->state = TS_DEAD;
2591             la_reset_pref(ts);
2592             break;
2593 
2594         case INDEX_op_add2_i32:
2595             opc_new = INDEX_op_add_i32;
2596             goto do_addsub2;
2597         case INDEX_op_sub2_i32:
2598             opc_new = INDEX_op_sub_i32;
2599             goto do_addsub2;
2600         case INDEX_op_add2_i64:
2601             opc_new = INDEX_op_add_i64;
2602             goto do_addsub2;
2603         case INDEX_op_sub2_i64:
2604             opc_new = INDEX_op_sub_i64;
2605         do_addsub2:
2606             nb_iargs = 4;
2607             nb_oargs = 2;
2608             /* Test if the high part of the operation is dead, but not
2609                the low part.  The result can be optimized to a simple
2610                add or sub.  This happens often for an x86_64 guest when
2611                the CPU mode is set to 32-bit.  */
2612             if (arg_temp(op->args[1])->state == TS_DEAD) {
2613                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2614                     goto do_remove;
2615                 }
2616                 /* Replace the opcode and adjust the args in place,
2617                    leaving 3 unused args at the end.  */
2618                 op->opc = opc = opc_new;
2619                 op->args[1] = op->args[2];
2620                 op->args[2] = op->args[4];
2621                 /* Fall through and mark the single-word operation live.  */
2622                 nb_iargs = 2;
2623                 nb_oargs = 1;
2624             }
2625             goto do_not_remove;
2626 
2627         case INDEX_op_mulu2_i32:
2628             opc_new = INDEX_op_mul_i32;
2629             opc_new2 = INDEX_op_muluh_i32;
2630             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2631             goto do_mul2;
2632         case INDEX_op_muls2_i32:
2633             opc_new = INDEX_op_mul_i32;
2634             opc_new2 = INDEX_op_mulsh_i32;
2635             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2636             goto do_mul2;
2637         case INDEX_op_mulu2_i64:
2638             opc_new = INDEX_op_mul_i64;
2639             opc_new2 = INDEX_op_muluh_i64;
2640             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2641             goto do_mul2;
2642         case INDEX_op_muls2_i64:
2643             opc_new = INDEX_op_mul_i64;
2644             opc_new2 = INDEX_op_mulsh_i64;
2645             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2646             goto do_mul2;
2647         do_mul2:
2648             nb_iargs = 2;
2649             nb_oargs = 2;
2650             if (arg_temp(op->args[1])->state == TS_DEAD) {
2651                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2652                     /* Both parts of the operation are dead.  */
2653                     goto do_remove;
2654                 }
2655                 /* The high part of the operation is dead; generate the low. */
2656                 op->opc = opc = opc_new;
2657                 op->args[1] = op->args[2];
2658                 op->args[2] = op->args[3];
2659             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2660                 /* The low part of the operation is dead; generate the high. */
2661                 op->opc = opc = opc_new2;
2662                 op->args[0] = op->args[1];
2663                 op->args[1] = op->args[2];
2664                 op->args[2] = op->args[3];
2665             } else {
2666                 goto do_not_remove;
2667             }
2668             /* Mark the single-word operation live.  */
2669             nb_oargs = 1;
2670             goto do_not_remove;
2671 
2672         default:
2673             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2674             nb_iargs = def->nb_iargs;
2675             nb_oargs = def->nb_oargs;
2676 
2677             /* Test if the operation can be removed because all
2678                its outputs are dead. We assume that nb_oargs == 0
2679                implies side effects */
2680             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2681                 for (i = 0; i < nb_oargs; i++) {
2682                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2683                         goto do_not_remove;
2684                     }
2685                 }
2686                 goto do_remove;
2687             }
2688             goto do_not_remove;
2689 
2690         do_remove:
2691             tcg_op_remove(s, op);
2692             break;
2693 
2694         do_not_remove:
2695             for (i = 0; i < nb_oargs; i++) {
2696                 ts = arg_temp(op->args[i]);
2697 
2698                 /* Remember the preference of the uses that followed.  */
2699                 op->output_pref[i] = *la_temp_pref(ts);
2700 
2701                 /* Output args are dead.  */
2702                 if (ts->state & TS_DEAD) {
2703                     arg_life |= DEAD_ARG << i;
2704                 }
2705                 if (ts->state & TS_MEM) {
2706                     arg_life |= SYNC_ARG << i;
2707                 }
2708                 ts->state = TS_DEAD;
2709                 la_reset_pref(ts);
2710             }
2711 
2712             /* If end of basic block, update.  */
2713             if (def->flags & TCG_OPF_BB_EXIT) {
2714                 la_func_end(s, nb_globals, nb_temps);
2715             } else if (def->flags & TCG_OPF_COND_BRANCH) {
2716                 la_bb_sync(s, nb_globals, nb_temps);
2717             } else if (def->flags & TCG_OPF_BB_END) {
2718                 la_bb_end(s, nb_globals, nb_temps);
2719             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2720                 la_global_sync(s, nb_globals);
2721                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2722                     la_cross_call(s, nb_temps);
2723                 }
2724             }
2725 
2726             /* Record arguments that die in this opcode.  */
2727             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2728                 ts = arg_temp(op->args[i]);
2729                 if (ts->state & TS_DEAD) {
2730                     arg_life |= DEAD_ARG << i;
2731                 }
2732             }
2733 
2734             /* Input arguments are live for preceding opcodes.  */
2735             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2736                 ts = arg_temp(op->args[i]);
2737                 if (ts->state & TS_DEAD) {
2738                     /* For operands that were dead, initially allow
2739                        all regs for the type.  */
2740                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2741                     ts->state &= ~TS_DEAD;
2742                 }
2743             }
2744 
2745             /* Incorporate constraints for this operand.  */
2746             switch (opc) {
2747             case INDEX_op_mov_i32:
2748             case INDEX_op_mov_i64:
2749                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2750                    have proper constraints.  That said, special case
2751                    moves to propagate preferences backward.  */
2752                 if (IS_DEAD_ARG(1)) {
2753                     *la_temp_pref(arg_temp(op->args[0]))
2754                         = *la_temp_pref(arg_temp(op->args[1]));
2755                 }
2756                 break;
2757 
2758             default:
2759                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2760                     const TCGArgConstraint *ct = &def->args_ct[i];
2761                     TCGRegSet set, *pset;
2762 
2763                     ts = arg_temp(op->args[i]);
2764                     pset = la_temp_pref(ts);
2765                     set = *pset;
2766 
2767                     set &= ct->regs;
2768                     if (ct->ialias) {
2769                         set &= op->output_pref[ct->alias_index];
2770                     }
2771                     /* If the combination is not possible, restart.  */
2772                     if (set == 0) {
2773                         set = ct->regs;
2774                     }
2775                     *pset = set;
2776                 }
2777                 break;
2778             }
2779             break;
2780         }
2781         op->life = arg_life;
2782     }
2783 }
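/*
 * The resulting op->life bits surface in dumps (see tcg_dump_ops) as,
 * e.g., "add_i32 tmp2,tmp0,tmp1   dead: 1 2", marking args 1 and 2
 * (the inputs) as dying at this op; "sync: 0" would mark output 0 as
 * also needing to be stored back to its memory slot.
 */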
2784 
2785 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2786 static bool liveness_pass_2(TCGContext *s)
2787 {
2788     int nb_globals = s->nb_globals;
2789     int nb_temps, i;
2790     bool changes = false;
2791     TCGOp *op, *op_next;
2792 
2793     /* Create a temporary for each indirect global.  */
2794     for (i = 0; i < nb_globals; ++i) {
2795         TCGTemp *its = &s->temps[i];
2796         if (its->indirect_reg) {
2797             TCGTemp *dts = tcg_temp_alloc(s);
2798             dts->type = its->type;
2799             dts->base_type = its->base_type;
2800             its->state_ptr = dts;
2801         } else {
2802             its->state_ptr = NULL;
2803         }
2804         /* All globals begin dead.  */
2805         its->state = TS_DEAD;
2806     }
2807     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2808         TCGTemp *its = &s->temps[i];
2809         its->state_ptr = NULL;
2810         its->state = TS_DEAD;
2811     }
2812 
2813     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2814         TCGOpcode opc = op->opc;
2815         const TCGOpDef *def = &tcg_op_defs[opc];
2816         TCGLifeData arg_life = op->life;
2817         int nb_iargs, nb_oargs, call_flags;
2818         TCGTemp *arg_ts, *dir_ts;
2819 
2820         if (opc == INDEX_op_call) {
2821             nb_oargs = TCGOP_CALLO(op);
2822             nb_iargs = TCGOP_CALLI(op);
2823             call_flags = tcg_call_flags(op);
2824         } else {
2825             nb_iargs = def->nb_iargs;
2826             nb_oargs = def->nb_oargs;
2827 
2828             /* Set flags similar to how calls require.  */
2829             if (def->flags & TCG_OPF_COND_BRANCH) {
2830                 /* Like reading globals: sync_globals */
2831                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2832             } else if (def->flags & TCG_OPF_BB_END) {
2833                 /* Like writing globals: save_globals */
2834                 call_flags = 0;
2835             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2836                 /* Like reading globals: sync_globals */
2837                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2838             } else {
2839                 /* No effect on globals.  */
2840                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2841                               TCG_CALL_NO_WRITE_GLOBALS);
2842             }
2843         }
2844 
2845         /* Make sure that input arguments are available.  */
2846         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2847             arg_ts = arg_temp(op->args[i]);
2848             if (arg_ts) {
2849                 dir_ts = arg_ts->state_ptr;
2850                 if (dir_ts && arg_ts->state == TS_DEAD) {
2851                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2852                                       ? INDEX_op_ld_i32
2853                                       : INDEX_op_ld_i64);
2854                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2855 
2856                     lop->args[0] = temp_arg(dir_ts);
2857                     lop->args[1] = temp_arg(arg_ts->mem_base);
2858                     lop->args[2] = arg_ts->mem_offset;
2859 
2860                     /* Loaded, but synced with memory.  */
2861                     arg_ts->state = TS_MEM;
2862                 }
2863             }
2864         }
2865 
2866         /* Perform input replacement, and mark inputs that became dead.
2867            No action is required except keeping temp_state up to date
2868            so that we reload when needed.  */
2869         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2870             arg_ts = arg_temp(op->args[i]);
2871             if (arg_ts) {
2872                 dir_ts = arg_ts->state_ptr;
2873                 if (dir_ts) {
2874                     op->args[i] = temp_arg(dir_ts);
2875                     changes = true;
2876                     if (IS_DEAD_ARG(i)) {
2877                         arg_ts->state = TS_DEAD;
2878                     }
2879                 }
2880             }
2881         }
2882 
2883         /* Liveness analysis should ensure that the following are
2884            all correct, for call sites and basic block end points.  */
2885         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2886             /* Nothing to do */
2887         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2888             for (i = 0; i < nb_globals; ++i) {
2889                 /* Liveness should see that globals are synced back,
2890                    that is, either TS_DEAD or TS_MEM.  */
2891                 arg_ts = &s->temps[i];
2892                 tcg_debug_assert(arg_ts->state_ptr == 0
2893                                  || arg_ts->state != 0);
2894             }
2895         } else {
2896             for (i = 0; i < nb_globals; ++i) {
2897                 /* Liveness should see that globals are saved back,
2898                    that is, TS_DEAD, waiting to be reloaded.  */
2899                 arg_ts = &s->temps[i];
2900                 tcg_debug_assert(arg_ts->state_ptr == 0
2901                                  || arg_ts->state == TS_DEAD);
2902             }
2903         }
2904 
2905         /* Outputs become available.  */
2906         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2907             arg_ts = arg_temp(op->args[0]);
2908             dir_ts = arg_ts->state_ptr;
2909             if (dir_ts) {
2910                 op->args[0] = temp_arg(dir_ts);
2911                 changes = true;
2912 
2913                 /* The output is now live and modified.  */
2914                 arg_ts->state = 0;
2915 
2916                 if (NEED_SYNC_ARG(0)) {
2917                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2918                                       ? INDEX_op_st_i32
2919                                       : INDEX_op_st_i64);
2920                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2921                     TCGTemp *out_ts = dir_ts;
2922 
2923                     if (IS_DEAD_ARG(0)) {
2924                         out_ts = arg_temp(op->args[1]);
2925                         arg_ts->state = TS_DEAD;
2926                         tcg_op_remove(s, op);
2927                     } else {
2928                         arg_ts->state = TS_MEM;
2929                     }
2930 
2931                     sop->args[0] = temp_arg(out_ts);
2932                     sop->args[1] = temp_arg(arg_ts->mem_base);
2933                     sop->args[2] = arg_ts->mem_offset;
2934                 } else {
2935                     tcg_debug_assert(!IS_DEAD_ARG(0));
2936                 }
2937             }
2938         } else {
2939             for (i = 0; i < nb_oargs; i++) {
2940                 arg_ts = arg_temp(op->args[i]);
2941                 dir_ts = arg_ts->state_ptr;
2942                 if (!dir_ts) {
2943                     continue;
2944                 }
2945                 op->args[i] = temp_arg(dir_ts);
2946                 changes = true;
2947 
2948                 /* The output is now live and modified.  */
2949                 arg_ts->state = 0;
2950 
2951                 /* Sync outputs upon their last write.  */
2952                 if (NEED_SYNC_ARG(i)) {
2953                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2954                                       ? INDEX_op_st_i32
2955                                       : INDEX_op_st_i64);
2956                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2957 
2958                     sop->args[0] = temp_arg(dir_ts);
2959                     sop->args[1] = temp_arg(arg_ts->mem_base);
2960                     sop->args[2] = arg_ts->mem_offset;
2961 
2962                     arg_ts->state = TS_MEM;
2963                 }
2964                 /* Drop outputs that are dead.  */
2965                 if (IS_DEAD_ARG(i)) {
2966                     arg_ts->state = TS_DEAD;
2967                 }
2968             }
2969         }
2970     }
2971 
2972     return changes;
2973 }
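/*
 * Sketch: for an indirect global G, a use such as "add_i32 G,G,$1" is
 * rewritten in terms of the direct temp d allocated above, with a
 * load inserted before the first use and a store after the write:
 *     ld_i32 d,base,$off / add_i32 d,d,$1 / st_i32 d,base,$off
 * (names illustrative; base/off come from the global's mem_base and
 * mem_offset).
 */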
2974 
2975 #ifdef CONFIG_DEBUG_TCG
2976 static void dump_regs(TCGContext *s)
2977 {
2978     TCGTemp *ts;
2979     int i;
2980     char buf[64];
2981 
2982     for (i = 0; i < s->nb_temps; i++) {
2983         ts = &s->temps[i];
2984         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2985         switch (ts->val_type) {
2986         case TEMP_VAL_REG:
2987             printf("%s", tcg_target_reg_names[ts->reg]);
2988             break;
2989         case TEMP_VAL_MEM:
2990             printf("%d(%s)", (int)ts->mem_offset,
2991                    tcg_target_reg_names[ts->mem_base->reg]);
2992             break;
2993         case TEMP_VAL_CONST:
2994             printf("$0x%" PRIx64, ts->val);
2995             break;
2996         case TEMP_VAL_DEAD:
2997             printf("D");
2998             break;
2999         default:
3000             printf("???");
3001             break;
3002         }
3003         printf("\n");
3004     }
3005 
3006     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3007         if (s->reg_to_temp[i] != NULL) {
3008             printf("%s: %s\n",
3009                    tcg_target_reg_names[i],
3010                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3011         }
3012     }
3013 }
3014 
3015 static void check_regs(TCGContext *s)
3016 {
3017     int reg;
3018     int k;
3019     TCGTemp *ts;
3020     char buf[64];
3021 
3022     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3023         ts = s->reg_to_temp[reg];
3024         if (ts != NULL) {
3025             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3026                 printf("Inconsistency for register %s:\n",
3027                        tcg_target_reg_names[reg]);
3028                 goto fail;
3029             }
3030         }
3031     }
3032     for (k = 0; k < s->nb_temps; k++) {
3033         ts = &s->temps[k];
3034         if (ts->val_type == TEMP_VAL_REG
3035             && ts->kind != TEMP_FIXED
3036             && s->reg_to_temp[ts->reg] != ts) {
3037             printf("Inconsistency for temp %s:\n",
3038                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3039         fail:
3040             printf("reg state:\n");
3041             dump_regs(s);
3042             tcg_abort();
3043         }
3044     }
3045 }
3046 #endif
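/*
 * When CONFIG_DEBUG_TCG is enabled, these consistency checks run from
 * the code generation loop (see tcg_gen_code later in this file) and
 * abort on any mismatch between reg_to_temp[] and the temps'
 * val_type/reg fields.
 */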
3047 
3048 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3049 {
3050     intptr_t off, size, align;
3051 
3052     switch (ts->type) {
3053     case TCG_TYPE_I32:
3054         size = align = 4;
3055         break;
3056     case TCG_TYPE_I64:
3057     case TCG_TYPE_V64:
3058         size = align = 8;
3059         break;
3060     case TCG_TYPE_V128:
3061         size = align = 16;
3062         break;
3063     case TCG_TYPE_V256:
3064         /* Note that we do not require aligned storage for V256. */
3065         size = 32, align = 16;
3066         break;
3067     default:
3068         g_assert_not_reached();
3069     }
3070 
3071     /*
3072      * Assume the stack is sufficiently aligned.
3073      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3074      * and do not require 16 byte vector alignment.  This seems slightly
3075      * easier than fully parameterizing the above switch statement.
3076      */
3077     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3078     off = ROUND_UP(s->current_frame_offset, align);
3079 
3080     /* If we've exhausted the stack frame, restart with a smaller TB. */
3081     if (off + size > s->frame_end) {
3082         tcg_raise_tb_overflow(s);
3083     }
3084     s->current_frame_offset = off + size;
3085 
3086     ts->mem_offset = off;
3087 #if defined(__sparc__)
3088     ts->mem_offset += TCG_TARGET_STACK_BIAS;
3089 #endif
3090     ts->mem_base = s->frame_temp;
3091     ts->mem_allocated = 1;
3092 }
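/*
 * Worked example: with current_frame_offset == 20 and an I64 temp
 * (size == align == 8), off = ROUND_UP(20, 8) = 24, the temp occupies
 * bytes [24, 32) of the frame, and current_frame_offset advances to 32.
 */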
3093 
3094 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3095 
3096 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3097    mark it free; otherwise mark it dead.  */
3098 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3099 {
3100     TCGTempVal new_type;
3101 
3102     switch (ts->kind) {
3103     case TEMP_FIXED:
3104         return;
3105     case TEMP_GLOBAL:
3106     case TEMP_LOCAL:
3107         new_type = TEMP_VAL_MEM;
3108         break;
3109     case TEMP_NORMAL:
3110         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3111         break;
3112     case TEMP_CONST:
3113         new_type = TEMP_VAL_CONST;
3114         break;
3115     default:
3116         g_assert_not_reached();
3117     }
3118     if (ts->val_type == TEMP_VAL_REG) {
3119         s->reg_to_temp[ts->reg] = NULL;
3120     }
3121     ts->val_type = new_type;
3122 }
3123 
3124 /* Mark a temporary as dead.  */
3125 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3126 {
3127     temp_free_or_dead(s, ts, 1);
3128 }
3129 
3130 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3131    register needs to be allocated to store a constant.  If 'free_or_dead'
3132    is non-zero, subsequently release the temporary; if it is positive, the
3133    temp is dead; if it is negative, the temp is free.  */
3134 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3135                       TCGRegSet preferred_regs, int free_or_dead)
3136 {
3137     if (!temp_readonly(ts) && !ts->mem_coherent) {
3138         if (!ts->mem_allocated) {
3139             temp_allocate_frame(s, ts);
3140         }
3141         switch (ts->val_type) {
3142         case TEMP_VAL_CONST:
3143             /* If we're going to free the temp immediately, then we won't
3144                require it later in a register, so attempt to store the
3145                constant to memory directly.  */
3146             if (free_or_dead
3147                 && tcg_out_sti(s, ts->type, ts->val,
3148                                ts->mem_base->reg, ts->mem_offset)) {
3149                 break;
3150             }
3151             temp_load(s, ts, tcg_target_available_regs[ts->type],
3152                       allocated_regs, preferred_regs);
3153             /* fallthrough */
3154 
3155         case TEMP_VAL_REG:
3156             tcg_out_st(s, ts->type, ts->reg,
3157                        ts->mem_base->reg, ts->mem_offset);
3158             break;
3159 
3160         case TEMP_VAL_MEM:
3161             break;
3162 
3163         case TEMP_VAL_DEAD:
3164         default:
3165             tcg_abort();
3166         }
3167         ts->mem_coherent = 1;
3168     }
3169     if (free_or_dead) {
3170         temp_free_or_dead(s, ts, free_or_dead);
3171     }
3172 }
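/*
 * E.g. a constant temp being freed can often be stored straight to its
 * memory slot via tcg_out_sti when the backend supports that store;
 * otherwise it is first materialized into a register by temp_load.
 */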
3173 
3174 /* free register 'reg' by spilling the corresponding temporary if necessary */
3175 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3176 {
3177     TCGTemp *ts = s->reg_to_temp[reg];
3178     if (ts != NULL) {
3179         temp_sync(s, ts, allocated_regs, 0, -1);
3180     }
3181 }
3182 
3183 /**
3184  * tcg_reg_alloc:
3185  * @required_regs: Set of registers in which we must allocate.
3186  * @allocated_regs: Set of registers which must be avoided.
3187  * @preferred_regs: Set of registers we should prefer.
3188  * @rev: True if we search the registers in "indirect" order.
3189  *
3190  * The allocated register must be in @required_regs & ~@allocated_regs,
3191  * but if we can put it in @preferred_regs we may save a move later.
3192  */
3193 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3194                             TCGRegSet allocated_regs,
3195                             TCGRegSet preferred_regs, bool rev)
3196 {
3197     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3198     TCGRegSet reg_ct[2];
3199     const int *order;
3200 
3201     reg_ct[1] = required_regs & ~allocated_regs;
3202     tcg_debug_assert(reg_ct[1] != 0);
3203     reg_ct[0] = reg_ct[1] & preferred_regs;
3204 
3205     /* Skip the preferred_regs option if it cannot be satisfied,
3206        or if the preference made no difference.  */
3207     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
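    /*
     * An illustrative example: with required_regs = {r0,r1,r2},
     * allocated_regs = {r1} and preferred_regs = {r2}, we get
     * reg_ct[1] = {r0,r2} and reg_ct[0] = {r2}.  Then f == 0, so the
     * loops below try the preferred subset {r2} before falling back
     * to the full candidate set {r0,r2}.
     */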
3208 
3209     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3210 
3211     /* Try free registers, preferences first.  */
3212     for (j = f; j < 2; j++) {
3213         TCGRegSet set = reg_ct[j];
3214 
3215         if (tcg_regset_single(set)) {
3216             /* One register in the set.  */
3217             TCGReg reg = tcg_regset_first(set);
3218             if (s->reg_to_temp[reg] == NULL) {
3219                 return reg;
3220             }
3221         } else {
3222             for (i = 0; i < n; i++) {
3223                 TCGReg reg = order[i];
3224                 if (s->reg_to_temp[reg] == NULL &&
3225                     tcg_regset_test_reg(set, reg)) {
3226                     return reg;
3227                 }
3228             }
3229         }
3230     }
3231 
3232     /* We must spill something.  */
3233     for (j = f; j < 2; j++) {
3234         TCGRegSet set = reg_ct[j];
3235 
3236         if (tcg_regset_single(set)) {
3237             /* One register in the set.  */
3238             TCGReg reg = tcg_regset_first(set);
3239             tcg_reg_free(s, reg, allocated_regs);
3240             return reg;
3241         } else {
3242             for (i = 0; i < n; i++) {
3243                 TCGReg reg = order[i];
3244                 if (tcg_regset_test_reg(set, reg)) {
3245                     tcg_reg_free(s, reg, allocated_regs);
3246                     return reg;
3247                 }
3248             }
3249         }
3250     }
3251 
3252     tcg_abort();
3253 }
3254 
3255 /* Make sure the temporary is in a register.  If needed, allocate the register
3256    from DESIRED while avoiding ALLOCATED.  */
3257 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3258                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3259 {
3260     TCGReg reg;
3261 
3262     switch (ts->val_type) {
3263     case TEMP_VAL_REG:
3264         return;
3265     case TEMP_VAL_CONST:
3266         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3267                             preferred_regs, ts->indirect_base);
3268         if (ts->type <= TCG_TYPE_I64) {
3269             tcg_out_movi(s, ts->type, reg, ts->val);
3270         } else {
3271             uint64_t val = ts->val;
3272             MemOp vece = MO_64;
3273 
3274             /*
3275              * Find the minimal vector element that matches the constant.
3276              * The targets will, in general, have to do this search anyway,
3277              * so do it generically here.
3278              */
3279             if (val == dup_const(MO_8, val)) {
3280                 vece = MO_8;
3281             } else if (val == dup_const(MO_16, val)) {
3282                 vece = MO_16;
3283             } else if (val == dup_const(MO_32, val)) {
3284                 vece = MO_32;
3285             }
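            /*
             * Worked examples with illustrative constants: val ==
             * 0x4242424242424242 matches dup_const(MO_8, val), so vece
             * becomes MO_8; val == 0x0001000100010001 first matches at
             * MO_16; something like 0x0123456789abcdef matches none of
             * the tests and stays MO_64.
             */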
3286 
3287             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3288         }
3289         ts->mem_coherent = 0;
3290         break;
3291     case TEMP_VAL_MEM:
3292         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3293                             preferred_regs, ts->indirect_base);
3294         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3295         ts->mem_coherent = 1;
3296         break;
3297     case TEMP_VAL_DEAD:
3298     default:
3299         tcg_abort();
3300     }
3301     ts->reg = reg;
3302     ts->val_type = TEMP_VAL_REG;
3303     s->reg_to_temp[reg] = ts;
3304 }
3305 
3306 /* Save a temporary to memory. 'allocated_regs' is used in case a
3307    temporary register needs to be allocated to store a constant.  */
3308 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3309 {
3310     /* The liveness analysis already ensures that globals are back
3311        in memory. Keep a tcg_debug_assert for safety. */
3312     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3313 }
3314 
3315 /* save globals to their canonical location and assume they can be
3316    modified by the following code. 'allocated_regs' is used in case a
3317    temporary register needs to be allocated to store a constant. */
3318 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3319 {
3320     int i, n;
3321 
3322     for (i = 0, n = s->nb_globals; i < n; i++) {
3323         temp_save(s, &s->temps[i], allocated_regs);
3324     }
3325 }
3326 
3327 /* sync globals to their canonical location and assume they can be
3328    read by the following code. 'allocated_regs' is used in case a
3329    temporary register needs to be allocated to store a constant. */
3330 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3331 {
3332     int i, n;
3333 
3334     for (i = 0, n = s->nb_globals; i < n; i++) {
3335         TCGTemp *ts = &s->temps[i];
3336         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3337                          || ts->kind == TEMP_FIXED
3338                          || ts->mem_coherent);
3339     }
3340 }
3341 
3342 /* at the end of a basic block, we assume all temporaries are dead and
3343    all globals are stored at their canonical location. */
3344 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3345 {
3346     int i;
3347 
3348     for (i = s->nb_globals; i < s->nb_temps; i++) {
3349         TCGTemp *ts = &s->temps[i];
3350 
3351         switch (ts->kind) {
3352         case TEMP_LOCAL:
3353             temp_save(s, ts, allocated_regs);
3354             break;
3355         case TEMP_NORMAL:
3356             /* The liveness analysis already ensures that temps are dead.
3357            Keep a tcg_debug_assert for safety. */
3358             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3359             break;
3360         case TEMP_CONST:
3361             /* Similarly, we should have freed any allocated register. */
3362             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3363             break;
3364         default:
3365             g_assert_not_reached();
3366         }
3367     }
3368 
3369     save_globals(s, allocated_regs);
3370 }
3371 
3372 /*
3373  * At a conditional branch, we assume all temporaries are dead and
3374  * all globals and local temps are synced to their location.
3375  */
3376 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3377 {
3378     sync_globals(s, allocated_regs);
3379 
3380     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3381         TCGTemp *ts = &s->temps[i];
3382         /*
3383          * The liveness analysis already ensures that temps are dead.
3384          * Keep tcg_debug_asserts for safety.
3385          */
3386         switch (ts->kind) {
3387         case TEMP_LOCAL:
3388             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3389             break;
3390         case TEMP_NORMAL:
3391             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3392             break;
3393         case TEMP_CONST:
3394             break;
3395         default:
3396             g_assert_not_reached();
3397         }
3398     }
3399 }
3400 
3401 /*
3402  * Specialized code generation for INDEX_op_mov_* with a constant.
3403  */
3404 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3405                                   tcg_target_ulong val, TCGLifeData arg_life,
3406                                   TCGRegSet preferred_regs)
3407 {
3408     /* ENV should not be modified.  */
3409     tcg_debug_assert(!temp_readonly(ots));
3410 
3411     /* The movi is not explicitly generated here.  */
3412     if (ots->val_type == TEMP_VAL_REG) {
3413         s->reg_to_temp[ots->reg] = NULL;
3414     }
3415     ots->val_type = TEMP_VAL_CONST;
3416     ots->val = val;
3417     ots->mem_coherent = 0;
3418     if (NEED_SYNC_ARG(0)) {
3419         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3420     } else if (IS_DEAD_ARG(0)) {
3421         temp_dead(s, ots);
3422     }
3423 }
3424 
3425 /*
3426  * Specialized code generation for INDEX_op_mov_*.
3427  */
3428 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3429 {
3430     const TCGLifeData arg_life = op->life;
3431     TCGRegSet allocated_regs, preferred_regs;
3432     TCGTemp *ts, *ots;
3433     TCGType otype, itype;
3434 
3435     allocated_regs = s->reserved_regs;
3436     preferred_regs = op->output_pref[0];
3437     ots = arg_temp(op->args[0]);
3438     ts = arg_temp(op->args[1]);
3439 
3440     /* ENV should not be modified.  */
3441     tcg_debug_assert(!temp_readonly(ots));
3442 
3443     /* Note that otype != itype for no-op truncation.  */
3444     otype = ots->type;
3445     itype = ts->type;
3446 
3447     if (ts->val_type == TEMP_VAL_CONST) {
3448         /* propagate constant or generate sti */
3449         tcg_target_ulong val = ts->val;
3450         if (IS_DEAD_ARG(1)) {
3451             temp_dead(s, ts);
3452         }
3453         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3454         return;
3455     }
3456 
3457     /* If the source value is in memory, we're going to be forced
3458        to have it in a register in order to perform the copy.  Copy
3459        the SOURCE value into its own register first, so that we
3460        don't have to reload SOURCE the next time it is used. */
3461     if (ts->val_type == TEMP_VAL_MEM) {
3462         temp_load(s, ts, tcg_target_available_regs[itype],
3463                   allocated_regs, preferred_regs);
3464     }
3465 
3466     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3467     if (IS_DEAD_ARG(0)) {
3468         /* mov to a non-saved dead register makes no sense (even with
3469            liveness analysis disabled). */
3470         tcg_debug_assert(NEED_SYNC_ARG(0));
3471         if (!ots->mem_allocated) {
3472             temp_allocate_frame(s, ots);
3473         }
3474         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3475         if (IS_DEAD_ARG(1)) {
3476             temp_dead(s, ts);
3477         }
3478         temp_dead(s, ots);
3479     } else {
3480         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3481             /* the mov can be suppressed */
3482             if (ots->val_type == TEMP_VAL_REG) {
3483                 s->reg_to_temp[ots->reg] = NULL;
3484             }
3485             ots->reg = ts->reg;
3486             temp_dead(s, ts);
3487         } else {
3488             if (ots->val_type != TEMP_VAL_REG) {
3489                 /* When allocating a new register, make sure not to spill
3490                    the input one. */
3491                 tcg_regset_set_reg(allocated_regs, ts->reg);
3492                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3493                                          allocated_regs, preferred_regs,
3494                                          ots->indirect_base);
3495             }
3496             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3497                 /*
3498                  * Cross register class move not supported.
3499                  * Store the source register into the destination slot
3500                  * and leave the destination temp as TEMP_VAL_MEM.
3501                  */
3502                 assert(!temp_readonly(ots));
3503                 if (!ots->mem_allocated) {
3504                     temp_allocate_frame(s, ots);
3505                 }
3506                 tcg_out_st(s, ts->type, ts->reg,
3507                            ots->mem_base->reg, ots->mem_offset);
3508                 ots->mem_coherent = 1;
3509                 temp_free_or_dead(s, ots, -1);
3510                 return;
3511             }
3512         }
3513         ots->val_type = TEMP_VAL_REG;
3514         ots->mem_coherent = 0;
3515         s->reg_to_temp[ots->reg] = ots;
3516         if (NEED_SYNC_ARG(0)) {
3517             temp_sync(s, ots, allocated_regs, 0, 0);
3518         }
3519     }
3520 }
3521 
3522 /*
3523  * Specialized code generation for INDEX_op_dup_vec.
3524  */
3525 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3526 {
3527     const TCGLifeData arg_life = op->life;
3528     TCGRegSet dup_out_regs, dup_in_regs;
3529     TCGTemp *its, *ots;
3530     TCGType itype, vtype;
3531     intptr_t endian_fixup;
3532     unsigned vece;
3533     bool ok;
3534 
3535     ots = arg_temp(op->args[0]);
3536     its = arg_temp(op->args[1]);
3537 
3538     /* ENV should not be modified.  */
3539     tcg_debug_assert(!temp_readonly(ots));
3540 
3541     itype = its->type;
3542     vece = TCGOP_VECE(op);
3543     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3544 
3545     if (its->val_type == TEMP_VAL_CONST) {
3546         /* Propagate constant via movi -> dupi.  */
3547         tcg_target_ulong val = its->val;
3548         if (IS_DEAD_ARG(1)) {
3549             temp_dead(s, its);
3550         }
3551         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3552         return;
3553     }
3554 
3555     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3556     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3557 
3558     /* Allocate the output register now.  */
3559     if (ots->val_type != TEMP_VAL_REG) {
3560         TCGRegSet allocated_regs = s->reserved_regs;
3561 
3562         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3563             /* Make sure not to spill the input register. */
3564             tcg_regset_set_reg(allocated_regs, its->reg);
3565         }
3566         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3567                                  op->output_pref[0], ots->indirect_base);
3568         ots->val_type = TEMP_VAL_REG;
3569         ots->mem_coherent = 0;
3570         s->reg_to_temp[ots->reg] = ots;
3571     }
3572 
3573     switch (its->val_type) {
3574     case TEMP_VAL_REG:
3575         /*
3576          * The dup constraints must be broad, covering all possible VECE.
3577          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3578          * to fail, indicating that extra moves are required for that case.
3579          */
3580         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3581             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3582                 goto done;
3583             }
3584             /* Try again from memory or a vector input register.  */
3585         }
3586         if (!its->mem_coherent) {
3587             /*
3588              * The input register is not synced, and so an extra store
3589              * would be required to use memory.  Attempt an integer-vector
3590              * register move first.  We do not have a TCGRegSet for this.
3591              */
3592             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3593                 break;
3594             }
3595             /* Sync the temp back to its slot and load from there.  */
3596             temp_sync(s, its, s->reserved_regs, 0, 0);
3597         }
3598         /* fall through */
3599 
3600     case TEMP_VAL_MEM:
3601 #ifdef HOST_WORDS_BIGENDIAN
3602         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3603         endian_fixup -= 1 << vece;
3604 #else
3605         endian_fixup = 0;
3606 #endif
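        /*
         * An illustrative case: on a big-endian host with itype ==
         * TCG_TYPE_I64 and vece == MO_32, endian_fixup is 8 - 4 = 4, so
         * the dup below reads the least significant 32-bit element of
         * the 64-bit slot; on little-endian hosts it sits at offset 0.
         */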
3607         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3608                              its->mem_offset + endian_fixup)) {
3609             goto done;
3610         }
3611         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3612         break;
3613 
3614     default:
3615         g_assert_not_reached();
3616     }
3617 
3618     /* We now have a vector input register, so dup must succeed. */
3619     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3620     tcg_debug_assert(ok);
3621 
3622  done:
3623     if (IS_DEAD_ARG(1)) {
3624         temp_dead(s, its);
3625     }
3626     if (NEED_SYNC_ARG(0)) {
3627         temp_sync(s, ots, s->reserved_regs, 0, 0);
3628     }
3629     if (IS_DEAD_ARG(0)) {
3630         temp_dead(s, ots);
3631     }
3632 }
3633 
3634 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3635 {
3636     const TCGLifeData arg_life = op->life;
3637     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3638     TCGRegSet i_allocated_regs;
3639     TCGRegSet o_allocated_regs;
3640     int i, k, nb_iargs, nb_oargs;
3641     TCGReg reg;
3642     TCGArg arg;
3643     const TCGArgConstraint *arg_ct;
3644     TCGTemp *ts;
3645     TCGArg new_args[TCG_MAX_OP_ARGS];
3646     int const_args[TCG_MAX_OP_ARGS];
3647 
3648     nb_oargs = def->nb_oargs;
3649     nb_iargs = def->nb_iargs;
3650 
3651     /* copy constants */
3652     memcpy(new_args + nb_oargs + nb_iargs,
3653            op->args + nb_oargs + nb_iargs,
3654            sizeof(TCGArg) * def->nb_cargs);
3655 
3656     i_allocated_regs = s->reserved_regs;
3657     o_allocated_regs = s->reserved_regs;
3658 
3659     /* satisfy input constraints */
3660     for (k = 0; k < nb_iargs; k++) {
3661         TCGRegSet i_preferred_regs, o_preferred_regs;
3662 
3663         i = def->args_ct[nb_oargs + k].sort_index;
3664         arg = op->args[i];
3665         arg_ct = &def->args_ct[i];
3666         ts = arg_temp(arg);
3667 
3668         if (ts->val_type == TEMP_VAL_CONST
3669             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3670             /* constant is OK for instruction */
3671             const_args[i] = 1;
3672             new_args[i] = ts->val;
3673             continue;
3674         }
3675 
3676         i_preferred_regs = o_preferred_regs = 0;
3677         if (arg_ct->ialias) {
3678             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3679 
3680             /*
3681              * If the input is readonly, then it cannot also be an
3682              * output and aliased to itself.  If the input is not
3683              * dead after the instruction, we must allocate a new
3684              * register and move it.
3685              */
3686             if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3687                 goto allocate_in_reg;
3688             }
3689 
3690             /*
3691              * Check if the current register has already been allocated
3692              * for another input aliased to an output.
3693              */
3694             if (ts->val_type == TEMP_VAL_REG) {
3695                 reg = ts->reg;
3696                 for (int k2 = 0; k2 < k; k2++) {
3697                     int i2 = def->args_ct[nb_oargs + k2].sort_index;
3698                     if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3699                         goto allocate_in_reg;
3700                     }
3701                 }
3702             }
3703             i_preferred_regs = o_preferred_regs;
3704         }
3705 
3706         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3707         reg = ts->reg;
3708 
3709         if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3710  allocate_in_reg:
3711             /*
3712              * Allocate a new register matching the constraint
3713              * and move the temporary register into it.
3714              */
3715             temp_load(s, ts, tcg_target_available_regs[ts->type],
3716                       i_allocated_regs, 0);
3717             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3718                                 o_preferred_regs, ts->indirect_base);
3719             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3720                 /*
3721                  * Cross register class move not supported.  Sync the
3722                  * temp back to its slot and load from there.
3723                  */
3724                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3725                 tcg_out_ld(s, ts->type, reg,
3726                            ts->mem_base->reg, ts->mem_offset);
3727             }
3728         }
3729         new_args[i] = reg;
3730         const_args[i] = 0;
3731         tcg_regset_set_reg(i_allocated_regs, reg);
3732     }
3733 
3734     /* mark dead temporaries and free the associated registers */
3735     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3736         if (IS_DEAD_ARG(i)) {
3737             temp_dead(s, arg_temp(op->args[i]));
3738         }
3739     }
3740 
3741     if (def->flags & TCG_OPF_COND_BRANCH) {
3742         tcg_reg_alloc_cbranch(s, i_allocated_regs);
3743     } else if (def->flags & TCG_OPF_BB_END) {
3744         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3745     } else {
3746         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3747             /* XXX: permit a generic clobber register list? */
3748             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3749                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3750                     tcg_reg_free(s, i, i_allocated_regs);
3751                 }
3752             }
3753         }
3754         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3755             /* sync globals if the op has side effects and might trigger
3756                an exception. */
3757             sync_globals(s, i_allocated_regs);
3758         }
3759 
3760         /* satisfy the output constraints */
3761         for (k = 0; k < nb_oargs; k++) {
3762             i = def->args_ct[k].sort_index;
3763             arg = op->args[i];
3764             arg_ct = &def->args_ct[i];
3765             ts = arg_temp(arg);
3766 
3767             /* ENV should not be modified.  */
3768             tcg_debug_assert(!temp_readonly(ts));
3769 
3770             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3771                 reg = new_args[arg_ct->alias_index];
3772             } else if (arg_ct->newreg) {
3773                 reg = tcg_reg_alloc(s, arg_ct->regs,
3774                                     i_allocated_regs | o_allocated_regs,
3775                                     op->output_pref[k], ts->indirect_base);
3776             } else {
3777                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3778                                     op->output_pref[k], ts->indirect_base);
3779             }
3780             tcg_regset_set_reg(o_allocated_regs, reg);
3781             if (ts->val_type == TEMP_VAL_REG) {
3782                 s->reg_to_temp[ts->reg] = NULL;
3783             }
3784             ts->val_type = TEMP_VAL_REG;
3785             ts->reg = reg;
3786             /*
3787              * Temp value is modified, so the value kept in memory is
3788              * potentially not the same.
3789              */
3790             ts->mem_coherent = 0;
3791             s->reg_to_temp[reg] = ts;
3792             new_args[i] = reg;
3793         }
3794     }
3795 
3796     /* emit instruction */
3797     if (def->flags & TCG_OPF_VECTOR) {
3798         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3799                        new_args, const_args);
3800     } else {
3801         tcg_out_op(s, op->opc, new_args, const_args);
3802     }
3803 
3804     /* move the outputs into the correct registers if needed */
3805     for (i = 0; i < nb_oargs; i++) {
3806         ts = arg_temp(op->args[i]);
3807 
3808         /* ENV should not be modified.  */
3809         tcg_debug_assert(!temp_readonly(ts));
3810 
3811         if (NEED_SYNC_ARG(i)) {
3812             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3813         } else if (IS_DEAD_ARG(i)) {
3814             temp_dead(s, ts);
3815         }
3816     }
3817 }
3818 
3819 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3820 {
3821     const TCGLifeData arg_life = op->life;
3822     TCGTemp *ots, *itsl, *itsh;
3823     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3824 
3825     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3826     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3827     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3828 
3829     ots = arg_temp(op->args[0]);
3830     itsl = arg_temp(op->args[1]);
3831     itsh = arg_temp(op->args[2]);
3832 
3833     /* ENV should not be modified.  */
3834     tcg_debug_assert(!temp_readonly(ots));
3835 
3836     /* Allocate the output register now.  */
3837     if (ots->val_type != TEMP_VAL_REG) {
3838         TCGRegSet allocated_regs = s->reserved_regs;
3839         TCGRegSet dup_out_regs =
3840             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3841 
3842         /* Make sure not to spill the input registers. */
3843         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3844             tcg_regset_set_reg(allocated_regs, itsl->reg);
3845         }
3846         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3847             tcg_regset_set_reg(allocated_regs, itsh->reg);
3848         }
3849 
3850         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3851                                  op->output_pref[0], ots->indirect_base);
3852         ots->val_type = TEMP_VAL_REG;
3853         ots->mem_coherent = 0;
3854         s->reg_to_temp[ots->reg] = ots;
3855     }
3856 
3857     /* Promote dup2 of immediates to dupi_vec. */
3858     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3859         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
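        /*
         * deposit64 places itsh->val into bits [63:32]; e.g. with the
         * illustrative inputs itsl->val == 0x89abcdef and itsh->val ==
         * 0x01234567, val becomes 0x0123456789abcdef.
         */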
3860         MemOp vece = MO_64;
3861 
3862         if (val == dup_const(MO_8, val)) {
3863             vece = MO_8;
3864         } else if (val == dup_const(MO_16, val)) {
3865             vece = MO_16;
3866         } else if (val == dup_const(MO_32, val)) {
3867             vece = MO_32;
3868         }
3869 
3870         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3871         goto done;
3872     }
3873 
3874     /* If the two inputs form one 64-bit value, try dupm_vec. */
3875     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3876         if (!itsl->mem_coherent) {
3877             temp_sync(s, itsl, s->reserved_regs, 0, 0);
3878         }
3879         if (!itsh->mem_coherent) {
3880             temp_sync(s, itsh, s->reserved_regs, 0, 0);
3881         }
3882 #ifdef HOST_WORDS_BIGENDIAN
3883         TCGTemp *its = itsh;
3884 #else
3885         TCGTemp *its = itsl;
3886 #endif
3887         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3888                              its->mem_base->reg, its->mem_offset)) {
3889             goto done;
3890         }
3891     }
3892 
3893     /* Fall back to generic expansion. */
3894     return false;
3895 
3896  done:
3897     if (IS_DEAD_ARG(1)) {
3898         temp_dead(s, itsl);
3899     }
3900     if (IS_DEAD_ARG(2)) {
3901         temp_dead(s, itsh);
3902     }
3903     if (NEED_SYNC_ARG(0)) {
3904         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3905     } else if (IS_DEAD_ARG(0)) {
3906         temp_dead(s, ots);
3907     }
3908     return true;
3909 }
3910 
3911 #ifdef TCG_TARGET_STACK_GROWSUP
3912 #define STACK_DIR(x) (-(x))
3913 #else
3914 #define STACK_DIR(x) (x)
3915 #endif
3916 
3917 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3918 {
3919     const int nb_oargs = TCGOP_CALLO(op);
3920     const int nb_iargs = TCGOP_CALLI(op);
3921     const TCGLifeData arg_life = op->life;
3922     const TCGHelperInfo *info;
3923     int flags, nb_regs, i;
3924     TCGReg reg;
3925     TCGArg arg;
3926     TCGTemp *ts;
3927     intptr_t stack_offset;
3928     size_t call_stack_size;
3929     tcg_insn_unit *func_addr;
3930     int allocate_args;
3931     TCGRegSet allocated_regs;
3932 
3933     func_addr = tcg_call_func(op);
3934     info = tcg_call_info(op);
3935     flags = info->flags;
3936 
3937     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3938     if (nb_regs > nb_iargs) {
3939         nb_regs = nb_iargs;
3940     }
3941 
3942     /* assign stack slots first */
3943     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3944     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3945         ~(TCG_TARGET_STACK_ALIGN - 1);
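    /*
     * A worked example with illustrative numbers: 8 integer arguments on
     * a host with 6 argument registers and an 8-byte tcg_target_long
     * leave 2 stack slots, so call_stack_size is 16 before rounding and,
     * with TCG_TARGET_STACK_ALIGN == 16, still 16 afterwards, which is
     * expected to stay within TCG_STATIC_CALL_ARGS_SIZE below.
     */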
3946     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3947     if (allocate_args) {
3948         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3949            preallocate call stack */
3950         tcg_abort();
3951     }
3952 
3953     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3954     for (i = nb_regs; i < nb_iargs; i++) {
3955         arg = op->args[nb_oargs + i];
3956 #ifdef TCG_TARGET_STACK_GROWSUP
3957         stack_offset -= sizeof(tcg_target_long);
3958 #endif
3959         if (arg != TCG_CALL_DUMMY_ARG) {
3960             ts = arg_temp(arg);
3961             temp_load(s, ts, tcg_target_available_regs[ts->type],
3962                       s->reserved_regs, 0);
3963             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3964         }
3965 #ifndef TCG_TARGET_STACK_GROWSUP
3966         stack_offset += sizeof(tcg_target_long);
3967 #endif
3968     }
3969 
3970     /* assign input registers */
3971     allocated_regs = s->reserved_regs;
3972     for (i = 0; i < nb_regs; i++) {
3973         arg = op->args[nb_oargs + i];
3974         if (arg != TCG_CALL_DUMMY_ARG) {
3975             ts = arg_temp(arg);
3976             reg = tcg_target_call_iarg_regs[i];
3977 
3978             if (ts->val_type == TEMP_VAL_REG) {
3979                 if (ts->reg != reg) {
3980                     tcg_reg_free(s, reg, allocated_regs);
3981                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3982                         /*
3983                          * Cross register class move not supported.  Sync the
3984                          * temp back to its slot and load from there.
3985                          */
3986                         temp_sync(s, ts, allocated_regs, 0, 0);
3987                         tcg_out_ld(s, ts->type, reg,
3988                                    ts->mem_base->reg, ts->mem_offset);
3989                     }
3990                 }
3991             } else {
3992                 TCGRegSet arg_set = 0;
3993 
3994                 tcg_reg_free(s, reg, allocated_regs);
3995                 tcg_regset_set_reg(arg_set, reg);
3996                 temp_load(s, ts, arg_set, allocated_regs, 0);
3997             }
3998 
3999             tcg_regset_set_reg(allocated_regs, reg);
4000         }
4001     }
4002 
4003     /* mark dead temporaries and free the associated registers */
4004     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4005         if (IS_DEAD_ARG(i)) {
4006             temp_dead(s, arg_temp(op->args[i]));
4007         }
4008     }
4009 
4010     /* clobber call registers */
4011     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4012         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4013             tcg_reg_free(s, i, allocated_regs);
4014         }
4015     }
4016 
4017     /* Save globals if they might be written by the helper, sync them if
4018        they might be read. */
4019     if (flags & TCG_CALL_NO_READ_GLOBALS) {
4020         /* Nothing to do */
4021     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4022         sync_globals(s, allocated_regs);
4023     } else {
4024         save_globals(s, allocated_regs);
4025     }
4026 
4027 #ifdef CONFIG_TCG_INTERPRETER
4028     {
4029         gpointer hash = (gpointer)(uintptr_t)info->typemask;
4030         ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
4031         assert(cif != NULL);
4032         tcg_out_call(s, func_addr, cif);
4033     }
4034 #else
4035     tcg_out_call(s, func_addr);
4036 #endif
4037 
4038     /* assign output registers and emit moves if needed */
4039     for (i = 0; i < nb_oargs; i++) {
4040         arg = op->args[i];
4041         ts = arg_temp(arg);
4042 
4043         /* ENV should not be modified.  */
4044         tcg_debug_assert(!temp_readonly(ts));
4045 
4046         reg = tcg_target_call_oarg_regs[i];
4047         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4048         if (ts->val_type == TEMP_VAL_REG) {
4049             s->reg_to_temp[ts->reg] = NULL;
4050         }
4051         ts->val_type = TEMP_VAL_REG;
4052         ts->reg = reg;
4053         ts->mem_coherent = 0;
4054         s->reg_to_temp[reg] = ts;
4055         if (NEED_SYNC_ARG(i)) {
4056             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4057         } else if (IS_DEAD_ARG(i)) {
4058             temp_dead(s, ts);
4059         }
4060     }
4061 }
4062 
4063 #ifdef CONFIG_PROFILER
4064 
4065 /* avoid copy/paste errors */
4066 #define PROF_ADD(to, from, field)                       \
4067     do {                                                \
4068         (to)->field += qatomic_read(&((from)->field));  \
4069     } while (0)
4070 
4071 #define PROF_MAX(to, from, field)                                       \
4072     do {                                                                \
4073         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4074         if (val__ > (to)->field) {                                      \
4075             (to)->field = val__;                                        \
4076         }                                                               \
4077     } while (0)
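/*
 * Both macros take a destination struct, a source struct and a field
 * name; e.g. PROF_ADD(prof, orig, tb_count) accumulates a counter while
 * PROF_MAX(prof, orig, op_count_max) keeps a running maximum, each
 * reading the source field atomically exactly once.
 */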
4078 
4079 /* Pass in a zeroed @prof */
4080 static inline
4081 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4082 {
4083     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4084     unsigned int i;
4085 
4086     for (i = 0; i < n_ctxs; i++) {
4087         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4088         const TCGProfile *orig = &s->prof;
4089 
4090         if (counters) {
4091             PROF_ADD(prof, orig, cpu_exec_time);
4092             PROF_ADD(prof, orig, tb_count1);
4093             PROF_ADD(prof, orig, tb_count);
4094             PROF_ADD(prof, orig, op_count);
4095             PROF_MAX(prof, orig, op_count_max);
4096             PROF_ADD(prof, orig, temp_count);
4097             PROF_MAX(prof, orig, temp_count_max);
4098             PROF_ADD(prof, orig, del_op_count);
4099             PROF_ADD(prof, orig, code_in_len);
4100             PROF_ADD(prof, orig, code_out_len);
4101             PROF_ADD(prof, orig, search_out_len);
4102             PROF_ADD(prof, orig, interm_time);
4103             PROF_ADD(prof, orig, code_time);
4104             PROF_ADD(prof, orig, la_time);
4105             PROF_ADD(prof, orig, opt_time);
4106             PROF_ADD(prof, orig, restore_count);
4107             PROF_ADD(prof, orig, restore_time);
4108         }
4109         if (table) {
4110             int i;
4111 
4112             for (i = 0; i < NB_OPS; i++) {
4113                 PROF_ADD(prof, orig, table_op_count[i]);
4114             }
4115         }
4116     }
4117 }
4118 
4119 #undef PROF_ADD
4120 #undef PROF_MAX
4121 
4122 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4123 {
4124     tcg_profile_snapshot(prof, true, false);
4125 }
4126 
4127 static void tcg_profile_snapshot_table(TCGProfile *prof)
4128 {
4129     tcg_profile_snapshot(prof, false, true);
4130 }
4131 
4132 void tcg_dump_op_count(GString *buf)
4133 {
4134     TCGProfile prof = {};
4135     int i;
4136 
4137     tcg_profile_snapshot_table(&prof);
4138     for (i = 0; i < NB_OPS; i++) {
4139         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4140                                prof.table_op_count[i]);
4141     }
4142 }
4143 
4144 int64_t tcg_cpu_exec_time(void)
4145 {
4146     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4147     unsigned int i;
4148     int64_t ret = 0;
4149 
4150     for (i = 0; i < n_ctxs; i++) {
4151         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4152         const TCGProfile *prof = &s->prof;
4153 
4154         ret += qatomic_read(&prof->cpu_exec_time);
4155     }
4156     return ret;
4157 }
4158 #else
4159 void tcg_dump_op_count(GString *buf)
4160 {
4161     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4162 }
4163 
4164 int64_t tcg_cpu_exec_time(void)
4165 {
4166     error_report("%s: TCG profiler not compiled", __func__);
4167     exit(EXIT_FAILURE);
4168 }
4169 #endif
4170 
4171 
4172 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4173 {
4174 #ifdef CONFIG_PROFILER
4175     TCGProfile *prof = &s->prof;
4176 #endif
4177     int i, num_insns;
4178     TCGOp *op;
4179 
4180 #ifdef CONFIG_PROFILER
4181     {
4182         int n = 0;
4183 
4184         QTAILQ_FOREACH(op, &s->ops, link) {
4185             n++;
4186         }
4187         qatomic_set(&prof->op_count, prof->op_count + n);
4188         if (n > prof->op_count_max) {
4189             qatomic_set(&prof->op_count_max, n);
4190         }
4191 
4192         n = s->nb_temps;
4193         qatomic_set(&prof->temp_count, prof->temp_count + n);
4194         if (n > prof->temp_count_max) {
4195             qatomic_set(&prof->temp_count_max, n);
4196         }
4197     }
4198 #endif
4199 
4200 #ifdef DEBUG_DISAS
4201     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4202                  && qemu_log_in_addr_range(tb->pc))) {
4203         FILE *logfile = qemu_log_lock();
4204         qemu_log("OP:\n");
4205         tcg_dump_ops(s, false);
4206         qemu_log("\n");
4207         qemu_log_unlock(logfile);
4208     }
4209 #endif
4210 
4211 #ifdef CONFIG_DEBUG_TCG
4212     /* Ensure all labels referenced have been emitted.  */
4213     {
4214         TCGLabel *l;
4215         bool error = false;
4216 
4217         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4218             if (unlikely(!l->present) && l->refs) {
4219                 qemu_log_mask(CPU_LOG_TB_OP,
4220                               "$L%d referenced but not present.\n", l->id);
4221                 error = true;
4222             }
4223         }
4224         assert(!error);
4225     }
4226 #endif
4227 
4228 #ifdef CONFIG_PROFILER
4229     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4230 #endif
4231 
4232 #ifdef USE_TCG_OPTIMIZATIONS
4233     tcg_optimize(s);
4234 #endif
4235 
4236 #ifdef CONFIG_PROFILER
4237     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4238     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4239 #endif
4240 
4241     reachable_code_pass(s);
4242     liveness_pass_1(s);
4243 
4244     if (s->nb_indirects > 0) {
4245 #ifdef DEBUG_DISAS
4246         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4247                      && qemu_log_in_addr_range(tb->pc))) {
4248             FILE *logfile = qemu_log_lock();
4249             qemu_log("OP before indirect lowering:\n");
4250             tcg_dump_ops(s, false);
4251             qemu_log("\n");
4252             qemu_log_unlock(logfile);
4253         }
4254 #endif
4255         /* Replace indirect temps with direct temps.  */
4256         if (liveness_pass_2(s)) {
4257             /* If changes were made, re-run liveness.  */
4258             liveness_pass_1(s);
4259         }
4260     }
4261 
4262 #ifdef CONFIG_PROFILER
4263     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4264 #endif
4265 
4266 #ifdef DEBUG_DISAS
4267     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4268                  && qemu_log_in_addr_range(tb->pc))) {
4269         FILE *logfile = qemu_log_lock();
4270         qemu_log("OP after optimization and liveness analysis:\n");
4271         tcg_dump_ops(s, true);
4272         qemu_log("\n");
4273         qemu_log_unlock(logfile);
4274     }
4275 #endif
4276 
4277     tcg_reg_alloc_start(s);
4278 
4279     /*
4280      * Reset the buffer pointers when restarting after overflow.
4281      * TODO: Move this into translate-all.c with the rest of the
4282      * buffer management.  Having only this done here is confusing.
4283      */
4284     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4285     s->code_ptr = s->code_buf;
4286 
4287 #ifdef TCG_TARGET_NEED_LDST_LABELS
4288     QSIMPLEQ_INIT(&s->ldst_labels);
4289 #endif
4290 #ifdef TCG_TARGET_NEED_POOL_LABELS
4291     s->pool_labels = NULL;
4292 #endif
4293 
4294     num_insns = -1;
4295     QTAILQ_FOREACH(op, &s->ops, link) {
4296         TCGOpcode opc = op->opc;
4297 
4298 #ifdef CONFIG_PROFILER
4299         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4300 #endif
4301 
4302         switch (opc) {
4303         case INDEX_op_mov_i32:
4304         case INDEX_op_mov_i64:
4305         case INDEX_op_mov_vec:
4306             tcg_reg_alloc_mov(s, op);
4307             break;
4308         case INDEX_op_dup_vec:
4309             tcg_reg_alloc_dup(s, op);
4310             break;
4311         case INDEX_op_insn_start:
4312             if (num_insns >= 0) {
4313                 size_t off = tcg_current_code_size(s);
4314                 s->gen_insn_end_off[num_insns] = off;
4315                 /* Assert that we do not overflow our stored offset.  */
4316                 assert(s->gen_insn_end_off[num_insns] == off);
4317             }
4318             num_insns++;
4319             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4320                 target_ulong a;
4321 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4322                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4323 #else
4324                 a = op->args[i];
4325 #endif
4326                 s->gen_insn_data[num_insns][i] = a;
4327             }
4328             break;
4329         case INDEX_op_discard:
4330             temp_dead(s, arg_temp(op->args[0]));
4331             break;
4332         case INDEX_op_set_label:
4333             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4334             tcg_out_label(s, arg_label(op->args[0]));
4335             break;
4336         case INDEX_op_call:
4337             tcg_reg_alloc_call(s, op);
4338             break;
4339         case INDEX_op_dup2_vec:
4340             if (tcg_reg_alloc_dup2(s, op)) {
4341                 break;
4342             }
4343             /* fall through */
4344         default:
4345             /* Sanity check that we've not introduced any unhandled opcodes. */
4346             tcg_debug_assert(tcg_op_supported(opc));
4347             /* Note: it would be much faster to have specialized
4348                register allocator functions for some common argument
4349                patterns. */
4350             tcg_reg_alloc_op(s, op);
4351             break;
4352         }
4353 #ifdef CONFIG_DEBUG_TCG
4354         check_regs(s);
4355 #endif
4356         /* Test for (pending) buffer overflow.  The assumption is that any
4357            one operation beginning below the high water mark cannot overrun
4358            the buffer completely.  Thus we can test for overflow after
4359            generating code without having to check during generation.  */
4360         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4361             return -1;
4362         }
4363         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4364         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4365             return -2;
4366         }
4367     }
4368     tcg_debug_assert(num_insns >= 0);
4369     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4370 
4371     /* Generate TB finalization at the end of the block */
4372 #ifdef TCG_TARGET_NEED_LDST_LABELS
4373     i = tcg_out_ldst_finalize(s);
4374     if (i < 0) {
4375         return i;
4376     }
4377 #endif
4378 #ifdef TCG_TARGET_NEED_POOL_LABELS
4379     i = tcg_out_pool_finalize(s);
4380     if (i < 0) {
4381         return i;
4382     }
4383 #endif
4384     if (!tcg_resolve_relocs(s)) {
4385         return -2;
4386     }
4387 
4388 #ifndef CONFIG_TCG_INTERPRETER
4389     /* flush instruction cache */
4390     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4391                         (uintptr_t)s->code_buf,
4392                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4393 #endif
4394 
4395     return tcg_current_code_size(s);
4396 }
4397 
4398 #ifdef CONFIG_PROFILER
4399 void tcg_dump_info(GString *buf)
4400 {
4401     TCGProfile prof = {};
4402     const TCGProfile *s;
4403     int64_t tb_count;
4404     int64_t tb_div_count;
4405     int64_t tot;
4406 
4407     tcg_profile_snapshot_counters(&prof);
4408     s = &prof;
4409     tb_count = s->tb_count;
4410     tb_div_count = tb_count ? tb_count : 1;
4411     tot = s->interm_time + s->code_time;
4412 
4413     g_string_append_printf(buf, "JIT cycles          %" PRId64
4414                            " (%0.3f s at 2.4 GHz)\n",
4415                            tot, tot / 2.4e9);
4416     g_string_append_printf(buf, "translated TBs      %" PRId64
4417                            " (aborted=%" PRId64 " %0.1f%%)\n",
4418                            tb_count, s->tb_count1 - tb_count,
4419                            (double)(s->tb_count1 - s->tb_count)
4420                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4421     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
4422                            (double)s->op_count / tb_div_count, s->op_count_max);
4423     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
4424                            (double)s->del_op_count / tb_div_count);
4425     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
4426                            (double)s->temp_count / tb_div_count,
4427                            s->temp_count_max);
4428     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
4429                            (double)s->code_out_len / tb_div_count);
4430     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
4431                            (double)s->search_out_len / tb_div_count);
4432 
4433     g_string_append_printf(buf, "cycles/op           %0.1f\n",
4434                            s->op_count ? (double)tot / s->op_count : 0);
4435     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
4436                            s->code_in_len ? (double)tot / s->code_in_len : 0);
4437     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
4438                            s->code_out_len ? (double)tot / s->code_out_len : 0);
4439     g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
4440                            s->search_out_len ?
4441                            (double)tot / s->search_out_len : 0);
4442     if (tot == 0) {
4443         tot = 1;
4444     }
4445     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
4446                            (double)s->interm_time / tot * 100.0);
4447     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
4448                            (double)s->code_time / tot * 100.0);
4449     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
4450                            (double)s->opt_time / (s->code_time ?
4451                                                   s->code_time : 1)
4452                            * 100.0);
4453     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
4454                            (double)s->la_time / (s->code_time ?
4455                                                  s->code_time : 1) * 100.0);
4456     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
4457                            s->restore_count);
4458     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
4459                            s->restore_count ?
4460                            (double)s->restore_time / s->restore_count : 0);
4461 }
4462 #else
4463 void tcg_dump_info(GString *buf)
4464 {
4465     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4466 }
4467 #endif
4468 
4469 #ifdef ELF_HOST_MACHINE
4470 /* In order to use this feature, the backend needs to do three things:
4471 
4472    (1) Define ELF_HOST_MACHINE to indicate both what value to
4473        put into the ELF image and to indicate support for the feature.
4474 
4475    (2) Define tcg_register_jit.  This should create a buffer containing
4476        the contents of a .debug_frame section that describes the post-
4477        prologue unwind info for the tcg machine.
4478 
4479    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4480 */
4481 
4482 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4483 typedef enum {
4484     JIT_NOACTION = 0,
4485     JIT_REGISTER_FN,
4486     JIT_UNREGISTER_FN
4487 } jit_actions_t;
4488 
4489 struct jit_code_entry {
4490     struct jit_code_entry *next_entry;
4491     struct jit_code_entry *prev_entry;
4492     const void *symfile_addr;
4493     uint64_t symfile_size;
4494 };
4495 
4496 struct jit_descriptor {
4497     uint32_t version;
4498     uint32_t action_flag;
4499     struct jit_code_entry *relevant_entry;
4500     struct jit_code_entry *first_entry;
4501 };
4502 
4503 void __jit_debug_register_code(void) __attribute__((noinline));
4504 void __jit_debug_register_code(void)
4505 {
4506     asm("");
4507 }
4508 
4509 /* Must statically initialize the version, because GDB may check
4510    the version before we can set it.  */
4511 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
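/*
 * A sketch of the registration handshake, as performed by
 * tcg_register_jit_int() below: fill in a jit_code_entry, link it into
 * first_entry, point relevant_entry at it, set action_flag to
 * JIT_REGISTER_FN, and finally call __jit_debug_register_code(), on
 * which the debugger has planted a breakpoint so that it can inspect
 * the descriptor.
 */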
4512 
4513 /* End GDB interface.  */
4514 
4515 static int find_string(const char *strtab, const char *str)
4516 {
4517     const char *p = strtab + 1;
4518 
4519     while (1) {
4520         if (strcmp(p, str) == 0) {
4521             return p - strtab;
4522         }
4523         p += strlen(p) + 1;
4524     }
4525 }
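/*
 * For example, with the .str table used below ("\0" ".text\0"
 * ".debug_info\0" ...), find_string(img->str, ".debug_info") returns 7:
 * the empty string occupies offset 0, ".text" plus its NUL occupies
 * offsets 1..6, and the match begins at offset 7.
 */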
4526 
4527 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4528                                  const void *debug_frame,
4529                                  size_t debug_frame_size)
4530 {
4531     struct __attribute__((packed)) DebugInfo {
4532         uint32_t  len;
4533         uint16_t  version;
4534         uint32_t  abbrev;
4535         uint8_t   ptr_size;
4536         uint8_t   cu_die;
4537         uint16_t  cu_lang;
4538         uintptr_t cu_low_pc;
4539         uintptr_t cu_high_pc;
4540         uint8_t   fn_die;
4541         char      fn_name[16];
4542         uintptr_t fn_low_pc;
4543         uintptr_t fn_high_pc;
4544         uint8_t   cu_eoc;
4545     };
4546 
4547     struct ElfImage {
4548         ElfW(Ehdr) ehdr;
4549         ElfW(Phdr) phdr;
4550         ElfW(Shdr) shdr[7];
4551         ElfW(Sym)  sym[2];
4552         struct DebugInfo di;
4553         uint8_t    da[24];
4554         char       str[80];
4555     };
4556 
4557     struct ElfImage *img;
4558 
4559     static const struct ElfImage img_template = {
4560         .ehdr = {
4561             .e_ident[EI_MAG0] = ELFMAG0,
4562             .e_ident[EI_MAG1] = ELFMAG1,
4563             .e_ident[EI_MAG2] = ELFMAG2,
4564             .e_ident[EI_MAG3] = ELFMAG3,
4565             .e_ident[EI_CLASS] = ELF_CLASS,
4566             .e_ident[EI_DATA] = ELF_DATA,
4567             .e_ident[EI_VERSION] = EV_CURRENT,
4568             .e_type = ET_EXEC,
4569             .e_machine = ELF_HOST_MACHINE,
4570             .e_version = EV_CURRENT,
4571             .e_phoff = offsetof(struct ElfImage, phdr),
4572             .e_shoff = offsetof(struct ElfImage, shdr),
4573             .e_ehsize = sizeof(ElfW(Ehdr)),
4574             .e_phentsize = sizeof(ElfW(Phdr)),
4575             .e_phnum = 1,
4576             .e_shentsize = sizeof(ElfW(Shdr)),
4577             .e_shnum = ARRAY_SIZE(img->shdr),
4578             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4579 #ifdef ELF_HOST_FLAGS
4580             .e_flags = ELF_HOST_FLAGS,
4581 #endif
4582 #ifdef ELF_OSABI
4583             .e_ident[EI_OSABI] = ELF_OSABI,
4584 #endif
4585         },
4586         .phdr = {
4587             .p_type = PT_LOAD,
4588             .p_flags = PF_X,
4589         },
4590         .shdr = {
4591             [0] = { .sh_type = SHT_NULL },
4592             /* Trick: The contents of code_gen_buffer are not present in
4593                this fake ELF file; that got allocated elsewhere.  Therefore
4594                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4595                will not look for contents.  We can record any address.  */
4596             [1] = { /* .text */
4597                 .sh_type = SHT_NOBITS,
4598                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4599             },
4600             [2] = { /* .debug_info */
4601                 .sh_type = SHT_PROGBITS,
4602                 .sh_offset = offsetof(struct ElfImage, di),
4603                 .sh_size = sizeof(struct DebugInfo),
4604             },
4605             [3] = { /* .debug_abbrev */
4606                 .sh_type = SHT_PROGBITS,
4607                 .sh_offset = offsetof(struct ElfImage, da),
4608                 .sh_size = sizeof(img->da),
4609             },
4610             [4] = { /* .debug_frame */
4611                 .sh_type = SHT_PROGBITS,
4612                 .sh_offset = sizeof(struct ElfImage),
4613             },
4614             [5] = { /* .symtab */
4615                 .sh_type = SHT_SYMTAB,
4616                 .sh_offset = offsetof(struct ElfImage, sym),
4617                 .sh_size = sizeof(img->sym),
4618                 .sh_info = 1,
4619                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4620                 .sh_entsize = sizeof(ElfW(Sym)),
4621             },
4622             [6] = { /* .strtab */
4623                 .sh_type = SHT_STRTAB,
4624                 .sh_offset = offsetof(struct ElfImage, str),
4625                 .sh_size = sizeof(img->str),
4626             }
4627         },
4628         .sym = {
4629             [1] = { /* code_gen_buffer */
4630                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4631                 .st_shndx = 1,
4632             }
4633         },
4634         .di = {
4635             .len = sizeof(struct DebugInfo) - 4,
4636             .version = 2,
4637             .ptr_size = sizeof(void *),
4638             .cu_die = 1,
4639             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4640             .fn_die = 2,
4641             .fn_name = "code_gen_buffer"
4642         },
4643         .da = {
4644             1,          /* abbrev number (the cu) */
4645             0x11, 1,    /* DW_TAG_compile_unit, has children */
4646             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4647             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4648             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4649             0, 0,       /* end of abbrev */
4650             2,          /* abbrev number (the fn) */
4651             0x2e, 0,    /* DW_TAG_subprogram, no children */
4652             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4653             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4654             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4655             0, 0,       /* end of abbrev */
4656             0           /* no more abbrev */
4657         },
4658         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4659                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4660     };
4661 
4662     /* We only need a single jit entry; statically allocate it.  */
4663     static struct jit_code_entry one_entry;
4664 
4665     uintptr_t buf = (uintptr_t)buf_ptr;
4666     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4667     DebugFrameHeader *dfh;
4668 
4669     img = g_malloc(img_size);
4670     *img = img_template;
4671 
4672     img->phdr.p_vaddr = buf;
4673     img->phdr.p_paddr = buf;
4674     img->phdr.p_memsz = buf_size;
4675 
4676     img->shdr[1].sh_name = find_string(img->str, ".text");
4677     img->shdr[1].sh_addr = buf;
4678     img->shdr[1].sh_size = buf_size;
4679 
4680     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4681     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4682 
4683     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4684     img->shdr[4].sh_size = debug_frame_size;
4685 
4686     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4687     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4688 
4689     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4690     img->sym[1].st_value = buf;
4691     img->sym[1].st_size = buf_size;
4692 
4693     img->di.cu_low_pc = buf;
4694     img->di.cu_high_pc = buf + buf_size;
4695     img->di.fn_low_pc = buf;
4696     img->di.fn_high_pc = buf + buf_size;
4697 
4698     dfh = (DebugFrameHeader *)(img + 1);
4699     memcpy(dfh, debug_frame, debug_frame_size);
4700     dfh->fde.func_start = buf;
4701     dfh->fde.func_len = buf_size;
4702 
4703 #ifdef DEBUG_JIT
4704     /* Enable this block to debug the ELF image file creation.  One can
4705        inspect /tmp/qemu.jit with readelf, objdump, or similar utilities.  */
4706     {
4707         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4708         if (f) {
4709             if (fwrite(img, img_size, 1, f) != 1) {
4710                 /* Avoid stupid unused return value warning for fwrite.  */
4711             }
4712             fclose(f);
4713         }
4714     }
4715 #endif
4716 
4717     one_entry.symfile_addr = img;
4718     one_entry.symfile_size = img_size;
4719 
4720     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4721     __jit_debug_descriptor.relevant_entry = &one_entry;
4722     __jit_debug_descriptor.first_entry = &one_entry;
4723     __jit_debug_register_code();
4724 }
4725 #else
4726 /* No support for the feature.  Provide the entry point expected by exec.c,
4727    and implement the internal function we declared earlier.  */
4728 
4729 static void tcg_register_jit_int(const void *buf, size_t size,
4730                                  const void *debug_frame,
4731                                  size_t debug_frame_size)
4732 {
4733 }
4734 
4735 void tcg_register_jit(const void *buf, size_t buf_size)
4736 {
4737 }
4738 #endif /* ELF_HOST_MACHINE */
4739 
4740 #if !TCG_TARGET_MAYBE_vec
4741 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4742 {
4743     g_assert_not_reached();
4744 }
4745 #endif
4746