xref: /qemu/tcg/tcg.c (revision d45c8332)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 #include "qemu/cacheflush.h"
39 #include "qemu/cacheinfo.h"
40 
41 /* Note: the long term plan is to reduce the dependencies on the QEMU
42    CPU definitions. Currently they are used for qemu_ld/st
43    instructions */
44 #define NO_CPU_IO_DEFS
45 
46 #include "exec/exec-all.h"
47 #include "tcg/tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #if HOST_BIG_ENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "tcg/tcg-ldst.h"
63 #include "tcg-internal.h"
64 
65 #ifdef CONFIG_TCG_INTERPRETER
66 #include <ffi.h>
67 #endif
68 
69 /* Forward declarations for functions declared in tcg-target.c.inc and
70    used here. */
71 static void tcg_target_init(TCGContext *s);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
77 typedef struct {
78     uint32_t len __attribute__((aligned((sizeof(void *)))));
79     uint32_t id;
80     uint8_t version;
81     char augmentation[1];
82     uint8_t code_align;
83     uint8_t data_align;
84     uint8_t return_column;
85 } DebugFrameCIE;
86 
87 typedef struct QEMU_PACKED {
88     uint32_t len __attribute__((aligned((sizeof(void *)))));
89     uint32_t cie_offset;
90     uintptr_t func_start;
91     uintptr_t func_len;
92 } DebugFrameFDEHeader;
93 
/* A CIE immediately followed by one FDE header, packed with no padding. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
98 
99 static void tcg_register_jit_int(const void *buf, size_t size,
100                                  const void *debug_frame,
101                                  size_t debug_frame_size)
102     __attribute__((unused));
103 
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
106                        intptr_t arg2);
107 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
108 static void tcg_out_movi(TCGContext *s, TCGType type,
109                          TCGReg ret, tcg_target_long arg);
110 static void tcg_out_op(TCGContext *s, TCGOpcode opc,
111                        const TCGArg args[TCG_MAX_OP_ARGS],
112                        const int const_args[TCG_MAX_OP_ARGS]);
113 #if TCG_TARGET_MAYBE_vec
114 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
115                             TCGReg dst, TCGReg src);
116 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
117                              TCGReg dst, TCGReg base, intptr_t offset);
118 static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
119                              TCGReg dst, int64_t arg);
120 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
121                            unsigned vecl, unsigned vece,
122                            const TCGArg args[TCG_MAX_OP_ARGS],
123                            const int const_args[TCG_MAX_OP_ARGS]);
124 #else
/*
 * Stubs compiled when TCG_TARGET_MAYBE_vec is 0.  They exist only so that
 * the callers in this file still compile; reaching any of them at run time
 * is a bug and trips g_assert_not_reached().
 */
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
/* Stub for the memory-source duplicate; never expected to run. */
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
/* Stub for the immediate-source duplicate; never expected to run. */
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
/* Stub for generic vector opcode emission; never expected to run. */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc,
                                  unsigned vecl, unsigned vece,
                                  const TCGArg args[TCG_MAX_OP_ARGS],
                                  const int const_args[TCG_MAX_OP_ARGS])
{
    g_assert_not_reached();
}
147 #endif
148 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
149                        intptr_t arg2);
150 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
151                         TCGReg base, intptr_t ofs);
152 #ifdef CONFIG_TCG_INTERPRETER
153 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target,
154                          ffi_cif *cif);
155 #else
156 static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
157 #endif
158 static bool tcg_target_const_match(int64_t val, TCGType type, int ct);
159 #ifdef TCG_TARGET_NEED_LDST_LABELS
160 static int tcg_out_ldst_finalize(TCGContext *s);
161 #endif
162 
163 TCGContext tcg_init_ctx;
164 __thread TCGContext *tcg_ctx;
165 
166 TCGContext **tcg_ctxs;
167 unsigned int tcg_cur_ctxs;
168 unsigned int tcg_max_ctxs;
169 TCGv_env cpu_env = 0;
170 const void *tcg_code_gen_epilogue;
171 uintptr_t tcg_splitwx_diff;
172 
173 #ifndef CONFIG_TCG_INTERPRETER
174 tcg_prologue_fn *tcg_qemu_tb_exec;
175 #endif
176 
177 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
178 static TCGRegSet tcg_target_call_clobber_regs;
179 
180 #if TCG_TARGET_INSN_UNIT_SIZE == 1
181 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
182 {
183     *s->code_ptr++ = v;
184 }
185 
186 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
187                                                       uint8_t v)
188 {
189     *p = v;
190 }
191 #endif
192 
193 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
194 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
195 {
196     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
197         *s->code_ptr++ = v;
198     } else {
199         tcg_insn_unit *p = s->code_ptr;
200         memcpy(p, &v, sizeof(v));
201         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
202     }
203 }
204 
205 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
206                                                        uint16_t v)
207 {
208     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
209         *p = v;
210     } else {
211         memcpy(p, &v, sizeof(v));
212     }
213 }
214 #endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
217 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
229                                                        uint32_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
240 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
252                                                        uint64_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
261 
262 /* label relocation processing */
263 
264 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
265                           TCGLabel *l, intptr_t addend)
266 {
267     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
268 
269     r->type = type;
270     r->ptr = code_ptr;
271     r->addend = addend;
272     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
273 }
274 
275 static void tcg_out_label(TCGContext *s, TCGLabel *l)
276 {
277     tcg_debug_assert(!l->has_value);
278     l->has_value = 1;
279     l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
280 }
281 
282 TCGLabel *gen_new_label(void)
283 {
284     TCGContext *s = tcg_ctx;
285     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
286 
287     memset(l, 0, sizeof(TCGLabel));
288     l->id = s->nb_labels++;
289     QSIMPLEQ_INIT(&l->relocs);
290 
291     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
292 
293     return l;
294 }
295 
296 static bool tcg_resolve_relocs(TCGContext *s)
297 {
298     TCGLabel *l;
299 
300     QSIMPLEQ_FOREACH(l, &s->labels, next) {
301         TCGRelocation *r;
302         uintptr_t value = l->u.value;
303 
304         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
305             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
306                 return false;
307             }
308         }
309     }
310     return true;
311 }
312 
313 static void set_jmp_reset_offset(TCGContext *s, int which)
314 {
315     /*
316      * We will check for overflow at the end of the opcode loop in
317      * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
318      */
319     s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
320 }
321 
322 /* Signal overflow, starting over with fewer guest insns. */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    /* Unwind to the sigsetjmp point saved in s->jmp_trans with value -2. */
    siglongjmp(s->jmp_trans, -2);
}
327 
328 #define C_PFX1(P, A)                    P##A
329 #define C_PFX2(P, A, B)                 P##A##_##B
330 #define C_PFX3(P, A, B, C)              P##A##_##B##_##C
331 #define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
332 #define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
333 #define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F
334 
335 /* Define an enumeration for the various combinations. */
336 
337 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
338 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
339 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
340 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),
341 
342 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
343 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
344 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
345 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),
346 
347 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),
348 
349 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
350 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
351 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
352 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),
353 
354 typedef enum {
355 #include "tcg-target-con-set.h"
356 } TCGConstraintSetIndex;
357 
358 static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);
359 
360 #undef C_O0_I1
361 #undef C_O0_I2
362 #undef C_O0_I3
363 #undef C_O0_I4
364 #undef C_O1_I1
365 #undef C_O1_I2
366 #undef C_O1_I3
367 #undef C_O1_I4
368 #undef C_N1_I2
369 #undef C_O2_I1
370 #undef C_O2_I2
371 #undef C_O2_I3
372 #undef C_O2_I4
373 
374 /* Put all of the constraint sets into an array, indexed by the enum. */
375 
376 #define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
377 #define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
378 #define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
379 #define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },
380 
381 #define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
382 #define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
383 #define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
384 #define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },
385 
386 #define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },
387 
388 #define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
389 #define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
390 #define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
391 #define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },
392 
393 static const TCGTargetOpDef constraint_sets[] = {
394 #include "tcg-target-con-set.h"
395 };
396 
397 
398 #undef C_O0_I1
399 #undef C_O0_I2
400 #undef C_O0_I3
401 #undef C_O0_I4
402 #undef C_O1_I1
403 #undef C_O1_I2
404 #undef C_O1_I3
405 #undef C_O1_I4
406 #undef C_N1_I2
407 #undef C_O2_I1
408 #undef C_O2_I2
409 #undef C_O2_I3
410 #undef C_O2_I4
411 
412 /* Expand the enumerator to be returned from tcg_target_op_def(). */
413 
414 #define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
415 #define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
416 #define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
417 #define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)
418 
419 #define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
420 #define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
421 #define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
422 #define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)
423 
424 #define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)
425 
426 #define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
427 #define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
428 #define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
429 #define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)
430 
431 #include "tcg-target.c.inc"
432 
433 static void alloc_tcg_plugin_context(TCGContext *s)
434 {
435 #ifdef CONFIG_PLUGIN
436     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
437     s->plugin_tb->insns =
438         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
439 #endif
440 }
441 
442 /*
443  * All TCG threads except the parent (i.e. the one that called tcg_context_init
444  * and registered the target's TCG globals) must register with this function
445  * before initiating translation.
446  *
447  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
448  * of tcg_region_init() for the reasoning behind this.
449  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
456  */
457 #ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    /* User-only build: every thread shares the single initial context. */
    tcg_ctx = &tcg_init_ctx;
}
462 #else
463 void tcg_register_thread(void)
464 {
465     TCGContext *s = g_malloc(sizeof(*s));
466     unsigned int i, n;
467 
468     *s = tcg_init_ctx;
469 
470     /* Relink mem_base.  */
471     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
472         if (tcg_init_ctx.temps[i].mem_base) {
473             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
474             tcg_debug_assert(b >= 0 && b < n);
475             s->temps[i].mem_base = &s->temps[b];
476         }
477     }
478 
479     /* Claim an entry in tcg_ctxs */
480     n = qatomic_fetch_inc(&tcg_cur_ctxs);
481     g_assert(n < tcg_max_ctxs);
482     qatomic_set(&tcg_ctxs[n], s);
483 
484     if (n > 0) {
485         alloc_tcg_plugin_context(s);
486         tcg_region_initial_alloc(s);
487     }
488 
489     tcg_ctx = s;
490 }
491 #endif /* !CONFIG_USER_ONLY */
492 
493 /* pool based memory allocation */
494 void *tcg_malloc_internal(TCGContext *s, int size)
495 {
496     TCGPool *p;
497     int pool_size;
498 
499     if (size > TCG_POOL_CHUNK_SIZE) {
500         /* big malloc: insert a new pool (XXX: could optimize) */
501         p = g_malloc(sizeof(TCGPool) + size);
502         p->size = size;
503         p->next = s->pool_first_large;
504         s->pool_first_large = p;
505         return p->data;
506     } else {
507         p = s->pool_current;
508         if (!p) {
509             p = s->pool_first;
510             if (!p)
511                 goto new_pool;
512         } else {
513             if (!p->next) {
514             new_pool:
515                 pool_size = TCG_POOL_CHUNK_SIZE;
516                 p = g_malloc(sizeof(TCGPool) + pool_size);
517                 p->size = pool_size;
518                 p->next = NULL;
519                 if (s->pool_current)
520                     s->pool_current->next = p;
521                 else
522                     s->pool_first = p;
523             } else {
524                 p = p->next;
525             }
526         }
527     }
528     s->pool_current = p;
529     s->pool_cur = p->data + size;
530     s->pool_end = p->data + p->size;
531     return p->data;
532 }
533 
534 void tcg_pool_reset(TCGContext *s)
535 {
536     TCGPool *p, *t;
537     for (p = s->pool_first_large; p; p = t) {
538         t = p->next;
539         g_free(p);
540     }
541     s->pool_first_large = NULL;
542     s->pool_cur = s->pool_end = NULL;
543     s->pool_current = NULL;
544 }
545 
546 #include "exec/helper-proto.h"
547 
548 static const TCGHelperInfo all_helpers[] = {
549 #include "exec/helper-tcg.h"
550 };
551 static GHashTable *helper_table;
552 
553 #ifdef CONFIG_TCG_INTERPRETER
554 static GHashTable *ffi_table;
555 
556 static ffi_type * const typecode_to_ffi[8] = {
557     [dh_typecode_void] = &ffi_type_void,
558     [dh_typecode_i32]  = &ffi_type_uint32,
559     [dh_typecode_s32]  = &ffi_type_sint32,
560     [dh_typecode_i64]  = &ffi_type_uint64,
561     [dh_typecode_s64]  = &ffi_type_sint64,
562     [dh_typecode_ptr]  = &ffi_type_pointer,
563 };
564 #endif
565 
566 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
567 static void process_op_defs(TCGContext *s);
568 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
569                                             TCGReg reg, const char *name);
570 
571 static void tcg_context_init(unsigned max_cpus)
572 {
573     TCGContext *s = &tcg_init_ctx;
574     int op, total_args, n, i;
575     TCGOpDef *def;
576     TCGArgConstraint *args_ct;
577     TCGTemp *ts;
578 
579     memset(s, 0, sizeof(*s));
580     s->nb_globals = 0;
581 
582     /* Count total number of arguments and allocate the corresponding
583        space */
584     total_args = 0;
585     for(op = 0; op < NB_OPS; op++) {
586         def = &tcg_op_defs[op];
587         n = def->nb_iargs + def->nb_oargs;
588         total_args += n;
589     }
590 
591     args_ct = g_new0(TCGArgConstraint, total_args);
592 
593     for(op = 0; op < NB_OPS; op++) {
594         def = &tcg_op_defs[op];
595         def->args_ct = args_ct;
596         n = def->nb_iargs + def->nb_oargs;
597         args_ct += n;
598     }
599 
600     /* Register helpers.  */
601     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
602     helper_table = g_hash_table_new(NULL, NULL);
603 
604     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
605         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
606                             (gpointer)&all_helpers[i]);
607     }
608 
609 #ifdef CONFIG_TCG_INTERPRETER
610     /* g_direct_hash/equal for direct comparisons on uint32_t.  */
611     ffi_table = g_hash_table_new(NULL, NULL);
612     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
613         struct {
614             ffi_cif cif;
615             ffi_type *args[];
616         } *ca;
617         uint32_t typemask = all_helpers[i].typemask;
618         gpointer hash = (gpointer)(uintptr_t)typemask;
619         ffi_status status;
620         int nargs;
621 
622         if (g_hash_table_lookup(ffi_table, hash)) {
623             continue;
624         }
625 
626         /* Ignoring the return type, find the last non-zero field. */
627         nargs = 32 - clz32(typemask >> 3);
628         nargs = DIV_ROUND_UP(nargs, 3);
629 
630         ca = g_malloc0(sizeof(*ca) + nargs * sizeof(ffi_type *));
631         ca->cif.rtype = typecode_to_ffi[typemask & 7];
632         ca->cif.nargs = nargs;
633 
634         if (nargs != 0) {
635             ca->cif.arg_types = ca->args;
636             for (i = 0; i < nargs; ++i) {
637                 int typecode = extract32(typemask, (i + 1) * 3, 3);
638                 ca->args[i] = typecode_to_ffi[typecode];
639             }
640         }
641 
642         status = ffi_prep_cif(&ca->cif, FFI_DEFAULT_ABI, nargs,
643                               ca->cif.rtype, ca->cif.arg_types);
644         assert(status == FFI_OK);
645 
646         g_hash_table_insert(ffi_table, hash, (gpointer)&ca->cif);
647     }
648 #endif
649 
650     tcg_target_init(s);
651     process_op_defs(s);
652 
653     /* Reverse the order of the saved registers, assuming they're all at
654        the start of tcg_target_reg_alloc_order.  */
655     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
656         int r = tcg_target_reg_alloc_order[n];
657         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
658             break;
659         }
660     }
661     for (i = 0; i < n; ++i) {
662         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
663     }
664     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
665         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
666     }
667 
668     alloc_tcg_plugin_context(s);
669 
670     tcg_ctx = s;
671     /*
672      * In user-mode we simply share the init context among threads, since we
673      * use a single region. See the documentation tcg_region_init() for the
674      * reasoning behind this.
675      * In softmmu we will have at most max_cpus TCG threads.
676      */
677 #ifdef CONFIG_USER_ONLY
678     tcg_ctxs = &tcg_ctx;
679     tcg_cur_ctxs = 1;
680     tcg_max_ctxs = 1;
681 #else
682     tcg_max_ctxs = max_cpus;
683     tcg_ctxs = g_new0(TCGContext *, max_cpus);
684 #endif
685 
686     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
687     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
688     cpu_env = temp_tcgv_ptr(ts);
689 }
690 
/*
 * Public entry point for TCG start-up: initialise the initial context,
 * then the code regions, forwarding the arguments unchanged.
 */
void tcg_init(size_t tb_size, int splitwx, unsigned max_cpus)
{
    tcg_context_init(max_cpus);
    tcg_region_init(tb_size, splitwx, max_cpus);
}
696 
697 /*
698  * Allocate TBs right before their corresponding translated code, making
699  * sure that TBs and code are on different cache lines.
700  */
701 TranslationBlock *tcg_tb_alloc(TCGContext *s)
702 {
703     uintptr_t align = qemu_icache_linesize;
704     TranslationBlock *tb;
705     void *next;
706 
707  retry:
708     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
709     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
710 
711     if (unlikely(next > s->code_gen_highwater)) {
712         if (tcg_region_alloc(s)) {
713             return NULL;
714         }
715         goto retry;
716     }
717     qatomic_set(&s->code_gen_ptr, next);
718     s->data_gen_ptr = NULL;
719     return tb;
720 }
721 
722 void tcg_prologue_init(TCGContext *s)
723 {
724     size_t prologue_size;
725 
726     s->code_ptr = s->code_gen_ptr;
727     s->code_buf = s->code_gen_ptr;
728     s->data_gen_ptr = NULL;
729 
730 #ifndef CONFIG_TCG_INTERPRETER
731     tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(s->code_ptr);
732 #endif
733 
734 #ifdef TCG_TARGET_NEED_POOL_LABELS
735     s->pool_labels = NULL;
736 #endif
737 
738     qemu_thread_jit_write();
739     /* Generate the prologue.  */
740     tcg_target_qemu_prologue(s);
741 
742 #ifdef TCG_TARGET_NEED_POOL_LABELS
743     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
744     {
745         int result = tcg_out_pool_finalize(s);
746         tcg_debug_assert(result == 0);
747     }
748 #endif
749 
750     prologue_size = tcg_current_code_size(s);
751 
752 #ifndef CONFIG_TCG_INTERPRETER
753     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
754                         (uintptr_t)s->code_buf, prologue_size);
755 #endif
756 
757 #ifdef DEBUG_DISAS
758     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
759         FILE *logfile = qemu_log_trylock();
760         if (logfile) {
761             fprintf(logfile, "PROLOGUE: [size=%zu]\n", prologue_size);
762             if (s->data_gen_ptr) {
763                 size_t code_size = s->data_gen_ptr - s->code_gen_ptr;
764                 size_t data_size = prologue_size - code_size;
765                 size_t i;
766 
767                 disas(logfile, s->code_gen_ptr, code_size);
768 
769                 for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
770                     if (sizeof(tcg_target_ulong) == 8) {
771                         fprintf(logfile,
772                                 "0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
773                                 (uintptr_t)s->data_gen_ptr + i,
774                                 *(uint64_t *)(s->data_gen_ptr + i));
775                     } else {
776                         fprintf(logfile,
777                                 "0x%08" PRIxPTR ":  .long  0x%08x\n",
778                                 (uintptr_t)s->data_gen_ptr + i,
779                                 *(uint32_t *)(s->data_gen_ptr + i));
780                     }
781                 }
782             } else {
783                 disas(logfile, s->code_gen_ptr, prologue_size);
784             }
785             fprintf(logfile, "\n");
786             qemu_log_unlock(logfile);
787         }
788     }
789 #endif
790 
791 #ifndef CONFIG_TCG_INTERPRETER
792     /*
793      * Assert that goto_ptr is implemented completely, setting an epilogue.
794      * For tci, we use NULL as the signal to return from the interpreter,
795      * so skip this check.
796      */
797     tcg_debug_assert(tcg_code_gen_epilogue != NULL);
798 #endif
799 
800     tcg_region_prologue_set(s);
801 }
802 
803 void tcg_func_start(TCGContext *s)
804 {
805     tcg_pool_reset(s);
806     s->nb_temps = s->nb_globals;
807 
808     /* No temps have been previously allocated for size or locality.  */
809     memset(s->free_temps, 0, sizeof(s->free_temps));
810 
811     /* No constant temps have been previously allocated. */
812     for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
813         if (s->const_table[i]) {
814             g_hash_table_remove_all(s->const_table[i]);
815         }
816     }
817 
818     s->nb_ops = 0;
819     s->nb_labels = 0;
820     s->current_frame_offset = s->frame_start;
821 
822 #ifdef CONFIG_DEBUG_TCG
823     s->goto_tb_issue_mask = 0;
824 #endif
825 
826     QTAILQ_INIT(&s->ops);
827     QTAILQ_INIT(&s->free_ops);
828     QSIMPLEQ_INIT(&s->labels);
829 }
830 
831 static TCGTemp *tcg_temp_alloc(TCGContext *s)
832 {
833     int n = s->nb_temps++;
834 
835     if (n >= TCG_MAX_TEMPS) {
836         tcg_raise_tb_overflow(s);
837     }
838     return memset(&s->temps[n], 0, sizeof(TCGTemp));
839 }
840 
841 static TCGTemp *tcg_global_alloc(TCGContext *s)
842 {
843     TCGTemp *ts;
844 
845     tcg_debug_assert(s->nb_globals == s->nb_temps);
846     tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
847     s->nb_globals++;
848     ts = tcg_temp_alloc(s);
849     ts->kind = TEMP_GLOBAL;
850 
851     return ts;
852 }
853 
854 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
855                                             TCGReg reg, const char *name)
856 {
857     TCGTemp *ts;
858 
859     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
860         tcg_abort();
861     }
862 
863     ts = tcg_global_alloc(s);
864     ts->base_type = type;
865     ts->type = type;
866     ts->kind = TEMP_FIXED;
867     ts->reg = reg;
868     ts->name = name;
869     tcg_regset_set_reg(s->reserved_regs, reg);
870 
871     return ts;
872 }
873 
874 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
875 {
876     s->frame_start = start;
877     s->frame_end = start + size;
878     s->frame_temp
879         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
880 }
881 
882 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
883                                      intptr_t offset, const char *name)
884 {
885     TCGContext *s = tcg_ctx;
886     TCGTemp *base_ts = tcgv_ptr_temp(base);
887     TCGTemp *ts = tcg_global_alloc(s);
888     int indirect_reg = 0, bigendian = 0;
889 #if HOST_BIG_ENDIAN
890     bigendian = 1;
891 #endif
892 
893     switch (base_ts->kind) {
894     case TEMP_FIXED:
895         break;
896     case TEMP_GLOBAL:
897         /* We do not support double-indirect registers.  */
898         tcg_debug_assert(!base_ts->indirect_reg);
899         base_ts->indirect_base = 1;
900         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
901                             ? 2 : 1);
902         indirect_reg = 1;
903         break;
904     default:
905         g_assert_not_reached();
906     }
907 
908     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
909         TCGTemp *ts2 = tcg_global_alloc(s);
910         char buf[64];
911 
912         ts->base_type = TCG_TYPE_I64;
913         ts->type = TCG_TYPE_I32;
914         ts->indirect_reg = indirect_reg;
915         ts->mem_allocated = 1;
916         ts->mem_base = base_ts;
917         ts->mem_offset = offset + bigendian * 4;
918         pstrcpy(buf, sizeof(buf), name);
919         pstrcat(buf, sizeof(buf), "_0");
920         ts->name = strdup(buf);
921 
922         tcg_debug_assert(ts2 == ts + 1);
923         ts2->base_type = TCG_TYPE_I64;
924         ts2->type = TCG_TYPE_I32;
925         ts2->indirect_reg = indirect_reg;
926         ts2->mem_allocated = 1;
927         ts2->mem_base = base_ts;
928         ts2->mem_offset = offset + (1 - bigendian) * 4;
929         pstrcpy(buf, sizeof(buf), name);
930         pstrcat(buf, sizeof(buf), "_1");
931         ts2->name = strdup(buf);
932     } else {
933         ts->base_type = type;
934         ts->type = type;
935         ts->indirect_reg = indirect_reg;
936         ts->mem_allocated = 1;
937         ts->mem_base = base_ts;
938         ts->mem_offset = offset;
939         ts->name = name;
940     }
941     return ts;
942 }
943 
944 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
945 {
946     TCGContext *s = tcg_ctx;
947     TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
948     TCGTemp *ts;
949     int idx, k;
950 
951     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
952     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
953     if (idx < TCG_MAX_TEMPS) {
954         /* There is already an available temp with the right type.  */
955         clear_bit(idx, s->free_temps[k].l);
956 
957         ts = &s->temps[idx];
958         ts->temp_allocated = 1;
959         tcg_debug_assert(ts->base_type == type);
960         tcg_debug_assert(ts->kind == kind);
961     } else {
962         ts = tcg_temp_alloc(s);
963         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
964             TCGTemp *ts2 = tcg_temp_alloc(s);
965 
966             ts->base_type = type;
967             ts->type = TCG_TYPE_I32;
968             ts->temp_allocated = 1;
969             ts->kind = kind;
970 
971             tcg_debug_assert(ts2 == ts + 1);
972             ts2->base_type = TCG_TYPE_I64;
973             ts2->type = TCG_TYPE_I32;
974             ts2->temp_allocated = 1;
975             ts2->kind = kind;
976         } else {
977             ts->base_type = type;
978             ts->type = type;
979             ts->temp_allocated = 1;
980             ts->kind = kind;
981         }
982     }
983 
984 #if defined(CONFIG_DEBUG_TCG)
985     s->temps_in_use++;
986 #endif
987     return ts;
988 }
989 
990 TCGv_vec tcg_temp_new_vec(TCGType type)
991 {
992     TCGTemp *t;
993 
994 #ifdef CONFIG_DEBUG_TCG
995     switch (type) {
996     case TCG_TYPE_V64:
997         assert(TCG_TARGET_HAS_v64);
998         break;
999     case TCG_TYPE_V128:
1000         assert(TCG_TARGET_HAS_v128);
1001         break;
1002     case TCG_TYPE_V256:
1003         assert(TCG_TARGET_HAS_v256);
1004         break;
1005     default:
1006         g_assert_not_reached();
1007     }
1008 #endif
1009 
1010     t = tcg_temp_new_internal(type, 0);
1011     return temp_tcgv_vec(t);
1012 }
1013 
1014 /* Create a new temp of the same type as an existing temp.  */
1015 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1016 {
1017     TCGTemp *t = tcgv_vec_temp(match);
1018 
1019     tcg_debug_assert(t->temp_allocated != 0);
1020 
1021     t = tcg_temp_new_internal(t->base_type, 0);
1022     return temp_tcgv_vec(t);
1023 }
1024 
1025 void tcg_temp_free_internal(TCGTemp *ts)
1026 {
1027     TCGContext *s = tcg_ctx;
1028     int k, idx;
1029 
1030     switch (ts->kind) {
1031     case TEMP_CONST:
1032         /*
1033          * In order to simplify users of tcg_constant_*,
1034          * silently ignore free.
1035          */
1036         return;
1037     case TEMP_NORMAL:
1038     case TEMP_LOCAL:
1039         break;
1040     default:
1041         g_assert_not_reached();
1042     }
1043 
1044 #if defined(CONFIG_DEBUG_TCG)
1045     s->temps_in_use--;
1046     if (s->temps_in_use < 0) {
1047         fprintf(stderr, "More temporaries freed than allocated!\n");
1048     }
1049 #endif
1050 
1051     tcg_debug_assert(ts->temp_allocated != 0);
1052     ts->temp_allocated = 0;
1053 
1054     idx = temp_idx(ts);
1055     k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
1056     set_bit(idx, s->free_temps[k].l);
1057 }
1058 
1059 TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
1060 {
1061     TCGContext *s = tcg_ctx;
1062     GHashTable *h = s->const_table[type];
1063     TCGTemp *ts;
1064 
1065     if (h == NULL) {
1066         h = g_hash_table_new(g_int64_hash, g_int64_equal);
1067         s->const_table[type] = h;
1068     }
1069 
1070     ts = g_hash_table_lookup(h, &val);
1071     if (ts == NULL) {
1072         ts = tcg_temp_alloc(s);
1073 
1074         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1075             TCGTemp *ts2 = tcg_temp_alloc(s);
1076 
1077             ts->base_type = TCG_TYPE_I64;
1078             ts->type = TCG_TYPE_I32;
1079             ts->kind = TEMP_CONST;
1080             ts->temp_allocated = 1;
1081             /*
1082              * Retain the full value of the 64-bit constant in the low
1083              * part, so that the hash table works.  Actual uses will
1084              * truncate the value to the low part.
1085              */
1086             ts->val = val;
1087 
1088             tcg_debug_assert(ts2 == ts + 1);
1089             ts2->base_type = TCG_TYPE_I64;
1090             ts2->type = TCG_TYPE_I32;
1091             ts2->kind = TEMP_CONST;
1092             ts2->temp_allocated = 1;
1093             ts2->val = val >> 32;
1094         } else {
1095             ts->base_type = type;
1096             ts->type = type;
1097             ts->kind = TEMP_CONST;
1098             ts->temp_allocated = 1;
1099             ts->val = val;
1100         }
1101         g_hash_table_insert(h, &ts->val, ts);
1102     }
1103 
1104     return ts;
1105 }
1106 
1107 TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
1108 {
1109     val = dup_const(vece, val);
1110     return temp_tcgv_vec(tcg_constant_internal(type, val));
1111 }
1112 
1113 TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
1114 {
1115     TCGTemp *t = tcgv_vec_temp(match);
1116 
1117     tcg_debug_assert(t->temp_allocated != 0);
1118     return tcg_constant_vec(t->base_type, vece, val);
1119 }
1120 
1121 TCGv_i32 tcg_const_i32(int32_t val)
1122 {
1123     TCGv_i32 t0;
1124     t0 = tcg_temp_new_i32();
1125     tcg_gen_movi_i32(t0, val);
1126     return t0;
1127 }
1128 
1129 TCGv_i64 tcg_const_i64(int64_t val)
1130 {
1131     TCGv_i64 t0;
1132     t0 = tcg_temp_new_i64();
1133     tcg_gen_movi_i64(t0, val);
1134     return t0;
1135 }
1136 
1137 TCGv_i32 tcg_const_local_i32(int32_t val)
1138 {
1139     TCGv_i32 t0;
1140     t0 = tcg_temp_local_new_i32();
1141     tcg_gen_movi_i32(t0, val);
1142     return t0;
1143 }
1144 
1145 TCGv_i64 tcg_const_local_i64(int64_t val)
1146 {
1147     TCGv_i64 t0;
1148     t0 = tcg_temp_local_new_i64();
1149     tcg_gen_movi_i64(t0, val);
1150     return t0;
1151 }
1152 
1153 #if defined(CONFIG_DEBUG_TCG)
1154 void tcg_clear_temp_count(void)
1155 {
1156     TCGContext *s = tcg_ctx;
1157     s->temps_in_use = 0;
1158 }
1159 
1160 int tcg_check_temp_count(void)
1161 {
1162     TCGContext *s = tcg_ctx;
1163     if (s->temps_in_use) {
1164         /* Clear the count so that we don't give another
1165          * warning immediately next time around.
1166          */
1167         s->temps_in_use = 0;
1168         return 1;
1169     }
1170     return 0;
1171 }
1172 #endif
1173 
1174 /* Return true if OP may appear in the opcode stream.
1175    Test the runtime variable that controls each opcode.  */
1176 bool tcg_op_supported(TCGOpcode op)
1177 {
1178     const bool have_vec
1179         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1180 
1181     switch (op) {
1182     case INDEX_op_discard:
1183     case INDEX_op_set_label:
1184     case INDEX_op_call:
1185     case INDEX_op_br:
1186     case INDEX_op_mb:
1187     case INDEX_op_insn_start:
1188     case INDEX_op_exit_tb:
1189     case INDEX_op_goto_tb:
1190     case INDEX_op_goto_ptr:
1191     case INDEX_op_qemu_ld_i32:
1192     case INDEX_op_qemu_st_i32:
1193     case INDEX_op_qemu_ld_i64:
1194     case INDEX_op_qemu_st_i64:
1195         return true;
1196 
1197     case INDEX_op_qemu_st8_i32:
1198         return TCG_TARGET_HAS_qemu_st8_i32;
1199 
1200     case INDEX_op_mov_i32:
1201     case INDEX_op_setcond_i32:
1202     case INDEX_op_brcond_i32:
1203     case INDEX_op_ld8u_i32:
1204     case INDEX_op_ld8s_i32:
1205     case INDEX_op_ld16u_i32:
1206     case INDEX_op_ld16s_i32:
1207     case INDEX_op_ld_i32:
1208     case INDEX_op_st8_i32:
1209     case INDEX_op_st16_i32:
1210     case INDEX_op_st_i32:
1211     case INDEX_op_add_i32:
1212     case INDEX_op_sub_i32:
1213     case INDEX_op_mul_i32:
1214     case INDEX_op_and_i32:
1215     case INDEX_op_or_i32:
1216     case INDEX_op_xor_i32:
1217     case INDEX_op_shl_i32:
1218     case INDEX_op_shr_i32:
1219     case INDEX_op_sar_i32:
1220         return true;
1221 
1222     case INDEX_op_movcond_i32:
1223         return TCG_TARGET_HAS_movcond_i32;
1224     case INDEX_op_div_i32:
1225     case INDEX_op_divu_i32:
1226         return TCG_TARGET_HAS_div_i32;
1227     case INDEX_op_rem_i32:
1228     case INDEX_op_remu_i32:
1229         return TCG_TARGET_HAS_rem_i32;
1230     case INDEX_op_div2_i32:
1231     case INDEX_op_divu2_i32:
1232         return TCG_TARGET_HAS_div2_i32;
1233     case INDEX_op_rotl_i32:
1234     case INDEX_op_rotr_i32:
1235         return TCG_TARGET_HAS_rot_i32;
1236     case INDEX_op_deposit_i32:
1237         return TCG_TARGET_HAS_deposit_i32;
1238     case INDEX_op_extract_i32:
1239         return TCG_TARGET_HAS_extract_i32;
1240     case INDEX_op_sextract_i32:
1241         return TCG_TARGET_HAS_sextract_i32;
1242     case INDEX_op_extract2_i32:
1243         return TCG_TARGET_HAS_extract2_i32;
1244     case INDEX_op_add2_i32:
1245         return TCG_TARGET_HAS_add2_i32;
1246     case INDEX_op_sub2_i32:
1247         return TCG_TARGET_HAS_sub2_i32;
1248     case INDEX_op_mulu2_i32:
1249         return TCG_TARGET_HAS_mulu2_i32;
1250     case INDEX_op_muls2_i32:
1251         return TCG_TARGET_HAS_muls2_i32;
1252     case INDEX_op_muluh_i32:
1253         return TCG_TARGET_HAS_muluh_i32;
1254     case INDEX_op_mulsh_i32:
1255         return TCG_TARGET_HAS_mulsh_i32;
1256     case INDEX_op_ext8s_i32:
1257         return TCG_TARGET_HAS_ext8s_i32;
1258     case INDEX_op_ext16s_i32:
1259         return TCG_TARGET_HAS_ext16s_i32;
1260     case INDEX_op_ext8u_i32:
1261         return TCG_TARGET_HAS_ext8u_i32;
1262     case INDEX_op_ext16u_i32:
1263         return TCG_TARGET_HAS_ext16u_i32;
1264     case INDEX_op_bswap16_i32:
1265         return TCG_TARGET_HAS_bswap16_i32;
1266     case INDEX_op_bswap32_i32:
1267         return TCG_TARGET_HAS_bswap32_i32;
1268     case INDEX_op_not_i32:
1269         return TCG_TARGET_HAS_not_i32;
1270     case INDEX_op_neg_i32:
1271         return TCG_TARGET_HAS_neg_i32;
1272     case INDEX_op_andc_i32:
1273         return TCG_TARGET_HAS_andc_i32;
1274     case INDEX_op_orc_i32:
1275         return TCG_TARGET_HAS_orc_i32;
1276     case INDEX_op_eqv_i32:
1277         return TCG_TARGET_HAS_eqv_i32;
1278     case INDEX_op_nand_i32:
1279         return TCG_TARGET_HAS_nand_i32;
1280     case INDEX_op_nor_i32:
1281         return TCG_TARGET_HAS_nor_i32;
1282     case INDEX_op_clz_i32:
1283         return TCG_TARGET_HAS_clz_i32;
1284     case INDEX_op_ctz_i32:
1285         return TCG_TARGET_HAS_ctz_i32;
1286     case INDEX_op_ctpop_i32:
1287         return TCG_TARGET_HAS_ctpop_i32;
1288 
1289     case INDEX_op_brcond2_i32:
1290     case INDEX_op_setcond2_i32:
1291         return TCG_TARGET_REG_BITS == 32;
1292 
1293     case INDEX_op_mov_i64:
1294     case INDEX_op_setcond_i64:
1295     case INDEX_op_brcond_i64:
1296     case INDEX_op_ld8u_i64:
1297     case INDEX_op_ld8s_i64:
1298     case INDEX_op_ld16u_i64:
1299     case INDEX_op_ld16s_i64:
1300     case INDEX_op_ld32u_i64:
1301     case INDEX_op_ld32s_i64:
1302     case INDEX_op_ld_i64:
1303     case INDEX_op_st8_i64:
1304     case INDEX_op_st16_i64:
1305     case INDEX_op_st32_i64:
1306     case INDEX_op_st_i64:
1307     case INDEX_op_add_i64:
1308     case INDEX_op_sub_i64:
1309     case INDEX_op_mul_i64:
1310     case INDEX_op_and_i64:
1311     case INDEX_op_or_i64:
1312     case INDEX_op_xor_i64:
1313     case INDEX_op_shl_i64:
1314     case INDEX_op_shr_i64:
1315     case INDEX_op_sar_i64:
1316     case INDEX_op_ext_i32_i64:
1317     case INDEX_op_extu_i32_i64:
1318         return TCG_TARGET_REG_BITS == 64;
1319 
1320     case INDEX_op_movcond_i64:
1321         return TCG_TARGET_HAS_movcond_i64;
1322     case INDEX_op_div_i64:
1323     case INDEX_op_divu_i64:
1324         return TCG_TARGET_HAS_div_i64;
1325     case INDEX_op_rem_i64:
1326     case INDEX_op_remu_i64:
1327         return TCG_TARGET_HAS_rem_i64;
1328     case INDEX_op_div2_i64:
1329     case INDEX_op_divu2_i64:
1330         return TCG_TARGET_HAS_div2_i64;
1331     case INDEX_op_rotl_i64:
1332     case INDEX_op_rotr_i64:
1333         return TCG_TARGET_HAS_rot_i64;
1334     case INDEX_op_deposit_i64:
1335         return TCG_TARGET_HAS_deposit_i64;
1336     case INDEX_op_extract_i64:
1337         return TCG_TARGET_HAS_extract_i64;
1338     case INDEX_op_sextract_i64:
1339         return TCG_TARGET_HAS_sextract_i64;
1340     case INDEX_op_extract2_i64:
1341         return TCG_TARGET_HAS_extract2_i64;
1342     case INDEX_op_extrl_i64_i32:
1343         return TCG_TARGET_HAS_extrl_i64_i32;
1344     case INDEX_op_extrh_i64_i32:
1345         return TCG_TARGET_HAS_extrh_i64_i32;
1346     case INDEX_op_ext8s_i64:
1347         return TCG_TARGET_HAS_ext8s_i64;
1348     case INDEX_op_ext16s_i64:
1349         return TCG_TARGET_HAS_ext16s_i64;
1350     case INDEX_op_ext32s_i64:
1351         return TCG_TARGET_HAS_ext32s_i64;
1352     case INDEX_op_ext8u_i64:
1353         return TCG_TARGET_HAS_ext8u_i64;
1354     case INDEX_op_ext16u_i64:
1355         return TCG_TARGET_HAS_ext16u_i64;
1356     case INDEX_op_ext32u_i64:
1357         return TCG_TARGET_HAS_ext32u_i64;
1358     case INDEX_op_bswap16_i64:
1359         return TCG_TARGET_HAS_bswap16_i64;
1360     case INDEX_op_bswap32_i64:
1361         return TCG_TARGET_HAS_bswap32_i64;
1362     case INDEX_op_bswap64_i64:
1363         return TCG_TARGET_HAS_bswap64_i64;
1364     case INDEX_op_not_i64:
1365         return TCG_TARGET_HAS_not_i64;
1366     case INDEX_op_neg_i64:
1367         return TCG_TARGET_HAS_neg_i64;
1368     case INDEX_op_andc_i64:
1369         return TCG_TARGET_HAS_andc_i64;
1370     case INDEX_op_orc_i64:
1371         return TCG_TARGET_HAS_orc_i64;
1372     case INDEX_op_eqv_i64:
1373         return TCG_TARGET_HAS_eqv_i64;
1374     case INDEX_op_nand_i64:
1375         return TCG_TARGET_HAS_nand_i64;
1376     case INDEX_op_nor_i64:
1377         return TCG_TARGET_HAS_nor_i64;
1378     case INDEX_op_clz_i64:
1379         return TCG_TARGET_HAS_clz_i64;
1380     case INDEX_op_ctz_i64:
1381         return TCG_TARGET_HAS_ctz_i64;
1382     case INDEX_op_ctpop_i64:
1383         return TCG_TARGET_HAS_ctpop_i64;
1384     case INDEX_op_add2_i64:
1385         return TCG_TARGET_HAS_add2_i64;
1386     case INDEX_op_sub2_i64:
1387         return TCG_TARGET_HAS_sub2_i64;
1388     case INDEX_op_mulu2_i64:
1389         return TCG_TARGET_HAS_mulu2_i64;
1390     case INDEX_op_muls2_i64:
1391         return TCG_TARGET_HAS_muls2_i64;
1392     case INDEX_op_muluh_i64:
1393         return TCG_TARGET_HAS_muluh_i64;
1394     case INDEX_op_mulsh_i64:
1395         return TCG_TARGET_HAS_mulsh_i64;
1396 
1397     case INDEX_op_mov_vec:
1398     case INDEX_op_dup_vec:
1399     case INDEX_op_dupm_vec:
1400     case INDEX_op_ld_vec:
1401     case INDEX_op_st_vec:
1402     case INDEX_op_add_vec:
1403     case INDEX_op_sub_vec:
1404     case INDEX_op_and_vec:
1405     case INDEX_op_or_vec:
1406     case INDEX_op_xor_vec:
1407     case INDEX_op_cmp_vec:
1408         return have_vec;
1409     case INDEX_op_dup2_vec:
1410         return have_vec && TCG_TARGET_REG_BITS == 32;
1411     case INDEX_op_not_vec:
1412         return have_vec && TCG_TARGET_HAS_not_vec;
1413     case INDEX_op_neg_vec:
1414         return have_vec && TCG_TARGET_HAS_neg_vec;
1415     case INDEX_op_abs_vec:
1416         return have_vec && TCG_TARGET_HAS_abs_vec;
1417     case INDEX_op_andc_vec:
1418         return have_vec && TCG_TARGET_HAS_andc_vec;
1419     case INDEX_op_orc_vec:
1420         return have_vec && TCG_TARGET_HAS_orc_vec;
1421     case INDEX_op_nand_vec:
1422         return have_vec && TCG_TARGET_HAS_nand_vec;
1423     case INDEX_op_nor_vec:
1424         return have_vec && TCG_TARGET_HAS_nor_vec;
1425     case INDEX_op_eqv_vec:
1426         return have_vec && TCG_TARGET_HAS_eqv_vec;
1427     case INDEX_op_mul_vec:
1428         return have_vec && TCG_TARGET_HAS_mul_vec;
1429     case INDEX_op_shli_vec:
1430     case INDEX_op_shri_vec:
1431     case INDEX_op_sari_vec:
1432         return have_vec && TCG_TARGET_HAS_shi_vec;
1433     case INDEX_op_shls_vec:
1434     case INDEX_op_shrs_vec:
1435     case INDEX_op_sars_vec:
1436         return have_vec && TCG_TARGET_HAS_shs_vec;
1437     case INDEX_op_shlv_vec:
1438     case INDEX_op_shrv_vec:
1439     case INDEX_op_sarv_vec:
1440         return have_vec && TCG_TARGET_HAS_shv_vec;
1441     case INDEX_op_rotli_vec:
1442         return have_vec && TCG_TARGET_HAS_roti_vec;
1443     case INDEX_op_rotls_vec:
1444         return have_vec && TCG_TARGET_HAS_rots_vec;
1445     case INDEX_op_rotlv_vec:
1446     case INDEX_op_rotrv_vec:
1447         return have_vec && TCG_TARGET_HAS_rotv_vec;
1448     case INDEX_op_ssadd_vec:
1449     case INDEX_op_usadd_vec:
1450     case INDEX_op_sssub_vec:
1451     case INDEX_op_ussub_vec:
1452         return have_vec && TCG_TARGET_HAS_sat_vec;
1453     case INDEX_op_smin_vec:
1454     case INDEX_op_umin_vec:
1455     case INDEX_op_smax_vec:
1456     case INDEX_op_umax_vec:
1457         return have_vec && TCG_TARGET_HAS_minmax_vec;
1458     case INDEX_op_bitsel_vec:
1459         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1460     case INDEX_op_cmpsel_vec:
1461         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1462 
1463     default:
1464         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1465         return true;
1466     }
1467 }
1468 
1469 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1470    and endian swap. Maybe it would be better to do the alignment
1471    and endian swap in tcg_reg_alloc_call(). */
1472 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1473 {
1474     int i, real_args, nb_rets, pi;
1475     unsigned typemask;
1476     const TCGHelperInfo *info;
1477     TCGOp *op;
1478 
1479     info = g_hash_table_lookup(helper_table, (gpointer)func);
1480     typemask = info->typemask;
1481 
1482 #ifdef CONFIG_PLUGIN
1483     /* detect non-plugin helpers */
1484     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1485         tcg_ctx->plugin_insn->calls_helpers = true;
1486     }
1487 #endif
1488 
1489 #if defined(__sparc__) && !defined(__arch64__) \
1490     && !defined(CONFIG_TCG_INTERPRETER)
1491     /* We have 64-bit values in one register, but need to pass as two
1492        separate parameters.  Split them.  */
1493     int orig_typemask = typemask;
1494     int orig_nargs = nargs;
1495     TCGv_i64 retl, reth;
1496     TCGTemp *split_args[MAX_OPC_PARAM];
1497 
1498     retl = NULL;
1499     reth = NULL;
1500     typemask = 0;
1501     for (i = real_args = 0; i < nargs; ++i) {
1502         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1503         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1504 
1505         if (is_64bit) {
1506             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1507             TCGv_i32 h = tcg_temp_new_i32();
1508             TCGv_i32 l = tcg_temp_new_i32();
1509             tcg_gen_extr_i64_i32(l, h, orig);
1510             split_args[real_args++] = tcgv_i32_temp(h);
1511             typemask |= dh_typecode_i32 << (real_args * 3);
1512             split_args[real_args++] = tcgv_i32_temp(l);
1513             typemask |= dh_typecode_i32 << (real_args * 3);
1514         } else {
1515             split_args[real_args++] = args[i];
1516             typemask |= argtype << (real_args * 3);
1517         }
1518     }
1519     nargs = real_args;
1520     args = split_args;
1521 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1522     for (i = 0; i < nargs; ++i) {
1523         int argtype = extract32(typemask, (i + 1) * 3, 3);
1524         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1525         bool is_signed = argtype & 1;
1526 
1527         if (is_32bit) {
1528             TCGv_i64 temp = tcg_temp_new_i64();
1529             TCGv_i32 orig = temp_tcgv_i32(args[i]);
1530             if (is_signed) {
1531                 tcg_gen_ext_i32_i64(temp, orig);
1532             } else {
1533                 tcg_gen_extu_i32_i64(temp, orig);
1534             }
1535             args[i] = tcgv_i64_temp(temp);
1536         }
1537     }
1538 #endif /* TCG_TARGET_EXTEND_ARGS */
1539 
1540     op = tcg_emit_op(INDEX_op_call);
1541 
1542     pi = 0;
1543     if (ret != NULL) {
1544 #if defined(__sparc__) && !defined(__arch64__) \
1545     && !defined(CONFIG_TCG_INTERPRETER)
1546         if ((typemask & 6) == dh_typecode_i64) {
1547             /* The 32-bit ABI is going to return the 64-bit value in
1548                the %o0/%o1 register pair.  Prepare for this by using
1549                two return temporaries, and reassemble below.  */
1550             retl = tcg_temp_new_i64();
1551             reth = tcg_temp_new_i64();
1552             op->args[pi++] = tcgv_i64_arg(reth);
1553             op->args[pi++] = tcgv_i64_arg(retl);
1554             nb_rets = 2;
1555         } else {
1556             op->args[pi++] = temp_arg(ret);
1557             nb_rets = 1;
1558         }
1559 #else
1560         if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) {
1561 #if HOST_BIG_ENDIAN
1562             op->args[pi++] = temp_arg(ret + 1);
1563             op->args[pi++] = temp_arg(ret);
1564 #else
1565             op->args[pi++] = temp_arg(ret);
1566             op->args[pi++] = temp_arg(ret + 1);
1567 #endif
1568             nb_rets = 2;
1569         } else {
1570             op->args[pi++] = temp_arg(ret);
1571             nb_rets = 1;
1572         }
1573 #endif
1574     } else {
1575         nb_rets = 0;
1576     }
1577     TCGOP_CALLO(op) = nb_rets;
1578 
1579     real_args = 0;
1580     for (i = 0; i < nargs; i++) {
1581         int argtype = extract32(typemask, (i + 1) * 3, 3);
1582         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1583         bool want_align = false;
1584 
1585 #if defined(CONFIG_TCG_INTERPRETER)
1586         /*
1587          * Align all arguments, so that they land in predictable places
1588          * for passing off to ffi_call.
1589          */
1590         want_align = true;
1591 #elif defined(TCG_TARGET_CALL_ALIGN_ARGS)
1592         /* Some targets want aligned 64 bit args */
1593         want_align = is_64bit;
1594 #endif
1595 
1596         if (TCG_TARGET_REG_BITS < 64 && want_align && (real_args & 1)) {
1597             op->args[pi++] = TCG_CALL_DUMMY_ARG;
1598             real_args++;
1599         }
1600 
1601         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1602             /*
1603              * If stack grows up, then we will be placing successive
1604              * arguments at lower addresses, which means we need to
1605              * reverse the order compared to how we would normally
1606              * treat either big or little-endian.  For those arguments
1607              * that will wind up in registers, this still works for
1608              * HPPA (the only current STACK_GROWSUP target) since the
1609              * argument registers are *also* allocated in decreasing
1610              * order.  If another such target is added, this logic may
1611              * have to get more complicated to differentiate between
1612              * stack arguments and register arguments.
1613              */
1614 #if HOST_BIG_ENDIAN != defined(TCG_TARGET_STACK_GROWSUP)
1615             op->args[pi++] = temp_arg(args[i] + 1);
1616             op->args[pi++] = temp_arg(args[i]);
1617 #else
1618             op->args[pi++] = temp_arg(args[i]);
1619             op->args[pi++] = temp_arg(args[i] + 1);
1620 #endif
1621             real_args += 2;
1622             continue;
1623         }
1624 
1625         op->args[pi++] = temp_arg(args[i]);
1626         real_args++;
1627     }
1628     op->args[pi++] = (uintptr_t)func;
1629     op->args[pi++] = (uintptr_t)info;
1630     TCGOP_CALLI(op) = real_args;
1631 
1632     /* Make sure the fields didn't overflow.  */
1633     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1634     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1635 
1636 #if defined(__sparc__) && !defined(__arch64__) \
1637     && !defined(CONFIG_TCG_INTERPRETER)
1638     /* Free all of the parts we allocated above.  */
1639     for (i = real_args = 0; i < orig_nargs; ++i) {
1640         int argtype = extract32(orig_typemask, (i + 1) * 3, 3);
1641         bool is_64bit = (argtype & ~1) == dh_typecode_i64;
1642 
1643         if (is_64bit) {
1644             tcg_temp_free_internal(args[real_args++]);
1645             tcg_temp_free_internal(args[real_args++]);
1646         } else {
1647             real_args++;
1648         }
1649     }
1650     if ((orig_typemask & 6) == dh_typecode_i64) {
1651         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1652            Note that describing these as TCGv_i64 eliminates an unnecessary
1653            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1654         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1655         tcg_temp_free_i64(retl);
1656         tcg_temp_free_i64(reth);
1657     }
1658 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1659     for (i = 0; i < nargs; ++i) {
1660         int argtype = extract32(typemask, (i + 1) * 3, 3);
1661         bool is_32bit = (argtype & ~1) == dh_typecode_i32;
1662 
1663         if (is_32bit) {
1664             tcg_temp_free_internal(args[i]);
1665         }
1666     }
1667 #endif /* TCG_TARGET_EXTEND_ARGS */
1668 }
1669 
1670 static void tcg_reg_alloc_start(TCGContext *s)
1671 {
1672     int i, n;
1673 
1674     for (i = 0, n = s->nb_temps; i < n; i++) {
1675         TCGTemp *ts = &s->temps[i];
1676         TCGTempVal val = TEMP_VAL_MEM;
1677 
1678         switch (ts->kind) {
1679         case TEMP_CONST:
1680             val = TEMP_VAL_CONST;
1681             break;
1682         case TEMP_FIXED:
1683             val = TEMP_VAL_REG;
1684             break;
1685         case TEMP_GLOBAL:
1686             break;
1687         case TEMP_NORMAL:
1688         case TEMP_EBB:
1689             val = TEMP_VAL_DEAD;
1690             /* fall through */
1691         case TEMP_LOCAL:
1692             ts->mem_allocated = 0;
1693             break;
1694         default:
1695             g_assert_not_reached();
1696         }
1697         ts->val_type = val;
1698     }
1699 
1700     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1701 }
1702 
1703 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1704                                  TCGTemp *ts)
1705 {
1706     int idx = temp_idx(ts);
1707 
1708     switch (ts->kind) {
1709     case TEMP_FIXED:
1710     case TEMP_GLOBAL:
1711         pstrcpy(buf, buf_size, ts->name);
1712         break;
1713     case TEMP_LOCAL:
1714         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1715         break;
1716     case TEMP_EBB:
1717         snprintf(buf, buf_size, "ebb%d", idx - s->nb_globals);
1718         break;
1719     case TEMP_NORMAL:
1720         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1721         break;
1722     case TEMP_CONST:
1723         switch (ts->type) {
1724         case TCG_TYPE_I32:
1725             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
1726             break;
1727 #if TCG_TARGET_REG_BITS > 32
1728         case TCG_TYPE_I64:
1729             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
1730             break;
1731 #endif
1732         case TCG_TYPE_V64:
1733         case TCG_TYPE_V128:
1734         case TCG_TYPE_V256:
1735             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
1736                      64 << (ts->type - TCG_TYPE_V64), ts->val);
1737             break;
1738         default:
1739             g_assert_not_reached();
1740         }
1741         break;
1742     }
1743     return buf;
1744 }
1745 
1746 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1747                              int buf_size, TCGArg arg)
1748 {
1749     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1750 }
1751 
1752 static const char * const cond_name[] =
1753 {
1754     [TCG_COND_NEVER] = "never",
1755     [TCG_COND_ALWAYS] = "always",
1756     [TCG_COND_EQ] = "eq",
1757     [TCG_COND_NE] = "ne",
1758     [TCG_COND_LT] = "lt",
1759     [TCG_COND_GE] = "ge",
1760     [TCG_COND_LE] = "le",
1761     [TCG_COND_GT] = "gt",
1762     [TCG_COND_LTU] = "ltu",
1763     [TCG_COND_GEU] = "geu",
1764     [TCG_COND_LEU] = "leu",
1765     [TCG_COND_GTU] = "gtu"
1766 };
1767 
1768 static const char * const ldst_name[] =
1769 {
1770     [MO_UB]   = "ub",
1771     [MO_SB]   = "sb",
1772     [MO_LEUW] = "leuw",
1773     [MO_LESW] = "lesw",
1774     [MO_LEUL] = "leul",
1775     [MO_LESL] = "lesl",
1776     [MO_LEUQ] = "leq",
1777     [MO_BEUW] = "beuw",
1778     [MO_BESW] = "besw",
1779     [MO_BEUL] = "beul",
1780     [MO_BESL] = "besl",
1781     [MO_BEUQ] = "beq",
1782 };
1783 
1784 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1785 #ifdef TARGET_ALIGNED_ONLY
1786     [MO_UNALN >> MO_ASHIFT]    = "un+",
1787     [MO_ALIGN >> MO_ASHIFT]    = "",
1788 #else
1789     [MO_UNALN >> MO_ASHIFT]    = "",
1790     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1791 #endif
1792     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1793     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1794     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1795     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1796     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1797     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1798 };
1799 
1800 static const char bswap_flag_name[][6] = {
1801     [TCG_BSWAP_IZ] = "iz",
1802     [TCG_BSWAP_OZ] = "oz",
1803     [TCG_BSWAP_OS] = "os",
1804     [TCG_BSWAP_IZ | TCG_BSWAP_OZ] = "iz,oz",
1805     [TCG_BSWAP_IZ | TCG_BSWAP_OS] = "iz,os",
1806 };
1807 
1808 static inline bool tcg_regset_single(TCGRegSet d)
1809 {
1810     return (d & (d - 1)) == 0;
1811 }
1812 
1813 static inline TCGReg tcg_regset_first(TCGRegSet d)
1814 {
1815     if (TCG_TARGET_NB_REGS <= 32) {
1816         return ctz32(d);
1817     } else {
1818         return ctz64(d);
1819     }
1820 }
1821 
/*
 * "No-error" fprintf: yields the number of characters written, or 0
 * when fprintf reports an error (negative result).
 */
#define ne_fprintf(...) \
    ({ int ret_ = fprintf(__VA_ARGS__); ret_ < 0 ? 0 : ret_; })
1825 
1826 static void tcg_dump_ops(TCGContext *s, FILE *f, bool have_prefs)
1827 {
1828     char buf[128];
1829     TCGOp *op;
1830 
1831     QTAILQ_FOREACH(op, &s->ops, link) {
1832         int i, k, nb_oargs, nb_iargs, nb_cargs;
1833         const TCGOpDef *def;
1834         TCGOpcode c;
1835         int col = 0;
1836 
1837         c = op->opc;
1838         def = &tcg_op_defs[c];
1839 
1840         if (c == INDEX_op_insn_start) {
1841             nb_oargs = 0;
1842             col += ne_fprintf(f, "\n ----");
1843 
1844             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1845                 target_ulong a;
1846 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1847                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1848 #else
1849                 a = op->args[i];
1850 #endif
1851                 col += ne_fprintf(f, " " TARGET_FMT_lx, a);
1852             }
1853         } else if (c == INDEX_op_call) {
1854             const TCGHelperInfo *info = tcg_call_info(op);
1855             void *func = tcg_call_func(op);
1856 
1857             /* variable number of arguments */
1858             nb_oargs = TCGOP_CALLO(op);
1859             nb_iargs = TCGOP_CALLI(op);
1860             nb_cargs = def->nb_cargs;
1861 
1862             col += ne_fprintf(f, " %s ", def->name);
1863 
1864             /*
1865              * Print the function name from TCGHelperInfo, if available.
1866              * Note that plugins have a template function for the info,
1867              * but the actual function pointer comes from the plugin.
1868              */
1869             if (func == info->func) {
1870                 col += ne_fprintf(f, "%s", info->name);
1871             } else {
1872                 col += ne_fprintf(f, "plugin(%p)", func);
1873             }
1874 
1875             col += ne_fprintf(f, ",$0x%x,$%d", info->flags, nb_oargs);
1876             for (i = 0; i < nb_oargs; i++) {
1877                 col += ne_fprintf(f, ",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1878                                                             op->args[i]));
1879             }
1880             for (i = 0; i < nb_iargs; i++) {
1881                 TCGArg arg = op->args[nb_oargs + i];
1882                 const char *t = "<dummy>";
1883                 if (arg != TCG_CALL_DUMMY_ARG) {
1884                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1885                 }
1886                 col += ne_fprintf(f, ",%s", t);
1887             }
1888         } else {
1889             col += ne_fprintf(f, " %s ", def->name);
1890 
1891             nb_oargs = def->nb_oargs;
1892             nb_iargs = def->nb_iargs;
1893             nb_cargs = def->nb_cargs;
1894 
1895             if (def->flags & TCG_OPF_VECTOR) {
1896                 col += ne_fprintf(f, "v%d,e%d,", 64 << TCGOP_VECL(op),
1897                                   8 << TCGOP_VECE(op));
1898             }
1899 
1900             k = 0;
1901             for (i = 0; i < nb_oargs; i++) {
1902                 const char *sep =  k ? "," : "";
1903                 col += ne_fprintf(f, "%s%s", sep,
1904                                   tcg_get_arg_str(s, buf, sizeof(buf),
1905                                                   op->args[k++]));
1906             }
1907             for (i = 0; i < nb_iargs; i++) {
1908                 const char *sep =  k ? "," : "";
1909                 col += ne_fprintf(f, "%s%s", sep,
1910                                   tcg_get_arg_str(s, buf, sizeof(buf),
1911                                                   op->args[k++]));
1912             }
1913             switch (c) {
1914             case INDEX_op_brcond_i32:
1915             case INDEX_op_setcond_i32:
1916             case INDEX_op_movcond_i32:
1917             case INDEX_op_brcond2_i32:
1918             case INDEX_op_setcond2_i32:
1919             case INDEX_op_brcond_i64:
1920             case INDEX_op_setcond_i64:
1921             case INDEX_op_movcond_i64:
1922             case INDEX_op_cmp_vec:
1923             case INDEX_op_cmpsel_vec:
1924                 if (op->args[k] < ARRAY_SIZE(cond_name)
1925                     && cond_name[op->args[k]]) {
1926                     col += ne_fprintf(f, ",%s", cond_name[op->args[k++]]);
1927                 } else {
1928                     col += ne_fprintf(f, ",$0x%" TCG_PRIlx, op->args[k++]);
1929                 }
1930                 i = 1;
1931                 break;
1932             case INDEX_op_qemu_ld_i32:
1933             case INDEX_op_qemu_st_i32:
1934             case INDEX_op_qemu_st8_i32:
1935             case INDEX_op_qemu_ld_i64:
1936             case INDEX_op_qemu_st_i64:
1937                 {
1938                     MemOpIdx oi = op->args[k++];
1939                     MemOp op = get_memop(oi);
1940                     unsigned ix = get_mmuidx(oi);
1941 
1942                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
1943                         col += ne_fprintf(f, ",$0x%x,%u", op, ix);
1944                     } else {
1945                         const char *s_al, *s_op;
1946                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
1947                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
1948                         col += ne_fprintf(f, ",%s%s,%u", s_al, s_op, ix);
1949                     }
1950                     i = 1;
1951                 }
1952                 break;
1953             case INDEX_op_bswap16_i32:
1954             case INDEX_op_bswap16_i64:
1955             case INDEX_op_bswap32_i32:
1956             case INDEX_op_bswap32_i64:
1957             case INDEX_op_bswap64_i64:
1958                 {
1959                     TCGArg flags = op->args[k];
1960                     const char *name = NULL;
1961 
1962                     if (flags < ARRAY_SIZE(bswap_flag_name)) {
1963                         name = bswap_flag_name[flags];
1964                     }
1965                     if (name) {
1966                         col += ne_fprintf(f, ",%s", name);
1967                     } else {
1968                         col += ne_fprintf(f, ",$0x%" TCG_PRIlx, flags);
1969                     }
1970                     i = k = 1;
1971                 }
1972                 break;
1973             default:
1974                 i = 0;
1975                 break;
1976             }
1977             switch (c) {
1978             case INDEX_op_set_label:
1979             case INDEX_op_br:
1980             case INDEX_op_brcond_i32:
1981             case INDEX_op_brcond_i64:
1982             case INDEX_op_brcond2_i32:
1983                 col += ne_fprintf(f, "%s$L%d", k ? "," : "",
1984                                   arg_label(op->args[k])->id);
1985                 i++, k++;
1986                 break;
1987             default:
1988                 break;
1989             }
1990             for (; i < nb_cargs; i++, k++) {
1991                 col += ne_fprintf(f, "%s$0x%" TCG_PRIlx, k ? "," : "",
1992                                   op->args[k]);
1993             }
1994         }
1995 
1996         if (have_prefs || op->life) {
1997             for (; col < 40; ++col) {
1998                 putc(' ', f);
1999             }
2000         }
2001 
2002         if (op->life) {
2003             unsigned life = op->life;
2004 
2005             if (life & (SYNC_ARG * 3)) {
2006                 ne_fprintf(f, "  sync:");
2007                 for (i = 0; i < 2; ++i) {
2008                     if (life & (SYNC_ARG << i)) {
2009                         ne_fprintf(f, " %d", i);
2010                     }
2011                 }
2012             }
2013             life /= DEAD_ARG;
2014             if (life) {
2015                 ne_fprintf(f, "  dead:");
2016                 for (i = 0; life; ++i, life >>= 1) {
2017                     if (life & 1) {
2018                         ne_fprintf(f, " %d", i);
2019                     }
2020                 }
2021             }
2022         }
2023 
2024         if (have_prefs) {
2025             for (i = 0; i < nb_oargs; ++i) {
2026                 TCGRegSet set = op->output_pref[i];
2027 
2028                 if (i == 0) {
2029                     ne_fprintf(f, "  pref=");
2030                 } else {
2031                     ne_fprintf(f, ",");
2032                 }
2033                 if (set == 0) {
2034                     ne_fprintf(f, "none");
2035                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2036                     ne_fprintf(f, "all");
2037 #ifdef CONFIG_DEBUG_TCG
2038                 } else if (tcg_regset_single(set)) {
2039                     TCGReg reg = tcg_regset_first(set);
2040                     ne_fprintf(f, "%s", tcg_target_reg_names[reg]);
2041 #endif
2042                 } else if (TCG_TARGET_NB_REGS <= 32) {
2043                     ne_fprintf(f, "0x%x", (uint32_t)set);
2044                 } else {
2045                     ne_fprintf(f, "0x%" PRIx64, (uint64_t)set);
2046                 }
2047             }
2048         }
2049 
2050         putc('\n', f);
2051     }
2052 }
2053 
2054 /* we give more priority to constraints with less registers */
2055 static int get_constraint_priority(const TCGOpDef *def, int k)
2056 {
2057     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2058     int n;
2059 
2060     if (arg_ct->oalias) {
2061         /* an alias is equivalent to a single register */
2062         n = 1;
2063     } else {
2064         n = ctpop64(arg_ct->regs);
2065     }
2066     return TCG_TARGET_NB_REGS - n + 1;
2067 }
2068 
2069 /* sort from highest priority to lowest */
2070 static void sort_constraints(TCGOpDef *def, int start, int n)
2071 {
2072     int i, j;
2073     TCGArgConstraint *a = def->args_ct;
2074 
2075     for (i = 0; i < n; i++) {
2076         a[start + i].sort_index = start + i;
2077     }
2078     if (n <= 1) {
2079         return;
2080     }
2081     for (i = 0; i < n - 1; i++) {
2082         for (j = i + 1; j < n; j++) {
2083             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2084             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2085             if (p1 < p2) {
2086                 int tmp = a[start + i].sort_index;
2087                 a[start + i].sort_index = a[start + j].sort_index;
2088                 a[start + j].sort_index = tmp;
2089             }
2090         }
2091     }
2092 }
2093 
/*
 * Translate the target's textual operand constraints into def->args_ct
 * for every opcode that is present and has at least one input or output
 * argument, then order the output and the input constraints by priority.
 * @s is not referenced by this function.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        /* Opcodes without register operands have nothing to parse. */
        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        /*
         * Macro magic should make it impossible, but double-check that
         * the array index is in range.  Since the signedness of an enum
         * is implementation defined, force the result to unsigned.
         */
        unsigned con_set = tcg_target_op_def(op);
        tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
        tdefs = &constraint_sets[con_set];

        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            /* Each character of the string contributes one constraint. */
            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    {
                        /*
                         * A digit ties this operand to output operand
                         * <digit>; it is only accepted as the first
                         * character of the string.
                         */
                        int oarg = *ct_str - '0';
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].regs != 0);
                        /* Start from a copy of the output's constraint. */
                        def->args_ct[i] = def->args_ct[oarg];
                        /* The output sets oalias.  */
                        def->args_ct[oarg].oalias = true;
                        def->args_ct[oarg].alias_index = i;
                        /* The input sets ialias. */
                        def->args_ct[i].ialias = true;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    def->args_ct[i].newreg = true;
                    ct_str++;
                    break;
                case 'i':
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;

                /* Include all of the target-specific constraints. */

                /*
                 * The header below is expanded with CONST() and REGS()
                 * defined so that each of its entries becomes one case
                 * label of this switch.
                 */
#undef CONST
#define CONST(CASE, MASK) \
    case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
#define REGS(CASE, MASK) \
    case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;

#include "tcg-target-con-str.h"

#undef REGS
#undef CONST
                default:
                    /* Typo in TCGTargetOpDef constraint. */
                    g_assert_not_reached();
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2180 
2181 void tcg_op_remove(TCGContext *s, TCGOp *op)
2182 {
2183     TCGLabel *label;
2184 
2185     switch (op->opc) {
2186     case INDEX_op_br:
2187         label = arg_label(op->args[0]);
2188         label->refs--;
2189         break;
2190     case INDEX_op_brcond_i32:
2191     case INDEX_op_brcond_i64:
2192         label = arg_label(op->args[3]);
2193         label->refs--;
2194         break;
2195     case INDEX_op_brcond2_i32:
2196         label = arg_label(op->args[5]);
2197         label->refs--;
2198         break;
2199     default:
2200         break;
2201     }
2202 
2203     QTAILQ_REMOVE(&s->ops, op, link);
2204     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2205     s->nb_ops--;
2206 
2207 #ifdef CONFIG_PROFILER
2208     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2209 #endif
2210 }
2211 
2212 void tcg_remove_ops_after(TCGOp *op)
2213 {
2214     TCGContext *s = tcg_ctx;
2215 
2216     while (true) {
2217         TCGOp *last = tcg_last_op();
2218         if (last == op) {
2219             return;
2220         }
2221         tcg_op_remove(s, last);
2222     }
2223 }
2224 
2225 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2226 {
2227     TCGContext *s = tcg_ctx;
2228     TCGOp *op;
2229 
2230     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2231         op = tcg_malloc(sizeof(TCGOp));
2232     } else {
2233         op = QTAILQ_FIRST(&s->free_ops);
2234         QTAILQ_REMOVE(&s->free_ops, op, link);
2235     }
2236     memset(op, 0, offsetof(TCGOp, link));
2237     op->opc = opc;
2238     s->nb_ops++;
2239 
2240     return op;
2241 }
2242 
2243 TCGOp *tcg_emit_op(TCGOpcode opc)
2244 {
2245     TCGOp *op = tcg_op_alloc(opc);
2246     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2247     return op;
2248 }
2249 
2250 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2251 {
2252     TCGOp *new_op = tcg_op_alloc(opc);
2253     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2254     return new_op;
2255 }
2256 
2257 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2258 {
2259     TCGOp *new_op = tcg_op_alloc(opc);
2260     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2261     return new_op;
2262 }
2263 
2264 /* Reachable analysis : remove unreachable code.  */
2265 static void reachable_code_pass(TCGContext *s)
2266 {
2267     TCGOp *op, *op_next;
2268     bool dead = false;
2269 
2270     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2271         bool remove = dead;
2272         TCGLabel *label;
2273 
2274         switch (op->opc) {
2275         case INDEX_op_set_label:
2276             label = arg_label(op->args[0]);
2277             if (label->refs == 0) {
2278                 /*
2279                  * While there is an occasional backward branch, virtually
2280                  * all branches generated by the translators are forward.
2281                  * Which means that generally we will have already removed
2282                  * all references to the label that will be, and there is
2283                  * little to be gained by iterating.
2284                  */
2285                 remove = true;
2286             } else {
2287                 /* Once we see a label, insns become live again.  */
2288                 dead = false;
2289                 remove = false;
2290 
2291                 /*
2292                  * Optimization can fold conditional branches to unconditional.
2293                  * If we find a label with one reference which is preceded by
2294                  * an unconditional branch to it, remove both.  This needed to
2295                  * wait until the dead code in between them was removed.
2296                  */
2297                 if (label->refs == 1) {
2298                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2299                     if (op_prev->opc == INDEX_op_br &&
2300                         label == arg_label(op_prev->args[0])) {
2301                         tcg_op_remove(s, op_prev);
2302                         remove = true;
2303                     }
2304                 }
2305             }
2306             break;
2307 
2308         case INDEX_op_br:
2309         case INDEX_op_exit_tb:
2310         case INDEX_op_goto_ptr:
2311             /* Unconditional branches; everything following is dead.  */
2312             dead = true;
2313             break;
2314 
2315         case INDEX_op_call:
2316             /* Notice noreturn helper calls, raising exceptions.  */
2317             if (tcg_call_flags(op) & TCG_CALL_NO_RETURN) {
2318                 dead = true;
2319             }
2320             break;
2321 
2322         case INDEX_op_insn_start:
2323             /* Never remove -- we need to keep these for unwind.  */
2324             remove = false;
2325             break;
2326 
2327         default:
2328             break;
2329         }
2330 
2331         if (remove) {
2332             tcg_op_remove(s, op);
2333         }
2334     }
2335 }
2336 
/*
 * State bits that the liveness helpers in this file store in
 * TCGTemp.state; they are combined as TS_DEAD | TS_MEM where a temp is
 * both dead and expected to be in memory.
 */
#define TS_DEAD  1
#define TS_MEM   2

/*
 * Test the dead / sync bit of argument @n.  Both macros expand to an
 * expression on a variable named arg_life, which must be in scope
 * wherever they are used.
 */
#define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
#define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2342 
2343 /* For liveness_pass_1, the register preferences for a given temp.  */
2344 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2345 {
2346     return ts->state_ptr;
2347 }
2348 
2349 /* For liveness_pass_1, reset the preferences for a given temp to the
2350  * maximal regset for its type.
2351  */
2352 static inline void la_reset_pref(TCGTemp *ts)
2353 {
2354     *la_temp_pref(ts)
2355         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2356 }
2357 
2358 /* liveness analysis: end of function: all temps are dead, and globals
2359    should be in memory. */
2360 static void la_func_end(TCGContext *s, int ng, int nt)
2361 {
2362     int i;
2363 
2364     for (i = 0; i < ng; ++i) {
2365         s->temps[i].state = TS_DEAD | TS_MEM;
2366         la_reset_pref(&s->temps[i]);
2367     }
2368     for (i = ng; i < nt; ++i) {
2369         s->temps[i].state = TS_DEAD;
2370         la_reset_pref(&s->temps[i]);
2371     }
2372 }
2373 
2374 /* liveness analysis: end of basic block: all temps are dead, globals
2375    and local temps should be in memory. */
2376 static void la_bb_end(TCGContext *s, int ng, int nt)
2377 {
2378     int i;
2379 
2380     for (i = 0; i < nt; ++i) {
2381         TCGTemp *ts = &s->temps[i];
2382         int state;
2383 
2384         switch (ts->kind) {
2385         case TEMP_FIXED:
2386         case TEMP_GLOBAL:
2387         case TEMP_LOCAL:
2388             state = TS_DEAD | TS_MEM;
2389             break;
2390         case TEMP_NORMAL:
2391         case TEMP_EBB:
2392         case TEMP_CONST:
2393             state = TS_DEAD;
2394             break;
2395         default:
2396             g_assert_not_reached();
2397         }
2398         ts->state = state;
2399         la_reset_pref(ts);
2400     }
2401 }
2402 
2403 /* liveness analysis: sync globals back to memory.  */
2404 static void la_global_sync(TCGContext *s, int ng)
2405 {
2406     int i;
2407 
2408     for (i = 0; i < ng; ++i) {
2409         int state = s->temps[i].state;
2410         s->temps[i].state = state | TS_MEM;
2411         if (state == TS_DEAD) {
2412             /* If the global was previously dead, reset prefs.  */
2413             la_reset_pref(&s->temps[i]);
2414         }
2415     }
2416 }
2417 
2418 /*
2419  * liveness analysis: conditional branch: all temps are dead unless
2420  * explicitly live-across-conditional-branch, globals and local temps
2421  * should be synced.
2422  */
2423 static void la_bb_sync(TCGContext *s, int ng, int nt)
2424 {
2425     la_global_sync(s, ng);
2426 
2427     for (int i = ng; i < nt; ++i) {
2428         TCGTemp *ts = &s->temps[i];
2429         int state;
2430 
2431         switch (ts->kind) {
2432         case TEMP_LOCAL:
2433             state = ts->state;
2434             ts->state = state | TS_MEM;
2435             if (state != TS_DEAD) {
2436                 continue;
2437             }
2438             break;
2439         case TEMP_NORMAL:
2440             s->temps[i].state = TS_DEAD;
2441             break;
2442         case TEMP_EBB:
2443         case TEMP_CONST:
2444             continue;
2445         default:
2446             g_assert_not_reached();
2447         }
2448         la_reset_pref(&s->temps[i]);
2449     }
2450 }
2451 
2452 /* liveness analysis: sync globals back to memory and kill.  */
2453 static void la_global_kill(TCGContext *s, int ng)
2454 {
2455     int i;
2456 
2457     for (i = 0; i < ng; i++) {
2458         s->temps[i].state = TS_DEAD | TS_MEM;
2459         la_reset_pref(&s->temps[i]);
2460     }
2461 }
2462 
2463 /* liveness analysis: note live globals crossing calls.  */
2464 static void la_cross_call(TCGContext *s, int nt)
2465 {
2466     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2467     int i;
2468 
2469     for (i = 0; i < nt; i++) {
2470         TCGTemp *ts = &s->temps[i];
2471         if (!(ts->state & TS_DEAD)) {
2472             TCGRegSet *pset = la_temp_pref(ts);
2473             TCGRegSet set = *pset;
2474 
2475             set &= mask;
2476             /* If the combination is not possible, restart.  */
2477             if (set == 0) {
2478                 set = tcg_target_available_regs[ts->type] & mask;
2479             }
2480             *pset = set;
2481         }
2482     }
2483 }
2484 
2485 /* Liveness analysis : update the opc_arg_life array to tell if a
2486    given input arguments is dead. Instructions updating dead
2487    temporaries are removed. */
2488 static void liveness_pass_1(TCGContext *s)
2489 {
2490     int nb_globals = s->nb_globals;
2491     int nb_temps = s->nb_temps;
2492     TCGOp *op, *op_prev;
2493     TCGRegSet *prefs;
2494     int i;
2495 
2496     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2497     for (i = 0; i < nb_temps; ++i) {
2498         s->temps[i].state_ptr = prefs + i;
2499     }
2500 
2501     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2502     la_func_end(s, nb_globals, nb_temps);
2503 
2504     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2505         int nb_iargs, nb_oargs;
2506         TCGOpcode opc_new, opc_new2;
2507         bool have_opc_new2;
2508         TCGLifeData arg_life = 0;
2509         TCGTemp *ts;
2510         TCGOpcode opc = op->opc;
2511         const TCGOpDef *def = &tcg_op_defs[opc];
2512 
2513         switch (opc) {
2514         case INDEX_op_call:
2515             {
2516                 int call_flags;
2517                 int nb_call_regs;
2518 
2519                 nb_oargs = TCGOP_CALLO(op);
2520                 nb_iargs = TCGOP_CALLI(op);
2521                 call_flags = tcg_call_flags(op);
2522 
2523                 /* pure functions can be removed if their result is unused */
2524                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2525                     for (i = 0; i < nb_oargs; i++) {
2526                         ts = arg_temp(op->args[i]);
2527                         if (ts->state != TS_DEAD) {
2528                             goto do_not_remove_call;
2529                         }
2530                     }
2531                     goto do_remove;
2532                 }
2533             do_not_remove_call:
2534 
2535                 /* Output args are dead.  */
2536                 for (i = 0; i < nb_oargs; i++) {
2537                     ts = arg_temp(op->args[i]);
2538                     if (ts->state & TS_DEAD) {
2539                         arg_life |= DEAD_ARG << i;
2540                     }
2541                     if (ts->state & TS_MEM) {
2542                         arg_life |= SYNC_ARG << i;
2543                     }
2544                     ts->state = TS_DEAD;
2545                     la_reset_pref(ts);
2546 
2547                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2548                     op->output_pref[i] = 0;
2549                 }
2550 
2551                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2552                                     TCG_CALL_NO_READ_GLOBALS))) {
2553                     la_global_kill(s, nb_globals);
2554                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2555                     la_global_sync(s, nb_globals);
2556                 }
2557 
2558                 /* Record arguments that die in this helper.  */
2559                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2560                     ts = arg_temp(op->args[i]);
2561                     if (ts && ts->state & TS_DEAD) {
2562                         arg_life |= DEAD_ARG << i;
2563                     }
2564                 }
2565 
2566                 /* For all live registers, remove call-clobbered prefs.  */
2567                 la_cross_call(s, nb_temps);
2568 
2569                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2570 
2571                 /* Input arguments are live for preceding opcodes.  */
2572                 for (i = 0; i < nb_iargs; i++) {
2573                     ts = arg_temp(op->args[i + nb_oargs]);
2574                     if (ts && ts->state & TS_DEAD) {
2575                         /* For those arguments that die, and will be allocated
2576                          * in registers, clear the register set for that arg,
2577                          * to be filled in below.  For args that will be on
2578                          * the stack, reset to any available reg.
2579                          */
2580                         *la_temp_pref(ts)
2581                             = (i < nb_call_regs ? 0 :
2582                                tcg_target_available_regs[ts->type]);
2583                         ts->state &= ~TS_DEAD;
2584                     }
2585                 }
2586 
2587                 /* For each input argument, add its input register to prefs.
2588                    If a temp is used once, this produces a single set bit.  */
2589                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2590                     ts = arg_temp(op->args[i + nb_oargs]);
2591                     if (ts) {
2592                         tcg_regset_set_reg(*la_temp_pref(ts),
2593                                            tcg_target_call_iarg_regs[i]);
2594                     }
2595                 }
2596             }
2597             break;
2598         case INDEX_op_insn_start:
2599             break;
2600         case INDEX_op_discard:
2601             /* mark the temporary as dead */
2602             ts = arg_temp(op->args[0]);
2603             ts->state = TS_DEAD;
2604             la_reset_pref(ts);
2605             break;
2606 
2607         case INDEX_op_add2_i32:
2608             opc_new = INDEX_op_add_i32;
2609             goto do_addsub2;
2610         case INDEX_op_sub2_i32:
2611             opc_new = INDEX_op_sub_i32;
2612             goto do_addsub2;
2613         case INDEX_op_add2_i64:
2614             opc_new = INDEX_op_add_i64;
2615             goto do_addsub2;
2616         case INDEX_op_sub2_i64:
2617             opc_new = INDEX_op_sub_i64;
2618         do_addsub2:
2619             nb_iargs = 4;
2620             nb_oargs = 2;
2621             /* Test if the high part of the operation is dead, but not
2622                the low part.  The result can be optimized to a simple
2623                add or sub.  This happens often for x86_64 guest when the
2624                cpu mode is set to 32 bit.  */
2625             if (arg_temp(op->args[1])->state == TS_DEAD) {
2626                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2627                     goto do_remove;
2628                 }
2629                 /* Replace the opcode and adjust the args in place,
2630                    leaving 3 unused args at the end.  */
2631                 op->opc = opc = opc_new;
2632                 op->args[1] = op->args[2];
2633                 op->args[2] = op->args[4];
2634                 /* Fall through and mark the single-word operation live.  */
2635                 nb_iargs = 2;
2636                 nb_oargs = 1;
2637             }
2638             goto do_not_remove;
2639 
2640         case INDEX_op_mulu2_i32:
2641             opc_new = INDEX_op_mul_i32;
2642             opc_new2 = INDEX_op_muluh_i32;
2643             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2644             goto do_mul2;
2645         case INDEX_op_muls2_i32:
2646             opc_new = INDEX_op_mul_i32;
2647             opc_new2 = INDEX_op_mulsh_i32;
2648             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2649             goto do_mul2;
2650         case INDEX_op_mulu2_i64:
2651             opc_new = INDEX_op_mul_i64;
2652             opc_new2 = INDEX_op_muluh_i64;
2653             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2654             goto do_mul2;
2655         case INDEX_op_muls2_i64:
2656             opc_new = INDEX_op_mul_i64;
2657             opc_new2 = INDEX_op_mulsh_i64;
2658             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2659             goto do_mul2;
2660         do_mul2:
2661             nb_iargs = 2;
2662             nb_oargs = 2;
2663             if (arg_temp(op->args[1])->state == TS_DEAD) {
2664                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2665                     /* Both parts of the operation are dead.  */
2666                     goto do_remove;
2667                 }
2668                 /* The high part of the operation is dead; generate the low. */
2669                 op->opc = opc = opc_new;
2670                 op->args[1] = op->args[2];
2671                 op->args[2] = op->args[3];
2672             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2673                 /* The low part of the operation is dead; generate the high. */
2674                 op->opc = opc = opc_new2;
2675                 op->args[0] = op->args[1];
2676                 op->args[1] = op->args[2];
2677                 op->args[2] = op->args[3];
2678             } else {
2679                 goto do_not_remove;
2680             }
2681             /* Mark the single-word operation live.  */
2682             nb_oargs = 1;
2683             goto do_not_remove;
2684 
2685         default:
2686             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2687             nb_iargs = def->nb_iargs;
2688             nb_oargs = def->nb_oargs;
2689 
2690             /* Test if the operation can be removed because all
2691                its outputs are dead. We assume that nb_oargs == 0
2692                implies side effects */
2693             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2694                 for (i = 0; i < nb_oargs; i++) {
2695                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2696                         goto do_not_remove;
2697                     }
2698                 }
2699                 goto do_remove;
2700             }
2701             goto do_not_remove;
2702 
2703         do_remove:
2704             tcg_op_remove(s, op);
2705             break;
2706 
2707         do_not_remove:
2708             for (i = 0; i < nb_oargs; i++) {
2709                 ts = arg_temp(op->args[i]);
2710 
2711                 /* Remember the preference of the uses that followed.  */
2712                 op->output_pref[i] = *la_temp_pref(ts);
2713 
2714                 /* Output args are dead.  */
2715                 if (ts->state & TS_DEAD) {
2716                     arg_life |= DEAD_ARG << i;
2717                 }
2718                 if (ts->state & TS_MEM) {
2719                     arg_life |= SYNC_ARG << i;
2720                 }
2721                 ts->state = TS_DEAD;
2722                 la_reset_pref(ts);
2723             }
2724 
2725             /* If end of basic block, update.  */
2726             if (def->flags & TCG_OPF_BB_EXIT) {
2727                 la_func_end(s, nb_globals, nb_temps);
2728             } else if (def->flags & TCG_OPF_COND_BRANCH) {
2729                 la_bb_sync(s, nb_globals, nb_temps);
2730             } else if (def->flags & TCG_OPF_BB_END) {
2731                 la_bb_end(s, nb_globals, nb_temps);
2732             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2733                 la_global_sync(s, nb_globals);
2734                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2735                     la_cross_call(s, nb_temps);
2736                 }
2737             }
2738 
2739             /* Record arguments that die in this opcode.  */
2740             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2741                 ts = arg_temp(op->args[i]);
2742                 if (ts->state & TS_DEAD) {
2743                     arg_life |= DEAD_ARG << i;
2744                 }
2745             }
2746 
2747             /* Input arguments are live for preceding opcodes.  */
2748             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2749                 ts = arg_temp(op->args[i]);
2750                 if (ts->state & TS_DEAD) {
2751                     /* For operands that were dead, initially allow
2752                        all regs for the type.  */
2753                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2754                     ts->state &= ~TS_DEAD;
2755                 }
2756             }
2757 
2758             /* Incorporate constraints for this operand.  */
2759             switch (opc) {
2760             case INDEX_op_mov_i32:
2761             case INDEX_op_mov_i64:
2762                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2763                    have proper constraints.  That said, special case
2764                    moves to propagate preferences backward.  */
2765                 if (IS_DEAD_ARG(1)) {
2766                     *la_temp_pref(arg_temp(op->args[0]))
2767                         = *la_temp_pref(arg_temp(op->args[1]));
2768                 }
2769                 break;
2770 
2771             default:
2772                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2773                     const TCGArgConstraint *ct = &def->args_ct[i];
2774                     TCGRegSet set, *pset;
2775 
2776                     ts = arg_temp(op->args[i]);
2777                     pset = la_temp_pref(ts);
2778                     set = *pset;
2779 
2780                     set &= ct->regs;
2781                     if (ct->ialias) {
2782                         set &= op->output_pref[ct->alias_index];
2783                     }
2784                     /* If the combination is not possible, restart.  */
2785                     if (set == 0) {
2786                         set = ct->regs;
2787                     }
2788                     *pset = set;
2789                 }
2790                 break;
2791             }
2792             break;
2793         }
2794         op->life = arg_life;
2795     }
2796 }
2797 
2798 /*
      * Liveness analysis, second pass: convert indirect regs to direct
      * temporaries.
      *
      * Every global with indirect_reg set receives a freshly allocated
      * TEMP_EBB temporary of the same type (its "direct" temp, reachable
      * through state_ptr).  The op list is then walked in order:
      *   - an input whose global is TS_DEAD gets a load from
      *     mem_base + mem_offset inserted in front of the op;
      *   - every reference to such a global is redirected to its direct temp;
      *   - an output whose life data requests a sync gets a store inserted
      *     behind the op.
      *
      * Returns true if at least one op argument was rewritten.
      */
2799 static bool liveness_pass_2(TCGContext *s)
2800 {
2801     int nb_globals = s->nb_globals;
2802     int nb_temps, i;
2803     bool changes = false;
2804     TCGOp *op, *op_next;
2805 
2806     /* Create a temporary for each indirect global.  */
2807     for (i = 0; i < nb_globals; ++i) {
2808         TCGTemp *its = &s->temps[i];
2809         if (its->indirect_reg) {
2810             TCGTemp *dts = tcg_temp_alloc(s);
2811             dts->type = its->type;
2812             dts->base_type = its->base_type;
2813             dts->kind = TEMP_EBB;
2814             its->state_ptr = dts;
2815         } else {
2816             its->state_ptr = NULL;
2817         }
2818         /* All globals begin dead.  */
2819         its->state = TS_DEAD;
2820     }
         /* 'i' continues from nb_globals.  s->nb_temps is read only now, after
            the allocations above; none of the remaining temps gets a direct
            counterpart.  */
2821     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2822         TCGTemp *its = &s->temps[i];
2823         its->state_ptr = NULL;
2824         its->state = TS_DEAD;
2825     }
2826 
         /* The _SAFE iterator is used because ops are inserted around, and in
            one case removed at, the current position.  */
2827     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2828         TCGOpcode opc = op->opc;
2829         const TCGOpDef *def = &tcg_op_defs[opc];
2830         TCGLifeData arg_life = op->life;
2831         int nb_iargs, nb_oargs, call_flags;
2832         TCGTemp *arg_ts, *dir_ts;
2833 
2834         if (opc == INDEX_op_call) {
                 /* Calls carry their argument counts and flags in the op.  */
2835             nb_oargs = TCGOP_CALLO(op);
2836             nb_iargs = TCGOP_CALLI(op);
2837             call_flags = tcg_call_flags(op);
2838         } else {
2839             nb_iargs = def->nb_iargs;
2840             nb_oargs = def->nb_oargs;
2841 
2842             /* Set flags similar to how calls require.  */
2843             if (def->flags & TCG_OPF_COND_BRANCH) {
2844                 /* Like reading globals: sync_globals */
2845                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2846             } else if (def->flags & TCG_OPF_BB_END) {
2847                 /* Like writing globals: save_globals */
2848                 call_flags = 0;
2849             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2850                 /* Like reading globals: sync_globals */
2851                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2852             } else {
2853                 /* No effect on globals.  */
2854                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2855                               TCG_CALL_NO_WRITE_GLOBALS);
2856             }
2857         }
2858 
2859         /* Make sure that input arguments are available.  */
2860         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2861             arg_ts = arg_temp(op->args[i]);
2862             if (arg_ts) {
2863                 dir_ts = arg_ts->state_ptr;
2864                 if (dir_ts && arg_ts->state == TS_DEAD) {
                         /* Pick the load opcode matching the global's type.  */
2865                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2866                                       ? INDEX_op_ld_i32
2867                                       : INDEX_op_ld_i64);
2868                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2869 
2870                     lop->args[0] = temp_arg(dir_ts);
2871                     lop->args[1] = temp_arg(arg_ts->mem_base);
2872                     lop->args[2] = arg_ts->mem_offset;
2873 
2874                     /* Loaded, but synced with memory.  */
2875                     arg_ts->state = TS_MEM;
2876                 }
2877             }
2878         }
2879 
2880         /* Perform input replacement, and mark inputs that became dead.
2881            No action is required except keeping temp_state up to date
2882            so that we reload when needed.  */
2883         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2884             arg_ts = arg_temp(op->args[i]);
2885             if (arg_ts) {
2886                 dir_ts = arg_ts->state_ptr;
2887                 if (dir_ts) {
2888                     op->args[i] = temp_arg(dir_ts);
2889                     changes = true;
2890                     if (IS_DEAD_ARG(i)) {
2891                         arg_ts->state = TS_DEAD;
2892                     }
2893                 }
2894             }
2895         }
2896 
2897         /* Liveness analysis should ensure that the following are
2898            all correct, for call sites and basic block end points.  */
2899         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2900             /* Nothing to do */
2901         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2902             for (i = 0; i < nb_globals; ++i) {
2903                 /* Liveness should see that globals are synced back,
2904                    that is, either TS_DEAD or TS_MEM.  */
2905                 arg_ts = &s->temps[i];
2906                 tcg_debug_assert(arg_ts->state_ptr == 0
2907                                  || arg_ts->state != 0);
2908             }
2909         } else {
2910             for (i = 0; i < nb_globals; ++i) {
2911                 /* Liveness should see that globals are saved back,
2912                    that is, TS_DEAD, waiting to be reloaded.  */
2913                 arg_ts = &s->temps[i];
2914                 tcg_debug_assert(arg_ts->state_ptr == 0
2915                                  || arg_ts->state == TS_DEAD);
2916             }
2917         }
2918 
2919         /* Outputs become available.  */
2920         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
                 /* Moves are special-cased: when the destination is dead after
                    its sync, the mov is replaced by a plain store of the
                    source.  */
2921             arg_ts = arg_temp(op->args[0]);
2922             dir_ts = arg_ts->state_ptr;
2923             if (dir_ts) {
2924                 op->args[0] = temp_arg(dir_ts);
2925                 changes = true;
2926 
2927                 /* The output is now live and modified.  */
2928                 arg_ts->state = 0;
2929 
2930                 if (NEED_SYNC_ARG(0)) {
2931                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2932                                       ? INDEX_op_st_i32
2933                                       : INDEX_op_st_i64);
2934                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2935                     TCGTemp *out_ts = dir_ts;
2936 
2937                     if (IS_DEAD_ARG(0)) {
                             /* Store the mov's source (args[1] as left by the
                                input replacement above) and drop the mov.  */
2938                         out_ts = arg_temp(op->args[1]);
2939                         arg_ts->state = TS_DEAD;
2940                         tcg_op_remove(s, op);
2941                     } else {
2942                         arg_ts->state = TS_MEM;
2943                     }
2944 
2945                     sop->args[0] = temp_arg(out_ts);
2946                     sop->args[1] = temp_arg(arg_ts->mem_base);
2947                     sop->args[2] = arg_ts->mem_offset;
2948                 } else {
                         /* A dead output without a sync request is not
                            expected here.  */
2949                     tcg_debug_assert(!IS_DEAD_ARG(0));
2950                 }
2951             }
2952         } else {
2953             for (i = 0; i < nb_oargs; i++) {
2954                 arg_ts = arg_temp(op->args[i]);
2955                 dir_ts = arg_ts->state_ptr;
2956                 if (!dir_ts) {
2957                     continue;
2958                 }
2959                 op->args[i] = temp_arg(dir_ts);
2960                 changes = true;
2961 
2962                 /* The output is now live and modified.  */
2963                 arg_ts->state = 0;
2964 
2965                 /* Sync outputs upon their last write.  */
2966                 if (NEED_SYNC_ARG(i)) {
2967                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2968                                       ? INDEX_op_st_i32
2969                                       : INDEX_op_st_i64);
2970                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2971 
2972                     sop->args[0] = temp_arg(dir_ts);
2973                     sop->args[1] = temp_arg(arg_ts->mem_base);
2974                     sop->args[2] = arg_ts->mem_offset;
2975 
2976                     arg_ts->state = TS_MEM;
2977                 }
2978                 /* Drop outputs that are dead.  */
2979                 if (IS_DEAD_ARG(i)) {
2980                     arg_ts->state = TS_DEAD;
2981                 }
2982             }
2983         }
2984     }
2985 
2986     return changes;
2987 }
2988 
2989 #ifdef CONFIG_DEBUG_TCG
2990 static void dump_regs(TCGContext *s)
2991 {
2992     TCGTemp *ts;
2993     int i;
2994     char buf[64];
2995 
2996     for(i = 0; i < s->nb_temps; i++) {
2997         ts = &s->temps[i];
2998         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2999         switch(ts->val_type) {
3000         case TEMP_VAL_REG:
3001             printf("%s", tcg_target_reg_names[ts->reg]);
3002             break;
3003         case TEMP_VAL_MEM:
3004             printf("%d(%s)", (int)ts->mem_offset,
3005                    tcg_target_reg_names[ts->mem_base->reg]);
3006             break;
3007         case TEMP_VAL_CONST:
3008             printf("$0x%" PRIx64, ts->val);
3009             break;
3010         case TEMP_VAL_DEAD:
3011             printf("D");
3012             break;
3013         default:
3014             printf("???");
3015             break;
3016         }
3017         printf("\n");
3018     }
3019 
3020     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3021         if (s->reg_to_temp[i] != NULL) {
3022             printf("%s: %s\n",
3023                    tcg_target_reg_names[i],
3024                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3025         }
3026     }
3027 }
3028 
3029 static void check_regs(TCGContext *s)
3030 {
3031     int reg;
3032     int k;
3033     TCGTemp *ts;
3034     char buf[64];
3035 
3036     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3037         ts = s->reg_to_temp[reg];
3038         if (ts != NULL) {
3039             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3040                 printf("Inconsistency for register %s:\n",
3041                        tcg_target_reg_names[reg]);
3042                 goto fail;
3043             }
3044         }
3045     }
3046     for (k = 0; k < s->nb_temps; k++) {
3047         ts = &s->temps[k];
3048         if (ts->val_type == TEMP_VAL_REG
3049             && ts->kind != TEMP_FIXED
3050             && s->reg_to_temp[ts->reg] != ts) {
3051             printf("Inconsistency for temp %s:\n",
3052                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3053         fail:
3054             printf("reg state:\n");
3055             dump_regs(s);
3056             tcg_abort();
3057         }
3058     }
3059 }
3060 #endif
3061 
3062 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3063 {
3064     intptr_t off, size, align;
3065 
3066     switch (ts->type) {
3067     case TCG_TYPE_I32:
3068         size = align = 4;
3069         break;
3070     case TCG_TYPE_I64:
3071     case TCG_TYPE_V64:
3072         size = align = 8;
3073         break;
3074     case TCG_TYPE_V128:
3075         size = align = 16;
3076         break;
3077     case TCG_TYPE_V256:
3078         /* Note that we do not require aligned storage for V256. */
3079         size = 32, align = 16;
3080         break;
3081     default:
3082         g_assert_not_reached();
3083     }
3084 
3085     /*
3086      * Assume the stack is sufficiently aligned.
3087      * This affects e.g. ARM NEON, where we have 8 byte stack alignment
3088      * and do not require 16 byte vector alignment.  This seems slightly
3089      * easier than fully parameterizing the above switch statement.
3090      */
3091     align = MIN(TCG_TARGET_STACK_ALIGN, align);
3092     off = ROUND_UP(s->current_frame_offset, align);
3093 
3094     /* If we've exhausted the stack frame, restart with a smaller TB. */
3095     if (off + size > s->frame_end) {
3096         tcg_raise_tb_overflow(s);
3097     }
3098     s->current_frame_offset = off + size;
3099 
3100     ts->mem_offset = off;
3101 #if defined(__sparc__)
3102     ts->mem_offset += TCG_TARGET_STACK_BIAS;
3103 #endif
3104     ts->mem_base = s->frame_temp;
3105     ts->mem_allocated = 1;
3106 }
3107 
/* Forward declaration: temp_sync() below calls temp_load() ahead of its
   definition.  */
3108 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3109 
3110 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3111    mark it free; otherwise mark it dead.  */
3112 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3113 {
3114     TCGTempVal new_type;
3115 
3116     switch (ts->kind) {
3117     case TEMP_FIXED:
3118         return;
3119     case TEMP_GLOBAL:
3120     case TEMP_LOCAL:
3121         new_type = TEMP_VAL_MEM;
3122         break;
3123     case TEMP_NORMAL:
3124     case TEMP_EBB:
3125         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3126         break;
3127     case TEMP_CONST:
3128         new_type = TEMP_VAL_CONST;
3129         break;
3130     default:
3131         g_assert_not_reached();
3132     }
3133     if (ts->val_type == TEMP_VAL_REG) {
3134         s->reg_to_temp[ts->reg] = NULL;
3135     }
3136     ts->val_type = new_type;
3137 }
3138 
3139 /* Mark a temporary as dead.  Shorthand for temp_free_or_dead() with a
        positive 'free_or_dead', i.e. the "dead" rather than the "free"
        variant.  */
3140 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3141 {
3142     temp_free_or_dead(s, ts, 1);
3143 }
3144 
3145 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3146    registers needs to be allocated to store a constant.  If 'free_or_dead'
3147    is non-zero, subsequently release the temporary; if it is positive, the
3148    temp is dead; if it is negative, the temp is free.  */
3149 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3150                       TCGRegSet preferred_regs, int free_or_dead)
3151 {
3152     if (!temp_readonly(ts) && !ts->mem_coherent) {
3153         if (!ts->mem_allocated) {
3154             temp_allocate_frame(s, ts);
3155         }
3156         switch (ts->val_type) {
3157         case TEMP_VAL_CONST:
3158             /* If we're going to free the temp immediately, then we won't
3159                require it later in a register, so attempt to store the
3160                constant to memory directly.  */
3161             if (free_or_dead
3162                 && tcg_out_sti(s, ts->type, ts->val,
3163                                ts->mem_base->reg, ts->mem_offset)) {
3164                 break;
3165             }
3166             temp_load(s, ts, tcg_target_available_regs[ts->type],
3167                       allocated_regs, preferred_regs);
3168             /* fallthrough */
3169 
3170         case TEMP_VAL_REG:
3171             tcg_out_st(s, ts->type, ts->reg,
3172                        ts->mem_base->reg, ts->mem_offset);
3173             break;
3174 
3175         case TEMP_VAL_MEM:
3176             break;
3177 
3178         case TEMP_VAL_DEAD:
3179         default:
3180             tcg_abort();
3181         }
3182         ts->mem_coherent = 1;
3183     }
3184     if (free_or_dead) {
3185         temp_free_or_dead(s, ts, free_or_dead);
3186     }
3187 }
3188 
3189 /* free register 'reg' by spilling the corresponding temporary if necessary */
3190 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3191 {
3192     TCGTemp *ts = s->reg_to_temp[reg];
3193     if (ts != NULL) {
3194         temp_sync(s, ts, allocated_regs, 0, -1);
3195     }
3196 }
3197 
3198 /**
3199  * tcg_reg_alloc:
3200  * @required_regs: Set of registers in which we must allocate.
3201  * @allocated_regs: Set of registers which must be avoided.
3202  * @preferred_regs: Set of registers we should prefer.
3203  * @rev: True if we search the registers in "indirect" order.
3204  *
3205  * The allocated register must be in @required_regs & ~@allocated_regs,
3206  * but if we can put it in @preferred_regs we may save a move later.
3207  */
3208 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3209                             TCGRegSet allocated_regs,
3210                             TCGRegSet preferred_regs, bool rev)
3211 {
3212     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3213     TCGRegSet reg_ct[2];
3214     const int *order;
3215 
3216     reg_ct[1] = required_regs & ~allocated_regs;
3217     tcg_debug_assert(reg_ct[1] != 0);
3218     reg_ct[0] = reg_ct[1] & preferred_regs;
3219 
3220     /* Skip the preferred_regs option if it cannot be satisfied,
3221        or if the preference made no difference.  */
3222     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3223 
3224     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3225 
3226     /* Try free registers, preferences first.  */
3227     for (j = f; j < 2; j++) {
3228         TCGRegSet set = reg_ct[j];
3229 
3230         if (tcg_regset_single(set)) {
3231             /* One register in the set.  */
3232             TCGReg reg = tcg_regset_first(set);
3233             if (s->reg_to_temp[reg] == NULL) {
3234                 return reg;
3235             }
3236         } else {
3237             for (i = 0; i < n; i++) {
3238                 TCGReg reg = order[i];
3239                 if (s->reg_to_temp[reg] == NULL &&
3240                     tcg_regset_test_reg(set, reg)) {
3241                     return reg;
3242                 }
3243             }
3244         }
3245     }
3246 
3247     /* We must spill something.  */
3248     for (j = f; j < 2; j++) {
3249         TCGRegSet set = reg_ct[j];
3250 
3251         if (tcg_regset_single(set)) {
3252             /* One register in the set.  */
3253             TCGReg reg = tcg_regset_first(set);
3254             tcg_reg_free(s, reg, allocated_regs);
3255             return reg;
3256         } else {
3257             for (i = 0; i < n; i++) {
3258                 TCGReg reg = order[i];
3259                 if (tcg_regset_test_reg(set, reg)) {
3260                     tcg_reg_free(s, reg, allocated_regs);
3261                     return reg;
3262                 }
3263             }
3264         }
3265     }
3266 
3267     tcg_abort();
3268 }
3269 
3270 /* Make sure the temporary is in a register.  If needed, allocate the register
3271    from DESIRED while avoiding ALLOCATED.  */
3272 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3273                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3274 {
3275     TCGReg reg;
3276 
3277     switch (ts->val_type) {
3278     case TEMP_VAL_REG:
3279         return;
3280     case TEMP_VAL_CONST:
3281         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3282                             preferred_regs, ts->indirect_base);
3283         if (ts->type <= TCG_TYPE_I64) {
3284             tcg_out_movi(s, ts->type, reg, ts->val);
3285         } else {
3286             uint64_t val = ts->val;
3287             MemOp vece = MO_64;
3288 
3289             /*
3290              * Find the minimal vector element that matches the constant.
3291              * The targets will, in general, have to do this search anyway,
3292              * do this generically.
3293              */
3294             if (val == dup_const(MO_8, val)) {
3295                 vece = MO_8;
3296             } else if (val == dup_const(MO_16, val)) {
3297                 vece = MO_16;
3298             } else if (val == dup_const(MO_32, val)) {
3299                 vece = MO_32;
3300             }
3301 
3302             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3303         }
3304         ts->mem_coherent = 0;
3305         break;
3306     case TEMP_VAL_MEM:
3307         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3308                             preferred_regs, ts->indirect_base);
3309         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3310         ts->mem_coherent = 1;
3311         break;
3312     case TEMP_VAL_DEAD:
3313     default:
3314         tcg_abort();
3315     }
3316     ts->reg = reg;
3317     ts->val_type = TEMP_VAL_REG;
3318     s->reg_to_temp[reg] = ts;
3319 }
3320 
3321 /* Save a temporary to memory. 'allocated_regs' is meant for the case
3322    where a temporary register needs to be allocated to store a constant;
        the current body does not use it and emits no code.  */
3323 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3324 {
3325     /* The liveness analysis already ensures that globals are back
3326        in memory. Keep a tcg_debug_assert for safety. */
3327     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3328 }
3329 
3330 /* save globals to their canonical location and assume they can be
3331    modified be the following code. 'allocated_regs' is used in case a
3332    temporary registers needs to be allocated to store a constant. */
3333 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3334 {
3335     int i, n;
3336 
3337     for (i = 0, n = s->nb_globals; i < n; i++) {
3338         temp_save(s, &s->temps[i], allocated_regs);
3339     }
3340 }
3341 
3342 /* sync globals to their canonical location and assume they can be
3343    read by the following code. 'allocated_regs' is used in case a
3344    temporary registers needs to be allocated to store a constant. */
3345 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3346 {
3347     int i, n;
3348 
3349     for (i = 0, n = s->nb_globals; i < n; i++) {
3350         TCGTemp *ts = &s->temps[i];
3351         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3352                          || ts->kind == TEMP_FIXED
3353                          || ts->mem_coherent);
3354     }
3355 }
3356 
3357 /* at the end of a basic block, we assume all temporaries are dead and
3358    all globals are stored at their canonical location. */
3359 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3360 {
3361     int i;
3362 
3363     for (i = s->nb_globals; i < s->nb_temps; i++) {
3364         TCGTemp *ts = &s->temps[i];
3365 
3366         switch (ts->kind) {
3367         case TEMP_LOCAL:
3368             temp_save(s, ts, allocated_regs);
3369             break;
3370         case TEMP_NORMAL:
3371         case TEMP_EBB:
3372             /* The liveness analysis already ensures that temps are dead.
3373                Keep an tcg_debug_assert for safety. */
3374             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3375             break;
3376         case TEMP_CONST:
3377             /* Similarly, we should have freed any allocated register. */
3378             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3379             break;
3380         default:
3381             g_assert_not_reached();
3382         }
3383     }
3384 
3385     save_globals(s, allocated_regs);
3386 }
3387 
3388 /*
3389  * At a conditional branch, we assume all temporaries are dead unless
3390  * explicitly live-across-conditional-branch; all globals and local
3391  * temps are synced to their location.
3392  */
3393 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3394 {
3395     sync_globals(s, allocated_regs);
3396 
3397     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3398         TCGTemp *ts = &s->temps[i];
3399         /*
3400          * The liveness analysis already ensures that temps are dead.
3401          * Keep tcg_debug_asserts for safety.
3402          */
3403         switch (ts->kind) {
3404         case TEMP_LOCAL:
3405             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3406             break;
3407         case TEMP_NORMAL:
3408             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3409             break;
3410         case TEMP_EBB:
3411         case TEMP_CONST:
3412             break;
3413         default:
3414             g_assert_not_reached();
3415         }
3416     }
3417 }
3418 
3419 /*
3420  * Specialized code generation for INDEX_op_mov_* with a constant.
3421  */
3422 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3423                                   tcg_target_ulong val, TCGLifeData arg_life,
3424                                   TCGRegSet preferred_regs)
3425 {
3426     /* ENV should not be modified.  */
3427     tcg_debug_assert(!temp_readonly(ots));
3428 
3429     /* The movi is not explicitly generated here.  */
3430     if (ots->val_type == TEMP_VAL_REG) {
3431         s->reg_to_temp[ots->reg] = NULL;
3432     }
3433     ots->val_type = TEMP_VAL_CONST;
3434     ots->val = val;
3435     ots->mem_coherent = 0;
3436     if (NEED_SYNC_ARG(0)) {
3437         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3438     } else if (IS_DEAD_ARG(0)) {
3439         temp_dead(s, ots);
3440     }
3441 }
3442 
3443 /*
3444  * Specialized code generation for INDEX_op_mov_*.
3445  */
3446 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3447 {
3448     const TCGLifeData arg_life = op->life;
3449     TCGRegSet allocated_regs, preferred_regs;
3450     TCGTemp *ts, *ots;
3451     TCGType otype, itype;
3452 
3453     allocated_regs = s->reserved_regs;
3454     preferred_regs = op->output_pref[0];
3455     ots = arg_temp(op->args[0]);
3456     ts = arg_temp(op->args[1]);
3457 
3458     /* ENV should not be modified.  */
3459     tcg_debug_assert(!temp_readonly(ots));
3460 
3461     /* Note that otype != itype for no-op truncation.  */
3462     otype = ots->type;
3463     itype = ts->type;
3464 
3465     if (ts->val_type == TEMP_VAL_CONST) {
3466         /* propagate constant or generate sti */
3467         tcg_target_ulong val = ts->val;
3468         if (IS_DEAD_ARG(1)) {
3469             temp_dead(s, ts);
3470         }
3471         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3472         return;
3473     }
3474 
3475     /* If the source value is in memory we're going to be forced
3476        to have it in a register in order to perform the copy.  Copy
3477        the SOURCE value into its own register first, that way we
3478        don't have to reload SOURCE the next time it is used. */
3479     if (ts->val_type == TEMP_VAL_MEM) {
3480         temp_load(s, ts, tcg_target_available_regs[itype],
3481                   allocated_regs, preferred_regs);
3482     }
3483 
3484     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3485     if (IS_DEAD_ARG(0)) {
3486         /* mov to a non-saved dead register makes no sense (even with
3487            liveness analysis disabled). */
3488         tcg_debug_assert(NEED_SYNC_ARG(0));
3489         if (!ots->mem_allocated) {
3490             temp_allocate_frame(s, ots);
3491         }
3492         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3493         if (IS_DEAD_ARG(1)) {
3494             temp_dead(s, ts);
3495         }
3496         temp_dead(s, ots);
3497     } else {
3498         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3499             /* the mov can be suppressed */
3500             if (ots->val_type == TEMP_VAL_REG) {
3501                 s->reg_to_temp[ots->reg] = NULL;
3502             }
3503             ots->reg = ts->reg;
3504             temp_dead(s, ts);
3505         } else {
3506             if (ots->val_type != TEMP_VAL_REG) {
3507                 /* When allocating a new register, make sure to not spill the
3508                    input one. */
3509                 tcg_regset_set_reg(allocated_regs, ts->reg);
3510                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3511                                          allocated_regs, preferred_regs,
3512                                          ots->indirect_base);
3513             }
3514             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3515                 /*
3516                  * Cross register class move not supported.
3517                  * Store the source register into the destination slot
3518                  * and leave the destination temp as TEMP_VAL_MEM.
3519                  */
3520                 assert(!temp_readonly(ots));
3521                 if (!ts->mem_allocated) {
3522                     temp_allocate_frame(s, ots);
3523                 }
3524                 tcg_out_st(s, ts->type, ts->reg,
3525                            ots->mem_base->reg, ots->mem_offset);
3526                 ots->mem_coherent = 1;
3527                 temp_free_or_dead(s, ots, -1);
3528                 return;
3529             }
3530         }
3531         ots->val_type = TEMP_VAL_REG;
3532         ots->mem_coherent = 0;
3533         s->reg_to_temp[ots->reg] = ots;
3534         if (NEED_SYNC_ARG(0)) {
3535             temp_sync(s, ots, allocated_regs, 0, 0);
3536         }
3537     }
3538 }
3539 
3540 /*
3541  * Specialized code generation for INDEX_op_dup_vec.
3542  */
3543 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3544 {
3545     const TCGLifeData arg_life = op->life;
3546     TCGRegSet dup_out_regs, dup_in_regs;
3547     TCGTemp *its, *ots;
3548     TCGType itype, vtype;
3549     intptr_t endian_fixup;
3550     unsigned vece;
3551     bool ok;
3552 
3553     ots = arg_temp(op->args[0]);
3554     its = arg_temp(op->args[1]);
3555 
3556     /* ENV should not be modified.  */
3557     tcg_debug_assert(!temp_readonly(ots));
3558 
3559     itype = its->type;
3560     vece = TCGOP_VECE(op);
3561     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3562 
3563     if (its->val_type == TEMP_VAL_CONST) {
3564         /* Propagate constant via movi -> dupi.  */
3565         tcg_target_ulong val = its->val;
3566         if (IS_DEAD_ARG(1)) {
3567             temp_dead(s, its);
3568         }
3569         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3570         return;
3571     }
3572 
3573     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3574     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3575 
3576     /* Allocate the output register now.  */
3577     if (ots->val_type != TEMP_VAL_REG) {
3578         TCGRegSet allocated_regs = s->reserved_regs;
3579 
3580         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3581             /* Make sure to not spill the input register. */
3582             tcg_regset_set_reg(allocated_regs, its->reg);
3583         }
3584         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3585                                  op->output_pref[0], ots->indirect_base);
3586         ots->val_type = TEMP_VAL_REG;
3587         ots->mem_coherent = 0;
3588         s->reg_to_temp[ots->reg] = ots;
3589     }
3590 
3591     switch (its->val_type) {
3592     case TEMP_VAL_REG:
3593         /*
3594          * The dup constriaints must be broad, covering all possible VECE.
3595          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3596          * to fail, indicating that extra moves are required for that case.
3597          */
3598         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3599             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3600                 goto done;
3601             }
3602             /* Try again from memory or a vector input register.  */
3603         }
3604         if (!its->mem_coherent) {
3605             /*
3606              * The input register is not synced, and so an extra store
3607              * would be required to use memory.  Attempt an integer-vector
3608              * register move first.  We do not have a TCGRegSet for this.
3609              */
3610             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3611                 break;
3612             }
3613             /* Sync the temp back to its slot and load from there.  */
3614             temp_sync(s, its, s->reserved_regs, 0, 0);
3615         }
3616         /* fall through */
3617 
3618     case TEMP_VAL_MEM:
3619 #if HOST_BIG_ENDIAN
3620         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3621         endian_fixup -= 1 << vece;
3622 #else
3623         endian_fixup = 0;
3624 #endif
3625         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3626                              its->mem_offset + endian_fixup)) {
3627             goto done;
3628         }
3629         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3630         break;
3631 
3632     default:
3633         g_assert_not_reached();
3634     }
3635 
3636     /* We now have a vector input register, so dup must succeed. */
3637     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3638     tcg_debug_assert(ok);
3639 
3640  done:
3641     if (IS_DEAD_ARG(1)) {
3642         temp_dead(s, its);
3643     }
3644     if (NEED_SYNC_ARG(0)) {
3645         temp_sync(s, ots, s->reserved_regs, 0, 0);
3646     }
3647     if (IS_DEAD_ARG(0)) {
3648         temp_dead(s, ots);
3649     }
3650 }
3651 
3652 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3653 {
3654     const TCGLifeData arg_life = op->life;
3655     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3656     TCGRegSet i_allocated_regs;
3657     TCGRegSet o_allocated_regs;
3658     int i, k, nb_iargs, nb_oargs;
3659     TCGReg reg;
3660     TCGArg arg;
3661     const TCGArgConstraint *arg_ct;
3662     TCGTemp *ts;
3663     TCGArg new_args[TCG_MAX_OP_ARGS];
3664     int const_args[TCG_MAX_OP_ARGS];
3665 
3666     nb_oargs = def->nb_oargs;
3667     nb_iargs = def->nb_iargs;
3668 
3669     /* copy constants */
3670     memcpy(new_args + nb_oargs + nb_iargs,
3671            op->args + nb_oargs + nb_iargs,
3672            sizeof(TCGArg) * def->nb_cargs);
3673 
3674     i_allocated_regs = s->reserved_regs;
3675     o_allocated_regs = s->reserved_regs;
3676 
3677     /* satisfy input constraints */
3678     for (k = 0; k < nb_iargs; k++) {
3679         TCGRegSet i_preferred_regs, o_preferred_regs;
3680 
3681         i = def->args_ct[nb_oargs + k].sort_index;
3682         arg = op->args[i];
3683         arg_ct = &def->args_ct[i];
3684         ts = arg_temp(arg);
3685 
3686         if (ts->val_type == TEMP_VAL_CONST
3687             && tcg_target_const_match(ts->val, ts->type, arg_ct->ct)) {
3688             /* constant is OK for instruction */
3689             const_args[i] = 1;
3690             new_args[i] = ts->val;
3691             continue;
3692         }
3693 
3694         i_preferred_regs = o_preferred_regs = 0;
3695         if (arg_ct->ialias) {
3696             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3697 
3698             /*
3699              * If the input is readonly, then it cannot also be an
3700              * output and aliased to itself.  If the input is not
3701              * dead after the instruction, we must allocate a new
3702              * register and move it.
3703              */
3704             if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
3705                 goto allocate_in_reg;
3706             }
3707 
3708             /*
3709              * Check if the current register has already been allocated
3710              * for another input aliased to an output.
3711              */
3712             if (ts->val_type == TEMP_VAL_REG) {
3713                 reg = ts->reg;
3714                 for (int k2 = 0; k2 < k; k2++) {
3715                     int i2 = def->args_ct[nb_oargs + k2].sort_index;
3716                     if (def->args_ct[i2].ialias && reg == new_args[i2]) {
3717                         goto allocate_in_reg;
3718                     }
3719                 }
3720             }
3721             i_preferred_regs = o_preferred_regs;
3722         }
3723 
3724         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
3725         reg = ts->reg;
3726 
3727         if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
3728  allocate_in_reg:
3729             /*
3730              * Allocate a new register matching the constraint
3731              * and move the temporary register into it.
3732              */
3733             temp_load(s, ts, tcg_target_available_regs[ts->type],
3734                       i_allocated_regs, 0);
3735             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
3736                                 o_preferred_regs, ts->indirect_base);
3737             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3738                 /*
3739                  * Cross register class move not supported.  Sync the
3740                  * temp back to its slot and load from there.
3741                  */
3742                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3743                 tcg_out_ld(s, ts->type, reg,
3744                            ts->mem_base->reg, ts->mem_offset);
3745             }
3746         }
3747         new_args[i] = reg;
3748         const_args[i] = 0;
3749         tcg_regset_set_reg(i_allocated_regs, reg);
3750     }
3751 
3752     /* mark dead temporaries and free the associated registers */
3753     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3754         if (IS_DEAD_ARG(i)) {
3755             temp_dead(s, arg_temp(op->args[i]));
3756         }
3757     }
3758 
3759     if (def->flags & TCG_OPF_COND_BRANCH) {
3760         tcg_reg_alloc_cbranch(s, i_allocated_regs);
3761     } else if (def->flags & TCG_OPF_BB_END) {
3762         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3763     } else {
3764         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3765             /* XXX: permit generic clobber register list ? */
3766             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3767                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3768                     tcg_reg_free(s, i, i_allocated_regs);
3769                 }
3770             }
3771         }
3772         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3773             /* sync globals if the op has side effects and might trigger
3774                an exception. */
3775             sync_globals(s, i_allocated_regs);
3776         }
3777 
3778         /* satisfy the output constraints */
3779         for(k = 0; k < nb_oargs; k++) {
3780             i = def->args_ct[k].sort_index;
3781             arg = op->args[i];
3782             arg_ct = &def->args_ct[i];
3783             ts = arg_temp(arg);
3784 
3785             /* ENV should not be modified.  */
3786             tcg_debug_assert(!temp_readonly(ts));
3787 
3788             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
3789                 reg = new_args[arg_ct->alias_index];
3790             } else if (arg_ct->newreg) {
3791                 reg = tcg_reg_alloc(s, arg_ct->regs,
3792                                     i_allocated_regs | o_allocated_regs,
3793                                     op->output_pref[k], ts->indirect_base);
3794             } else {
3795                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
3796                                     op->output_pref[k], ts->indirect_base);
3797             }
3798             tcg_regset_set_reg(o_allocated_regs, reg);
3799             if (ts->val_type == TEMP_VAL_REG) {
3800                 s->reg_to_temp[ts->reg] = NULL;
3801             }
3802             ts->val_type = TEMP_VAL_REG;
3803             ts->reg = reg;
3804             /*
3805              * Temp value is modified, so the value kept in memory is
3806              * potentially not the same.
3807              */
3808             ts->mem_coherent = 0;
3809             s->reg_to_temp[reg] = ts;
3810             new_args[i] = reg;
3811         }
3812     }
3813 
3814     /* emit instruction */
3815     if (def->flags & TCG_OPF_VECTOR) {
3816         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3817                        new_args, const_args);
3818     } else {
3819         tcg_out_op(s, op->opc, new_args, const_args);
3820     }
3821 
3822     /* move the outputs in the correct register if needed */
3823     for(i = 0; i < nb_oargs; i++) {
3824         ts = arg_temp(op->args[i]);
3825 
3826         /* ENV should not be modified.  */
3827         tcg_debug_assert(!temp_readonly(ts));
3828 
3829         if (NEED_SYNC_ARG(i)) {
3830             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3831         } else if (IS_DEAD_ARG(i)) {
3832             temp_dead(s, ts);
3833         }
3834     }
3835 }
3836 
3837 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
3838 {
3839     const TCGLifeData arg_life = op->life;
3840     TCGTemp *ots, *itsl, *itsh;
3841     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3842 
3843     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
3844     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
3845     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
3846 
3847     ots = arg_temp(op->args[0]);
3848     itsl = arg_temp(op->args[1]);
3849     itsh = arg_temp(op->args[2]);
3850 
3851     /* ENV should not be modified.  */
3852     tcg_debug_assert(!temp_readonly(ots));
3853 
3854     /* Allocate the output register now.  */
3855     if (ots->val_type != TEMP_VAL_REG) {
3856         TCGRegSet allocated_regs = s->reserved_regs;
3857         TCGRegSet dup_out_regs =
3858             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3859 
3860         /* Make sure to not spill the input registers. */
3861         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
3862             tcg_regset_set_reg(allocated_regs, itsl->reg);
3863         }
3864         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
3865             tcg_regset_set_reg(allocated_regs, itsh->reg);
3866         }
3867 
3868         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3869                                  op->output_pref[0], ots->indirect_base);
3870         ots->val_type = TEMP_VAL_REG;
3871         ots->mem_coherent = 0;
3872         s->reg_to_temp[ots->reg] = ots;
3873     }
3874 
3875     /* Promote dup2 of immediates to dupi_vec. */
3876     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
3877         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
3878         MemOp vece = MO_64;
3879 
3880         if (val == dup_const(MO_8, val)) {
3881             vece = MO_8;
3882         } else if (val == dup_const(MO_16, val)) {
3883             vece = MO_16;
3884         } else if (val == dup_const(MO_32, val)) {
3885             vece = MO_32;
3886         }
3887 
3888         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
3889         goto done;
3890     }
3891 
3892     /* If the two inputs form one 64-bit value, try dupm_vec. */
3893     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
3894         if (!itsl->mem_coherent) {
3895             temp_sync(s, itsl, s->reserved_regs, 0, 0);
3896         }
3897         if (!itsh->mem_coherent) {
3898             temp_sync(s, itsh, s->reserved_regs, 0, 0);
3899         }
3900 #if HOST_BIG_ENDIAN
3901         TCGTemp *its = itsh;
3902 #else
3903         TCGTemp *its = itsl;
3904 #endif
3905         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
3906                              its->mem_base->reg, its->mem_offset)) {
3907             goto done;
3908         }
3909     }
3910 
3911     /* Fall back to generic expansion. */
3912     return false;
3913 
3914  done:
3915     if (IS_DEAD_ARG(1)) {
3916         temp_dead(s, itsl);
3917     }
3918     if (IS_DEAD_ARG(2)) {
3919         temp_dead(s, itsh);
3920     }
3921     if (NEED_SYNC_ARG(0)) {
3922         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
3923     } else if (IS_DEAD_ARG(0)) {
3924         temp_dead(s, ots);
3925     }
3926     return true;
3927 }
3928 
/*
 * STACK_DIR(x): apply the host stack direction to @x.  The value is
 * negated when TCG_TARGET_STACK_GROWSUP is defined and left unchanged
 * otherwise.
 */
#ifdef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (-(x))
#else
#define STACK_DIR(x) (x)
#endif
3934 
3935 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3936 {
3937     const int nb_oargs = TCGOP_CALLO(op);
3938     const int nb_iargs = TCGOP_CALLI(op);
3939     const TCGLifeData arg_life = op->life;
3940     const TCGHelperInfo *info;
3941     int flags, nb_regs, i;
3942     TCGReg reg;
3943     TCGArg arg;
3944     TCGTemp *ts;
3945     intptr_t stack_offset;
3946     size_t call_stack_size;
3947     tcg_insn_unit *func_addr;
3948     int allocate_args;
3949     TCGRegSet allocated_regs;
3950 
3951     func_addr = tcg_call_func(op);
3952     info = tcg_call_info(op);
3953     flags = info->flags;
3954 
3955     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3956     if (nb_regs > nb_iargs) {
3957         nb_regs = nb_iargs;
3958     }
3959 
3960     /* assign stack slots first */
3961     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3962     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3963         ~(TCG_TARGET_STACK_ALIGN - 1);
3964     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3965     if (allocate_args) {
3966         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3967            preallocate call stack */
3968         tcg_abort();
3969     }
3970 
3971     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3972     for (i = nb_regs; i < nb_iargs; i++) {
3973         arg = op->args[nb_oargs + i];
3974 #ifdef TCG_TARGET_STACK_GROWSUP
3975         stack_offset -= sizeof(tcg_target_long);
3976 #endif
3977         if (arg != TCG_CALL_DUMMY_ARG) {
3978             ts = arg_temp(arg);
3979             temp_load(s, ts, tcg_target_available_regs[ts->type],
3980                       s->reserved_regs, 0);
3981             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3982         }
3983 #ifndef TCG_TARGET_STACK_GROWSUP
3984         stack_offset += sizeof(tcg_target_long);
3985 #endif
3986     }
3987 
3988     /* assign input registers */
3989     allocated_regs = s->reserved_regs;
3990     for (i = 0; i < nb_regs; i++) {
3991         arg = op->args[nb_oargs + i];
3992         if (arg != TCG_CALL_DUMMY_ARG) {
3993             ts = arg_temp(arg);
3994             reg = tcg_target_call_iarg_regs[i];
3995 
3996             if (ts->val_type == TEMP_VAL_REG) {
3997                 if (ts->reg != reg) {
3998                     tcg_reg_free(s, reg, allocated_regs);
3999                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4000                         /*
4001                          * Cross register class move not supported.  Sync the
4002                          * temp back to its slot and load from there.
4003                          */
4004                         temp_sync(s, ts, allocated_regs, 0, 0);
4005                         tcg_out_ld(s, ts->type, reg,
4006                                    ts->mem_base->reg, ts->mem_offset);
4007                     }
4008                 }
4009             } else {
4010                 TCGRegSet arg_set = 0;
4011 
4012                 tcg_reg_free(s, reg, allocated_regs);
4013                 tcg_regset_set_reg(arg_set, reg);
4014                 temp_load(s, ts, arg_set, allocated_regs, 0);
4015             }
4016 
4017             tcg_regset_set_reg(allocated_regs, reg);
4018         }
4019     }
4020 
4021     /* mark dead temporaries and free the associated registers */
4022     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4023         if (IS_DEAD_ARG(i)) {
4024             temp_dead(s, arg_temp(op->args[i]));
4025         }
4026     }
4027 
4028     /* clobber call registers */
4029     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4030         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4031             tcg_reg_free(s, i, allocated_regs);
4032         }
4033     }
4034 
4035     /* Save globals if they might be written by the helper, sync them if
4036        they might be read. */
4037     if (flags & TCG_CALL_NO_READ_GLOBALS) {
4038         /* Nothing to do */
4039     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4040         sync_globals(s, allocated_regs);
4041     } else {
4042         save_globals(s, allocated_regs);
4043     }
4044 
4045 #ifdef CONFIG_TCG_INTERPRETER
4046     {
4047         gpointer hash = (gpointer)(uintptr_t)info->typemask;
4048         ffi_cif *cif = g_hash_table_lookup(ffi_table, hash);
4049         assert(cif != NULL);
4050         tcg_out_call(s, func_addr, cif);
4051     }
4052 #else
4053     tcg_out_call(s, func_addr);
4054 #endif
4055 
4056     /* assign output registers and emit moves if needed */
4057     for(i = 0; i < nb_oargs; i++) {
4058         arg = op->args[i];
4059         ts = arg_temp(arg);
4060 
4061         /* ENV should not be modified.  */
4062         tcg_debug_assert(!temp_readonly(ts));
4063 
4064         reg = tcg_target_call_oarg_regs[i];
4065         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4066         if (ts->val_type == TEMP_VAL_REG) {
4067             s->reg_to_temp[ts->reg] = NULL;
4068         }
4069         ts->val_type = TEMP_VAL_REG;
4070         ts->reg = reg;
4071         ts->mem_coherent = 0;
4072         s->reg_to_temp[reg] = ts;
4073         if (NEED_SYNC_ARG(i)) {
4074             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4075         } else if (IS_DEAD_ARG(i)) {
4076             temp_dead(s, ts);
4077         }
4078     }
4079 }
4080 
4081 #ifdef CONFIG_PROFILER
4082 
/*
 * Helpers for accumulating one TCGProfile into another; naming the field
 * only once per use avoids copy/paste errors.
 *
 * PROF_ADD(to, from, field): add an atomic read of (from)->field to
 * (to)->field.
 */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += qatomic_read(&((from)->field));  \
    } while (0)

/*
 * PROF_MAX(to, from, field): raise (to)->field to an atomic read of
 * (from)->field when that value is larger.
 */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
4096 
4097 /* Pass in a zero'ed @prof */
4098 static inline
4099 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4100 {
4101     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4102     unsigned int i;
4103 
4104     for (i = 0; i < n_ctxs; i++) {
4105         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4106         const TCGProfile *orig = &s->prof;
4107 
4108         if (counters) {
4109             PROF_ADD(prof, orig, cpu_exec_time);
4110             PROF_ADD(prof, orig, tb_count1);
4111             PROF_ADD(prof, orig, tb_count);
4112             PROF_ADD(prof, orig, op_count);
4113             PROF_MAX(prof, orig, op_count_max);
4114             PROF_ADD(prof, orig, temp_count);
4115             PROF_MAX(prof, orig, temp_count_max);
4116             PROF_ADD(prof, orig, del_op_count);
4117             PROF_ADD(prof, orig, code_in_len);
4118             PROF_ADD(prof, orig, code_out_len);
4119             PROF_ADD(prof, orig, search_out_len);
4120             PROF_ADD(prof, orig, interm_time);
4121             PROF_ADD(prof, orig, code_time);
4122             PROF_ADD(prof, orig, la_time);
4123             PROF_ADD(prof, orig, opt_time);
4124             PROF_ADD(prof, orig, restore_count);
4125             PROF_ADD(prof, orig, restore_time);
4126         }
4127         if (table) {
4128             int i;
4129 
4130             for (i = 0; i < NB_OPS; i++) {
4131                 PROF_ADD(prof, orig, table_op_count[i]);
4132             }
4133         }
4134     }
4135 }
4136 
4137 #undef PROF_ADD
4138 #undef PROF_MAX
4139 
/* Snapshot only the scalar counters into @prof (pass it in zero'ed). */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
4144 
/* Snapshot only the per-opcode count table into @prof (pass it in zero'ed). */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
4149 
4150 void tcg_dump_op_count(GString *buf)
4151 {
4152     TCGProfile prof = {};
4153     int i;
4154 
4155     tcg_profile_snapshot_table(&prof);
4156     for (i = 0; i < NB_OPS; i++) {
4157         g_string_append_printf(buf, "%s %" PRId64 "\n", tcg_op_defs[i].name,
4158                                prof.table_op_count[i]);
4159     }
4160 }
4161 
4162 int64_t tcg_cpu_exec_time(void)
4163 {
4164     unsigned int n_ctxs = qatomic_read(&tcg_cur_ctxs);
4165     unsigned int i;
4166     int64_t ret = 0;
4167 
4168     for (i = 0; i < n_ctxs; i++) {
4169         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4170         const TCGProfile *prof = &s->prof;
4171 
4172         ret += qatomic_read(&prof->cpu_exec_time);
4173     }
4174     return ret;
4175 }
4176 #else
4177 void tcg_dump_op_count(GString *buf)
4178 {
4179     g_string_append_printf(buf, "[TCG profiler not compiled]\n");
4180 }
4181 
/*
 * Profiler disabled at build time: there is no execution time to return,
 * so report the error and terminate the process.  Never returns.
 */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
4187 #endif
4188 
4189 
/*
 * Turn the ops queued in @s into host code for @tb.
 *
 * Runs the optimizer (when USE_TCG_OPTIMIZATIONS is defined), the
 * reachable-code and liveness passes, then walks the op list performing
 * register allocation and code emission into the buffer derived from
 * tb->tc.ptr.
 *
 * Returns the generated code size (tcg_current_code_size()) on success.
 * Returns -1 if the code pointer ran past s->code_gen_highwater, and -2
 * if the code size exceeds UINT16_MAX or tcg_resolve_relocs() fails.  A
 * negative result from tcg_out_ldst_finalize() or tcg_out_pool_finalize()
 * is passed through unchanged.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    {
        int n = 0;

        /* Count the ops of this TB for the op_count statistics. */
        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        qatomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            qatomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        qatomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            qatomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    /* Log the ops as handed to us, before any optimization. */
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP:\n");
            tcg_dump_ops(s, logfile, false);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

#ifdef CONFIG_DEBUG_TCG
    /* Ensure all labels referenced have been emitted.  */
    {
        TCGLabel *l;
        bool error = false;

        QSIMPLEQ_FOREACH(l, &s->labels, next) {
            if (unlikely(!l->present) && l->refs) {
                qemu_log_mask(CPU_LOG_TB_OP,
                              "$L%d referenced but not present.\n", l->id);
                error = true;
            }
        }
        assert(!error);
    }
#endif

#ifdef CONFIG_PROFILER
    /* Subtract the start time now and add the end time below. */
    qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            FILE *logfile = qemu_log_trylock();
            if (logfile) {
                fprintf(logfile, "OP before indirect lowering:\n");
                tcg_dump_ops(s, logfile, false);
                fprintf(logfile, "\n");
                qemu_log_unlock(logfile);
            }
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            fprintf(logfile, "OP after optimization and liveness analysis:\n");
            tcg_dump_ops(s, logfile, true);
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    tcg_reg_alloc_start(s);

    /*
     * Reset the buffer pointers when restarting after overflow.
     * TODO: Move this into translate-all.c with the rest of the
     * buffer management.  Having only this done here is confusing.
     */
    s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
    s->code_ptr = s->code_buf;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* -1 until the first insn_start op has been seen. */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_dup_vec:
            tcg_reg_alloc_dup(s, op);
            break;
        case INDEX_op_insn_start:
            /* Record where the code of the previous guest insn ends. */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            /* Save the insn_start arguments for the new guest insn. */
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each word is split over two host-sized args: low, high. */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            /* A label ends the basic block before it is emitted. */
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]));
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        case INDEX_op_dup2_vec:
            if (tcg_reg_alloc_dup2(s, op)) {
                break;
            }
            /* fall through */
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
        /* Test for TB overflow, as seen by gen_insn_end_off.  */
        if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
            return -2;
        }
    }
    /* At least one insn_start must have been seen. */
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    i = tcg_out_ldst_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    i = tcg_out_pool_finalize(s);
    if (i < 0) {
        return i;
    }
#endif
    if (!tcg_resolve_relocs(s)) {
        return -2;
    }

#ifndef CONFIG_TCG_INTERPRETER
    /* flush instruction cache */
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
                        (uintptr_t)s->code_buf,
                        tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
#endif

    return tcg_current_code_size(s);
}
4421 
4422 #ifdef CONFIG_PROFILER
4423 void tcg_dump_info(GString *buf)
4424 {
4425     TCGProfile prof = {};
4426     const TCGProfile *s;
4427     int64_t tb_count;
4428     int64_t tb_div_count;
4429     int64_t tot;
4430 
4431     tcg_profile_snapshot_counters(&prof);
4432     s = &prof;
4433     tb_count = s->tb_count;
4434     tb_div_count = tb_count ? tb_count : 1;
4435     tot = s->interm_time + s->code_time;
4436 
4437     g_string_append_printf(buf, "JIT cycles          %" PRId64
4438                            " (%0.3f s at 2.4 GHz)\n",
4439                            tot, tot / 2.4e9);
4440     g_string_append_printf(buf, "translated TBs      %" PRId64
4441                            " (aborted=%" PRId64 " %0.1f%%)\n",
4442                            tb_count, s->tb_count1 - tb_count,
4443                            (double)(s->tb_count1 - s->tb_count)
4444                            / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4445     g_string_append_printf(buf, "avg ops/TB          %0.1f max=%d\n",
4446                            (double)s->op_count / tb_div_count, s->op_count_max);
4447     g_string_append_printf(buf, "deleted ops/TB      %0.2f\n",
4448                            (double)s->del_op_count / tb_div_count);
4449     g_string_append_printf(buf, "avg temps/TB        %0.2f max=%d\n",
4450                            (double)s->temp_count / tb_div_count,
4451                            s->temp_count_max);
4452     g_string_append_printf(buf, "avg host code/TB    %0.1f\n",
4453                            (double)s->code_out_len / tb_div_count);
4454     g_string_append_printf(buf, "avg search data/TB  %0.1f\n",
4455                            (double)s->search_out_len / tb_div_count);
4456 
4457     g_string_append_printf(buf, "cycles/op           %0.1f\n",
4458                            s->op_count ? (double)tot / s->op_count : 0);
4459     g_string_append_printf(buf, "cycles/in byte      %0.1f\n",
4460                            s->code_in_len ? (double)tot / s->code_in_len : 0);
4461     g_string_append_printf(buf, "cycles/out byte     %0.1f\n",
4462                            s->code_out_len ? (double)tot / s->code_out_len : 0);
4463     g_string_append_printf(buf, "cycles/search byte     %0.1f\n",
4464                            s->search_out_len ?
4465                            (double)tot / s->search_out_len : 0);
4466     if (tot == 0) {
4467         tot = 1;
4468     }
4469     g_string_append_printf(buf, "  gen_interm time   %0.1f%%\n",
4470                            (double)s->interm_time / tot * 100.0);
4471     g_string_append_printf(buf, "  gen_code time     %0.1f%%\n",
4472                            (double)s->code_time / tot * 100.0);
4473     g_string_append_printf(buf, "optim./code time    %0.1f%%\n",
4474                            (double)s->opt_time / (s->code_time ?
4475                                                   s->code_time : 1)
4476                            * 100.0);
4477     g_string_append_printf(buf, "liveness/code time  %0.1f%%\n",
4478                            (double)s->la_time / (s->code_time ?
4479                                                  s->code_time : 1) * 100.0);
4480     g_string_append_printf(buf, "cpu_restore count   %" PRId64 "\n",
4481                            s->restore_count);
4482     g_string_append_printf(buf, "  avg cycles        %0.1f\n",
4483                            s->restore_count ?
4484                            (double)s->restore_time / s->restore_count : 0);
4485 }
4486 #else
/*
 * Variant built when CONFIG_PROFILER is not defined: there are no counters
 * to report, so only a fixed notice is appended to @buf.
 */
void tcg_dump_info(GString *buf)
{
    g_string_append_printf(buf, "[TCG profiler not compiled]\n");
}
4491 #endif
4492 
4493 #ifdef ELF_HOST_MACHINE
4494 /* In order to use this feature, the backend needs to do three things:
4495 
4496    (1) Define ELF_HOST_MACHINE to indicate both what value to
4497        put into the ELF image and to indicate support for the feature.
4498 
4499    (2) Define tcg_register_jit.  This should create a buffer containing
4500        the contents of a .debug_frame section that describes the post-
4501        prologue unwind info for the tcg machine.
4502 
4503    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4504 */
4505 
4506 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Values stored in jit_descriptor.action_flag. */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,    /* written by tcg_register_jit_int() */
    JIT_UNREGISTER_FN
} jit_actions_t;
4512 
/*
 * Describes one in-memory symbol file.  The layout is part of the GDB
 * interface and must not be changed.
 * NOTE(review): next_entry/prev_entry presumably chain entries into a
 * list walked by the debugger; this file only ever uses a single entry.
 */
struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* start of the symbol file image */
    uint64_t symfile_size;      /* size of that image in bytes */
};
4519 
/*
 * Descriptor shared with the debugger.  The layout is part of the GDB
 * interface and must not be changed.
 */
struct jit_descriptor {
    uint32_t version;                       /* statically set to 1 below */
    uint32_t action_flag;                   /* a jit_actions_t value */
    struct jit_code_entry *relevant_entry;  /* entry the action applies to */
    struct jit_code_entry *first_entry;     /* first registered entry */
};
4526 
/*
 * Intentionally empty, non-inlined function that is called once
 * __jit_debug_descriptor has been updated.
 * NOTE(review): per the GDB JIT interface the debugger is expected to
 * intercept calls to this symbol; the empty asm presumably keeps the
 * compiler from optimizing the call away -- confirm against the GDB docs.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}
4532 
/* Must statically initialize the version, because GDB may check
   the version before we can set it.  The initializer is positional:
   version = 1, action_flag = 0 (JIT_NOACTION), and both entry
   pointers null.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4536 
4537 /* End GDB interface.  */
4538 
/*
 * Look up @str in the ELF-style string table @strtab and return its byte
 * offset from the start of the table.
 *
 * The table is a sequence of NUL-terminated strings.  Offset 0 holds the
 * mandatory empty string and is skipped, so the search starts at offset 1.
 * The table must be followed by at least one extra NUL byte (e.g. a
 * zero-padded array); that empty entry marks the end of the table.
 *
 * Returns 0, the ELF "no name" index, when @str is not present, instead
 * of scanning past the end of the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *p = strtab + 1;

    for (;;) {
        if (strcmp(p, str) == 0) {
            return p - strtab;
        }
        if (*p == '\0') {
            /* Hit the terminating empty entry without a match. */
            return 0;
        }
        /* Advance past this string and its terminating NUL. */
        p += strlen(p) + 1;
    }
}
4550 
4551 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4552                                  const void *debug_frame,
4553                                  size_t debug_frame_size)
4554 {
4555     struct __attribute__((packed)) DebugInfo {
4556         uint32_t  len;
4557         uint16_t  version;
4558         uint32_t  abbrev;
4559         uint8_t   ptr_size;
4560         uint8_t   cu_die;
4561         uint16_t  cu_lang;
4562         uintptr_t cu_low_pc;
4563         uintptr_t cu_high_pc;
4564         uint8_t   fn_die;
4565         char      fn_name[16];
4566         uintptr_t fn_low_pc;
4567         uintptr_t fn_high_pc;
4568         uint8_t   cu_eoc;
4569     };
4570 
4571     struct ElfImage {
4572         ElfW(Ehdr) ehdr;
4573         ElfW(Phdr) phdr;
4574         ElfW(Shdr) shdr[7];
4575         ElfW(Sym)  sym[2];
4576         struct DebugInfo di;
4577         uint8_t    da[24];
4578         char       str[80];
4579     };
4580 
4581     struct ElfImage *img;
4582 
4583     static const struct ElfImage img_template = {
4584         .ehdr = {
4585             .e_ident[EI_MAG0] = ELFMAG0,
4586             .e_ident[EI_MAG1] = ELFMAG1,
4587             .e_ident[EI_MAG2] = ELFMAG2,
4588             .e_ident[EI_MAG3] = ELFMAG3,
4589             .e_ident[EI_CLASS] = ELF_CLASS,
4590             .e_ident[EI_DATA] = ELF_DATA,
4591             .e_ident[EI_VERSION] = EV_CURRENT,
4592             .e_type = ET_EXEC,
4593             .e_machine = ELF_HOST_MACHINE,
4594             .e_version = EV_CURRENT,
4595             .e_phoff = offsetof(struct ElfImage, phdr),
4596             .e_shoff = offsetof(struct ElfImage, shdr),
4597             .e_ehsize = sizeof(ElfW(Shdr)),
4598             .e_phentsize = sizeof(ElfW(Phdr)),
4599             .e_phnum = 1,
4600             .e_shentsize = sizeof(ElfW(Shdr)),
4601             .e_shnum = ARRAY_SIZE(img->shdr),
4602             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4603 #ifdef ELF_HOST_FLAGS
4604             .e_flags = ELF_HOST_FLAGS,
4605 #endif
4606 #ifdef ELF_OSABI
4607             .e_ident[EI_OSABI] = ELF_OSABI,
4608 #endif
4609         },
4610         .phdr = {
4611             .p_type = PT_LOAD,
4612             .p_flags = PF_X,
4613         },
4614         .shdr = {
4615             [0] = { .sh_type = SHT_NULL },
4616             /* Trick: The contents of code_gen_buffer are not present in
4617                this fake ELF file; that got allocated elsewhere.  Therefore
4618                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4619                will not look for contents.  We can record any address.  */
4620             [1] = { /* .text */
4621                 .sh_type = SHT_NOBITS,
4622                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4623             },
4624             [2] = { /* .debug_info */
4625                 .sh_type = SHT_PROGBITS,
4626                 .sh_offset = offsetof(struct ElfImage, di),
4627                 .sh_size = sizeof(struct DebugInfo),
4628             },
4629             [3] = { /* .debug_abbrev */
4630                 .sh_type = SHT_PROGBITS,
4631                 .sh_offset = offsetof(struct ElfImage, da),
4632                 .sh_size = sizeof(img->da),
4633             },
4634             [4] = { /* .debug_frame */
4635                 .sh_type = SHT_PROGBITS,
4636                 .sh_offset = sizeof(struct ElfImage),
4637             },
4638             [5] = { /* .symtab */
4639                 .sh_type = SHT_SYMTAB,
4640                 .sh_offset = offsetof(struct ElfImage, sym),
4641                 .sh_size = sizeof(img->sym),
4642                 .sh_info = 1,
4643                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4644                 .sh_entsize = sizeof(ElfW(Sym)),
4645             },
4646             [6] = { /* .strtab */
4647                 .sh_type = SHT_STRTAB,
4648                 .sh_offset = offsetof(struct ElfImage, str),
4649                 .sh_size = sizeof(img->str),
4650             }
4651         },
4652         .sym = {
4653             [1] = { /* code_gen_buffer */
4654                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4655                 .st_shndx = 1,
4656             }
4657         },
4658         .di = {
4659             .len = sizeof(struct DebugInfo) - 4,
4660             .version = 2,
4661             .ptr_size = sizeof(void *),
4662             .cu_die = 1,
4663             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4664             .fn_die = 2,
4665             .fn_name = "code_gen_buffer"
4666         },
4667         .da = {
4668             1,          /* abbrev number (the cu) */
4669             0x11, 1,    /* DW_TAG_compile_unit, has children */
4670             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4671             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4672             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4673             0, 0,       /* end of abbrev */
4674             2,          /* abbrev number (the fn) */
4675             0x2e, 0,    /* DW_TAG_subprogram, no children */
4676             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4677             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4678             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4679             0, 0,       /* end of abbrev */
4680             0           /* no more abbrev */
4681         },
4682         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4683                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4684     };
4685 
4686     /* We only need a single jit entry; statically allocate it.  */
4687     static struct jit_code_entry one_entry;
4688 
4689     uintptr_t buf = (uintptr_t)buf_ptr;
4690     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4691     DebugFrameHeader *dfh;
4692 
4693     img = g_malloc(img_size);
4694     *img = img_template;
4695 
4696     img->phdr.p_vaddr = buf;
4697     img->phdr.p_paddr = buf;
4698     img->phdr.p_memsz = buf_size;
4699 
4700     img->shdr[1].sh_name = find_string(img->str, ".text");
4701     img->shdr[1].sh_addr = buf;
4702     img->shdr[1].sh_size = buf_size;
4703 
4704     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4705     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4706 
4707     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4708     img->shdr[4].sh_size = debug_frame_size;
4709 
4710     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4711     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4712 
4713     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4714     img->sym[1].st_value = buf;
4715     img->sym[1].st_size = buf_size;
4716 
4717     img->di.cu_low_pc = buf;
4718     img->di.cu_high_pc = buf + buf_size;
4719     img->di.fn_low_pc = buf;
4720     img->di.fn_high_pc = buf + buf_size;
4721 
4722     dfh = (DebugFrameHeader *)(img + 1);
4723     memcpy(dfh, debug_frame, debug_frame_size);
4724     dfh->fde.func_start = buf;
4725     dfh->fde.func_len = buf_size;
4726 
4727 #ifdef DEBUG_JIT
4728     /* Enable this block to be able to debug the ELF image file creation.
4729        One can use readelf, objdump, or other inspection utilities.  */
4730     {
4731         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4732         if (f) {
4733             if (fwrite(img, img_size, 1, f) != img_size) {
4734                 /* Avoid stupid unused return value warning for fwrite.  */
4735             }
4736             fclose(f);
4737         }
4738     }
4739 #endif
4740 
4741     one_entry.symfile_addr = img;
4742     one_entry.symfile_size = img_size;
4743 
4744     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4745     __jit_debug_descriptor.relevant_entry = &one_entry;
4746     __jit_debug_descriptor.first_entry = &one_entry;
4747     __jit_debug_register_code();
4748 }
4749 #else
4750 /* No support for the feature.  Provide the entry point expected by exec.c,
4751    and implement the internal function we declared earlier.  */
4752 
/*
 * No-op variant used when ELF_HOST_MACHINE is not defined: all arguments
 * are ignored and nothing is registered.
 */
static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
4758 
/*
 * No-op entry point used when ELF_HOST_MACHINE is not defined; @buf and
 * @buf_size are ignored.
 */
void tcg_register_jit(const void *buf, size_t buf_size)
{
}
4762 #endif /* ELF_HOST_MACHINE */
4763 
4764 #if !TCG_TARGET_MAYBE_vec
/*
 * Placeholder compiled only when TCG_TARGET_MAYBE_vec is false.  All
 * arguments are ignored; reaching this function trips
 * g_assert_not_reached().
 */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
4769 #endif
4770