/*
 * Tiny Code Generator for QEMU
 *
 * Copyright (c) 2008 Fabrice Bellard
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to deal
 * in the Software without restriction, including without limitation the rights
 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 * copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 * THE SOFTWARE.
 */

/* Define this to use liveness analysis (better code). */
#define USE_TCG_OPTIMIZATIONS

#include "qemu/osdep.h"

/* Define to dump the ELF file used to communicate with GDB.  */
#undef DEBUG_JIT

#include "qemu/error-report.h"
#include "qemu/cutils.h"
#include "qemu/host-utils.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/cacheflush.h"

/* Note: the long-term plan is to reduce the dependencies on the QEMU
   CPU definitions. Currently they are used for qemu_ld/st
   instructions. */
#define NO_CPU_IO_DEFS
#include "cpu.h"

#include "exec/exec-all.h"

#if !defined(CONFIG_USER_ONLY)
#include "hw/boards.h"
#endif

#include "tcg/tcg-op.h"

#if UINTPTR_MAX == UINT32_MAX
# define ELF_CLASS  ELFCLASS32
#else
# define ELF_CLASS  ELFCLASS64
#endif
#ifdef HOST_WORDS_BIGENDIAN
# define ELF_DATA   ELFDATA2MSB
#else
# define ELF_DATA   ELFDATA2LSB
#endif

#include "elf.h"
#include "exec/log.h"
#include "sysemu/sysemu.h"

/* Forward declarations for functions declared in tcg-target.c.inc and
   used here. */
static void tcg_target_init(TCGContext *s);
static void tcg_target_qemu_prologue(TCGContext *s);
static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
                        intptr_t value, intptr_t addend);

/* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;

typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;

typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;

static void tcg_register_jit_int(const void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));

/* Forward declarations for functions declared and used in tcg-target.c.inc. */
static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
static void tcg_out_movi(TCGContext *s, TCGType type,
                         TCGReg ret, tcg_target_long arg);
static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
                       const int *const_args);
#if TCG_TARGET_MAYBE_vec
static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                            TCGReg dst, TCGReg src);
static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, TCGReg base, intptr_t offset);
static void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                             TCGReg dst, int64_t arg);
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
                                   TCGReg dst, TCGReg src)
{
    g_assert_not_reached();
}
static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, TCGReg base, intptr_t offset)
{
    g_assert_not_reached();
}
static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type, unsigned vece,
                                    TCGReg dst, int64_t arg)
{
    g_assert_not_reached();
}
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
                       intptr_t arg2);
static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
                        TCGReg base, intptr_t ofs);
static void tcg_out_call(TCGContext *s, const tcg_insn_unit *target);
static int tcg_target_const_match(tcg_target_long val, TCGType type,
                                  const TCGArgConstraint *arg_ct);
#ifdef TCG_TARGET_NEED_LDST_LABELS
static int tcg_out_ldst_finalize(TCGContext *s);
#endif

#define TCG_HIGHWATER 1024

static TCGContext **tcg_ctxs;
static unsigned int n_tcg_ctxs;
TCGv_env cpu_env = 0;
const void *tcg_code_gen_epilogue;
uintptr_t tcg_splitwx_diff;

#ifndef CONFIG_TCG_INTERPRETER
tcg_prologue_fn *tcg_qemu_tb_exec;
#endif

struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};

/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;
    void *start_aligned;
    void *end;
    size_t n;
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};

static struct tcg_region_state region;
/*
 * This is an array of struct tcg_region_tree's, with padding.
 * We use void * to simplify the computation of region_trees[i]; each
 * struct is found every tree_size bytes.
 */
static void *region_trees;
static size_t tree_size;
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
static TCGRegSet tcg_target_call_clobber_regs;

#if TCG_TARGET_INSN_UNIT_SIZE == 1
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 2
static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
                                                       uint16_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 4
static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
                                                       uint32_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif

#if TCG_TARGET_INSN_UNIT_SIZE <= 8
static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *s->code_ptr++ = v;
    } else {
        tcg_insn_unit *p = s->code_ptr;
        memcpy(p, &v, sizeof(v));
        s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
    }
}

static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
                                                       uint64_t v)
{
    if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
        *p = v;
    } else {
        memcpy(p, &v, sizeof(v));
    }
}
#endif
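
/*
 * For example: on a host with 1-byte insn units (such as x86), tcg_out32()
 * takes the memcpy path and advances code_ptr by four units; on a host
 * with 4-byte units (such as aarch64) it takes the single-store path and
 * advances by one unit.  Either way exactly four bytes of code are emitted.
 */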

/* label relocation processing */

static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));

    r->type = type;
    r->ptr = code_ptr;
    r->addend = addend;
    QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
}

static void tcg_out_label(TCGContext *s, TCGLabel *l)
{
    tcg_debug_assert(!l->has_value);
    l->has_value = 1;
    l->u.value_ptr = tcg_splitwx_to_rx(s->code_ptr);
}

TCGLabel *gen_new_label(void)
{
    TCGContext *s = tcg_ctx;
    TCGLabel *l = tcg_malloc(sizeof(TCGLabel));

    memset(l, 0, sizeof(TCGLabel));
    l->id = s->nb_labels++;
    QSIMPLEQ_INIT(&l->relocs);

    QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);

    return l;
}

static bool tcg_resolve_relocs(TCGContext *s)
{
    TCGLabel *l;

    QSIMPLEQ_FOREACH(l, &s->labels, next) {
        TCGRelocation *r;
        uintptr_t value = l->u.value;

        QSIMPLEQ_FOREACH(r, &l->relocs, next) {
            if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
                return false;
            }
        }
    }
    return true;
}
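
/*
 * Lifecycle sketch: a branch emitted before its label is resolved calls
 * tcg_out_reloc() to queue a TCGRelocation on the label; when the label's
 * code address becomes known, tcg_out_label() records it; once code
 * generation is complete, tcg_resolve_relocs() walks every label and has
 * the backend's patch_reloc() rewrite each queued instruction.
 */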

static void set_jmp_reset_offset(TCGContext *s, int which)
{
    /*
     * We will check for overflow at the end of the opcode loop in
     * tcg_gen_code, where we bound tcg_current_code_size to UINT16_MAX.
     */
    s->tb_jmp_reset_offset[which] = tcg_current_code_size(s);
}

/* Signal overflow, starting over with fewer guest insns. */
static void QEMU_NORETURN tcg_raise_tb_overflow(TCGContext *s)
{
    siglongjmp(s->jmp_trans, -2);
}

#define C_PFX1(P, A)                    P##A
#define C_PFX2(P, A, B)                 P##A##_##B
#define C_PFX3(P, A, B, C)              P##A##_##B##_##C
#define C_PFX4(P, A, B, C, D)           P##A##_##B##_##C##_##D
#define C_PFX5(P, A, B, C, D, E)        P##A##_##B##_##C##_##D##_##E
#define C_PFX6(P, A, B, C, D, E, F)     P##A##_##B##_##C##_##D##_##E##_##F

/* Define an enumeration for the various combinations. */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1),
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2),
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3),
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4),

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1),
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2),
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3),
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4),

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2),

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1),
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2),
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3),
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4),

typedef enum {
#include "tcg-target-con-set.h"
} TCGConstraintSetIndex;
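
/*
 * Illustration of the three include passes: a line such as
 *     C_O1_I2(r, r, ri)
 * in tcg-target-con-set.h expands here to the enumerator c_o1_i2_r_r_ri,
 * below to the array entry { .args_ct_str = { "r", "r", "ri" } }, and
 * finally back to the same enumerator for use as a return value from
 * tcg_target_op_def(), keeping the enum and the array in sync by
 * construction.
 */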

static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode);

#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Put all of the constraint sets into an array, indexed by the enum. */

#define C_O0_I1(I1)                     { .args_ct_str = { #I1 } },
#define C_O0_I2(I1, I2)                 { .args_ct_str = { #I1, #I2 } },
#define C_O0_I3(I1, I2, I3)             { .args_ct_str = { #I1, #I2, #I3 } },
#define C_O0_I4(I1, I2, I3, I4)         { .args_ct_str = { #I1, #I2, #I3, #I4 } },

#define C_O1_I1(O1, I1)                 { .args_ct_str = { #O1, #I1 } },
#define C_O1_I2(O1, I1, I2)             { .args_ct_str = { #O1, #I1, #I2 } },
#define C_O1_I3(O1, I1, I2, I3)         { .args_ct_str = { #O1, #I1, #I2, #I3 } },
#define C_O1_I4(O1, I1, I2, I3, I4)     { .args_ct_str = { #O1, #I1, #I2, #I3, #I4 } },

#define C_N1_I2(O1, I1, I2)             { .args_ct_str = { "&" #O1, #I1, #I2 } },

#define C_O2_I1(O1, O2, I1)             { .args_ct_str = { #O1, #O2, #I1 } },
#define C_O2_I2(O1, O2, I1, I2)         { .args_ct_str = { #O1, #O2, #I1, #I2 } },
#define C_O2_I3(O1, O2, I1, I2, I3)     { .args_ct_str = { #O1, #O2, #I1, #I2, #I3 } },
#define C_O2_I4(O1, O2, I1, I2, I3, I4) { .args_ct_str = { #O1, #O2, #I1, #I2, #I3, #I4 } },

static const TCGTargetOpDef constraint_sets[] = {
#include "tcg-target-con-set.h"
};


#undef C_O0_I1
#undef C_O0_I2
#undef C_O0_I3
#undef C_O0_I4
#undef C_O1_I1
#undef C_O1_I2
#undef C_O1_I3
#undef C_O1_I4
#undef C_N1_I2
#undef C_O2_I1
#undef C_O2_I2
#undef C_O2_I3
#undef C_O2_I4

/* Expand the enumerator to be returned from tcg_target_op_def(). */

#define C_O0_I1(I1)                     C_PFX1(c_o0_i1_, I1)
#define C_O0_I2(I1, I2)                 C_PFX2(c_o0_i2_, I1, I2)
#define C_O0_I3(I1, I2, I3)             C_PFX3(c_o0_i3_, I1, I2, I3)
#define C_O0_I4(I1, I2, I3, I4)         C_PFX4(c_o0_i4_, I1, I2, I3, I4)

#define C_O1_I1(O1, I1)                 C_PFX2(c_o1_i1_, O1, I1)
#define C_O1_I2(O1, I1, I2)             C_PFX3(c_o1_i2_, O1, I1, I2)
#define C_O1_I3(O1, I1, I2, I3)         C_PFX4(c_o1_i3_, O1, I1, I2, I3)
#define C_O1_I4(O1, I1, I2, I3, I4)     C_PFX5(c_o1_i4_, O1, I1, I2, I3, I4)

#define C_N1_I2(O1, I1, I2)             C_PFX3(c_n1_i2_, O1, I1, I2)

#define C_O2_I1(O1, O2, I1)             C_PFX3(c_o2_i1_, O1, O2, I1)
#define C_O2_I2(O1, O2, I1, I2)         C_PFX4(c_o2_i2_, O1, O2, I1, I2)
#define C_O2_I3(O1, O2, I1, I2, I3)     C_PFX5(c_o2_i3_, O1, O2, I1, I2, I3)
#define C_O2_I4(O1, O2, I1, I2, I3, I4) C_PFX6(c_o2_i4_, O1, O2, I1, I2, I3, I4)

#include "tcg-target.c.inc"

/* compare a pointer @ptr and a tb_tc @s */
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
{
    if (ptr >= s->ptr + s->size) {
        return 1;
    } else if (ptr < s->ptr) {
        return -1;
    }
    return 0;
}

static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
{
    const struct tb_tc *a = ap;
    const struct tb_tc *b = bp;

    /*
     * When both sizes are set, we know this isn't a lookup.
     * This is the most likely case: every TB must be inserted; lookups
     * are a lot less frequent.
     */
    if (likely(a->size && b->size)) {
        if (a->ptr > b->ptr) {
            return 1;
        } else if (a->ptr < b->ptr) {
            return -1;
        }
        /* a->ptr == b->ptr should happen only on deletions */
        g_assert(a->size == b->size);
        return 0;
    }
    /*
     * In all lookups, one of the two .size fields is 0: the lookup key's.
     * From the glib sources we see that @ap is always the lookup key; however
     * the docs provide no guarantee, so we just mark this case as likely.
     */
    if (likely(a->size == 0)) {
        return ptr_cmp_tb_tc(a->ptr, b);
    }
    return ptr_cmp_tb_tc(b->ptr, a);
}
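
/*
 * A lookup key is therefore a struct tb_tc with .size == 0 and .ptr set to
 * the address of interest, as built in tcg_tb_lookup() below; stored
 * elements carry their real size, so ptr_cmp_tb_tc() can range-match the
 * address against each candidate TB.
 */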

static void tcg_region_trees_init(void)
{
    size_t i;

    tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
    region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_init(&rt->lock);
        rt->tree = g_tree_new(tb_tc_cmp);
    }
}

static struct tcg_region_tree *tc_ptr_to_region_tree(const void *p)
{
    size_t region_idx;

    /*
     * Like tcg_splitwx_to_rw, with no assert.  The pc may come from
     * a signal handler over which the caller has no control.
     */
    if (!in_code_gen_buffer(p)) {
        p -= tcg_splitwx_diff;
        if (!in_code_gen_buffer(p)) {
            return NULL;
        }
    }

    if (p < region.start_aligned) {
        region_idx = 0;
    } else {
        ptrdiff_t offset = p - region.start_aligned;

        if (offset > region.stride * (region.n - 1)) {
            region_idx = region.n - 1;
        } else {
            region_idx = offset / region.stride;
        }
    }
    return region_trees + region_idx * tree_size;
}

void tcg_tb_insert(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_insert(rt->tree, &tb->tc, tb);
    qemu_mutex_unlock(&rt->lock);
}

void tcg_tb_remove(TranslationBlock *tb)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);

    g_assert(rt != NULL);
    qemu_mutex_lock(&rt->lock);
    g_tree_remove(rt->tree, &tb->tc);
    qemu_mutex_unlock(&rt->lock);
}

/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    if (rt == NULL) {
        return NULL;
    }

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}

static void tcg_region_tree_lock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_lock(&rt->lock);
    }
}

static void tcg_region_tree_unlock_all(void)
{
    size_t i;

    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        qemu_mutex_unlock(&rt->lock);
    }
}

void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, func, user_data);
    }
    tcg_region_tree_unlock_all();
}

size_t tcg_nb_tbs(void)
{
    size_t nb_tbs = 0;
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        nb_tbs += g_tree_nnodes(rt->tree);
    }
    tcg_region_tree_unlock_all();
    return nb_tbs;
}

static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
{
    TranslationBlock *tb = v;

    tb_destroy(tb);
    return FALSE;
}

static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}

static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
{
    void *start, *end;

    start = region.start_aligned + curr_region * region.stride;
    end = start + region.size;

    if (curr_region == 0) {
        start = region.start;
    }
    if (curr_region == region.n - 1) {
        end = region.end;
    }

    *pstart = start;
    *pend = end;
}
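
/*
 * To illustrate the bounds computed above: region i normally spans
 * [start_aligned + i * stride, start_aligned + i * stride + size).
 * Region 0 is extended down to region.start to reclaim the bytes lost to
 * the initial page alignment, and the last region is extended up to
 * region.end to absorb the pages left over by rounding region_size down.
 */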

static void tcg_region_assign(TCGContext *s, size_t curr_region)
{
    void *start, *end;

    tcg_region_bounds(curr_region, &start, &end);

    s->code_gen_buffer = start;
    s->code_gen_ptr = start;
    s->code_gen_buffer_size = end - start;
    s->code_gen_highwater = end - TCG_HIGHWATER;
}

static bool tcg_region_alloc__locked(TCGContext *s)
{
    if (region.current == region.n) {
        return true;
    }
    tcg_region_assign(s, region.current);
    region.current++;
    return false;
}

/*
 * Request a new region once the one in use has filled up.
 * Returns true on error.
 */
static bool tcg_region_alloc(TCGContext *s)
{
    bool err;
    /* read the region size now; alloc__locked will overwrite it on success */
    size_t size_full = s->code_gen_buffer_size;

    qemu_mutex_lock(&region.lock);
    err = tcg_region_alloc__locked(s);
    if (!err) {
        region.agg_size_full += size_full - TCG_HIGHWATER;
    }
    qemu_mutex_unlock(&region.lock);
    return err;
}

/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}

/* Call from a safe-work context */
void tcg_region_reset_all(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;

    qemu_mutex_lock(&region.lock);
    region.current = 0;
    region.agg_size_full = 0;

    for (i = 0; i < n_ctxs; i++) {
        TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        bool err = tcg_region_initial_alloc__locked(s);

        g_assert(!err);
    }
    qemu_mutex_unlock(&region.lock);

    tcg_region_tree_reset_all();
}

#ifdef CONFIG_USER_ONLY
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
#if !defined(CONFIG_USER_ONLY)
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
#endif
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
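
/*
 * Worked example with illustrative numbers: given a 1 GiB code_gen_buffer
 * and max_cpus = 8, the first loop iteration tries 8 regions per vCPU,
 * i.e. 64 regions of 16 MiB each; 16 MiB >= 2 MiB, so 64 is returned.
 * Only a much smaller buffer (or far more vCPUs) falls through toward one
 * region per vCPU.
 */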

/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;
    uintptr_t splitwx_diff;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    splitwx_diff = tcg_splitwx_diff;
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
        if (splitwx_diff) {
            rc = qemu_mprotect_none(end + splitwx_diff, page_size);
            g_assert(!rc);
        }
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
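
/*
 * Resulting layout, sketched: each stride holds one region's usable pages
 * followed by a single guard page made inaccessible above:
 *
 *   |<--- stride --->|<--- stride --->| ... |<--- stride --->|
 *   [ code  ][guard ][ code  ][guard ] ... [ code  ][guard ]
 *
 * region.end was already pulled back by one page, so the final guard page
 * also lies within the buffer.
 */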

#ifdef CONFIG_DEBUG_TCG
const void *tcg_splitwx_to_rx(void *rw)
{
    /* Pass NULL pointers unchanged. */
    if (rw) {
        g_assert(in_code_gen_buffer(rw));
        rw += tcg_splitwx_diff;
    }
    return rw;
}

void *tcg_splitwx_to_rw(const void *rx)
{
    /* Pass NULL pointers unchanged. */
    if (rx) {
        rx -= tcg_splitwx_diff;
        /* Assert that we end with a pointer in the rw region. */
        g_assert(in_code_gen_buffer(rx));
    }
    return (void *)rx;
}
#endif /* CONFIG_DEBUG_TCG */
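
/*
 * The split-wx convention, in short: when a split mapping is in use, the
 * writable (rw) and executable (rx) views of the code buffer differ by the
 * constant tcg_splitwx_diff, so rx = rw + tcg_splitwx_diff and
 * rw = rx - tcg_splitwx_diff; without a split mapping the diff is 0 and
 * both conversions are identities.
 */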

static void alloc_tcg_plugin_context(TCGContext *s)
{
#ifdef CONFIG_PLUGIN
    s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
    s->plugin_tb->insns =
        g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
#endif
}

/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    MachineState *ms = MACHINE(qdev_get_machine());
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = qatomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < ms->smp.max_cpus);
    qatomic_set(&tcg_ctxs[n], s);

    if (n > 0) {
        alloc_tcg_plugin_context(s);
    }

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */

/*
 * Returns the size (in bytes) of all translated code (i.e. from all regions)
 * currently in the cache.
 * See also: tcg_code_capacity()
 * Do not confuse with tcg_current_code_size(); that one applies to a single
 * TCG context.
 */
size_t tcg_code_size(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total;

    qemu_mutex_lock(&region.lock);
    total = region.agg_size_full;
    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
        size_t size;

        size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
        g_assert(size <= s->code_gen_buffer_size);
        total += size;
    }
    qemu_mutex_unlock(&region.lock);
    return total;
}

/*
 * Returns the code capacity (in bytes) of the entire cache, i.e. including all
 * regions.
 * See also: tcg_code_size()
 */
size_t tcg_code_capacity(void)
{
    size_t guard_size, capacity;

    /* no need for synchronization; these variables are set at init time */
    guard_size = region.stride - region.size;
    capacity = region.end + guard_size - region.start;
    capacity -= region.n * (guard_size + TCG_HIGHWATER);
    return capacity;
}

size_t tcg_tb_phys_invalidate_count(void)
{
    unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
    unsigned int i;
    size_t total = 0;

    for (i = 0; i < n_ctxs; i++) {
        const TCGContext *s = qatomic_read(&tcg_ctxs[i]);

        total += qatomic_read(&s->tb_phys_invalidate_count);
    }
    return total;
}

/* pool based memory allocation */
void *tcg_malloc_internal(TCGContext *s, int size)
{
    TCGPool *p;
    int pool_size;

    if (size > TCG_POOL_CHUNK_SIZE) {
        /* big malloc: insert a new pool (XXX: could optimize) */
        p = g_malloc(sizeof(TCGPool) + size);
        p->size = size;
        p->next = s->pool_first_large;
        s->pool_first_large = p;
        return p->data;
    } else {
        p = s->pool_current;
        if (!p) {
            p = s->pool_first;
            if (!p) {
                goto new_pool;
            }
        } else {
            if (!p->next) {
            new_pool:
                pool_size = TCG_POOL_CHUNK_SIZE;
                p = g_malloc(sizeof(TCGPool) + pool_size);
                p->size = pool_size;
                p->next = NULL;
                if (s->pool_current) {
                    s->pool_current->next = p;
                } else {
                    s->pool_first = p;
                }
            } else {
                p = p->next;
            }
        }
    }
    s->pool_current = p;
    s->pool_cur = p->data + size;
    s->pool_end = p->data + p->size;
    return p->data;
}
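
/*
 * Typical use of the pool above: short-lived structures such as
 * TCGRelocation and TCGLabel (see tcg_out_reloc() and gen_new_label()) are
 * carved out of pool chunks with tcg_malloc() and never freed one by one;
 * the whole pool is recycled at once via tcg_pool_reset() when translation
 * of the next TB starts (see tcg_func_start()).
 */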

void tcg_pool_reset(TCGContext *s)
{
    TCGPool *p, *t;
    for (p = s->pool_first_large; p; p = t) {
        t = p->next;
        g_free(p);
    }
    s->pool_first_large = NULL;
    s->pool_cur = s->pool_end = NULL;
    s->pool_current = NULL;
}

typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;

#include "exec/helper-proto.h"

static const TCGHelperInfo all_helpers[] = {
#include "exec/helper-tcg.h"
};
static GHashTable *helper_table;

static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
static void process_op_defs(TCGContext *s);
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name);

void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_new0(TCGArgConstraint, total_args);

    for (op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        n = def->nb_iargs + def->nb_oargs;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    alloc_tcg_plugin_context(s);

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation of tcg_region_init() for
     * the reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    MachineState *ms = MACHINE(qdev_get_machine());
    unsigned int max_cpus = ms->smp.max_cpus;
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}

/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        if (tcg_region_alloc(s)) {
            return NULL;
        }
        goto retry;
    }
    qatomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}

void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;

    /*
     * The region trees are not yet configured, but tcg_splitwx_to_rx
     * needs the bounds for an assert.
     */
    region.start = buf0;
    region.end = buf0 + total_size;

#ifndef CONFIG_TCG_INTERPRETER
    tcg_qemu_tb_exec = (tcg_prologue_fn *)tcg_splitwx_to_rx(buf0);
#endif

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    qemu_thread_jit_write();
    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        int result = tcg_out_pool_finalize(s);
        tcg_debug_assert(result == 0);
    }
#endif

    buf1 = s->code_ptr;
#ifndef CONFIG_TCG_INTERPRETER
    flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(buf0), (uintptr_t)buf0,
                        tcg_ptr_byte_diff(buf1, buf0));
#endif

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(tcg_splitwx_to_rx(s->code_gen_buffer), total_size);

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        FILE *logfile = qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock(logfile);
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(tcg_code_gen_epilogue != NULL);
    }
}

void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    /* No constant temps have been previously allocated. */
    for (int i = 0; i < TCG_TYPE_COUNT; ++i) {
        if (s->const_table[i]) {
            g_hash_table_remove_all(s->const_table[i]);
        }
    }

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
    QSIMPLEQ_INIT(&s->labels);
}

static TCGTemp *tcg_temp_alloc(TCGContext *s)
{
    int n = s->nb_temps++;

    if (n >= TCG_MAX_TEMPS) {
        tcg_raise_tb_overflow(s);
    }
    return memset(&s->temps[n], 0, sizeof(TCGTemp));
}

static TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    tcg_debug_assert(s->nb_globals < TCG_MAX_TEMPS);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->kind = TEMP_GLOBAL;

    return ts;
}

static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->kind = TEMP_FIXED;
    ts->reg = reg;
    ts->name = name;
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}

void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}

TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    switch (base_ts->kind) {
    case TEMP_FIXED:
        break;
    case TEMP_GLOBAL:
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
        break;
    default:
        g_assert_not_reached();
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
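
/*
 * Example of the 32-bit-host split above: a 64-bit guest global named,
 * say, "foo" at offset 0 becomes two TCG_TYPE_I32 halves "foo_0" and
 * "foo_1"; on a little-endian host the low half sits at offset 0 and the
 * high half at offset 4, with the offsets swapped on a big-endian host.
 */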

TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTempKind kind = temp_local ? TEMP_LOCAL : TEMP_NORMAL;
    TCGTemp *ts;
    int idx, k;

    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->kind == kind);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->kind = kind;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->kind = kind;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->kind = kind;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use++;
#endif
    return ts;
}
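
/*
 * The free_temps index k above packs type and locality into one number:
 * k == type for TEMP_NORMAL temps and k == type + TCG_TYPE_COUNT for
 * TEMP_LOCAL ones, so e.g. a freed local TCG_TYPE_I64 temp is tracked in
 * the bitmap s->free_temps[TCG_TYPE_I64 + TCG_TYPE_COUNT].  The same
 * encoding is used when freeing in tcg_temp_free_internal().
 */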

TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}

/* Create a new temp of the same type as an existing temp.  */
TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);

    t = tcg_temp_new_internal(t->base_type, 0);
    return temp_tcgv_vec(t);
}

void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

    /* In order to simplify users of tcg_constant_*, silently ignore free. */
    if (ts->kind == TEMP_CONST) {
        return;
    }

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->kind < TEMP_GLOBAL);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    idx = temp_idx(ts);
    k = ts->base_type + (ts->kind == TEMP_NORMAL ? 0 : TCG_TYPE_COUNT);
    set_bit(idx, s->free_temps[k].l);
}

TCGTemp *tcg_constant_internal(TCGType type, int64_t val)
{
    TCGContext *s = tcg_ctx;
    GHashTable *h = s->const_table[type];
    TCGTemp *ts;

    if (h == NULL) {
        h = g_hash_table_new(g_int64_hash, g_int64_equal);
        s->const_table[type] = h;
    }

    ts = g_hash_table_lookup(h, &val);
    if (ts == NULL) {
        ts = tcg_temp_alloc(s);

        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = TCG_TYPE_I64;
            ts->type = TCG_TYPE_I32;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            /*
             * Retain the full value of the 64-bit constant in the low
             * part, so that the hash table works.  Actual uses will
             * truncate the value to the low part.
             */
            ts->val = val;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->kind = TEMP_CONST;
            ts2->temp_allocated = 1;
            ts2->val = val >> 32;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->kind = TEMP_CONST;
            ts->temp_allocated = 1;
            ts->val = val;
        }
        g_hash_table_insert(h, &ts->val, ts);
    }

    return ts;
}
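
/*
 * Usage sketch for the interning above: within one translation, repeated
 * requests for the same (type, value) pair return the same TEMP_CONST
 * temp, and freeing such a temp is a silent no-op (see
 * tcg_temp_free_internal() above), so callers need not track them.  The
 * per-type tables are emptied again in tcg_func_start().
 */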

TCGv_vec tcg_constant_vec(TCGType type, unsigned vece, int64_t val)
{
    val = dup_const(vece, val);
    return temp_tcgv_vec(tcg_constant_internal(type, val));
}

TCGv_vec tcg_constant_vec_matching(TCGv_vec match, unsigned vece, int64_t val)
{
    TCGTemp *t = tcgv_vec_temp(match);

    tcg_debug_assert(t->temp_allocated != 0);
    return tcg_constant_vec(t->base_type, vece, val);
}

TCGv_i32 tcg_const_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}

TCGv_i32 tcg_const_local_i32(int32_t val)
{
    TCGv_i32 t0;
    t0 = tcg_temp_local_new_i32();
    tcg_gen_movi_i32(t0, val);
    return t0;
}

TCGv_i64 tcg_const_local_i64(int64_t val)
{
    TCGv_i64 t0;
    t0 = tcg_temp_local_new_i64();
    tcg_gen_movi_i64(t0, val);
    return t0;
}
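
/*
 * Note the contrast with the interned constants: the tcg_const_*() helpers
 * above allocate a fresh mutable temp and emit a movi into it, so the
 * result may be overwritten and must eventually be freed by the caller,
 * whereas tcg_constant_internal() returns a shared read-only temp whose
 * free is ignored.
 */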

#if defined(CONFIG_DEBUG_TCG)
void tcg_clear_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    s->temps_in_use = 0;
}

int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    if (s->temps_in_use) {
        /* Clear the count so that we don't give another
         * warning immediately next time around.
         */
        s->temps_in_use = 0;
        return 1;
    }
    return 0;
}
#endif

/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_qemu_st8_i32:
        return TCG_TARGET_HAS_qemu_st8_i32;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    case INDEX_op_mov_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_extract2_i32:
        return TCG_TARGET_HAS_extract2_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    case INDEX_op_mov_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
1818     case INDEX_op_rem_i64:
1819     case INDEX_op_remu_i64:
1820         return TCG_TARGET_HAS_rem_i64;
1821     case INDEX_op_div2_i64:
1822     case INDEX_op_divu2_i64:
1823         return TCG_TARGET_HAS_div2_i64;
1824     case INDEX_op_rotl_i64:
1825     case INDEX_op_rotr_i64:
1826         return TCG_TARGET_HAS_rot_i64;
1827     case INDEX_op_deposit_i64:
1828         return TCG_TARGET_HAS_deposit_i64;
1829     case INDEX_op_extract_i64:
1830         return TCG_TARGET_HAS_extract_i64;
1831     case INDEX_op_sextract_i64:
1832         return TCG_TARGET_HAS_sextract_i64;
1833     case INDEX_op_extract2_i64:
1834         return TCG_TARGET_HAS_extract2_i64;
1835     case INDEX_op_extrl_i64_i32:
1836         return TCG_TARGET_HAS_extrl_i64_i32;
1837     case INDEX_op_extrh_i64_i32:
1838         return TCG_TARGET_HAS_extrh_i64_i32;
1839     case INDEX_op_ext8s_i64:
1840         return TCG_TARGET_HAS_ext8s_i64;
1841     case INDEX_op_ext16s_i64:
1842         return TCG_TARGET_HAS_ext16s_i64;
1843     case INDEX_op_ext32s_i64:
1844         return TCG_TARGET_HAS_ext32s_i64;
1845     case INDEX_op_ext8u_i64:
1846         return TCG_TARGET_HAS_ext8u_i64;
1847     case INDEX_op_ext16u_i64:
1848         return TCG_TARGET_HAS_ext16u_i64;
1849     case INDEX_op_ext32u_i64:
1850         return TCG_TARGET_HAS_ext32u_i64;
1851     case INDEX_op_bswap16_i64:
1852         return TCG_TARGET_HAS_bswap16_i64;
1853     case INDEX_op_bswap32_i64:
1854         return TCG_TARGET_HAS_bswap32_i64;
1855     case INDEX_op_bswap64_i64:
1856         return TCG_TARGET_HAS_bswap64_i64;
1857     case INDEX_op_not_i64:
1858         return TCG_TARGET_HAS_not_i64;
1859     case INDEX_op_neg_i64:
1860         return TCG_TARGET_HAS_neg_i64;
1861     case INDEX_op_andc_i64:
1862         return TCG_TARGET_HAS_andc_i64;
1863     case INDEX_op_orc_i64:
1864         return TCG_TARGET_HAS_orc_i64;
1865     case INDEX_op_eqv_i64:
1866         return TCG_TARGET_HAS_eqv_i64;
1867     case INDEX_op_nand_i64:
1868         return TCG_TARGET_HAS_nand_i64;
1869     case INDEX_op_nor_i64:
1870         return TCG_TARGET_HAS_nor_i64;
1871     case INDEX_op_clz_i64:
1872         return TCG_TARGET_HAS_clz_i64;
1873     case INDEX_op_ctz_i64:
1874         return TCG_TARGET_HAS_ctz_i64;
1875     case INDEX_op_ctpop_i64:
1876         return TCG_TARGET_HAS_ctpop_i64;
1877     case INDEX_op_add2_i64:
1878         return TCG_TARGET_HAS_add2_i64;
1879     case INDEX_op_sub2_i64:
1880         return TCG_TARGET_HAS_sub2_i64;
1881     case INDEX_op_mulu2_i64:
1882         return TCG_TARGET_HAS_mulu2_i64;
1883     case INDEX_op_muls2_i64:
1884         return TCG_TARGET_HAS_muls2_i64;
1885     case INDEX_op_muluh_i64:
1886         return TCG_TARGET_HAS_muluh_i64;
1887     case INDEX_op_mulsh_i64:
1888         return TCG_TARGET_HAS_mulsh_i64;
1889 
1890     case INDEX_op_mov_vec:
1891     case INDEX_op_dup_vec:
1892     case INDEX_op_dupm_vec:
1893     case INDEX_op_ld_vec:
1894     case INDEX_op_st_vec:
1895     case INDEX_op_add_vec:
1896     case INDEX_op_sub_vec:
1897     case INDEX_op_and_vec:
1898     case INDEX_op_or_vec:
1899     case INDEX_op_xor_vec:
1900     case INDEX_op_cmp_vec:
1901         return have_vec;
1902     case INDEX_op_dup2_vec:
1903         return have_vec && TCG_TARGET_REG_BITS == 32;
1904     case INDEX_op_not_vec:
1905         return have_vec && TCG_TARGET_HAS_not_vec;
1906     case INDEX_op_neg_vec:
1907         return have_vec && TCG_TARGET_HAS_neg_vec;
1908     case INDEX_op_abs_vec:
1909         return have_vec && TCG_TARGET_HAS_abs_vec;
1910     case INDEX_op_andc_vec:
1911         return have_vec && TCG_TARGET_HAS_andc_vec;
1912     case INDEX_op_orc_vec:
1913         return have_vec && TCG_TARGET_HAS_orc_vec;
1914     case INDEX_op_mul_vec:
1915         return have_vec && TCG_TARGET_HAS_mul_vec;
1916     case INDEX_op_shli_vec:
1917     case INDEX_op_shri_vec:
1918     case INDEX_op_sari_vec:
1919         return have_vec && TCG_TARGET_HAS_shi_vec;
1920     case INDEX_op_shls_vec:
1921     case INDEX_op_shrs_vec:
1922     case INDEX_op_sars_vec:
1923         return have_vec && TCG_TARGET_HAS_shs_vec;
1924     case INDEX_op_shlv_vec:
1925     case INDEX_op_shrv_vec:
1926     case INDEX_op_sarv_vec:
1927         return have_vec && TCG_TARGET_HAS_shv_vec;
1928     case INDEX_op_rotli_vec:
1929         return have_vec && TCG_TARGET_HAS_roti_vec;
1930     case INDEX_op_rotls_vec:
1931         return have_vec && TCG_TARGET_HAS_rots_vec;
1932     case INDEX_op_rotlv_vec:
1933     case INDEX_op_rotrv_vec:
1934         return have_vec && TCG_TARGET_HAS_rotv_vec;
1935     case INDEX_op_ssadd_vec:
1936     case INDEX_op_usadd_vec:
1937     case INDEX_op_sssub_vec:
1938     case INDEX_op_ussub_vec:
1939         return have_vec && TCG_TARGET_HAS_sat_vec;
1940     case INDEX_op_smin_vec:
1941     case INDEX_op_umin_vec:
1942     case INDEX_op_smax_vec:
1943     case INDEX_op_umax_vec:
1944         return have_vec && TCG_TARGET_HAS_minmax_vec;
1945     case INDEX_op_bitsel_vec:
1946         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1947     case INDEX_op_cmpsel_vec:
1948         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1949 
1950     default:
1951         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1952         return true;
1953     }
1954 }
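
/*
 * Editorial sketch (not part of the original file): expansion code can
 * use this to choose between emitting an opcode directly and an
 * equivalent fallback sequence, e.g.:
 *
 *     if (tcg_op_supported(INDEX_op_ctpop_i32)) {
 *         // emit ctpop_i32 directly
 *     } else {
 *         // expand the population count from shifts, ands and adds
 *     }
 */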
1955 
1956 /* Note: we convert the 64-bit args to 32-bit and do some alignment
1957    and endian swapping. Maybe it would be better to do the alignment
1958    and endian swapping in tcg_reg_alloc_call(). */
1959 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1960 {
1961     int i, real_args, nb_rets, pi;
1962     unsigned sizemask, flags;
1963     TCGHelperInfo *info;
1964     TCGOp *op;
1965 
1966     info = g_hash_table_lookup(helper_table, (gpointer)func);
1967     flags = info->flags;
1968     sizemask = info->sizemask;
1969 
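    /*
     * Editorial note (not part of the original file): sizemask packs two
     * bits per value, return value first: bit 0 is set if the return is
     * 64-bit, and for argument n (0-based) bit 2n+2 is its 64-bit flag
     * and bit 2n+3 its signedness, matching the (1 << (i+1)*2) and
     * (2 << (i+1)*2) tests below.  E.g. uint64_t h(uint32_t a,
     * uint64_t b) yields sizemask == 0x11 (bit 0 for the return,
     * bit 4 for b).
     */
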
1970 #ifdef CONFIG_PLUGIN
1971     /* detect non-plugin helpers */
1972     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1973         tcg_ctx->plugin_insn->calls_helpers = true;
1974     }
1975 #endif
1976 
1977 #if defined(__sparc__) && !defined(__arch64__) \
1978     && !defined(CONFIG_TCG_INTERPRETER)
1979     /* We have 64-bit values in one register, but need to pass them as
1980        two separate parameters.  Split them.  */
1981     int orig_sizemask = sizemask;
1982     int orig_nargs = nargs;
1983     TCGv_i64 retl, reth;
1984     TCGTemp *split_args[MAX_OPC_PARAM];
1985 
1986     retl = NULL;
1987     reth = NULL;
1988     if (sizemask != 0) {
1989         for (i = real_args = 0; i < nargs; ++i) {
1990             int is_64bit = sizemask & (1 << (i+1)*2);
1991             if (is_64bit) {
1992                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1993                 TCGv_i32 h = tcg_temp_new_i32();
1994                 TCGv_i32 l = tcg_temp_new_i32();
1995                 tcg_gen_extr_i64_i32(l, h, orig);
1996                 split_args[real_args++] = tcgv_i32_temp(h);
1997                 split_args[real_args++] = tcgv_i32_temp(l);
1998             } else {
1999                 split_args[real_args++] = args[i];
2000             }
2001         }
2002         nargs = real_args;
2003         args = split_args;
2004         sizemask = 0;
2005     }
2006 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2007     for (i = 0; i < nargs; ++i) {
2008         int is_64bit = sizemask & (1 << (i+1)*2);
2009         int is_signed = sizemask & (2 << (i+1)*2);
2010         if (!is_64bit) {
2011             TCGv_i64 temp = tcg_temp_new_i64();
2012             TCGv_i64 orig = temp_tcgv_i64(args[i]);
2013             if (is_signed) {
2014                 tcg_gen_ext32s_i64(temp, orig);
2015             } else {
2016                 tcg_gen_ext32u_i64(temp, orig);
2017             }
2018             args[i] = tcgv_i64_temp(temp);
2019         }
2020     }
2021 #endif /* TCG_TARGET_EXTEND_ARGS */
2022 
2023     op = tcg_emit_op(INDEX_op_call);
2024 
2025     pi = 0;
2026     if (ret != NULL) {
2027 #if defined(__sparc__) && !defined(__arch64__) \
2028     && !defined(CONFIG_TCG_INTERPRETER)
2029         if (orig_sizemask & 1) {
2030             /* The 32-bit ABI is going to return the 64-bit value in
2031                the %o0/%o1 register pair.  Prepare for this by using
2032                two return temporaries, and reassemble below.  */
2033             retl = tcg_temp_new_i64();
2034             reth = tcg_temp_new_i64();
2035             op->args[pi++] = tcgv_i64_arg(reth);
2036             op->args[pi++] = tcgv_i64_arg(retl);
2037             nb_rets = 2;
2038         } else {
2039             op->args[pi++] = temp_arg(ret);
2040             nb_rets = 1;
2041         }
2042 #else
2043         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
2044 #ifdef HOST_WORDS_BIGENDIAN
2045             op->args[pi++] = temp_arg(ret + 1);
2046             op->args[pi++] = temp_arg(ret);
2047 #else
2048             op->args[pi++] = temp_arg(ret);
2049             op->args[pi++] = temp_arg(ret + 1);
2050 #endif
2051             nb_rets = 2;
2052         } else {
2053             op->args[pi++] = temp_arg(ret);
2054             nb_rets = 1;
2055         }
2056 #endif
2057     } else {
2058         nb_rets = 0;
2059     }
2060     TCGOP_CALLO(op) = nb_rets;
2061 
2062     real_args = 0;
2063     for (i = 0; i < nargs; i++) {
2064         int is_64bit = sizemask & (1 << (i+1)*2);
2065         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
2066 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
2067             /* some targets want aligned 64-bit args */
2068             if (real_args & 1) {
2069                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
2070                 real_args++;
2071             }
2072 #endif
2073            /* If stack grows up, then we will be placing successive
2074               arguments at lower addresses, which means we need to
2075               reverse the order compared to how we would normally
2076               treat either big or little-endian.  For those arguments
2077               that will wind up in registers, this still works for
2078               HPPA (the only current STACK_GROWSUP target) since the
2079               argument registers are *also* allocated in decreasing
2080               order.  If another such target is added, this logic may
2081               have to get more complicated to differentiate between
2082               stack arguments and register arguments.  */
2083 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
2084             op->args[pi++] = temp_arg(args[i] + 1);
2085             op->args[pi++] = temp_arg(args[i]);
2086 #else
2087             op->args[pi++] = temp_arg(args[i]);
2088             op->args[pi++] = temp_arg(args[i] + 1);
2089 #endif
2090             real_args += 2;
2091             continue;
2092         }
2093 
2094         op->args[pi++] = temp_arg(args[i]);
2095         real_args++;
2096     }
2097     op->args[pi++] = (uintptr_t)func;
2098     op->args[pi++] = flags;
2099     TCGOP_CALLI(op) = real_args;
2100 
2101     /* Make sure the fields didn't overflow.  */
2102     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
2103     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
2104 
2105 #if defined(__sparc__) && !defined(__arch64__) \
2106     && !defined(CONFIG_TCG_INTERPRETER)
2107     /* Free all of the parts we allocated above.  */
2108     for (i = real_args = 0; i < orig_nargs; ++i) {
2109         int is_64bit = orig_sizemask & (1 << (i+1)*2);
2110         if (is_64bit) {
2111             tcg_temp_free_internal(args[real_args++]);
2112             tcg_temp_free_internal(args[real_args++]);
2113         } else {
2114             real_args++;
2115         }
2116     }
2117     if (orig_sizemask & 1) {
2118         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
2119            Note that describing these as TCGv_i64 eliminates an unnecessary
2120            zero-extension that tcg_gen_concat_i32_i64 would create.  */
2121         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
2122         tcg_temp_free_i64(retl);
2123         tcg_temp_free_i64(reth);
2124     }
2125 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
2126     for (i = 0; i < nargs; ++i) {
2127         int is_64bit = sizemask & (1 << (i+1)*2);
2128         if (!is_64bit) {
2129             tcg_temp_free_internal(args[i]);
2130         }
2131     }
2132 #endif /* TCG_TARGET_EXTEND_ARGS */
2133 }
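
/*
 * Editorial sketch (not part of the original file): tcg_gen_callN is
 * normally reached through the macro-generated helper wrappers in
 * helper-gen.h rather than called directly; such a wrapper reduces to
 * roughly the following, where helper_foo and the operand types are
 * purely illustrative:
 *
 *     TCGTemp *args[2] = { tcgv_i32_temp(a), tcgv_i32_temp(b) };
 *     tcg_gen_callN(helper_foo, tcgv_i32_temp(ret), 2, args);
 */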
2134 
2135 static void tcg_reg_alloc_start(TCGContext *s)
2136 {
2137     int i, n;
2138 
2139     for (i = 0, n = s->nb_temps; i < n; i++) {
2140         TCGTemp *ts = &s->temps[i];
2141         TCGTempVal val = TEMP_VAL_MEM;
2142 
2143         switch (ts->kind) {
2144         case TEMP_CONST:
2145             val = TEMP_VAL_CONST;
2146             break;
2147         case TEMP_FIXED:
2148             val = TEMP_VAL_REG;
2149             break;
2150         case TEMP_GLOBAL:
2151             break;
2152         case TEMP_NORMAL:
2153             val = TEMP_VAL_DEAD;
2154             /* fall through */
2155         case TEMP_LOCAL:
2156             ts->mem_allocated = 0;
2157             break;
2158         default:
2159             g_assert_not_reached();
2160         }
2161         ts->val_type = val;
2162     }
2163 
2164     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
2165 }
2166 
2167 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
2168                                  TCGTemp *ts)
2169 {
2170     int idx = temp_idx(ts);
2171 
2172     switch (ts->kind) {
2173     case TEMP_FIXED:
2174     case TEMP_GLOBAL:
2175         pstrcpy(buf, buf_size, ts->name);
2176         break;
2177     case TEMP_LOCAL:
2178         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
2179         break;
2180     case TEMP_NORMAL:
2181         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
2182         break;
2183     case TEMP_CONST:
2184         switch (ts->type) {
2185         case TCG_TYPE_I32:
2186             snprintf(buf, buf_size, "$0x%x", (int32_t)ts->val);
2187             break;
2188 #if TCG_TARGET_REG_BITS > 32
2189         case TCG_TYPE_I64:
2190             snprintf(buf, buf_size, "$0x%" PRIx64, ts->val);
2191             break;
2192 #endif
2193         case TCG_TYPE_V64:
2194         case TCG_TYPE_V128:
2195         case TCG_TYPE_V256:
2196             snprintf(buf, buf_size, "v%d$0x%" PRIx64,
2197                      64 << (ts->type - TCG_TYPE_V64), ts->val);
2198             break;
2199         default:
2200             g_assert_not_reached();
2201         }
2202         break;
2203     }
2204     return buf;
2205 }
2206 
2207 static char *tcg_get_arg_str(TCGContext *s, char *buf,
2208                              int buf_size, TCGArg arg)
2209 {
2210     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
2211 }
2212 
2213 /* Find helper name.  */
2214 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
2215 {
2216     const char *ret = NULL;
2217     if (helper_table) {
2218         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
2219         if (info) {
2220             ret = info->name;
2221         }
2222     }
2223     return ret;
2224 }
2225 
2226 static const char * const cond_name[] =
2227 {
2228     [TCG_COND_NEVER] = "never",
2229     [TCG_COND_ALWAYS] = "always",
2230     [TCG_COND_EQ] = "eq",
2231     [TCG_COND_NE] = "ne",
2232     [TCG_COND_LT] = "lt",
2233     [TCG_COND_GE] = "ge",
2234     [TCG_COND_LE] = "le",
2235     [TCG_COND_GT] = "gt",
2236     [TCG_COND_LTU] = "ltu",
2237     [TCG_COND_GEU] = "geu",
2238     [TCG_COND_LEU] = "leu",
2239     [TCG_COND_GTU] = "gtu"
2240 };
2241 
2242 static const char * const ldst_name[] =
2243 {
2244     [MO_UB]   = "ub",
2245     [MO_SB]   = "sb",
2246     [MO_LEUW] = "leuw",
2247     [MO_LESW] = "lesw",
2248     [MO_LEUL] = "leul",
2249     [MO_LESL] = "lesl",
2250     [MO_LEQ]  = "leq",
2251     [MO_BEUW] = "beuw",
2252     [MO_BESW] = "besw",
2253     [MO_BEUL] = "beul",
2254     [MO_BESL] = "besl",
2255     [MO_BEQ]  = "beq",
2256 };
2257 
2258 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
2259 #ifdef TARGET_ALIGNED_ONLY
2260     [MO_UNALN >> MO_ASHIFT]    = "un+",
2261     [MO_ALIGN >> MO_ASHIFT]    = "",
2262 #else
2263     [MO_UNALN >> MO_ASHIFT]    = "",
2264     [MO_ALIGN >> MO_ASHIFT]    = "al+",
2265 #endif
2266     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
2267     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
2268     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
2269     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
2270     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
2271     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
2272 };
2273 
2274 static inline bool tcg_regset_single(TCGRegSet d)
2275 {
2276     return (d & (d - 1)) == 0;
2277 }
2278 
2279 static inline TCGReg tcg_regset_first(TCGRegSet d)
2280 {
2281     if (TCG_TARGET_NB_REGS <= 32) {
2282         return ctz32(d);
2283     } else {
2284         return ctz64(d);
2285     }
2286 }
2287 
2288 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
2289 {
2290     char buf[128];
2291     TCGOp *op;
2292 
2293     QTAILQ_FOREACH(op, &s->ops, link) {
2294         int i, k, nb_oargs, nb_iargs, nb_cargs;
2295         const TCGOpDef *def;
2296         TCGOpcode c;
2297         int col = 0;
2298 
2299         c = op->opc;
2300         def = &tcg_op_defs[c];
2301 
2302         if (c == INDEX_op_insn_start) {
2303             nb_oargs = 0;
2304             col += qemu_log("\n ----");
2305 
2306             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2307                 target_ulong a;
2308 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2309                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2310 #else
2311                 a = op->args[i];
2312 #endif
2313                 col += qemu_log(" " TARGET_FMT_lx, a);
2314             }
2315         } else if (c == INDEX_op_call) {
2316             /* variable number of arguments */
2317             nb_oargs = TCGOP_CALLO(op);
2318             nb_iargs = TCGOP_CALLI(op);
2319             nb_cargs = def->nb_cargs;
2320 
2321             /* function name, flags, out args */
2322             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2323                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2324                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2325             for (i = 0; i < nb_oargs; i++) {
2326                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2327                                                        op->args[i]));
2328             }
2329             for (i = 0; i < nb_iargs; i++) {
2330                 TCGArg arg = op->args[nb_oargs + i];
2331                 const char *t = "<dummy>";
2332                 if (arg != TCG_CALL_DUMMY_ARG) {
2333                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2334                 }
2335                 col += qemu_log(",%s", t);
2336             }
2337         } else {
2338             col += qemu_log(" %s ", def->name);
2339 
2340             nb_oargs = def->nb_oargs;
2341             nb_iargs = def->nb_iargs;
2342             nb_cargs = def->nb_cargs;
2343 
2344             if (def->flags & TCG_OPF_VECTOR) {
2345                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2346                                 8 << TCGOP_VECE(op));
2347             }
2348 
2349             k = 0;
2350             for (i = 0; i < nb_oargs; i++) {
2351                 if (k != 0) {
2352                     col += qemu_log(",");
2353                 }
2354                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2355                                                       op->args[k++]));
2356             }
2357             for (i = 0; i < nb_iargs; i++) {
2358                 if (k != 0) {
2359                     col += qemu_log(",");
2360                 }
2361                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2362                                                       op->args[k++]));
2363             }
2364             switch (c) {
2365             case INDEX_op_brcond_i32:
2366             case INDEX_op_setcond_i32:
2367             case INDEX_op_movcond_i32:
2368             case INDEX_op_brcond2_i32:
2369             case INDEX_op_setcond2_i32:
2370             case INDEX_op_brcond_i64:
2371             case INDEX_op_setcond_i64:
2372             case INDEX_op_movcond_i64:
2373             case INDEX_op_cmp_vec:
2374             case INDEX_op_cmpsel_vec:
2375                 if (op->args[k] < ARRAY_SIZE(cond_name)
2376                     && cond_name[op->args[k]]) {
2377                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2378                 } else {
2379                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2380                 }
2381                 i = 1;
2382                 break;
2383             case INDEX_op_qemu_ld_i32:
2384             case INDEX_op_qemu_st_i32:
2385             case INDEX_op_qemu_st8_i32:
2386             case INDEX_op_qemu_ld_i64:
2387             case INDEX_op_qemu_st_i64:
2388                 {
2389                     TCGMemOpIdx oi = op->args[k++];
2390                     MemOp mop = get_memop(oi);
2391                     unsigned ix = get_mmuidx(oi);
2392 
2393                     if (mop & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2394                         col += qemu_log(",$0x%x,%u", mop, ix);
2395                     } else {
2396                         const char *s_al, *s_op;
2397                         s_al = alignment_name[(mop & MO_AMASK) >> MO_ASHIFT];
2398                         s_op = ldst_name[mop & (MO_BSWAP | MO_SSIZE)];
2399                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2400                     }
2401                     i = 1;
2402                 }
2403                 break;
2404             default:
2405                 i = 0;
2406                 break;
2407             }
2408             switch (c) {
2409             case INDEX_op_set_label:
2410             case INDEX_op_br:
2411             case INDEX_op_brcond_i32:
2412             case INDEX_op_brcond_i64:
2413             case INDEX_op_brcond2_i32:
2414                 col += qemu_log("%s$L%d", k ? "," : "",
2415                                 arg_label(op->args[k])->id);
2416                 i++, k++;
2417                 break;
2418             default:
2419                 break;
2420             }
2421             for (; i < nb_cargs; i++, k++) {
2422                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2423             }
2424         }
2425 
2426         if (have_prefs || op->life) {
2427 
2428             QemuLogFile *logfile;
2429 
2430             rcu_read_lock();
2431             logfile = qatomic_rcu_read(&qemu_logfile);
2432             if (logfile) {
2433                 for (; col < 40; ++col) {
2434                     putc(' ', logfile->fd);
2435                 }
2436             }
2437             rcu_read_unlock();
2438         }
2439 
2440         if (op->life) {
2441             unsigned life = op->life;
2442 
2443             if (life & (SYNC_ARG * 3)) {
2444                 qemu_log("  sync:");
2445                 for (i = 0; i < 2; ++i) {
2446                     if (life & (SYNC_ARG << i)) {
2447                         qemu_log(" %d", i);
2448                     }
2449                 }
2450             }
2451             life /= DEAD_ARG;
2452             if (life) {
2453                 qemu_log("  dead:");
2454                 for (i = 0; life; ++i, life >>= 1) {
2455                     if (life & 1) {
2456                         qemu_log(" %d", i);
2457                     }
2458                 }
2459             }
2460         }
2461 
2462         if (have_prefs) {
2463             for (i = 0; i < nb_oargs; ++i) {
2464                 TCGRegSet set = op->output_pref[i];
2465 
2466                 if (i == 0) {
2467                     qemu_log("  pref=");
2468                 } else {
2469                     qemu_log(",");
2470                 }
2471                 if (set == 0) {
2472                     qemu_log("none");
2473                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2474                     qemu_log("all");
2475 #ifdef CONFIG_DEBUG_TCG
2476                 } else if (tcg_regset_single(set)) {
2477                     TCGReg reg = tcg_regset_first(set);
2478                     qemu_log("%s", tcg_target_reg_names[reg]);
2479 #endif
2480                 } else if (TCG_TARGET_NB_REGS <= 32) {
2481                     qemu_log("%#x", (uint32_t)set);
2482                 } else {
2483                     qemu_log("%#" PRIx64, (uint64_t)set);
2484                 }
2485             }
2486         }
2487 
2488         qemu_log("\n");
2489     }
2490 }
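
/*
 * Editorial sketch (not part of the original file): given the format
 * strings above, the dump of a short block looks roughly like
 *
 *  ---- 0000000000400080
 *  mov_i32 tmp0,loc1
 *  add_i32 tmp0,tmp0,$0x1
 *  brcond_i32 tmp0,$0x0,ne,$L1
 *
 * with sync/dead annotations and register preferences appended to the
 * right-hand column when available.
 */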
2491 
2492 /* we give more priority to constraints with fewer registers */
2493 static int get_constraint_priority(const TCGOpDef *def, int k)
2494 {
2495     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2496     int n;
2497 
2498     if (arg_ct->oalias) {
2499         /* an alias is equivalent to a single register */
2500         n = 1;
2501     } else {
2502         n = ctpop64(arg_ct->regs);
2503     }
2504     return TCG_TARGET_NB_REGS - n + 1;
2505 }
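
/*
 * Editorial note (not part of the original file): with the formula
 * above, a constraint admitting a single register (or an alias) gets
 * priority TCG_TARGET_NB_REGS, while one admitting all registers gets
 * priority 1, so the scarcest constraints are placed first by
 * sort_constraints() below.
 */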
2506 
2507 /* sort from highest priority to lowest */
2508 static void sort_constraints(TCGOpDef *def, int start, int n)
2509 {
2510     int i, j;
2511     TCGArgConstraint *a = def->args_ct;
2512 
2513     for (i = 0; i < n; i++) {
2514         a[start + i].sort_index = start + i;
2515     }
2516     if (n <= 1) {
2517         return;
2518     }
2519     for (i = 0; i < n - 1; i++) {
2520         for (j = i + 1; j < n; j++) {
2521             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2522             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2523             if (p1 < p2) {
2524                 int tmp = a[start + i].sort_index;
2525                 a[start + i].sort_index = a[start + j].sort_index;
2526                 a[start + j].sort_index = tmp;
2527             }
2528         }
2529     }
2530 }
2531 
2532 static void process_op_defs(TCGContext *s)
2533 {
2534     TCGOpcode op;
2535 
2536     for (op = 0; op < NB_OPS; op++) {
2537         TCGOpDef *def = &tcg_op_defs[op];
2538         const TCGTargetOpDef *tdefs;
2539         int i, nb_args;
2540 
2541         if (def->flags & TCG_OPF_NOT_PRESENT) {
2542             continue;
2543         }
2544 
2545         nb_args = def->nb_iargs + def->nb_oargs;
2546         if (nb_args == 0) {
2547             continue;
2548         }
2549 
2550         /*
2551          * Macro magic should make it impossible, but double-check that
2552          * the array index is in range.  Since the signness of an enum
2553          * the array index is in range.  Since the signedness of an enum
2554          */
2555         unsigned con_set = tcg_target_op_def(op);
2556         tcg_debug_assert(con_set < ARRAY_SIZE(constraint_sets));
2557         tdefs = &constraint_sets[con_set];
2558 
2559         for (i = 0; i < nb_args; i++) {
2560             const char *ct_str = tdefs->args_ct_str[i];
2561             /* Incomplete TCGTargetOpDef entry. */
2562             tcg_debug_assert(ct_str != NULL);
2563 
2564             while (*ct_str != '\0') {
2565                 switch(*ct_str) {
2566                 case '0' ... '9':
2567                     {
2568                         int oarg = *ct_str - '0';
2569                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2570                         tcg_debug_assert(oarg < def->nb_oargs);
2571                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2572                         def->args_ct[i] = def->args_ct[oarg];
2573                         /* The output sets oalias.  */
2574                         def->args_ct[oarg].oalias = true;
2575                         def->args_ct[oarg].alias_index = i;
2576                         /* The input sets ialias. */
2577                         def->args_ct[i].ialias = true;
2578                         def->args_ct[i].alias_index = oarg;
2579                     }
2580                     ct_str++;
2581                     break;
2582                 case '&':
2583                     def->args_ct[i].newreg = true;
2584                     ct_str++;
2585                     break;
2586                 case 'i':
2587                     def->args_ct[i].ct |= TCG_CT_CONST;
2588                     ct_str++;
2589                     break;
2590 
2591                 /* Include all of the target-specific constraints. */
2592 
2593 #undef CONST
2594 #define CONST(CASE, MASK) \
2595     case CASE: def->args_ct[i].ct |= MASK; ct_str++; break;
2596 #define REGS(CASE, MASK) \
2597     case CASE: def->args_ct[i].regs |= MASK; ct_str++; break;
2598 
2599 #include "tcg-target-con-str.h"
2600 
2601 #undef REGS
2602 #undef CONST
2603                 default:
2604                     /* Typo in TCGTargetOpDef constraint. */
2605                     g_assert_not_reached();
2606                 }
2607             }
2608         }
2609 
2610         /* TCGTargetOpDef entry with too much information? */
2611         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2612 
2613         /* sort the constraints (XXX: this is just a heuristic) */
2614         sort_constraints(def, 0, def->nb_oargs);
2615         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2616     }
2617 }
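
/*
 * Editorial sketch (not part of the original file): as parsed above, a
 * constraint set such as { "r", "0", "ri" } for a one-output, two-input
 * op means: output 0 in any register; input 1 aliased to output 0 (the
 * '0'...'9' case links oalias/ialias); input 2 in a register or an
 * immediate (the 'i' case ORs in TCG_CT_CONST; 'r' itself comes from
 * tcg-target-con-str.h).
 */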
2618 
2619 void tcg_op_remove(TCGContext *s, TCGOp *op)
2620 {
2621     TCGLabel *label;
2622 
2623     switch (op->opc) {
2624     case INDEX_op_br:
2625         label = arg_label(op->args[0]);
2626         label->refs--;
2627         break;
2628     case INDEX_op_brcond_i32:
2629     case INDEX_op_brcond_i64:
2630         label = arg_label(op->args[3]);
2631         label->refs--;
2632         break;
2633     case INDEX_op_brcond2_i32:
2634         label = arg_label(op->args[5]);
2635         label->refs--;
2636         break;
2637     default:
2638         break;
2639     }
2640 
2641     QTAILQ_REMOVE(&s->ops, op, link);
2642     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2643     s->nb_ops--;
2644 
2645 #ifdef CONFIG_PROFILER
2646     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2647 #endif
2648 }
2649 
2650 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2651 {
2652     TCGContext *s = tcg_ctx;
2653     TCGOp *op;
2654 
2655     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2656         op = tcg_malloc(sizeof(TCGOp));
2657     } else {
2658         op = QTAILQ_FIRST(&s->free_ops);
2659         QTAILQ_REMOVE(&s->free_ops, op, link);
2660     }
2661     memset(op, 0, offsetof(TCGOp, link));
2662     op->opc = opc;
2663     s->nb_ops++;
2664 
2665     return op;
2666 }
2667 
2668 TCGOp *tcg_emit_op(TCGOpcode opc)
2669 {
2670     TCGOp *op = tcg_op_alloc(opc);
2671     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2672     return op;
2673 }
2674 
2675 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2676 {
2677     TCGOp *new_op = tcg_op_alloc(opc);
2678     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2679     return new_op;
2680 }
2681 
2682 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2683 {
2684     TCGOp *new_op = tcg_op_alloc(opc);
2685     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2686     return new_op;
2687 }
2688 
2689 /* Reachability analysis: remove unreachable code.  */
2690 static void reachable_code_pass(TCGContext *s)
2691 {
2692     TCGOp *op, *op_next;
2693     bool dead = false;
2694 
2695     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2696         bool remove = dead;
2697         TCGLabel *label;
2698         int call_flags;
2699 
2700         switch (op->opc) {
2701         case INDEX_op_set_label:
2702             label = arg_label(op->args[0]);
2703             if (label->refs == 0) {
2704                 /*
2705                  * While there is an occasional backward branch, virtually
2706                  * all branches generated by the translators are forward,
2707                  * which means that by the time we reach a label we will
2708                  * usually have removed all references to it already, and
2709                  * there is little to be gained by iterating further.
2710                  */
2711                 remove = true;
2712             } else {
2713                 /* Once we see a label, insns become live again.  */
2714                 dead = false;
2715                 remove = false;
2716 
2717                 /*
2718                  * Optimization can fold conditional branches to unconditional.
2719                  * If we find a label with one reference that is preceded by
2720                  * an unconditional branch to it, remove both.  This has to
2721                  * wait until the dead code between them has been removed.
2722                  */
2723                 if (label->refs == 1) {
2724                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2725                     if (op_prev->opc == INDEX_op_br &&
2726                         label == arg_label(op_prev->args[0])) {
2727                         tcg_op_remove(s, op_prev);
2728                         remove = true;
2729                     }
2730                 }
2731             }
2732             break;
2733 
2734         case INDEX_op_br:
2735         case INDEX_op_exit_tb:
2736         case INDEX_op_goto_ptr:
2737             /* Unconditional branches; everything following is dead.  */
2738             dead = true;
2739             break;
2740 
2741         case INDEX_op_call:
2742             /* Notice noreturn helper calls, e.g. those raising exceptions.  */
2743             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2744             if (call_flags & TCG_CALL_NO_RETURN) {
2745                 dead = true;
2746             }
2747             break;
2748 
2749         case INDEX_op_insn_start:
2750             /* Never remove -- we need to keep these for unwind.  */
2751             remove = false;
2752             break;
2753 
2754         default:
2755             break;
2756         }
2757 
2758         if (remove) {
2759             tcg_op_remove(s, op);
2760         }
2761     }
2762 }
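
/*
 * Editorial sketch (not part of the original file): two effects of the
 * pass above, after the optimizer has folded conditional branches:
 *
 *     br $L1
 *     mov_i32 tmp2,tmp3      <- dead, removed
 *     set_label $L0          <- refs == 0, removed
 *     add_i32 tmp2,tmp2,tmp3 <- still dead, removed
 *     set_label $L1          <- live again; and since the only remaining
 *                               reference is the br just above, both the
 *                               br and this set_label are removed too
 */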
2763 
2764 #define TS_DEAD  1
2765 #define TS_MEM   2
2766 
2767 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2768 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2769 
2770 /* For liveness_pass_1, the register preferences for a given temp.  */
2771 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2772 {
2773     return ts->state_ptr;
2774 }
2775 
2776 /* For liveness_pass_1, reset the preferences for a given temp to the
2777  * maximal regset for its type.
2778  */
2779 static inline void la_reset_pref(TCGTemp *ts)
2780 {
2781     *la_temp_pref(ts)
2782         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2783 }
2784 
2785 /* liveness analysis: end of function: all temps are dead, and globals
2786    should be in memory. */
2787 static void la_func_end(TCGContext *s, int ng, int nt)
2788 {
2789     int i;
2790 
2791     for (i = 0; i < ng; ++i) {
2792         s->temps[i].state = TS_DEAD | TS_MEM;
2793         la_reset_pref(&s->temps[i]);
2794     }
2795     for (i = ng; i < nt; ++i) {
2796         s->temps[i].state = TS_DEAD;
2797         la_reset_pref(&s->temps[i]);
2798     }
2799 }
2800 
2801 /* liveness analysis: end of basic block: all temps are dead, globals
2802    and local temps should be in memory. */
2803 static void la_bb_end(TCGContext *s, int ng, int nt)
2804 {
2805     int i;
2806 
2807     for (i = 0; i < nt; ++i) {
2808         TCGTemp *ts = &s->temps[i];
2809         int state;
2810 
2811         switch (ts->kind) {
2812         case TEMP_FIXED:
2813         case TEMP_GLOBAL:
2814         case TEMP_LOCAL:
2815             state = TS_DEAD | TS_MEM;
2816             break;
2817         case TEMP_NORMAL:
2818         case TEMP_CONST:
2819             state = TS_DEAD;
2820             break;
2821         default:
2822             g_assert_not_reached();
2823         }
2824         ts->state = state;
2825         la_reset_pref(ts);
2826     }
2827 }
2828 
2829 /* liveness analysis: sync globals back to memory.  */
2830 static void la_global_sync(TCGContext *s, int ng)
2831 {
2832     int i;
2833 
2834     for (i = 0; i < ng; ++i) {
2835         int state = s->temps[i].state;
2836         s->temps[i].state = state | TS_MEM;
2837         if (state == TS_DEAD) {
2838             /* If the global was previously dead, reset prefs.  */
2839             la_reset_pref(&s->temps[i]);
2840         }
2841     }
2842 }
2843 
2844 /*
2845  * liveness analysis: conditional branch: all temps are dead,
2846  * globals and local temps should be synced.
2847  */
2848 static void la_bb_sync(TCGContext *s, int ng, int nt)
2849 {
2850     la_global_sync(s, ng);
2851 
2852     for (int i = ng; i < nt; ++i) {
2853         TCGTemp *ts = &s->temps[i];
2854         int state;
2855 
2856         switch (ts->kind) {
2857         case TEMP_LOCAL:
2858             state = ts->state;
2859             ts->state = state | TS_MEM;
2860             if (state != TS_DEAD) {
2861                 continue;
2862             }
2863             break;
2864         case TEMP_NORMAL:
2865             s->temps[i].state = TS_DEAD;
2866             break;
2867         case TEMP_CONST:
2868             continue;
2869         default:
2870             g_assert_not_reached();
2871         }
2872         la_reset_pref(&s->temps[i]);
2873     }
2874 }
2875 
2876 /* liveness analysis: sync globals back to memory and kill.  */
2877 static void la_global_kill(TCGContext *s, int ng)
2878 {
2879     int i;
2880 
2881     for (i = 0; i < ng; i++) {
2882         s->temps[i].state = TS_DEAD | TS_MEM;
2883         la_reset_pref(&s->temps[i]);
2884     }
2885 }
2886 
2887 /* liveness analysis: note live globals crossing calls.  */
2888 static void la_cross_call(TCGContext *s, int nt)
2889 {
2890     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2891     int i;
2892 
2893     for (i = 0; i < nt; i++) {
2894         TCGTemp *ts = &s->temps[i];
2895         if (!(ts->state & TS_DEAD)) {
2896             TCGRegSet *pset = la_temp_pref(ts);
2897             TCGRegSet set = *pset;
2898 
2899             set &= mask;
2900             /* If the combination is not possible, restart.  */
2901             if (set == 0) {
2902                 set = tcg_target_available_regs[ts->type] & mask;
2903             }
2904             *pset = set;
2905         }
2906     }
2907 }
2908 
2909 /* Liveness analysis: update the opc_arg_life array to tell if a
2910    given input argument is dead. Instructions updating dead
2911    temporaries are removed. */
2912 static void liveness_pass_1(TCGContext *s)
2913 {
2914     int nb_globals = s->nb_globals;
2915     int nb_temps = s->nb_temps;
2916     TCGOp *op, *op_prev;
2917     TCGRegSet *prefs;
2918     int i;
2919 
2920     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2921     for (i = 0; i < nb_temps; ++i) {
2922         s->temps[i].state_ptr = prefs + i;
2923     }
2924 
2925     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2926     la_func_end(s, nb_globals, nb_temps);
2927 
2928     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2929         int nb_iargs, nb_oargs;
2930         TCGOpcode opc_new, opc_new2;
2931         bool have_opc_new2;
2932         TCGLifeData arg_life = 0;
2933         TCGTemp *ts;
2934         TCGOpcode opc = op->opc;
2935         const TCGOpDef *def = &tcg_op_defs[opc];
2936 
2937         switch (opc) {
2938         case INDEX_op_call:
2939             {
2940                 int call_flags;
2941                 int nb_call_regs;
2942 
2943                 nb_oargs = TCGOP_CALLO(op);
2944                 nb_iargs = TCGOP_CALLI(op);
2945                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2946 
2947                 /* pure functions can be removed if their result is unused */
2948                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2949                     for (i = 0; i < nb_oargs; i++) {
2950                         ts = arg_temp(op->args[i]);
2951                         if (ts->state != TS_DEAD) {
2952                             goto do_not_remove_call;
2953                         }
2954                     }
2955                     goto do_remove;
2956                 }
2957             do_not_remove_call:
2958 
2959                 /* Output args are dead.  */
2960                 for (i = 0; i < nb_oargs; i++) {
2961                     ts = arg_temp(op->args[i]);
2962                     if (ts->state & TS_DEAD) {
2963                         arg_life |= DEAD_ARG << i;
2964                     }
2965                     if (ts->state & TS_MEM) {
2966                         arg_life |= SYNC_ARG << i;
2967                     }
2968                     ts->state = TS_DEAD;
2969                     la_reset_pref(ts);
2970 
2971                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2972                     op->output_pref[i] = 0;
2973                 }
2974 
2975                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2976                                     TCG_CALL_NO_READ_GLOBALS))) {
2977                     la_global_kill(s, nb_globals);
2978                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2979                     la_global_sync(s, nb_globals);
2980                 }
2981 
2982                 /* Record arguments that die in this helper.  */
2983                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2984                     ts = arg_temp(op->args[i]);
2985                     if (ts && ts->state & TS_DEAD) {
2986                         arg_life |= DEAD_ARG << i;
2987                     }
2988                 }
2989 
2990                 /* For all live registers, remove call-clobbered prefs.  */
2991                 la_cross_call(s, nb_temps);
2992 
2993                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2994 
2995                 /* Input arguments are live for preceding opcodes.  */
2996                 for (i = 0; i < nb_iargs; i++) {
2997                     ts = arg_temp(op->args[i + nb_oargs]);
2998                     if (ts && ts->state & TS_DEAD) {
2999                         /* For those arguments that die, and will be allocated
3000                          * in registers, clear the register set for that arg,
3001                          * to be filled in below.  For args that will be on
3002                          * the stack, reset to any available reg.
3003                          */
3004                         *la_temp_pref(ts)
3005                             = (i < nb_call_regs ? 0 :
3006                                tcg_target_available_regs[ts->type]);
3007                         ts->state &= ~TS_DEAD;
3008                     }
3009                 }
3010 
3011                 /* For each input argument, add its input register to prefs.
3012                    If a temp is used once, this produces a single set bit.  */
3013                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
3014                     ts = arg_temp(op->args[i + nb_oargs]);
3015                     if (ts) {
3016                         tcg_regset_set_reg(*la_temp_pref(ts),
3017                                            tcg_target_call_iarg_regs[i]);
3018                     }
3019                 }
3020             }
3021             break;
3022         case INDEX_op_insn_start:
3023             break;
3024         case INDEX_op_discard:
3025             /* mark the temporary as dead */
3026             ts = arg_temp(op->args[0]);
3027             ts->state = TS_DEAD;
3028             la_reset_pref(ts);
3029             break;
3030 
3031         case INDEX_op_add2_i32:
3032             opc_new = INDEX_op_add_i32;
3033             goto do_addsub2;
3034         case INDEX_op_sub2_i32:
3035             opc_new = INDEX_op_sub_i32;
3036             goto do_addsub2;
3037         case INDEX_op_add2_i64:
3038             opc_new = INDEX_op_add_i64;
3039             goto do_addsub2;
3040         case INDEX_op_sub2_i64:
3041             opc_new = INDEX_op_sub_i64;
3042         do_addsub2:
3043             nb_iargs = 4;
3044             nb_oargs = 2;
3045             /* Test if the high part of the operation is dead, but not
3046                the low part.  The result can be optimized to a simple
3047                add or sub.  This happens often for an x86_64 guest when
3048                the CPU mode is set to 32 bit.  */
3049             if (arg_temp(op->args[1])->state == TS_DEAD) {
3050                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3051                     goto do_remove;
3052                 }
3053                 /* Replace the opcode and adjust the args in place,
3054                    leaving 3 unused args at the end.  */
3055                 op->opc = opc = opc_new;
3056                 op->args[1] = op->args[2];
3057                 op->args[2] = op->args[4];
3058                 /* Fall through and mark the single-word operation live.  */
3059                 nb_iargs = 2;
3060                 nb_oargs = 1;
3061             }
3062             goto do_not_remove;
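        /*
         * Editorial note (not part of the original file): for example,
         * when the high output is dead,
         *     add2_i32 lo, hi, al, ah, bl, bh
         * is rewritten in place to
         *     add_i32  lo, al, bl
         * by the args[1] = args[2]; args[2] = args[4] shuffle above.
         */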
3063 
3064         case INDEX_op_mulu2_i32:
3065             opc_new = INDEX_op_mul_i32;
3066             opc_new2 = INDEX_op_muluh_i32;
3067             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
3068             goto do_mul2;
3069         case INDEX_op_muls2_i32:
3070             opc_new = INDEX_op_mul_i32;
3071             opc_new2 = INDEX_op_mulsh_i32;
3072             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
3073             goto do_mul2;
3074         case INDEX_op_mulu2_i64:
3075             opc_new = INDEX_op_mul_i64;
3076             opc_new2 = INDEX_op_muluh_i64;
3077             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
3078             goto do_mul2;
3079         case INDEX_op_muls2_i64:
3080             opc_new = INDEX_op_mul_i64;
3081             opc_new2 = INDEX_op_mulsh_i64;
3082             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
3083             goto do_mul2;
3084         do_mul2:
3085             nb_iargs = 2;
3086             nb_oargs = 2;
3087             if (arg_temp(op->args[1])->state == TS_DEAD) {
3088                 if (arg_temp(op->args[0])->state == TS_DEAD) {
3089                     /* Both parts of the operation are dead.  */
3090                     goto do_remove;
3091                 }
3092                 /* The high part of the operation is dead; generate the low. */
3093                 op->opc = opc = opc_new;
3094                 op->args[1] = op->args[2];
3095                 op->args[2] = op->args[3];
3096             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
3097                 /* The low part of the operation is dead; generate the high. */
3098                 op->opc = opc = opc_new2;
3099                 op->args[0] = op->args[1];
3100                 op->args[1] = op->args[2];
3101                 op->args[2] = op->args[3];
3102             } else {
3103                 goto do_not_remove;
3104             }
3105             /* Mark the single-word operation live.  */
3106             nb_oargs = 1;
3107             goto do_not_remove;
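        /*
         * Editorial note (not part of the original file): likewise
         *     mulu2_i32 lo, hi, a, b
         * becomes mul_i32 lo, a, b when hi is dead, or
         *     muluh_i32 hi, a, b
         * when lo is dead and the target provides the high-part opcode.
         */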
3108 
3109         default:
3110             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
3111             nb_iargs = def->nb_iargs;
3112             nb_oargs = def->nb_oargs;
3113 
3114             /* Test if the operation can be removed because all
3115                its outputs are dead. We assume that nb_oargs == 0
3116                implies side effects.  */
3117             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
3118                 for (i = 0; i < nb_oargs; i++) {
3119                     if (arg_temp(op->args[i])->state != TS_DEAD) {
3120                         goto do_not_remove;
3121                     }
3122                 }
3123                 goto do_remove;
3124             }
3125             goto do_not_remove;
3126 
3127         do_remove:
3128             tcg_op_remove(s, op);
3129             break;
3130 
3131         do_not_remove:
3132             for (i = 0; i < nb_oargs; i++) {
3133                 ts = arg_temp(op->args[i]);
3134 
3135                 /* Remember the preference of the uses that followed.  */
3136                 op->output_pref[i] = *la_temp_pref(ts);
3137 
3138                 /* Output args are dead.  */
3139                 if (ts->state & TS_DEAD) {
3140                     arg_life |= DEAD_ARG << i;
3141                 }
3142                 if (ts->state & TS_MEM) {
3143                     arg_life |= SYNC_ARG << i;
3144                 }
3145                 ts->state = TS_DEAD;
3146                 la_reset_pref(ts);
3147             }
3148 
3149             /* If end of basic block, update.  */
3150             if (def->flags & TCG_OPF_BB_EXIT) {
3151                 la_func_end(s, nb_globals, nb_temps);
3152             } else if (def->flags & TCG_OPF_COND_BRANCH) {
3153                 la_bb_sync(s, nb_globals, nb_temps);
3154             } else if (def->flags & TCG_OPF_BB_END) {
3155                 la_bb_end(s, nb_globals, nb_temps);
3156             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3157                 la_global_sync(s, nb_globals);
3158                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
3159                     la_cross_call(s, nb_temps);
3160                 }
3161             }
3162 
3163             /* Record arguments that die in this opcode.  */
3164             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3165                 ts = arg_temp(op->args[i]);
3166                 if (ts->state & TS_DEAD) {
3167                     arg_life |= DEAD_ARG << i;
3168                 }
3169             }
3170 
3171             /* Input arguments are live for preceding opcodes.  */
3172             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3173                 ts = arg_temp(op->args[i]);
3174                 if (ts->state & TS_DEAD) {
3175                     /* For operands that were dead, initially allow
3176                        all regs for the type.  */
3177                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
3178                     ts->state &= ~TS_DEAD;
3179                 }
3180             }
3181 
3182             /* Incorporate constraints for this operand.  */
3183             switch (opc) {
3184             case INDEX_op_mov_i32:
3185             case INDEX_op_mov_i64:
3186                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
3187                    have proper constraints.  That said, we special-case
3188                    moves to propagate preferences backward.  */
3189                 if (IS_DEAD_ARG(1)) {
3190                     *la_temp_pref(arg_temp(op->args[0]))
3191                         = *la_temp_pref(arg_temp(op->args[1]));
3192                 }
3193                 break;
3194 
3195             default:
3196                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3197                     const TCGArgConstraint *ct = &def->args_ct[i];
3198                     TCGRegSet set, *pset;
3199 
3200                     ts = arg_temp(op->args[i]);
3201                     pset = la_temp_pref(ts);
3202                     set = *pset;
3203 
3204                     set &= ct->regs;
3205                     if (ct->ialias) {
3206                         set &= op->output_pref[ct->alias_index];
3207                     }
3208                     /* If the combination is not possible, restart.  */
3209                     if (set == 0) {
3210                         set = ct->regs;
3211                     }
3212                     *pset = set;
3213                 }
3214                 break;
3215             }
3216             break;
3217         }
3218         op->life = arg_life;
3219     }
3220 }
3221 
3222 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
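/*
 * Editorial note (not part of the original file): an "indirect" global
 * lives in memory reached through another pointer temp (e.g. a CPU state
 * field addressed via env) and cannot be register-allocated directly.
 * The pass below shadows each such global with a direct temporary and
 * inserts explicit ld_i32/ld_i64 ops before uses (and, further on, the
 * matching stores after definitions), so the register allocator only
 * ever sees the direct temp.
 */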
3223 static bool liveness_pass_2(TCGContext *s)
3224 {
3225     int nb_globals = s->nb_globals;
3226     int nb_temps, i;
3227     bool changes = false;
3228     TCGOp *op, *op_next;
3229 
3230     /* Create a temporary for each indirect global.  */
3231     for (i = 0; i < nb_globals; ++i) {
3232         TCGTemp *its = &s->temps[i];
3233         if (its->indirect_reg) {
3234             TCGTemp *dts = tcg_temp_alloc(s);
3235             dts->type = its->type;
3236             dts->base_type = its->base_type;
3237             its->state_ptr = dts;
3238         } else {
3239             its->state_ptr = NULL;
3240         }
3241         /* All globals begin dead.  */
3242         its->state = TS_DEAD;
3243     }
3244     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
3245         TCGTemp *its = &s->temps[i];
3246         its->state_ptr = NULL;
3247         its->state = TS_DEAD;
3248     }
3249 
3250     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
3251         TCGOpcode opc = op->opc;
3252         const TCGOpDef *def = &tcg_op_defs[opc];
3253         TCGLifeData arg_life = op->life;
3254         int nb_iargs, nb_oargs, call_flags;
3255         TCGTemp *arg_ts, *dir_ts;
3256 
3257         if (opc == INDEX_op_call) {
3258             nb_oargs = TCGOP_CALLO(op);
3259             nb_iargs = TCGOP_CALLI(op);
3260             call_flags = op->args[nb_oargs + nb_iargs + 1];
3261         } else {
3262             nb_iargs = def->nb_iargs;
3263             nb_oargs = def->nb_oargs;
3264 
3265             /* Set call_flags as an equivalent call would require.  */
3266             if (def->flags & TCG_OPF_COND_BRANCH) {
3267                 /* Like reading globals: sync_globals */
3268                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3269             } else if (def->flags & TCG_OPF_BB_END) {
3270                 /* Like writing globals: save_globals */
3271                 call_flags = 0;
3272             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3273                 /* Like reading globals: sync_globals */
3274                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
3275             } else {
3276                 /* No effect on globals.  */
3277                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
3278                               TCG_CALL_NO_WRITE_GLOBALS);
3279             }
3280         }
3281 
3282         /* Make sure that input arguments are available.  */
3283         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3284             arg_ts = arg_temp(op->args[i]);
3285             if (arg_ts) {
3286                 dir_ts = arg_ts->state_ptr;
3287                 if (dir_ts && arg_ts->state == TS_DEAD) {
3288                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
3289                                       ? INDEX_op_ld_i32
3290                                       : INDEX_op_ld_i64);
3291                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
3292 
3293                     lop->args[0] = temp_arg(dir_ts);
3294                     lop->args[1] = temp_arg(arg_ts->mem_base);
3295                     lop->args[2] = arg_ts->mem_offset;
3296 
3297                     /* Loaded, but synced with memory.  */
3298                     arg_ts->state = TS_MEM;
3299                 }
3300             }
3301         }
3302 
3303         /* Perform input replacement, and mark inputs that became dead.
3304            No action is required except keeping each temp's state up to date
3305            so that we reload when needed.  */
3306         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3307             arg_ts = arg_temp(op->args[i]);
3308             if (arg_ts) {
3309                 dir_ts = arg_ts->state_ptr;
3310                 if (dir_ts) {
3311                     op->args[i] = temp_arg(dir_ts);
3312                     changes = true;
3313                     if (IS_DEAD_ARG(i)) {
3314                         arg_ts->state = TS_DEAD;
3315                     }
3316                 }
3317             }
3318         }
3319 
3320         /* Liveness analysis should ensure that the following are
3321            all correct, for call sites and basic block end points.  */
3322         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
3323             /* Nothing to do */
3324         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
3325             for (i = 0; i < nb_globals; ++i) {
3326                 /* Liveness should see that globals are synced back,
3327                    that is, either TS_DEAD or TS_MEM.  */
3328                 arg_ts = &s->temps[i];
3329                 tcg_debug_assert(arg_ts->state_ptr == NULL
3330                                  || arg_ts->state != 0);
3331             }
3332         } else {
3333             for (i = 0; i < nb_globals; ++i) {
3334                 /* Liveness should see that globals are saved back,
3335                    that is, TS_DEAD, waiting to be reloaded.  */
3336                 arg_ts = &s->temps[i];
3337                 tcg_debug_assert(arg_ts->state_ptr == NULL
3338                                  || arg_ts->state == TS_DEAD);
3339             }
3340         }
3341 
3342         /* Outputs become available.  */
3343         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
3344             arg_ts = arg_temp(op->args[0]);
3345             dir_ts = arg_ts->state_ptr;
3346             if (dir_ts) {
3347                 op->args[0] = temp_arg(dir_ts);
3348                 changes = true;
3349 
3350                 /* The output is now live and modified.  */
3351                 arg_ts->state = 0;
3352 
3353                 if (NEED_SYNC_ARG(0)) {
3354                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3355                                       ? INDEX_op_st_i32
3356                                       : INDEX_op_st_i64);
3357                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3358                     TCGTemp *out_ts = dir_ts;
3359 
3360                     if (IS_DEAD_ARG(0)) {
3361                         out_ts = arg_temp(op->args[1]);
3362                         arg_ts->state = TS_DEAD;
3363                         tcg_op_remove(s, op);
3364                     } else {
3365                         arg_ts->state = TS_MEM;
3366                     }
3367 
3368                     sop->args[0] = temp_arg(out_ts);
3369                     sop->args[1] = temp_arg(arg_ts->mem_base);
3370                     sop->args[2] = arg_ts->mem_offset;
3371                 } else {
3372                     tcg_debug_assert(!IS_DEAD_ARG(0));
3373                 }
3374             }
3375         } else {
3376             for (i = 0; i < nb_oargs; i++) {
3377                 arg_ts = arg_temp(op->args[i]);
3378                 dir_ts = arg_ts->state_ptr;
3379                 if (!dir_ts) {
3380                     continue;
3381                 }
3382                 op->args[i] = temp_arg(dir_ts);
3383                 changes = true;
3384 
3385                 /* The output is now live and modified.  */
3386                 arg_ts->state = 0;
3387 
3388                 /* Sync outputs upon their last write.  */
3389                 if (NEED_SYNC_ARG(i)) {
3390                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3391                                       ? INDEX_op_st_i32
3392                                       : INDEX_op_st_i64);
3393                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3394 
3395                     sop->args[0] = temp_arg(dir_ts);
3396                     sop->args[1] = temp_arg(arg_ts->mem_base);
3397                     sop->args[2] = arg_ts->mem_offset;
3398 
3399                     arg_ts->state = TS_MEM;
3400                 }
3401                 /* Drop outputs that are dead.  */
3402                 if (IS_DEAD_ARG(i)) {
3403                     arg_ts->state = TS_DEAD;
3404                 }
3405             }
3406         }
3407     }
3408 
3409     return changes;
3410 }
3411 
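/* Illustrative only: for an indirect global G with direct temporary D,
   liveness_pass_2 rewrites, schematically,

       add_i32 G, G, C

   into

       ld_i32  D, base, off      -- inserted while G was TS_DEAD
       add_i32 D, D, C
       st_i32  D, base, off      -- inserted because NEED_SYNC_ARG

   where "base, off" stand for G's mem_base and mem_offset.  */
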
3412 #ifdef CONFIG_DEBUG_TCG
3413 static void dump_regs(TCGContext *s)
3414 {
3415     TCGTemp *ts;
3416     int i;
3417     char buf[64];
3418 
3419     for (i = 0; i < s->nb_temps; i++) {
3420         ts = &s->temps[i];
3421         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3422         switch (ts->val_type) {
3423         case TEMP_VAL_REG:
3424             printf("%s", tcg_target_reg_names[ts->reg]);
3425             break;
3426         case TEMP_VAL_MEM:
3427             printf("%d(%s)", (int)ts->mem_offset,
3428                    tcg_target_reg_names[ts->mem_base->reg]);
3429             break;
3430         case TEMP_VAL_CONST:
3431             printf("$0x%" PRIx64, ts->val);
3432             break;
3433         case TEMP_VAL_DEAD:
3434             printf("D");
3435             break;
3436         default:
3437             printf("???");
3438             break;
3439         }
3440         printf("\n");
3441     }
3442 
3443     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3444         if (s->reg_to_temp[i] != NULL) {
3445             printf("%s: %s\n",
3446                    tcg_target_reg_names[i],
3447                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3448         }
3449     }
3450 }
3451 
3452 static void check_regs(TCGContext *s)
3453 {
3454     int reg;
3455     int k;
3456     TCGTemp *ts;
3457     char buf[64];
3458 
3459     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3460         ts = s->reg_to_temp[reg];
3461         if (ts != NULL) {
3462             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3463                 printf("Inconsistency for register %s:\n",
3464                        tcg_target_reg_names[reg]);
3465                 goto fail;
3466             }
3467         }
3468     }
3469     for (k = 0; k < s->nb_temps; k++) {
3470         ts = &s->temps[k];
3471         if (ts->val_type == TEMP_VAL_REG
3472             && ts->kind != TEMP_FIXED
3473             && s->reg_to_temp[ts->reg] != ts) {
3474             printf("Inconsistency for temp %s:\n",
3475                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3476         fail:
3477             printf("reg state:\n");
3478             dump_regs(s);
3479             tcg_abort();
3480         }
3481     }
3482 }
3483 #endif
3484 
3485 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3486 {
3487 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3488     /* The Sparc64 stack is accessed with an offset of 2047.  */
3489     s->current_frame_offset = (s->current_frame_offset +
3490                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3491         ~(sizeof(tcg_target_long) - 1);
3492 #endif
3493     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3494         s->frame_end) {
3495         tcg_abort();
3496     }
3497     ts->mem_offset = s->current_frame_offset;
3498     ts->mem_base = s->frame_temp;
3499     ts->mem_allocated = 1;
3500     s->current_frame_offset += sizeof(tcg_target_long);
3501 }
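
/* Illustrative only: the align-up idiom used by temp_allocate_frame(),
   as a standalone sketch.  align_up() is hypothetical, not TCG API;
   'a' must be a power of two.  */
#if 0
#include <stdint.h>

static intptr_t align_up(intptr_t off, intptr_t a)
{
    return (off + a - 1) & ~(a - 1);
}
/* With 8-byte slots: align_up(13, 8) == 16 and align_up(16, 8) == 16. */
#endif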
3502 
3503 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3504 
3505 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3506    mark it free; otherwise mark it dead.  */
3507 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3508 {
3509     TCGTempVal new_type;
3510 
3511     switch (ts->kind) {
3512     case TEMP_FIXED:
3513         return;
3514     case TEMP_GLOBAL:
3515     case TEMP_LOCAL:
3516         new_type = TEMP_VAL_MEM;
3517         break;
3518     case TEMP_NORMAL:
3519         new_type = free_or_dead < 0 ? TEMP_VAL_MEM : TEMP_VAL_DEAD;
3520         break;
3521     case TEMP_CONST:
3522         new_type = TEMP_VAL_CONST;
3523         break;
3524     default:
3525         g_assert_not_reached();
3526     }
3527     if (ts->val_type == TEMP_VAL_REG) {
3528         s->reg_to_temp[ts->reg] = NULL;
3529     }
3530     ts->val_type = new_type;
3531 }
3532 
3533 /* Mark a temporary as dead.  */
3534 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3535 {
3536     temp_free_or_dead(s, ts, 1);
3537 }
3538 
3539 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3540    register needs to be allocated to store a constant.  If 'free_or_dead'
3541    is non-zero, subsequently release the temporary; if it is positive, the
3542    temp is dead; if it is negative, the temp is free.  */
3543 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3544                       TCGRegSet preferred_regs, int free_or_dead)
3545 {
3546     if (!temp_readonly(ts) && !ts->mem_coherent) {
3547         if (!ts->mem_allocated) {
3548             temp_allocate_frame(s, ts);
3549         }
3550         switch (ts->val_type) {
3551         case TEMP_VAL_CONST:
3552             /* If we're going to free the temp immediately, then we won't
3553                require it later in a register, so attempt to store the
3554                constant to memory directly.  */
3555             if (free_or_dead
3556                 && tcg_out_sti(s, ts->type, ts->val,
3557                                ts->mem_base->reg, ts->mem_offset)) {
3558                 break;
3559             }
3560             temp_load(s, ts, tcg_target_available_regs[ts->type],
3561                       allocated_regs, preferred_regs);
3562             /* fallthrough */
3563 
3564         case TEMP_VAL_REG:
3565             tcg_out_st(s, ts->type, ts->reg,
3566                        ts->mem_base->reg, ts->mem_offset);
3567             break;
3568 
3569         case TEMP_VAL_MEM:
3570             break;
3571 
3572         case TEMP_VAL_DEAD:
3573         default:
3574             tcg_abort();
3575         }
3576         ts->mem_coherent = 1;
3577     }
3578     if (free_or_dead) {
3579         temp_free_or_dead(s, ts, free_or_dead);
3580     }
3581 }
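
/* Illustrative only: the free_or_dead conventions as used by callers
   in this file:

       temp_sync(s, ts, regs, 0, -1);   sync, then mark the temp free
       temp_sync(s, ts, regs, 0, +1);   sync, then mark the temp dead
       temp_sync(s, ts, regs, 0,  0);   sync only; the temp stays live  */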
3582 
3583 /* free register 'reg' by spilling the corresponding temporary if necessary */
3584 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3585 {
3586     TCGTemp *ts = s->reg_to_temp[reg];
3587     if (ts != NULL) {
3588         temp_sync(s, ts, allocated_regs, 0, -1);
3589     }
3590 }
3591 
3592 /**
3593  * tcg_reg_alloc:
3594  * @required_regs: Set of registers in which we must allocate.
3595  * @allocated_regs: Set of registers which must be avoided.
3596  * @preferred_regs: Set of registers we should prefer.
3597  * @rev: True if we search the registers in "indirect" order.
3598  *
3599  * The allocated register must be in @required_regs & ~@allocated_regs,
3600  * but if we can put it in @preferred_regs we may save a move later.
3601  */
3602 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3603                             TCGRegSet allocated_regs,
3604                             TCGRegSet preferred_regs, bool rev)
3605 {
3606     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3607     TCGRegSet reg_ct[2];
3608     const int *order;
3609 
3610     reg_ct[1] = required_regs & ~allocated_regs;
3611     tcg_debug_assert(reg_ct[1] != 0);
3612     reg_ct[0] = reg_ct[1] & preferred_regs;
3613 
3614     /* Skip the preferred_regs option if it cannot be satisfied,
3615        or if the preference made no difference.  */
3616     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3617 
3618     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3619 
3620     /* Try free registers, preferences first.  */
3621     for (j = f; j < 2; j++) {
3622         TCGRegSet set = reg_ct[j];
3623 
3624         if (tcg_regset_single(set)) {
3625             /* One register in the set.  */
3626             TCGReg reg = tcg_regset_first(set);
3627             if (s->reg_to_temp[reg] == NULL) {
3628                 return reg;
3629             }
3630         } else {
3631             for (i = 0; i < n; i++) {
3632                 TCGReg reg = order[i];
3633                 if (s->reg_to_temp[reg] == NULL &&
3634                     tcg_regset_test_reg(set, reg)) {
3635                     return reg;
3636                 }
3637             }
3638         }
3639     }
3640 
3641     /* We must spill something.  */
3642     for (j = f; j < 2; j++) {
3643         TCGRegSet set = reg_ct[j];
3644 
3645         if (tcg_regset_single(set)) {
3646             /* One register in the set.  */
3647             TCGReg reg = tcg_regset_first(set);
3648             tcg_reg_free(s, reg, allocated_regs);
3649             return reg;
3650         } else {
3651             for (i = 0; i < n; i++) {
3652                 TCGReg reg = order[i];
3653                 if (tcg_regset_test_reg(set, reg)) {
3654                     tcg_reg_free(s, reg, allocated_regs);
3655                     return reg;
3656                 }
3657             }
3658         }
3659     }
3660 
3661     tcg_abort();
3662 }
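
/* Illustrative only: the search order of tcg_reg_alloc() modelled with
   plain bitmasks and hypothetical names (pick_reg, reg_is_free).  Free
   registers beat spilling, and within each pass the preferred subset is
   tried before the full required set.  Assumes at most 32 registers.  */
#if 0
#include <stdbool.h>
#include <stdint.h>

static int pick_reg(uint32_t required, uint32_t allocated, uint32_t preferred,
                    const bool *reg_is_free, const int *order, int n)
{
    uint32_t tier[2];
    int f, pass, j, i;

    tier[1] = required & ~allocated;
    tier[0] = tier[1] & preferred;
    /* Skip the preferred tier if it is empty or makes no difference.  */
    f = (tier[0] == 0 || tier[0] == tier[1]);

    for (pass = 0; pass < 2; pass++) {      /* 0: free regs, 1: spill */
        for (j = f; j < 2; j++) {
            for (i = 0; i < n; i++) {
                int r = order[i];
                if ((tier[j] & (1u << r)) && (pass == 1 || reg_is_free[r])) {
                    return r;   /* the real code spills r when pass == 1 */
                }
            }
        }
    }
    return -1;                              /* tier[1] was empty */
}
#endif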
3663 
3664 /* Make sure the temporary is in a register.  If needed, allocate the register
3665    from DESIRED while avoiding ALLOCATED.  */
3666 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3667                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3668 {
3669     TCGReg reg;
3670 
3671     switch (ts->val_type) {
3672     case TEMP_VAL_REG:
3673         return;
3674     case TEMP_VAL_CONST:
3675         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3676                             preferred_regs, ts->indirect_base);
3677         if (ts->type <= TCG_TYPE_I64) {
3678             tcg_out_movi(s, ts->type, reg, ts->val);
3679         } else {
3680             uint64_t val = ts->val;
3681             MemOp vece = MO_64;
3682 
3683             /*
3684              * Find the minimal vector element that matches the constant.
3685              * The targets will, in general, have to do this search anyway,
3686              * so do it generically here.
3687              */
3688             if (val == dup_const(MO_8, val)) {
3689                 vece = MO_8;
3690             } else if (val == dup_const(MO_16, val)) {
3691                 vece = MO_16;
3692             } else if (val == dup_const(MO_32, val)) {
3693                 vece = MO_32;
3694             }
3695 
3696             tcg_out_dupi_vec(s, ts->type, vece, reg, ts->val);
3697         }
3698         ts->mem_coherent = 0;
3699         break;
3700     case TEMP_VAL_MEM:
3701         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3702                             preferred_regs, ts->indirect_base);
3703         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3704         ts->mem_coherent = 1;
3705         break;
3706     case TEMP_VAL_DEAD:
3707     default:
3708         tcg_abort();
3709     }
3710     ts->reg = reg;
3711     ts->val_type = TEMP_VAL_REG;
3712     s->reg_to_temp[reg] = ts;
3713 }
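
/* Illustrative only: the minimal-VECE search above as a standalone
   sketch.  dup_c() is a local stand-in for dup_const(), which
   replicates the low 1 << vece bytes of a value across 64 bits.  */
#if 0
#include <stdint.h>

enum { MO_8, MO_16, MO_32, MO_64 };

static uint64_t dup_c(unsigned vece, uint64_t v)
{
    switch (vece) {
    case MO_8:  return (v & 0xff) * 0x0101010101010101ull;
    case MO_16: return (v & 0xffff) * 0x0001000100010001ull;
    case MO_32: return (v & 0xffffffffull) * 0x0000000100000001ull;
    default:    return v;
    }
}

static unsigned minimal_vece(uint64_t val)
{
    if (val == dup_c(MO_8, val)) {
        return MO_8;
    } else if (val == dup_c(MO_16, val)) {
        return MO_16;
    } else if (val == dup_c(MO_32, val)) {
        return MO_32;
    }
    return MO_64;
}
/* minimal_vece(0x4242424242424242ull) == MO_8,
   minimal_vece(0x1111222211112222ull) == MO_32.  */
#endif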
3714 
3715 /* Save a temporary to memory. 'allocated_regs' is used in case a
3716    temporary register needs to be allocated to store a constant.  */
3717 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3718 {
3719     /* The liveness analysis already ensures that globals are back
3720        in memory. Keep a tcg_debug_assert for safety. */
3721     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || temp_readonly(ts));
3722 }
3723 
3724 /* save globals to their canonical location and assume they can be
3725    modified by the following code. 'allocated_regs' is used in case a
3726    temporary register needs to be allocated to store a constant. */
3727 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3728 {
3729     int i, n;
3730 
3731     for (i = 0, n = s->nb_globals; i < n; i++) {
3732         temp_save(s, &s->temps[i], allocated_regs);
3733     }
3734 }
3735 
3736 /* sync globals to their canonical location and assume they can be
3737    read by the following code. 'allocated_regs' is used in case a
3738    temporary register needs to be allocated to store a constant. */
3739 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3740 {
3741     int i, n;
3742 
3743     for (i = 0, n = s->nb_globals; i < n; i++) {
3744         TCGTemp *ts = &s->temps[i];
3745         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3746                          || ts->kind == TEMP_FIXED
3747                          || ts->mem_coherent);
3748     }
3749 }
3750 
3751 /* at the end of a basic block, we assume all temporaries are dead and
3752    all globals are stored at their canonical location. */
3753 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3754 {
3755     int i;
3756 
3757     for (i = s->nb_globals; i < s->nb_temps; i++) {
3758         TCGTemp *ts = &s->temps[i];
3759 
3760         switch (ts->kind) {
3761         case TEMP_LOCAL:
3762             temp_save(s, ts, allocated_regs);
3763             break;
3764         case TEMP_NORMAL:
3765             /* The liveness analysis already ensures that temps are dead.
3766                Keep a tcg_debug_assert for safety. */
3767             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3768             break;
3769         case TEMP_CONST:
3770             /* Similarly, we should have freed any allocated register. */
3771             tcg_debug_assert(ts->val_type == TEMP_VAL_CONST);
3772             break;
3773         default:
3774             g_assert_not_reached();
3775         }
3776     }
3777 
3778     save_globals(s, allocated_regs);
3779 }
3780 
3781 /*
3782  * At a conditional branch, we assume all temporaries are dead and
3783  * all globals and local temps are synced to their location.
3784  */
3785 static void tcg_reg_alloc_cbranch(TCGContext *s, TCGRegSet allocated_regs)
3786 {
3787     sync_globals(s, allocated_regs);
3788 
3789     for (int i = s->nb_globals; i < s->nb_temps; i++) {
3790         TCGTemp *ts = &s->temps[i];
3791         /*
3792          * The liveness analysis already ensures that temps are dead.
3793          * Keep tcg_debug_asserts for safety.
3794          */
3795         switch (ts->kind) {
3796         case TEMP_LOCAL:
3797             tcg_debug_assert(ts->val_type != TEMP_VAL_REG || ts->mem_coherent);
3798             break;
3799         case TEMP_NORMAL:
3800             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3801             break;
3802         case TEMP_CONST:
3803             break;
3804         default:
3805             g_assert_not_reached();
3806         }
3807     }
3808 }
3809 
3810 /*
3811  * Specialized code generation for INDEX_op_mov_* with a constant.
3812  */
3813 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3814                                   tcg_target_ulong val, TCGLifeData arg_life,
3815                                   TCGRegSet preferred_regs)
3816 {
3817     /* ENV should not be modified.  */
3818     tcg_debug_assert(!temp_readonly(ots));
3819 
3820     /* The movi is not explicitly generated here.  */
3821     if (ots->val_type == TEMP_VAL_REG) {
3822         s->reg_to_temp[ots->reg] = NULL;
3823     }
3824     ots->val_type = TEMP_VAL_CONST;
3825     ots->val = val;
3826     ots->mem_coherent = 0;
3827     if (NEED_SYNC_ARG(0)) {
3828         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3829     } else if (IS_DEAD_ARG(0)) {
3830         temp_dead(s, ots);
3831     }
3832 }
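
/* Illustrative only: note that no host instruction is emitted by
   tcg_reg_alloc_do_movi() itself.  The constant is merely recorded in
   ots, and is materialized later, either by temp_load() when a register
   is required, or by temp_sync() (possibly via tcg_out_sti) when
   NEED_SYNC_ARG(0) forces a store.  */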
3833 
3834 /*
3835  * Specialized code generation for INDEX_op_mov_*.
3836  */
3837 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3838 {
3839     const TCGLifeData arg_life = op->life;
3840     TCGRegSet allocated_regs, preferred_regs;
3841     TCGTemp *ts, *ots;
3842     TCGType otype, itype;
3843 
3844     allocated_regs = s->reserved_regs;
3845     preferred_regs = op->output_pref[0];
3846     ots = arg_temp(op->args[0]);
3847     ts = arg_temp(op->args[1]);
3848 
3849     /* ENV should not be modified.  */
3850     tcg_debug_assert(!temp_readonly(ots));
3851 
3852     /* Note that otype != itype for no-op truncation.  */
3853     otype = ots->type;
3854     itype = ts->type;
3855 
3856     if (ts->val_type == TEMP_VAL_CONST) {
3857         /* propagate constant or generate sti */
3858         tcg_target_ulong val = ts->val;
3859         if (IS_DEAD_ARG(1)) {
3860             temp_dead(s, ts);
3861         }
3862         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3863         return;
3864     }
3865 
3866     /* If the source value is in memory we're going to be forced
3867        to have it in a register in order to perform the copy.  Copy
3868        the SOURCE value into its own register first, so that we
3869        don't have to reload SOURCE the next time it is used. */
3870     if (ts->val_type == TEMP_VAL_MEM) {
3871         temp_load(s, ts, tcg_target_available_regs[itype],
3872                   allocated_regs, preferred_regs);
3873     }
3874 
3875     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3876     if (IS_DEAD_ARG(0)) {
3877         /* mov to a non-saved dead register makes no sense (even with
3878            liveness analysis disabled). */
3879         tcg_debug_assert(NEED_SYNC_ARG(0));
3880         if (!ots->mem_allocated) {
3881             temp_allocate_frame(s, ots);
3882         }
3883         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3884         if (IS_DEAD_ARG(1)) {
3885             temp_dead(s, ts);
3886         }
3887         temp_dead(s, ots);
3888     } else {
3889         if (IS_DEAD_ARG(1) && ts->kind != TEMP_FIXED) {
3890             /* the mov can be suppressed */
3891             if (ots->val_type == TEMP_VAL_REG) {
3892                 s->reg_to_temp[ots->reg] = NULL;
3893             }
3894             ots->reg = ts->reg;
3895             temp_dead(s, ts);
3896         } else {
3897             if (ots->val_type != TEMP_VAL_REG) {
3898                 /* When allocating a new register, make sure to not spill the
3899                    input one. */
3900                 tcg_regset_set_reg(allocated_regs, ts->reg);
3901                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3902                                          allocated_regs, preferred_regs,
3903                                          ots->indirect_base);
3904             }
3905             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3906                 /*
3907                  * Cross register class move not supported.
3908                  * Store the source register into the destination slot
3909                  * and leave the destination temp as TEMP_VAL_MEM.
3910                  */
3911                 assert(!temp_readonly(ots));
3912                 if (!ots->mem_allocated) {
3913                     temp_allocate_frame(s, ots);
3914                 }
3915                 tcg_out_st(s, ts->type, ts->reg,
3916                            ots->mem_base->reg, ots->mem_offset);
3917                 ots->mem_coherent = 1;
3918                 temp_free_or_dead(s, ots, -1);
3919                 return;
3920             }
3921         }
3922         ots->val_type = TEMP_VAL_REG;
3923         ots->mem_coherent = 0;
3924         s->reg_to_temp[ots->reg] = ots;
3925         if (NEED_SYNC_ARG(0)) {
3926             temp_sync(s, ots, allocated_regs, 0, 0);
3927         }
3928     }
3929 }
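
/* Illustrative only: the three outcomes of tcg_reg_alloc_mov() for a
   register-resident source, schematically:

       output dead, sync needed  ->  store the source to the output slot
       input dies, not fixed     ->  rename: output adopts the input reg
       otherwise                 ->  allocate a register, emit host mov

   The rename case is why a TCG mov often costs no host code at all.  */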
3930 
3931 /*
3932  * Specialized code generation for INDEX_op_dup_vec.
3933  */
3934 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3935 {
3936     const TCGLifeData arg_life = op->life;
3937     TCGRegSet dup_out_regs, dup_in_regs;
3938     TCGTemp *its, *ots;
3939     TCGType itype, vtype;
3940     intptr_t endian_fixup;
3941     unsigned vece;
3942     bool ok;
3943 
3944     ots = arg_temp(op->args[0]);
3945     its = arg_temp(op->args[1]);
3946 
3947     /* ENV should not be modified.  */
3948     tcg_debug_assert(!temp_readonly(ots));
3949 
3950     itype = its->type;
3951     vece = TCGOP_VECE(op);
3952     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3953 
3954     if (its->val_type == TEMP_VAL_CONST) {
3955         /* Propagate constant via movi -> dupi.  */
3956         tcg_target_ulong val = its->val;
3957         if (IS_DEAD_ARG(1)) {
3958             temp_dead(s, its);
3959         }
3960         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3961         return;
3962     }
3963 
3964     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3965     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3966 
3967     /* Allocate the output register now.  */
3968     if (ots->val_type != TEMP_VAL_REG) {
3969         TCGRegSet allocated_regs = s->reserved_regs;
3970 
3971         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3972             /* Make sure to not spill the input register. */
3973             tcg_regset_set_reg(allocated_regs, its->reg);
3974         }
3975         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3976                                  op->output_pref[0], ots->indirect_base);
3977         ots->val_type = TEMP_VAL_REG;
3978         ots->mem_coherent = 0;
3979         s->reg_to_temp[ots->reg] = ots;
3980     }
3981 
3982     switch (its->val_type) {
3983     case TEMP_VAL_REG:
3984         /*
3985          * The dup constraints must be broad, covering all possible VECE.
3986          * However, tcg_out_dup_vec() gets to see the VECE and we allow it
3987          * to fail, indicating that extra moves are required for that case.
3988          */
3989         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3990             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3991                 goto done;
3992             }
3993             /* Try again from memory or a vector input register.  */
3994         }
3995         if (!its->mem_coherent) {
3996             /*
3997              * The input register is not synced, and so an extra store
3998              * would be required to use memory.  Attempt an integer-vector
3999              * register move first.  We do not have a TCGRegSet for this.
4000              */
4001             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
4002                 break;
4003             }
4004             /* Sync the temp back to its slot and load from there.  */
4005             temp_sync(s, its, s->reserved_regs, 0, 0);
4006         }
4007         /* fall through */
4008 
4009     case TEMP_VAL_MEM:
4010 #ifdef HOST_WORDS_BIGENDIAN
4011         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
4012         endian_fixup -= 1 << vece;
4013 #else
4014         endian_fixup = 0;
4015 #endif
4016         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
4017                              its->mem_offset + endian_fixup)) {
4018             goto done;
4019         }
4020         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
4021         break;
4022 
4023     default:
4024         g_assert_not_reached();
4025     }
4026 
4027     /* We now have a vector input register, so dup must succeed. */
4028     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
4029     tcg_debug_assert(ok);
4030 
4031  done:
4032     if (IS_DEAD_ARG(1)) {
4033         temp_dead(s, its);
4034     }
4035     if (NEED_SYNC_ARG(0)) {
4036         temp_sync(s, ots, s->reserved_regs, 0, 0);
4037     }
4038     if (IS_DEAD_ARG(0)) {
4039         temp_dead(s, ots);
4040     }
4041 }
4042 
4043 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
4044 {
4045     const TCGLifeData arg_life = op->life;
4046     const TCGOpDef * const def = &tcg_op_defs[op->opc];
4047     TCGRegSet i_allocated_regs;
4048     TCGRegSet o_allocated_regs;
4049     int i, k, nb_iargs, nb_oargs;
4050     TCGReg reg;
4051     TCGArg arg;
4052     const TCGArgConstraint *arg_ct;
4053     TCGTemp *ts;
4054     TCGArg new_args[TCG_MAX_OP_ARGS];
4055     int const_args[TCG_MAX_OP_ARGS];
4056 
4057     nb_oargs = def->nb_oargs;
4058     nb_iargs = def->nb_iargs;
4059 
4060     /* copy constants */
4061     memcpy(new_args + nb_oargs + nb_iargs,
4062            op->args + nb_oargs + nb_iargs,
4063            sizeof(TCGArg) * def->nb_cargs);
4064 
4065     i_allocated_regs = s->reserved_regs;
4066     o_allocated_regs = s->reserved_regs;
4067 
4068     /* satisfy input constraints */
4069     for (k = 0; k < nb_iargs; k++) {
4070         TCGRegSet i_preferred_regs, o_preferred_regs;
4071 
4072         i = def->args_ct[nb_oargs + k].sort_index;
4073         arg = op->args[i];
4074         arg_ct = &def->args_ct[i];
4075         ts = arg_temp(arg);
4076 
4077         if (ts->val_type == TEMP_VAL_CONST
4078             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
4079             /* constant is OK for instruction */
4080             const_args[i] = 1;
4081             new_args[i] = ts->val;
4082             continue;
4083         }
4084 
4085         i_preferred_regs = o_preferred_regs = 0;
4086         if (arg_ct->ialias) {
4087             o_preferred_regs = op->output_pref[arg_ct->alias_index];
4088 
4089             /*
4090              * If the input is readonly, then it cannot also be an
4091              * output and aliased to itself.  If the input is not
4092              * dead after the instruction, we must allocate a new
4093              * register and move it.
4094              */
4095             if (temp_readonly(ts) || !IS_DEAD_ARG(i)) {
4096                 goto allocate_in_reg;
4097             }
4098 
4099             /*
4100              * Check if the current register has already been allocated
4101              * for another input aliased to an output.
4102              */
4103             if (ts->val_type == TEMP_VAL_REG) {
4104                 reg = ts->reg;
4105                 for (int k2 = 0; k2 < k; k2++) {
4106                     int i2 = def->args_ct[nb_oargs + k2].sort_index;
4107                     if (def->args_ct[i2].ialias && reg == new_args[i2]) {
4108                         goto allocate_in_reg;
4109                     }
4110                 }
4111             }
4112             i_preferred_regs = o_preferred_regs;
4113         }
4114 
4115         temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
4116         reg = ts->reg;
4117 
4118         if (!tcg_regset_test_reg(arg_ct->regs, reg)) {
4119  allocate_in_reg:
4120             /*
4121              * Allocate a new register matching the constraint
4122              * and move the temporary register into it.
4123              */
4124             temp_load(s, ts, tcg_target_available_regs[ts->type],
4125                       i_allocated_regs, 0);
4126             reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
4127                                 o_preferred_regs, ts->indirect_base);
4128             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4129                 /*
4130                  * Cross register class move not supported.  Sync the
4131                  * temp back to its slot and load from there.
4132                  */
4133                 temp_sync(s, ts, i_allocated_regs, 0, 0);
4134                 tcg_out_ld(s, ts->type, reg,
4135                            ts->mem_base->reg, ts->mem_offset);
4136             }
4137         }
4138         new_args[i] = reg;
4139         const_args[i] = 0;
4140         tcg_regset_set_reg(i_allocated_regs, reg);
4141     }
4142 
4143     /* mark dead temporaries and free the associated registers */
4144     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
4145         if (IS_DEAD_ARG(i)) {
4146             temp_dead(s, arg_temp(op->args[i]));
4147         }
4148     }
4149 
4150     if (def->flags & TCG_OPF_COND_BRANCH) {
4151         tcg_reg_alloc_cbranch(s, i_allocated_regs);
4152     } else if (def->flags & TCG_OPF_BB_END) {
4153         tcg_reg_alloc_bb_end(s, i_allocated_regs);
4154     } else {
4155         if (def->flags & TCG_OPF_CALL_CLOBBER) {
4156             /* XXX: permit generic clobber register list ? */
4157             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4158                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4159                     tcg_reg_free(s, i, i_allocated_regs);
4160                 }
4161             }
4162         }
4163         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
4164             /* sync globals if the op has side effects and might trigger
4165                an exception. */
4166             sync_globals(s, i_allocated_regs);
4167         }
4168 
4169         /* satisfy the output constraints */
4170         for (k = 0; k < nb_oargs; k++) {
4171             i = def->args_ct[k].sort_index;
4172             arg = op->args[i];
4173             arg_ct = &def->args_ct[i];
4174             ts = arg_temp(arg);
4175 
4176             /* ENV should not be modified.  */
4177             tcg_debug_assert(!temp_readonly(ts));
4178 
4179             if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
4180                 reg = new_args[arg_ct->alias_index];
4181             } else if (arg_ct->newreg) {
4182                 reg = tcg_reg_alloc(s, arg_ct->regs,
4183                                     i_allocated_regs | o_allocated_regs,
4184                                     op->output_pref[k], ts->indirect_base);
4185             } else {
4186                 reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
4187                                     op->output_pref[k], ts->indirect_base);
4188             }
4189             tcg_regset_set_reg(o_allocated_regs, reg);
4190             if (ts->val_type == TEMP_VAL_REG) {
4191                 s->reg_to_temp[ts->reg] = NULL;
4192             }
4193             ts->val_type = TEMP_VAL_REG;
4194             ts->reg = reg;
4195             /*
4196              * Temp value is modified, so the value kept in memory is
4197              * potentially not the same.
4198              */
4199             ts->mem_coherent = 0;
4200             s->reg_to_temp[reg] = ts;
4201             new_args[i] = reg;
4202         }
4203     }
4204 
4205     /* emit instruction */
4206     if (def->flags & TCG_OPF_VECTOR) {
4207         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
4208                        new_args, const_args);
4209     } else {
4210         tcg_out_op(s, op->opc, new_args, const_args);
4211     }
4212 
4213     /* move the outputs in the correct register if needed */
4214     for (i = 0; i < nb_oargs; i++) {
4215         ts = arg_temp(op->args[i]);
4216 
4217         /* ENV should not be modified.  */
4218         tcg_debug_assert(!temp_readonly(ts));
4219 
4220         if (NEED_SYNC_ARG(i)) {
4221             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
4222         } else if (IS_DEAD_ARG(i)) {
4223             temp_dead(s, ts);
4224         }
4225     }
4226 }
4227 
4228 static bool tcg_reg_alloc_dup2(TCGContext *s, const TCGOp *op)
4229 {
4230     const TCGLifeData arg_life = op->life;
4231     TCGTemp *ots, *itsl, *itsh;
4232     TCGType vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
4233 
4234     /* This opcode is only valid for 32-bit hosts, for 64-bit elements. */
4235     tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
4236     tcg_debug_assert(TCGOP_VECE(op) == MO_64);
4237 
4238     ots = arg_temp(op->args[0]);
4239     itsl = arg_temp(op->args[1]);
4240     itsh = arg_temp(op->args[2]);
4241 
4242     /* ENV should not be modified.  */
4243     tcg_debug_assert(!temp_readonly(ots));
4244 
4245     /* Allocate the output register now.  */
4246     if (ots->val_type != TEMP_VAL_REG) {
4247         TCGRegSet allocated_regs = s->reserved_regs;
4248         TCGRegSet dup_out_regs =
4249             tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
4250 
4251         /* Make sure to not spill the input registers. */
4252         if (!IS_DEAD_ARG(1) && itsl->val_type == TEMP_VAL_REG) {
4253             tcg_regset_set_reg(allocated_regs, itsl->reg);
4254         }
4255         if (!IS_DEAD_ARG(2) && itsh->val_type == TEMP_VAL_REG) {
4256             tcg_regset_set_reg(allocated_regs, itsh->reg);
4257         }
4258 
4259         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
4260                                  op->output_pref[0], ots->indirect_base);
4261         ots->val_type = TEMP_VAL_REG;
4262         ots->mem_coherent = 0;
4263         s->reg_to_temp[ots->reg] = ots;
4264     }
4265 
4266     /* Promote dup2 of immediates to dupi_vec. */
4267     if (itsl->val_type == TEMP_VAL_CONST && itsh->val_type == TEMP_VAL_CONST) {
4268         uint64_t val = deposit64(itsl->val, 32, 32, itsh->val);
4269         MemOp vece = MO_64;
4270 
4271         if (val == dup_const(MO_8, val)) {
4272             vece = MO_8;
4273         } else if (val == dup_const(MO_16, val)) {
4274             vece = MO_16;
4275         } else if (val == dup_const(MO_32, val)) {
4276             vece = MO_32;
4277         }
4278 
4279         tcg_out_dupi_vec(s, vtype, vece, ots->reg, val);
4280         goto done;
4281     }
4282 
4283     /* If the two inputs form one 64-bit value, try dupm_vec. */
4284     if (itsl + 1 == itsh && itsl->base_type == TCG_TYPE_I64) {
4285         if (!itsl->mem_coherent) {
4286             temp_sync(s, itsl, s->reserved_regs, 0, 0);
4287         }
4288         if (!itsh->mem_coherent) {
4289             temp_sync(s, itsh, s->reserved_regs, 0, 0);
4290         }
4291 #ifdef HOST_WORDS_BIGENDIAN
4292         TCGTemp *its = itsh;
4293 #else
4294         TCGTemp *its = itsl;
4295 #endif
4296         if (tcg_out_dupm_vec(s, vtype, MO_64, ots->reg,
4297                              its->mem_base->reg, its->mem_offset)) {
4298             goto done;
4299         }
4300     }
4301 
4302     /* Fall back to generic expansion. */
4303     return false;
4304 
4305  done:
4306     if (IS_DEAD_ARG(1)) {
4307         temp_dead(s, itsl);
4308     }
4309     if (IS_DEAD_ARG(2)) {
4310         temp_dead(s, itsh);
4311     }
4312     if (NEED_SYNC_ARG(0)) {
4313         temp_sync(s, ots, s->reserved_regs, 0, IS_DEAD_ARG(0));
4314     } else if (IS_DEAD_ARG(0)) {
4315         temp_dead(s, ots);
4316     }
4317     return true;
4318 }
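
/* Illustrative only: deposit64(itsl->val, 32, 32, itsh->val) above
   simply assembles the 64-bit element from two 32-bit halves.
   make_elt() is a hypothetical standalone equivalent.  */
#if 0
#include <stdint.h>

static uint64_t make_elt(uint32_t lo, uint32_t hi)
{
    return ((uint64_t)hi << 32) | lo;
}
#endif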
4319 
4320 #ifdef TCG_TARGET_STACK_GROWSUP
4321 #define STACK_DIR(x) (-(x))
4322 #else
4323 #define STACK_DIR(x) (x)
4324 #endif
4325 
4326 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
4327 {
4328     const int nb_oargs = TCGOP_CALLO(op);
4329     const int nb_iargs = TCGOP_CALLI(op);
4330     const TCGLifeData arg_life = op->life;
4331     int flags, nb_regs, i;
4332     TCGReg reg;
4333     TCGArg arg;
4334     TCGTemp *ts;
4335     intptr_t stack_offset;
4336     size_t call_stack_size;
4337     tcg_insn_unit *func_addr;
4338     int allocate_args;
4339     TCGRegSet allocated_regs;
4340 
4341     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
4342     flags = op->args[nb_oargs + nb_iargs + 1];
4343 
4344     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
4345     if (nb_regs > nb_iargs) {
4346         nb_regs = nb_iargs;
4347     }
4348 
4349     /* assign stack slots first */
4350     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
4351     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
4352         ~(TCG_TARGET_STACK_ALIGN - 1);
4353     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
4354     if (allocate_args) {
4355         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
4356            preallocate call stack */
4357         tcg_abort();
4358     }
4359 
4360     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
4361     for (i = nb_regs; i < nb_iargs; i++) {
4362         arg = op->args[nb_oargs + i];
4363 #ifdef TCG_TARGET_STACK_GROWSUP
4364         stack_offset -= sizeof(tcg_target_long);
4365 #endif
4366         if (arg != TCG_CALL_DUMMY_ARG) {
4367             ts = arg_temp(arg);
4368             temp_load(s, ts, tcg_target_available_regs[ts->type],
4369                       s->reserved_regs, 0);
4370             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
4371         }
4372 #ifndef TCG_TARGET_STACK_GROWSUP
4373         stack_offset += sizeof(tcg_target_long);
4374 #endif
4375     }
4376 
4377     /* assign input registers */
4378     allocated_regs = s->reserved_regs;
4379     for (i = 0; i < nb_regs; i++) {
4380         arg = op->args[nb_oargs + i];
4381         if (arg != TCG_CALL_DUMMY_ARG) {
4382             ts = arg_temp(arg);
4383             reg = tcg_target_call_iarg_regs[i];
4384 
4385             if (ts->val_type == TEMP_VAL_REG) {
4386                 if (ts->reg != reg) {
4387                     tcg_reg_free(s, reg, allocated_regs);
4388                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
4389                         /*
4390                          * Cross register class move not supported.  Sync the
4391                          * temp back to its slot and load from there.
4392                          */
4393                         temp_sync(s, ts, allocated_regs, 0, 0);
4394                         tcg_out_ld(s, ts->type, reg,
4395                                    ts->mem_base->reg, ts->mem_offset);
4396                     }
4397                 }
4398             } else {
4399                 TCGRegSet arg_set = 0;
4400 
4401                 tcg_reg_free(s, reg, allocated_regs);
4402                 tcg_regset_set_reg(arg_set, reg);
4403                 temp_load(s, ts, arg_set, allocated_regs, 0);
4404             }
4405 
4406             tcg_regset_set_reg(allocated_regs, reg);
4407         }
4408     }
4409 
4410     /* mark dead temporaries and free the associated registers */
4411     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
4412         if (IS_DEAD_ARG(i)) {
4413             temp_dead(s, arg_temp(op->args[i]));
4414         }
4415     }
4416 
4417     /* clobber call registers */
4418     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
4419         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
4420             tcg_reg_free(s, i, allocated_regs);
4421         }
4422     }
4423 
4424     /* Save globals if they might be written by the helper, sync them if
4425        they might be read. */
4426     if (flags & TCG_CALL_NO_READ_GLOBALS) {
4427         /* Nothing to do */
4428     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
4429         sync_globals(s, allocated_regs);
4430     } else {
4431         save_globals(s, allocated_regs);
4432     }
4433 
4434     tcg_out_call(s, func_addr);
4435 
4436     /* assign output registers and emit moves if needed */
4437     for (i = 0; i < nb_oargs; i++) {
4438         arg = op->args[i];
4439         ts = arg_temp(arg);
4440 
4441         /* ENV should not be modified.  */
4442         tcg_debug_assert(!temp_readonly(ts));
4443 
4444         reg = tcg_target_call_oarg_regs[i];
4445         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
4446         if (ts->val_type == TEMP_VAL_REG) {
4447             s->reg_to_temp[ts->reg] = NULL;
4448         }
4449         ts->val_type = TEMP_VAL_REG;
4450         ts->reg = reg;
4451         ts->mem_coherent = 0;
4452         s->reg_to_temp[reg] = ts;
4453         if (NEED_SYNC_ARG(i)) {
4454             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
4455         } else if (IS_DEAD_ARG(i)) {
4456             temp_dead(s, ts);
4457         }
4458     }
4459 }
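
/* Illustrative only: the lowering order used by tcg_reg_alloc_call():

       1. store arguments beyond nb_regs to their stack slots,
       2. load the first nb_regs arguments into the fixed iarg registers,
       3. free every call-clobbered register, spilling as needed,
       4. save or sync globals according to the TCG_CALL_NO_* flags,
       5. emit the call and bind outputs to the fixed oarg registers.  */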
4460 
4461 #ifdef CONFIG_PROFILER
4462 
4463 /* avoid copy/paste errors */
4464 #define PROF_ADD(to, from, field)                       \
4465     do {                                                \
4466         (to)->field += qatomic_read(&((from)->field));  \
4467     } while (0)
4468 
4469 #define PROF_MAX(to, from, field)                                       \
4470     do {                                                                \
4471         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
4472         if (val__ > (to)->field) {                                      \
4473             (to)->field = val__;                                        \
4474         }                                                               \
4475     } while (0)
4476 
4477 /* Pass in a zeroed @prof.  */
4478 static inline
4479 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
4480 {
4481     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4482     unsigned int i;
4483 
4484     for (i = 0; i < n_ctxs; i++) {
4485         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4486         const TCGProfile *orig = &s->prof;
4487 
4488         if (counters) {
4489             PROF_ADD(prof, orig, cpu_exec_time);
4490             PROF_ADD(prof, orig, tb_count1);
4491             PROF_ADD(prof, orig, tb_count);
4492             PROF_ADD(prof, orig, op_count);
4493             PROF_MAX(prof, orig, op_count_max);
4494             PROF_ADD(prof, orig, temp_count);
4495             PROF_MAX(prof, orig, temp_count_max);
4496             PROF_ADD(prof, orig, del_op_count);
4497             PROF_ADD(prof, orig, code_in_len);
4498             PROF_ADD(prof, orig, code_out_len);
4499             PROF_ADD(prof, orig, search_out_len);
4500             PROF_ADD(prof, orig, interm_time);
4501             PROF_ADD(prof, orig, code_time);
4502             PROF_ADD(prof, orig, la_time);
4503             PROF_ADD(prof, orig, opt_time);
4504             PROF_ADD(prof, orig, restore_count);
4505             PROF_ADD(prof, orig, restore_time);
4506         }
4507         if (table) {
4508             int i;
4509 
4510             for (i = 0; i < NB_OPS; i++) {
4511                 PROF_ADD(prof, orig, table_op_count[i]);
4512             }
4513         }
4514     }
4515 }
4516 
4517 #undef PROF_ADD
4518 #undef PROF_MAX
4519 
4520 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4521 {
4522     tcg_profile_snapshot(prof, true, false);
4523 }
4524 
4525 static void tcg_profile_snapshot_table(TCGProfile *prof)
4526 {
4527     tcg_profile_snapshot(prof, false, true);
4528 }
4529 
4530 void tcg_dump_op_count(void)
4531 {
4532     TCGProfile prof = {};
4533     int i;
4534 
4535     tcg_profile_snapshot_table(&prof);
4536     for (i = 0; i < NB_OPS; i++) {
4537         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4538                     prof.table_op_count[i]);
4539     }
4540 }
4541 
4542 int64_t tcg_cpu_exec_time(void)
4543 {
4544     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4545     unsigned int i;
4546     int64_t ret = 0;
4547 
4548     for (i = 0; i < n_ctxs; i++) {
4549         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4550         const TCGProfile *prof = &s->prof;
4551 
4552         ret += qatomic_read(&prof->cpu_exec_time);
4553     }
4554     return ret;
4555 }
4556 #else
4557 void tcg_dump_op_count(void)
4558 {
4559     qemu_printf("[TCG profiler not compiled]\n");
4560 }
4561 
4562 int64_t tcg_cpu_exec_time(void)
4563 {
4564     error_report("%s: TCG profiler not compiled", __func__);
4565     exit(EXIT_FAILURE);
4566 }
4567 #endif
4568 
4569 
4570 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4571 {
4572 #ifdef CONFIG_PROFILER
4573     TCGProfile *prof = &s->prof;
4574 #endif
4575     int i, num_insns;
4576     TCGOp *op;
4577 
4578 #ifdef CONFIG_PROFILER
4579     {
4580         int n = 0;
4581 
4582         QTAILQ_FOREACH(op, &s->ops, link) {
4583             n++;
4584         }
4585         qatomic_set(&prof->op_count, prof->op_count + n);
4586         if (n > prof->op_count_max) {
4587             qatomic_set(&prof->op_count_max, n);
4588         }
4589 
4590         n = s->nb_temps;
4591         qatomic_set(&prof->temp_count, prof->temp_count + n);
4592         if (n > prof->temp_count_max) {
4593             qatomic_set(&prof->temp_count_max, n);
4594         }
4595     }
4596 #endif
4597 
4598 #ifdef DEBUG_DISAS
4599     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4600                  && qemu_log_in_addr_range(tb->pc))) {
4601         FILE *logfile = qemu_log_lock();
4602         qemu_log("OP:\n");
4603         tcg_dump_ops(s, false);
4604         qemu_log("\n");
4605         qemu_log_unlock(logfile);
4606     }
4607 #endif
4608 
4609 #ifdef CONFIG_DEBUG_TCG
4610     /* Ensure all labels referenced have been emitted.  */
4611     {
4612         TCGLabel *l;
4613         bool error = false;
4614 
4615         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4616             if (unlikely(!l->present) && l->refs) {
4617                 qemu_log_mask(CPU_LOG_TB_OP,
4618                               "$L%d referenced but not present.\n", l->id);
4619                 error = true;
4620             }
4621         }
4622         assert(!error);
4623     }
4624 #endif
4625 
4626 #ifdef CONFIG_PROFILER
4627     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4628 #endif
4629 
4630 #ifdef USE_TCG_OPTIMIZATIONS
4631     tcg_optimize(s);
4632 #endif
4633 
4634 #ifdef CONFIG_PROFILER
4635     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4636     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4637 #endif
4638 
4639     reachable_code_pass(s);
4640     liveness_pass_1(s);
4641 
4642     if (s->nb_indirects > 0) {
4643 #ifdef DEBUG_DISAS
4644         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4645                      && qemu_log_in_addr_range(tb->pc))) {
4646             FILE *logfile = qemu_log_lock();
4647             qemu_log("OP before indirect lowering:\n");
4648             tcg_dump_ops(s, false);
4649             qemu_log("\n");
4650             qemu_log_unlock(logfile);
4651         }
4652 #endif
4653         /* Replace indirect temps with direct temps.  */
4654         if (liveness_pass_2(s)) {
4655             /* If changes were made, re-run liveness.  */
4656             liveness_pass_1(s);
4657         }
4658     }
4659 
4660 #ifdef CONFIG_PROFILER
4661     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4662 #endif
4663 
4664 #ifdef DEBUG_DISAS
4665     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4666                  && qemu_log_in_addr_range(tb->pc))) {
4667         FILE *logfile = qemu_log_lock();
4668         qemu_log("OP after optimization and liveness analysis:\n");
4669         tcg_dump_ops(s, true);
4670         qemu_log("\n");
4671         qemu_log_unlock(logfile);
4672     }
4673 #endif
4674 
4675     tcg_reg_alloc_start(s);
4676 
4677     /*
4678      * Reset the buffer pointers when restarting after overflow.
4679      * TODO: Move this into translate-all.c with the rest of the
4680      * buffer management.  Having only this done here is confusing.
4681      */
4682     s->code_buf = tcg_splitwx_to_rw(tb->tc.ptr);
4683     s->code_ptr = s->code_buf;
4684 
4685 #ifdef TCG_TARGET_NEED_LDST_LABELS
4686     QSIMPLEQ_INIT(&s->ldst_labels);
4687 #endif
4688 #ifdef TCG_TARGET_NEED_POOL_LABELS
4689     s->pool_labels = NULL;
4690 #endif
4691 
4692     num_insns = -1;
4693     QTAILQ_FOREACH(op, &s->ops, link) {
4694         TCGOpcode opc = op->opc;
4695 
4696 #ifdef CONFIG_PROFILER
4697         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4698 #endif
4699 
4700         switch (opc) {
4701         case INDEX_op_mov_i32:
4702         case INDEX_op_mov_i64:
4703         case INDEX_op_mov_vec:
4704             tcg_reg_alloc_mov(s, op);
4705             break;
4706         case INDEX_op_dup_vec:
4707             tcg_reg_alloc_dup(s, op);
4708             break;
4709         case INDEX_op_insn_start:
4710             if (num_insns >= 0) {
4711                 size_t off = tcg_current_code_size(s);
4712                 s->gen_insn_end_off[num_insns] = off;
4713                 /* Assert that we do not overflow our stored offset.  */
4714                 assert(s->gen_insn_end_off[num_insns] == off);
4715             }
4716             num_insns++;
4717             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4718                 target_ulong a;
4719 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4720                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4721 #else
4722                 a = op->args[i];
4723 #endif
4724                 s->gen_insn_data[num_insns][i] = a;
4725             }
4726             break;
4727         case INDEX_op_discard:
4728             temp_dead(s, arg_temp(op->args[0]));
4729             break;
4730         case INDEX_op_set_label:
4731             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4732             tcg_out_label(s, arg_label(op->args[0]));
4733             break;
4734         case INDEX_op_call:
4735             tcg_reg_alloc_call(s, op);
4736             break;
4737         case INDEX_op_dup2_vec:
4738             if (tcg_reg_alloc_dup2(s, op)) {
4739                 break;
4740             }
4741             /* fall through */
4742         default:
4743             /* Sanity check that we've not introduced any unhandled opcodes. */
4744             tcg_debug_assert(tcg_op_supported(opc));
4745             /* Note: it would be much faster to have specialized
4746                register allocator functions for some common argument
4747                patterns. */
4748             tcg_reg_alloc_op(s, op);
4749             break;
4750         }
4751 #ifdef CONFIG_DEBUG_TCG
4752         check_regs(s);
4753 #endif
4754         /* Test for (pending) buffer overflow.  The assumption is that any
4755            one operation beginning below the high water mark cannot overrun
4756            the buffer completely.  Thus we can test for overflow after
4757            generating code without having to check during generation.  */
4758         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4759             return -1;
4760         }
4761         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4762         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4763             return -2;
4764         }
4765     }
4766     tcg_debug_assert(num_insns >= 0);
4767     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4768 
4769     /* Generate TB finalization at the end of block */
4770 #ifdef TCG_TARGET_NEED_LDST_LABELS
4771     i = tcg_out_ldst_finalize(s);
4772     if (i < 0) {
4773         return i;
4774     }
4775 #endif
4776 #ifdef TCG_TARGET_NEED_POOL_LABELS
4777     i = tcg_out_pool_finalize(s);
4778     if (i < 0) {
4779         return i;
4780     }
4781 #endif
4782     if (!tcg_resolve_relocs(s)) {
4783         return -2;
4784     }
4785 
4786 #ifndef CONFIG_TCG_INTERPRETER
4787     /* Flush dcache/icache over the RW and RX views of the generated code. */
4788     flush_idcache_range((uintptr_t)tcg_splitwx_to_rx(s->code_buf),
4789                         (uintptr_t)s->code_buf,
4790                         tcg_ptr_byte_diff(s->code_ptr, s->code_buf));
4791 #endif
4792 
4793     return tcg_current_code_size(s);
4794 }
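/*
 * A caller is expected to handle the negative returns above by flushing
 * and restarting translation.  A minimal sketch of that retry logic,
 * under the assumption that buffer management stays in translate-all.c
 * (identifiers below are illustrative, not the exact upstream code):
 *
 *     gen_code_size = tcg_gen_code(tcg_ctx, tb);
 *     if (unlikely(gen_code_size < 0)) {
 *         if (gen_code_size == -1) {
 *             // code_gen_buffer (high-water) overflow: obtain a fresh
 *             // buffer region and retranslate this TB.
 *         } else {  // -2
 *             // TB too large for gen_insn_end_off (> UINT16_MAX):
 *             // retry with fewer guest insns in the block.
 *         }
 *     }
 */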
4795 
4796 #ifdef CONFIG_PROFILER
4797 void tcg_dump_info(void)
4798 {
4799     TCGProfile prof = {};
4800     const TCGProfile *s;
4801     int64_t tb_count;
4802     int64_t tb_div_count;
4803     int64_t tot;
4804 
4805     tcg_profile_snapshot_counters(&prof);
4806     s = &prof;
4807     tb_count = s->tb_count;
4808     tb_div_count = tb_count ? tb_count : 1;
4809     tot = s->interm_time + s->code_time;
4810 
4811     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4812                 tot, tot / 2.4e9);
4813     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4814                 " %0.1f%%)\n",
4815                 tb_count, s->tb_count1 - tb_count,
4816                 (double)(s->tb_count1 - s->tb_count)
4817                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4818     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4819                 (double)s->op_count / tb_div_count, s->op_count_max);
4820     qemu_printf("deleted ops/TB      %0.2f\n",
4821                 (double)s->del_op_count / tb_div_count);
4822     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4823                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4824     qemu_printf("avg host code/TB    %0.1f\n",
4825                 (double)s->code_out_len / tb_div_count);
4826     qemu_printf("avg search data/TB  %0.1f\n",
4827                 (double)s->search_out_len / tb_div_count);
4828 
4829     qemu_printf("cycles/op           %0.1f\n",
4830                 s->op_count ? (double)tot / s->op_count : 0);
4831     qemu_printf("cycles/in byte      %0.1f\n",
4832                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4833     qemu_printf("cycles/out byte     %0.1f\n",
4834                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4835     qemu_printf("cycles/search byte  %0.1f\n",
4836                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4837     if (tot == 0) {
4838         tot = 1;
4839     }
4840     qemu_printf("  gen_interm time   %0.1f%%\n",
4841                 (double)s->interm_time / tot * 100.0);
4842     qemu_printf("  gen_code time     %0.1f%%\n",
4843                 (double)s->code_time / tot * 100.0);
4844     qemu_printf("optim./code time    %0.1f%%\n",
4845                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4846                 * 100.0);
4847     qemu_printf("liveness/code time  %0.1f%%\n",
4848                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4849     qemu_printf("cpu_restore count   %" PRId64 "\n",
4850                 s->restore_count);
4851     qemu_printf("  avg cycles        %0.1f\n",
4852                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4853 }
4854 #else
4855 void tcg_dump_info(void)
4856 {
4857     qemu_printf("[TCG profiler not compiled]\n");
4858 }
4859 #endif
4860 
4861 #ifdef ELF_HOST_MACHINE
4862 /* In order to use this feature, the backend needs to do three things
4863    (a sketch follows this comment):
4864    (1) Define ELF_HOST_MACHINE, which both supplies the e_machine value
4865        for the ELF image and signals support for the feature.
4866 
4867    (2) Define tcg_register_jit.  This should create a buffer containing
4868        the contents of a .debug_frame section that describes the post-
4869        prologue unwind info for the tcg machine.
4870 
4871    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4872 */
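/*
 * A minimal sketch of steps (2) and (3) on the backend side, assuming a
 * backend-defined DebugFrame structure wrapping the CIE/FDE headers
 * above (the struct name and contents here are illustrative):
 *
 *     void tcg_register_jit(const void *buf, size_t buf_size)
 *     {
 *         static const DebugFrame debug_frame = {
 *             // .debug_frame contents: a CIE describing the prologue's
 *             // register/stack layout, plus the FDE header; the FDE's
 *             // func_start/func_len are patched later by
 *             // tcg_register_jit_int().
 *         };
 *         tcg_register_jit_int(buf, buf_size,
 *                              &debug_frame, sizeof(debug_frame));
 *     }
 */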
4873 
4874 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4875 typedef enum {
4876     JIT_NOACTION = 0,
4877     JIT_REGISTER_FN,
4878     JIT_UNREGISTER_FN
4879 } jit_actions_t;
4880 
4881 struct jit_code_entry {
4882     struct jit_code_entry *next_entry;
4883     struct jit_code_entry *prev_entry;
4884     const void *symfile_addr;
4885     uint64_t symfile_size;
4886 };
4887 
4888 struct jit_descriptor {
4889     uint32_t version;
4890     uint32_t action_flag;
4891     struct jit_code_entry *relevant_entry;
4892     struct jit_code_entry *first_entry;
4893 };
4894 
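/* GDB sets a breakpoint on this function to be notified of new JIT
   code; noinline plus the empty asm keep the compiler from eliding
   or inlining the call.  */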
4895 void __jit_debug_register_code(void) __attribute__((noinline));
4896 void __jit_debug_register_code(void)
4897 {
4898     asm("");
4899 }
4900 
4901 /* Must statically initialize the version, because GDB may check
4902    the version before we can set it.  */
4903 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4904 
4905 /* End GDB interface.  */
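/*
 * QEMU registers exactly one entry and never unregisters it.  For
 * reference, the GDB JIT interface defines unregistration as the
 * mirror image of the registration done in tcg_register_jit_int()
 * below; a sketch (hypothetical, no such path exists in this file):
 *
 *     __jit_debug_descriptor.relevant_entry = entry;
 *     // unlink entry from the first_entry list ...
 *     __jit_debug_descriptor.action_flag = JIT_UNREGISTER_FN;
 *     __jit_debug_register_code();
 */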
4906 
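/* Return the byte offset of STR within STRTAB.  STR must be present:
   the loop has no other exit, which is safe here because every lookup
   comes from the fixed img_template string table below.  */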
4907 static int find_string(const char *strtab, const char *str)
4908 {
4909     const char *p = strtab + 1;
4910 
4911     while (1) {
4912         if (strcmp(p, str) == 0) {
4913             return p - strtab;
4914         }
4915         p += strlen(p) + 1;
4916     }
4917 }
4918 
4919 static void tcg_register_jit_int(const void *buf_ptr, size_t buf_size,
4920                                  const void *debug_frame,
4921                                  size_t debug_frame_size)
4922 {
4923     struct __attribute__((packed)) DebugInfo {
4924         uint32_t  len;
4925         uint16_t  version;
4926         uint32_t  abbrev;
4927         uint8_t   ptr_size;
4928         uint8_t   cu_die;
4929         uint16_t  cu_lang;
4930         uintptr_t cu_low_pc;
4931         uintptr_t cu_high_pc;
4932         uint8_t   fn_die;
4933         char      fn_name[16];
4934         uintptr_t fn_low_pc;
4935         uintptr_t fn_high_pc;
4936         uint8_t   cu_eoc;
4937     };
4938 
4939     struct ElfImage {
4940         ElfW(Ehdr) ehdr;
4941         ElfW(Phdr) phdr;
4942         ElfW(Shdr) shdr[7];
4943         ElfW(Sym)  sym[2];
4944         struct DebugInfo di;
4945         uint8_t    da[24];
4946         char       str[80];
4947     };
4948 
4949     struct ElfImage *img;
4950 
4951     static const struct ElfImage img_template = {
4952         .ehdr = {
4953             .e_ident[EI_MAG0] = ELFMAG0,
4954             .e_ident[EI_MAG1] = ELFMAG1,
4955             .e_ident[EI_MAG2] = ELFMAG2,
4956             .e_ident[EI_MAG3] = ELFMAG3,
4957             .e_ident[EI_CLASS] = ELF_CLASS,
4958             .e_ident[EI_DATA] = ELF_DATA,
4959             .e_ident[EI_VERSION] = EV_CURRENT,
4960             .e_type = ET_EXEC,
4961             .e_machine = ELF_HOST_MACHINE,
4962             .e_version = EV_CURRENT,
4963             .e_phoff = offsetof(struct ElfImage, phdr),
4964             .e_shoff = offsetof(struct ElfImage, shdr),
4965             .e_ehsize = sizeof(ElfW(Ehdr)),
4966             .e_phentsize = sizeof(ElfW(Phdr)),
4967             .e_phnum = 1,
4968             .e_shentsize = sizeof(ElfW(Shdr)),
4969             .e_shnum = ARRAY_SIZE(img->shdr),
4970             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4971 #ifdef ELF_HOST_FLAGS
4972             .e_flags = ELF_HOST_FLAGS,
4973 #endif
4974 #ifdef ELF_OSABI
4975             .e_ident[EI_OSABI] = ELF_OSABI,
4976 #endif
4977         },
4978         .phdr = {
4979             .p_type = PT_LOAD,
4980             .p_flags = PF_X,
4981         },
4982         .shdr = {
4983             [0] = { .sh_type = SHT_NULL },
4984             /* Trick: The contents of code_gen_buffer are not present in
4985                this fake ELF file; that got allocated elsewhere.  Therefore
4986                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4987                will not look for contents.  We can record any address.  */
4988             [1] = { /* .text */
4989                 .sh_type = SHT_NOBITS,
4990                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4991             },
4992             [2] = { /* .debug_info */
4993                 .sh_type = SHT_PROGBITS,
4994                 .sh_offset = offsetof(struct ElfImage, di),
4995                 .sh_size = sizeof(struct DebugInfo),
4996             },
4997             [3] = { /* .debug_abbrev */
4998                 .sh_type = SHT_PROGBITS,
4999                 .sh_offset = offsetof(struct ElfImage, da),
5000                 .sh_size = sizeof(img->da),
5001             },
5002             [4] = { /* .debug_frame */
5003                 .sh_type = SHT_PROGBITS,
5004                 .sh_offset = sizeof(struct ElfImage),
5005             },
5006             [5] = { /* .symtab */
5007                 .sh_type = SHT_SYMTAB,
5008                 .sh_offset = offsetof(struct ElfImage, sym),
5009                 .sh_size = sizeof(img->sym),
5010                 .sh_info = 1,
5011                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
5012                 .sh_entsize = sizeof(ElfW(Sym)),
5013             },
5014             [6] = { /* .strtab */
5015                 .sh_type = SHT_STRTAB,
5016                 .sh_offset = offsetof(struct ElfImage, str),
5017                 .sh_size = sizeof(img->str),
5018             }
5019         },
5020         .sym = {
5021             [1] = { /* code_gen_buffer */
5022                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
5023                 .st_shndx = 1,
5024             }
5025         },
5026         .di = {
5027             .len = sizeof(struct DebugInfo) - 4,
5028             .version = 2,
5029             .ptr_size = sizeof(void *),
5030             .cu_die = 1,
5031             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
5032             .fn_die = 2,
5033             .fn_name = "code_gen_buffer"
5034         },
5035         .da = {
5036             1,          /* abbrev number (the cu) */
5037             0x11, 1,    /* DW_TAG_compile_unit, has children */
5038             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
5039             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5040             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5041             0, 0,       /* end of abbrev */
5042             2,          /* abbrev number (the fn) */
5043             0x2e, 0,    /* DW_TAG_subprogram, no children */
5044             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
5045             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
5046             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
5047             0, 0,       /* end of abbrev */
5048             0           /* no more abbrev */
5049         },
5050         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
5051                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
5052     };
5053 
5054     /* We only need a single jit entry; statically allocate it.  */
5055     static struct jit_code_entry one_entry;
5056 
5057     uintptr_t buf = (uintptr_t)buf_ptr;
5058     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
5059     DebugFrameHeader *dfh;
5060 
5061     img = g_malloc(img_size);
5062     *img = img_template;
5063 
5064     img->phdr.p_vaddr = buf;
5065     img->phdr.p_paddr = buf;
5066     img->phdr.p_memsz = buf_size;
5067 
5068     img->shdr[1].sh_name = find_string(img->str, ".text");
5069     img->shdr[1].sh_addr = buf;
5070     img->shdr[1].sh_size = buf_size;
5071 
5072     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
5073     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
5074 
5075     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
5076     img->shdr[4].sh_size = debug_frame_size;
5077 
5078     img->shdr[5].sh_name = find_string(img->str, ".symtab");
5079     img->shdr[6].sh_name = find_string(img->str, ".strtab");
5080 
5081     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
5082     img->sym[1].st_value = buf;
5083     img->sym[1].st_size = buf_size;
5084 
5085     img->di.cu_low_pc = buf;
5086     img->di.cu_high_pc = buf + buf_size;
5087     img->di.fn_low_pc = buf;
5088     img->di.fn_high_pc = buf + buf_size;
5089 
5090     dfh = (DebugFrameHeader *)(img + 1);
5091     memcpy(dfh, debug_frame, debug_frame_size);
5092     dfh->fde.func_start = buf;
5093     dfh->fde.func_len = buf_size;
5094 
5095 #ifdef DEBUG_JIT
5096     /* Define DEBUG_JIT above to dump the generated ELF image to a file,
5097        for inspection with readelf, objdump, or other such utilities.  */
5098     {
5099         FILE *f = fopen("/tmp/qemu.jit", "w+b");
5100         if (f) {
5101             if (fwrite(img, img_size, 1, f) != 1) {
5102                 /* With nmemb == 1, fwrite returns 1; this check merely consumes the otherwise-unused return value.  */
5103             }
5104             fclose(f);
5105         }
5106     }
5107 #endif
5108 
5109     one_entry.symfile_addr = img;
5110     one_entry.symfile_size = img_size;
5111 
5112     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
5113     __jit_debug_descriptor.relevant_entry = &one_entry;
5114     __jit_debug_descriptor.first_entry = &one_entry;
5115     __jit_debug_register_code();
5116 }
5117 #else
5118 /* No support for the feature.  Provide the entry point expected by exec.c,
5119    and implement the internal function we declared earlier.  */
5120 
5121 static void tcg_register_jit_int(const void *buf, size_t size,
5122                                  const void *debug_frame,
5123                                  size_t debug_frame_size)
5124 {
5125 }
5126 
5127 void tcg_register_jit(const void *buf, size_t buf_size)
5128 {
5129 }
5130 #endif /* ELF_HOST_MACHINE */
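/*
 * Net effect, assuming a GDB new enough to implement the JIT interface:
 * backtraces through translated code resolve to the code_gen_buffer
 * symbol instead of "??", and the .debug_frame data lets the unwinder
 * step from JIT frames back into QEMU proper.
 */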
5131 
5132 #if !TCG_TARGET_MAYBE_vec
5133 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
5134 {
5135     g_assert_not_reached();
5136 }
5137 #endif
5138