xref: /qemu/tcg/tcg.c (revision b3137100)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 
39 /* Note: the long term plan is to reduce the dependencies on the QEMU
40    CPU definitions. Currently they are used for qemu_ld/st
41    instructions */
42 #define NO_CPU_IO_DEFS
43 #include "cpu.h"
44 
45 #include "exec/exec-all.h"
46 
47 #if !defined(CONFIG_USER_ONLY)
48 #include "hw/boards.h"
49 #endif
50 
51 #include "tcg/tcg-op.h"
52 
53 #if UINTPTR_MAX == UINT32_MAX
54 # define ELF_CLASS  ELFCLASS32
55 #else
56 # define ELF_CLASS  ELFCLASS64
57 #endif
58 #ifdef HOST_WORDS_BIGENDIAN
59 # define ELF_DATA   ELFDATA2MSB
60 #else
61 # define ELF_DATA   ELFDATA2LSB
62 #endif
63 
64 #include "elf.h"
65 #include "exec/log.h"
66 #include "sysemu/sysemu.h"
67 
68 /* Forward declarations for functions declared in tcg-target.c.inc and
69    used here. */
70 static void tcg_target_init(TCGContext *s);
71 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
typedef struct {
    /* 'len' is aligned to the size of a host pointer. */
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
86 
/* Packed FDE header: start address and length of the described function. */
typedef struct QEMU_PACKED {
    /* 'len' is aligned to the size of a host pointer. */
    uint32_t len __attribute__((aligned(sizeof(void *))));
    uint32_t cie_offset;
    uintptr_t func_start;
    uintptr_t func_len;
} DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98 
/* Marked unused so that hosts which never call it build without warnings. */
static void tcg_register_jit_int(void *buf,
                                 size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
    __attribute__((unused));
103 
104 /* Forward declarations for functions declared and used in tcg-target.c.inc. */
105 static const char *target_parse_constraint(TCGArgConstraint *ct,
106                                            const char *ct_str, TCGType type);
107 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
108                        intptr_t arg2);
109 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
110 static void tcg_out_movi(TCGContext *s, TCGType type,
111                          TCGReg ret, tcg_target_long arg);
112 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
113                        const int *const_args);
114 #if TCG_TARGET_MAYBE_vec
115 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
116                             TCGReg dst, TCGReg src);
117 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
118                              TCGReg dst, TCGReg base, intptr_t offset);
119 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
120                              TCGReg dst, tcg_target_long arg);
121 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
122                            unsigned vece, const TCGArg *args,
123                            const int *const_args);
124 #else
125 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
126                                    TCGReg dst, TCGReg src)
127 {
128     g_assert_not_reached();
129 }
130 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
131                                     TCGReg dst, TCGReg base, intptr_t offset)
132 {
133     g_assert_not_reached();
134 }
135 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
136                                     TCGReg dst, tcg_target_long arg)
137 {
138     g_assert_not_reached();
139 }
140 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
141                                   unsigned vece, const TCGArg *args,
142                                   const int *const_args)
143 {
144     g_assert_not_reached();
145 }
146 #endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148                        intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150                         TCGReg base, intptr_t ofs);
151 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
152 static int tcg_target_const_match(tcg_target_long val, TCGType type,
153                                   const TCGArgConstraint *arg_ct);
154 #ifdef TCG_TARGET_NEED_LDST_LABELS
155 static int tcg_out_ldst_finalize(TCGContext *s);
156 #endif
157 
158 #define TCG_HIGHWATER 1024
159 
160 static TCGContext **tcg_ctxs;
161 static unsigned int n_tcg_ctxs;
162 TCGv_env cpu_env = 0;
163 
164 struct tcg_region_tree {
165     QemuMutex lock;
166     GTree *tree;
167     /* padding to avoid false sharing is computed at run-time */
168 };
169 
170 /*
171  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
172  * dynamically allocate from as demand dictates. Given appropriate region
173  * sizing, this minimizes flushes even when some TCG threads generate a lot
174  * more code than others.
175  */
176 struct tcg_region_state {
177     QemuMutex lock;
178 
179     /* fields set at init time */
180     void *start;
181     void *start_aligned;
182     void *end;
183     size_t n;
184     size_t size; /* size of one region */
185     size_t stride; /* .size + guard size */
186 
187     /* fields protected by the lock */
188     size_t current; /* current region index */
189     size_t agg_size_full; /* aggregate size of full regions */
190 };
191 
192 static struct tcg_region_state region;
193 /*
194  * This is an array of struct tcg_region_tree's, with padding.
195  * We use void * to simplify the computation of region_trees[i]; each
196  * struct is found every tree_size bytes.
197  */
198 static void *region_trees;
199 static size_t tree_size;
200 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
201 static TCGRegSet tcg_target_call_clobber_regs;
202 
203 #if TCG_TARGET_INSN_UNIT_SIZE == 1
204 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
205 {
206     *s->code_ptr++ = v;
207 }
208 
209 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
210                                                       uint8_t v)
211 {
212     *p = v;
213 }
214 #endif
215 
216 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
217 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
218 {
219     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
220         *s->code_ptr++ = v;
221     } else {
222         tcg_insn_unit *p = s->code_ptr;
223         memcpy(p, &v, sizeof(v));
224         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
225     }
226 }
227 
228 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
229                                                        uint16_t v)
230 {
231     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
232         *p = v;
233     } else {
234         memcpy(p, &v, sizeof(v));
235     }
236 }
237 #endif
238 
239 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
240 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
241 {
242     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
243         *s->code_ptr++ = v;
244     } else {
245         tcg_insn_unit *p = s->code_ptr;
246         memcpy(p, &v, sizeof(v));
247         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
248     }
249 }
250 
251 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
252                                                        uint32_t v)
253 {
254     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
255         *p = v;
256     } else {
257         memcpy(p, &v, sizeof(v));
258     }
259 }
260 #endif
261 
262 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
263 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
264 {
265     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
266         *s->code_ptr++ = v;
267     } else {
268         tcg_insn_unit *p = s->code_ptr;
269         memcpy(p, &v, sizeof(v));
270         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
271     }
272 }
273 
274 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
275                                                        uint64_t v)
276 {
277     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
278         *p = v;
279     } else {
280         memcpy(p, &v, sizeof(v));
281     }
282 }
283 #endif
284 
285 /* label relocation processing */
286 
287 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
288                           TCGLabel *l, intptr_t addend)
289 {
290     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
291 
292     r->type = type;
293     r->ptr = code_ptr;
294     r->addend = addend;
295     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
296 }
297 
298 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
299 {
300     tcg_debug_assert(!l->has_value);
301     l->has_value = 1;
302     l->u.value_ptr = ptr;
303 }
304 
305 TCGLabel *gen_new_label(void)
306 {
307     TCGContext *s = tcg_ctx;
308     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
309 
310     memset(l, 0, sizeof(TCGLabel));
311     l->id = s->nb_labels++;
312     QSIMPLEQ_INIT(&l->relocs);
313 
314     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
315 
316     return l;
317 }
318 
319 static bool tcg_resolve_relocs(TCGContext *s)
320 {
321     TCGLabel *l;
322 
323     QSIMPLEQ_FOREACH(l, &s->labels, next) {
324         TCGRelocation *r;
325         uintptr_t value = l->u.value;
326 
327         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
328             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
329                 return false;
330             }
331         }
332     }
333     return true;
334 }
335 
336 static void set_jmp_reset_offset(TCGContext *s, int which)
337 {
338     size_t off = tcg_current_code_size(s);
339     s->tb_jmp_reset_offset[which] = off;
340     /* Make sure that we didn't overflow the stored offset.  */
341     assert(s->tb_jmp_reset_offset[which] == off);
342 }
343 
344 #include "tcg-target.c.inc"
345 
346 /* compare a pointer @ptr and a tb_tc @s */
347 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
348 {
349     if (ptr >= s->ptr + s->size) {
350         return 1;
351     } else if (ptr < s->ptr) {
352         return -1;
353     }
354     return 0;
355 }
356 
357 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
358 {
359     const struct tb_tc *a = ap;
360     const struct tb_tc *b = bp;
361 
362     /*
363      * When both sizes are set, we know this isn't a lookup.
364      * This is the most likely case: every TB must be inserted; lookups
365      * are a lot less frequent.
366      */
367     if (likely(a->size && b->size)) {
368         if (a->ptr > b->ptr) {
369             return 1;
370         } else if (a->ptr < b->ptr) {
371             return -1;
372         }
373         /* a->ptr == b->ptr should happen only on deletions */
374         g_assert(a->size == b->size);
375         return 0;
376     }
377     /*
378      * All lookups have either .size field set to 0.
379      * From the glib sources we see that @ap is always the lookup key. However
380      * the docs provide no guarantee, so we just mark this case as likely.
381      */
382     if (likely(a->size == 0)) {
383         return ptr_cmp_tb_tc(a->ptr, b);
384     }
385     return ptr_cmp_tb_tc(b->ptr, a);
386 }
387 
388 static void tcg_region_trees_init(void)
389 {
390     size_t i;
391 
392     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
393     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
394     for (i = 0; i < region.n; i++) {
395         struct tcg_region_tree *rt = region_trees + i * tree_size;
396 
397         qemu_mutex_init(&rt->lock);
398         rt->tree = g_tree_new(tb_tc_cmp);
399     }
400 }
401 
402 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
403 {
404     size_t region_idx;
405 
406     if (p < region.start_aligned) {
407         region_idx = 0;
408     } else {
409         ptrdiff_t offset = p - region.start_aligned;
410 
411         if (offset > region.stride * (region.n - 1)) {
412             region_idx = region.n - 1;
413         } else {
414             region_idx = offset / region.stride;
415         }
416     }
417     return region_trees + region_idx * tree_size;
418 }
419 
420 void tcg_tb_insert(TranslationBlock *tb)
421 {
422     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
423 
424     qemu_mutex_lock(&rt->lock);
425     g_tree_insert(rt->tree, &tb->tc, tb);
426     qemu_mutex_unlock(&rt->lock);
427 }
428 
429 void tcg_tb_remove(TranslationBlock *tb)
430 {
431     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
432 
433     qemu_mutex_lock(&rt->lock);
434     g_tree_remove(rt->tree, &tb->tc);
435     qemu_mutex_unlock(&rt->lock);
436 }
437 
438 /*
439  * Find the TB 'tb' such that
440  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
441  * Return NULL if not found.
442  */
443 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
444 {
445     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
446     TranslationBlock *tb;
447     struct tb_tc s = { .ptr = (void *)tc_ptr };
448 
449     qemu_mutex_lock(&rt->lock);
450     tb = g_tree_lookup(rt->tree, &s);
451     qemu_mutex_unlock(&rt->lock);
452     return tb;
453 }
454 
455 static void tcg_region_tree_lock_all(void)
456 {
457     size_t i;
458 
459     for (i = 0; i < region.n; i++) {
460         struct tcg_region_tree *rt = region_trees + i * tree_size;
461 
462         qemu_mutex_lock(&rt->lock);
463     }
464 }
465 
466 static void tcg_region_tree_unlock_all(void)
467 {
468     size_t i;
469 
470     for (i = 0; i < region.n; i++) {
471         struct tcg_region_tree *rt = region_trees + i * tree_size;
472 
473         qemu_mutex_unlock(&rt->lock);
474     }
475 }
476 
477 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
478 {
479     size_t i;
480 
481     tcg_region_tree_lock_all();
482     for (i = 0; i < region.n; i++) {
483         struct tcg_region_tree *rt = region_trees + i * tree_size;
484 
485         g_tree_foreach(rt->tree, func, user_data);
486     }
487     tcg_region_tree_unlock_all();
488 }
489 
490 size_t tcg_nb_tbs(void)
491 {
492     size_t nb_tbs = 0;
493     size_t i;
494 
495     tcg_region_tree_lock_all();
496     for (i = 0; i < region.n; i++) {
497         struct tcg_region_tree *rt = region_trees + i * tree_size;
498 
499         nb_tbs += g_tree_nnodes(rt->tree);
500     }
501     tcg_region_tree_unlock_all();
502     return nb_tbs;
503 }
504 
505 static gboolean tcg_region_tree_traverse(gpointer k, gpointer v, gpointer data)
506 {
507     TranslationBlock *tb = v;
508 
509     tb_destroy(tb);
510     return FALSE;
511 }
512 
513 static void tcg_region_tree_reset_all(void)
514 {
515     size_t i;
516 
517     tcg_region_tree_lock_all();
518     for (i = 0; i < region.n; i++) {
519         struct tcg_region_tree *rt = region_trees + i * tree_size;
520 
521         g_tree_foreach(rt->tree, tcg_region_tree_traverse, NULL);
522         /* Increment the refcount first so that destroy acts as a reset */
523         g_tree_ref(rt->tree);
524         g_tree_destroy(rt->tree);
525     }
526     tcg_region_tree_unlock_all();
527 }
528 
529 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
530 {
531     void *start, *end;
532 
533     start = region.start_aligned + curr_region * region.stride;
534     end = start + region.size;
535 
536     if (curr_region == 0) {
537         start = region.start;
538     }
539     if (curr_region == region.n - 1) {
540         end = region.end;
541     }
542 
543     *pstart = start;
544     *pend = end;
545 }
546 
547 static void tcg_region_assign(TCGContext *s, size_t curr_region)
548 {
549     void *start, *end;
550 
551     tcg_region_bounds(curr_region, &start, &end);
552 
553     s->code_gen_buffer = start;
554     s->code_gen_ptr = start;
555     s->code_gen_buffer_size = end - start;
556     s->code_gen_highwater = end - TCG_HIGHWATER;
557 }
558 
559 static bool tcg_region_alloc__locked(TCGContext *s)
560 {
561     if (region.current == region.n) {
562         return true;
563     }
564     tcg_region_assign(s, region.current);
565     region.current++;
566     return false;
567 }
568 
569 /*
570  * Request a new region once the one in use has filled up.
571  * Returns true on error.
572  */
573 static bool tcg_region_alloc(TCGContext *s)
574 {
575     bool err;
576     /* read the region size now; alloc__locked will overwrite it on success */
577     size_t size_full = s->code_gen_buffer_size;
578 
579     qemu_mutex_lock(&region.lock);
580     err = tcg_region_alloc__locked(s);
581     if (!err) {
582         region.agg_size_full += size_full - TCG_HIGHWATER;
583     }
584     qemu_mutex_unlock(&region.lock);
585     return err;
586 }
587 
588 /*
589  * Perform a context's first region allocation.
590  * This function does _not_ increment region.agg_size_full.
591  */
592 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
593 {
594     return tcg_region_alloc__locked(s);
595 }
596 
597 /* Call from a safe-work context */
598 void tcg_region_reset_all(void)
599 {
600     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
601     unsigned int i;
602 
603     qemu_mutex_lock(&region.lock);
604     region.current = 0;
605     region.agg_size_full = 0;
606 
607     for (i = 0; i < n_ctxs; i++) {
608         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
609         bool err = tcg_region_initial_alloc__locked(s);
610 
611         g_assert(!err);
612     }
613     qemu_mutex_unlock(&region.lock);
614 
615     tcg_region_tree_reset_all();
616 }
617 
618 #ifdef CONFIG_USER_ONLY
/* User-mode always uses exactly one region. */
static size_t tcg_n_regions(void)
{
    const size_t single_region = 1;

    return single_region;
}
623 #else
624 /*
625  * It is likely that some vCPUs will translate more code than others, so we
626  * first try to set more regions than max_cpus, with those regions being of
627  * reasonable size. If that's not possible we make do by evenly dividing
628  * the code_gen_buffer among the vCPUs.
629  */
630 static size_t tcg_n_regions(void)
631 {
632     size_t i;
633 
634     /* Use a single region if all we have is one vCPU thread */
635 #if !defined(CONFIG_USER_ONLY)
636     MachineState *ms = MACHINE(qdev_get_machine());
637     unsigned int max_cpus = ms->smp.max_cpus;
638 #endif
639     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
640         return 1;
641     }
642 
643     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
644     for (i = 8; i > 0; i--) {
645         size_t regions_per_thread = i;
646         size_t region_size;
647 
648         region_size = tcg_init_ctx.code_gen_buffer_size;
649         region_size /= max_cpus * regions_per_thread;
650 
651         if (region_size >= 2 * 1024u * 1024) {
652             return max_cpus * regions_per_thread;
653         }
654     }
655     /* If we can't, then just allocate one region per vCPU thread */
656     return max_cpus;
657 }
658 #endif
659 
660 /*
661  * Initializes region partitioning.
662  *
663  * Called at init time from the parent thread (i.e. the one calling
664  * tcg_context_init), after the target's TCG globals have been set.
665  *
666  * Region partitioning works by splitting code_gen_buffer into separate regions,
667  * and then assigning regions to TCG threads so that the threads can translate
668  * code in parallel without synchronization.
669  *
670  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
671  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
672  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
673  * must have been parsed before calling this function, since it calls
674  * qemu_tcg_mttcg_enabled().
675  *
676  * In user-mode we use a single region.  Having multiple regions in user-mode
677  * is not supported, because the number of vCPU threads (recall that each thread
678  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
679  * OS, and usually this number is huge (tens of thousands is not uncommon).
680  * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
683  *
684  * However, this user-mode limitation is unlikely to be a significant problem
685  * in practice. Multi-threaded guests share most if not all of their translated
686  * code, which makes parallel code generation less appealing than in softmmu.
687  */
688 void tcg_region_init(void)
689 {
690     void *buf = tcg_init_ctx.code_gen_buffer;
691     void *aligned;
692     size_t size = tcg_init_ctx.code_gen_buffer_size;
693     size_t page_size = qemu_real_host_page_size;
694     size_t region_size;
695     size_t n_regions;
696     size_t i;
697 
698     n_regions = tcg_n_regions();
699 
700     /* The first region will be 'aligned - buf' bytes larger than the others */
701     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
702     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
703     /*
704      * Make region_size a multiple of page_size, using aligned as the start.
705      * As a result of this we might end up with a few extra pages at the end of
706      * the buffer; we will assign those to the last region.
707      */
708     region_size = (size - (aligned - buf)) / n_regions;
709     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
710 
711     /* A region must have at least 2 pages; one code, one guard */
712     g_assert(region_size >= 2 * page_size);
713 
714     /* init the region struct */
715     qemu_mutex_init(&region.lock);
716     region.n = n_regions;
717     region.size = region_size - page_size;
718     region.stride = region_size;
719     region.start = buf;
720     region.start_aligned = aligned;
721     /* page-align the end, since its last page will be a guard page */
722     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
723     /* account for that last guard page */
724     region.end -= page_size;
725 
726     /* set guard pages */
727     for (i = 0; i < region.n; i++) {
728         void *start, *end;
729         int rc;
730 
731         tcg_region_bounds(i, &start, &end);
732         rc = qemu_mprotect_none(end, page_size);
733         g_assert(!rc);
734     }
735 
736     tcg_region_trees_init();
737 
738     /* In user-mode we support only one ctx, so do the initial allocation now */
739 #ifdef CONFIG_USER_ONLY
740     {
741         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
742 
743         g_assert(!err);
744     }
745 #endif
746 }
747 
748 static void alloc_tcg_plugin_context(TCGContext *s)
749 {
750 #ifdef CONFIG_PLUGIN
751     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
752     s->plugin_tb->insns =
753         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
754 #endif
755 }
756 
757 /*
758  * All TCG threads except the parent (i.e. the one that called tcg_context_init
759  * and registered the target's TCG globals) must register with this function
760  * before initiating translation.
761  *
762  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
763  * of tcg_region_init() for the reasoning behind this.
764  *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and user-mode.
771  */
772 #ifdef CONFIG_USER_ONLY
773 void tcg_register_thread(void)
774 {
775     tcg_ctx = &tcg_init_ctx;
776 }
777 #else
778 void tcg_register_thread(void)
779 {
780     MachineState *ms = MACHINE(qdev_get_machine());
781     TCGContext *s = g_malloc(sizeof(*s));
782     unsigned int i, n;
783     bool err;
784 
785     *s = tcg_init_ctx;
786 
787     /* Relink mem_base.  */
788     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
789         if (tcg_init_ctx.temps[i].mem_base) {
790             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
791             tcg_debug_assert(b >= 0 && b < n);
792             s->temps[i].mem_base = &s->temps[b];
793         }
794     }
795 
796     /* Claim an entry in tcg_ctxs */
797     n = qatomic_fetch_inc(&n_tcg_ctxs);
798     g_assert(n < ms->smp.max_cpus);
799     qatomic_set(&tcg_ctxs[n], s);
800 
801     if (n > 0) {
802         alloc_tcg_plugin_context(s);
803     }
804 
805     tcg_ctx = s;
806     qemu_mutex_lock(&region.lock);
807     err = tcg_region_initial_alloc__locked(tcg_ctx);
808     g_assert(!err);
809     qemu_mutex_unlock(&region.lock);
810 }
811 #endif /* !CONFIG_USER_ONLY */
812 
813 /*
814  * Returns the size (in bytes) of all translated code (i.e. from all regions)
815  * currently in the cache.
816  * See also: tcg_code_capacity()
817  * Do not confuse with tcg_current_code_size(); that one applies to a single
818  * TCG context.
819  */
820 size_t tcg_code_size(void)
821 {
822     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
823     unsigned int i;
824     size_t total;
825 
826     qemu_mutex_lock(&region.lock);
827     total = region.agg_size_full;
828     for (i = 0; i < n_ctxs; i++) {
829         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
830         size_t size;
831 
832         size = qatomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
833         g_assert(size <= s->code_gen_buffer_size);
834         total += size;
835     }
836     qemu_mutex_unlock(&region.lock);
837     return total;
838 }
839 
840 /*
841  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
842  * regions.
843  * See also: tcg_code_size()
844  */
845 size_t tcg_code_capacity(void)
846 {
847     size_t guard_size, capacity;
848 
849     /* no need for synchronization; these variables are set at init time */
850     guard_size = region.stride - region.size;
851     capacity = region.end + guard_size - region.start;
852     capacity -= region.n * (guard_size + TCG_HIGHWATER);
853     return capacity;
854 }
855 
856 size_t tcg_tb_phys_invalidate_count(void)
857 {
858     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
859     unsigned int i;
860     size_t total = 0;
861 
862     for (i = 0; i < n_ctxs; i++) {
863         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
864 
865         total += qatomic_read(&s->tb_phys_invalidate_count);
866     }
867     return total;
868 }
869 
870 /* pool based memory allocation */
871 void *tcg_malloc_internal(TCGContext *s, int size)
872 {
873     TCGPool *p;
874     int pool_size;
875 
876     if (size > TCG_POOL_CHUNK_SIZE) {
877         /* big malloc: insert a new pool (XXX: could optimize) */
878         p = g_malloc(sizeof(TCGPool) + size);
879         p->size = size;
880         p->next = s->pool_first_large;
881         s->pool_first_large = p;
882         return p->data;
883     } else {
884         p = s->pool_current;
885         if (!p) {
886             p = s->pool_first;
887             if (!p)
888                 goto new_pool;
889         } else {
890             if (!p->next) {
891             new_pool:
892                 pool_size = TCG_POOL_CHUNK_SIZE;
893                 p = g_malloc(sizeof(TCGPool) + pool_size);
894                 p->size = pool_size;
895                 p->next = NULL;
896                 if (s->pool_current)
897                     s->pool_current->next = p;
898                 else
899                     s->pool_first = p;
900             } else {
901                 p = p->next;
902             }
903         }
904     }
905     s->pool_current = p;
906     s->pool_cur = p->data + size;
907     s->pool_end = p->data + p->size;
908     return p->data;
909 }
910 
911 void tcg_pool_reset(TCGContext *s)
912 {
913     TCGPool *p, *t;
914     for (p = s->pool_first_large; p; p = t) {
915         t = p->next;
916         g_free(p);
917     }
918     s->pool_first_large = NULL;
919     s->pool_cur = s->pool_end = NULL;
920     s->pool_current = NULL;
921 }
922 
/* One entry of the helper table, which is keyed by the 'func' pointer. */
typedef struct TCGHelperInfo {
    void *func;
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;
929 
930 #include "exec/helper-proto.h"
931 
932 static const TCGHelperInfo all_helpers[] = {
933 #include "exec/helper-tcg.h"
934 };
935 static GHashTable *helper_table;
936 
937 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
938 static void process_op_defs(TCGContext *s);
939 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
940                                             TCGReg reg, const char *name);
941 
942 void tcg_context_init(TCGContext *s)
943 {
944     int op, total_args, n, i;
945     TCGOpDef *def;
946     TCGArgConstraint *args_ct;
947     TCGTemp *ts;
948 
949     memset(s, 0, sizeof(*s));
950     s->nb_globals = 0;
951 
952     /* Count total number of arguments and allocate the corresponding
953        space */
954     total_args = 0;
955     for(op = 0; op < NB_OPS; op++) {
956         def = &tcg_op_defs[op];
957         n = def->nb_iargs + def->nb_oargs;
958         total_args += n;
959     }
960 
961     args_ct = g_new0(TCGArgConstraint, total_args);
962 
963     for(op = 0; op < NB_OPS; op++) {
964         def = &tcg_op_defs[op];
965         def->args_ct = args_ct;
966         n = def->nb_iargs + def->nb_oargs;
967         args_ct += n;
968     }
969 
970     /* Register helpers.  */
971     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
972     helper_table = g_hash_table_new(NULL, NULL);
973 
974     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
975         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
976                             (gpointer)&all_helpers[i]);
977     }
978 
979     tcg_target_init(s);
980     process_op_defs(s);
981 
982     /* Reverse the order of the saved registers, assuming they're all at
983        the start of tcg_target_reg_alloc_order.  */
984     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
985         int r = tcg_target_reg_alloc_order[n];
986         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
987             break;
988         }
989     }
990     for (i = 0; i < n; ++i) {
991         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
992     }
993     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
994         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
995     }
996 
997     alloc_tcg_plugin_context(s);
998 
999     tcg_ctx = s;
1000     /*
1001      * In user-mode we simply share the init context among threads, since we
1002      * use a single region. See the documentation tcg_region_init() for the
1003      * reasoning behind this.
1004      * In softmmu we will have at most max_cpus TCG threads.
1005      */
1006 #ifdef CONFIG_USER_ONLY
1007     tcg_ctxs = &tcg_ctx;
1008     n_tcg_ctxs = 1;
1009 #else
1010     MachineState *ms = MACHINE(qdev_get_machine());
1011     unsigned int max_cpus = ms->smp.max_cpus;
1012     tcg_ctxs = g_new(TCGContext *, max_cpus);
1013 #endif
1014 
1015     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1016     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1017     cpu_env = temp_tcgv_ptr(ts);
1018 }
1019 
1020 /*
1021  * Allocate TBs right before their corresponding translated code, making
1022  * sure that TBs and code are on different cache lines.
1023  */
1024 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1025 {
1026     uintptr_t align = qemu_icache_linesize;
1027     TranslationBlock *tb;
1028     void *next;
1029 
1030  retry:
1031     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1032     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1033 
1034     if (unlikely(next > s->code_gen_highwater)) {
1035         if (tcg_region_alloc(s)) {
1036             return NULL;
1037         }
1038         goto retry;
1039     }
1040     qatomic_set(&s->code_gen_ptr, next);
1041     s->data_gen_ptr = NULL;
1042     return tb;
1043 }
1044 
1045 void tcg_prologue_init(TCGContext *s)
1046 {
1047     size_t prologue_size, total_size;
1048     void *buf0, *buf1;
1049 
1050     /* Put the prologue at the beginning of code_gen_buffer.  */
1051     buf0 = s->code_gen_buffer;
1052     total_size = s->code_gen_buffer_size;
1053     s->code_ptr = buf0;
1054     s->code_buf = buf0;
1055     s->data_gen_ptr = NULL;
1056     s->code_gen_prologue = buf0;
1057 
1058     /* Compute a high-water mark, at which we voluntarily flush the buffer
1059        and start over.  The size here is arbitrary, significantly larger
1060        than we expect the code generation for any one opcode to require.  */
1061     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1062 
1063 #ifdef TCG_TARGET_NEED_POOL_LABELS
1064     s->pool_labels = NULL;
1065 #endif
1066 
1067     /* Generate the prologue.  */
1068     tcg_target_qemu_prologue(s);
1069 
1070 #ifdef TCG_TARGET_NEED_POOL_LABELS
1071     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1072     {
1073         int result = tcg_out_pool_finalize(s);
1074         tcg_debug_assert(result == 0);
1075     }
1076 #endif
1077 
1078     buf1 = s->code_ptr;
1079     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1080 
1081     /* Deduct the prologue from the buffer.  */
1082     prologue_size = tcg_current_code_size(s);
1083     s->code_gen_ptr = buf1;
1084     s->code_gen_buffer = buf1;
1085     s->code_buf = buf1;
1086     total_size -= prologue_size;
1087     s->code_gen_buffer_size = total_size;
1088 
1089     tcg_register_jit(s->code_gen_buffer, total_size);
1090 
1091 #ifdef DEBUG_DISAS
1092     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1093         FILE *logfile = qemu_log_lock();
1094         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1095         if (s->data_gen_ptr) {
1096             size_t code_size = s->data_gen_ptr - buf0;
1097             size_t data_size = prologue_size - code_size;
1098             size_t i;
1099 
1100             log_disas(buf0, code_size);
1101 
1102             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1103                 if (sizeof(tcg_target_ulong) == 8) {
1104                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1105                              (uintptr_t)s->data_gen_ptr + i,
1106                              *(uint64_t *)(s->data_gen_ptr + i));
1107                 } else {
1108                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1109                              (uintptr_t)s->data_gen_ptr + i,
1110                              *(uint32_t *)(s->data_gen_ptr + i));
1111                 }
1112             }
1113         } else {
1114             log_disas(buf0, prologue_size);
1115         }
1116         qemu_log("\n");
1117         qemu_log_flush();
1118         qemu_log_unlock(logfile);
1119     }
1120 #endif
1121 
1122     /* Assert that goto_ptr is implemented completely.  */
1123     if (TCG_TARGET_HAS_goto_ptr) {
1124         tcg_debug_assert(s->code_gen_epilogue != NULL);
1125     }
1126 }
1127 
1128 void tcg_func_start(TCGContext *s)
1129 {
1130     tcg_pool_reset(s);
1131     s->nb_temps = s->nb_globals;
1132 
1133     /* No temps have been previously allocated for size or locality.  */
1134     memset(s->free_temps, 0, sizeof(s->free_temps));
1135 
1136     s->nb_ops = 0;
1137     s->nb_labels = 0;
1138     s->current_frame_offset = s->frame_start;
1139 
1140 #ifdef CONFIG_DEBUG_TCG
1141     s->goto_tb_issue_mask = 0;
1142 #endif
1143 
1144     QTAILQ_INIT(&s->ops);
1145     QTAILQ_INIT(&s->free_ops);
1146     QSIMPLEQ_INIT(&s->labels);
1147 }
1148 
1149 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1150 {
1151     int n = s->nb_temps++;
1152     tcg_debug_assert(n < TCG_MAX_TEMPS);
1153     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1154 }
1155 
1156 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1157 {
1158     TCGTemp *ts;
1159 
1160     tcg_debug_assert(s->nb_globals == s->nb_temps);
1161     s->nb_globals++;
1162     ts = tcg_temp_alloc(s);
1163     ts->temp_global = 1;
1164 
1165     return ts;
1166 }
1167 
1168 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1169                                             TCGReg reg, const char *name)
1170 {
1171     TCGTemp *ts;
1172 
1173     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1174         tcg_abort();
1175     }
1176 
1177     ts = tcg_global_alloc(s);
1178     ts->base_type = type;
1179     ts->type = type;
1180     ts->fixed_reg = 1;
1181     ts->reg = reg;
1182     ts->name = name;
1183     tcg_regset_set_reg(s->reserved_regs, reg);
1184 
1185     return ts;
1186 }
1187 
1188 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1189 {
1190     s->frame_start = start;
1191     s->frame_end = start + size;
1192     s->frame_temp
1193         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1194 }
1195 
1196 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1197                                      intptr_t offset, const char *name)
1198 {
1199     TCGContext *s = tcg_ctx;
1200     TCGTemp *base_ts = tcgv_ptr_temp(base);
1201     TCGTemp *ts = tcg_global_alloc(s);
1202     int indirect_reg = 0, bigendian = 0;
1203 #ifdef HOST_WORDS_BIGENDIAN
1204     bigendian = 1;
1205 #endif
1206 
1207     if (!base_ts->fixed_reg) {
1208         /* We do not support double-indirect registers.  */
1209         tcg_debug_assert(!base_ts->indirect_reg);
1210         base_ts->indirect_base = 1;
1211         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1212                             ? 2 : 1);
1213         indirect_reg = 1;
1214     }
1215 
1216     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1217         TCGTemp *ts2 = tcg_global_alloc(s);
1218         char buf[64];
1219 
1220         ts->base_type = TCG_TYPE_I64;
1221         ts->type = TCG_TYPE_I32;
1222         ts->indirect_reg = indirect_reg;
1223         ts->mem_allocated = 1;
1224         ts->mem_base = base_ts;
1225         ts->mem_offset = offset + bigendian * 4;
1226         pstrcpy(buf, sizeof(buf), name);
1227         pstrcat(buf, sizeof(buf), "_0");
1228         ts->name = strdup(buf);
1229 
1230         tcg_debug_assert(ts2 == ts + 1);
1231         ts2->base_type = TCG_TYPE_I64;
1232         ts2->type = TCG_TYPE_I32;
1233         ts2->indirect_reg = indirect_reg;
1234         ts2->mem_allocated = 1;
1235         ts2->mem_base = base_ts;
1236         ts2->mem_offset = offset + (1 - bigendian) * 4;
1237         pstrcpy(buf, sizeof(buf), name);
1238         pstrcat(buf, sizeof(buf), "_1");
1239         ts2->name = strdup(buf);
1240     } else {
1241         ts->base_type = type;
1242         ts->type = type;
1243         ts->indirect_reg = indirect_reg;
1244         ts->mem_allocated = 1;
1245         ts->mem_base = base_ts;
1246         ts->mem_offset = offset;
1247         ts->name = name;
1248     }
1249     return ts;
1250 }
1251 
1252 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1253 {
1254     TCGContext *s = tcg_ctx;
1255     TCGTemp *ts;
1256     int idx, k;
1257 
1258     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1259     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1260     if (idx < TCG_MAX_TEMPS) {
1261         /* There is already an available temp with the right type.  */
1262         clear_bit(idx, s->free_temps[k].l);
1263 
1264         ts = &s->temps[idx];
1265         ts->temp_allocated = 1;
1266         tcg_debug_assert(ts->base_type == type);
1267         tcg_debug_assert(ts->temp_local == temp_local);
1268     } else {
1269         ts = tcg_temp_alloc(s);
1270         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1271             TCGTemp *ts2 = tcg_temp_alloc(s);
1272 
1273             ts->base_type = type;
1274             ts->type = TCG_TYPE_I32;
1275             ts->temp_allocated = 1;
1276             ts->temp_local = temp_local;
1277 
1278             tcg_debug_assert(ts2 == ts + 1);
1279             ts2->base_type = TCG_TYPE_I64;
1280             ts2->type = TCG_TYPE_I32;
1281             ts2->temp_allocated = 1;
1282             ts2->temp_local = temp_local;
1283         } else {
1284             ts->base_type = type;
1285             ts->type = type;
1286             ts->temp_allocated = 1;
1287             ts->temp_local = temp_local;
1288         }
1289     }
1290 
1291 #if defined(CONFIG_DEBUG_TCG)
1292     s->temps_in_use++;
1293 #endif
1294     return ts;
1295 }
1296 
1297 TCGv_vec tcg_temp_new_vec(TCGType type)
1298 {
1299     TCGTemp *t;
1300 
1301 #ifdef CONFIG_DEBUG_TCG
1302     switch (type) {
1303     case TCG_TYPE_V64:
1304         assert(TCG_TARGET_HAS_v64);
1305         break;
1306     case TCG_TYPE_V128:
1307         assert(TCG_TARGET_HAS_v128);
1308         break;
1309     case TCG_TYPE_V256:
1310         assert(TCG_TARGET_HAS_v256);
1311         break;
1312     default:
1313         g_assert_not_reached();
1314     }
1315 #endif
1316 
1317     t = tcg_temp_new_internal(type, 0);
1318     return temp_tcgv_vec(t);
1319 }
1320 
1321 /* Create a new temp of the same type as an existing temp.  */
1322 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1323 {
1324     TCGTemp *t = tcgv_vec_temp(match);
1325 
1326     tcg_debug_assert(t->temp_allocated != 0);
1327 
1328     t = tcg_temp_new_internal(t->base_type, 0);
1329     return temp_tcgv_vec(t);
1330 }
1331 
1332 void tcg_temp_free_internal(TCGTemp *ts)
1333 {
1334     TCGContext *s = tcg_ctx;
1335     int k, idx;
1336 
1337 #if defined(CONFIG_DEBUG_TCG)
1338     s->temps_in_use--;
1339     if (s->temps_in_use < 0) {
1340         fprintf(stderr, "More temporaries freed than allocated!\n");
1341     }
1342 #endif
1343 
1344     tcg_debug_assert(ts->temp_global == 0);
1345     tcg_debug_assert(ts->temp_allocated != 0);
1346     ts->temp_allocated = 0;
1347 
1348     idx = temp_idx(ts);
1349     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1350     set_bit(idx, s->free_temps[k].l);
1351 }
1352 
1353 TCGv_i32 tcg_const_i32(int32_t val)
1354 {
1355     TCGv_i32 t0;
1356     t0 = tcg_temp_new_i32();
1357     tcg_gen_movi_i32(t0, val);
1358     return t0;
1359 }
1360 
1361 TCGv_i64 tcg_const_i64(int64_t val)
1362 {
1363     TCGv_i64 t0;
1364     t0 = tcg_temp_new_i64();
1365     tcg_gen_movi_i64(t0, val);
1366     return t0;
1367 }
1368 
1369 TCGv_i32 tcg_const_local_i32(int32_t val)
1370 {
1371     TCGv_i32 t0;
1372     t0 = tcg_temp_local_new_i32();
1373     tcg_gen_movi_i32(t0, val);
1374     return t0;
1375 }
1376 
1377 TCGv_i64 tcg_const_local_i64(int64_t val)
1378 {
1379     TCGv_i64 t0;
1380     t0 = tcg_temp_local_new_i64();
1381     tcg_gen_movi_i64(t0, val);
1382     return t0;
1383 }
1384 
#if defined(CONFIG_DEBUG_TCG)
/* Reset the debug counter of temps currently in use.  */
void tcg_clear_temp_count(void)
{
    tcg_ctx->temps_in_use = 0;
}

/*
 * Return 1 if the in-use counter is non-zero, 0 otherwise.
 * The counter is cleared either way, so that we don't give another
 * warning immediately next time around.
 */
int tcg_check_temp_count(void)
{
    TCGContext *s = tcg_ctx;
    int unbalanced = (s->temps_in_use != 0);

    s->temps_in_use = 0;
    return unbalanced;
}
#endif
1405 
1406 /* Return true if OP may appear in the opcode stream.
1407    Test the runtime variable that controls each opcode.  */
1408 bool tcg_op_supported(TCGOpcode op)
1409 {
1410     const bool have_vec
1411         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1412 
1413     switch (op) {
1414     case INDEX_op_discard:
1415     case INDEX_op_set_label:
1416     case INDEX_op_call:
1417     case INDEX_op_br:
1418     case INDEX_op_mb:
1419     case INDEX_op_insn_start:
1420     case INDEX_op_exit_tb:
1421     case INDEX_op_goto_tb:
1422     case INDEX_op_qemu_ld_i32:
1423     case INDEX_op_qemu_st_i32:
1424     case INDEX_op_qemu_ld_i64:
1425     case INDEX_op_qemu_st_i64:
1426         return true;
1427 
1428     case INDEX_op_goto_ptr:
1429         return TCG_TARGET_HAS_goto_ptr;
1430 
1431     case INDEX_op_mov_i32:
1432     case INDEX_op_movi_i32:
1433     case INDEX_op_setcond_i32:
1434     case INDEX_op_brcond_i32:
1435     case INDEX_op_ld8u_i32:
1436     case INDEX_op_ld8s_i32:
1437     case INDEX_op_ld16u_i32:
1438     case INDEX_op_ld16s_i32:
1439     case INDEX_op_ld_i32:
1440     case INDEX_op_st8_i32:
1441     case INDEX_op_st16_i32:
1442     case INDEX_op_st_i32:
1443     case INDEX_op_add_i32:
1444     case INDEX_op_sub_i32:
1445     case INDEX_op_mul_i32:
1446     case INDEX_op_and_i32:
1447     case INDEX_op_or_i32:
1448     case INDEX_op_xor_i32:
1449     case INDEX_op_shl_i32:
1450     case INDEX_op_shr_i32:
1451     case INDEX_op_sar_i32:
1452         return true;
1453 
1454     case INDEX_op_movcond_i32:
1455         return TCG_TARGET_HAS_movcond_i32;
1456     case INDEX_op_div_i32:
1457     case INDEX_op_divu_i32:
1458         return TCG_TARGET_HAS_div_i32;
1459     case INDEX_op_rem_i32:
1460     case INDEX_op_remu_i32:
1461         return TCG_TARGET_HAS_rem_i32;
1462     case INDEX_op_div2_i32:
1463     case INDEX_op_divu2_i32:
1464         return TCG_TARGET_HAS_div2_i32;
1465     case INDEX_op_rotl_i32:
1466     case INDEX_op_rotr_i32:
1467         return TCG_TARGET_HAS_rot_i32;
1468     case INDEX_op_deposit_i32:
1469         return TCG_TARGET_HAS_deposit_i32;
1470     case INDEX_op_extract_i32:
1471         return TCG_TARGET_HAS_extract_i32;
1472     case INDEX_op_sextract_i32:
1473         return TCG_TARGET_HAS_sextract_i32;
1474     case INDEX_op_extract2_i32:
1475         return TCG_TARGET_HAS_extract2_i32;
1476     case INDEX_op_add2_i32:
1477         return TCG_TARGET_HAS_add2_i32;
1478     case INDEX_op_sub2_i32:
1479         return TCG_TARGET_HAS_sub2_i32;
1480     case INDEX_op_mulu2_i32:
1481         return TCG_TARGET_HAS_mulu2_i32;
1482     case INDEX_op_muls2_i32:
1483         return TCG_TARGET_HAS_muls2_i32;
1484     case INDEX_op_muluh_i32:
1485         return TCG_TARGET_HAS_muluh_i32;
1486     case INDEX_op_mulsh_i32:
1487         return TCG_TARGET_HAS_mulsh_i32;
1488     case INDEX_op_ext8s_i32:
1489         return TCG_TARGET_HAS_ext8s_i32;
1490     case INDEX_op_ext16s_i32:
1491         return TCG_TARGET_HAS_ext16s_i32;
1492     case INDEX_op_ext8u_i32:
1493         return TCG_TARGET_HAS_ext8u_i32;
1494     case INDEX_op_ext16u_i32:
1495         return TCG_TARGET_HAS_ext16u_i32;
1496     case INDEX_op_bswap16_i32:
1497         return TCG_TARGET_HAS_bswap16_i32;
1498     case INDEX_op_bswap32_i32:
1499         return TCG_TARGET_HAS_bswap32_i32;
1500     case INDEX_op_not_i32:
1501         return TCG_TARGET_HAS_not_i32;
1502     case INDEX_op_neg_i32:
1503         return TCG_TARGET_HAS_neg_i32;
1504     case INDEX_op_andc_i32:
1505         return TCG_TARGET_HAS_andc_i32;
1506     case INDEX_op_orc_i32:
1507         return TCG_TARGET_HAS_orc_i32;
1508     case INDEX_op_eqv_i32:
1509         return TCG_TARGET_HAS_eqv_i32;
1510     case INDEX_op_nand_i32:
1511         return TCG_TARGET_HAS_nand_i32;
1512     case INDEX_op_nor_i32:
1513         return TCG_TARGET_HAS_nor_i32;
1514     case INDEX_op_clz_i32:
1515         return TCG_TARGET_HAS_clz_i32;
1516     case INDEX_op_ctz_i32:
1517         return TCG_TARGET_HAS_ctz_i32;
1518     case INDEX_op_ctpop_i32:
1519         return TCG_TARGET_HAS_ctpop_i32;
1520 
1521     case INDEX_op_brcond2_i32:
1522     case INDEX_op_setcond2_i32:
1523         return TCG_TARGET_REG_BITS == 32;
1524 
1525     case INDEX_op_mov_i64:
1526     case INDEX_op_movi_i64:
1527     case INDEX_op_setcond_i64:
1528     case INDEX_op_brcond_i64:
1529     case INDEX_op_ld8u_i64:
1530     case INDEX_op_ld8s_i64:
1531     case INDEX_op_ld16u_i64:
1532     case INDEX_op_ld16s_i64:
1533     case INDEX_op_ld32u_i64:
1534     case INDEX_op_ld32s_i64:
1535     case INDEX_op_ld_i64:
1536     case INDEX_op_st8_i64:
1537     case INDEX_op_st16_i64:
1538     case INDEX_op_st32_i64:
1539     case INDEX_op_st_i64:
1540     case INDEX_op_add_i64:
1541     case INDEX_op_sub_i64:
1542     case INDEX_op_mul_i64:
1543     case INDEX_op_and_i64:
1544     case INDEX_op_or_i64:
1545     case INDEX_op_xor_i64:
1546     case INDEX_op_shl_i64:
1547     case INDEX_op_shr_i64:
1548     case INDEX_op_sar_i64:
1549     case INDEX_op_ext_i32_i64:
1550     case INDEX_op_extu_i32_i64:
1551         return TCG_TARGET_REG_BITS == 64;
1552 
1553     case INDEX_op_movcond_i64:
1554         return TCG_TARGET_HAS_movcond_i64;
1555     case INDEX_op_div_i64:
1556     case INDEX_op_divu_i64:
1557         return TCG_TARGET_HAS_div_i64;
1558     case INDEX_op_rem_i64:
1559     case INDEX_op_remu_i64:
1560         return TCG_TARGET_HAS_rem_i64;
1561     case INDEX_op_div2_i64:
1562     case INDEX_op_divu2_i64:
1563         return TCG_TARGET_HAS_div2_i64;
1564     case INDEX_op_rotl_i64:
1565     case INDEX_op_rotr_i64:
1566         return TCG_TARGET_HAS_rot_i64;
1567     case INDEX_op_deposit_i64:
1568         return TCG_TARGET_HAS_deposit_i64;
1569     case INDEX_op_extract_i64:
1570         return TCG_TARGET_HAS_extract_i64;
1571     case INDEX_op_sextract_i64:
1572         return TCG_TARGET_HAS_sextract_i64;
1573     case INDEX_op_extract2_i64:
1574         return TCG_TARGET_HAS_extract2_i64;
1575     case INDEX_op_extrl_i64_i32:
1576         return TCG_TARGET_HAS_extrl_i64_i32;
1577     case INDEX_op_extrh_i64_i32:
1578         return TCG_TARGET_HAS_extrh_i64_i32;
1579     case INDEX_op_ext8s_i64:
1580         return TCG_TARGET_HAS_ext8s_i64;
1581     case INDEX_op_ext16s_i64:
1582         return TCG_TARGET_HAS_ext16s_i64;
1583     case INDEX_op_ext32s_i64:
1584         return TCG_TARGET_HAS_ext32s_i64;
1585     case INDEX_op_ext8u_i64:
1586         return TCG_TARGET_HAS_ext8u_i64;
1587     case INDEX_op_ext16u_i64:
1588         return TCG_TARGET_HAS_ext16u_i64;
1589     case INDEX_op_ext32u_i64:
1590         return TCG_TARGET_HAS_ext32u_i64;
1591     case INDEX_op_bswap16_i64:
1592         return TCG_TARGET_HAS_bswap16_i64;
1593     case INDEX_op_bswap32_i64:
1594         return TCG_TARGET_HAS_bswap32_i64;
1595     case INDEX_op_bswap64_i64:
1596         return TCG_TARGET_HAS_bswap64_i64;
1597     case INDEX_op_not_i64:
1598         return TCG_TARGET_HAS_not_i64;
1599     case INDEX_op_neg_i64:
1600         return TCG_TARGET_HAS_neg_i64;
1601     case INDEX_op_andc_i64:
1602         return TCG_TARGET_HAS_andc_i64;
1603     case INDEX_op_orc_i64:
1604         return TCG_TARGET_HAS_orc_i64;
1605     case INDEX_op_eqv_i64:
1606         return TCG_TARGET_HAS_eqv_i64;
1607     case INDEX_op_nand_i64:
1608         return TCG_TARGET_HAS_nand_i64;
1609     case INDEX_op_nor_i64:
1610         return TCG_TARGET_HAS_nor_i64;
1611     case INDEX_op_clz_i64:
1612         return TCG_TARGET_HAS_clz_i64;
1613     case INDEX_op_ctz_i64:
1614         return TCG_TARGET_HAS_ctz_i64;
1615     case INDEX_op_ctpop_i64:
1616         return TCG_TARGET_HAS_ctpop_i64;
1617     case INDEX_op_add2_i64:
1618         return TCG_TARGET_HAS_add2_i64;
1619     case INDEX_op_sub2_i64:
1620         return TCG_TARGET_HAS_sub2_i64;
1621     case INDEX_op_mulu2_i64:
1622         return TCG_TARGET_HAS_mulu2_i64;
1623     case INDEX_op_muls2_i64:
1624         return TCG_TARGET_HAS_muls2_i64;
1625     case INDEX_op_muluh_i64:
1626         return TCG_TARGET_HAS_muluh_i64;
1627     case INDEX_op_mulsh_i64:
1628         return TCG_TARGET_HAS_mulsh_i64;
1629 
1630     case INDEX_op_mov_vec:
1631     case INDEX_op_dup_vec:
1632     case INDEX_op_dupi_vec:
1633     case INDEX_op_dupm_vec:
1634     case INDEX_op_ld_vec:
1635     case INDEX_op_st_vec:
1636     case INDEX_op_add_vec:
1637     case INDEX_op_sub_vec:
1638     case INDEX_op_and_vec:
1639     case INDEX_op_or_vec:
1640     case INDEX_op_xor_vec:
1641     case INDEX_op_cmp_vec:
1642         return have_vec;
1643     case INDEX_op_dup2_vec:
1644         return have_vec && TCG_TARGET_REG_BITS == 32;
1645     case INDEX_op_not_vec:
1646         return have_vec && TCG_TARGET_HAS_not_vec;
1647     case INDEX_op_neg_vec:
1648         return have_vec && TCG_TARGET_HAS_neg_vec;
1649     case INDEX_op_abs_vec:
1650         return have_vec && TCG_TARGET_HAS_abs_vec;
1651     case INDEX_op_andc_vec:
1652         return have_vec && TCG_TARGET_HAS_andc_vec;
1653     case INDEX_op_orc_vec:
1654         return have_vec && TCG_TARGET_HAS_orc_vec;
1655     case INDEX_op_mul_vec:
1656         return have_vec && TCG_TARGET_HAS_mul_vec;
1657     case INDEX_op_shli_vec:
1658     case INDEX_op_shri_vec:
1659     case INDEX_op_sari_vec:
1660         return have_vec && TCG_TARGET_HAS_shi_vec;
1661     case INDEX_op_shls_vec:
1662     case INDEX_op_shrs_vec:
1663     case INDEX_op_sars_vec:
1664         return have_vec && TCG_TARGET_HAS_shs_vec;
1665     case INDEX_op_shlv_vec:
1666     case INDEX_op_shrv_vec:
1667     case INDEX_op_sarv_vec:
1668         return have_vec && TCG_TARGET_HAS_shv_vec;
1669     case INDEX_op_rotli_vec:
1670         return have_vec && TCG_TARGET_HAS_roti_vec;
1671     case INDEX_op_rotls_vec:
1672         return have_vec && TCG_TARGET_HAS_rots_vec;
1673     case INDEX_op_rotlv_vec:
1674     case INDEX_op_rotrv_vec:
1675         return have_vec && TCG_TARGET_HAS_rotv_vec;
1676     case INDEX_op_ssadd_vec:
1677     case INDEX_op_usadd_vec:
1678     case INDEX_op_sssub_vec:
1679     case INDEX_op_ussub_vec:
1680         return have_vec && TCG_TARGET_HAS_sat_vec;
1681     case INDEX_op_smin_vec:
1682     case INDEX_op_umin_vec:
1683     case INDEX_op_smax_vec:
1684     case INDEX_op_umax_vec:
1685         return have_vec && TCG_TARGET_HAS_minmax_vec;
1686     case INDEX_op_bitsel_vec:
1687         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1688     case INDEX_op_cmpsel_vec:
1689         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1690 
1691     default:
1692         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1693         return true;
1694     }
1695 }
1696 
1697 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1698    and endian swap. Maybe it would be better to do the alignment
1699    and endian swap in tcg_reg_alloc_call(). */
1700 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1701 {
1702     int i, real_args, nb_rets, pi;
1703     unsigned sizemask, flags;
1704     TCGHelperInfo *info;
1705     TCGOp *op;
1706 
1707     info = g_hash_table_lookup(helper_table, (gpointer)func);
1708     flags = info->flags;
1709     sizemask = info->sizemask;
1710 
1711 #ifdef CONFIG_PLUGIN
1712     /* detect non-plugin helpers */
1713     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1714         tcg_ctx->plugin_insn->calls_helpers = true;
1715     }
1716 #endif
1717 
1718 #if defined(__sparc__) && !defined(__arch64__) \
1719     && !defined(CONFIG_TCG_INTERPRETER)
1720     /* We have 64-bit values in one register, but need to pass as two
1721        separate parameters.  Split them.  */
1722     int orig_sizemask = sizemask;
1723     int orig_nargs = nargs;
1724     TCGv_i64 retl, reth;
1725     TCGTemp *split_args[MAX_OPC_PARAM];
1726 
1727     retl = NULL;
1728     reth = NULL;
1729     if (sizemask != 0) {
1730         for (i = real_args = 0; i < nargs; ++i) {
1731             int is_64bit = sizemask & (1 << (i+1)*2);
1732             if (is_64bit) {
1733                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1734                 TCGv_i32 h = tcg_temp_new_i32();
1735                 TCGv_i32 l = tcg_temp_new_i32();
1736                 tcg_gen_extr_i64_i32(l, h, orig);
1737                 split_args[real_args++] = tcgv_i32_temp(h);
1738                 split_args[real_args++] = tcgv_i32_temp(l);
1739             } else {
1740                 split_args[real_args++] = args[i];
1741             }
1742         }
1743         nargs = real_args;
1744         args = split_args;
1745         sizemask = 0;
1746     }
1747 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1748     for (i = 0; i < nargs; ++i) {
1749         int is_64bit = sizemask & (1 << (i+1)*2);
1750         int is_signed = sizemask & (2 << (i+1)*2);
1751         if (!is_64bit) {
1752             TCGv_i64 temp = tcg_temp_new_i64();
1753             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1754             if (is_signed) {
1755                 tcg_gen_ext32s_i64(temp, orig);
1756             } else {
1757                 tcg_gen_ext32u_i64(temp, orig);
1758             }
1759             args[i] = tcgv_i64_temp(temp);
1760         }
1761     }
1762 #endif /* TCG_TARGET_EXTEND_ARGS */
1763 
1764     op = tcg_emit_op(INDEX_op_call);
1765 
1766     pi = 0;
1767     if (ret != NULL) {
1768 #if defined(__sparc__) && !defined(__arch64__) \
1769     && !defined(CONFIG_TCG_INTERPRETER)
1770         if (orig_sizemask & 1) {
1771             /* The 32-bit ABI is going to return the 64-bit value in
1772                the %o0/%o1 register pair.  Prepare for this by using
1773                two return temporaries, and reassemble below.  */
1774             retl = tcg_temp_new_i64();
1775             reth = tcg_temp_new_i64();
1776             op->args[pi++] = tcgv_i64_arg(reth);
1777             op->args[pi++] = tcgv_i64_arg(retl);
1778             nb_rets = 2;
1779         } else {
1780             op->args[pi++] = temp_arg(ret);
1781             nb_rets = 1;
1782         }
1783 #else
1784         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1785 #ifdef HOST_WORDS_BIGENDIAN
1786             op->args[pi++] = temp_arg(ret + 1);
1787             op->args[pi++] = temp_arg(ret);
1788 #else
1789             op->args[pi++] = temp_arg(ret);
1790             op->args[pi++] = temp_arg(ret + 1);
1791 #endif
1792             nb_rets = 2;
1793         } else {
1794             op->args[pi++] = temp_arg(ret);
1795             nb_rets = 1;
1796         }
1797 #endif
1798     } else {
1799         nb_rets = 0;
1800     }
1801     TCGOP_CALLO(op) = nb_rets;
1802 
1803     real_args = 0;
1804     for (i = 0; i < nargs; i++) {
1805         int is_64bit = sizemask & (1 << (i+1)*2);
1806         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1807 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1808             /* some targets want aligned 64 bit args */
1809             if (real_args & 1) {
1810                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1811                 real_args++;
1812             }
1813 #endif
1814            /* If stack grows up, then we will be placing successive
1815               arguments at lower addresses, which means we need to
1816               reverse the order compared to how we would normally
1817               treat either big or little-endian.  For those arguments
1818               that will wind up in registers, this still works for
1819               HPPA (the only current STACK_GROWSUP target) since the
1820               argument registers are *also* allocated in decreasing
1821               order.  If another such target is added, this logic may
1822               have to get more complicated to differentiate between
1823               stack arguments and register arguments.  */
1824 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1825             op->args[pi++] = temp_arg(args[i] + 1);
1826             op->args[pi++] = temp_arg(args[i]);
1827 #else
1828             op->args[pi++] = temp_arg(args[i]);
1829             op->args[pi++] = temp_arg(args[i] + 1);
1830 #endif
1831             real_args += 2;
1832             continue;
1833         }
1834 
1835         op->args[pi++] = temp_arg(args[i]);
1836         real_args++;
1837     }
1838     op->args[pi++] = (uintptr_t)func;
1839     op->args[pi++] = flags;
1840     TCGOP_CALLI(op) = real_args;
1841 
1842     /* Make sure the fields didn't overflow.  */
1843     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1844     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1845 
1846 #if defined(__sparc__) && !defined(__arch64__) \
1847     && !defined(CONFIG_TCG_INTERPRETER)
1848     /* Free all of the parts we allocated above.  */
1849     for (i = real_args = 0; i < orig_nargs; ++i) {
1850         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1851         if (is_64bit) {
1852             tcg_temp_free_internal(args[real_args++]);
1853             tcg_temp_free_internal(args[real_args++]);
1854         } else {
1855             real_args++;
1856         }
1857     }
1858     if (orig_sizemask & 1) {
1859         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1860            Note that describing these as TCGv_i64 eliminates an unnecessary
1861            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1862         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1863         tcg_temp_free_i64(retl);
1864         tcg_temp_free_i64(reth);
1865     }
1866 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1867     for (i = 0; i < nargs; ++i) {
1868         int is_64bit = sizemask & (1 << (i+1)*2);
1869         if (!is_64bit) {
1870             tcg_temp_free_internal(args[i]);
1871         }
1872     }
1873 #endif /* TCG_TARGET_EXTEND_ARGS */
1874 }
1875 
1876 static void tcg_reg_alloc_start(TCGContext *s)
1877 {
1878     int i, n;
1879     TCGTemp *ts;
1880 
1881     for (i = 0, n = s->nb_globals; i < n; i++) {
1882         ts = &s->temps[i];
1883         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1884     }
1885     for (n = s->nb_temps; i < n; i++) {
1886         ts = &s->temps[i];
1887         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1888         ts->mem_allocated = 0;
1889         ts->fixed_reg = 0;
1890     }
1891 
1892     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1893 }
1894 
1895 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1896                                  TCGTemp *ts)
1897 {
1898     int idx = temp_idx(ts);
1899 
1900     if (ts->temp_global) {
1901         pstrcpy(buf, buf_size, ts->name);
1902     } else if (ts->temp_local) {
1903         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1904     } else {
1905         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1906     }
1907     return buf;
1908 }
1909 
1910 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1911                              int buf_size, TCGArg arg)
1912 {
1913     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1914 }
1915 
1916 /* Find helper name.  */
1917 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1918 {
1919     const char *ret = NULL;
1920     if (helper_table) {
1921         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1922         if (info) {
1923             ret = info->name;
1924         }
1925     }
1926     return ret;
1927 }
1928 
1929 static const char * const cond_name[] =
1930 {
1931     [TCG_COND_NEVER] = "never",
1932     [TCG_COND_ALWAYS] = "always",
1933     [TCG_COND_EQ] = "eq",
1934     [TCG_COND_NE] = "ne",
1935     [TCG_COND_LT] = "lt",
1936     [TCG_COND_GE] = "ge",
1937     [TCG_COND_LE] = "le",
1938     [TCG_COND_GT] = "gt",
1939     [TCG_COND_LTU] = "ltu",
1940     [TCG_COND_GEU] = "geu",
1941     [TCG_COND_LEU] = "leu",
1942     [TCG_COND_GTU] = "gtu"
1943 };
1944 
1945 static const char * const ldst_name[] =
1946 {
1947     [MO_UB]   = "ub",
1948     [MO_SB]   = "sb",
1949     [MO_LEUW] = "leuw",
1950     [MO_LESW] = "lesw",
1951     [MO_LEUL] = "leul",
1952     [MO_LESL] = "lesl",
1953     [MO_LEQ]  = "leq",
1954     [MO_BEUW] = "beuw",
1955     [MO_BESW] = "besw",
1956     [MO_BEUL] = "beul",
1957     [MO_BESL] = "besl",
1958     [MO_BEQ]  = "beq",
1959 };
1960 
1961 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1962 #ifdef TARGET_ALIGNED_ONLY
1963     [MO_UNALN >> MO_ASHIFT]    = "un+",
1964     [MO_ALIGN >> MO_ASHIFT]    = "",
1965 #else
1966     [MO_UNALN >> MO_ASHIFT]    = "",
1967     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1968 #endif
1969     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1970     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1971     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1972     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1973     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1974     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1975 };
1976 
1977 static inline bool tcg_regset_single(TCGRegSet d)
1978 {
1979     return (d & (d - 1)) == 0;
1980 }
1981 
1982 static inline TCGReg tcg_regset_first(TCGRegSet d)
1983 {
1984     if (TCG_TARGET_NB_REGS <= 32) {
1985         return ctz32(d);
1986     } else {
1987         return ctz64(d);
1988     }
1989 }
1990 
1991 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1992 {
1993     char buf[128];
1994     TCGOp *op;
1995 
1996     QTAILQ_FOREACH(op, &s->ops, link) {
1997         int i, k, nb_oargs, nb_iargs, nb_cargs;
1998         const TCGOpDef *def;
1999         TCGOpcode c;
2000         int col = 0;
2001 
2002         c = op->opc;
2003         def = &tcg_op_defs[c];
2004 
2005         if (c == INDEX_op_insn_start) {
2006             nb_oargs = 0;
2007             col += qemu_log("\n ----");
2008 
2009             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2010                 target_ulong a;
2011 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2012                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2013 #else
2014                 a = op->args[i];
2015 #endif
2016                 col += qemu_log(" " TARGET_FMT_lx, a);
2017             }
2018         } else if (c == INDEX_op_call) {
2019             /* variable number of arguments */
2020             nb_oargs = TCGOP_CALLO(op);
2021             nb_iargs = TCGOP_CALLI(op);
2022             nb_cargs = def->nb_cargs;
2023 
2024             /* function name, flags, out args */
2025             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2026                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2027                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2028             for (i = 0; i < nb_oargs; i++) {
2029                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2030                                                        op->args[i]));
2031             }
2032             for (i = 0; i < nb_iargs; i++) {
2033                 TCGArg arg = op->args[nb_oargs + i];
2034                 const char *t = "<dummy>";
2035                 if (arg != TCG_CALL_DUMMY_ARG) {
2036                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2037                 }
2038                 col += qemu_log(",%s", t);
2039             }
2040         } else {
2041             col += qemu_log(" %s ", def->name);
2042 
2043             nb_oargs = def->nb_oargs;
2044             nb_iargs = def->nb_iargs;
2045             nb_cargs = def->nb_cargs;
2046 
2047             if (def->flags & TCG_OPF_VECTOR) {
2048                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2049                                 8 << TCGOP_VECE(op));
2050             }
2051 
2052             k = 0;
2053             for (i = 0; i < nb_oargs; i++) {
2054                 if (k != 0) {
2055                     col += qemu_log(",");
2056                 }
2057                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2058                                                       op->args[k++]));
2059             }
2060             for (i = 0; i < nb_iargs; i++) {
2061                 if (k != 0) {
2062                     col += qemu_log(",");
2063                 }
2064                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2065                                                       op->args[k++]));
2066             }
2067             switch (c) {
2068             case INDEX_op_brcond_i32:
2069             case INDEX_op_setcond_i32:
2070             case INDEX_op_movcond_i32:
2071             case INDEX_op_brcond2_i32:
2072             case INDEX_op_setcond2_i32:
2073             case INDEX_op_brcond_i64:
2074             case INDEX_op_setcond_i64:
2075             case INDEX_op_movcond_i64:
2076             case INDEX_op_cmp_vec:
2077             case INDEX_op_cmpsel_vec:
2078                 if (op->args[k] < ARRAY_SIZE(cond_name)
2079                     && cond_name[op->args[k]]) {
2080                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2081                 } else {
2082                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2083                 }
2084                 i = 1;
2085                 break;
2086             case INDEX_op_qemu_ld_i32:
2087             case INDEX_op_qemu_st_i32:
2088             case INDEX_op_qemu_ld_i64:
2089             case INDEX_op_qemu_st_i64:
2090                 {
2091                     TCGMemOpIdx oi = op->args[k++];
2092                     MemOp op = get_memop(oi);
2093                     unsigned ix = get_mmuidx(oi);
2094 
2095                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2096                         col += qemu_log(",$0x%x,%u", op, ix);
2097                     } else {
2098                         const char *s_al, *s_op;
2099                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2100                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2101                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2102                     }
2103                     i = 1;
2104                 }
2105                 break;
2106             default:
2107                 i = 0;
2108                 break;
2109             }
2110             switch (c) {
2111             case INDEX_op_set_label:
2112             case INDEX_op_br:
2113             case INDEX_op_brcond_i32:
2114             case INDEX_op_brcond_i64:
2115             case INDEX_op_brcond2_i32:
2116                 col += qemu_log("%s$L%d", k ? "," : "",
2117                                 arg_label(op->args[k])->id);
2118                 i++, k++;
2119                 break;
2120             default:
2121                 break;
2122             }
2123             for (; i < nb_cargs; i++, k++) {
2124                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2125             }
2126         }
2127 
2128         if (have_prefs || op->life) {
2129 
2130             QemuLogFile *logfile;
2131 
2132             rcu_read_lock();
2133             logfile = qatomic_rcu_read(&qemu_logfile);
2134             if (logfile) {
2135                 for (; col < 40; ++col) {
2136                     putc(' ', logfile->fd);
2137                 }
2138             }
2139             rcu_read_unlock();
2140         }
2141 
2142         if (op->life) {
2143             unsigned life = op->life;
2144 
2145             if (life & (SYNC_ARG * 3)) {
2146                 qemu_log("  sync:");
2147                 for (i = 0; i < 2; ++i) {
2148                     if (life & (SYNC_ARG << i)) {
2149                         qemu_log(" %d", i);
2150                     }
2151                 }
2152             }
2153             life /= DEAD_ARG;
2154             if (life) {
2155                 qemu_log("  dead:");
2156                 for (i = 0; life; ++i, life >>= 1) {
2157                     if (life & 1) {
2158                         qemu_log(" %d", i);
2159                     }
2160                 }
2161             }
2162         }
2163 
2164         if (have_prefs) {
2165             for (i = 0; i < nb_oargs; ++i) {
2166                 TCGRegSet set = op->output_pref[i];
2167 
2168                 if (i == 0) {
2169                     qemu_log("  pref=");
2170                 } else {
2171                     qemu_log(",");
2172                 }
2173                 if (set == 0) {
2174                     qemu_log("none");
2175                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2176                     qemu_log("all");
2177 #ifdef CONFIG_DEBUG_TCG
2178                 } else if (tcg_regset_single(set)) {
2179                     TCGReg reg = tcg_regset_first(set);
2180                     qemu_log("%s", tcg_target_reg_names[reg]);
2181 #endif
2182                 } else if (TCG_TARGET_NB_REGS <= 32) {
2183                     qemu_log("%#x", (uint32_t)set);
2184                 } else {
2185                     qemu_log("%#" PRIx64, (uint64_t)set);
2186                 }
2187             }
2188         }
2189 
2190         qemu_log("\n");
2191     }
2192 }
2193 
2194 /* we give more priority to constraints with less registers */
2195 static int get_constraint_priority(const TCGOpDef *def, int k)
2196 {
2197     const TCGArgConstraint *arg_ct = &def->args_ct[k];
2198     int n;
2199 
2200     if (arg_ct->oalias) {
2201         /* an alias is equivalent to a single register */
2202         n = 1;
2203     } else {
2204         n = ctpop64(arg_ct->regs);
2205     }
2206     return TCG_TARGET_NB_REGS - n + 1;
2207 }
2208 
2209 /* sort from highest priority to lowest */
2210 static void sort_constraints(TCGOpDef *def, int start, int n)
2211 {
2212     int i, j;
2213     TCGArgConstraint *a = def->args_ct;
2214 
2215     for (i = 0; i < n; i++) {
2216         a[start + i].sort_index = start + i;
2217     }
2218     if (n <= 1) {
2219         return;
2220     }
2221     for (i = 0; i < n - 1; i++) {
2222         for (j = i + 1; j < n; j++) {
2223             int p1 = get_constraint_priority(def, a[start + i].sort_index);
2224             int p2 = get_constraint_priority(def, a[start + j].sort_index);
2225             if (p1 < p2) {
2226                 int tmp = a[start + i].sort_index;
2227                 a[start + i].sort_index = a[start + j].sort_index;
2228                 a[start + j].sort_index = tmp;
2229             }
2230         }
2231     }
2232 }
2233 
2234 static void process_op_defs(TCGContext *s)
2235 {
2236     TCGOpcode op;
2237 
2238     for (op = 0; op < NB_OPS; op++) {
2239         TCGOpDef *def = &tcg_op_defs[op];
2240         const TCGTargetOpDef *tdefs;
2241         TCGType type;
2242         int i, nb_args;
2243 
2244         if (def->flags & TCG_OPF_NOT_PRESENT) {
2245             continue;
2246         }
2247 
2248         nb_args = def->nb_iargs + def->nb_oargs;
2249         if (nb_args == 0) {
2250             continue;
2251         }
2252 
2253         tdefs = tcg_target_op_def(op);
2254         /* Missing TCGTargetOpDef entry. */
2255         tcg_debug_assert(tdefs != NULL);
2256 
2257         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2258         for (i = 0; i < nb_args; i++) {
2259             const char *ct_str = tdefs->args_ct_str[i];
2260             /* Incomplete TCGTargetOpDef entry. */
2261             tcg_debug_assert(ct_str != NULL);
2262 
2263             while (*ct_str != '\0') {
2264                 switch(*ct_str) {
2265                 case '0' ... '9':
2266                     {
2267                         int oarg = *ct_str - '0';
2268                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2269                         tcg_debug_assert(oarg < def->nb_oargs);
2270                         tcg_debug_assert(def->args_ct[oarg].regs != 0);
2271                         def->args_ct[i] = def->args_ct[oarg];
2272                         /* The output sets oalias.  */
2273                         def->args_ct[oarg].oalias = true;
2274                         def->args_ct[oarg].alias_index = i;
2275                         /* The input sets ialias. */
2276                         def->args_ct[i].ialias = true;
2277                         def->args_ct[i].alias_index = oarg;
2278                     }
2279                     ct_str++;
2280                     break;
2281                 case '&':
2282                     def->args_ct[i].newreg = true;
2283                     ct_str++;
2284                     break;
2285                 case 'i':
2286                     def->args_ct[i].ct |= TCG_CT_CONST;
2287                     ct_str++;
2288                     break;
2289                 default:
2290                     ct_str = target_parse_constraint(&def->args_ct[i],
2291                                                      ct_str, type);
2292                     /* Typo in TCGTargetOpDef constraint. */
2293                     tcg_debug_assert(ct_str != NULL);
2294                 }
2295             }
2296         }
2297 
2298         /* TCGTargetOpDef entry with too much information? */
2299         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2300 
2301         /* sort the constraints (XXX: this is just an heuristic) */
2302         sort_constraints(def, 0, def->nb_oargs);
2303         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2304     }
2305 }
2306 
2307 void tcg_op_remove(TCGContext *s, TCGOp *op)
2308 {
2309     TCGLabel *label;
2310 
2311     switch (op->opc) {
2312     case INDEX_op_br:
2313         label = arg_label(op->args[0]);
2314         label->refs--;
2315         break;
2316     case INDEX_op_brcond_i32:
2317     case INDEX_op_brcond_i64:
2318         label = arg_label(op->args[3]);
2319         label->refs--;
2320         break;
2321     case INDEX_op_brcond2_i32:
2322         label = arg_label(op->args[5]);
2323         label->refs--;
2324         break;
2325     default:
2326         break;
2327     }
2328 
2329     QTAILQ_REMOVE(&s->ops, op, link);
2330     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2331     s->nb_ops--;
2332 
2333 #ifdef CONFIG_PROFILER
2334     qatomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2335 #endif
2336 }
2337 
2338 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2339 {
2340     TCGContext *s = tcg_ctx;
2341     TCGOp *op;
2342 
2343     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2344         op = tcg_malloc(sizeof(TCGOp));
2345     } else {
2346         op = QTAILQ_FIRST(&s->free_ops);
2347         QTAILQ_REMOVE(&s->free_ops, op, link);
2348     }
2349     memset(op, 0, offsetof(TCGOp, link));
2350     op->opc = opc;
2351     s->nb_ops++;
2352 
2353     return op;
2354 }
2355 
2356 TCGOp *tcg_emit_op(TCGOpcode opc)
2357 {
2358     TCGOp *op = tcg_op_alloc(opc);
2359     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2360     return op;
2361 }
2362 
2363 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2364 {
2365     TCGOp *new_op = tcg_op_alloc(opc);
2366     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2367     return new_op;
2368 }
2369 
2370 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2371 {
2372     TCGOp *new_op = tcg_op_alloc(opc);
2373     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2374     return new_op;
2375 }
2376 
2377 /* Reachable analysis : remove unreachable code.  */
2378 static void reachable_code_pass(TCGContext *s)
2379 {
2380     TCGOp *op, *op_next;
2381     bool dead = false;
2382 
2383     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2384         bool remove = dead;
2385         TCGLabel *label;
2386         int call_flags;
2387 
2388         switch (op->opc) {
2389         case INDEX_op_set_label:
2390             label = arg_label(op->args[0]);
2391             if (label->refs == 0) {
2392                 /*
2393                  * While there is an occasional backward branch, virtually
2394                  * all branches generated by the translators are forward.
2395                  * Which means that generally we will have already removed
2396                  * all references to the label that will be, and there is
2397                  * little to be gained by iterating.
2398                  */
2399                 remove = true;
2400             } else {
2401                 /* Once we see a label, insns become live again.  */
2402                 dead = false;
2403                 remove = false;
2404 
2405                 /*
2406                  * Optimization can fold conditional branches to unconditional.
2407                  * If we find a label with one reference which is preceded by
2408                  * an unconditional branch to it, remove both.  This needed to
2409                  * wait until the dead code in between them was removed.
2410                  */
2411                 if (label->refs == 1) {
2412                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2413                     if (op_prev->opc == INDEX_op_br &&
2414                         label == arg_label(op_prev->args[0])) {
2415                         tcg_op_remove(s, op_prev);
2416                         remove = true;
2417                     }
2418                 }
2419             }
2420             break;
2421 
2422         case INDEX_op_br:
2423         case INDEX_op_exit_tb:
2424         case INDEX_op_goto_ptr:
2425             /* Unconditional branches; everything following is dead.  */
2426             dead = true;
2427             break;
2428 
2429         case INDEX_op_call:
2430             /* Notice noreturn helper calls, raising exceptions.  */
2431             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2432             if (call_flags & TCG_CALL_NO_RETURN) {
2433                 dead = true;
2434             }
2435             break;
2436 
2437         case INDEX_op_insn_start:
2438             /* Never remove -- we need to keep these for unwind.  */
2439             remove = false;
2440             break;
2441 
2442         default:
2443             break;
2444         }
2445 
2446         if (remove) {
2447             tcg_op_remove(s, op);
2448         }
2449     }
2450 }
2451 
/* Liveness state bits stored in a temp's state field. */
#define TS_DEAD  (1 << 0)
#define TS_MEM   (1 << 1)

/*
 * Test the dead / needs-sync bit of argument n.  Both expect a variable
 * named arg_life to be in scope at the point of use.
 */
#define IS_DEAD_ARG(n)   ((DEAD_ARG << (n)) & arg_life)
#define NEED_SYNC_ARG(n) ((SYNC_ARG << (n)) & arg_life)
2457 
2458 /* For liveness_pass_1, the register preferences for a given temp.  */
2459 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2460 {
2461     return ts->state_ptr;
2462 }
2463 
2464 /* For liveness_pass_1, reset the preferences for a given temp to the
2465  * maximal regset for its type.
2466  */
2467 static inline void la_reset_pref(TCGTemp *ts)
2468 {
2469     *la_temp_pref(ts)
2470         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2471 }
2472 
2473 /* liveness analysis: end of function: all temps are dead, and globals
2474    should be in memory. */
2475 static void la_func_end(TCGContext *s, int ng, int nt)
2476 {
2477     int i;
2478 
2479     for (i = 0; i < ng; ++i) {
2480         s->temps[i].state = TS_DEAD | TS_MEM;
2481         la_reset_pref(&s->temps[i]);
2482     }
2483     for (i = ng; i < nt; ++i) {
2484         s->temps[i].state = TS_DEAD;
2485         la_reset_pref(&s->temps[i]);
2486     }
2487 }
2488 
2489 /* liveness analysis: end of basic block: all temps are dead, globals
2490    and local temps should be in memory. */
2491 static void la_bb_end(TCGContext *s, int ng, int nt)
2492 {
2493     int i;
2494 
2495     for (i = 0; i < ng; ++i) {
2496         s->temps[i].state = TS_DEAD | TS_MEM;
2497         la_reset_pref(&s->temps[i]);
2498     }
2499     for (i = ng; i < nt; ++i) {
2500         s->temps[i].state = (s->temps[i].temp_local
2501                              ? TS_DEAD | TS_MEM
2502                              : TS_DEAD);
2503         la_reset_pref(&s->temps[i]);
2504     }
2505 }
2506 
2507 /* liveness analysis: sync globals back to memory.  */
2508 static void la_global_sync(TCGContext *s, int ng)
2509 {
2510     int i;
2511 
2512     for (i = 0; i < ng; ++i) {
2513         int state = s->temps[i].state;
2514         s->temps[i].state = state | TS_MEM;
2515         if (state == TS_DEAD) {
2516             /* If the global was previously dead, reset prefs.  */
2517             la_reset_pref(&s->temps[i]);
2518         }
2519     }
2520 }
2521 
2522 /* liveness analysis: sync globals back to memory and kill.  */
2523 static void la_global_kill(TCGContext *s, int ng)
2524 {
2525     int i;
2526 
2527     for (i = 0; i < ng; i++) {
2528         s->temps[i].state = TS_DEAD | TS_MEM;
2529         la_reset_pref(&s->temps[i]);
2530     }
2531 }
2532 
2533 /* liveness analysis: note live globals crossing calls.  */
2534 static void la_cross_call(TCGContext *s, int nt)
2535 {
2536     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2537     int i;
2538 
2539     for (i = 0; i < nt; i++) {
2540         TCGTemp *ts = &s->temps[i];
2541         if (!(ts->state & TS_DEAD)) {
2542             TCGRegSet *pset = la_temp_pref(ts);
2543             TCGRegSet set = *pset;
2544 
2545             set &= mask;
2546             /* If the combination is not possible, restart.  */
2547             if (set == 0) {
2548                 set = tcg_target_available_regs[ts->type] & mask;
2549             }
2550             *pset = set;
2551         }
2552     }
2553 }
2554 
/* Liveness analysis: update the opc_arg_life array to tell whether a
   given input argument is dead.  Instructions that only update dead
   temporaries are removed. */
2558 static void liveness_pass_1(TCGContext *s)
2559 {
2560     int nb_globals = s->nb_globals;
2561     int nb_temps = s->nb_temps;
2562     TCGOp *op, *op_prev;
2563     TCGRegSet *prefs;
2564     int i;
2565 
2566     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2567     for (i = 0; i < nb_temps; ++i) {
2568         s->temps[i].state_ptr = prefs + i;
2569     }
2570 
2571     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2572     la_func_end(s, nb_globals, nb_temps);
2573 
2574     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2575         int nb_iargs, nb_oargs;
2576         TCGOpcode opc_new, opc_new2;
2577         bool have_opc_new2;
2578         TCGLifeData arg_life = 0;
2579         TCGTemp *ts;
2580         TCGOpcode opc = op->opc;
2581         const TCGOpDef *def = &tcg_op_defs[opc];
2582 
2583         switch (opc) {
2584         case INDEX_op_call:
2585             {
2586                 int call_flags;
2587                 int nb_call_regs;
2588 
2589                 nb_oargs = TCGOP_CALLO(op);
2590                 nb_iargs = TCGOP_CALLI(op);
2591                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2592 
2593                 /* pure functions can be removed if their result is unused */
2594                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2595                     for (i = 0; i < nb_oargs; i++) {
2596                         ts = arg_temp(op->args[i]);
2597                         if (ts->state != TS_DEAD) {
2598                             goto do_not_remove_call;
2599                         }
2600                     }
2601                     goto do_remove;
2602                 }
2603             do_not_remove_call:
2604 
2605                 /* Output args are dead.  */
2606                 for (i = 0; i < nb_oargs; i++) {
2607                     ts = arg_temp(op->args[i]);
2608                     if (ts->state & TS_DEAD) {
2609                         arg_life |= DEAD_ARG << i;
2610                     }
2611                     if (ts->state & TS_MEM) {
2612                         arg_life |= SYNC_ARG << i;
2613                     }
2614                     ts->state = TS_DEAD;
2615                     la_reset_pref(ts);
2616 
2617                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2618                     op->output_pref[i] = 0;
2619                 }
2620 
2621                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2622                                     TCG_CALL_NO_READ_GLOBALS))) {
2623                     la_global_kill(s, nb_globals);
2624                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2625                     la_global_sync(s, nb_globals);
2626                 }
2627 
2628                 /* Record arguments that die in this helper.  */
2629                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2630                     ts = arg_temp(op->args[i]);
2631                     if (ts && ts->state & TS_DEAD) {
2632                         arg_life |= DEAD_ARG << i;
2633                     }
2634                 }
2635 
2636                 /* For all live registers, remove call-clobbered prefs.  */
2637                 la_cross_call(s, nb_temps);
2638 
2639                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2640 
2641                 /* Input arguments are live for preceding opcodes.  */
2642                 for (i = 0; i < nb_iargs; i++) {
2643                     ts = arg_temp(op->args[i + nb_oargs]);
2644                     if (ts && ts->state & TS_DEAD) {
2645                         /* For those arguments that die, and will be allocated
2646                          * in registers, clear the register set for that arg,
2647                          * to be filled in below.  For args that will be on
2648                          * the stack, reset to any available reg.
2649                          */
2650                         *la_temp_pref(ts)
2651                             = (i < nb_call_regs ? 0 :
2652                                tcg_target_available_regs[ts->type]);
2653                         ts->state &= ~TS_DEAD;
2654                     }
2655                 }
2656 
2657                 /* For each input argument, add its input register to prefs.
2658                    If a temp is used once, this produces a single set bit.  */
2659                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2660                     ts = arg_temp(op->args[i + nb_oargs]);
2661                     if (ts) {
2662                         tcg_regset_set_reg(*la_temp_pref(ts),
2663                                            tcg_target_call_iarg_regs[i]);
2664                     }
2665                 }
2666             }
2667             break;
2668         case INDEX_op_insn_start:
2669             break;
2670         case INDEX_op_discard:
2671             /* mark the temporary as dead */
2672             ts = arg_temp(op->args[0]);
2673             ts->state = TS_DEAD;
2674             la_reset_pref(ts);
2675             break;
2676 
2677         case INDEX_op_add2_i32:
2678             opc_new = INDEX_op_add_i32;
2679             goto do_addsub2;
2680         case INDEX_op_sub2_i32:
2681             opc_new = INDEX_op_sub_i32;
2682             goto do_addsub2;
2683         case INDEX_op_add2_i64:
2684             opc_new = INDEX_op_add_i64;
2685             goto do_addsub2;
2686         case INDEX_op_sub2_i64:
2687             opc_new = INDEX_op_sub_i64;
2688         do_addsub2:
2689             nb_iargs = 4;
2690             nb_oargs = 2;
2691             /* Test if the high part of the operation is dead, but not
2692                the low part.  The result can be optimized to a simple
2693                add or sub.  This happens often for x86_64 guest when the
2694                cpu mode is set to 32 bit.  */
2695             if (arg_temp(op->args[1])->state == TS_DEAD) {
2696                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2697                     goto do_remove;
2698                 }
2699                 /* Replace the opcode and adjust the args in place,
2700                    leaving 3 unused args at the end.  */
2701                 op->opc = opc = opc_new;
2702                 op->args[1] = op->args[2];
2703                 op->args[2] = op->args[4];
2704                 /* Fall through and mark the single-word operation live.  */
2705                 nb_iargs = 2;
2706                 nb_oargs = 1;
2707             }
2708             goto do_not_remove;
2709 
2710         case INDEX_op_mulu2_i32:
2711             opc_new = INDEX_op_mul_i32;
2712             opc_new2 = INDEX_op_muluh_i32;
2713             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2714             goto do_mul2;
2715         case INDEX_op_muls2_i32:
2716             opc_new = INDEX_op_mul_i32;
2717             opc_new2 = INDEX_op_mulsh_i32;
2718             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2719             goto do_mul2;
2720         case INDEX_op_mulu2_i64:
2721             opc_new = INDEX_op_mul_i64;
2722             opc_new2 = INDEX_op_muluh_i64;
2723             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2724             goto do_mul2;
2725         case INDEX_op_muls2_i64:
2726             opc_new = INDEX_op_mul_i64;
2727             opc_new2 = INDEX_op_mulsh_i64;
2728             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2729             goto do_mul2;
2730         do_mul2:
2731             nb_iargs = 2;
2732             nb_oargs = 2;
2733             if (arg_temp(op->args[1])->state == TS_DEAD) {
2734                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2735                     /* Both parts of the operation are dead.  */
2736                     goto do_remove;
2737                 }
2738                 /* The high part of the operation is dead; generate the low. */
2739                 op->opc = opc = opc_new;
2740                 op->args[1] = op->args[2];
2741                 op->args[2] = op->args[3];
2742             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2743                 /* The low part of the operation is dead; generate the high. */
2744                 op->opc = opc = opc_new2;
2745                 op->args[0] = op->args[1];
2746                 op->args[1] = op->args[2];
2747                 op->args[2] = op->args[3];
2748             } else {
2749                 goto do_not_remove;
2750             }
2751             /* Mark the single-word operation live.  */
2752             nb_oargs = 1;
2753             goto do_not_remove;
2754 
2755         default:
2756             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2757             nb_iargs = def->nb_iargs;
2758             nb_oargs = def->nb_oargs;
2759 
2760             /* Test if the operation can be removed because all
2761                its outputs are dead. We assume that nb_oargs == 0
2762                implies side effects */
2763             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2764                 for (i = 0; i < nb_oargs; i++) {
2765                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2766                         goto do_not_remove;
2767                     }
2768                 }
2769                 goto do_remove;
2770             }
2771             goto do_not_remove;
2772 
2773         do_remove:
2774             tcg_op_remove(s, op);
2775             break;
2776 
2777         do_not_remove:
2778             for (i = 0; i < nb_oargs; i++) {
2779                 ts = arg_temp(op->args[i]);
2780 
2781                 /* Remember the preference of the uses that followed.  */
2782                 op->output_pref[i] = *la_temp_pref(ts);
2783 
2784                 /* Output args are dead.  */
2785                 if (ts->state & TS_DEAD) {
2786                     arg_life |= DEAD_ARG << i;
2787                 }
2788                 if (ts->state & TS_MEM) {
2789                     arg_life |= SYNC_ARG << i;
2790                 }
2791                 ts->state = TS_DEAD;
2792                 la_reset_pref(ts);
2793             }
2794 
2795             /* If end of basic block, update.  */
2796             if (def->flags & TCG_OPF_BB_EXIT) {
2797                 la_func_end(s, nb_globals, nb_temps);
2798             } else if (def->flags & TCG_OPF_BB_END) {
2799                 la_bb_end(s, nb_globals, nb_temps);
2800             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2801                 la_global_sync(s, nb_globals);
2802                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2803                     la_cross_call(s, nb_temps);
2804                 }
2805             }
2806 
2807             /* Record arguments that die in this opcode.  */
2808             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2809                 ts = arg_temp(op->args[i]);
2810                 if (ts->state & TS_DEAD) {
2811                     arg_life |= DEAD_ARG << i;
2812                 }
2813             }
2814 
2815             /* Input arguments are live for preceding opcodes.  */
2816             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2817                 ts = arg_temp(op->args[i]);
2818                 if (ts->state & TS_DEAD) {
2819                     /* For operands that were dead, initially allow
2820                        all regs for the type.  */
2821                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2822                     ts->state &= ~TS_DEAD;
2823                 }
2824             }
2825 
2826             /* Incorporate constraints for this operand.  */
2827             switch (opc) {
2828             case INDEX_op_mov_i32:
2829             case INDEX_op_mov_i64:
2830                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2831                    have proper constraints.  That said, special case
2832                    moves to propagate preferences backward.  */
2833                 if (IS_DEAD_ARG(1)) {
2834                     *la_temp_pref(arg_temp(op->args[0]))
2835                         = *la_temp_pref(arg_temp(op->args[1]));
2836                 }
2837                 break;
2838 
2839             default:
2840                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2841                     const TCGArgConstraint *ct = &def->args_ct[i];
2842                     TCGRegSet set, *pset;
2843 
2844                     ts = arg_temp(op->args[i]);
2845                     pset = la_temp_pref(ts);
2846                     set = *pset;
2847 
2848                     set &= ct->regs;
2849                     if (ct->ialias) {
2850                         set &= op->output_pref[ct->alias_index];
2851                     }
2852                     /* If the combination is not possible, restart.  */
2853                     if (set == 0) {
2854                         set = ct->regs;
2855                     }
2856                     *pset = set;
2857                 }
2858                 break;
2859             }
2860             break;
2861         }
2862         op->life = arg_life;
2863     }
2864 }
2865 
2866 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
2867 static bool liveness_pass_2(TCGContext *s)
2868 {
2869     int nb_globals = s->nb_globals;
2870     int nb_temps, i;
2871     bool changes = false;
2872     TCGOp *op, *op_next;
2873 
2874     /* Create a temporary for each indirect global.  */
2875     for (i = 0; i < nb_globals; ++i) {
2876         TCGTemp *its = &s->temps[i];
2877         if (its->indirect_reg) {
2878             TCGTemp *dts = tcg_temp_alloc(s);
2879             dts->type = its->type;
2880             dts->base_type = its->base_type;
2881             its->state_ptr = dts;
2882         } else {
2883             its->state_ptr = NULL;
2884         }
2885         /* All globals begin dead.  */
2886         its->state = TS_DEAD;
2887     }
2888     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2889         TCGTemp *its = &s->temps[i];
2890         its->state_ptr = NULL;
2891         its->state = TS_DEAD;
2892     }
2893 
2894     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2895         TCGOpcode opc = op->opc;
2896         const TCGOpDef *def = &tcg_op_defs[opc];
2897         TCGLifeData arg_life = op->life;
2898         int nb_iargs, nb_oargs, call_flags;
2899         TCGTemp *arg_ts, *dir_ts;
2900 
2901         if (opc == INDEX_op_call) {
2902             nb_oargs = TCGOP_CALLO(op);
2903             nb_iargs = TCGOP_CALLI(op);
2904             call_flags = op->args[nb_oargs + nb_iargs + 1];
2905         } else {
2906             nb_iargs = def->nb_iargs;
2907             nb_oargs = def->nb_oargs;
2908 
2909             /* Set flags similar to how calls require.  */
2910             if (def->flags & TCG_OPF_BB_END) {
2911                 /* Like writing globals: save_globals */
2912                 call_flags = 0;
2913             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2914                 /* Like reading globals: sync_globals */
2915                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2916             } else {
2917                 /* No effect on globals.  */
2918                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2919                               TCG_CALL_NO_WRITE_GLOBALS);
2920             }
2921         }
2922 
2923         /* Make sure that input arguments are available.  */
2924         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2925             arg_ts = arg_temp(op->args[i]);
2926             if (arg_ts) {
2927                 dir_ts = arg_ts->state_ptr;
2928                 if (dir_ts && arg_ts->state == TS_DEAD) {
2929                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2930                                       ? INDEX_op_ld_i32
2931                                       : INDEX_op_ld_i64);
2932                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2933 
2934                     lop->args[0] = temp_arg(dir_ts);
2935                     lop->args[1] = temp_arg(arg_ts->mem_base);
2936                     lop->args[2] = arg_ts->mem_offset;
2937 
2938                     /* Loaded, but synced with memory.  */
2939                     arg_ts->state = TS_MEM;
2940                 }
2941             }
2942         }
2943 
2944         /* Perform input replacement, and mark inputs that became dead.
2945            No action is required except keeping temp_state up to date
2946            so that we reload when needed.  */
2947         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2948             arg_ts = arg_temp(op->args[i]);
2949             if (arg_ts) {
2950                 dir_ts = arg_ts->state_ptr;
2951                 if (dir_ts) {
2952                     op->args[i] = temp_arg(dir_ts);
2953                     changes = true;
2954                     if (IS_DEAD_ARG(i)) {
2955                         arg_ts->state = TS_DEAD;
2956                     }
2957                 }
2958             }
2959         }
2960 
2961         /* Liveness analysis should ensure that the following are
2962            all correct, for call sites and basic block end points.  */
2963         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2964             /* Nothing to do */
2965         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2966             for (i = 0; i < nb_globals; ++i) {
2967                 /* Liveness should see that globals are synced back,
2968                    that is, either TS_DEAD or TS_MEM.  */
2969                 arg_ts = &s->temps[i];
2970                 tcg_debug_assert(arg_ts->state_ptr == 0
2971                                  || arg_ts->state != 0);
2972             }
2973         } else {
2974             for (i = 0; i < nb_globals; ++i) {
2975                 /* Liveness should see that globals are saved back,
2976                    that is, TS_DEAD, waiting to be reloaded.  */
2977                 arg_ts = &s->temps[i];
2978                 tcg_debug_assert(arg_ts->state_ptr == 0
2979                                  || arg_ts->state == TS_DEAD);
2980             }
2981         }
2982 
2983         /* Outputs become available.  */
2984         if (opc == INDEX_op_mov_i32 || opc == INDEX_op_mov_i64) {
2985             arg_ts = arg_temp(op->args[0]);
2986             dir_ts = arg_ts->state_ptr;
2987             if (dir_ts) {
2988                 op->args[0] = temp_arg(dir_ts);
2989                 changes = true;
2990 
2991                 /* The output is now live and modified.  */
2992                 arg_ts->state = 0;
2993 
2994                 if (NEED_SYNC_ARG(0)) {
2995                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2996                                       ? INDEX_op_st_i32
2997                                       : INDEX_op_st_i64);
2998                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2999                     TCGTemp *out_ts = dir_ts;
3000 
3001                     if (IS_DEAD_ARG(0)) {
3002                         out_ts = arg_temp(op->args[1]);
3003                         arg_ts->state = TS_DEAD;
3004                         tcg_op_remove(s, op);
3005                     } else {
3006                         arg_ts->state = TS_MEM;
3007                     }
3008 
3009                     sop->args[0] = temp_arg(out_ts);
3010                     sop->args[1] = temp_arg(arg_ts->mem_base);
3011                     sop->args[2] = arg_ts->mem_offset;
3012                 } else {
3013                     tcg_debug_assert(!IS_DEAD_ARG(0));
3014                 }
3015             }
3016         } else {
3017             for (i = 0; i < nb_oargs; i++) {
3018                 arg_ts = arg_temp(op->args[i]);
3019                 dir_ts = arg_ts->state_ptr;
3020                 if (!dir_ts) {
3021                     continue;
3022                 }
3023                 op->args[i] = temp_arg(dir_ts);
3024                 changes = true;
3025 
3026                 /* The output is now live and modified.  */
3027                 arg_ts->state = 0;
3028 
3029                 /* Sync outputs upon their last write.  */
3030                 if (NEED_SYNC_ARG(i)) {
3031                     TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
3032                                       ? INDEX_op_st_i32
3033                                       : INDEX_op_st_i64);
3034                     TCGOp *sop = tcg_op_insert_after(s, op, sopc);
3035 
3036                     sop->args[0] = temp_arg(dir_ts);
3037                     sop->args[1] = temp_arg(arg_ts->mem_base);
3038                     sop->args[2] = arg_ts->mem_offset;
3039 
3040                     arg_ts->state = TS_MEM;
3041                 }
3042                 /* Drop outputs that are dead.  */
3043                 if (IS_DEAD_ARG(i)) {
3044                     arg_ts->state = TS_DEAD;
3045                 }
3046             }
3047         }
3048     }
3049 
3050     return changes;
3051 }
3052 
3053 #ifdef CONFIG_DEBUG_TCG
3054 static void dump_regs(TCGContext *s)
3055 {
3056     TCGTemp *ts;
3057     int i;
3058     char buf[64];
3059 
3060     for(i = 0; i < s->nb_temps; i++) {
3061         ts = &s->temps[i];
3062         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3063         switch(ts->val_type) {
3064         case TEMP_VAL_REG:
3065             printf("%s", tcg_target_reg_names[ts->reg]);
3066             break;
3067         case TEMP_VAL_MEM:
3068             printf("%d(%s)", (int)ts->mem_offset,
3069                    tcg_target_reg_names[ts->mem_base->reg]);
3070             break;
3071         case TEMP_VAL_CONST:
3072             printf("$0x%" TCG_PRIlx, ts->val);
3073             break;
3074         case TEMP_VAL_DEAD:
3075             printf("D");
3076             break;
3077         default:
3078             printf("???");
3079             break;
3080         }
3081         printf("\n");
3082     }
3083 
3084     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3085         if (s->reg_to_temp[i] != NULL) {
3086             printf("%s: %s\n",
3087                    tcg_target_reg_names[i],
3088                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3089         }
3090     }
3091 }
3092 
3093 static void check_regs(TCGContext *s)
3094 {
3095     int reg;
3096     int k;
3097     TCGTemp *ts;
3098     char buf[64];
3099 
3100     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3101         ts = s->reg_to_temp[reg];
3102         if (ts != NULL) {
3103             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3104                 printf("Inconsistency for register %s:\n",
3105                        tcg_target_reg_names[reg]);
3106                 goto fail;
3107             }
3108         }
3109     }
3110     for (k = 0; k < s->nb_temps; k++) {
3111         ts = &s->temps[k];
3112         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3113             && s->reg_to_temp[ts->reg] != ts) {
3114             printf("Inconsistency for temp %s:\n",
3115                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3116         fail:
3117             printf("reg state:\n");
3118             dump_regs(s);
3119             tcg_abort();
3120         }
3121     }
3122 }
3123 #endif
3124 
3125 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3126 {
3127 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3128     /* Sparc64 stack is accessed with offset of 2047 */
3129     s->current_frame_offset = (s->current_frame_offset +
3130                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3131         ~(sizeof(tcg_target_long) - 1);
3132 #endif
3133     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3134         s->frame_end) {
3135         tcg_abort();
3136     }
3137     ts->mem_offset = s->current_frame_offset;
3138     ts->mem_base = s->frame_temp;
3139     ts->mem_allocated = 1;
3140     s->current_frame_offset += sizeof(tcg_target_long);
3141 }
3142 
3143 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3144 
3145 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3146    mark it free; otherwise mark it dead.  */
3147 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3148 {
3149     if (ts->fixed_reg) {
3150         return;
3151     }
3152     if (ts->val_type == TEMP_VAL_REG) {
3153         s->reg_to_temp[ts->reg] = NULL;
3154     }
3155     ts->val_type = (free_or_dead < 0
3156                     || ts->temp_local
3157                     || ts->temp_global
3158                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3159 }
3160 
3161 /* Mark a temporary as dead.  */
3162 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3163 {
3164     temp_free_or_dead(s, ts, 1);
3165 }
3166 
3167 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3168    registers needs to be allocated to store a constant.  If 'free_or_dead'
3169    is non-zero, subsequently release the temporary; if it is positive, the
3170    temp is dead; if it is negative, the temp is free.  */
3171 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3172                       TCGRegSet preferred_regs, int free_or_dead)
3173 {
3174     if (ts->fixed_reg) {
3175         return;
3176     }
3177     if (!ts->mem_coherent) {
3178         if (!ts->mem_allocated) {
3179             temp_allocate_frame(s, ts);
3180         }
3181         switch (ts->val_type) {
3182         case TEMP_VAL_CONST:
3183             /* If we're going to free the temp immediately, then we won't
3184                require it later in a register, so attempt to store the
3185                constant to memory directly.  */
3186             if (free_or_dead
3187                 && tcg_out_sti(s, ts->type, ts->val,
3188                                ts->mem_base->reg, ts->mem_offset)) {
3189                 break;
3190             }
3191             temp_load(s, ts, tcg_target_available_regs[ts->type],
3192                       allocated_regs, preferred_regs);
3193             /* fallthrough */
3194 
3195         case TEMP_VAL_REG:
3196             tcg_out_st(s, ts->type, ts->reg,
3197                        ts->mem_base->reg, ts->mem_offset);
3198             break;
3199 
3200         case TEMP_VAL_MEM:
3201             break;
3202 
3203         case TEMP_VAL_DEAD:
3204         default:
3205             tcg_abort();
3206         }
3207         ts->mem_coherent = 1;
3208     }
3209     if (free_or_dead) {
3210         temp_free_or_dead(s, ts, free_or_dead);
3211     }
3212 }
3213 
3214 /* free register 'reg' by spilling the corresponding temporary if necessary */
3215 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3216 {
3217     TCGTemp *ts = s->reg_to_temp[reg];
3218     if (ts != NULL) {
3219         temp_sync(s, ts, allocated_regs, 0, -1);
3220     }
3221 }
3222 
3223 /**
3224  * tcg_reg_alloc:
3225  * @required_regs: Set of registers in which we must allocate.
3226  * @allocated_regs: Set of registers which must be avoided.
3227  * @preferred_regs: Set of registers we should prefer.
3228  * @rev: True if we search the registers in "indirect" order.
3229  *
3230  * The allocated register must be in @required_regs & ~@allocated_regs,
3231  * but if we can put it in @preferred_regs we may save a move later.
3232  */
3233 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3234                             TCGRegSet allocated_regs,
3235                             TCGRegSet preferred_regs, bool rev)
3236 {
3237     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3238     TCGRegSet reg_ct[2];
3239     const int *order;
3240 
3241     reg_ct[1] = required_regs & ~allocated_regs;
3242     tcg_debug_assert(reg_ct[1] != 0);
3243     reg_ct[0] = reg_ct[1] & preferred_regs;
3244 
3245     /* Skip the preferred_regs option if it cannot be satisfied,
3246        or if the preference made no difference.  */
3247     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3248 
3249     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3250 
3251     /* Try free registers, preferences first.  */
3252     for (j = f; j < 2; j++) {
3253         TCGRegSet set = reg_ct[j];
3254 
3255         if (tcg_regset_single(set)) {
3256             /* One register in the set.  */
3257             TCGReg reg = tcg_regset_first(set);
3258             if (s->reg_to_temp[reg] == NULL) {
3259                 return reg;
3260             }
3261         } else {
3262             for (i = 0; i < n; i++) {
3263                 TCGReg reg = order[i];
3264                 if (s->reg_to_temp[reg] == NULL &&
3265                     tcg_regset_test_reg(set, reg)) {
3266                     return reg;
3267                 }
3268             }
3269         }
3270     }
3271 
3272     /* We must spill something.  */
3273     for (j = f; j < 2; j++) {
3274         TCGRegSet set = reg_ct[j];
3275 
3276         if (tcg_regset_single(set)) {
3277             /* One register in the set.  */
3278             TCGReg reg = tcg_regset_first(set);
3279             tcg_reg_free(s, reg, allocated_regs);
3280             return reg;
3281         } else {
3282             for (i = 0; i < n; i++) {
3283                 TCGReg reg = order[i];
3284                 if (tcg_regset_test_reg(set, reg)) {
3285                     tcg_reg_free(s, reg, allocated_regs);
3286                     return reg;
3287                 }
3288             }
3289         }
3290     }
3291 
3292     tcg_abort();
3293 }
3294 
3295 /* Make sure the temporary is in a register.  If needed, allocate the register
3296    from DESIRED while avoiding ALLOCATED.  */
3297 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3298                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3299 {
3300     TCGReg reg;
3301 
3302     switch (ts->val_type) {
3303     case TEMP_VAL_REG:
3304         return;
3305     case TEMP_VAL_CONST:
3306         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3307                             preferred_regs, ts->indirect_base);
3308         tcg_out_movi(s, ts->type, reg, ts->val);
3309         ts->mem_coherent = 0;
3310         break;
3311     case TEMP_VAL_MEM:
3312         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3313                             preferred_regs, ts->indirect_base);
3314         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3315         ts->mem_coherent = 1;
3316         break;
3317     case TEMP_VAL_DEAD:
3318     default:
3319         tcg_abort();
3320     }
3321     ts->reg = reg;
3322     ts->val_type = TEMP_VAL_REG;
3323     s->reg_to_temp[reg] = ts;
3324 }
3325 
3326 /* Save a temporary to memory. 'allocated_regs' is used in case a
3327    temporary registers needs to be allocated to store a constant.  */
3328 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3329 {
3330     /* The liveness analysis already ensures that globals are back
3331        in memory. Keep an tcg_debug_assert for safety. */
3332     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3333 }
3334 
3335 /* save globals to their canonical location and assume they can be
3336    modified be the following code. 'allocated_regs' is used in case a
3337    temporary registers needs to be allocated to store a constant. */
3338 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3339 {
3340     int i, n;
3341 
3342     for (i = 0, n = s->nb_globals; i < n; i++) {
3343         temp_save(s, &s->temps[i], allocated_regs);
3344     }
3345 }
3346 
3347 /* sync globals to their canonical location and assume they can be
3348    read by the following code. 'allocated_regs' is used in case a
3349    temporary registers needs to be allocated to store a constant. */
3350 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3351 {
3352     int i, n;
3353 
3354     for (i = 0, n = s->nb_globals; i < n; i++) {
3355         TCGTemp *ts = &s->temps[i];
3356         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3357                          || ts->fixed_reg
3358                          || ts->mem_coherent);
3359     }
3360 }
3361 
3362 /* at the end of a basic block, we assume all temporaries are dead and
3363    all globals are stored at their canonical location. */
3364 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3365 {
3366     int i;
3367 
3368     for (i = s->nb_globals; i < s->nb_temps; i++) {
3369         TCGTemp *ts = &s->temps[i];
3370         if (ts->temp_local) {
3371             temp_save(s, ts, allocated_regs);
3372         } else {
3373             /* The liveness analysis already ensures that temps are dead.
3374                Keep an tcg_debug_assert for safety. */
3375             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3376         }
3377     }
3378 
3379     save_globals(s, allocated_regs);
3380 }
3381 
3382 /*
3383  * Specialized code generation for INDEX_op_movi_*.
3384  */
3385 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3386                                   tcg_target_ulong val, TCGLifeData arg_life,
3387                                   TCGRegSet preferred_regs)
3388 {
3389     /* ENV should not be modified.  */
3390     tcg_debug_assert(!ots->fixed_reg);
3391 
3392     /* The movi is not explicitly generated here.  */
3393     if (ots->val_type == TEMP_VAL_REG) {
3394         s->reg_to_temp[ots->reg] = NULL;
3395     }
3396     ots->val_type = TEMP_VAL_CONST;
3397     ots->val = val;
3398     ots->mem_coherent = 0;
3399     if (NEED_SYNC_ARG(0)) {
3400         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3401     } else if (IS_DEAD_ARG(0)) {
3402         temp_dead(s, ots);
3403     }
3404 }
3405 
3406 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3407 {
3408     TCGTemp *ots = arg_temp(op->args[0]);
3409     tcg_target_ulong val = op->args[1];
3410 
3411     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3412 }
3413 
3414 /*
3415  * Specialized code generation for INDEX_op_mov_*.
3416  */
3417 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3418 {
3419     const TCGLifeData arg_life = op->life;
3420     TCGRegSet allocated_regs, preferred_regs;
3421     TCGTemp *ts, *ots;
3422     TCGType otype, itype;
3423 
3424     allocated_regs = s->reserved_regs;
3425     preferred_regs = op->output_pref[0];
3426     ots = arg_temp(op->args[0]);
3427     ts = arg_temp(op->args[1]);
3428 
3429     /* ENV should not be modified.  */
3430     tcg_debug_assert(!ots->fixed_reg);
3431 
3432     /* Note that otype != itype for no-op truncation.  */
3433     otype = ots->type;
3434     itype = ts->type;
3435 
3436     if (ts->val_type == TEMP_VAL_CONST) {
3437         /* propagate constant or generate sti */
3438         tcg_target_ulong val = ts->val;
3439         if (IS_DEAD_ARG(1)) {
3440             temp_dead(s, ts);
3441         }
3442         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3443         return;
3444     }
3445 
3446     /* If the source value is in memory we're going to be forced
3447        to have it in a register in order to perform the copy.  Copy
3448        the SOURCE value into its own register first, that way we
3449        don't have to reload SOURCE the next time it is used. */
3450     if (ts->val_type == TEMP_VAL_MEM) {
3451         temp_load(s, ts, tcg_target_available_regs[itype],
3452                   allocated_regs, preferred_regs);
3453     }
3454 
3455     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3456     if (IS_DEAD_ARG(0)) {
3457         /* mov to a non-saved dead register makes no sense (even with
3458            liveness analysis disabled). */
3459         tcg_debug_assert(NEED_SYNC_ARG(0));
3460         if (!ots->mem_allocated) {
3461             temp_allocate_frame(s, ots);
3462         }
3463         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3464         if (IS_DEAD_ARG(1)) {
3465             temp_dead(s, ts);
3466         }
3467         temp_dead(s, ots);
3468     } else {
3469         if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3470             /* the mov can be suppressed */
3471             if (ots->val_type == TEMP_VAL_REG) {
3472                 s->reg_to_temp[ots->reg] = NULL;
3473             }
3474             ots->reg = ts->reg;
3475             temp_dead(s, ts);
3476         } else {
3477             if (ots->val_type != TEMP_VAL_REG) {
3478                 /* When allocating a new register, make sure to not spill the
3479                    input one. */
3480                 tcg_regset_set_reg(allocated_regs, ts->reg);
3481                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3482                                          allocated_regs, preferred_regs,
3483                                          ots->indirect_base);
3484             }
3485             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3486                 /*
3487                  * Cross register class move not supported.
3488                  * Store the source register into the destination slot
3489                  * and leave the destination temp as TEMP_VAL_MEM.
3490                  */
3491                 assert(!ots->fixed_reg);
3492                 if (!ts->mem_allocated) {
3493                     temp_allocate_frame(s, ots);
3494                 }
3495                 tcg_out_st(s, ts->type, ts->reg,
3496                            ots->mem_base->reg, ots->mem_offset);
3497                 ots->mem_coherent = 1;
3498                 temp_free_or_dead(s, ots, -1);
3499                 return;
3500             }
3501         }
3502         ots->val_type = TEMP_VAL_REG;
3503         ots->mem_coherent = 0;
3504         s->reg_to_temp[ots->reg] = ots;
3505         if (NEED_SYNC_ARG(0)) {
3506             temp_sync(s, ots, allocated_regs, 0, 0);
3507         }
3508     }
3509 }
3510 
3511 /*
3512  * Specialized code generation for INDEX_op_dup_vec.
3513  */
3514 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3515 {
3516     const TCGLifeData arg_life = op->life;
3517     TCGRegSet dup_out_regs, dup_in_regs;
3518     TCGTemp *its, *ots;
3519     TCGType itype, vtype;
3520     intptr_t endian_fixup;
3521     unsigned vece;
3522     bool ok;
3523 
3524     ots = arg_temp(op->args[0]);
3525     its = arg_temp(op->args[1]);
3526 
3527     /* ENV should not be modified.  */
3528     tcg_debug_assert(!ots->fixed_reg);
3529 
3530     itype = its->type;
3531     vece = TCGOP_VECE(op);
3532     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3533 
3534     if (its->val_type == TEMP_VAL_CONST) {
3535         /* Propagate constant via movi -> dupi.  */
3536         tcg_target_ulong val = its->val;
3537         if (IS_DEAD_ARG(1)) {
3538             temp_dead(s, its);
3539         }
3540         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3541         return;
3542     }
3543 
3544     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].regs;
3545     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].regs;
3546 
3547     /* Allocate the output register now.  */
3548     if (ots->val_type != TEMP_VAL_REG) {
3549         TCGRegSet allocated_regs = s->reserved_regs;
3550 
3551         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3552             /* Make sure to not spill the input register. */
3553             tcg_regset_set_reg(allocated_regs, its->reg);
3554         }
3555         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3556                                  op->output_pref[0], ots->indirect_base);
3557         ots->val_type = TEMP_VAL_REG;
3558         ots->mem_coherent = 0;
3559         s->reg_to_temp[ots->reg] = ots;
3560     }
3561 
3562     switch (its->val_type) {
3563     case TEMP_VAL_REG:
3564         /*
3565          * The dup constriaints must be broad, covering all possible VECE.
3566          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3567          * to fail, indicating that extra moves are required for that case.
3568          */
3569         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3570             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3571                 goto done;
3572             }
3573             /* Try again from memory or a vector input register.  */
3574         }
3575         if (!its->mem_coherent) {
3576             /*
3577              * The input register is not synced, and so an extra store
3578              * would be required to use memory.  Attempt an integer-vector
3579              * register move first.  We do not have a TCGRegSet for this.
3580              */
3581             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3582                 break;
3583             }
3584             /* Sync the temp back to its slot and load from there.  */
3585             temp_sync(s, its, s->reserved_regs, 0, 0);
3586         }
3587         /* fall through */
3588 
3589     case TEMP_VAL_MEM:
3590 #ifdef HOST_WORDS_BIGENDIAN
3591         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3592         endian_fixup -= 1 << vece;
3593 #else
3594         endian_fixup = 0;
3595 #endif
3596         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3597                              its->mem_offset + endian_fixup)) {
3598             goto done;
3599         }
3600         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3601         break;
3602 
3603     default:
3604         g_assert_not_reached();
3605     }
3606 
3607     /* We now have a vector input register, so dup must succeed. */
3608     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3609     tcg_debug_assert(ok);
3610 
3611  done:
3612     if (IS_DEAD_ARG(1)) {
3613         temp_dead(s, its);
3614     }
3615     if (NEED_SYNC_ARG(0)) {
3616         temp_sync(s, ots, s->reserved_regs, 0, 0);
3617     }
3618     if (IS_DEAD_ARG(0)) {
3619         temp_dead(s, ots);
3620     }
3621 }
3622 
/*
 * Register-allocate and emit one generic TCG op.
 *
 * The constraint table of the opcode (tcg_op_defs[op->opc]) drives the work:
 *   1. inputs are placed in registers satisfying args_ct[], or passed as
 *      immediates when the backend accepts the constant;
 *   2. inputs that die at this op are released;
 *   3. for ops that do not end a basic block, call-clobbered registers are
 *      freed and globals synced when the op flags ask for it, then the
 *      outputs receive their registers;
 *   4. the host instruction is emitted via tcg_out_op()/tcg_out_vec_op();
 *   5. outputs are synced to memory and/or marked dead.
 *
 * @s:  TCG context holding the allocator state (reg_to_temp, reserved_regs).
 * @op: the op to emit; it is only read here.
 */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    /*
     * NOTE(review): IS_DEAD_ARG()/NEED_SYNC_ARG() appear to pick this local
     * up by name -- confirm against their definitions before renaming it.
     */
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    /* Registers that may not be handed out to inputs / to outputs. */
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    /*
     * Arguments as passed to the backend: new_args[i] is a register number,
     * or a constant value when const_args[i] is set.
     */
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants: the trailing constant args are passed through as-is */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints, visiting inputs in sort_index order */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        i = def->args_ct[nb_oargs + k].sort_index;
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ialias) {
            /* The input shares a register with output alias_index. */
            o_preferred_regs = op->output_pref[arg_ct->alias_index];
            if (ts->fixed_reg) {
                /* if fixed register, we must allocate a new register
                   if the alias is not the same register */
                if (arg != op->args[arg_ct->alias_index]) {
                    goto allocate_in_reg;
                }
            } else {
                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_ARG(i)) {
                    goto allocate_in_reg;
                }

                /* check if the current register has already been allocated
                   for another input aliased to an output */
                if (ts->val_type == TEMP_VAL_REG) {
                    int k2, i2;
                    reg = ts->reg;
                    for (k2 = 0 ; k2 < k ; k2++) {
                        i2 = def->args_ct[nb_oargs + k2].sort_index;
                        if (def->args_ct[i2].ialias && reg == new_args[i2]) {
                            goto allocate_in_reg;
                        }
                    }
                }
                /* The input register becomes the output register, so the
                   output's preference is used for the input load. */
                i_preferred_regs = o_preferred_regs;
            }
        }

        temp_load(s, ts, arg_ct->regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (tcg_regset_test_reg(arg_ct->regs, reg)) {
            /* nothing to do : the constraint is satisfied */
        } else {
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
                /*
                 * Cross register class move not supported.  Sync the
                 * temp back to its slot and load from there.
                 */
                temp_sync(s, ts, i_allocated_regs, 0, 0);
                tcg_out_ld(s, ts->type, reg,
                           ts->mem_base->reg, ts->mem_offset);
            }
        }
        new_args[i] = reg;
        const_args[i] = 0;
        /* Keep later inputs from evicting or reusing this register. */
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_BB_END) {
        /* Ops ending a basic block take this path instead of the output
           allocation below. */
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints, in sort_index order */
        for(k = 0; k < nb_oargs; k++) {
            i = def->args_ct[k].sort_index;
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);

            /* ENV should not be modified.  */
            tcg_debug_assert(!ts->fixed_reg);

            if (arg_ct->oalias && !const_args[arg_ct->alias_index]) {
                /* Aliased to a register input: reuse that register. */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->newreg) {
                /* Must not overlap any input register either. */
                reg = tcg_reg_alloc(s, arg_ct->regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                reg = tcg_reg_alloc(s, arg_ct->regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            /* Detach the temp from any register it occupied before. */
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            /*
             * Temp value is modified, so the value kept in memory is
             * potentially not the same.
             */
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);

        /* ENV should not be modified.  */
        tcg_debug_assert(!ts->fixed_reg);

        if (NEED_SYNC_ARG(i)) {
            /* Store the output back; the last argument also lets
               temp_sync() handle an output that is dead after this op. */
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
3808 
/*
 * STACK_DIR(x): x unchanged by default, negated when
 * TCG_TARGET_STACK_GROWSUP is defined.
 */
#ifndef TCG_TARGET_STACK_GROWSUP
#define STACK_DIR(x) (x)
#else
#define STACK_DIR(x) (-(x))
#endif
3814 
3815 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3816 {
3817     const int nb_oargs = TCGOP_CALLO(op);
3818     const int nb_iargs = TCGOP_CALLI(op);
3819     const TCGLifeData arg_life = op->life;
3820     int flags, nb_regs, i;
3821     TCGReg reg;
3822     TCGArg arg;
3823     TCGTemp *ts;
3824     intptr_t stack_offset;
3825     size_t call_stack_size;
3826     tcg_insn_unit *func_addr;
3827     int allocate_args;
3828     TCGRegSet allocated_regs;
3829 
3830     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3831     flags = op->args[nb_oargs + nb_iargs + 1];
3832 
3833     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3834     if (nb_regs > nb_iargs) {
3835         nb_regs = nb_iargs;
3836     }
3837 
3838     /* assign stack slots first */
3839     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3840     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3841         ~(TCG_TARGET_STACK_ALIGN - 1);
3842     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3843     if (allocate_args) {
3844         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3845            preallocate call stack */
3846         tcg_abort();
3847     }
3848 
3849     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3850     for (i = nb_regs; i < nb_iargs; i++) {
3851         arg = op->args[nb_oargs + i];
3852 #ifdef TCG_TARGET_STACK_GROWSUP
3853         stack_offset -= sizeof(tcg_target_long);
3854 #endif
3855         if (arg != TCG_CALL_DUMMY_ARG) {
3856             ts = arg_temp(arg);
3857             temp_load(s, ts, tcg_target_available_regs[ts->type],
3858                       s->reserved_regs, 0);
3859             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3860         }
3861 #ifndef TCG_TARGET_STACK_GROWSUP
3862         stack_offset += sizeof(tcg_target_long);
3863 #endif
3864     }
3865 
3866     /* assign input registers */
3867     allocated_regs = s->reserved_regs;
3868     for (i = 0; i < nb_regs; i++) {
3869         arg = op->args[nb_oargs + i];
3870         if (arg != TCG_CALL_DUMMY_ARG) {
3871             ts = arg_temp(arg);
3872             reg = tcg_target_call_iarg_regs[i];
3873 
3874             if (ts->val_type == TEMP_VAL_REG) {
3875                 if (ts->reg != reg) {
3876                     tcg_reg_free(s, reg, allocated_regs);
3877                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3878                         /*
3879                          * Cross register class move not supported.  Sync the
3880                          * temp back to its slot and load from there.
3881                          */
3882                         temp_sync(s, ts, allocated_regs, 0, 0);
3883                         tcg_out_ld(s, ts->type, reg,
3884                                    ts->mem_base->reg, ts->mem_offset);
3885                     }
3886                 }
3887             } else {
3888                 TCGRegSet arg_set = 0;
3889 
3890                 tcg_reg_free(s, reg, allocated_regs);
3891                 tcg_regset_set_reg(arg_set, reg);
3892                 temp_load(s, ts, arg_set, allocated_regs, 0);
3893             }
3894 
3895             tcg_regset_set_reg(allocated_regs, reg);
3896         }
3897     }
3898 
3899     /* mark dead temporaries and free the associated registers */
3900     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3901         if (IS_DEAD_ARG(i)) {
3902             temp_dead(s, arg_temp(op->args[i]));
3903         }
3904     }
3905 
3906     /* clobber call registers */
3907     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3908         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3909             tcg_reg_free(s, i, allocated_regs);
3910         }
3911     }
3912 
3913     /* Save globals if they might be written by the helper, sync them if
3914        they might be read. */
3915     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3916         /* Nothing to do */
3917     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3918         sync_globals(s, allocated_regs);
3919     } else {
3920         save_globals(s, allocated_regs);
3921     }
3922 
3923     tcg_out_call(s, func_addr);
3924 
3925     /* assign output registers and emit moves if needed */
3926     for(i = 0; i < nb_oargs; i++) {
3927         arg = op->args[i];
3928         ts = arg_temp(arg);
3929 
3930         /* ENV should not be modified.  */
3931         tcg_debug_assert(!ts->fixed_reg);
3932 
3933         reg = tcg_target_call_oarg_regs[i];
3934         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3935         if (ts->val_type == TEMP_VAL_REG) {
3936             s->reg_to_temp[ts->reg] = NULL;
3937         }
3938         ts->val_type = TEMP_VAL_REG;
3939         ts->reg = reg;
3940         ts->mem_coherent = 0;
3941         s->reg_to_temp[reg] = ts;
3942         if (NEED_SYNC_ARG(i)) {
3943             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3944         } else if (IS_DEAD_ARG(i)) {
3945             temp_dead(s, ts);
3946         }
3947     }
3948 }
3949 
3950 #ifdef CONFIG_PROFILER
3951 
3952 /* avoid copy/paste errors */
3953 #define PROF_ADD(to, from, field)                       \
3954     do {                                                \
3955         (to)->field += qatomic_read(&((from)->field));  \
3956     } while (0)
3957 
3958 #define PROF_MAX(to, from, field)                                       \
3959     do {                                                                \
3960         typeof((from)->field) val__ = qatomic_read(&((from)->field));   \
3961         if (val__ > (to)->field) {                                      \
3962             (to)->field = val__;                                        \
3963         }                                                               \
3964     } while (0)
3965 
3966 /* Pass in a zero'ed @prof */
3967 static inline
3968 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3969 {
3970     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
3971     unsigned int i;
3972 
3973     for (i = 0; i < n_ctxs; i++) {
3974         TCGContext *s = qatomic_read(&tcg_ctxs[i]);
3975         const TCGProfile *orig = &s->prof;
3976 
3977         if (counters) {
3978             PROF_ADD(prof, orig, cpu_exec_time);
3979             PROF_ADD(prof, orig, tb_count1);
3980             PROF_ADD(prof, orig, tb_count);
3981             PROF_ADD(prof, orig, op_count);
3982             PROF_MAX(prof, orig, op_count_max);
3983             PROF_ADD(prof, orig, temp_count);
3984             PROF_MAX(prof, orig, temp_count_max);
3985             PROF_ADD(prof, orig, del_op_count);
3986             PROF_ADD(prof, orig, code_in_len);
3987             PROF_ADD(prof, orig, code_out_len);
3988             PROF_ADD(prof, orig, search_out_len);
3989             PROF_ADD(prof, orig, interm_time);
3990             PROF_ADD(prof, orig, code_time);
3991             PROF_ADD(prof, orig, la_time);
3992             PROF_ADD(prof, orig, opt_time);
3993             PROF_ADD(prof, orig, restore_count);
3994             PROF_ADD(prof, orig, restore_time);
3995         }
3996         if (table) {
3997             int i;
3998 
3999             for (i = 0; i < NB_OPS; i++) {
4000                 PROF_ADD(prof, orig, table_op_count[i]);
4001             }
4002         }
4003     }
4004 }
4005 
4006 #undef PROF_ADD
4007 #undef PROF_MAX
4008 
4009 static void tcg_profile_snapshot_counters(TCGProfile *prof)
4010 {
4011     tcg_profile_snapshot(prof, true, false);
4012 }
4013 
4014 static void tcg_profile_snapshot_table(TCGProfile *prof)
4015 {
4016     tcg_profile_snapshot(prof, false, true);
4017 }
4018 
4019 void tcg_dump_op_count(void)
4020 {
4021     TCGProfile prof = {};
4022     int i;
4023 
4024     tcg_profile_snapshot_table(&prof);
4025     for (i = 0; i < NB_OPS; i++) {
4026         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
4027                     prof.table_op_count[i]);
4028     }
4029 }
4030 
4031 int64_t tcg_cpu_exec_time(void)
4032 {
4033     unsigned int n_ctxs = qatomic_read(&n_tcg_ctxs);
4034     unsigned int i;
4035     int64_t ret = 0;
4036 
4037     for (i = 0; i < n_ctxs; i++) {
4038         const TCGContext *s = qatomic_read(&tcg_ctxs[i]);
4039         const TCGProfile *prof = &s->prof;
4040 
4041         ret += qatomic_read(&prof->cpu_exec_time);
4042     }
4043     return ret;
4044 }
4045 #else
/* Profiler disabled at build time: only print a notice. */
void tcg_dump_op_count(void)
{
    static const char notice[] = "[TCG profiler not compiled]\n";

    qemu_printf("%s", notice);
}
4050 
/*
 * Profiler disabled at build time: report the error and terminate the
 * process; this variant never returns a value.
 */
int64_t tcg_cpu_exec_time(void)
{
    const char *caller = __func__;

    error_report("%s: TCG profiler not compiled", caller);
    exit(EXIT_FAILURE);
}
4056 #endif
4057 
4058 
4059 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4060 {
4061 #ifdef CONFIG_PROFILER
4062     TCGProfile *prof = &s->prof;
4063 #endif
4064     int i, num_insns;
4065     TCGOp *op;
4066 
4067 #ifdef CONFIG_PROFILER
4068     {
4069         int n = 0;
4070 
4071         QTAILQ_FOREACH(op, &s->ops, link) {
4072             n++;
4073         }
4074         qatomic_set(&prof->op_count, prof->op_count + n);
4075         if (n > prof->op_count_max) {
4076             qatomic_set(&prof->op_count_max, n);
4077         }
4078 
4079         n = s->nb_temps;
4080         qatomic_set(&prof->temp_count, prof->temp_count + n);
4081         if (n > prof->temp_count_max) {
4082             qatomic_set(&prof->temp_count_max, n);
4083         }
4084     }
4085 #endif
4086 
4087 #ifdef DEBUG_DISAS
4088     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4089                  && qemu_log_in_addr_range(tb->pc))) {
4090         FILE *logfile = qemu_log_lock();
4091         qemu_log("OP:\n");
4092         tcg_dump_ops(s, false);
4093         qemu_log("\n");
4094         qemu_log_unlock(logfile);
4095     }
4096 #endif
4097 
4098 #ifdef CONFIG_DEBUG_TCG
4099     /* Ensure all labels referenced have been emitted.  */
4100     {
4101         TCGLabel *l;
4102         bool error = false;
4103 
4104         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4105             if (unlikely(!l->present) && l->refs) {
4106                 qemu_log_mask(CPU_LOG_TB_OP,
4107                               "$L%d referenced but not present.\n", l->id);
4108                 error = true;
4109             }
4110         }
4111         assert(!error);
4112     }
4113 #endif
4114 
4115 #ifdef CONFIG_PROFILER
4116     qatomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4117 #endif
4118 
4119 #ifdef USE_TCG_OPTIMIZATIONS
4120     tcg_optimize(s);
4121 #endif
4122 
4123 #ifdef CONFIG_PROFILER
4124     qatomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4125     qatomic_set(&prof->la_time, prof->la_time - profile_getclock());
4126 #endif
4127 
4128     reachable_code_pass(s);
4129     liveness_pass_1(s);
4130 
4131     if (s->nb_indirects > 0) {
4132 #ifdef DEBUG_DISAS
4133         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4134                      && qemu_log_in_addr_range(tb->pc))) {
4135             FILE *logfile = qemu_log_lock();
4136             qemu_log("OP before indirect lowering:\n");
4137             tcg_dump_ops(s, false);
4138             qemu_log("\n");
4139             qemu_log_unlock(logfile);
4140         }
4141 #endif
4142         /* Replace indirect temps with direct temps.  */
4143         if (liveness_pass_2(s)) {
4144             /* If changes were made, re-run liveness.  */
4145             liveness_pass_1(s);
4146         }
4147     }
4148 
4149 #ifdef CONFIG_PROFILER
4150     qatomic_set(&prof->la_time, prof->la_time + profile_getclock());
4151 #endif
4152 
4153 #ifdef DEBUG_DISAS
4154     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4155                  && qemu_log_in_addr_range(tb->pc))) {
4156         FILE *logfile = qemu_log_lock();
4157         qemu_log("OP after optimization and liveness analysis:\n");
4158         tcg_dump_ops(s, true);
4159         qemu_log("\n");
4160         qemu_log_unlock(logfile);
4161     }
4162 #endif
4163 
4164     tcg_reg_alloc_start(s);
4165 
4166     s->code_buf = tb->tc.ptr;
4167     s->code_ptr = tb->tc.ptr;
4168 
4169 #ifdef TCG_TARGET_NEED_LDST_LABELS
4170     QSIMPLEQ_INIT(&s->ldst_labels);
4171 #endif
4172 #ifdef TCG_TARGET_NEED_POOL_LABELS
4173     s->pool_labels = NULL;
4174 #endif
4175 
4176     num_insns = -1;
4177     QTAILQ_FOREACH(op, &s->ops, link) {
4178         TCGOpcode opc = op->opc;
4179 
4180 #ifdef CONFIG_PROFILER
4181         qatomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4182 #endif
4183 
4184         switch (opc) {
4185         case INDEX_op_mov_i32:
4186         case INDEX_op_mov_i64:
4187         case INDEX_op_mov_vec:
4188             tcg_reg_alloc_mov(s, op);
4189             break;
4190         case INDEX_op_movi_i32:
4191         case INDEX_op_movi_i64:
4192         case INDEX_op_dupi_vec:
4193             tcg_reg_alloc_movi(s, op);
4194             break;
4195         case INDEX_op_dup_vec:
4196             tcg_reg_alloc_dup(s, op);
4197             break;
4198         case INDEX_op_insn_start:
4199             if (num_insns >= 0) {
4200                 size_t off = tcg_current_code_size(s);
4201                 s->gen_insn_end_off[num_insns] = off;
4202                 /* Assert that we do not overflow our stored offset.  */
4203                 assert(s->gen_insn_end_off[num_insns] == off);
4204             }
4205             num_insns++;
4206             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4207                 target_ulong a;
4208 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4209                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4210 #else
4211                 a = op->args[i];
4212 #endif
4213                 s->gen_insn_data[num_insns][i] = a;
4214             }
4215             break;
4216         case INDEX_op_discard:
4217             temp_dead(s, arg_temp(op->args[0]));
4218             break;
4219         case INDEX_op_set_label:
4220             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4221             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4222             break;
4223         case INDEX_op_call:
4224             tcg_reg_alloc_call(s, op);
4225             break;
4226         default:
4227             /* Sanity check that we've not introduced any unhandled opcodes. */
4228             tcg_debug_assert(tcg_op_supported(opc));
4229             /* Note: in order to speed up the code, it would be much
4230                faster to have specialized register allocator functions for
4231                some common argument patterns */
4232             tcg_reg_alloc_op(s, op);
4233             break;
4234         }
4235 #ifdef CONFIG_DEBUG_TCG
4236         check_regs(s);
4237 #endif
4238         /* Test for (pending) buffer overflow.  The assumption is that any
4239            one operation beginning below the high water mark cannot overrun
4240            the buffer completely.  Thus we can test for overflow after
4241            generating code without having to check during generation.  */
4242         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4243             return -1;
4244         }
4245         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4246         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4247             return -2;
4248         }
4249     }
4250     tcg_debug_assert(num_insns >= 0);
4251     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4252 
4253     /* Generate TB finalization at the end of block */
4254 #ifdef TCG_TARGET_NEED_LDST_LABELS
4255     i = tcg_out_ldst_finalize(s);
4256     if (i < 0) {
4257         return i;
4258     }
4259 #endif
4260 #ifdef TCG_TARGET_NEED_POOL_LABELS
4261     i = tcg_out_pool_finalize(s);
4262     if (i < 0) {
4263         return i;
4264     }
4265 #endif
4266     if (!tcg_resolve_relocs(s)) {
4267         return -2;
4268     }
4269 
4270     /* flush instruction cache */
4271     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4272 
4273     return tcg_current_code_size(s);
4274 }
4275 
4276 #ifdef CONFIG_PROFILER
4277 void tcg_dump_info(void)
4278 {
4279     TCGProfile prof = {};
4280     const TCGProfile *s;
4281     int64_t tb_count;
4282     int64_t tb_div_count;
4283     int64_t tot;
4284 
4285     tcg_profile_snapshot_counters(&prof);
4286     s = &prof;
4287     tb_count = s->tb_count;
4288     tb_div_count = tb_count ? tb_count : 1;
4289     tot = s->interm_time + s->code_time;
4290 
4291     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4292                 tot, tot / 2.4e9);
4293     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4294                 " %0.1f%%)\n",
4295                 tb_count, s->tb_count1 - tb_count,
4296                 (double)(s->tb_count1 - s->tb_count)
4297                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4298     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4299                 (double)s->op_count / tb_div_count, s->op_count_max);
4300     qemu_printf("deleted ops/TB      %0.2f\n",
4301                 (double)s->del_op_count / tb_div_count);
4302     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4303                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4304     qemu_printf("avg host code/TB    %0.1f\n",
4305                 (double)s->code_out_len / tb_div_count);
4306     qemu_printf("avg search data/TB  %0.1f\n",
4307                 (double)s->search_out_len / tb_div_count);
4308 
4309     qemu_printf("cycles/op           %0.1f\n",
4310                 s->op_count ? (double)tot / s->op_count : 0);
4311     qemu_printf("cycles/in byte      %0.1f\n",
4312                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4313     qemu_printf("cycles/out byte     %0.1f\n",
4314                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4315     qemu_printf("cycles/search byte     %0.1f\n",
4316                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4317     if (tot == 0) {
4318         tot = 1;
4319     }
4320     qemu_printf("  gen_interm time   %0.1f%%\n",
4321                 (double)s->interm_time / tot * 100.0);
4322     qemu_printf("  gen_code time     %0.1f%%\n",
4323                 (double)s->code_time / tot * 100.0);
4324     qemu_printf("optim./code time    %0.1f%%\n",
4325                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4326                 * 100.0);
4327     qemu_printf("liveness/code time  %0.1f%%\n",
4328                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4329     qemu_printf("cpu_restore count   %" PRId64 "\n",
4330                 s->restore_count);
4331     qemu_printf("  avg cycles        %0.1f\n",
4332                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4333 }
4334 #else
/* Profiler disabled at build time: only print a notice. */
void tcg_dump_info(void)
{
    static const char notice[] = "[TCG profiler not compiled]\n";

    qemu_printf("%s", notice);
}
4339 #endif
4340 
4341 #ifdef ELF_HOST_MACHINE
4342 /* In order to use this feature, the backend needs to do three things:
4343 
4344    (1) Define ELF_HOST_MACHINE to indicate both what value to
4345        put into the ELF image and to indicate support for the feature.
4346 
4347    (2) Define tcg_register_jit.  This should create a buffer containing
4348        the contents of a .debug_frame section that describes the post-
4349        prologue unwind info for the tcg machine.
4350 
4351    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4352 */
4353 
4354 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
/* Action codes of the GDB JIT interface; the values are spelled out. */
typedef enum {
    JIT_NOACTION      = 0,
    JIT_REGISTER_FN   = 1,
    JIT_UNREGISTER_FN = 2
} jit_actions_t;
4360 
/*
 * One entry of the doubly linked list of JIT-registered symbol files.
 * Part of the GDB interface: the layout must match the GDB documentation.
 */
struct jit_code_entry {
    struct jit_code_entry *next_entry;   /* following list entry */
    struct jit_code_entry *prev_entry;   /* preceding list entry */
    const void *symfile_addr;            /* start of the symbol file image */
    uint64_t symfile_size;               /* size of that image */
};
4367 
/*
 * Descriptor type of __jit_debug_descriptor.
 * Part of the GDB interface: the layout must match the GDB documentation.
 */
struct jit_descriptor {
    uint32_t version;                       /* interface version */
    uint32_t action_flag;                   /* presumably a jit_actions_t
                                               value -- see the GDB docs */
    struct jit_code_entry *relevant_entry;  /* entry the action applies to */
    struct jit_code_entry *first_entry;     /* head of the entry list */
};
4374 
/*
 * Hook function of the GDB JIT interface; per the GDB documentation the
 * debugger places a breakpoint in it.  It does nothing by itself.
 *
 * It is declared noinline, and the empty asm statement presumably keeps
 * the compiler from treating the body as empty and dropping calls to it.
 * The __asm__ spelling is used because the plain asm keyword is not
 * available when compiling in a strict ISO -std= mode.
 */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    __asm__("");
}
4380 
4381 /* Must statically initialize the version, because GDB may check
4382    the version before we can set it.  */
4383 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4384 
4385 /* End GDB interface.  */
4386 
/*
 * Return the offset of @str inside the string table @strtab.
 *
 * The table is a sequence of NUL-terminated strings; the search starts
 * at offset 1, skipping the first byte.  @str must be present: there is
 * no end marker, so the scan would otherwise walk past the table.
 */
static int find_string(const char *strtab, const char *str)
{
    const char *cur;

    for (cur = strtab + 1; strcmp(cur, str) != 0; cur += strlen(cur) + 1) {
        /* not this one: step over the string and its terminator */
    }
    return cur - strtab;
}
4398 
4399 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4400                                  const void *debug_frame,
4401                                  size_t debug_frame_size)
4402 {
4403     struct __attribute__((packed)) DebugInfo {
4404         uint32_t  len;
4405         uint16_t  version;
4406         uint32_t  abbrev;
4407         uint8_t   ptr_size;
4408         uint8_t   cu_die;
4409         uint16_t  cu_lang;
4410         uintptr_t cu_low_pc;
4411         uintptr_t cu_high_pc;
4412         uint8_t   fn_die;
4413         char      fn_name[16];
4414         uintptr_t fn_low_pc;
4415         uintptr_t fn_high_pc;
4416         uint8_t   cu_eoc;
4417     };
4418 
4419     struct ElfImage {
4420         ElfW(Ehdr) ehdr;
4421         ElfW(Phdr) phdr;
4422         ElfW(Shdr) shdr[7];
4423         ElfW(Sym)  sym[2];
4424         struct DebugInfo di;
4425         uint8_t    da[24];
4426         char       str[80];
4427     };
4428 
4429     struct ElfImage *img;
4430 
4431     static const struct ElfImage img_template = {
4432         .ehdr = {
4433             .e_ident[EI_MAG0] = ELFMAG0,
4434             .e_ident[EI_MAG1] = ELFMAG1,
4435             .e_ident[EI_MAG2] = ELFMAG2,
4436             .e_ident[EI_MAG3] = ELFMAG3,
4437             .e_ident[EI_CLASS] = ELF_CLASS,
4438             .e_ident[EI_DATA] = ELF_DATA,
4439             .e_ident[EI_VERSION] = EV_CURRENT,
4440             .e_type = ET_EXEC,
4441             .e_machine = ELF_HOST_MACHINE,
4442             .e_version = EV_CURRENT,
4443             .e_phoff = offsetof(struct ElfImage, phdr),
4444             .e_shoff = offsetof(struct ElfImage, shdr),
4445             .e_ehsize = sizeof(ElfW(Shdr)),
4446             .e_phentsize = sizeof(ElfW(Phdr)),
4447             .e_phnum = 1,
4448             .e_shentsize = sizeof(ElfW(Shdr)),
4449             .e_shnum = ARRAY_SIZE(img->shdr),
4450             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4451 #ifdef ELF_HOST_FLAGS
4452             .e_flags = ELF_HOST_FLAGS,
4453 #endif
4454 #ifdef ELF_OSABI
4455             .e_ident[EI_OSABI] = ELF_OSABI,
4456 #endif
4457         },
4458         .phdr = {
4459             .p_type = PT_LOAD,
4460             .p_flags = PF_X,
4461         },
4462         .shdr = {
4463             [0] = { .sh_type = SHT_NULL },
4464             /* Trick: The contents of code_gen_buffer are not present in
4465                this fake ELF file; that got allocated elsewhere.  Therefore
4466                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4467                will not look for contents.  We can record any address.  */
4468             [1] = { /* .text */
4469                 .sh_type = SHT_NOBITS,
4470                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4471             },
4472             [2] = { /* .debug_info */
4473                 .sh_type = SHT_PROGBITS,
4474                 .sh_offset = offsetof(struct ElfImage, di),
4475                 .sh_size = sizeof(struct DebugInfo),
4476             },
4477             [3] = { /* .debug_abbrev */
4478                 .sh_type = SHT_PROGBITS,
4479                 .sh_offset = offsetof(struct ElfImage, da),
4480                 .sh_size = sizeof(img->da),
4481             },
4482             [4] = { /* .debug_frame */
4483                 .sh_type = SHT_PROGBITS,
4484                 .sh_offset = sizeof(struct ElfImage),
4485             },
4486             [5] = { /* .symtab */
4487                 .sh_type = SHT_SYMTAB,
4488                 .sh_offset = offsetof(struct ElfImage, sym),
4489                 .sh_size = sizeof(img->sym),
4490                 .sh_info = 1,
4491                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4492                 .sh_entsize = sizeof(ElfW(Sym)),
4493             },
4494             [6] = { /* .strtab */
4495                 .sh_type = SHT_STRTAB,
4496                 .sh_offset = offsetof(struct ElfImage, str),
4497                 .sh_size = sizeof(img->str),
4498             }
4499         },
4500         .sym = {
4501             [1] = { /* code_gen_buffer */
4502                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4503                 .st_shndx = 1,
4504             }
4505         },
4506         .di = {
4507             .len = sizeof(struct DebugInfo) - 4,
4508             .version = 2,
4509             .ptr_size = sizeof(void *),
4510             .cu_die = 1,
4511             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4512             .fn_die = 2,
4513             .fn_name = "code_gen_buffer"
4514         },
4515         .da = {
4516             1,          /* abbrev number (the cu) */
4517             0x11, 1,    /* DW_TAG_compile_unit, has children */
4518             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4519             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4520             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4521             0, 0,       /* end of abbrev */
4522             2,          /* abbrev number (the fn) */
4523             0x2e, 0,    /* DW_TAG_subprogram, no children */
4524             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4525             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4526             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4527             0, 0,       /* end of abbrev */
4528             0           /* no more abbrev */
4529         },
4530         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4531                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4532     };
4533 
4534     /* We only need a single jit entry; statically allocate it.  */
4535     static struct jit_code_entry one_entry;
4536 
4537     uintptr_t buf = (uintptr_t)buf_ptr;
4538     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4539     DebugFrameHeader *dfh;
4540 
4541     img = g_malloc(img_size);
4542     *img = img_template;
4543 
4544     img->phdr.p_vaddr = buf;
4545     img->phdr.p_paddr = buf;
4546     img->phdr.p_memsz = buf_size;
4547 
4548     img->shdr[1].sh_name = find_string(img->str, ".text");
4549     img->shdr[1].sh_addr = buf;
4550     img->shdr[1].sh_size = buf_size;
4551 
4552     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4553     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4554 
4555     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4556     img->shdr[4].sh_size = debug_frame_size;
4557 
4558     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4559     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4560 
4561     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4562     img->sym[1].st_value = buf;
4563     img->sym[1].st_size = buf_size;
4564 
4565     img->di.cu_low_pc = buf;
4566     img->di.cu_high_pc = buf + buf_size;
4567     img->di.fn_low_pc = buf;
4568     img->di.fn_high_pc = buf + buf_size;
4569 
4570     dfh = (DebugFrameHeader *)(img + 1);
4571     memcpy(dfh, debug_frame, debug_frame_size);
4572     dfh->fde.func_start = buf;
4573     dfh->fde.func_len = buf_size;
4574 
4575 #ifdef DEBUG_JIT
4576     /* Enable this block to be able to debug the ELF image file creation.
4577        One can use readelf, objdump, or other inspection utilities.  */
4578     {
4579         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4580         if (f) {
4581             if (fwrite(img, img_size, 1, f) != img_size) {
4582                 /* Avoid stupid unused return value warning for fwrite.  */
4583             }
4584             fclose(f);
4585         }
4586     }
4587 #endif
4588 
4589     one_entry.symfile_addr = img;
4590     one_entry.symfile_size = img_size;
4591 
4592     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4593     __jit_debug_descriptor.relevant_entry = &one_entry;
4594     __jit_debug_descriptor.first_entry = &one_entry;
4595     __jit_debug_register_code();
4596 }
4597 #else
4598 /* No support for the feature.  Provide the entry point expected by exec.c,
4599    and implement the internal function we declared earlier.  */
4600 
/*
 * Fallback for the #else branch of the ELF_HOST_MACHINE conditional:
 * same signature as the real implementation, but every argument is
 * ignored and nothing is registered.
 */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
    /* Intentionally empty.  */
    (void)buf;
    (void)size;
    (void)debug_frame;
    (void)debug_frame_size;
}
4606 
/*
 * Fallback entry point for the #else branch of the ELF_HOST_MACHINE
 * conditional: accepts the buffer description and does nothing with it.
 */
void tcg_register_jit(void *buf, size_t buf_size)
{
    /* Intentionally empty.  */
    (void)buf;
    (void)buf_size;
}
4610 #endif /* ELF_HOST_MACHINE */
4611 
4612 #if !TCG_TARGET_MAYBE_vec
4613 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4614 {
4615     g_assert_not_reached();
4616 }
4617 #endif
4618