xref: /qemu/tcg/tcg.c (revision eae3eb3e)
1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
/* Define to dump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/timer.h"
37 
38 /* Note: the long term plan is to reduce the dependencies on the QEMU
39    CPU definitions. Currently they are used for qemu_ld/st
40    instructions */
41 #define NO_CPU_IO_DEFS
42 #include "cpu.h"
43 
44 #include "exec/cpu-common.h"
45 #include "exec/exec-all.h"
46 
47 #include "tcg-op.h"
48 
49 #if UINTPTR_MAX == UINT32_MAX
50 # define ELF_CLASS  ELFCLASS32
51 #else
52 # define ELF_CLASS  ELFCLASS64
53 #endif
54 #ifdef HOST_WORDS_BIGENDIAN
55 # define ELF_DATA   ELFDATA2MSB
56 #else
57 # define ELF_DATA   ELFDATA2LSB
58 #endif
59 
60 #include "elf.h"
61 #include "exec/log.h"
62 #include "sysemu/sysemu.h"
63 
64 /* Forward declarations for functions declared in tcg-target.inc.c and
65    used here. */
66 static void tcg_target_init(TCGContext *s);
67 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
68 static void tcg_target_qemu_prologue(TCGContext *s);
69 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
70                         intptr_t value, intptr_t addend);
71 
/* The CIE and FDE header definitions will be common to all hosts.  */

/* DWARF .debug_frame CIE header (paired with the FDE header below). */
typedef struct {
    /* aligned so the pointer-sized FDE fields that follow line up */
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t id;
    uint8_t version;
    char augmentation[1];
    uint8_t code_align;
    uint8_t data_align;
    uint8_t return_column;
} DebugFrameCIE;
82 
/* DWARF .debug_frame FDE header; packed, length field pointer-aligned. */
typedef struct QEMU_PACKED {
    uint32_t len __attribute__((aligned((sizeof(void *)))));
    uint32_t cie_offset;
    uintptr_t func_start;  /* start of the described code range */
    uintptr_t func_len;    /* length of the described code range */
} DebugFrameFDEHeader;
89 
/* A CIE immediately followed by one FDE header, emitted as a single unit. */
typedef struct QEMU_PACKED {
    DebugFrameCIE cie;
    DebugFrameFDEHeader fde;
} DebugFrameHeader;
94 
95 static void tcg_register_jit_int(void *buf, size_t size,
96                                  const void *debug_frame,
97                                  size_t debug_frame_size)
98     __attribute__((unused));
99 
100 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
101 static const char *target_parse_constraint(TCGArgConstraint *ct,
102                                            const char *ct_str, TCGType type);
103 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
104                        intptr_t arg2);
105 static void tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
106 static void tcg_out_movi(TCGContext *s, TCGType type,
107                          TCGReg ret, tcg_target_long arg);
108 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
109                        const int *const_args);
#if TCG_TARGET_MAYBE_vec
static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                           unsigned vece, const TCGArg *args,
                           const int *const_args);
#else
/* Stub for backends without vector support: must never be reached. */
static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
                                  unsigned vece, const TCGArg *args,
                                  const int *const_args)
{
    g_assert_not_reached();
}
#endif
122 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
123                        intptr_t arg2);
124 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
125                         TCGReg base, intptr_t ofs);
126 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
127 static int tcg_target_const_match(tcg_target_long val, TCGType type,
128                                   const TCGArgConstraint *arg_ct);
129 #ifdef TCG_TARGET_NEED_LDST_LABELS
130 static bool tcg_out_ldst_finalize(TCGContext *s);
131 #endif
132 
133 #define TCG_HIGHWATER 1024
134 
135 static TCGContext **tcg_ctxs;
136 static unsigned int n_tcg_ctxs;
137 TCGv_env cpu_env = 0;
138 
/* Per-region GTree of TBs, keyed by their tb_tc descriptor (see tcg_tb_insert). */
struct tcg_region_tree {
    QemuMutex lock;
    GTree *tree;
    /* padding to avoid false sharing is computed at run-time */
};
144 
/*
 * We divide code_gen_buffer into equally-sized "regions" that TCG threads
 * dynamically allocate from as demand dictates. Given appropriate region
 * sizing, this minimizes flushes even when some TCG threads generate a lot
 * more code than others.
 */
struct tcg_region_state {
    QemuMutex lock;

    /* fields set at init time */
    void *start;          /* very first usable byte (may be unaligned) */
    void *start_aligned;  /* page-aligned start; region strides begin here */
    void *end;            /* end of the usable area (guard page excluded) */
    size_t n;             /* number of regions */
    size_t size; /* size of one region */
    size_t stride; /* .size + guard size */

    /* fields protected by the lock */
    size_t current; /* current region index */
    size_t agg_size_full; /* aggregate size of full regions */
};
166 
167 static struct tcg_region_state region;
168 /*
169  * This is an array of struct tcg_region_tree's, with padding.
170  * We use void * to simplify the computation of region_trees[i]; each
171  * struct is found every tree_size bytes.
172  */
173 static void *region_trees;
174 static size_t tree_size;
175 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
176 static TCGRegSet tcg_target_call_clobber_regs;
177 
#if TCG_TARGET_INSN_UNIT_SIZE == 1
/* Emit one byte into the code stream, advancing s->code_ptr. */
static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
{
    *s->code_ptr++ = v;
}

/* Overwrite a previously emitted byte at @p; no pointer is advanced. */
static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
                                                      uint8_t v)
{
    *p = v;
}
#endif
190 
191 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
192 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
193 {
194     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
195         *s->code_ptr++ = v;
196     } else {
197         tcg_insn_unit *p = s->code_ptr;
198         memcpy(p, &v, sizeof(v));
199         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
200     }
201 }
202 
203 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
204                                                        uint16_t v)
205 {
206     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
207         *p = v;
208     } else {
209         memcpy(p, &v, sizeof(v));
210     }
211 }
212 #endif
213 
214 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
215 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
216 {
217     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
218         *s->code_ptr++ = v;
219     } else {
220         tcg_insn_unit *p = s->code_ptr;
221         memcpy(p, &v, sizeof(v));
222         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
223     }
224 }
225 
226 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
227                                                        uint32_t v)
228 {
229     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
230         *p = v;
231     } else {
232         memcpy(p, &v, sizeof(v));
233     }
234 }
235 #endif
236 
237 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
238 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
239 {
240     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
241         *s->code_ptr++ = v;
242     } else {
243         tcg_insn_unit *p = s->code_ptr;
244         memcpy(p, &v, sizeof(v));
245         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
246     }
247 }
248 
249 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
250                                                        uint64_t v)
251 {
252     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
253         *p = v;
254     } else {
255         memcpy(p, &v, sizeof(v));
256     }
257 }
258 #endif
259 
/* label relocation processing */

/*
 * Record (or immediately apply) a relocation of kind @type at @code_ptr
 * against label @l.  If the label is already resolved, patch in place;
 * otherwise queue the relocation on the label's pending list
 * (l->u is a union: value when resolved, first_reloc while pending).
 */
static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
                          TCGLabel *l, intptr_t addend)
{
    TCGRelocation *r;

    if (l->has_value) {
        /* FIXME: This may break relocations on RISC targets that
           modify instruction fields in place.  The caller may not have
           written the initial value.  */
        bool ok = patch_reloc(code_ptr, type, l->u.value, addend);
        tcg_debug_assert(ok);
    } else {
        /* add a new relocation entry at the head of the pending list */
        r = tcg_malloc(sizeof(TCGRelocation));
        r->type = type;
        r->ptr = code_ptr;
        r->addend = addend;
        r->next = l->u.first_reloc;
        l->u.first_reloc = r;
    }
}
283 
284 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
285 {
286     intptr_t value = (intptr_t)ptr;
287     TCGRelocation *r;
288 
289     tcg_debug_assert(!l->has_value);
290 
291     for (r = l->u.first_reloc; r != NULL; r = r->next) {
292         bool ok = patch_reloc(r->ptr, r->type, value, r->addend);
293         tcg_debug_assert(ok);
294     }
295 
296     l->has_value = 1;
297     l->u.value_ptr = ptr;
298 }
299 
300 TCGLabel *gen_new_label(void)
301 {
302     TCGContext *s = tcg_ctx;
303     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
304 
305     *l = (TCGLabel){
306         .id = s->nb_labels++
307     };
308 
309     return l;
310 }
311 
312 static void set_jmp_reset_offset(TCGContext *s, int which)
313 {
314     size_t off = tcg_current_code_size(s);
315     s->tb_jmp_reset_offset[which] = off;
316     /* Make sure that we didn't overflow the stored offset.  */
317     assert(s->tb_jmp_reset_offset[which] == off);
318 }
319 
320 #include "tcg-target.inc.c"
321 
322 /* compare a pointer @ptr and a tb_tc @s */
323 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
324 {
325     if (ptr >= s->ptr + s->size) {
326         return 1;
327     } else if (ptr < s->ptr) {
328         return -1;
329     }
330     return 0;
331 }
332 
333 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
334 {
335     const struct tb_tc *a = ap;
336     const struct tb_tc *b = bp;
337 
338     /*
339      * When both sizes are set, we know this isn't a lookup.
340      * This is the most likely case: every TB must be inserted; lookups
341      * are a lot less frequent.
342      */
343     if (likely(a->size && b->size)) {
344         if (a->ptr > b->ptr) {
345             return 1;
346         } else if (a->ptr < b->ptr) {
347             return -1;
348         }
349         /* a->ptr == b->ptr should happen only on deletions */
350         g_assert(a->size == b->size);
351         return 0;
352     }
353     /*
354      * All lookups have either .size field set to 0.
355      * From the glib sources we see that @ap is always the lookup key. However
356      * the docs provide no guarantee, so we just mark this case as likely.
357      */
358     if (likely(a->size == 0)) {
359         return ptr_cmp_tb_tc(a->ptr, b);
360     }
361     return ptr_cmp_tb_tc(b->ptr, a);
362 }
363 
364 static void tcg_region_trees_init(void)
365 {
366     size_t i;
367 
368     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
369     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
370     for (i = 0; i < region.n; i++) {
371         struct tcg_region_tree *rt = region_trees + i * tree_size;
372 
373         qemu_mutex_init(&rt->lock);
374         rt->tree = g_tree_new(tb_tc_cmp);
375     }
376 }
377 
378 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
379 {
380     size_t region_idx;
381 
382     if (p < region.start_aligned) {
383         region_idx = 0;
384     } else {
385         ptrdiff_t offset = p - region.start_aligned;
386 
387         if (offset > region.stride * (region.n - 1)) {
388             region_idx = region.n - 1;
389         } else {
390             region_idx = offset / region.stride;
391         }
392     }
393     return region_trees + region_idx * tree_size;
394 }
395 
396 void tcg_tb_insert(TranslationBlock *tb)
397 {
398     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
399 
400     qemu_mutex_lock(&rt->lock);
401     g_tree_insert(rt->tree, &tb->tc, tb);
402     qemu_mutex_unlock(&rt->lock);
403 }
404 
405 void tcg_tb_remove(TranslationBlock *tb)
406 {
407     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
408 
409     qemu_mutex_lock(&rt->lock);
410     g_tree_remove(rt->tree, &tb->tc);
411     qemu_mutex_unlock(&rt->lock);
412 }
413 
414 /*
415  * Find the TB 'tb' such that
416  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
417  * Return NULL if not found.
418  */
419 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
420 {
421     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
422     TranslationBlock *tb;
423     struct tb_tc s = { .ptr = (void *)tc_ptr };
424 
425     qemu_mutex_lock(&rt->lock);
426     tb = g_tree_lookup(rt->tree, &s);
427     qemu_mutex_unlock(&rt->lock);
428     return tb;
429 }
430 
431 static void tcg_region_tree_lock_all(void)
432 {
433     size_t i;
434 
435     for (i = 0; i < region.n; i++) {
436         struct tcg_region_tree *rt = region_trees + i * tree_size;
437 
438         qemu_mutex_lock(&rt->lock);
439     }
440 }
441 
442 static void tcg_region_tree_unlock_all(void)
443 {
444     size_t i;
445 
446     for (i = 0; i < region.n; i++) {
447         struct tcg_region_tree *rt = region_trees + i * tree_size;
448 
449         qemu_mutex_unlock(&rt->lock);
450     }
451 }
452 
453 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
454 {
455     size_t i;
456 
457     tcg_region_tree_lock_all();
458     for (i = 0; i < region.n; i++) {
459         struct tcg_region_tree *rt = region_trees + i * tree_size;
460 
461         g_tree_foreach(rt->tree, func, user_data);
462     }
463     tcg_region_tree_unlock_all();
464 }
465 
466 size_t tcg_nb_tbs(void)
467 {
468     size_t nb_tbs = 0;
469     size_t i;
470 
471     tcg_region_tree_lock_all();
472     for (i = 0; i < region.n; i++) {
473         struct tcg_region_tree *rt = region_trees + i * tree_size;
474 
475         nb_tbs += g_tree_nnodes(rt->tree);
476     }
477     tcg_region_tree_unlock_all();
478     return nb_tbs;
479 }
480 
/* Empty every region tree in one pass while holding all tree locks. */
static void tcg_region_tree_reset_all(void)
{
    size_t i;

    tcg_region_tree_lock_all();
    for (i = 0; i < region.n; i++) {
        struct tcg_region_tree *rt = region_trees + i * tree_size;

        /* Increment the refcount first so that destroy acts as a reset */
        g_tree_ref(rt->tree);
        g_tree_destroy(rt->tree);
    }
    tcg_region_tree_unlock_all();
}
495 
496 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
497 {
498     void *start, *end;
499 
500     start = region.start_aligned + curr_region * region.stride;
501     end = start + region.size;
502 
503     if (curr_region == 0) {
504         start = region.start;
505     }
506     if (curr_region == region.n - 1) {
507         end = region.end;
508     }
509 
510     *pstart = start;
511     *pend = end;
512 }
513 
514 static void tcg_region_assign(TCGContext *s, size_t curr_region)
515 {
516     void *start, *end;
517 
518     tcg_region_bounds(curr_region, &start, &end);
519 
520     s->code_gen_buffer = start;
521     s->code_gen_ptr = start;
522     s->code_gen_buffer_size = end - start;
523     s->code_gen_highwater = end - TCG_HIGHWATER;
524 }
525 
526 static bool tcg_region_alloc__locked(TCGContext *s)
527 {
528     if (region.current == region.n) {
529         return true;
530     }
531     tcg_region_assign(s, region.current);
532     region.current++;
533     return false;
534 }
535 
536 /*
537  * Request a new region once the one in use has filled up.
538  * Returns true on error.
539  */
540 static bool tcg_region_alloc(TCGContext *s)
541 {
542     bool err;
543     /* read the region size now; alloc__locked will overwrite it on success */
544     size_t size_full = s->code_gen_buffer_size;
545 
546     qemu_mutex_lock(&region.lock);
547     err = tcg_region_alloc__locked(s);
548     if (!err) {
549         region.agg_size_full += size_full - TCG_HIGHWATER;
550     }
551     qemu_mutex_unlock(&region.lock);
552     return err;
553 }
554 
/*
 * Perform a context's first region allocation.
 * This function does _not_ increment region.agg_size_full.
 * Caller must hold region.lock (per the __locked suffix convention).
 */
static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
{
    return tcg_region_alloc__locked(s);
}
563 
564 /* Call from a safe-work context */
565 void tcg_region_reset_all(void)
566 {
567     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
568     unsigned int i;
569 
570     qemu_mutex_lock(&region.lock);
571     region.current = 0;
572     region.agg_size_full = 0;
573 
574     for (i = 0; i < n_ctxs; i++) {
575         TCGContext *s = atomic_read(&tcg_ctxs[i]);
576         bool err = tcg_region_initial_alloc__locked(s);
577 
578         g_assert(!err);
579     }
580     qemu_mutex_unlock(&region.lock);
581 
582     tcg_region_tree_reset_all();
583 }
584 
#ifdef CONFIG_USER_ONLY
/* User-mode shares a single context (and hence region) among all threads. */
static size_t tcg_n_regions(void)
{
    return 1;
}
#else
/*
 * It is likely that some vCPUs will translate more code than others, so we
 * first try to set more regions than max_cpus, with those regions being of
 * reasonable size. If that's not possible we make do by evenly dividing
 * the code_gen_buffer among the vCPUs.
 */
static size_t tcg_n_regions(void)
{
    size_t i;

    /* Use a single region if all we have is one vCPU thread */
    if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
        return 1;
    }

    /* Try to have more regions than max_cpus, with each region being >= 2 MB */
    for (i = 8; i > 0; i--) {
        size_t regions_per_thread = i;
        size_t region_size;

        region_size = tcg_init_ctx.code_gen_buffer_size;
        region_size /= max_cpus * regions_per_thread;

        if (region_size >= 2 * 1024u * 1024) {
            return max_cpus * regions_per_thread;
        }
    }
    /* If we can't, then just allocate one region per vCPU thread */
    return max_cpus;
}
#endif
622 
/*
 * Initializes region partitioning.
 *
 * Called at init time from the parent thread (i.e. the one calling
 * tcg_context_init), after the target's TCG globals have been set.
 *
 * Region partitioning works by splitting code_gen_buffer into separate regions,
 * and then assigning regions to TCG threads so that the threads can translate
 * code in parallel without synchronization.
 *
 * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
 * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
 * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
 * must have been parsed before calling this function, since it calls
 * qemu_tcg_mttcg_enabled().
 *
 * In user-mode we use a single region.  Having multiple regions in user-mode
 * is not supported, because the number of vCPU threads (recall that each thread
 * spawned by the guest corresponds to a vCPU thread) is only bounded by the
 * OS, and usually this number is huge (tens of thousands is not uncommon).
 * Thus, given this large bound on the number of vCPU threads and the fact
 * that code_gen_buffer is allocated at compile-time, we cannot guarantee
 * the availability of at least one region per vCPU thread.
 *
 * However, this user-mode limitation is unlikely to be a significant problem
 * in practice. Multi-threaded guests share most if not all of their translated
 * code, which makes parallel code generation less appealing than in softmmu.
 */
void tcg_region_init(void)
{
    void *buf = tcg_init_ctx.code_gen_buffer;
    void *aligned;
    size_t size = tcg_init_ctx.code_gen_buffer_size;
    size_t page_size = qemu_real_host_page_size;
    size_t region_size;
    size_t n_regions;
    size_t i;

    n_regions = tcg_n_regions();

    /* The first region will be 'aligned - buf' bytes larger than the others */
    aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
    g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
    /*
     * Make region_size a multiple of page_size, using aligned as the start.
     * As a result of this we might end up with a few extra pages at the end of
     * the buffer; we will assign those to the last region.
     */
    region_size = (size - (aligned - buf)) / n_regions;
    region_size = QEMU_ALIGN_DOWN(region_size, page_size);

    /* A region must have at least 2 pages; one code, one guard */
    g_assert(region_size >= 2 * page_size);

    /* init the region struct */
    qemu_mutex_init(&region.lock);
    region.n = n_regions;
    /* .size excludes the one guard page at the end of each stride */
    region.size = region_size - page_size;
    region.stride = region_size;
    region.start = buf;
    region.start_aligned = aligned;
    /* page-align the end, since its last page will be a guard page */
    region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
    /* account for that last guard page */
    region.end -= page_size;

    /* set guard pages */
    for (i = 0; i < region.n; i++) {
        void *start, *end;
        int rc;

        tcg_region_bounds(i, &start, &end);
        rc = qemu_mprotect_none(end, page_size);
        g_assert(!rc);
    }

    tcg_region_trees_init();

    /* In user-mode we support only one ctx, so do the initial allocation now */
#ifdef CONFIG_USER_ONLY
    {
        bool err = tcg_region_initial_alloc__locked(tcg_ctx);

        g_assert(!err);
    }
#endif
}
710 
/*
 * All TCG threads except the parent (i.e. the one that called tcg_context_init
 * and registered the target's TCG globals) must register with this function
 * before initiating translation.
 *
 * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
 * of tcg_region_init() for the reasoning behind this.
 *
 * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
 * softmmu tcg_ctxs[] does not track tcg_init_ctx, since the initial context
 * is not used anymore for translation once this function is called.
 *
 * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
 * over the array (e.g. tcg_code_size()) the same for both softmmu and
 * user-mode.
 */
#ifdef CONFIG_USER_ONLY
void tcg_register_thread(void)
{
    tcg_ctx = &tcg_init_ctx;
}
#else
void tcg_register_thread(void)
{
    TCGContext *s = g_malloc(sizeof(*s));
    unsigned int i, n;
    bool err;

    /* Start from a copy of the fully initialized parent context. */
    *s = tcg_init_ctx;

    /* Relink mem_base.  */
    for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
        if (tcg_init_ctx.temps[i].mem_base) {
            /* Recompute the pointer relative to this context's temps array. */
            ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
            tcg_debug_assert(b >= 0 && b < n);
            s->temps[i].mem_base = &s->temps[b];
        }
    }

    /* Claim an entry in tcg_ctxs */
    n = atomic_fetch_inc(&n_tcg_ctxs);
    g_assert(n < max_cpus);
    atomic_set(&tcg_ctxs[n], s);

    tcg_ctx = s;
    qemu_mutex_lock(&region.lock);
    err = tcg_region_initial_alloc__locked(tcg_ctx);
    g_assert(!err);
    qemu_mutex_unlock(&region.lock);
}
#endif /* !CONFIG_USER_ONLY */
761 
762 /*
763  * Returns the size (in bytes) of all translated code (i.e. from all regions)
764  * currently in the cache.
765  * See also: tcg_code_capacity()
766  * Do not confuse with tcg_current_code_size(); that one applies to a single
767  * TCG context.
768  */
769 size_t tcg_code_size(void)
770 {
771     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
772     unsigned int i;
773     size_t total;
774 
775     qemu_mutex_lock(&region.lock);
776     total = region.agg_size_full;
777     for (i = 0; i < n_ctxs; i++) {
778         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
779         size_t size;
780 
781         size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
782         g_assert(size <= s->code_gen_buffer_size);
783         total += size;
784     }
785     qemu_mutex_unlock(&region.lock);
786     return total;
787 }
788 
789 /*
790  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
791  * regions.
792  * See also: tcg_code_size()
793  */
794 size_t tcg_code_capacity(void)
795 {
796     size_t guard_size, capacity;
797 
798     /* no need for synchronization; these variables are set at init time */
799     guard_size = region.stride - region.size;
800     capacity = region.end + guard_size - region.start;
801     capacity -= region.n * (guard_size + TCG_HIGHWATER);
802     return capacity;
803 }
804 
805 size_t tcg_tb_phys_invalidate_count(void)
806 {
807     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
808     unsigned int i;
809     size_t total = 0;
810 
811     for (i = 0; i < n_ctxs; i++) {
812         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
813 
814         total += atomic_read(&s->tb_phys_invalidate_count);
815     }
816     return total;
817 }
818 
819 /* pool based memory allocation */
820 void *tcg_malloc_internal(TCGContext *s, int size)
821 {
822     TCGPool *p;
823     int pool_size;
824 
825     if (size > TCG_POOL_CHUNK_SIZE) {
826         /* big malloc: insert a new pool (XXX: could optimize) */
827         p = g_malloc(sizeof(TCGPool) + size);
828         p->size = size;
829         p->next = s->pool_first_large;
830         s->pool_first_large = p;
831         return p->data;
832     } else {
833         p = s->pool_current;
834         if (!p) {
835             p = s->pool_first;
836             if (!p)
837                 goto new_pool;
838         } else {
839             if (!p->next) {
840             new_pool:
841                 pool_size = TCG_POOL_CHUNK_SIZE;
842                 p = g_malloc(sizeof(TCGPool) + pool_size);
843                 p->size = pool_size;
844                 p->next = NULL;
845                 if (s->pool_current)
846                     s->pool_current->next = p;
847                 else
848                     s->pool_first = p;
849             } else {
850                 p = p->next;
851             }
852         }
853     }
854     s->pool_current = p;
855     s->pool_cur = p->data + size;
856     s->pool_end = p->data + p->size;
857     return p->data;
858 }
859 
860 void tcg_pool_reset(TCGContext *s)
861 {
862     TCGPool *p, *t;
863     for (p = s->pool_first_large; p; p = t) {
864         t = p->next;
865         g_free(p);
866     }
867     s->pool_first_large = NULL;
868     s->pool_cur = s->pool_end = NULL;
869     s->pool_current = NULL;
870 }
871 
/* Descriptor for a helper function callable from generated code. */
typedef struct TCGHelperInfo {
    void *func;          /* host entry point; used as the helper_table key */
    const char *name;
    unsigned flags;
    unsigned sizemask;
} TCGHelperInfo;
878 
879 #include "exec/helper-proto.h"
880 
881 static const TCGHelperInfo all_helpers[] = {
882 #include "exec/helper-tcg.h"
883 };
884 static GHashTable *helper_table;
885 
886 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
887 static void process_op_defs(TCGContext *s);
888 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
889                                             TCGReg reg, const char *name);
890 
/*
 * One-time initialization of a TCG context: per-op constraint storage,
 * the helper hash table, backend (target) initialization, the indirect
 * register-allocation order, and the global "env" temp.
 */
void tcg_context_init(TCGContext *s)
{
    int op, total_args, n, i;
    TCGOpDef *def;
    TCGArgConstraint *args_ct;
    int *sorted_args;
    TCGTemp *ts;

    memset(s, 0, sizeof(*s));
    s->nb_globals = 0;

    /* Count total number of arguments and allocate the corresponding
       space */
    total_args = 0;
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        n = def->nb_iargs + def->nb_oargs;
        total_args += n;
    }

    args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
    sorted_args = g_malloc(sizeof(int) * total_args);

    /* Carve a per-op slice out of the two shared arrays just allocated. */
    for(op = 0; op < NB_OPS; op++) {
        def = &tcg_op_defs[op];
        def->args_ct = args_ct;
        def->sorted_args = sorted_args;
        n = def->nb_iargs + def->nb_oargs;
        sorted_args += n;
        args_ct += n;
    }

    /* Register helpers.  */
    /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
    helper_table = g_hash_table_new(NULL, NULL);

    for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
        g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
                            (gpointer)&all_helpers[i]);
    }

    tcg_target_init(s);
    process_op_defs(s);

    /* Reverse the order of the saved registers, assuming they're all at
       the start of tcg_target_reg_alloc_order.  */
    for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
        int r = tcg_target_reg_alloc_order[n];
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
            break;
        }
    }
    /* n is now the count of leading call-saved registers. */
    for (i = 0; i < n; ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
    }
    for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
        indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
    }

    tcg_ctx = s;
    /*
     * In user-mode we simply share the init context among threads, since we
     * use a single region. See the documentation tcg_region_init() for the
     * reasoning behind this.
     * In softmmu we will have at most max_cpus TCG threads.
     */
#ifdef CONFIG_USER_ONLY
    tcg_ctxs = &tcg_ctx;
    n_tcg_ctxs = 1;
#else
    tcg_ctxs = g_new(TCGContext *, max_cpus);
#endif

    /* Register the CPU env pointer, permanently resident in TCG_AREG0. */
    tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
    ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
    cpu_env = temp_tcgv_ptr(ts);
}
968 
/*
 * Allocate TBs right before their corresponding translated code, making
 * sure that TBs and code are on different cache lines.
 */
TranslationBlock *tcg_tb_alloc(TCGContext *s)
{
    uintptr_t align = qemu_icache_linesize;
    TranslationBlock *tb;
    void *next;

 retry:
    /* TB struct at the next i-cache-aligned slot; code starts after it. */
    tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
    next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);

    if (unlikely(next > s->code_gen_highwater)) {
        /* Region exhausted: grab a fresh one and retry the alignment. */
        if (tcg_region_alloc(s)) {
            return NULL;    /* no regions left; caller must flush */
        }
        goto retry;
    }
    atomic_set(&s->code_gen_ptr, next);
    s->data_gen_ptr = NULL;
    return tb;
}
993 
/*
 * Emit the host prologue/epilogue at the very start of code_gen_buffer,
 * then shrink the buffer so that translated blocks are placed after it.
 * Must be called once per context before any translation.
 */
void tcg_prologue_init(TCGContext *s)
{
    size_t prologue_size, total_size;
    void *buf0, *buf1;

    /* Put the prologue at the beginning of code_gen_buffer.  */
    buf0 = s->code_gen_buffer;
    total_size = s->code_gen_buffer_size;
    s->code_ptr = buf0;
    s->code_buf = buf0;
    s->data_gen_ptr = NULL;
    s->code_gen_prologue = buf0;

    /* Compute a high-water mark, at which we voluntarily flush the buffer
       and start over.  The size here is arbitrary, significantly larger
       than we expect the code generation for any one opcode to require.  */
    s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Generate the prologue.  */
    tcg_target_qemu_prologue(s);

#ifdef TCG_TARGET_NEED_POOL_LABELS
    /* Allow the prologue to put e.g. guest_base into a pool entry.  */
    {
        bool ok = tcg_out_pool_finalize(s);
        tcg_debug_assert(ok);
    }
#endif

    /* Make the generated prologue visible to the instruction stream.  */
    buf1 = s->code_ptr;
    flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);

    /* Deduct the prologue from the buffer.  */
    prologue_size = tcg_current_code_size(s);
    s->code_gen_ptr = buf1;
    s->code_gen_buffer = buf1;
    s->code_buf = buf1;
    total_size -= prologue_size;
    s->code_gen_buffer_size = total_size;

    tcg_register_jit(s->code_gen_buffer, total_size);

#ifdef DEBUG_DISAS
    /* Dump the prologue; disassemble code and list any constant pool
       data (data_gen_ptr, if the target emitted one) separately.  */
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
        qemu_log_lock();
        qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
        if (s->data_gen_ptr) {
            size_t code_size = s->data_gen_ptr - buf0;
            size_t data_size = prologue_size - code_size;
            size_t i;

            log_disas(buf0, code_size);

            for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
                if (sizeof(tcg_target_ulong) == 8) {
                    qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint64_t *)(s->data_gen_ptr + i));
                } else {
                    qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
                             (uintptr_t)s->data_gen_ptr + i,
                             *(uint32_t *)(s->data_gen_ptr + i));
                }
            }
        } else {
            log_disas(buf0, prologue_size);
        }
        qemu_log("\n");
        qemu_log_flush();
        qemu_log_unlock();
    }
#endif

    /* Assert that goto_ptr is implemented completely.  */
    if (TCG_TARGET_HAS_goto_ptr) {
        tcg_debug_assert(s->code_gen_epilogue != NULL);
    }
}
1076 
/* Reset per-translation state before generating a new translation block.  */
void tcg_func_start(TCGContext *s)
{
    tcg_pool_reset(s);
    /* Discard all non-global temporaries from the previous TB.  */
    s->nb_temps = s->nb_globals;

    /* No temps have been previously allocated for size or locality.  */
    memset(s->free_temps, 0, sizeof(s->free_temps));

    s->nb_ops = 0;
    s->nb_labels = 0;
    s->current_frame_offset = s->frame_start;

#ifdef CONFIG_DEBUG_TCG
    s->goto_tb_issue_mask = 0;
#endif

    /* Start with an empty op list and an empty free-op cache.  */
    QTAILQ_INIT(&s->ops);
    QTAILQ_INIT(&s->free_ops);
}
1096 
1097 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1098 {
1099     int n = s->nb_temps++;
1100     tcg_debug_assert(n < TCG_MAX_TEMPS);
1101     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1102 }
1103 
/*
 * Allocate a TCGTemp marked as a global.  Globals occupy the low indices
 * of the temp array, so they must all be created before any ordinary
 * temporary (hence the nb_globals == nb_temps assertion).
 */
static inline TCGTemp *tcg_global_alloc(TCGContext *s)
{
    TCGTemp *ts;

    tcg_debug_assert(s->nb_globals == s->nb_temps);
    s->nb_globals++;
    ts = tcg_temp_alloc(s);
    ts->temp_global = 1;

    return ts;
}
1115 
/*
 * Create a global temporary that is permanently bound to host register REG
 * (e.g. "env").  The register is added to reserved_regs so the allocator
 * never hands it out for anything else.
 */
static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
                                            TCGReg reg, const char *name)
{
    TCGTemp *ts;

    /* A 32-bit host register cannot hold a 64-bit global.  */
    if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
        tcg_abort();
    }

    ts = tcg_global_alloc(s);
    ts->base_type = type;
    ts->type = type;
    ts->fixed_reg = 1;
    ts->reg = reg;
    ts->name = name;    /* NB: not copied; caller must pass a static string */
    tcg_regset_set_reg(s->reserved_regs, reg);

    return ts;
}
1135 
/*
 * Record the spill frame [start, start+size) addressed off host register
 * REG, and create the "_frame" fixed-register global used as its base.
 */
void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
{
    s->frame_start = start;
    s->frame_end = start + size;
    s->frame_temp
        = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
}
1143 
/*
 * Create a global temporary backed by host memory at BASE + OFFSET.
 * On a 32-bit host a 64-bit global is represented as two consecutive
 * 32-bit temps named NAME_0 (low half) and NAME_1 (high half).
 */
TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
                                     intptr_t offset, const char *name)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *base_ts = tcgv_ptr_temp(base);
    TCGTemp *ts = tcg_global_alloc(s);
    int indirect_reg = 0, bigendian = 0;
#ifdef HOST_WORDS_BIGENDIAN
    bigendian = 1;
#endif

    if (!base_ts->fixed_reg) {
        /* We do not support double-indirect registers.  */
        tcg_debug_assert(!base_ts->indirect_reg);
        base_ts->indirect_base = 1;
        /* A 64-bit global split into two halves counts as two indirects.  */
        s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
                            ? 2 : 1);
        indirect_reg = 1;
    }

    if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
        TCGTemp *ts2 = tcg_global_alloc(s);
        char buf[64];

        /* Low half: on a big-endian host the low 32 bits sit at offset+4.  */
        ts->base_type = TCG_TYPE_I64;
        ts->type = TCG_TYPE_I32;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset + bigendian * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_0");
        ts->name = strdup(buf);

        /* High half: must be the very next temp so callers can use ts + 1.  */
        tcg_debug_assert(ts2 == ts + 1);
        ts2->base_type = TCG_TYPE_I64;
        ts2->type = TCG_TYPE_I32;
        ts2->indirect_reg = indirect_reg;
        ts2->mem_allocated = 1;
        ts2->mem_base = base_ts;
        ts2->mem_offset = offset + (1 - bigendian) * 4;
        pstrcpy(buf, sizeof(buf), name);
        pstrcat(buf, sizeof(buf), "_1");
        ts2->name = strdup(buf);
    } else {
        ts->base_type = type;
        ts->type = type;
        ts->indirect_reg = indirect_reg;
        ts->mem_allocated = 1;
        ts->mem_base = base_ts;
        ts->mem_offset = offset;
        ts->name = name;
    }
    return ts;
}
1199 
/*
 * Allocate a temporary of the given TYPE, preferring to reuse a previously
 * freed temp of the same type/locality (tracked per-kind in free_temps).
 * On a 32-bit host a 64-bit temp is built from two adjacent 32-bit temps.
 */
TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
{
    TCGContext *s = tcg_ctx;
    TCGTemp *ts;
    int idx, k;

    /* Free lists are bucketed by type, with locals in a second bank.  */
    k = type + (temp_local ? TCG_TYPE_COUNT : 0);
    idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
    if (idx < TCG_MAX_TEMPS) {
        /* There is already an available temp with the right type.  */
        clear_bit(idx, s->free_temps[k].l);

        ts = &s->temps[idx];
        ts->temp_allocated = 1;
        tcg_debug_assert(ts->base_type == type);
        tcg_debug_assert(ts->temp_local == temp_local);
    } else {
        ts = tcg_temp_alloc(s);
        if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
            /* Pair of 32-bit halves; ts2 must directly follow ts.  */
            TCGTemp *ts2 = tcg_temp_alloc(s);

            ts->base_type = type;
            ts->type = TCG_TYPE_I32;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;

            tcg_debug_assert(ts2 == ts + 1);
            ts2->base_type = TCG_TYPE_I64;
            ts2->type = TCG_TYPE_I32;
            ts2->temp_allocated = 1;
            ts2->temp_local = temp_local;
        } else {
            ts->base_type = type;
            ts->type = type;
            ts->temp_allocated = 1;
            ts->temp_local = temp_local;
        }
    }

#if defined(CONFIG_DEBUG_TCG)
    /* Balanced against tcg_temp_free_internal; see tcg_check_temp_count.  */
    s->temps_in_use++;
#endif
    return ts;
}
1244 
/*
 * Allocate a new (non-local) vector temporary of the given vector TYPE.
 * In debug builds, verify that the backend actually supports that width.
 */
TCGv_vec tcg_temp_new_vec(TCGType type)
{
    TCGTemp *t;

#ifdef CONFIG_DEBUG_TCG
    switch (type) {
    case TCG_TYPE_V64:
        assert(TCG_TARGET_HAS_v64);
        break;
    case TCG_TYPE_V128:
        assert(TCG_TARGET_HAS_v128);
        break;
    case TCG_TYPE_V256:
        assert(TCG_TARGET_HAS_v256);
        break;
    default:
        g_assert_not_reached();
    }
#endif

    t = tcg_temp_new_internal(type, 0);
    return temp_tcgv_vec(t);
}
1268 
1269 /* Create a new temp of the same type as an existing temp.  */
1270 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1271 {
1272     TCGTemp *t = tcgv_vec_temp(match);
1273 
1274     tcg_debug_assert(t->temp_allocated != 0);
1275 
1276     t = tcg_temp_new_internal(t->base_type, 0);
1277     return temp_tcgv_vec(t);
1278 }
1279 
/*
 * Return a temporary to the free list so tcg_temp_new_internal can reuse
 * it.  Globals are never freed; freeing twice is caught by the asserts.
 */
void tcg_temp_free_internal(TCGTemp *ts)
{
    TCGContext *s = tcg_ctx;
    int k, idx;

#if defined(CONFIG_DEBUG_TCG)
    s->temps_in_use--;
    if (s->temps_in_use < 0) {
        fprintf(stderr, "More temporaries freed than allocated!\n");
    }
#endif

    tcg_debug_assert(ts->temp_global == 0);
    tcg_debug_assert(ts->temp_allocated != 0);
    ts->temp_allocated = 0;

    /* Same bucketing as tcg_temp_new_internal: type, plus locality bank.  */
    idx = temp_idx(ts);
    k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
    set_bit(idx, s->free_temps[k].l);
}
1300 
1301 TCGv_i32 tcg_const_i32(int32_t val)
1302 {
1303     TCGv_i32 t0;
1304     t0 = tcg_temp_new_i32();
1305     tcg_gen_movi_i32(t0, val);
1306     return t0;
1307 }
1308 
1309 TCGv_i64 tcg_const_i64(int64_t val)
1310 {
1311     TCGv_i64 t0;
1312     t0 = tcg_temp_new_i64();
1313     tcg_gen_movi_i64(t0, val);
1314     return t0;
1315 }
1316 
1317 TCGv_i32 tcg_const_local_i32(int32_t val)
1318 {
1319     TCGv_i32 t0;
1320     t0 = tcg_temp_local_new_i32();
1321     tcg_gen_movi_i32(t0, val);
1322     return t0;
1323 }
1324 
1325 TCGv_i64 tcg_const_local_i64(int64_t val)
1326 {
1327     TCGv_i64 t0;
1328     t0 = tcg_temp_local_new_i64();
1329     tcg_gen_movi_i64(t0, val);
1330     return t0;
1331 }
1332 
1333 #if defined(CONFIG_DEBUG_TCG)
1334 void tcg_clear_temp_count(void)
1335 {
1336     TCGContext *s = tcg_ctx;
1337     s->temps_in_use = 0;
1338 }
1339 
1340 int tcg_check_temp_count(void)
1341 {
1342     TCGContext *s = tcg_ctx;
1343     if (s->temps_in_use) {
1344         /* Clear the count so that we don't give another
1345          * warning immediately next time around.
1346          */
1347         s->temps_in_use = 0;
1348         return 1;
1349     }
1350     return 0;
1351 }
1352 #endif
1353 
/* Return true if OP may appear in the opcode stream.
   Test the runtime variable that controls each opcode.  */
bool tcg_op_supported(TCGOpcode op)
{
    /* Any host vector width at all implies the basic vector ops.  */
    const bool have_vec
        = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;

    switch (op) {
    /* Control flow and memory-access ops every backend provides.  */
    case INDEX_op_discard:
    case INDEX_op_set_label:
    case INDEX_op_call:
    case INDEX_op_br:
    case INDEX_op_mb:
    case INDEX_op_insn_start:
    case INDEX_op_exit_tb:
    case INDEX_op_goto_tb:
    case INDEX_op_qemu_ld_i32:
    case INDEX_op_qemu_st_i32:
    case INDEX_op_qemu_ld_i64:
    case INDEX_op_qemu_st_i64:
        return true;

    case INDEX_op_goto_ptr:
        return TCG_TARGET_HAS_goto_ptr;

    /* Mandatory 32-bit integer ops.  */
    case INDEX_op_mov_i32:
    case INDEX_op_movi_i32:
    case INDEX_op_setcond_i32:
    case INDEX_op_brcond_i32:
    case INDEX_op_ld8u_i32:
    case INDEX_op_ld8s_i32:
    case INDEX_op_ld16u_i32:
    case INDEX_op_ld16s_i32:
    case INDEX_op_ld_i32:
    case INDEX_op_st8_i32:
    case INDEX_op_st16_i32:
    case INDEX_op_st_i32:
    case INDEX_op_add_i32:
    case INDEX_op_sub_i32:
    case INDEX_op_mul_i32:
    case INDEX_op_and_i32:
    case INDEX_op_or_i32:
    case INDEX_op_xor_i32:
    case INDEX_op_shl_i32:
    case INDEX_op_shr_i32:
    case INDEX_op_sar_i32:
        return true;

    /* Optional 32-bit ops, gated by the per-backend HAS_* flags.  */
    case INDEX_op_movcond_i32:
        return TCG_TARGET_HAS_movcond_i32;
    case INDEX_op_div_i32:
    case INDEX_op_divu_i32:
        return TCG_TARGET_HAS_div_i32;
    case INDEX_op_rem_i32:
    case INDEX_op_remu_i32:
        return TCG_TARGET_HAS_rem_i32;
    case INDEX_op_div2_i32:
    case INDEX_op_divu2_i32:
        return TCG_TARGET_HAS_div2_i32;
    case INDEX_op_rotl_i32:
    case INDEX_op_rotr_i32:
        return TCG_TARGET_HAS_rot_i32;
    case INDEX_op_deposit_i32:
        return TCG_TARGET_HAS_deposit_i32;
    case INDEX_op_extract_i32:
        return TCG_TARGET_HAS_extract_i32;
    case INDEX_op_sextract_i32:
        return TCG_TARGET_HAS_sextract_i32;
    case INDEX_op_add2_i32:
        return TCG_TARGET_HAS_add2_i32;
    case INDEX_op_sub2_i32:
        return TCG_TARGET_HAS_sub2_i32;
    case INDEX_op_mulu2_i32:
        return TCG_TARGET_HAS_mulu2_i32;
    case INDEX_op_muls2_i32:
        return TCG_TARGET_HAS_muls2_i32;
    case INDEX_op_muluh_i32:
        return TCG_TARGET_HAS_muluh_i32;
    case INDEX_op_mulsh_i32:
        return TCG_TARGET_HAS_mulsh_i32;
    case INDEX_op_ext8s_i32:
        return TCG_TARGET_HAS_ext8s_i32;
    case INDEX_op_ext16s_i32:
        return TCG_TARGET_HAS_ext16s_i32;
    case INDEX_op_ext8u_i32:
        return TCG_TARGET_HAS_ext8u_i32;
    case INDEX_op_ext16u_i32:
        return TCG_TARGET_HAS_ext16u_i32;
    case INDEX_op_bswap16_i32:
        return TCG_TARGET_HAS_bswap16_i32;
    case INDEX_op_bswap32_i32:
        return TCG_TARGET_HAS_bswap32_i32;
    case INDEX_op_not_i32:
        return TCG_TARGET_HAS_not_i32;
    case INDEX_op_neg_i32:
        return TCG_TARGET_HAS_neg_i32;
    case INDEX_op_andc_i32:
        return TCG_TARGET_HAS_andc_i32;
    case INDEX_op_orc_i32:
        return TCG_TARGET_HAS_orc_i32;
    case INDEX_op_eqv_i32:
        return TCG_TARGET_HAS_eqv_i32;
    case INDEX_op_nand_i32:
        return TCG_TARGET_HAS_nand_i32;
    case INDEX_op_nor_i32:
        return TCG_TARGET_HAS_nor_i32;
    case INDEX_op_clz_i32:
        return TCG_TARGET_HAS_clz_i32;
    case INDEX_op_ctz_i32:
        return TCG_TARGET_HAS_ctz_i32;
    case INDEX_op_ctpop_i32:
        return TCG_TARGET_HAS_ctpop_i32;

    /* Double-word compare ops only exist on 32-bit hosts.  */
    case INDEX_op_brcond2_i32:
    case INDEX_op_setcond2_i32:
        return TCG_TARGET_REG_BITS == 32;

    /* 64-bit integer ops require a 64-bit host.  */
    case INDEX_op_mov_i64:
    case INDEX_op_movi_i64:
    case INDEX_op_setcond_i64:
    case INDEX_op_brcond_i64:
    case INDEX_op_ld8u_i64:
    case INDEX_op_ld8s_i64:
    case INDEX_op_ld16u_i64:
    case INDEX_op_ld16s_i64:
    case INDEX_op_ld32u_i64:
    case INDEX_op_ld32s_i64:
    case INDEX_op_ld_i64:
    case INDEX_op_st8_i64:
    case INDEX_op_st16_i64:
    case INDEX_op_st32_i64:
    case INDEX_op_st_i64:
    case INDEX_op_add_i64:
    case INDEX_op_sub_i64:
    case INDEX_op_mul_i64:
    case INDEX_op_and_i64:
    case INDEX_op_or_i64:
    case INDEX_op_xor_i64:
    case INDEX_op_shl_i64:
    case INDEX_op_shr_i64:
    case INDEX_op_sar_i64:
    case INDEX_op_ext_i32_i64:
    case INDEX_op_extu_i32_i64:
        return TCG_TARGET_REG_BITS == 64;

    /* Optional 64-bit ops.  The HAS_*_i64 flags are defined to 0 on
       32-bit hosts, so no additional REG_BITS test is needed.  */
    case INDEX_op_movcond_i64:
        return TCG_TARGET_HAS_movcond_i64;
    case INDEX_op_div_i64:
    case INDEX_op_divu_i64:
        return TCG_TARGET_HAS_div_i64;
    case INDEX_op_rem_i64:
    case INDEX_op_remu_i64:
        return TCG_TARGET_HAS_rem_i64;
    case INDEX_op_div2_i64:
    case INDEX_op_divu2_i64:
        return TCG_TARGET_HAS_div2_i64;
    case INDEX_op_rotl_i64:
    case INDEX_op_rotr_i64:
        return TCG_TARGET_HAS_rot_i64;
    case INDEX_op_deposit_i64:
        return TCG_TARGET_HAS_deposit_i64;
    case INDEX_op_extract_i64:
        return TCG_TARGET_HAS_extract_i64;
    case INDEX_op_sextract_i64:
        return TCG_TARGET_HAS_sextract_i64;
    case INDEX_op_extrl_i64_i32:
        return TCG_TARGET_HAS_extrl_i64_i32;
    case INDEX_op_extrh_i64_i32:
        return TCG_TARGET_HAS_extrh_i64_i32;
    case INDEX_op_ext8s_i64:
        return TCG_TARGET_HAS_ext8s_i64;
    case INDEX_op_ext16s_i64:
        return TCG_TARGET_HAS_ext16s_i64;
    case INDEX_op_ext32s_i64:
        return TCG_TARGET_HAS_ext32s_i64;
    case INDEX_op_ext8u_i64:
        return TCG_TARGET_HAS_ext8u_i64;
    case INDEX_op_ext16u_i64:
        return TCG_TARGET_HAS_ext16u_i64;
    case INDEX_op_ext32u_i64:
        return TCG_TARGET_HAS_ext32u_i64;
    case INDEX_op_bswap16_i64:
        return TCG_TARGET_HAS_bswap16_i64;
    case INDEX_op_bswap32_i64:
        return TCG_TARGET_HAS_bswap32_i64;
    case INDEX_op_bswap64_i64:
        return TCG_TARGET_HAS_bswap64_i64;
    case INDEX_op_not_i64:
        return TCG_TARGET_HAS_not_i64;
    case INDEX_op_neg_i64:
        return TCG_TARGET_HAS_neg_i64;
    case INDEX_op_andc_i64:
        return TCG_TARGET_HAS_andc_i64;
    case INDEX_op_orc_i64:
        return TCG_TARGET_HAS_orc_i64;
    case INDEX_op_eqv_i64:
        return TCG_TARGET_HAS_eqv_i64;
    case INDEX_op_nand_i64:
        return TCG_TARGET_HAS_nand_i64;
    case INDEX_op_nor_i64:
        return TCG_TARGET_HAS_nor_i64;
    case INDEX_op_clz_i64:
        return TCG_TARGET_HAS_clz_i64;
    case INDEX_op_ctz_i64:
        return TCG_TARGET_HAS_ctz_i64;
    case INDEX_op_ctpop_i64:
        return TCG_TARGET_HAS_ctpop_i64;
    case INDEX_op_add2_i64:
        return TCG_TARGET_HAS_add2_i64;
    case INDEX_op_sub2_i64:
        return TCG_TARGET_HAS_sub2_i64;
    case INDEX_op_mulu2_i64:
        return TCG_TARGET_HAS_mulu2_i64;
    case INDEX_op_muls2_i64:
        return TCG_TARGET_HAS_muls2_i64;
    case INDEX_op_muluh_i64:
        return TCG_TARGET_HAS_muluh_i64;
    case INDEX_op_mulsh_i64:
        return TCG_TARGET_HAS_mulsh_i64;

    /* Vector ops: require at least one supported vector width, plus
       the relevant per-op flag where the op is optional.  */
    case INDEX_op_mov_vec:
    case INDEX_op_dup_vec:
    case INDEX_op_dupi_vec:
    case INDEX_op_ld_vec:
    case INDEX_op_st_vec:
    case INDEX_op_add_vec:
    case INDEX_op_sub_vec:
    case INDEX_op_and_vec:
    case INDEX_op_or_vec:
    case INDEX_op_xor_vec:
    case INDEX_op_cmp_vec:
        return have_vec;
    case INDEX_op_dup2_vec:
        return have_vec && TCG_TARGET_REG_BITS == 32;
    case INDEX_op_not_vec:
        return have_vec && TCG_TARGET_HAS_not_vec;
    case INDEX_op_neg_vec:
        return have_vec && TCG_TARGET_HAS_neg_vec;
    case INDEX_op_andc_vec:
        return have_vec && TCG_TARGET_HAS_andc_vec;
    case INDEX_op_orc_vec:
        return have_vec && TCG_TARGET_HAS_orc_vec;
    case INDEX_op_mul_vec:
        return have_vec && TCG_TARGET_HAS_mul_vec;
    case INDEX_op_shli_vec:
    case INDEX_op_shri_vec:
    case INDEX_op_sari_vec:
        return have_vec && TCG_TARGET_HAS_shi_vec;
    case INDEX_op_shls_vec:
    case INDEX_op_shrs_vec:
    case INDEX_op_sars_vec:
        return have_vec && TCG_TARGET_HAS_shs_vec;
    case INDEX_op_shlv_vec:
    case INDEX_op_shrv_vec:
    case INDEX_op_sarv_vec:
        return have_vec && TCG_TARGET_HAS_shv_vec;

    default:
        /* Anything else must be a target-specific op in the valid range.  */
        tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
        return true;
    }
}
1616 
/* Note: we convert the 64 bit args to 32 bit and do some alignment
   and endian swap. Maybe it would be better to do the alignment
   and endian swap in tcg_reg_alloc_call(). */
/*
 * Emit an INDEX_op_call for helper FUNC with NARGS arguments and an
 * optional return temp RET (NULL for void helpers).  FUNC must have been
 * registered in helper_table; its call flags and 32/64-bit size mask are
 * looked up there.
 */
void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
{
    int i, real_args, nb_rets, pi;
    unsigned sizemask, flags;
    TCGHelperInfo *info;
    TCGOp *op;

    info = g_hash_table_lookup(helper_table, (gpointer)func);
    flags = info->flags;
    sizemask = info->sizemask;

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* We have 64-bit values in one register, but need to pass as two
       separate parameters.  Split them.  */
    int orig_sizemask = sizemask;
    int orig_nargs = nargs;
    TCGv_i64 retl, reth;
    TCGTemp *split_args[MAX_OPC_PARAM];

    retl = NULL;
    reth = NULL;
    if (sizemask != 0) {
        for (i = real_args = 0; i < nargs; ++i) {
            /* Bit (i+1)*2 of sizemask marks argument i as 64-bit.  */
            int is_64bit = sizemask & (1 << (i+1)*2);
            if (is_64bit) {
                TCGv_i64 orig = temp_tcgv_i64(args[i]);
                TCGv_i32 h = tcg_temp_new_i32();
                TCGv_i32 l = tcg_temp_new_i32();
                tcg_gen_extr_i64_i32(l, h, orig);
                split_args[real_args++] = tcgv_i32_temp(h);
                split_args[real_args++] = tcgv_i32_temp(l);
            } else {
                split_args[real_args++] = args[i];
            }
        }
        /* From here on, operate on the split 32-bit argument list.  */
        nargs = real_args;
        args = split_args;
        sizemask = 0;
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Backend wants 32-bit arguments explicitly widened to 64 bits,
       with sign chosen by bit (i+1)*2+1 of sizemask.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        int is_signed = sizemask & (2 << (i+1)*2);
        if (!is_64bit) {
            TCGv_i64 temp = tcg_temp_new_i64();
            TCGv_i64 orig = temp_tcgv_i64(args[i]);
            if (is_signed) {
                tcg_gen_ext32s_i64(temp, orig);
            } else {
                tcg_gen_ext32u_i64(temp, orig);
            }
            args[i] = tcgv_i64_temp(temp);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */

    op = tcg_emit_op(INDEX_op_call);

    /* Fill op->args: outputs first, then inputs, then func and flags.  */
    pi = 0;
    if (ret != NULL) {
#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
        if (orig_sizemask & 1) {
            /* The 32-bit ABI is going to return the 64-bit value in
               the %o0/%o1 register pair.  Prepare for this by using
               two return temporaries, and reassemble below.  */
            retl = tcg_temp_new_i64();
            reth = tcg_temp_new_i64();
            op->args[pi++] = tcgv_i64_arg(reth);
            op->args[pi++] = tcgv_i64_arg(retl);
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#else
        if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
            /* 64-bit return on a 32-bit host: ret and ret+1 hold the
               two halves, ordered to match host endianness.  */
#ifdef HOST_WORDS_BIGENDIAN
            op->args[pi++] = temp_arg(ret + 1);
            op->args[pi++] = temp_arg(ret);
#else
            op->args[pi++] = temp_arg(ret);
            op->args[pi++] = temp_arg(ret + 1);
#endif
            nb_rets = 2;
        } else {
            op->args[pi++] = temp_arg(ret);
            nb_rets = 1;
        }
#endif
    } else {
        nb_rets = 0;
    }
    TCGOP_CALLO(op) = nb_rets;

    real_args = 0;
    for (i = 0; i < nargs; i++) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
#ifdef TCG_TARGET_CALL_ALIGN_ARGS
            /* some targets want aligned 64 bit args */
            if (real_args & 1) {
                op->args[pi++] = TCG_CALL_DUMMY_ARG;
                real_args++;
            }
#endif
           /* If stack grows up, then we will be placing successive
              arguments at lower addresses, which means we need to
              reverse the order compared to how we would normally
              treat either big or little-endian.  For those arguments
              that will wind up in registers, this still works for
              HPPA (the only current STACK_GROWSUP target) since the
              argument registers are *also* allocated in decreasing
              order.  If another such target is added, this logic may
              have to get more complicated to differentiate between
              stack arguments and register arguments.  */
#if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
            op->args[pi++] = temp_arg(args[i] + 1);
            op->args[pi++] = temp_arg(args[i]);
#else
            op->args[pi++] = temp_arg(args[i]);
            op->args[pi++] = temp_arg(args[i] + 1);
#endif
            real_args += 2;
            continue;
        }

        op->args[pi++] = temp_arg(args[i]);
        real_args++;
    }
    op->args[pi++] = (uintptr_t)func;
    op->args[pi++] = flags;
    TCGOP_CALLI(op) = real_args;

    /* Make sure the fields didn't overflow.  */
    tcg_debug_assert(TCGOP_CALLI(op) == real_args);
    tcg_debug_assert(pi <= ARRAY_SIZE(op->args));

#if defined(__sparc__) && !defined(__arch64__) \
    && !defined(CONFIG_TCG_INTERPRETER)
    /* Free all of the parts we allocated above.  */
    for (i = real_args = 0; i < orig_nargs; ++i) {
        int is_64bit = orig_sizemask & (1 << (i+1)*2);
        if (is_64bit) {
            tcg_temp_free_internal(args[real_args++]);
            tcg_temp_free_internal(args[real_args++]);
        } else {
            real_args++;
        }
    }
    if (orig_sizemask & 1) {
        /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
           Note that describing these as TCGv_i64 eliminates an unnecessary
           zero-extension that tcg_gen_concat_i32_i64 would create.  */
        tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
        tcg_temp_free_i64(retl);
        tcg_temp_free_i64(reth);
    }
#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
    /* Free the widened copies made above.  */
    for (i = 0; i < nargs; ++i) {
        int is_64bit = sizemask & (1 << (i+1)*2);
        if (!is_64bit) {
            tcg_temp_free_internal(args[i]);
        }
    }
#endif /* TCG_TARGET_EXTEND_ARGS */
}
1788 
1789 static void tcg_reg_alloc_start(TCGContext *s)
1790 {
1791     int i, n;
1792     TCGTemp *ts;
1793 
1794     for (i = 0, n = s->nb_globals; i < n; i++) {
1795         ts = &s->temps[i];
1796         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1797     }
1798     for (n = s->nb_temps; i < n; i++) {
1799         ts = &s->temps[i];
1800         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1801         ts->mem_allocated = 0;
1802         ts->fixed_reg = 0;
1803     }
1804 
1805     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1806 }
1807 
1808 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1809                                  TCGTemp *ts)
1810 {
1811     int idx = temp_idx(ts);
1812 
1813     if (ts->temp_global) {
1814         pstrcpy(buf, buf_size, ts->name);
1815     } else if (ts->temp_local) {
1816         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1817     } else {
1818         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1819     }
1820     return buf;
1821 }
1822 
1823 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1824                              int buf_size, TCGArg arg)
1825 {
1826     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1827 }
1828 
1829 /* Find helper name.  */
1830 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1831 {
1832     const char *ret = NULL;
1833     if (helper_table) {
1834         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1835         if (info) {
1836             ret = info->name;
1837         }
1838     }
1839     return ret;
1840 }
1841 
/* Printable names for each TCGCond value, used when dumping ops.  */
static const char * const cond_name[] =
{
    [TCG_COND_NEVER] = "never",
    [TCG_COND_ALWAYS] = "always",
    [TCG_COND_EQ] = "eq",
    [TCG_COND_NE] = "ne",
    [TCG_COND_LT] = "lt",
    [TCG_COND_GE] = "ge",
    [TCG_COND_LE] = "le",
    [TCG_COND_GT] = "gt",
    [TCG_COND_LTU] = "ltu",
    [TCG_COND_GEU] = "geu",
    [TCG_COND_LEU] = "leu",
    [TCG_COND_GTU] = "gtu"
};
1857 
/* Printable names for the MO_* size/sign/endianness combinations.  */
static const char * const ldst_name[] =
{
    [MO_UB]   = "ub",
    [MO_SB]   = "sb",
    [MO_LEUW] = "leuw",
    [MO_LESW] = "lesw",
    [MO_LEUL] = "leul",
    [MO_LESL] = "lesl",
    [MO_LEQ]  = "leq",
    [MO_BEUW] = "beuw",
    [MO_BESW] = "besw",
    [MO_BEUL] = "beul",
    [MO_BESL] = "besl",
    [MO_BEQ]  = "beq",
};
1873 
/* Printable prefixes for the MO_A* alignment bits; the empty string is
   the default (unaligned-allowed, or aligned-only under ALIGNED_ONLY).  */
static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
#ifdef ALIGNED_ONLY
    [MO_UNALN >> MO_ASHIFT]    = "un+",
    [MO_ALIGN >> MO_ASHIFT]    = "",
#else
    [MO_UNALN >> MO_ASHIFT]    = "",
    [MO_ALIGN >> MO_ASHIFT]    = "al+",
#endif
    [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
    [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
    [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
    [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
    [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
    [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
};
1889 
1890 static inline bool tcg_regset_single(TCGRegSet d)
1891 {
1892     return (d & (d - 1)) == 0;
1893 }
1894 
1895 static inline TCGReg tcg_regset_first(TCGRegSet d)
1896 {
1897     if (TCG_TARGET_NB_REGS <= 32) {
1898         return ctz32(d);
1899     } else {
1900         return ctz64(d);
1901     }
1902 }
1903 
1904 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1905 {
1906     char buf[128];
1907     TCGOp *op;
1908 
1909     QTAILQ_FOREACH(op, &s->ops, link) {
1910         int i, k, nb_oargs, nb_iargs, nb_cargs;
1911         const TCGOpDef *def;
1912         TCGOpcode c;
1913         int col = 0;
1914 
1915         c = op->opc;
1916         def = &tcg_op_defs[c];
1917 
1918         if (c == INDEX_op_insn_start) {
1919             nb_oargs = 0;
1920             col += qemu_log("\n ----");
1921 
1922             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
1923                 target_ulong a;
1924 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
1925                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
1926 #else
1927                 a = op->args[i];
1928 #endif
1929                 col += qemu_log(" " TARGET_FMT_lx, a);
1930             }
1931         } else if (c == INDEX_op_call) {
1932             /* variable number of arguments */
1933             nb_oargs = TCGOP_CALLO(op);
1934             nb_iargs = TCGOP_CALLI(op);
1935             nb_cargs = def->nb_cargs;
1936 
1937             /* function name, flags, out args */
1938             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
1939                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
1940                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
1941             for (i = 0; i < nb_oargs; i++) {
1942                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
1943                                                        op->args[i]));
1944             }
1945             for (i = 0; i < nb_iargs; i++) {
1946                 TCGArg arg = op->args[nb_oargs + i];
1947                 const char *t = "<dummy>";
1948                 if (arg != TCG_CALL_DUMMY_ARG) {
1949                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
1950                 }
1951                 col += qemu_log(",%s", t);
1952             }
1953         } else {
1954             col += qemu_log(" %s ", def->name);
1955 
1956             nb_oargs = def->nb_oargs;
1957             nb_iargs = def->nb_iargs;
1958             nb_cargs = def->nb_cargs;
1959 
1960             if (def->flags & TCG_OPF_VECTOR) {
1961                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
1962                                 8 << TCGOP_VECE(op));
1963             }
1964 
1965             k = 0;
1966             for (i = 0; i < nb_oargs; i++) {
1967                 if (k != 0) {
1968                     col += qemu_log(",");
1969                 }
1970                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1971                                                       op->args[k++]));
1972             }
1973             for (i = 0; i < nb_iargs; i++) {
1974                 if (k != 0) {
1975                     col += qemu_log(",");
1976                 }
1977                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
1978                                                       op->args[k++]));
1979             }
1980             switch (c) {
1981             case INDEX_op_brcond_i32:
1982             case INDEX_op_setcond_i32:
1983             case INDEX_op_movcond_i32:
1984             case INDEX_op_brcond2_i32:
1985             case INDEX_op_setcond2_i32:
1986             case INDEX_op_brcond_i64:
1987             case INDEX_op_setcond_i64:
1988             case INDEX_op_movcond_i64:
1989             case INDEX_op_cmp_vec:
1990                 if (op->args[k] < ARRAY_SIZE(cond_name)
1991                     && cond_name[op->args[k]]) {
1992                     col += qemu_log(",%s", cond_name[op->args[k++]]);
1993                 } else {
1994                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
1995                 }
1996                 i = 1;
1997                 break;
1998             case INDEX_op_qemu_ld_i32:
1999             case INDEX_op_qemu_st_i32:
2000             case INDEX_op_qemu_ld_i64:
2001             case INDEX_op_qemu_st_i64:
2002                 {
2003                     TCGMemOpIdx oi = op->args[k++];
2004                     TCGMemOp op = get_memop(oi);
2005                     unsigned ix = get_mmuidx(oi);
2006 
2007                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2008                         col += qemu_log(",$0x%x,%u", op, ix);
2009                     } else {
2010                         const char *s_al, *s_op;
2011                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2012                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2013                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2014                     }
2015                     i = 1;
2016                 }
2017                 break;
2018             default:
2019                 i = 0;
2020                 break;
2021             }
2022             switch (c) {
2023             case INDEX_op_set_label:
2024             case INDEX_op_br:
2025             case INDEX_op_brcond_i32:
2026             case INDEX_op_brcond_i64:
2027             case INDEX_op_brcond2_i32:
2028                 col += qemu_log("%s$L%d", k ? "," : "",
2029                                 arg_label(op->args[k])->id);
2030                 i++, k++;
2031                 break;
2032             default:
2033                 break;
2034             }
2035             for (; i < nb_cargs; i++, k++) {
2036                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2037             }
2038         }
2039 
2040         if (have_prefs || op->life) {
2041             for (; col < 40; ++col) {
2042                 putc(' ', qemu_logfile);
2043             }
2044         }
2045 
2046         if (op->life) {
2047             unsigned life = op->life;
2048 
2049             if (life & (SYNC_ARG * 3)) {
2050                 qemu_log("  sync:");
2051                 for (i = 0; i < 2; ++i) {
2052                     if (life & (SYNC_ARG << i)) {
2053                         qemu_log(" %d", i);
2054                     }
2055                 }
2056             }
2057             life /= DEAD_ARG;
2058             if (life) {
2059                 qemu_log("  dead:");
2060                 for (i = 0; life; ++i, life >>= 1) {
2061                     if (life & 1) {
2062                         qemu_log(" %d", i);
2063                     }
2064                 }
2065             }
2066         }
2067 
2068         if (have_prefs) {
2069             for (i = 0; i < nb_oargs; ++i) {
2070                 TCGRegSet set = op->output_pref[i];
2071 
2072                 if (i == 0) {
2073                     qemu_log("  pref=");
2074                 } else {
2075                     qemu_log(",");
2076                 }
2077                 if (set == 0) {
2078                     qemu_log("none");
2079                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2080                     qemu_log("all");
2081 #ifdef CONFIG_DEBUG_TCG
2082                 } else if (tcg_regset_single(set)) {
2083                     TCGReg reg = tcg_regset_first(set);
2084                     qemu_log("%s", tcg_target_reg_names[reg]);
2085 #endif
2086                 } else if (TCG_TARGET_NB_REGS <= 32) {
2087                     qemu_log("%#x", (uint32_t)set);
2088                 } else {
2089                     qemu_log("%#" PRIx64, (uint64_t)set);
2090                 }
2091             }
2092         }
2093 
2094         qemu_log("\n");
2095     }
2096 }
2097 
2098 /* we give more priority to constraints with less registers */
2099 static int get_constraint_priority(const TCGOpDef *def, int k)
2100 {
2101     const TCGArgConstraint *arg_ct;
2102 
2103     int i, n;
2104     arg_ct = &def->args_ct[k];
2105     if (arg_ct->ct & TCG_CT_ALIAS) {
2106         /* an alias is equivalent to a single register */
2107         n = 1;
2108     } else {
2109         if (!(arg_ct->ct & TCG_CT_REG))
2110             return 0;
2111         n = 0;
2112         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2113             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2114                 n++;
2115         }
2116     }
2117     return TCG_TARGET_NB_REGS - n + 1;
2118 }
2119 
2120 /* sort from highest priority to lowest */
2121 static void sort_constraints(TCGOpDef *def, int start, int n)
2122 {
2123     int i, j, p1, p2, tmp;
2124 
2125     for(i = 0; i < n; i++)
2126         def->sorted_args[start + i] = start + i;
2127     if (n <= 1)
2128         return;
2129     for(i = 0; i < n - 1; i++) {
2130         for(j = i + 1; j < n; j++) {
2131             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2132             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2133             if (p1 < p2) {
2134                 tmp = def->sorted_args[start + i];
2135                 def->sorted_args[start + i] = def->sorted_args[start + j];
2136                 def->sorted_args[start + j] = tmp;
2137             }
2138         }
2139     }
2140 }
2141 
/*
 * Fill in the register constraints (args_ct) of every opcode from the
 * backend's constraint strings, then pre-sort the arguments by
 * constraint priority for the register allocator.
 */
static void process_op_defs(TCGContext *s)
{
    TCGOpcode op;

    for (op = 0; op < NB_OPS; op++) {
        TCGOpDef *def = &tcg_op_defs[op];
        const TCGTargetOpDef *tdefs;
        TCGType type;
        int i, nb_args;

        if (def->flags & TCG_OPF_NOT_PRESENT) {
            continue;
        }

        nb_args = def->nb_iargs + def->nb_oargs;
        if (nb_args == 0) {
            continue;
        }

        tdefs = tcg_target_op_def(op);
        /* Missing TCGTargetOpDef entry. */
        tcg_debug_assert(tdefs != NULL);

        type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
        for (i = 0; i < nb_args; i++) {
            const char *ct_str = tdefs->args_ct_str[i];
            /* Incomplete TCGTargetOpDef entry. */
            tcg_debug_assert(ct_str != NULL);

            def->args_ct[i].u.regs = 0;
            def->args_ct[i].ct = 0;
            while (*ct_str != '\0') {
                switch(*ct_str) {
                case '0' ... '9':
                    {
                        /* A digit names the output argument this input
                           must share a register with (matching
                           constraint); it must be first in the string. */
                        int oarg = *ct_str - '0';
                        tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
                        tcg_debug_assert(oarg < def->nb_oargs);
                        tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
                        /* TCG_CT_ALIAS is for the output arguments.
                           The input is tagged with TCG_CT_IALIAS. */
                        def->args_ct[i] = def->args_ct[oarg];
                        def->args_ct[oarg].ct |= TCG_CT_ALIAS;
                        def->args_ct[oarg].alias_index = i;
                        def->args_ct[i].ct |= TCG_CT_IALIAS;
                        def->args_ct[i].alias_index = oarg;
                    }
                    ct_str++;
                    break;
                case '&':
                    /* Early-clobber: output register must not overlap
                       any input register.  */
                    def->args_ct[i].ct |= TCG_CT_NEWREG;
                    ct_str++;
                    break;
                case 'i':
                    /* Argument may be an immediate constant.  */
                    def->args_ct[i].ct |= TCG_CT_CONST;
                    ct_str++;
                    break;
                default:
                    /* Backend-specific constraint letter.  */
                    ct_str = target_parse_constraint(&def->args_ct[i],
                                                     ct_str, type);
                    /* Typo in TCGTargetOpDef constraint. */
                    tcg_debug_assert(ct_str != NULL);
                }
            }
        }

        /* TCGTargetOpDef entry with too much information? */
        tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);

        /* sort the constraints (XXX: this is just an heuristic) */
        sort_constraints(def, 0, def->nb_oargs);
        sort_constraints(def, def->nb_oargs, def->nb_iargs);
    }
}
2216 
2217 void tcg_op_remove(TCGContext *s, TCGOp *op)
2218 {
2219     TCGLabel *label;
2220 
2221     switch (op->opc) {
2222     case INDEX_op_br:
2223         label = arg_label(op->args[0]);
2224         label->refs--;
2225         break;
2226     case INDEX_op_brcond_i32:
2227     case INDEX_op_brcond_i64:
2228         label = arg_label(op->args[3]);
2229         label->refs--;
2230         break;
2231     case INDEX_op_brcond2_i32:
2232         label = arg_label(op->args[5]);
2233         label->refs--;
2234         break;
2235     default:
2236         break;
2237     }
2238 
2239     QTAILQ_REMOVE(&s->ops, op, link);
2240     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2241     s->nb_ops--;
2242 
2243 #ifdef CONFIG_PROFILER
2244     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2245 #endif
2246 }
2247 
2248 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2249 {
2250     TCGContext *s = tcg_ctx;
2251     TCGOp *op;
2252 
2253     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2254         op = tcg_malloc(sizeof(TCGOp));
2255     } else {
2256         op = QTAILQ_FIRST(&s->free_ops);
2257         QTAILQ_REMOVE(&s->free_ops, op, link);
2258     }
2259     memset(op, 0, offsetof(TCGOp, link));
2260     op->opc = opc;
2261     s->nb_ops++;
2262 
2263     return op;
2264 }
2265 
2266 TCGOp *tcg_emit_op(TCGOpcode opc)
2267 {
2268     TCGOp *op = tcg_op_alloc(opc);
2269     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2270     return op;
2271 }
2272 
2273 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2274 {
2275     TCGOp *new_op = tcg_op_alloc(opc);
2276     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2277     return new_op;
2278 }
2279 
2280 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2281 {
2282     TCGOp *new_op = tcg_op_alloc(opc);
2283     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2284     return new_op;
2285 }
2286 
/* Reachable analysis : remove unreachable code.  Walks the op stream
   forward, tracking whether the current position is dead (follows an
   unconditional control transfer with no intervening live label).  */
static void reachable_code_pass(TCGContext *s)
{
    TCGOp *op, *op_next;
    bool dead = false;

    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        /* By default an op is removed iff it lies in a dead region;
           the cases below may override that decision.  */
        bool remove = dead;
        TCGLabel *label;
        int call_flags;

        switch (op->opc) {
        case INDEX_op_set_label:
            label = arg_label(op->args[0]);
            if (label->refs == 0) {
                /*
                 * While there is an occasional backward branch, virtually
                 * all branches generated by the translators are forward.
                 * Which means that generally we will have already removed
                 * all references to the label that will be, and there is
                 * little to be gained by iterating.
                 */
                remove = true;
            } else {
                /* Once we see a label, insns become live again.  */
                dead = false;
                remove = false;

                /*
                 * Optimization can fold conditional branches to unconditional.
                 * If we find a label with one reference which is preceded by
                 * an unconditional branch to it, remove both.  This needed to
                 * wait until the dead code in between them was removed.
                 */
                if (label->refs == 1) {
                    TCGOp *op_prev = QTAILQ_PREV(op, link);
                    if (op_prev->opc == INDEX_op_br &&
                        label == arg_label(op_prev->args[0])) {
                        tcg_op_remove(s, op_prev);
                        remove = true;
                    }
                }
            }
            break;

        case INDEX_op_br:
        case INDEX_op_exit_tb:
        case INDEX_op_goto_ptr:
            /* Unconditional branches; everything following is dead.  */
            dead = true;
            break;

        case INDEX_op_call:
            /* Notice noreturn helper calls, raising exceptions.  */
            call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
            if (call_flags & TCG_CALL_NO_RETURN) {
                dead = true;
            }
            break;

        case INDEX_op_insn_start:
            /* Never remove -- we need to keep these for unwind.  */
            remove = false;
            break;

        default:
            break;
        }

        if (remove) {
            tcg_op_remove(s, op);
        }
    }
}
2361 
2362 #define TS_DEAD  1
2363 #define TS_MEM   2
2364 
2365 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2366 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2367 
2368 /* For liveness_pass_1, the register preferences for a given temp.  */
2369 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2370 {
2371     return ts->state_ptr;
2372 }
2373 
2374 /* For liveness_pass_1, reset the preferences for a given temp to the
2375  * maximal regset for its type.
2376  */
2377 static inline void la_reset_pref(TCGTemp *ts)
2378 {
2379     *la_temp_pref(ts)
2380         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2381 }
2382 
2383 /* liveness analysis: end of function: all temps are dead, and globals
2384    should be in memory. */
2385 static void la_func_end(TCGContext *s, int ng, int nt)
2386 {
2387     int i;
2388 
2389     for (i = 0; i < ng; ++i) {
2390         s->temps[i].state = TS_DEAD | TS_MEM;
2391         la_reset_pref(&s->temps[i]);
2392     }
2393     for (i = ng; i < nt; ++i) {
2394         s->temps[i].state = TS_DEAD;
2395         la_reset_pref(&s->temps[i]);
2396     }
2397 }
2398 
2399 /* liveness analysis: end of basic block: all temps are dead, globals
2400    and local temps should be in memory. */
2401 static void la_bb_end(TCGContext *s, int ng, int nt)
2402 {
2403     int i;
2404 
2405     for (i = 0; i < ng; ++i) {
2406         s->temps[i].state = TS_DEAD | TS_MEM;
2407         la_reset_pref(&s->temps[i]);
2408     }
2409     for (i = ng; i < nt; ++i) {
2410         s->temps[i].state = (s->temps[i].temp_local
2411                              ? TS_DEAD | TS_MEM
2412                              : TS_DEAD);
2413         la_reset_pref(&s->temps[i]);
2414     }
2415 }
2416 
2417 /* liveness analysis: sync globals back to memory.  */
2418 static void la_global_sync(TCGContext *s, int ng)
2419 {
2420     int i;
2421 
2422     for (i = 0; i < ng; ++i) {
2423         int state = s->temps[i].state;
2424         s->temps[i].state = state | TS_MEM;
2425         if (state == TS_DEAD) {
2426             /* If the global was previously dead, reset prefs.  */
2427             la_reset_pref(&s->temps[i]);
2428         }
2429     }
2430 }
2431 
2432 /* liveness analysis: sync globals back to memory and kill.  */
2433 static void la_global_kill(TCGContext *s, int ng)
2434 {
2435     int i;
2436 
2437     for (i = 0; i < ng; i++) {
2438         s->temps[i].state = TS_DEAD | TS_MEM;
2439         la_reset_pref(&s->temps[i]);
2440     }
2441 }
2442 
2443 /* liveness analysis: note live globals crossing calls.  */
2444 static void la_cross_call(TCGContext *s, int nt)
2445 {
2446     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2447     int i;
2448 
2449     for (i = 0; i < nt; i++) {
2450         TCGTemp *ts = &s->temps[i];
2451         if (!(ts->state & TS_DEAD)) {
2452             TCGRegSet *pset = la_temp_pref(ts);
2453             TCGRegSet set = *pset;
2454 
2455             set &= mask;
2456             /* If the combination is not possible, restart.  */
2457             if (set == 0) {
2458                 set = tcg_target_available_regs[ts->type] & mask;
2459             }
2460             *pset = set;
2461         }
2462     }
2463 }
2464 
/* Liveness analysis : update the opc_arg_life array to tell if a
   given input argument is dead.  Instructions updating dead
   temporaries are removed.  Also computes, per op, the preferred
   output registers (output_pref) by propagating constraint and
   call-convention information backwards. */
static void liveness_pass_1(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps = s->nb_temps;
    TCGOp *op, *op_prev;
    TCGRegSet *prefs;
    int i;

    /* Point each temp's state_ptr at its register-preference set.  */
    prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
    for (i = 0; i < nb_temps; ++i) {
        s->temps[i].state_ptr = prefs + i;
    }

    /* ??? Should be redundant with the exit_tb that ends the TB.  */
    la_func_end(s, nb_globals, nb_temps);

    /* Walk the ops backwards so uses are seen before definitions.  */
    QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
        int nb_iargs, nb_oargs;
        TCGOpcode opc_new, opc_new2;
        bool have_opc_new2;
        TCGLifeData arg_life = 0;
        TCGTemp *ts;
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];

        switch (opc) {
        case INDEX_op_call:
            {
                int call_flags;
                int nb_call_regs;

                nb_oargs = TCGOP_CALLO(op);
                nb_iargs = TCGOP_CALLI(op);
                call_flags = op->args[nb_oargs + nb_iargs + 1];

                /* pure functions can be removed if their result is unused */
                if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
                    for (i = 0; i < nb_oargs; i++) {
                        ts = arg_temp(op->args[i]);
                        if (ts->state != TS_DEAD) {
                            goto do_not_remove_call;
                        }
                    }
                    goto do_remove;
                }
            do_not_remove_call:

                /* Output args are dead.  */
                for (i = 0; i < nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                    if (ts->state & TS_MEM) {
                        arg_life |= SYNC_ARG << i;
                    }
                    ts->state = TS_DEAD;
                    la_reset_pref(ts);

                    /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
                    op->output_pref[i] = 0;
                }

                if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
                                    TCG_CALL_NO_READ_GLOBALS))) {
                    la_global_kill(s, nb_globals);
                } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
                    la_global_sync(s, nb_globals);
                }

                /* Record arguments that die in this helper.  */
                for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
                    ts = arg_temp(op->args[i]);
                    if (ts && ts->state & TS_DEAD) {
                        arg_life |= DEAD_ARG << i;
                    }
                }

                /* For all live registers, remove call-clobbered prefs.  */
                la_cross_call(s, nb_temps);

                nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);

                /* Input arguments are live for preceding opcodes.  */
                for (i = 0; i < nb_iargs; i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts && ts->state & TS_DEAD) {
                        /* For those arguments that die, and will be allocated
                         * in registers, clear the register set for that arg,
                         * to be filled in below.  For args that will be on
                         * the stack, reset to any available reg.
                         */
                        *la_temp_pref(ts)
                            = (i < nb_call_regs ? 0 :
                               tcg_target_available_regs[ts->type]);
                        ts->state &= ~TS_DEAD;
                    }
                }

                /* For each input argument, add its input register to prefs.
                   If a temp is used once, this produces a single set bit.  */
                for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
                    ts = arg_temp(op->args[i + nb_oargs]);
                    if (ts) {
                        tcg_regset_set_reg(*la_temp_pref(ts),
                                           tcg_target_call_iarg_regs[i]);
                    }
                }
            }
            break;
        case INDEX_op_insn_start:
            break;
        case INDEX_op_discard:
            /* mark the temporary as dead */
            ts = arg_temp(op->args[0]);
            ts->state = TS_DEAD;
            la_reset_pref(ts);
            break;

        case INDEX_op_add2_i32:
            opc_new = INDEX_op_add_i32;
            goto do_addsub2;
        case INDEX_op_sub2_i32:
            opc_new = INDEX_op_sub_i32;
            goto do_addsub2;
        case INDEX_op_add2_i64:
            opc_new = INDEX_op_add_i64;
            goto do_addsub2;
        case INDEX_op_sub2_i64:
            opc_new = INDEX_op_sub_i64;
        do_addsub2:
            nb_iargs = 4;
            nb_oargs = 2;
            /* Test if the high part of the operation is dead, but not
               the low part.  The result can be optimized to a simple
               add or sub.  This happens often for x86_64 guest when the
               cpu mode is set to 32 bit.  */
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    goto do_remove;
                }
                /* Replace the opcode and adjust the args in place,
                   leaving 3 unused args at the end.  */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[4];
                /* Fall through and mark the single-word operation live.  */
                nb_iargs = 2;
                nb_oargs = 1;
            }
            goto do_not_remove;

        case INDEX_op_mulu2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_muluh_i32;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
            goto do_mul2;
        case INDEX_op_muls2_i32:
            opc_new = INDEX_op_mul_i32;
            opc_new2 = INDEX_op_mulsh_i32;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
            goto do_mul2;
        case INDEX_op_mulu2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_muluh_i64;
            have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
            goto do_mul2;
        case INDEX_op_muls2_i64:
            opc_new = INDEX_op_mul_i64;
            opc_new2 = INDEX_op_mulsh_i64;
            have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
            goto do_mul2;
        do_mul2:
            nb_iargs = 2;
            nb_oargs = 2;
            if (arg_temp(op->args[1])->state == TS_DEAD) {
                if (arg_temp(op->args[0])->state == TS_DEAD) {
                    /* Both parts of the operation are dead.  */
                    goto do_remove;
                }
                /* The high part of the operation is dead; generate the low. */
                op->opc = opc = opc_new;
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
                /* The low part of the operation is dead; generate the high. */
                op->opc = opc = opc_new2;
                op->args[0] = op->args[1];
                op->args[1] = op->args[2];
                op->args[2] = op->args[3];
            } else {
                goto do_not_remove;
            }
            /* Mark the single-word operation live.  */
            nb_oargs = 1;
            goto do_not_remove;

        default:
            /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Test if the operation can be removed because all
               its outputs are dead. We assume that nb_oargs == 0
               implies side effects */
            if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
                for (i = 0; i < nb_oargs; i++) {
                    if (arg_temp(op->args[i])->state != TS_DEAD) {
                        goto do_not_remove;
                    }
                }
                goto do_remove;
            }
            goto do_not_remove;

        do_remove:
            tcg_op_remove(s, op);
            break;

        do_not_remove:
            for (i = 0; i < nb_oargs; i++) {
                ts = arg_temp(op->args[i]);

                /* Remember the preference of the uses that followed.  */
                op->output_pref[i] = *la_temp_pref(ts);

                /* Output args are dead.  */
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
                if (ts->state & TS_MEM) {
                    arg_life |= SYNC_ARG << i;
                }
                ts->state = TS_DEAD;
                la_reset_pref(ts);
            }

            /* If end of basic block, update.  */
            if (def->flags & TCG_OPF_BB_EXIT) {
                la_func_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_BB_END) {
                la_bb_end(s, nb_globals, nb_temps);
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                la_global_sync(s, nb_globals);
                if (def->flags & TCG_OPF_CALL_CLOBBER) {
                    la_cross_call(s, nb_temps);
                }
            }

            /* Record arguments that die in this opcode.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    arg_life |= DEAD_ARG << i;
                }
            }

            /* Input arguments are live for preceding opcodes.  */
            for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                ts = arg_temp(op->args[i]);
                if (ts->state & TS_DEAD) {
                    /* For operands that were dead, initially allow
                       all regs for the type.  */
                    *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
                    ts->state &= ~TS_DEAD;
                }
            }

            /* Incorporate constraints for this operand.  */
            switch (opc) {
            case INDEX_op_mov_i32:
            case INDEX_op_mov_i64:
                /* Note that these are TCG_OPF_NOT_PRESENT and do not
                   have proper constraints.  That said, special case
                   moves to propagate preferences backward.  */
                if (IS_DEAD_ARG(1)) {
                    *la_temp_pref(arg_temp(op->args[0]))
                        = *la_temp_pref(arg_temp(op->args[1]));
                }
                break;

            default:
                for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
                    const TCGArgConstraint *ct = &def->args_ct[i];
                    TCGRegSet set, *pset;

                    ts = arg_temp(op->args[i]);
                    pset = la_temp_pref(ts);
                    set = *pset;

                    set &= ct->u.regs;
                    if (ct->ct & TCG_CT_IALIAS) {
                        set &= op->output_pref[ct->alias_index];
                    }
                    /* If the combination is not possible, restart.  */
                    if (set == 0) {
                        set = ct->u.regs;
                    }
                    *pset = set;
                }
                break;
            }
            break;
        }
        op->life = arg_life;
    }
}
2775 
/* Liveness analysis: Convert indirect regs to direct temporaries.
 *
 * An "indirect" global lives in memory reached through a base register
 * rather than in a host register.  This pass pairs each such global
 * with a newly allocated direct temporary, rewrites opcode arguments
 * to use the direct temp, and inserts explicit ld/st opcodes to move
 * values between the memory slot and the direct temp as needed.
 *
 * During the pass, ts->state_ptr points at the paired direct temp
 * (NULL when the temp is not indirect), and ts->state tracks where
 * the value currently is:
 *   TS_DEAD - not resident in the direct temp (must load before use)
 *   TS_MEM  - resident in the direct temp AND in sync with memory
 *   0       - resident in the direct temp, memory copy is stale
 *
 * Returns true if any opcode argument was rewritten, so the caller
 * knows a further optimization/liveness pass may be worthwhile.
 */
static bool liveness_pass_2(TCGContext *s)
{
    int nb_globals = s->nb_globals;
    int nb_temps, i;
    bool changes = false;
    TCGOp *op, *op_next;

    /* Create a temporary for each indirect global.  */
    for (i = 0; i < nb_globals; ++i) {
        TCGTemp *its = &s->temps[i];
        if (its->indirect_reg) {
            TCGTemp *dts = tcg_temp_alloc(s);
            dts->type = its->type;
            dts->base_type = its->base_type;
            its->state_ptr = dts;
        } else {
            its->state_ptr = NULL;
        }
        /* All globals begin dead.  */
        its->state = TS_DEAD;
    }
    /* Non-global temps also begin dead and have no paired temp.  */
    for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
        TCGTemp *its = &s->temps[i];
        its->state_ptr = NULL;
        its->state = TS_DEAD;
    }

    /* _SAFE iteration: the loop body inserts ld/st ops around OP.  */
    QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
        TCGOpcode opc = op->opc;
        const TCGOpDef *def = &tcg_op_defs[opc];
        TCGLifeData arg_life = op->life;
        int nb_iargs, nb_oargs, call_flags;
        TCGTemp *arg_ts, *dir_ts;

        if (opc == INDEX_op_call) {
            nb_oargs = TCGOP_CALLO(op);
            nb_iargs = TCGOP_CALLI(op);
            call_flags = op->args[nb_oargs + nb_iargs + 1];
        } else {
            nb_iargs = def->nb_iargs;
            nb_oargs = def->nb_oargs;

            /* Set flags similar to how calls require.  */
            if (def->flags & TCG_OPF_BB_END) {
                /* Like writing globals: save_globals */
                call_flags = 0;
            } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
                /* Like reading globals: sync_globals */
                call_flags = TCG_CALL_NO_WRITE_GLOBALS;
            } else {
                /* No effect on globals.  */
                call_flags = (TCG_CALL_NO_READ_GLOBALS |
                              TCG_CALL_NO_WRITE_GLOBALS);
            }
        }

        /* Make sure that input arguments are available.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts && arg_ts->state == TS_DEAD) {
                    /* Insert a load from the memory slot into the
                       direct temp before this use.  */
                    TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
                                      ? INDEX_op_ld_i32
                                      : INDEX_op_ld_i64);
                    TCGOp *lop = tcg_op_insert_before(s, op, lopc);

                    lop->args[0] = temp_arg(dir_ts);
                    lop->args[1] = temp_arg(arg_ts->mem_base);
                    lop->args[2] = arg_ts->mem_offset;

                    /* Loaded, but synced with memory.  */
                    arg_ts->state = TS_MEM;
                }
            }
        }

        /* Perform input replacement, and mark inputs that became dead.
           No action is required except keeping temp_state up to date
           so that we reload when needed.  */
        for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            if (arg_ts) {
                dir_ts = arg_ts->state_ptr;
                if (dir_ts) {
                    op->args[i] = temp_arg(dir_ts);
                    changes = true;
                    if (IS_DEAD_ARG(i)) {
                        arg_ts->state = TS_DEAD;
                    }
                }
            }
        }

        /* Liveness analysis should ensure that the following are
           all correct, for call sites and basic block end points.  */
        if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
            /* Nothing to do */
        } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are synced back,
                   that is, either TS_DEAD or TS_MEM.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state != 0);
            }
        } else {
            for (i = 0; i < nb_globals; ++i) {
                /* Liveness should see that globals are saved back,
                   that is, TS_DEAD, waiting to be reloaded.  */
                arg_ts = &s->temps[i];
                tcg_debug_assert(arg_ts->state_ptr == 0
                                 || arg_ts->state == TS_DEAD);
            }
        }

        /* Outputs become available.  */
        for (i = 0; i < nb_oargs; i++) {
            arg_ts = arg_temp(op->args[i]);
            dir_ts = arg_ts->state_ptr;
            if (!dir_ts) {
                continue;
            }
            op->args[i] = temp_arg(dir_ts);
            changes = true;

            /* The output is now live and modified.  */
            arg_ts->state = 0;

            /* Sync outputs upon their last write.  */
            if (NEED_SYNC_ARG(i)) {
                /* Insert a store back to the memory slot after OP.  */
                TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
                                  ? INDEX_op_st_i32
                                  : INDEX_op_st_i64);
                TCGOp *sop = tcg_op_insert_after(s, op, sopc);

                sop->args[0] = temp_arg(dir_ts);
                sop->args[1] = temp_arg(arg_ts->mem_base);
                sop->args[2] = arg_ts->mem_offset;

                arg_ts->state = TS_MEM;
            }
            /* Drop outputs that are dead.  */
            if (IS_DEAD_ARG(i)) {
                arg_ts->state = TS_DEAD;
            }
        }
    }

    return changes;
}
2928 
2929 #ifdef CONFIG_DEBUG_TCG
2930 static void dump_regs(TCGContext *s)
2931 {
2932     TCGTemp *ts;
2933     int i;
2934     char buf[64];
2935 
2936     for(i = 0; i < s->nb_temps; i++) {
2937         ts = &s->temps[i];
2938         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2939         switch(ts->val_type) {
2940         case TEMP_VAL_REG:
2941             printf("%s", tcg_target_reg_names[ts->reg]);
2942             break;
2943         case TEMP_VAL_MEM:
2944             printf("%d(%s)", (int)ts->mem_offset,
2945                    tcg_target_reg_names[ts->mem_base->reg]);
2946             break;
2947         case TEMP_VAL_CONST:
2948             printf("$0x%" TCG_PRIlx, ts->val);
2949             break;
2950         case TEMP_VAL_DEAD:
2951             printf("D");
2952             break;
2953         default:
2954             printf("???");
2955             break;
2956         }
2957         printf("\n");
2958     }
2959 
2960     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2961         if (s->reg_to_temp[i] != NULL) {
2962             printf("%s: %s\n",
2963                    tcg_target_reg_names[i],
2964                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
2965         }
2966     }
2967 }
2968 
2969 static void check_regs(TCGContext *s)
2970 {
2971     int reg;
2972     int k;
2973     TCGTemp *ts;
2974     char buf[64];
2975 
2976     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
2977         ts = s->reg_to_temp[reg];
2978         if (ts != NULL) {
2979             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
2980                 printf("Inconsistency for register %s:\n",
2981                        tcg_target_reg_names[reg]);
2982                 goto fail;
2983             }
2984         }
2985     }
2986     for (k = 0; k < s->nb_temps; k++) {
2987         ts = &s->temps[k];
2988         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
2989             && s->reg_to_temp[ts->reg] != ts) {
2990             printf("Inconsistency for temp %s:\n",
2991                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
2992         fail:
2993             printf("reg state:\n");
2994             dump_regs(s);
2995             tcg_abort();
2996         }
2997     }
2998 }
2999 #endif
3000 
/* Assign a stack-frame slot to TS and record it as the temp's canonical
   memory location.  Aborts if the preallocated frame is exhausted.  */
static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
{
#if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
    /* Sparc64 stack is accessed with offset of 2047 */
    /* Round the offset up to natural tcg_target_long alignment.  */
    s->current_frame_offset = (s->current_frame_offset +
                               (tcg_target_long)sizeof(tcg_target_long) - 1) &
        ~(sizeof(tcg_target_long) - 1);
#endif
    /* No room left in the frame for another slot.  */
    if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
        s->frame_end) {
        tcg_abort();
    }
    ts->mem_offset = s->current_frame_offset;
    ts->mem_base = s->frame_temp;
    ts->mem_allocated = 1;
    s->current_frame_offset += sizeof(tcg_target_long);
}
3018 
3019 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3020 
3021 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3022    mark it free; otherwise mark it dead.  */
3023 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3024 {
3025     if (ts->fixed_reg) {
3026         return;
3027     }
3028     if (ts->val_type == TEMP_VAL_REG) {
3029         s->reg_to_temp[ts->reg] = NULL;
3030     }
3031     ts->val_type = (free_or_dead < 0
3032                     || ts->temp_local
3033                     || ts->temp_global
3034                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3035 }
3036 
/* Mark a temporary as dead.  Convenience wrapper around
   temp_free_or_dead with a positive ("dead") disposition.  */
static inline void temp_dead(TCGContext *s, TCGTemp *ts)
{
    temp_free_or_dead(s, ts, 1);
}
3042 
/* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
   registers needs to be allocated to store a constant; 'preferred_regs'
   is forwarded to the allocator should such a load be needed.  If
   'free_or_dead' is non-zero, subsequently release the temporary; if it
   is positive, the temp is dead; if it is negative, the temp is free.  */
static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
                      TCGRegSet preferred_regs, int free_or_dead)
{
    if (ts->fixed_reg) {
        /* Fixed-register temps have no memory home to sync.  */
        return;
    }
    if (!ts->mem_coherent) {
        if (!ts->mem_allocated) {
            /* First sync of this temp: give it a frame slot.  */
            temp_allocate_frame(s, ts);
        }
        switch (ts->val_type) {
        case TEMP_VAL_CONST:
            /* If we're going to free the temp immediately, then we won't
               require it later in a register, so attempt to store the
               constant to memory directly.  */
            if (free_or_dead
                && tcg_out_sti(s, ts->type, ts->val,
                               ts->mem_base->reg, ts->mem_offset)) {
                break;
            }
            /* Otherwise materialize the constant into a register and
               fall through to the register store below.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      allocated_regs, preferred_regs);
            /* fallthrough */

        case TEMP_VAL_REG:
            tcg_out_st(s, ts->type, ts->reg,
                       ts->mem_base->reg, ts->mem_offset);
            break;

        case TEMP_VAL_MEM:
            break;

        case TEMP_VAL_DEAD:
        default:
            /* A dead temp has no value to sync; internal error.  */
            tcg_abort();
        }
        ts->mem_coherent = 1;
    }
    if (free_or_dead) {
        temp_free_or_dead(s, ts, free_or_dead);
    }
}
3089 
3090 /* free register 'reg' by spilling the corresponding temporary if necessary */
3091 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3092 {
3093     TCGTemp *ts = s->reg_to_temp[reg];
3094     if (ts != NULL) {
3095         temp_sync(s, ts, allocated_regs, 0, -1);
3096     }
3097 }
3098 
3099 /**
3100  * tcg_reg_alloc:
3101  * @required_regs: Set of registers in which we must allocate.
3102  * @allocated_regs: Set of registers which must be avoided.
3103  * @preferred_regs: Set of registers we should prefer.
3104  * @rev: True if we search the registers in "indirect" order.
3105  *
3106  * The allocated register must be in @required_regs & ~@allocated_regs,
3107  * but if we can put it in @preferred_regs we may save a move later.
3108  */
3109 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3110                             TCGRegSet allocated_regs,
3111                             TCGRegSet preferred_regs, bool rev)
3112 {
3113     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3114     TCGRegSet reg_ct[2];
3115     const int *order;
3116 
3117     reg_ct[1] = required_regs & ~allocated_regs;
3118     tcg_debug_assert(reg_ct[1] != 0);
3119     reg_ct[0] = reg_ct[1] & preferred_regs;
3120 
3121     /* Skip the preferred_regs option if it cannot be satisfied,
3122        or if the preference made no difference.  */
3123     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3124 
3125     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3126 
3127     /* Try free registers, preferences first.  */
3128     for (j = f; j < 2; j++) {
3129         TCGRegSet set = reg_ct[j];
3130 
3131         if (tcg_regset_single(set)) {
3132             /* One register in the set.  */
3133             TCGReg reg = tcg_regset_first(set);
3134             if (s->reg_to_temp[reg] == NULL) {
3135                 return reg;
3136             }
3137         } else {
3138             for (i = 0; i < n; i++) {
3139                 TCGReg reg = order[i];
3140                 if (s->reg_to_temp[reg] == NULL &&
3141                     tcg_regset_test_reg(set, reg)) {
3142                     return reg;
3143                 }
3144             }
3145         }
3146     }
3147 
3148     /* We must spill something.  */
3149     for (j = f; j < 2; j++) {
3150         TCGRegSet set = reg_ct[j];
3151 
3152         if (tcg_regset_single(set)) {
3153             /* One register in the set.  */
3154             TCGReg reg = tcg_regset_first(set);
3155             tcg_reg_free(s, reg, allocated_regs);
3156             return reg;
3157         } else {
3158             for (i = 0; i < n; i++) {
3159                 TCGReg reg = order[i];
3160                 if (tcg_regset_test_reg(set, reg)) {
3161                     tcg_reg_free(s, reg, allocated_regs);
3162                     return reg;
3163                 }
3164             }
3165         }
3166     }
3167 
3168     tcg_abort();
3169 }
3170 
3171 /* Make sure the temporary is in a register.  If needed, allocate the register
3172    from DESIRED while avoiding ALLOCATED.  */
3173 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3174                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3175 {
3176     TCGReg reg;
3177 
3178     switch (ts->val_type) {
3179     case TEMP_VAL_REG:
3180         return;
3181     case TEMP_VAL_CONST:
3182         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3183                             preferred_regs, ts->indirect_base);
3184         tcg_out_movi(s, ts->type, reg, ts->val);
3185         ts->mem_coherent = 0;
3186         break;
3187     case TEMP_VAL_MEM:
3188         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3189                             preferred_regs, ts->indirect_base);
3190         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3191         ts->mem_coherent = 1;
3192         break;
3193     case TEMP_VAL_DEAD:
3194     default:
3195         tcg_abort();
3196     }
3197     ts->reg = reg;
3198     ts->val_type = TEMP_VAL_REG;
3199     s->reg_to_temp[reg] = ts;
3200 }
3201 
/* Save a temporary to memory. 'allocated_regs' is used in case a
   temporary registers needs to be allocated to store a constant.
   With liveness analysis in effect this has become a pure check:
   the temp must already be in memory (or pinned to a fixed reg).  */
static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
{
    /* The liveness analysis already ensures that globals are back
       in memory. Keep a tcg_debug_assert for safety. */
    tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
}
3210 
3211 /* save globals to their canonical location and assume they can be
3212    modified be the following code. 'allocated_regs' is used in case a
3213    temporary registers needs to be allocated to store a constant. */
3214 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3215 {
3216     int i, n;
3217 
3218     for (i = 0, n = s->nb_globals; i < n; i++) {
3219         temp_save(s, &s->temps[i], allocated_regs);
3220     }
3221 }
3222 
3223 /* sync globals to their canonical location and assume they can be
3224    read by the following code. 'allocated_regs' is used in case a
3225    temporary registers needs to be allocated to store a constant. */
3226 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3227 {
3228     int i, n;
3229 
3230     for (i = 0, n = s->nb_globals; i < n; i++) {
3231         TCGTemp *ts = &s->temps[i];
3232         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3233                          || ts->fixed_reg
3234                          || ts->mem_coherent);
3235     }
3236 }
3237 
3238 /* at the end of a basic block, we assume all temporaries are dead and
3239    all globals are stored at their canonical location. */
3240 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3241 {
3242     int i;
3243 
3244     for (i = s->nb_globals; i < s->nb_temps; i++) {
3245         TCGTemp *ts = &s->temps[i];
3246         if (ts->temp_local) {
3247             temp_save(s, ts, allocated_regs);
3248         } else {
3249             /* The liveness analysis already ensures that temps are dead.
3250                Keep an tcg_debug_assert for safety. */
3251             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3252         }
3253     }
3254 
3255     save_globals(s, allocated_regs);
3256 }
3257 
/* Assign constant VAL to output temp OTS.  The movi is not emitted
   here (except for fixed registers): the constant is propagated into
   ots->val and only materialized later if a register or memory copy
   is actually needed.  NOTE: the IS_DEAD_ARG/NEED_SYNC_ARG macros
   read the local variable named 'arg_life'.  */
static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
                                  tcg_target_ulong val, TCGLifeData arg_life,
                                  TCGRegSet preferred_regs)
{
    if (ots->fixed_reg) {
        /* For fixed registers, we do not do any constant propagation.  */
        tcg_out_movi(s, ots->type, ots->reg, val);
        return;
    }

    /* The movi is not explicitly generated here.  */
    if (ots->val_type == TEMP_VAL_REG) {
        s->reg_to_temp[ots->reg] = NULL;
    }
    ots->val_type = TEMP_VAL_CONST;
    ots->val = val;
    ots->mem_coherent = 0;
    if (NEED_SYNC_ARG(0)) {
        /* Last write wants memory: store now, and free/kill if dead.  */
        temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
    } else if (IS_DEAD_ARG(0)) {
        temp_dead(s, ots);
    }
}
3281 
3282 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3283 {
3284     TCGTemp *ots = arg_temp(op->args[0]);
3285     tcg_target_ulong val = op->args[1];
3286 
3287     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3288 }
3289 
/* Register allocation for a mov opcode: copy args[1] (source) into
   args[0] (destination), trying in order to propagate a constant,
   store directly to memory, transfer register ownership, or emit a
   real register-to-register move.  NOTE: IS_DEAD_ARG/NEED_SYNC_ARG
   read the local variable named 'arg_life'.  */
static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    TCGRegSet allocated_regs, preferred_regs;
    TCGTemp *ts, *ots;
    TCGType otype, itype;

    allocated_regs = s->reserved_regs;
    preferred_regs = op->output_pref[0];
    ots = arg_temp(op->args[0]);
    ts = arg_temp(op->args[1]);

    /* Note that otype != itype for no-op truncation.  */
    otype = ots->type;
    itype = ts->type;

    if (ts->val_type == TEMP_VAL_CONST) {
        /* propagate constant or generate sti */
        tcg_target_ulong val = ts->val;
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
        return;
    }

    /* If the source value is in memory we're going to be forced
       to have it in a register in order to perform the copy.  Copy
       the SOURCE value into its own register first, that way we
       don't have to reload SOURCE the next time it is used. */
    if (ts->val_type == TEMP_VAL_MEM) {
        temp_load(s, ts, tcg_target_available_regs[itype],
                  allocated_regs, preferred_regs);
    }

    tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
    if (IS_DEAD_ARG(0) && !ots->fixed_reg) {
        /* mov to a non-saved dead register makes no sense (even with
           liveness analysis disabled). */
        tcg_debug_assert(NEED_SYNC_ARG(0));
        /* Store the source straight into the dest's memory slot and
           kill both temps; no register copy is needed.  */
        if (!ots->mem_allocated) {
            temp_allocate_frame(s, ots);
        }
        tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
        if (IS_DEAD_ARG(1)) {
            temp_dead(s, ts);
        }
        temp_dead(s, ots);
    } else {
        if (IS_DEAD_ARG(1) && !ts->fixed_reg && !ots->fixed_reg) {
            /* the mov can be suppressed */
            /* Transfer ownership of the source register to the dest.  */
            if (ots->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ots->reg] = NULL;
            }
            ots->reg = ts->reg;
            temp_dead(s, ts);
        } else {
            if (ots->val_type != TEMP_VAL_REG) {
                /* When allocating a new register, make sure to not spill the
                   input one. */
                tcg_regset_set_reg(allocated_regs, ts->reg);
                ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
                                         allocated_regs, preferred_regs,
                                         ots->indirect_base);
            }
            tcg_out_mov(s, otype, ots->reg, ts->reg);
        }
        ots->val_type = TEMP_VAL_REG;
        ots->mem_coherent = 0;
        s->reg_to_temp[ots->reg] = ots;
        if (NEED_SYNC_ARG(0)) {
            temp_sync(s, ots, allocated_regs, 0, 0);
        }
    }
}
3365 
/* Register allocation for a generic opcode: satisfy the input
   constraints (loading/moving temps into acceptable registers),
   handle call-clobber/side-effect/BB-end flags, satisfy the output
   constraints, emit the target instruction, and finally sync or kill
   the outputs as liveness dictates.  NOTE: IS_DEAD_ARG/NEED_SYNC_ARG
   read the local variable named 'arg_life'.  */
static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
{
    const TCGLifeData arg_life = op->life;
    const TCGOpDef * const def = &tcg_op_defs[op->opc];
    TCGRegSet i_allocated_regs;
    TCGRegSet o_allocated_regs;
    int i, k, nb_iargs, nb_oargs;
    TCGReg reg;
    TCGArg arg;
    const TCGArgConstraint *arg_ct;
    TCGTemp *ts;
    TCGArg new_args[TCG_MAX_OP_ARGS];
    int const_args[TCG_MAX_OP_ARGS];

    nb_oargs = def->nb_oargs;
    nb_iargs = def->nb_iargs;

    /* copy constants */
    memcpy(new_args + nb_oargs + nb_iargs,
           op->args + nb_oargs + nb_iargs,
           sizeof(TCGArg) * def->nb_cargs);

    i_allocated_regs = s->reserved_regs;
    o_allocated_regs = s->reserved_regs;

    /* satisfy input constraints */
    for (k = 0; k < nb_iargs; k++) {
        TCGRegSet i_preferred_regs, o_preferred_regs;

        /* Process inputs in the backend's preferred (sorted) order.  */
        i = def->sorted_args[nb_oargs + k];
        arg = op->args[i];
        arg_ct = &def->args_ct[i];
        ts = arg_temp(arg);

        if (ts->val_type == TEMP_VAL_CONST
            && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
            /* constant is OK for instruction */
            const_args[i] = 1;
            new_args[i] = ts->val;
            continue;
        }

        i_preferred_regs = o_preferred_regs = 0;
        if (arg_ct->ct & TCG_CT_IALIAS) {
            /* This input shares a register with an output.  */
            o_preferred_regs = op->output_pref[arg_ct->alias_index];
            if (ts->fixed_reg) {
                /* if fixed register, we must allocate a new register
                   if the alias is not the same register */
                if (arg != op->args[arg_ct->alias_index]) {
                    goto allocate_in_reg;
                }
            } else {
                /* if the input is aliased to an output and if it is
                   not dead after the instruction, we must allocate
                   a new register and move it */
                if (!IS_DEAD_ARG(i)) {
                    goto allocate_in_reg;
                }

                /* check if the current register has already been allocated
                   for another input aliased to an output */
                if (ts->val_type == TEMP_VAL_REG) {
                    int k2, i2;
                    reg = ts->reg;
                    for (k2 = 0 ; k2 < k ; k2++) {
                        i2 = def->sorted_args[nb_oargs + k2];
                        if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
                            reg == new_args[i2]) {
                            goto allocate_in_reg;
                        }
                    }
                }
                /* Safe to overwrite in place: bias toward the output's
                   preferred registers.  */
                i_preferred_regs = o_preferred_regs;
            }
        }

        temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
        reg = ts->reg;

        if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
            /* nothing to do : the constraint is satisfied */
        } else {
        allocate_in_reg:
            /* allocate a new register matching the constraint
               and move the temporary register into it */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      i_allocated_regs, 0);
            reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
                                o_preferred_regs, ts->indirect_base);
            tcg_out_mov(s, ts->type, reg, ts->reg);
        }
        new_args[i] = reg;
        const_args[i] = 0;
        tcg_regset_set_reg(i_allocated_regs, reg);
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    if (def->flags & TCG_OPF_BB_END) {
        tcg_reg_alloc_bb_end(s, i_allocated_regs);
    } else {
        if (def->flags & TCG_OPF_CALL_CLOBBER) {
            /* XXX: permit generic clobber register list ? */
            for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
                if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
                    tcg_reg_free(s, i, i_allocated_regs);
                }
            }
        }
        if (def->flags & TCG_OPF_SIDE_EFFECTS) {
            /* sync globals if the op has side effects and might trigger
               an exception. */
            sync_globals(s, i_allocated_regs);
        }

        /* satisfy the output constraints */
        for(k = 0; k < nb_oargs; k++) {
            i = def->sorted_args[k];
            arg = op->args[i];
            arg_ct = &def->args_ct[i];
            ts = arg_temp(arg);
            if ((arg_ct->ct & TCG_CT_ALIAS)
                && !const_args[arg_ct->alias_index]) {
                /* Output aliases a (non-constant) input: reuse its reg.  */
                reg = new_args[arg_ct->alias_index];
            } else if (arg_ct->ct & TCG_CT_NEWREG) {
                /* Output must be in a register distinct from all inputs.  */
                reg = tcg_reg_alloc(s, arg_ct->u.regs,
                                    i_allocated_regs | o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            } else {
                /* if fixed register, we try to use it */
                reg = ts->reg;
                if (ts->fixed_reg &&
                    tcg_regset_test_reg(arg_ct->u.regs, reg)) {
                    goto oarg_end;
                }
                reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
                                    op->output_pref[k], ts->indirect_base);
            }
            tcg_regset_set_reg(o_allocated_regs, reg);
            /* if a fixed register is used, then a move will be done afterwards */
            if (!ts->fixed_reg) {
                if (ts->val_type == TEMP_VAL_REG) {
                    s->reg_to_temp[ts->reg] = NULL;
                }
                ts->val_type = TEMP_VAL_REG;
                ts->reg = reg;
                /* temp value is modified, so the value kept in memory is
                   potentially not the same */
                ts->mem_coherent = 0;
                s->reg_to_temp[reg] = ts;
            }
        oarg_end:
            new_args[i] = reg;
        }
    }

    /* emit instruction */
    if (def->flags & TCG_OPF_VECTOR) {
        tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
                       new_args, const_args);
    } else {
        tcg_out_op(s, op->opc, new_args, const_args);
    }

    /* move the outputs in the correct register if needed */
    for(i = 0; i < nb_oargs; i++) {
        ts = arg_temp(op->args[i]);
        reg = new_args[i];
        if (ts->fixed_reg && ts->reg != reg) {
            tcg_out_mov(s, ts->type, ts->reg, reg);
        }
        if (NEED_SYNC_ARG(i)) {
            temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
        } else if (IS_DEAD_ARG(i)) {
            temp_dead(s, ts);
        }
    }
}
3549 
3550 #ifdef TCG_TARGET_STACK_GROWSUP
3551 #define STACK_DIR(x) (-(x))
3552 #else
3553 #define STACK_DIR(x) (x)
3554 #endif
3555 
/*
 * Register allocation and emission for a helper call op.  Marshals the
 * call's input arguments into the ABI-mandated stack slots and argument
 * registers, frees dead temps and clobbered registers, spills or syncs
 * globals as required by the call flags, emits the call, and finally
 * binds the output temps to the return-value registers.
 */
static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
{
    const int nb_oargs = TCGOP_CALLO(op);
    const int nb_iargs = TCGOP_CALLI(op);
    const TCGLifeData arg_life = op->life;
    int flags, nb_regs, i;
    TCGReg reg;
    TCGArg arg;
    TCGTemp *ts;
    intptr_t stack_offset;
    size_t call_stack_size;
    tcg_insn_unit *func_addr;
    int allocate_args;
    TCGRegSet allocated_regs;

    /* The function pointer and the call flags are stored after the
       output and input arguments of the op.  */
    func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
    flags = op->args[nb_oargs + nb_iargs + 1];

    /* Number of inputs passed in registers; any remainder goes on the
       stack.  */
    nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
    if (nb_regs > nb_iargs) {
        nb_regs = nb_iargs;
    }

    /* assign stack slots first */
    call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
    call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
        ~(TCG_TARGET_STACK_ALIGN - 1);
    allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
    if (allocate_args) {
        /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
           preallocate call stack */
        tcg_abort();
    }

    /* Store the stack-passed arguments; direction of traversal depends
       on which way the target's stack grows.  */
    stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
    for (i = nb_regs; i < nb_iargs; i++) {
        arg = op->args[nb_oargs + i];
#ifdef TCG_TARGET_STACK_GROWSUP
        stack_offset -= sizeof(tcg_target_long);
#endif
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            /* Load into any non-reserved register, then store to the slot.  */
            temp_load(s, ts, tcg_target_available_regs[ts->type],
                      s->reserved_regs, 0);
            tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
        }
#ifndef TCG_TARGET_STACK_GROWSUP
        stack_offset += sizeof(tcg_target_long);
#endif
    }

    /* assign input registers */
    allocated_regs = s->reserved_regs;
    for (i = 0; i < nb_regs; i++) {
        arg = op->args[nb_oargs + i];
        if (arg != TCG_CALL_DUMMY_ARG) {
            ts = arg_temp(arg);
            reg = tcg_target_call_iarg_regs[i];

            if (ts->val_type == TEMP_VAL_REG) {
                if (ts->reg != reg) {
                    /* Evict whatever occupies the argument register,
                       then copy the value into it.  */
                    tcg_reg_free(s, reg, allocated_regs);
                    tcg_out_mov(s, ts->type, reg, ts->reg);
                }
            } else {
                /* Load the temp directly into the one required register.  */
                TCGRegSet arg_set = 0;

                tcg_reg_free(s, reg, allocated_regs);
                tcg_regset_set_reg(arg_set, reg);
                temp_load(s, ts, arg_set, allocated_regs, 0);
            }

            tcg_regset_set_reg(allocated_regs, reg);
        }
    }

    /* mark dead temporaries and free the associated registers */
    for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
        if (IS_DEAD_ARG(i)) {
            temp_dead(s, arg_temp(op->args[i]));
        }
    }

    /* clobber call registers */
    for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
        if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
            tcg_reg_free(s, i, allocated_regs);
        }
    }

    /* Save globals if they might be written by the helper, sync them if
       they might be read. */
    if (flags & TCG_CALL_NO_READ_GLOBALS) {
        /* Nothing to do */
    } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
        sync_globals(s, allocated_regs);
    } else {
        save_globals(s, allocated_regs);
    }

    tcg_out_call(s, func_addr);

    /* assign output registers and emit moves if needed */
    for(i = 0; i < nb_oargs; i++) {
        arg = op->args[i];
        ts = arg_temp(arg);
        reg = tcg_target_call_oarg_regs[i];
        /* The return register was clobbered above, so must be free.  */
        tcg_debug_assert(s->reg_to_temp[reg] == NULL);

        if (ts->fixed_reg) {
            if (ts->reg != reg) {
                tcg_out_mov(s, ts->type, ts->reg, reg);
            }
        } else {
            /* Rebind the temp to the return-value register.  */
            if (ts->val_type == TEMP_VAL_REG) {
                s->reg_to_temp[ts->reg] = NULL;
            }
            ts->val_type = TEMP_VAL_REG;
            ts->reg = reg;
            ts->mem_coherent = 0;
            s->reg_to_temp[reg] = ts;
            if (NEED_SYNC_ARG(i)) {
                temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
            } else if (IS_DEAD_ARG(i)) {
                temp_dead(s, ts);
            }
        }
    }
}
3685 
3686 #ifdef CONFIG_PROFILER
3687 
/* avoid copy/paste errors */
/* Accumulate @field of @from into @to, reading the source atomically.  */
#define PROF_ADD(to, from, field)                       \
    do {                                                \
        (to)->field += atomic_read(&((from)->field));   \
    } while (0)

/* Keep in @to the maximum value of @field seen across contexts.  */
#define PROF_MAX(to, from, field)                                       \
    do {                                                                \
        typeof((from)->field) val__ = atomic_read(&((from)->field));    \
        if (val__ > (to)->field) {                                      \
            (to)->field = val__;                                        \
        }                                                               \
    } while (0)
3701 
3702 /* Pass in a zero'ed @prof */
3703 static inline
3704 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3705 {
3706     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3707     unsigned int i;
3708 
3709     for (i = 0; i < n_ctxs; i++) {
3710         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3711         const TCGProfile *orig = &s->prof;
3712 
3713         if (counters) {
3714             PROF_ADD(prof, orig, cpu_exec_time);
3715             PROF_ADD(prof, orig, tb_count1);
3716             PROF_ADD(prof, orig, tb_count);
3717             PROF_ADD(prof, orig, op_count);
3718             PROF_MAX(prof, orig, op_count_max);
3719             PROF_ADD(prof, orig, temp_count);
3720             PROF_MAX(prof, orig, temp_count_max);
3721             PROF_ADD(prof, orig, del_op_count);
3722             PROF_ADD(prof, orig, code_in_len);
3723             PROF_ADD(prof, orig, code_out_len);
3724             PROF_ADD(prof, orig, search_out_len);
3725             PROF_ADD(prof, orig, interm_time);
3726             PROF_ADD(prof, orig, code_time);
3727             PROF_ADD(prof, orig, la_time);
3728             PROF_ADD(prof, orig, opt_time);
3729             PROF_ADD(prof, orig, restore_count);
3730             PROF_ADD(prof, orig, restore_time);
3731         }
3732         if (table) {
3733             int i;
3734 
3735             for (i = 0; i < NB_OPS; i++) {
3736                 PROF_ADD(prof, orig, table_op_count[i]);
3737             }
3738         }
3739     }
3740 }
3741 
3742 #undef PROF_ADD
3743 #undef PROF_MAX
3744 
/* Snapshot only the scalar counters into the zero'ed @prof.  */
static void tcg_profile_snapshot_counters(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, true, false);
}
3749 
/* Snapshot only the per-opcode counter table into the zero'ed @prof.  */
static void tcg_profile_snapshot_table(TCGProfile *prof)
{
    tcg_profile_snapshot(prof, false, true);
}
3754 
3755 void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
3756 {
3757     TCGProfile prof = {};
3758     int i;
3759 
3760     tcg_profile_snapshot_table(&prof);
3761     for (i = 0; i < NB_OPS; i++) {
3762         cpu_fprintf(f, "%s %" PRId64 "\n", tcg_op_defs[i].name,
3763                     prof.table_op_count[i]);
3764     }
3765 }
3766 
3767 int64_t tcg_cpu_exec_time(void)
3768 {
3769     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3770     unsigned int i;
3771     int64_t ret = 0;
3772 
3773     for (i = 0; i < n_ctxs; i++) {
3774         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
3775         const TCGProfile *prof = &s->prof;
3776 
3777         ret += atomic_read(&prof->cpu_exec_time);
3778     }
3779     return ret;
3780 }
3781 #else
/* Stub used when CONFIG_PROFILER is not set.  */
void tcg_dump_op_count(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
3786 
/* Stub used when CONFIG_PROFILER is not set: there is no counter to
   report, so treat the call as a fatal configuration error.  */
int64_t tcg_cpu_exec_time(void)
{
    error_report("%s: TCG profiler not compiled", __func__);
    exit(EXIT_FAILURE);
}
3792 #endif
3793 
3794 
/*
 * Translate the TCG ops accumulated in @s into host code at tb->tc.ptr.
 * Runs the optimizer and liveness passes, then allocates registers and
 * emits code op by op.  Returns the size in bytes of the generated
 * code, or -1 if the code buffer overflowed (or a finalization pass
 * failed) and the caller must restart with a fresh buffer.
 */
int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
{
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &s->prof;
#endif
    int i, num_insns;
    TCGOp *op;

#ifdef CONFIG_PROFILER
    /* Account ops and temps for this TB in the profile counters.  */
    {
        int n = 0;

        QTAILQ_FOREACH(op, &s->ops, link) {
            n++;
        }
        atomic_set(&prof->op_count, prof->op_count + n);
        if (n > prof->op_count_max) {
            atomic_set(&prof->op_count_max, n);
        }

        n = s->nb_temps;
        atomic_set(&prof->temp_count, prof->temp_count + n);
        if (n > prof->temp_count_max) {
            atomic_set(&prof->temp_count_max, n);
        }
    }
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP:\n");
        tcg_dump_ops(s, false);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

#ifdef CONFIG_PROFILER
    /* Time the optimizer: subtract now, add back when it finishes.  */
    atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
#endif

#ifdef USE_TCG_OPTIMIZATIONS
    tcg_optimize(s);
#endif

#ifdef CONFIG_PROFILER
    atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
    atomic_set(&prof->la_time, prof->la_time - profile_getclock());
#endif

    reachable_code_pass(s);
    liveness_pass_1(s);

    if (s->nb_indirects > 0) {
#ifdef DEBUG_DISAS
        if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
                     && qemu_log_in_addr_range(tb->pc))) {
            qemu_log_lock();
            qemu_log("OP before indirect lowering:\n");
            tcg_dump_ops(s, false);
            qemu_log("\n");
            qemu_log_unlock();
        }
#endif
        /* Replace indirect temps with direct temps.  */
        if (liveness_pass_2(s)) {
            /* If changes were made, re-run liveness.  */
            liveness_pass_1(s);
        }
    }

#ifdef CONFIG_PROFILER
    atomic_set(&prof->la_time, prof->la_time + profile_getclock());
#endif

#ifdef DEBUG_DISAS
    if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
                 && qemu_log_in_addr_range(tb->pc))) {
        qemu_log_lock();
        qemu_log("OP after optimization and liveness analysis:\n");
        tcg_dump_ops(s, true);
        qemu_log("\n");
        qemu_log_unlock();
    }
#endif

    tcg_reg_alloc_start(s);

    s->code_buf = tb->tc.ptr;
    s->code_ptr = tb->tc.ptr;

#ifdef TCG_TARGET_NEED_LDST_LABELS
    QSIMPLEQ_INIT(&s->ldst_labels);
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    s->pool_labels = NULL;
#endif

    /* Start at -1 so the first insn_start does not record an end offset.  */
    num_insns = -1;
    QTAILQ_FOREACH(op, &s->ops, link) {
        TCGOpcode opc = op->opc;

#ifdef CONFIG_PROFILER
        atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
#endif

        switch (opc) {
        case INDEX_op_mov_i32:
        case INDEX_op_mov_i64:
        case INDEX_op_mov_vec:
            tcg_reg_alloc_mov(s, op);
            break;
        case INDEX_op_movi_i32:
        case INDEX_op_movi_i64:
        case INDEX_op_dupi_vec:
            tcg_reg_alloc_movi(s, op);
            break;
        case INDEX_op_insn_start:
            /* Record per-guest-insn offsets and start data for unwinding.  */
            if (num_insns >= 0) {
                size_t off = tcg_current_code_size(s);
                s->gen_insn_end_off[num_insns] = off;
                /* Assert that we do not overflow our stored offset.  */
                assert(s->gen_insn_end_off[num_insns] == off);
            }
            num_insns++;
            for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
                target_ulong a;
#if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
                /* Each start word was split across two 32-bit op args.  */
                a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
#else
                a = op->args[i];
#endif
                s->gen_insn_data[num_insns][i] = a;
            }
            break;
        case INDEX_op_discard:
            temp_dead(s, arg_temp(op->args[0]));
            break;
        case INDEX_op_set_label:
            /* End of basic block: all temps go back to canonical state.  */
            tcg_reg_alloc_bb_end(s, s->reserved_regs);
            tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
            break;
        case INDEX_op_call:
            tcg_reg_alloc_call(s, op);
            break;
        default:
            /* Sanity check that we've not introduced any unhandled opcodes. */
            tcg_debug_assert(tcg_op_supported(opc));
            /* Note: in order to speed up the code, it would be much
               faster to have specialized register allocator functions for
               some common argument patterns */
            tcg_reg_alloc_op(s, op);
            break;
        }
#ifdef CONFIG_DEBUG_TCG
        check_regs(s);
#endif
        /* Test for (pending) buffer overflow.  The assumption is that any
           one operation beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           generating code without having to check during generation.  */
        if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
            return -1;
        }
    }
    /* Record the end offset of the last guest instruction.  */
    tcg_debug_assert(num_insns >= 0);
    s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);

    /* Generate TB finalization at the end of block */
#ifdef TCG_TARGET_NEED_LDST_LABELS
    if (!tcg_out_ldst_finalize(s)) {
        return -1;
    }
#endif
#ifdef TCG_TARGET_NEED_POOL_LABELS
    if (!tcg_out_pool_finalize(s)) {
        return -1;
    }
#endif

    /* flush instruction cache */
    flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);

    return tcg_current_code_size(s);
}
3982 
3983 #ifdef CONFIG_PROFILER
/* Print a human-readable summary of the aggregated TCG profile
   counters: TB/op/temp averages, cycle breakdowns, and restore stats.  */
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    TCGProfile prof = {};
    const TCGProfile *s;
    int64_t tb_count;
    int64_t tb_div_count;
    int64_t tot;

    tcg_profile_snapshot_counters(&prof);
    s = &prof;
    tb_count = s->tb_count;
    /* Avoid division by zero when no TB has been translated yet.  */
    tb_div_count = tb_count ? tb_count : 1;
    tot = s->interm_time + s->code_time;

    /* The 2.4 GHz figure is an assumed host clock for the seconds
       estimate, not a measured frequency.  */
    cpu_fprintf(f, "JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
                tot, tot / 2.4e9);
    cpu_fprintf(f, "translated TBs      %" PRId64 " (aborted=%" PRId64 " %0.1f%%)\n",
                tb_count, s->tb_count1 - tb_count,
                (double)(s->tb_count1 - s->tb_count)
                / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
    cpu_fprintf(f, "avg ops/TB          %0.1f max=%d\n",
                (double)s->op_count / tb_div_count, s->op_count_max);
    cpu_fprintf(f, "deleted ops/TB      %0.2f\n",
                (double)s->del_op_count / tb_div_count);
    cpu_fprintf(f, "avg temps/TB        %0.2f max=%d\n",
                (double)s->temp_count / tb_div_count, s->temp_count_max);
    cpu_fprintf(f, "avg host code/TB    %0.1f\n",
                (double)s->code_out_len / tb_div_count);
    cpu_fprintf(f, "avg search data/TB  %0.1f\n",
                (double)s->search_out_len / tb_div_count);

    cpu_fprintf(f, "cycles/op           %0.1f\n",
                s->op_count ? (double)tot / s->op_count : 0);
    cpu_fprintf(f, "cycles/in byte      %0.1f\n",
                s->code_in_len ? (double)tot / s->code_in_len : 0);
    cpu_fprintf(f, "cycles/out byte     %0.1f\n",
                s->code_out_len ? (double)tot / s->code_out_len : 0);
    cpu_fprintf(f, "cycles/search byte     %0.1f\n",
                s->search_out_len ? (double)tot / s->search_out_len : 0);
    /* From here on @tot is only a percentage denominator.  */
    if (tot == 0) {
        tot = 1;
    }
    cpu_fprintf(f, "  gen_interm time   %0.1f%%\n",
                (double)s->interm_time / tot * 100.0);
    cpu_fprintf(f, "  gen_code time     %0.1f%%\n",
                (double)s->code_time / tot * 100.0);
    cpu_fprintf(f, "optim./code time    %0.1f%%\n",
                (double)s->opt_time / (s->code_time ? s->code_time : 1)
                * 100.0);
    cpu_fprintf(f, "liveness/code time  %0.1f%%\n",
                (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
    cpu_fprintf(f, "cpu_restore count   %" PRId64 "\n",
                s->restore_count);
    cpu_fprintf(f, "  avg cycles        %0.1f\n",
                s->restore_count ? (double)s->restore_time / s->restore_count : 0);
}
4040 #else
/* Stub used when CONFIG_PROFILER is not set.  */
void tcg_dump_info(FILE *f, fprintf_function cpu_fprintf)
{
    cpu_fprintf(f, "[TCG profiler not compiled]\n");
}
4045 #endif
4046 
4047 #ifdef ELF_HOST_MACHINE
4048 /* In order to use this feature, the backend needs to do three things:
4049 
4050    (1) Define ELF_HOST_MACHINE to indicate both what value to
4051        put into the ELF image and to indicate support for the feature.
4052 
4053    (2) Define tcg_register_jit.  This should create a buffer containing
4054        the contents of a .debug_frame section that describes the post-
4055        prologue unwind info for the tcg machine.
4056 
4057    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4058 */
4059 
/* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
typedef enum {
    JIT_NOACTION = 0,
    JIT_REGISTER_FN,
    JIT_UNREGISTER_FN
} jit_actions_t;

struct jit_code_entry {
    struct jit_code_entry *next_entry;
    struct jit_code_entry *prev_entry;
    const void *symfile_addr;   /* in-memory ELF image */
    uint64_t symfile_size;
};

struct jit_descriptor {
    uint32_t version;
    uint32_t action_flag;       /* jit_actions_t value */
    struct jit_code_entry *relevant_entry;
    struct jit_code_entry *first_entry;
};

/* GDB sets a breakpoint on this function; the noinline attribute and
   the empty asm keep the compiler from inlining or discarding it.  */
void __jit_debug_register_code(void) __attribute__((noinline));
void __jit_debug_register_code(void)
{
    asm("");
}

/* Must statically initialize the version, because GDB may check
   the version before we can set it.  */
struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };

/* End GDB interface.  */
4092 
/* Return the offset of @str within the NUL-separated string table
   @strtab.  The table starts with an empty string at offset 0, so the
   scan begins at offset 1.  The caller must pass a string that is
   present in the table; there is no end-of-table check.  */
static int find_string(const char *strtab, const char *str)
{
    for (const char *entry = strtab + 1; ; entry += strlen(entry) + 1) {
        if (strcmp(entry, str) == 0) {
            return entry - strtab;
        }
    }
}
4104 
4105 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4106                                  const void *debug_frame,
4107                                  size_t debug_frame_size)
4108 {
4109     struct __attribute__((packed)) DebugInfo {
4110         uint32_t  len;
4111         uint16_t  version;
4112         uint32_t  abbrev;
4113         uint8_t   ptr_size;
4114         uint8_t   cu_die;
4115         uint16_t  cu_lang;
4116         uintptr_t cu_low_pc;
4117         uintptr_t cu_high_pc;
4118         uint8_t   fn_die;
4119         char      fn_name[16];
4120         uintptr_t fn_low_pc;
4121         uintptr_t fn_high_pc;
4122         uint8_t   cu_eoc;
4123     };
4124 
4125     struct ElfImage {
4126         ElfW(Ehdr) ehdr;
4127         ElfW(Phdr) phdr;
4128         ElfW(Shdr) shdr[7];
4129         ElfW(Sym)  sym[2];
4130         struct DebugInfo di;
4131         uint8_t    da[24];
4132         char       str[80];
4133     };
4134 
4135     struct ElfImage *img;
4136 
4137     static const struct ElfImage img_template = {
4138         .ehdr = {
4139             .e_ident[EI_MAG0] = ELFMAG0,
4140             .e_ident[EI_MAG1] = ELFMAG1,
4141             .e_ident[EI_MAG2] = ELFMAG2,
4142             .e_ident[EI_MAG3] = ELFMAG3,
4143             .e_ident[EI_CLASS] = ELF_CLASS,
4144             .e_ident[EI_DATA] = ELF_DATA,
4145             .e_ident[EI_VERSION] = EV_CURRENT,
4146             .e_type = ET_EXEC,
4147             .e_machine = ELF_HOST_MACHINE,
4148             .e_version = EV_CURRENT,
4149             .e_phoff = offsetof(struct ElfImage, phdr),
4150             .e_shoff = offsetof(struct ElfImage, shdr),
4151             .e_ehsize = sizeof(ElfW(Shdr)),
4152             .e_phentsize = sizeof(ElfW(Phdr)),
4153             .e_phnum = 1,
4154             .e_shentsize = sizeof(ElfW(Shdr)),
4155             .e_shnum = ARRAY_SIZE(img->shdr),
4156             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4157 #ifdef ELF_HOST_FLAGS
4158             .e_flags = ELF_HOST_FLAGS,
4159 #endif
4160 #ifdef ELF_OSABI
4161             .e_ident[EI_OSABI] = ELF_OSABI,
4162 #endif
4163         },
4164         .phdr = {
4165             .p_type = PT_LOAD,
4166             .p_flags = PF_X,
4167         },
4168         .shdr = {
4169             [0] = { .sh_type = SHT_NULL },
4170             /* Trick: The contents of code_gen_buffer are not present in
4171                this fake ELF file; that got allocated elsewhere.  Therefore
4172                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4173                will not look for contents.  We can record any address.  */
4174             [1] = { /* .text */
4175                 .sh_type = SHT_NOBITS,
4176                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4177             },
4178             [2] = { /* .debug_info */
4179                 .sh_type = SHT_PROGBITS,
4180                 .sh_offset = offsetof(struct ElfImage, di),
4181                 .sh_size = sizeof(struct DebugInfo),
4182             },
4183             [3] = { /* .debug_abbrev */
4184                 .sh_type = SHT_PROGBITS,
4185                 .sh_offset = offsetof(struct ElfImage, da),
4186                 .sh_size = sizeof(img->da),
4187             },
4188             [4] = { /* .debug_frame */
4189                 .sh_type = SHT_PROGBITS,
4190                 .sh_offset = sizeof(struct ElfImage),
4191             },
4192             [5] = { /* .symtab */
4193                 .sh_type = SHT_SYMTAB,
4194                 .sh_offset = offsetof(struct ElfImage, sym),
4195                 .sh_size = sizeof(img->sym),
4196                 .sh_info = 1,
4197                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4198                 .sh_entsize = sizeof(ElfW(Sym)),
4199             },
4200             [6] = { /* .strtab */
4201                 .sh_type = SHT_STRTAB,
4202                 .sh_offset = offsetof(struct ElfImage, str),
4203                 .sh_size = sizeof(img->str),
4204             }
4205         },
4206         .sym = {
4207             [1] = { /* code_gen_buffer */
4208                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4209                 .st_shndx = 1,
4210             }
4211         },
4212         .di = {
4213             .len = sizeof(struct DebugInfo) - 4,
4214             .version = 2,
4215             .ptr_size = sizeof(void *),
4216             .cu_die = 1,
4217             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4218             .fn_die = 2,
4219             .fn_name = "code_gen_buffer"
4220         },
4221         .da = {
4222             1,          /* abbrev number (the cu) */
4223             0x11, 1,    /* DW_TAG_compile_unit, has children */
4224             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4225             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4226             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4227             0, 0,       /* end of abbrev */
4228             2,          /* abbrev number (the fn) */
4229             0x2e, 0,    /* DW_TAG_subprogram, no children */
4230             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4231             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4232             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4233             0, 0,       /* end of abbrev */
4234             0           /* no more abbrev */
4235         },
4236         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4237                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4238     };
4239 
4240     /* We only need a single jit entry; statically allocate it.  */
4241     static struct jit_code_entry one_entry;
4242 
4243     uintptr_t buf = (uintptr_t)buf_ptr;
4244     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4245     DebugFrameHeader *dfh;
4246 
4247     img = g_malloc(img_size);
4248     *img = img_template;
4249 
4250     img->phdr.p_vaddr = buf;
4251     img->phdr.p_paddr = buf;
4252     img->phdr.p_memsz = buf_size;
4253 
4254     img->shdr[1].sh_name = find_string(img->str, ".text");
4255     img->shdr[1].sh_addr = buf;
4256     img->shdr[1].sh_size = buf_size;
4257 
4258     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4259     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4260 
4261     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4262     img->shdr[4].sh_size = debug_frame_size;
4263 
4264     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4265     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4266 
4267     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4268     img->sym[1].st_value = buf;
4269     img->sym[1].st_size = buf_size;
4270 
4271     img->di.cu_low_pc = buf;
4272     img->di.cu_high_pc = buf + buf_size;
4273     img->di.fn_low_pc = buf;
4274     img->di.fn_high_pc = buf + buf_size;
4275 
4276     dfh = (DebugFrameHeader *)(img + 1);
4277     memcpy(dfh, debug_frame, debug_frame_size);
4278     dfh->fde.func_start = buf;
4279     dfh->fde.func_len = buf_size;
4280 
4281 #ifdef DEBUG_JIT
4282     /* Enable this block to be able to debug the ELF image file creation.
4283        One can use readelf, objdump, or other inspection utilities.  */
4284     {
4285         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4286         if (f) {
4287             if (fwrite(img, img_size, 1, f) != img_size) {
4288                 /* Avoid stupid unused return value warning for fwrite.  */
4289             }
4290             fclose(f);
4291         }
4292     }
4293 #endif
4294 
4295     one_entry.symfile_addr = img;
4296     one_entry.symfile_size = img_size;
4297 
4298     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4299     __jit_debug_descriptor.relevant_entry = &one_entry;
4300     __jit_debug_descriptor.first_entry = &one_entry;
4301     __jit_debug_register_code();
4302 }
4303 #else
4304 /* No support for the feature.  Provide the entry point expected by exec.c,
4305    and implement the internal function we declared earlier.  */
4306 
/* No-op: GDB JIT registration is unsupported on this host.  */
static void tcg_register_jit_int(void *buf, size_t size,
                                 const void *debug_frame,
                                 size_t debug_frame_size)
{
}
4312 
/* No-op entry point expected by exec.c when ELF_HOST_MACHINE is unset.  */
void tcg_register_jit(void *buf, size_t buf_size)
{
}
4316 #endif /* ELF_HOST_MACHINE */
4317 
4318 #if !TCG_TARGET_MAYBE_vec
/* The backend has no vector support, so vector-op expansion must never
   be requested.  */
void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
{
    g_assert_not_reached();
}
4323 #endif
4324