1 /*
2  * Tiny Code Generator for QEMU
3  *
4  * Copyright (c) 2008 Fabrice Bellard
5  *
6  * Permission is hereby granted, free of charge, to any person obtaining a copy
7  * of this software and associated documentation files (the "Software"), to deal
8  * in the Software without restriction, including without limitation the rights
9  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10  * copies of the Software, and to permit persons to whom the Software is
11  * furnished to do so, subject to the following conditions:
12  *
13  * The above copyright notice and this permission notice shall be included in
14  * all copies or substantial portions of the Software.
15  *
16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22  * THE SOFTWARE.
23  */
24 
25 /* define it to use liveness analysis (better code) */
26 #define USE_TCG_OPTIMIZATIONS
27 
28 #include "qemu/osdep.h"
29 
30 /* Define to jump the ELF file used to communicate with GDB.  */
31 #undef DEBUG_JIT
32 
33 #include "qemu/error-report.h"
34 #include "qemu/cutils.h"
35 #include "qemu/host-utils.h"
36 #include "qemu/qemu-print.h"
37 #include "qemu/timer.h"
38 
39 /* Note: the long term plan is to reduce the dependencies on the QEMU
40    CPU definitions. Currently they are used for qemu_ld/st
41    instructions */
42 #define NO_CPU_IO_DEFS
43 #include "cpu.h"
44 
45 #include "exec/exec-all.h"
46 
47 #if !defined(CONFIG_USER_ONLY)
48 #include "hw/boards.h"
49 #endif
50 
51 #include "tcg/tcg-op.h"
52 
53 #if UINTPTR_MAX == UINT32_MAX
54 # define ELF_CLASS  ELFCLASS32
55 #else
56 # define ELF_CLASS  ELFCLASS64
57 #endif
58 #ifdef HOST_WORDS_BIGENDIAN
59 # define ELF_DATA   ELFDATA2MSB
60 #else
61 # define ELF_DATA   ELFDATA2LSB
62 #endif
63 
64 #include "elf.h"
65 #include "exec/log.h"
66 #include "sysemu/sysemu.h"
67 
68 /* Forward declarations for functions declared in tcg-target.inc.c and
69    used here. */
70 static void tcg_target_init(TCGContext *s);
71 static const TCGTargetOpDef *tcg_target_op_def(TCGOpcode);
72 static void tcg_target_qemu_prologue(TCGContext *s);
73 static bool patch_reloc(tcg_insn_unit *code_ptr, int type,
74                         intptr_t value, intptr_t addend);
75 
76 /* The CIE and FDE header definitions will be common to all hosts.  */
77 typedef struct {
78     uint32_t len __attribute__((aligned((sizeof(void *)))));
79     uint32_t id;
80     uint8_t version;
81     char augmentation[1];
82     uint8_t code_align;
83     uint8_t data_align;
84     uint8_t return_column;
85 } DebugFrameCIE;
86 
87 typedef struct QEMU_PACKED {
88     uint32_t len __attribute__((aligned((sizeof(void *)))));
89     uint32_t cie_offset;
90     uintptr_t func_start;
91     uintptr_t func_len;
92 } DebugFrameFDEHeader;
93 
94 typedef struct QEMU_PACKED {
95     DebugFrameCIE cie;
96     DebugFrameFDEHeader fde;
97 } DebugFrameHeader;
98 
99 static void tcg_register_jit_int(void *buf, size_t size,
100                                  const void *debug_frame,
101                                  size_t debug_frame_size)
102     __attribute__((unused));
103 
104 /* Forward declarations for functions declared and used in tcg-target.inc.c. */
105 static const char *target_parse_constraint(TCGArgConstraint *ct,
106                                            const char *ct_str, TCGType type);
107 static void tcg_out_ld(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg1,
108                        intptr_t arg2);
109 static bool tcg_out_mov(TCGContext *s, TCGType type, TCGReg ret, TCGReg arg);
110 static void tcg_out_movi(TCGContext *s, TCGType type,
111                          TCGReg ret, tcg_target_long arg);
112 static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
113                        const int *const_args);
114 #if TCG_TARGET_MAYBE_vec
115 static bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
116                             TCGReg dst, TCGReg src);
117 static bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
118                              TCGReg dst, TCGReg base, intptr_t offset);
119 static void tcg_out_dupi_vec(TCGContext *s, TCGType type,
120                              TCGReg dst, tcg_target_long arg);
121 static void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
122                            unsigned vece, const TCGArg *args,
123                            const int *const_args);
124 #else
tcg_out_dup_vec(TCGContext * s,TCGType type,unsigned vece,TCGReg dst,TCGReg src)125 static inline bool tcg_out_dup_vec(TCGContext *s, TCGType type, unsigned vece,
126                                    TCGReg dst, TCGReg src)
127 {
128     g_assert_not_reached();
129 }
tcg_out_dupm_vec(TCGContext * s,TCGType type,unsigned vece,TCGReg dst,TCGReg base,intptr_t offset)130 static inline bool tcg_out_dupm_vec(TCGContext *s, TCGType type, unsigned vece,
131                                     TCGReg dst, TCGReg base, intptr_t offset)
132 {
133     g_assert_not_reached();
134 }
tcg_out_dupi_vec(TCGContext * s,TCGType type,TCGReg dst,tcg_target_long arg)135 static inline void tcg_out_dupi_vec(TCGContext *s, TCGType type,
136                                     TCGReg dst, tcg_target_long arg)
137 {
138     g_assert_not_reached();
139 }
tcg_out_vec_op(TCGContext * s,TCGOpcode opc,unsigned vecl,unsigned vece,const TCGArg * args,const int * const_args)140 static inline void tcg_out_vec_op(TCGContext *s, TCGOpcode opc, unsigned vecl,
141                                   unsigned vece, const TCGArg *args,
142                                   const int *const_args)
143 {
144     g_assert_not_reached();
145 }
146 #endif
147 static void tcg_out_st(TCGContext *s, TCGType type, TCGReg arg, TCGReg arg1,
148                        intptr_t arg2);
149 static bool tcg_out_sti(TCGContext *s, TCGType type, TCGArg val,
150                         TCGReg base, intptr_t ofs);
151 static void tcg_out_call(TCGContext *s, tcg_insn_unit *target);
152 static int tcg_target_const_match(tcg_target_long val, TCGType type,
153                                   const TCGArgConstraint *arg_ct);
154 #ifdef TCG_TARGET_NEED_LDST_LABELS
155 static int tcg_out_ldst_finalize(TCGContext *s);
156 #endif
157 
158 #define TCG_HIGHWATER 1024
159 
160 static TCGContext **tcg_ctxs;
161 static unsigned int n_tcg_ctxs;
162 TCGv_env cpu_env = 0;
163 #ifdef CONFIG_DEBUG_TCG
164 TCGv _pc_is_current = 0;
165 #endif
166 
167 struct tcg_region_tree {
168     QemuMutex lock;
169     GTree *tree;
170     /* padding to avoid false sharing is computed at run-time */
171 };
172 
173 /*
174  * We divide code_gen_buffer into equally-sized "regions" that TCG threads
175  * dynamically allocate from as demand dictates. Given appropriate region
176  * sizing, this minimizes flushes even when some TCG threads generate a lot
177  * more code than others.
178  */
179 struct tcg_region_state {
180     QemuMutex lock;
181 
182     /* fields set at init time */
183     void *start;
184     void *start_aligned;
185     void *end;
186     size_t n;
187     size_t size; /* size of one region */
188     size_t stride; /* .size + guard size */
189 
190     /* fields protected by the lock */
191     size_t current; /* current region index */
192     size_t agg_size_full; /* aggregate size of full regions */
193 };
194 
195 static struct tcg_region_state region;
196 /*
197  * This is an array of struct tcg_region_tree's, with padding.
198  * We use void * to simplify the computation of region_trees[i]; each
199  * struct is found every tree_size bytes.
200  */
201 static void *region_trees;
202 static size_t tree_size;
203 static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
204 static TCGRegSet tcg_target_call_clobber_regs;
205 
206 #if TCG_TARGET_INSN_UNIT_SIZE == 1
tcg_out8(TCGContext * s,uint8_t v)207 static __attribute__((unused)) inline void tcg_out8(TCGContext *s, uint8_t v)
208 {
209     *s->code_ptr++ = v;
210 }
211 
tcg_patch8(tcg_insn_unit * p,uint8_t v)212 static __attribute__((unused)) inline void tcg_patch8(tcg_insn_unit *p,
213                                                       uint8_t v)
214 {
215     *p = v;
216 }
217 #endif
218 
219 #if TCG_TARGET_INSN_UNIT_SIZE <= 2
tcg_out16(TCGContext * s,uint16_t v)220 static __attribute__((unused)) inline void tcg_out16(TCGContext *s, uint16_t v)
221 {
222     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
223         *s->code_ptr++ = v;
224     } else {
225         tcg_insn_unit *p = s->code_ptr;
226         memcpy(p, &v, sizeof(v));
227         s->code_ptr = p + (2 / TCG_TARGET_INSN_UNIT_SIZE);
228     }
229 }
230 
tcg_patch16(tcg_insn_unit * p,uint16_t v)231 static __attribute__((unused)) inline void tcg_patch16(tcg_insn_unit *p,
232                                                        uint16_t v)
233 {
234     if (TCG_TARGET_INSN_UNIT_SIZE == 2) {
235         *p = v;
236     } else {
237         memcpy(p, &v, sizeof(v));
238     }
239 }
240 #endif
241 
242 #if TCG_TARGET_INSN_UNIT_SIZE <= 4
tcg_out32(TCGContext * s,uint32_t v)243 static __attribute__((unused)) inline void tcg_out32(TCGContext *s, uint32_t v)
244 {
245     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
246         *s->code_ptr++ = v;
247     } else {
248         tcg_insn_unit *p = s->code_ptr;
249         memcpy(p, &v, sizeof(v));
250         s->code_ptr = p + (4 / TCG_TARGET_INSN_UNIT_SIZE);
251     }
252 }
253 
tcg_patch32(tcg_insn_unit * p,uint32_t v)254 static __attribute__((unused)) inline void tcg_patch32(tcg_insn_unit *p,
255                                                        uint32_t v)
256 {
257     if (TCG_TARGET_INSN_UNIT_SIZE == 4) {
258         *p = v;
259     } else {
260         memcpy(p, &v, sizeof(v));
261     }
262 }
263 #endif
264 
265 #if TCG_TARGET_INSN_UNIT_SIZE <= 8
tcg_out64(TCGContext * s,uint64_t v)266 static __attribute__((unused)) inline void tcg_out64(TCGContext *s, uint64_t v)
267 {
268     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
269         *s->code_ptr++ = v;
270     } else {
271         tcg_insn_unit *p = s->code_ptr;
272         memcpy(p, &v, sizeof(v));
273         s->code_ptr = p + (8 / TCG_TARGET_INSN_UNIT_SIZE);
274     }
275 }
276 
tcg_patch64(tcg_insn_unit * p,uint64_t v)277 static __attribute__((unused)) inline void tcg_patch64(tcg_insn_unit *p,
278                                                        uint64_t v)
279 {
280     if (TCG_TARGET_INSN_UNIT_SIZE == 8) {
281         *p = v;
282     } else {
283         memcpy(p, &v, sizeof(v));
284     }
285 }
286 #endif
287 
288 /* label relocation processing */
289 
tcg_out_reloc(TCGContext * s,tcg_insn_unit * code_ptr,int type,TCGLabel * l,intptr_t addend)290 static void tcg_out_reloc(TCGContext *s, tcg_insn_unit *code_ptr, int type,
291                           TCGLabel *l, intptr_t addend)
292 {
293     TCGRelocation *r = tcg_malloc(sizeof(TCGRelocation));
294 
295     r->type = type;
296     r->ptr = code_ptr;
297     r->addend = addend;
298     QSIMPLEQ_INSERT_TAIL(&l->relocs, r, next);
299 }
300 
tcg_out_label(TCGContext * s,TCGLabel * l,tcg_insn_unit * ptr)301 static void tcg_out_label(TCGContext *s, TCGLabel *l, tcg_insn_unit *ptr)
302 {
303     tcg_debug_assert(!l->has_value);
304     l->has_value = 1;
305     l->u.value_ptr = ptr;
306 }
307 
gen_new_label(void)308 TCGLabel *gen_new_label(void)
309 {
310     TCGContext *s = tcg_ctx;
311     TCGLabel *l = tcg_malloc(sizeof(TCGLabel));
312 
313     memset(l, 0, sizeof(TCGLabel));
314     l->id = s->nb_labels++;
315     QSIMPLEQ_INIT(&l->relocs);
316 
317     QSIMPLEQ_INSERT_TAIL(&s->labels, l, next);
318 
319     return l;
320 }
321 
tcg_resolve_relocs(TCGContext * s)322 static bool tcg_resolve_relocs(TCGContext *s)
323 {
324     TCGLabel *l;
325 
326     QSIMPLEQ_FOREACH(l, &s->labels, next) {
327         TCGRelocation *r;
328         uintptr_t value = l->u.value;
329 
330         QSIMPLEQ_FOREACH(r, &l->relocs, next) {
331             if (!patch_reloc(r->ptr, r->type, value, r->addend)) {
332                 return false;
333             }
334         }
335     }
336     return true;
337 }
338 
set_jmp_reset_offset(TCGContext * s,int which)339 static void set_jmp_reset_offset(TCGContext *s, int which)
340 {
341     size_t off = tcg_current_code_size(s);
342     s->tb_jmp_reset_offset[which] = off;
343     /* Make sure that we didn't overflow the stored offset.  */
344     assert(s->tb_jmp_reset_offset[which] == off);
345 }
346 
347 #include "tcg-target.inc.c"
348 
349 /* compare a pointer @ptr and a tb_tc @s */
ptr_cmp_tb_tc(const void * ptr,const struct tb_tc * s)350 static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
351 {
352     if (ptr >= s->ptr + s->size) {
353         return 1;
354     } else if (ptr < s->ptr) {
355         return -1;
356     }
357     return 0;
358 }
359 
tb_tc_cmp(gconstpointer ap,gconstpointer bp)360 static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
361 {
362     const struct tb_tc *a = ap;
363     const struct tb_tc *b = bp;
364 
365     /*
366      * When both sizes are set, we know this isn't a lookup.
367      * This is the most likely case: every TB must be inserted; lookups
368      * are a lot less frequent.
369      */
370     if (likely(a->size && b->size)) {
371         if (a->ptr > b->ptr) {
372             return 1;
373         } else if (a->ptr < b->ptr) {
374             return -1;
375         }
376         /* a->ptr == b->ptr should happen only on deletions */
377         g_assert(a->size == b->size);
378         return 0;
379     }
380     /*
381      * All lookups have either .size field set to 0.
382      * From the glib sources we see that @ap is always the lookup key. However
383      * the docs provide no guarantee, so we just mark this case as likely.
384      */
385     if (likely(a->size == 0)) {
386         return ptr_cmp_tb_tc(a->ptr, b);
387     }
388     return ptr_cmp_tb_tc(b->ptr, a);
389 }
390 
tcg_region_trees_init(void)391 static void tcg_region_trees_init(void)
392 {
393     size_t i;
394 
395     tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
396     region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
397     for (i = 0; i < region.n; i++) {
398         struct tcg_region_tree *rt = region_trees + i * tree_size;
399 
400         qemu_mutex_init(&rt->lock);
401         rt->tree = g_tree_new(tb_tc_cmp);
402     }
403 }
404 
tc_ptr_to_region_tree(void * p)405 static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
406 {
407     size_t region_idx;
408 
409     if (p < region.start_aligned) {
410         region_idx = 0;
411     } else {
412         ptrdiff_t offset = p - region.start_aligned;
413 
414         if (offset > region.stride * (region.n - 1)) {
415             region_idx = region.n - 1;
416         } else {
417             region_idx = offset / region.stride;
418         }
419     }
420     return region_trees + region_idx * tree_size;
421 }
422 
tcg_tb_insert(TranslationBlock * tb)423 void tcg_tb_insert(TranslationBlock *tb)
424 {
425     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
426 
427     qemu_mutex_lock(&rt->lock);
428     g_tree_insert(rt->tree, &tb->tc, tb);
429     qemu_mutex_unlock(&rt->lock);
430 }
431 
tcg_tb_remove(TranslationBlock * tb)432 void tcg_tb_remove(TranslationBlock *tb)
433 {
434     struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
435 
436     qemu_mutex_lock(&rt->lock);
437     g_tree_remove(rt->tree, &tb->tc);
438     qemu_mutex_unlock(&rt->lock);
439 }
440 
441 /*
442  * Find the TB 'tb' such that
443  * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
444  * Return NULL if not found.
445  */
tcg_tb_lookup(uintptr_t tc_ptr)446 TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
447 {
448     struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
449     TranslationBlock *tb;
450     struct tb_tc s = { .ptr = (void *)tc_ptr };
451 
452     qemu_mutex_lock(&rt->lock);
453     tb = g_tree_lookup(rt->tree, &s);
454     qemu_mutex_unlock(&rt->lock);
455     return tb;
456 }
457 
tcg_region_tree_lock_all(void)458 static void tcg_region_tree_lock_all(void)
459 {
460     size_t i;
461 
462     for (i = 0; i < region.n; i++) {
463         struct tcg_region_tree *rt = region_trees + i * tree_size;
464 
465         qemu_mutex_lock(&rt->lock);
466     }
467 }
468 
tcg_region_tree_unlock_all(void)469 static void tcg_region_tree_unlock_all(void)
470 {
471     size_t i;
472 
473     for (i = 0; i < region.n; i++) {
474         struct tcg_region_tree *rt = region_trees + i * tree_size;
475 
476         qemu_mutex_unlock(&rt->lock);
477     }
478 }
479 
tcg_tb_foreach(GTraverseFunc func,gpointer user_data)480 void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
481 {
482     size_t i;
483 
484     tcg_region_tree_lock_all();
485     for (i = 0; i < region.n; i++) {
486         struct tcg_region_tree *rt = region_trees + i * tree_size;
487 
488         g_tree_foreach(rt->tree, func, user_data);
489     }
490     tcg_region_tree_unlock_all();
491 }
492 
tcg_nb_tbs(void)493 size_t tcg_nb_tbs(void)
494 {
495     size_t nb_tbs = 0;
496     size_t i;
497 
498     tcg_region_tree_lock_all();
499     for (i = 0; i < region.n; i++) {
500         struct tcg_region_tree *rt = region_trees + i * tree_size;
501 
502         nb_tbs += g_tree_nnodes(rt->tree);
503     }
504     tcg_region_tree_unlock_all();
505     return nb_tbs;
506 }
507 
tcg_region_tree_reset_all(void)508 static void tcg_region_tree_reset_all(void)
509 {
510     size_t i;
511 
512     tcg_region_tree_lock_all();
513     for (i = 0; i < region.n; i++) {
514         struct tcg_region_tree *rt = region_trees + i * tree_size;
515 
516         /* Increment the refcount first so that destroy acts as a reset */
517         g_tree_ref(rt->tree);
518         g_tree_destroy(rt->tree);
519     }
520     tcg_region_tree_unlock_all();
521 }
522 
tcg_region_bounds(size_t curr_region,void ** pstart,void ** pend)523 static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
524 {
525     void *start, *end;
526 
527     start = region.start_aligned + curr_region * region.stride;
528     end = start + region.size;
529 
530     if (curr_region == 0) {
531         start = region.start;
532     }
533     if (curr_region == region.n - 1) {
534         end = region.end;
535     }
536 
537     *pstart = start;
538     *pend = end;
539 }
540 
tcg_region_assign(TCGContext * s,size_t curr_region)541 static void tcg_region_assign(TCGContext *s, size_t curr_region)
542 {
543     void *start, *end;
544 
545     tcg_region_bounds(curr_region, &start, &end);
546 
547     s->code_gen_buffer = start;
548     s->code_gen_ptr = start;
549     s->code_gen_buffer_size = end - start;
550     s->code_gen_highwater = end - TCG_HIGHWATER;
551 }
552 
tcg_region_alloc__locked(TCGContext * s)553 static bool tcg_region_alloc__locked(TCGContext *s)
554 {
555     if (region.current == region.n) {
556         return true;
557     }
558     tcg_region_assign(s, region.current);
559     region.current++;
560     return false;
561 }
562 
563 /*
564  * Request a new region once the one in use has filled up.
565  * Returns true on error.
566  */
tcg_region_alloc(TCGContext * s)567 static bool tcg_region_alloc(TCGContext *s)
568 {
569     bool err;
570     /* read the region size now; alloc__locked will overwrite it on success */
571     size_t size_full = s->code_gen_buffer_size;
572 
573     qemu_mutex_lock(&region.lock);
574     err = tcg_region_alloc__locked(s);
575     if (!err) {
576         region.agg_size_full += size_full - TCG_HIGHWATER;
577     }
578     qemu_mutex_unlock(&region.lock);
579     return err;
580 }
581 
582 /*
583  * Perform a context's first region allocation.
584  * This function does _not_ increment region.agg_size_full.
585  */
tcg_region_initial_alloc__locked(TCGContext * s)586 static inline bool tcg_region_initial_alloc__locked(TCGContext *s)
587 {
588     return tcg_region_alloc__locked(s);
589 }
590 
591 /* Call from a safe-work context */
tcg_region_reset_all(void)592 void tcg_region_reset_all(void)
593 {
594     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
595     unsigned int i;
596 
597     qemu_mutex_lock(&region.lock);
598     region.current = 0;
599     region.agg_size_full = 0;
600 
601     for (i = 0; i < n_ctxs; i++) {
602         TCGContext *s = atomic_read(&tcg_ctxs[i]);
603         bool err = tcg_region_initial_alloc__locked(s);
604 
605         g_assert(!err);
606     }
607     qemu_mutex_unlock(&region.lock);
608 
609     tcg_region_tree_reset_all();
610 }
611 
612 #ifdef CONFIG_USER_ONLY
tcg_n_regions(void)613 static size_t tcg_n_regions(void)
614 {
615     return 1;
616 }
617 #else
618 /*
619  * It is likely that some vCPUs will translate more code than others, so we
620  * first try to set more regions than max_cpus, with those regions being of
621  * reasonable size. If that's not possible we make do by evenly dividing
622  * the code_gen_buffer among the vCPUs.
623  */
tcg_n_regions(void)624 static size_t tcg_n_regions(void)
625 {
626     size_t i;
627 
628     /* Use a single region if all we have is one vCPU thread */
629 #if !defined(CONFIG_USER_ONLY)
630     MachineState *ms = MACHINE(qdev_get_machine());
631     unsigned int max_cpus = ms->smp.max_cpus;
632 #endif
633     if (max_cpus == 1 || !qemu_tcg_mttcg_enabled()) {
634         return 1;
635     }
636 
637     /* Try to have more regions than max_cpus, with each region being >= 2 MB */
638     for (i = 8; i > 0; i--) {
639         size_t regions_per_thread = i;
640         size_t region_size;
641 
642         region_size = tcg_init_ctx.code_gen_buffer_size;
643         region_size /= max_cpus * regions_per_thread;
644 
645         if (region_size >= 2 * 1024u * 1024) {
646             return max_cpus * regions_per_thread;
647         }
648     }
649     /* If we can't, then just allocate one region per vCPU thread */
650     return max_cpus;
651 }
652 #endif
653 
654 /*
655  * Initializes region partitioning.
656  *
657  * Called at init time from the parent thread (i.e. the one calling
658  * tcg_context_init), after the target's TCG globals have been set.
659  *
660  * Region partitioning works by splitting code_gen_buffer into separate regions,
661  * and then assigning regions to TCG threads so that the threads can translate
662  * code in parallel without synchronization.
663  *
664  * In softmmu the number of TCG threads is bounded by max_cpus, so we use at
665  * least max_cpus regions in MTTCG. In !MTTCG we use a single region.
666  * Note that the TCG options from the command-line (i.e. -accel accel=tcg,[...])
667  * must have been parsed before calling this function, since it calls
668  * qemu_tcg_mttcg_enabled().
669  *
670  * In user-mode we use a single region.  Having multiple regions in user-mode
671  * is not supported, because the number of vCPU threads (recall that each thread
672  * spawned by the guest corresponds to a vCPU thread) is only bounded by the
673  * OS, and usually this number is huge (tens of thousands is not uncommon).
674  * Thus, given this large bound on the number of vCPU threads and the fact
675  * that code_gen_buffer is allocated at compile-time, we cannot guarantee
676  * that the availability of at least one region per vCPU thread.
677  *
678  * However, this user-mode limitation is unlikely to be a significant problem
679  * in practice. Multi-threaded guests share most if not all of their translated
680  * code, which makes parallel code generation less appealing than in softmmu.
681  */
tcg_region_init(void)682 void tcg_region_init(void)
683 {
684     void *buf = tcg_init_ctx.code_gen_buffer;
685     void *aligned;
686     size_t size = tcg_init_ctx.code_gen_buffer_size;
687     size_t page_size = qemu_real_host_page_size;
688     size_t region_size;
689     size_t n_regions;
690     size_t i;
691 
692     n_regions = tcg_n_regions();
693 
694     /* The first region will be 'aligned - buf' bytes larger than the others */
695     aligned = QEMU_ALIGN_PTR_UP(buf, page_size);
696     g_assert(aligned < tcg_init_ctx.code_gen_buffer + size);
697     /*
698      * Make region_size a multiple of page_size, using aligned as the start.
699      * As a result of this we might end up with a few extra pages at the end of
700      * the buffer; we will assign those to the last region.
701      */
702     region_size = (size - (aligned - buf)) / n_regions;
703     region_size = QEMU_ALIGN_DOWN(region_size, page_size);
704 
705     /* A region must have at least 2 pages; one code, one guard */
706     g_assert(region_size >= 2 * page_size);
707 
708     /* init the region struct */
709     qemu_mutex_init(&region.lock);
710     region.n = n_regions;
711     region.size = region_size - page_size;
712     region.stride = region_size;
713     region.start = buf;
714     region.start_aligned = aligned;
715     /* page-align the end, since its last page will be a guard page */
716     region.end = QEMU_ALIGN_PTR_DOWN(buf + size, page_size);
717     /* account for that last guard page */
718     region.end -= page_size;
719 
720     /* set guard pages */
721     for (i = 0; i < region.n; i++) {
722         void *start, *end;
723         int rc;
724 
725         tcg_region_bounds(i, &start, &end);
726         rc = qemu_mprotect_none(end, page_size);
727         g_assert(!rc);
728     }
729 
730     tcg_region_trees_init();
731 
732     /* In user-mode we support only one ctx, so do the initial allocation now */
733 #ifdef CONFIG_USER_ONLY
734     {
735         bool err = tcg_region_initial_alloc__locked(tcg_ctx);
736 
737         g_assert(!err);
738     }
739 #endif
740 }
741 
alloc_tcg_plugin_context(TCGContext * s)742 static void alloc_tcg_plugin_context(TCGContext *s)
743 {
744 #ifdef CONFIG_PLUGIN
745     s->plugin_tb = g_new0(struct qemu_plugin_tb, 1);
746     s->plugin_tb->insns =
747         g_ptr_array_new_with_free_func(qemu_plugin_insn_cleanup_fn);
748 #endif
749 }
750 
751 /*
752  * All TCG threads except the parent (i.e. the one that called tcg_context_init
753  * and registered the target's TCG globals) must register with this function
754  * before initiating translation.
755  *
756  * In user-mode we just point tcg_ctx to tcg_init_ctx. See the documentation
757  * of tcg_region_init() for the reasoning behind this.
758  *
759  * In softmmu each caller registers its context in tcg_ctxs[]. Note that in
760  * softmmu tcg_ctxs[] does not track tcg_ctx_init, since the initial context
761  * is not used anymore for translation once this function is called.
762  *
763  * Not tracking tcg_init_ctx in tcg_ctxs[] in softmmu keeps code that iterates
764  * over the array (e.g. tcg_code_size() the same for both softmmu and user-mode.
765  */
766 #ifdef CONFIG_USER_ONLY
tcg_register_thread(void)767 void tcg_register_thread(void)
768 {
769     tcg_ctx = &tcg_init_ctx;
770 }
771 #else
tcg_register_thread(void)772 void tcg_register_thread(void)
773 {
774     MachineState *ms = MACHINE(qdev_get_machine());
775     TCGContext *s = g_malloc(sizeof(*s));
776     unsigned int i, n;
777     bool err;
778 
779     *s = tcg_init_ctx;
780 
781     /* Relink mem_base.  */
782     for (i = 0, n = tcg_init_ctx.nb_globals; i < n; ++i) {
783         if (tcg_init_ctx.temps[i].mem_base) {
784             ptrdiff_t b = tcg_init_ctx.temps[i].mem_base - tcg_init_ctx.temps;
785             tcg_debug_assert(b >= 0 && b < n);
786             s->temps[i].mem_base = &s->temps[b];
787         }
788     }
789 
790     /* Claim an entry in tcg_ctxs */
791     n = atomic_fetch_inc(&n_tcg_ctxs);
792     g_assert(n < ms->smp.max_cpus);
793     atomic_set(&tcg_ctxs[n], s);
794 
795     if (n > 0) {
796         alloc_tcg_plugin_context(s);
797     }
798 
799     tcg_ctx = s;
800     qemu_mutex_lock(&region.lock);
801     err = tcg_region_initial_alloc__locked(tcg_ctx);
802     g_assert(!err);
803     qemu_mutex_unlock(&region.lock);
804 }
805 #endif /* !CONFIG_USER_ONLY */
806 
807 /*
808  * Returns the size (in bytes) of all translated code (i.e. from all regions)
809  * currently in the cache.
810  * See also: tcg_code_capacity()
811  * Do not confuse with tcg_current_code_size(); that one applies to a single
812  * TCG context.
813  */
tcg_code_size(void)814 size_t tcg_code_size(void)
815 {
816     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
817     unsigned int i;
818     size_t total;
819 
820     qemu_mutex_lock(&region.lock);
821     total = region.agg_size_full;
822     for (i = 0; i < n_ctxs; i++) {
823         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
824         size_t size;
825 
826         size = atomic_read(&s->code_gen_ptr) - s->code_gen_buffer;
827         g_assert(size <= s->code_gen_buffer_size);
828         total += size;
829     }
830     qemu_mutex_unlock(&region.lock);
831     return total;
832 }
833 
834 /*
835  * Returns the code capacity (in bytes) of the entire cache, i.e. including all
836  * regions.
837  * See also: tcg_code_size()
838  */
tcg_code_capacity(void)839 size_t tcg_code_capacity(void)
840 {
841     size_t guard_size, capacity;
842 
843     /* no need for synchronization; these variables are set at init time */
844     guard_size = region.stride - region.size;
845     capacity = region.end + guard_size - region.start;
846     capacity -= region.n * (guard_size + TCG_HIGHWATER);
847     return capacity;
848 }
849 
tcg_tb_phys_invalidate_count(void)850 size_t tcg_tb_phys_invalidate_count(void)
851 {
852     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
853     unsigned int i;
854     size_t total = 0;
855 
856     for (i = 0; i < n_ctxs; i++) {
857         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
858 
859         total += atomic_read(&s->tb_phys_invalidate_count);
860     }
861     return total;
862 }
863 
864 /* pool based memory allocation */
tcg_malloc_internal(TCGContext * s,int size)865 void *tcg_malloc_internal(TCGContext *s, int size)
866 {
867     TCGPool *p;
868     int pool_size;
869 
870     if (size > TCG_POOL_CHUNK_SIZE) {
871         /* big malloc: insert a new pool (XXX: could optimize) */
872         p = g_malloc(sizeof(TCGPool) + size);
873         p->size = size;
874         p->next = s->pool_first_large;
875         s->pool_first_large = p;
876         return p->data;
877     } else {
878         p = s->pool_current;
879         if (!p) {
880             p = s->pool_first;
881             if (!p)
882                 goto new_pool;
883         } else {
884             if (!p->next) {
885             new_pool:
886                 pool_size = TCG_POOL_CHUNK_SIZE;
887                 p = g_malloc(sizeof(TCGPool) + pool_size);
888                 p->size = pool_size;
889                 p->next = NULL;
890                 if (s->pool_current)
891                     s->pool_current->next = p;
892                 else
893                     s->pool_first = p;
894             } else {
895                 p = p->next;
896             }
897         }
898     }
899     s->pool_current = p;
900     s->pool_cur = p->data + size;
901     s->pool_end = p->data + p->size;
902     return p->data;
903 }
904 
tcg_pool_reset(TCGContext * s)905 void tcg_pool_reset(TCGContext *s)
906 {
907     TCGPool *p, *t;
908     for (p = s->pool_first_large; p; p = t) {
909         t = p->next;
910         g_free(p);
911     }
912     s->pool_first_large = NULL;
913     s->pool_cur = s->pool_end = NULL;
914     s->pool_current = NULL;
915 }
916 
917 typedef struct TCGHelperInfo {
918     void *func;
919     const char *name;
920     unsigned flags;
921     unsigned sizemask;
922 } TCGHelperInfo;
923 
924 #include "exec/helper-proto.h"
925 
926 static const TCGHelperInfo all_helpers[] = {
927 #include "exec/helper-tcg.h"
928 };
929 static GHashTable *helper_table;
930 
931 static int indirect_reg_alloc_order[ARRAY_SIZE(tcg_target_reg_alloc_order)];
932 static void process_op_defs(TCGContext *s);
933 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
934                                             TCGReg reg, const char *name);
935 
tcg_context_init(TCGContext * s)936 void tcg_context_init(TCGContext *s)
937 {
938     int op, total_args, n, i;
939     TCGOpDef *def;
940     TCGArgConstraint *args_ct;
941     int *sorted_args;
942     TCGTemp *ts;
943 
944     memset(s, 0, sizeof(*s));
945     s->nb_globals = 0;
946 
947     /* Count total number of arguments and allocate the corresponding
948        space */
949     total_args = 0;
950     for(op = 0; op < NB_OPS; op++) {
951         def = &tcg_op_defs[op];
952         n = def->nb_iargs + def->nb_oargs;
953         total_args += n;
954     }
955 
956     args_ct = g_malloc(sizeof(TCGArgConstraint) * total_args);
957     sorted_args = g_malloc(sizeof(int) * total_args);
958 
959     for(op = 0; op < NB_OPS; op++) {
960         def = &tcg_op_defs[op];
961         def->args_ct = args_ct;
962         def->sorted_args = sorted_args;
963         n = def->nb_iargs + def->nb_oargs;
964         sorted_args += n;
965         args_ct += n;
966     }
967 
968     /* Register helpers.  */
969     /* Use g_direct_hash/equal for direct pointer comparisons on func.  */
970     helper_table = g_hash_table_new(NULL, NULL);
971 
972     for (i = 0; i < ARRAY_SIZE(all_helpers); ++i) {
973         g_hash_table_insert(helper_table, (gpointer)all_helpers[i].func,
974                             (gpointer)&all_helpers[i]);
975     }
976 
977     tcg_target_init(s);
978     process_op_defs(s);
979 
980     /* Reverse the order of the saved registers, assuming they're all at
981        the start of tcg_target_reg_alloc_order.  */
982     for (n = 0; n < ARRAY_SIZE(tcg_target_reg_alloc_order); ++n) {
983         int r = tcg_target_reg_alloc_order[n];
984         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, r)) {
985             break;
986         }
987     }
988     for (i = 0; i < n; ++i) {
989         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[n - 1 - i];
990     }
991     for (; i < ARRAY_SIZE(tcg_target_reg_alloc_order); ++i) {
992         indirect_reg_alloc_order[i] = tcg_target_reg_alloc_order[i];
993     }
994 
995     alloc_tcg_plugin_context(s);
996 
997     tcg_ctx = s;
998     /*
999      * In user-mode we simply share the init context among threads, since we
1000      * use a single region. See the documentation tcg_region_init() for the
1001      * reasoning behind this.
1002      * In softmmu we will have at most max_cpus TCG threads.
1003      */
1004 #ifdef CONFIG_USER_ONLY
1005     tcg_ctxs = &tcg_ctx;
1006     n_tcg_ctxs = 1;
1007 #else
1008     MachineState *ms = MACHINE(qdev_get_machine());
1009     unsigned int max_cpus = ms->smp.max_cpus;
1010     tcg_ctxs = g_new(TCGContext *, max_cpus);
1011 #endif
1012 
1013     tcg_debug_assert(!tcg_regset_test_reg(s->reserved_regs, TCG_AREG0));
1014     ts = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, TCG_AREG0, "env");
1015     cpu_env = temp_tcgv_ptr(ts);
1016 }
1017 
1018 /*
1019  * Allocate TBs right before their corresponding translated code, making
1020  * sure that TBs and code are on different cache lines.
1021  */
tcg_tb_alloc(TCGContext * s)1022 TranslationBlock *tcg_tb_alloc(TCGContext *s)
1023 {
1024     uintptr_t align = qemu_icache_linesize;
1025     TranslationBlock *tb;
1026     void *next;
1027 
1028  retry:
1029     tb = (void *)ROUND_UP((uintptr_t)s->code_gen_ptr, align);
1030     next = (void *)ROUND_UP((uintptr_t)(tb + 1), align);
1031 
1032     if (unlikely(next > s->code_gen_highwater)) {
1033         if (tcg_region_alloc(s)) {
1034             return NULL;
1035         }
1036         goto retry;
1037     }
1038     atomic_set(&s->code_gen_ptr, next);
1039     s->data_gen_ptr = NULL;
1040     return tb;
1041 }
1042 
tcg_prologue_init(TCGContext * s)1043 void tcg_prologue_init(TCGContext *s)
1044 {
1045     size_t prologue_size, total_size;
1046     void *buf0, *buf1;
1047 
1048     /* Put the prologue at the beginning of code_gen_buffer.  */
1049     buf0 = s->code_gen_buffer;
1050     total_size = s->code_gen_buffer_size;
1051     s->code_ptr = buf0;
1052     s->code_buf = buf0;
1053     s->data_gen_ptr = NULL;
1054     s->code_gen_prologue = buf0;
1055 
1056     /* Compute a high-water mark, at which we voluntarily flush the buffer
1057        and start over.  The size here is arbitrary, significantly larger
1058        than we expect the code generation for any one opcode to require.  */
1059     s->code_gen_highwater = s->code_gen_buffer + (total_size - TCG_HIGHWATER);
1060 
1061 #ifdef TCG_TARGET_NEED_POOL_LABELS
1062     s->pool_labels = NULL;
1063 #endif
1064 
1065     /* Generate the prologue.  */
1066     tcg_target_qemu_prologue(s);
1067 
1068 #ifdef TCG_TARGET_NEED_POOL_LABELS
1069     /* Allow the prologue to put e.g. guest_base into a pool entry.  */
1070     {
1071         int result = tcg_out_pool_finalize(s);
1072         tcg_debug_assert(result == 0);
1073     }
1074 #endif
1075 
1076     buf1 = s->code_ptr;
1077     flush_icache_range((uintptr_t)buf0, (uintptr_t)buf1);
1078 
1079     /* Deduct the prologue from the buffer.  */
1080     prologue_size = tcg_current_code_size(s);
1081     s->code_gen_ptr = buf1;
1082     s->code_gen_buffer = buf1;
1083     s->code_buf = buf1;
1084     total_size -= prologue_size;
1085     s->code_gen_buffer_size = total_size;
1086 
1087     tcg_register_jit(s->code_gen_buffer, total_size);
1088 
1089 #ifdef DEBUG_DISAS
1090     if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM)) {
1091         FILE *logfile = qemu_log_lock();
1092         qemu_log("PROLOGUE: [size=%zu]\n", prologue_size);
1093         if (s->data_gen_ptr) {
1094             size_t code_size = s->data_gen_ptr - buf0;
1095             size_t data_size = prologue_size - code_size;
1096             size_t i;
1097 
1098             log_disas(buf0, code_size);
1099 
1100             for (i = 0; i < data_size; i += sizeof(tcg_target_ulong)) {
1101                 if (sizeof(tcg_target_ulong) == 8) {
1102                     qemu_log("0x%08" PRIxPTR ":  .quad  0x%016" PRIx64 "\n",
1103                              (uintptr_t)s->data_gen_ptr + i,
1104                              *(uint64_t *)(s->data_gen_ptr + i));
1105                 } else {
1106                     qemu_log("0x%08" PRIxPTR ":  .long  0x%08x\n",
1107                              (uintptr_t)s->data_gen_ptr + i,
1108                              *(uint32_t *)(s->data_gen_ptr + i));
1109                 }
1110             }
1111         } else {
1112             log_disas(buf0, prologue_size);
1113         }
1114         qemu_log("\n");
1115         qemu_log_flush();
1116         qemu_log_unlock(logfile);
1117     }
1118 #endif
1119 
1120     /* Assert that goto_ptr is implemented completely.  */
1121     if (TCG_TARGET_HAS_goto_ptr) {
1122         tcg_debug_assert(s->code_gen_epilogue != NULL);
1123     }
1124 }
1125 
tcg_func_start(TCGContext * s)1126 void tcg_func_start(TCGContext *s)
1127 {
1128     tcg_pool_reset(s);
1129     s->nb_temps = s->nb_globals;
1130 
1131     /* No temps have been previously allocated for size or locality.  */
1132     memset(s->free_temps, 0, sizeof(s->free_temps));
1133 
1134     s->nb_ops = 0;
1135     s->nb_labels = 0;
1136     s->current_frame_offset = s->frame_start;
1137 
1138 #ifdef CONFIG_DEBUG_TCG
1139     s->goto_tb_issue_mask = 0;
1140 #endif
1141 
1142     QTAILQ_INIT(&s->ops);
1143     QTAILQ_INIT(&s->free_ops);
1144     QSIMPLEQ_INIT(&s->labels);
1145 }
1146 
tcg_temp_alloc(TCGContext * s)1147 static inline TCGTemp *tcg_temp_alloc(TCGContext *s)
1148 {
1149     int n = s->nb_temps++;
1150     tcg_debug_assert(n < TCG_MAX_TEMPS);
1151     return memset(&s->temps[n], 0, sizeof(TCGTemp));
1152 }
1153 
tcg_global_alloc(TCGContext * s)1154 static inline TCGTemp *tcg_global_alloc(TCGContext *s)
1155 {
1156     TCGTemp *ts;
1157 
1158     tcg_debug_assert(s->nb_globals == s->nb_temps);
1159     s->nb_globals++;
1160     ts = tcg_temp_alloc(s);
1161     ts->temp_global = 1;
1162 
1163     return ts;
1164 }
1165 
tcg_global_reg_new_internal(TCGContext * s,TCGType type,TCGReg reg,const char * name)1166 static TCGTemp *tcg_global_reg_new_internal(TCGContext *s, TCGType type,
1167                                             TCGReg reg, const char *name)
1168 {
1169     TCGTemp *ts;
1170 
1171     if (TCG_TARGET_REG_BITS == 32 && type != TCG_TYPE_I32) {
1172         tcg_abort();
1173     }
1174 
1175     ts = tcg_global_alloc(s);
1176     ts->base_type = type;
1177     ts->type = type;
1178     ts->fixed_reg = 1;
1179     ts->reg = reg;
1180     ts->name = name;
1181     tcg_regset_set_reg(s->reserved_regs, reg);
1182 
1183     return ts;
1184 }
1185 
tcg_set_frame(TCGContext * s,TCGReg reg,intptr_t start,intptr_t size)1186 void tcg_set_frame(TCGContext *s, TCGReg reg, intptr_t start, intptr_t size)
1187 {
1188     s->frame_start = start;
1189     s->frame_end = start + size;
1190     s->frame_temp
1191         = tcg_global_reg_new_internal(s, TCG_TYPE_PTR, reg, "_frame");
1192 }
1193 
tcg_global_mem_new_internal(TCGType type,TCGv_ptr base,intptr_t offset,const char * name)1194 TCGTemp *tcg_global_mem_new_internal(TCGType type, TCGv_ptr base,
1195                                      intptr_t offset, const char *name)
1196 {
1197     TCGContext *s = tcg_ctx;
1198     TCGTemp *base_ts = tcgv_ptr_temp(base);
1199     TCGTemp *ts = tcg_global_alloc(s);
1200     int indirect_reg = 0, bigendian = 0;
1201 #ifdef HOST_WORDS_BIGENDIAN
1202     bigendian = 1;
1203 #endif
1204 
1205     if (!base_ts->fixed_reg) {
1206         /* We do not support double-indirect registers.  */
1207         tcg_debug_assert(!base_ts->indirect_reg);
1208         base_ts->indirect_base = 1;
1209         s->nb_indirects += (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64
1210                             ? 2 : 1);
1211         indirect_reg = 1;
1212     }
1213 
1214     if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1215         TCGTemp *ts2 = tcg_global_alloc(s);
1216         char buf[64];
1217 
1218         ts->base_type = TCG_TYPE_I64;
1219         ts->type = TCG_TYPE_I32;
1220         ts->indirect_reg = indirect_reg;
1221         ts->mem_allocated = 1;
1222         ts->mem_base = base_ts;
1223         ts->mem_offset = offset + bigendian * 4;
1224         pstrcpy(buf, sizeof(buf), name);
1225         pstrcat(buf, sizeof(buf), "_0");
1226         ts->name = strdup(buf);
1227 
1228         tcg_debug_assert(ts2 == ts + 1);
1229         ts2->base_type = TCG_TYPE_I64;
1230         ts2->type = TCG_TYPE_I32;
1231         ts2->indirect_reg = indirect_reg;
1232         ts2->mem_allocated = 1;
1233         ts2->mem_base = base_ts;
1234         ts2->mem_offset = offset + (1 - bigendian) * 4;
1235         pstrcpy(buf, sizeof(buf), name);
1236         pstrcat(buf, sizeof(buf), "_1");
1237         ts2->name = strdup(buf);
1238     } else {
1239         ts->base_type = type;
1240         ts->type = type;
1241         ts->indirect_reg = indirect_reg;
1242         ts->mem_allocated = 1;
1243         ts->mem_base = base_ts;
1244         ts->mem_offset = offset;
1245         ts->name = name;
1246     }
1247     return ts;
1248 }
1249 
tcg_temp_new_internal(TCGType type,bool temp_local)1250 TCGTemp *tcg_temp_new_internal(TCGType type, bool temp_local)
1251 {
1252     TCGContext *s = tcg_ctx;
1253     TCGTemp *ts;
1254     int idx, k;
1255 
1256     k = type + (temp_local ? TCG_TYPE_COUNT : 0);
1257     idx = find_first_bit(s->free_temps[k].l, TCG_MAX_TEMPS);
1258     if (idx < TCG_MAX_TEMPS) {
1259         /* There is already an available temp with the right type.  */
1260         clear_bit(idx, s->free_temps[k].l);
1261 
1262         ts = &s->temps[idx];
1263         ts->temp_allocated = 1;
1264         tcg_debug_assert(ts->base_type == type);
1265         tcg_debug_assert(ts->temp_local == temp_local);
1266     } else {
1267         ts = tcg_temp_alloc(s);
1268         if (TCG_TARGET_REG_BITS == 32 && type == TCG_TYPE_I64) {
1269             TCGTemp *ts2 = tcg_temp_alloc(s);
1270 
1271             ts->base_type = type;
1272             ts->type = TCG_TYPE_I32;
1273             ts->temp_allocated = 1;
1274             ts->temp_local = temp_local;
1275 
1276             tcg_debug_assert(ts2 == ts + 1);
1277             ts2->base_type = TCG_TYPE_I64;
1278             ts2->type = TCG_TYPE_I32;
1279             ts2->temp_allocated = 1;
1280             ts2->temp_local = temp_local;
1281         } else {
1282             ts->base_type = type;
1283             ts->type = type;
1284             ts->temp_allocated = 1;
1285             ts->temp_local = temp_local;
1286         }
1287     }
1288 
1289 #if defined(CONFIG_DEBUG_TCG)
1290     s->temps_in_use++;
1291 #endif
1292     return ts;
1293 }
1294 
tcg_temp_new_vec(TCGType type)1295 TCGv_vec tcg_temp_new_vec(TCGType type)
1296 {
1297     TCGTemp *t;
1298 
1299 #ifdef CONFIG_DEBUG_TCG
1300     switch (type) {
1301     case TCG_TYPE_V64:
1302         assert(TCG_TARGET_HAS_v64);
1303         break;
1304     case TCG_TYPE_V128:
1305         assert(TCG_TARGET_HAS_v128);
1306         break;
1307     case TCG_TYPE_V256:
1308         assert(TCG_TARGET_HAS_v256);
1309         break;
1310     default:
1311         g_assert_not_reached();
1312     }
1313 #endif
1314 
1315     t = tcg_temp_new_internal(type, 0);
1316     return temp_tcgv_vec(t);
1317 }
1318 
1319 /* Create a new temp of the same type as an existing temp.  */
tcg_temp_new_vec_matching(TCGv_vec match)1320 TCGv_vec tcg_temp_new_vec_matching(TCGv_vec match)
1321 {
1322     TCGTemp *t = tcgv_vec_temp(match);
1323 
1324     tcg_debug_assert(t->temp_allocated != 0);
1325 
1326     t = tcg_temp_new_internal(t->base_type, 0);
1327     return temp_tcgv_vec(t);
1328 }
1329 
tcg_temp_free_internal(TCGTemp * ts)1330 void tcg_temp_free_internal(TCGTemp *ts)
1331 {
1332     TCGContext *s = tcg_ctx;
1333     int k, idx;
1334 
1335 #if defined(CONFIG_DEBUG_TCG)
1336     s->temps_in_use--;
1337     if (s->temps_in_use < 0) {
1338         fprintf(stderr, "More temporaries freed than allocated!\n");
1339     }
1340 #endif
1341 
1342     tcg_debug_assert(ts->temp_global == 0);
1343     tcg_debug_assert(ts->temp_allocated != 0);
1344     ts->temp_allocated = 0;
1345 
1346     idx = temp_idx(ts);
1347     k = ts->base_type + (ts->temp_local ? TCG_TYPE_COUNT : 0);
1348     set_bit(idx, s->free_temps[k].l);
1349 }
1350 
tcg_const_i32(int32_t val)1351 TCGv_i32 tcg_const_i32(int32_t val)
1352 {
1353     TCGv_i32 t0;
1354     t0 = tcg_temp_new_i32();
1355     tcg_gen_movi_i32(t0, val);
1356     return t0;
1357 }
1358 
tcg_const_i64(int64_t val)1359 TCGv_i64 tcg_const_i64(int64_t val)
1360 {
1361     TCGv_i64 t0;
1362     t0 = tcg_temp_new_i64();
1363     tcg_gen_movi_i64(t0, val);
1364     return t0;
1365 }
1366 
tcg_const_local_i32(int32_t val)1367 TCGv_i32 tcg_const_local_i32(int32_t val)
1368 {
1369     TCGv_i32 t0;
1370     t0 = tcg_temp_local_new_i32();
1371     tcg_gen_movi_i32(t0, val);
1372     return t0;
1373 }
1374 
tcg_const_local_i64(int64_t val)1375 TCGv_i64 tcg_const_local_i64(int64_t val)
1376 {
1377     TCGv_i64 t0;
1378     t0 = tcg_temp_local_new_i64();
1379     tcg_gen_movi_i64(t0, val);
1380     return t0;
1381 }
1382 
1383 #if defined(CONFIG_DEBUG_TCG)
tcg_clear_temp_count(void)1384 void tcg_clear_temp_count(void)
1385 {
1386     TCGContext *s = tcg_ctx;
1387     s->temps_in_use = 0;
1388 }
1389 
tcg_check_temp_count(void)1390 int tcg_check_temp_count(void)
1391 {
1392     TCGContext *s = tcg_ctx;
1393     if (s->temps_in_use) {
1394         /* Clear the count so that we don't give another
1395          * warning immediately next time around.
1396          */
1397         s->temps_in_use = 0;
1398         return 1;
1399     }
1400     return 0;
1401 }
1402 #endif
1403 
1404 /* Return true if OP may appear in the opcode stream.
1405    Test the runtime variable that controls each opcode.  */
tcg_op_supported(TCGOpcode op)1406 bool tcg_op_supported(TCGOpcode op)
1407 {
1408     const bool have_vec
1409         = TCG_TARGET_HAS_v64 | TCG_TARGET_HAS_v128 | TCG_TARGET_HAS_v256;
1410 
1411     switch (op) {
1412     case INDEX_op_discard:
1413     case INDEX_op_set_label:
1414     case INDEX_op_call:
1415     case INDEX_op_br:
1416     case INDEX_op_mb:
1417     case INDEX_op_insn_start:
1418     case INDEX_op_exit_tb:
1419     case INDEX_op_goto_tb:
1420     case INDEX_op_qemu_ld_i32:
1421     case INDEX_op_qemu_st_i32:
1422     case INDEX_op_qemu_ld_i64:
1423     case INDEX_op_qemu_st_i64:
1424         return true;
1425 
1426     case INDEX_op_goto_ptr:
1427         return TCG_TARGET_HAS_goto_ptr;
1428 
1429     case INDEX_op_mov_i32:
1430     case INDEX_op_movi_i32:
1431     case INDEX_op_setcond_i32:
1432     case INDEX_op_brcond_i32:
1433     case INDEX_op_ld8u_i32:
1434     case INDEX_op_ld8s_i32:
1435     case INDEX_op_ld16u_i32:
1436     case INDEX_op_ld16s_i32:
1437     case INDEX_op_ld_i32:
1438     case INDEX_op_st8_i32:
1439     case INDEX_op_st16_i32:
1440     case INDEX_op_st_i32:
1441     case INDEX_op_add_i32:
1442     case INDEX_op_sub_i32:
1443     case INDEX_op_mul_i32:
1444     case INDEX_op_and_i32:
1445     case INDEX_op_or_i32:
1446     case INDEX_op_xor_i32:
1447     case INDEX_op_shl_i32:
1448     case INDEX_op_shr_i32:
1449     case INDEX_op_sar_i32:
1450         return true;
1451 
1452     case INDEX_op_movcond_i32:
1453         return TCG_TARGET_HAS_movcond_i32;
1454     case INDEX_op_div_i32:
1455     case INDEX_op_divu_i32:
1456         return TCG_TARGET_HAS_div_i32;
1457     case INDEX_op_rem_i32:
1458     case INDEX_op_remu_i32:
1459         return TCG_TARGET_HAS_rem_i32;
1460     case INDEX_op_div2_i32:
1461     case INDEX_op_divu2_i32:
1462         return TCG_TARGET_HAS_div2_i32;
1463     case INDEX_op_rotl_i32:
1464     case INDEX_op_rotr_i32:
1465         return TCG_TARGET_HAS_rot_i32;
1466     case INDEX_op_deposit_i32:
1467         return TCG_TARGET_HAS_deposit_i32;
1468     case INDEX_op_extract_i32:
1469         return TCG_TARGET_HAS_extract_i32;
1470     case INDEX_op_sextract_i32:
1471         return TCG_TARGET_HAS_sextract_i32;
1472     case INDEX_op_extract2_i32:
1473         return TCG_TARGET_HAS_extract2_i32;
1474     case INDEX_op_add2_i32:
1475         return TCG_TARGET_HAS_add2_i32;
1476     case INDEX_op_sub2_i32:
1477         return TCG_TARGET_HAS_sub2_i32;
1478     case INDEX_op_mulu2_i32:
1479         return TCG_TARGET_HAS_mulu2_i32;
1480     case INDEX_op_muls2_i32:
1481         return TCG_TARGET_HAS_muls2_i32;
1482     case INDEX_op_muluh_i32:
1483         return TCG_TARGET_HAS_muluh_i32;
1484     case INDEX_op_mulsh_i32:
1485         return TCG_TARGET_HAS_mulsh_i32;
1486     case INDEX_op_ext8s_i32:
1487         return TCG_TARGET_HAS_ext8s_i32;
1488     case INDEX_op_ext16s_i32:
1489         return TCG_TARGET_HAS_ext16s_i32;
1490     case INDEX_op_ext8u_i32:
1491         return TCG_TARGET_HAS_ext8u_i32;
1492     case INDEX_op_ext16u_i32:
1493         return TCG_TARGET_HAS_ext16u_i32;
1494     case INDEX_op_bswap16_i32:
1495         return TCG_TARGET_HAS_bswap16_i32;
1496     case INDEX_op_bswap32_i32:
1497         return TCG_TARGET_HAS_bswap32_i32;
1498     case INDEX_op_not_i32:
1499         return TCG_TARGET_HAS_not_i32;
1500     case INDEX_op_neg_i32:
1501         return TCG_TARGET_HAS_neg_i32;
1502     case INDEX_op_andc_i32:
1503         return TCG_TARGET_HAS_andc_i32;
1504     case INDEX_op_orc_i32:
1505         return TCG_TARGET_HAS_orc_i32;
1506     case INDEX_op_eqv_i32:
1507         return TCG_TARGET_HAS_eqv_i32;
1508     case INDEX_op_nand_i32:
1509         return TCG_TARGET_HAS_nand_i32;
1510     case INDEX_op_nor_i32:
1511         return TCG_TARGET_HAS_nor_i32;
1512     case INDEX_op_clz_i32:
1513         return TCG_TARGET_HAS_clz_i32;
1514     case INDEX_op_ctz_i32:
1515         return TCG_TARGET_HAS_ctz_i32;
1516     case INDEX_op_ctpop_i32:
1517         return TCG_TARGET_HAS_ctpop_i32;
1518 
1519     case INDEX_op_brcond2_i32:
1520     case INDEX_op_setcond2_i32:
1521         return TCG_TARGET_REG_BITS == 32;
1522 
1523     case INDEX_op_mov_i64:
1524     case INDEX_op_movi_i64:
1525     case INDEX_op_setcond_i64:
1526     case INDEX_op_brcond_i64:
1527     case INDEX_op_ld8u_i64:
1528     case INDEX_op_ld8s_i64:
1529     case INDEX_op_ld16u_i64:
1530     case INDEX_op_ld16s_i64:
1531     case INDEX_op_ld32u_i64:
1532     case INDEX_op_ld32s_i64:
1533     case INDEX_op_ld_i64:
1534     case INDEX_op_st8_i64:
1535     case INDEX_op_st16_i64:
1536     case INDEX_op_st32_i64:
1537     case INDEX_op_st_i64:
1538     case INDEX_op_add_i64:
1539     case INDEX_op_sub_i64:
1540     case INDEX_op_mul_i64:
1541     case INDEX_op_and_i64:
1542     case INDEX_op_or_i64:
1543     case INDEX_op_xor_i64:
1544     case INDEX_op_shl_i64:
1545     case INDEX_op_shr_i64:
1546     case INDEX_op_sar_i64:
1547     case INDEX_op_ext_i32_i64:
1548     case INDEX_op_extu_i32_i64:
1549         return TCG_TARGET_REG_BITS == 64;
1550 
1551     case INDEX_op_movcond_i64:
1552         return TCG_TARGET_HAS_movcond_i64;
1553     case INDEX_op_div_i64:
1554     case INDEX_op_divu_i64:
1555         return TCG_TARGET_HAS_div_i64;
1556     case INDEX_op_rem_i64:
1557     case INDEX_op_remu_i64:
1558         return TCG_TARGET_HAS_rem_i64;
1559     case INDEX_op_div2_i64:
1560     case INDEX_op_divu2_i64:
1561         return TCG_TARGET_HAS_div2_i64;
1562     case INDEX_op_rotl_i64:
1563     case INDEX_op_rotr_i64:
1564         return TCG_TARGET_HAS_rot_i64;
1565     case INDEX_op_deposit_i64:
1566         return TCG_TARGET_HAS_deposit_i64;
1567     case INDEX_op_extract_i64:
1568         return TCG_TARGET_HAS_extract_i64;
1569     case INDEX_op_sextract_i64:
1570         return TCG_TARGET_HAS_sextract_i64;
1571     case INDEX_op_extract2_i64:
1572         return TCG_TARGET_HAS_extract2_i64;
1573     case INDEX_op_extrl_i64_i32:
1574         return TCG_TARGET_HAS_extrl_i64_i32;
1575     case INDEX_op_extrh_i64_i32:
1576         return TCG_TARGET_HAS_extrh_i64_i32;
1577     case INDEX_op_ext8s_i64:
1578         return TCG_TARGET_HAS_ext8s_i64;
1579     case INDEX_op_ext16s_i64:
1580         return TCG_TARGET_HAS_ext16s_i64;
1581     case INDEX_op_ext32s_i64:
1582         return TCG_TARGET_HAS_ext32s_i64;
1583     case INDEX_op_ext8u_i64:
1584         return TCG_TARGET_HAS_ext8u_i64;
1585     case INDEX_op_ext16u_i64:
1586         return TCG_TARGET_HAS_ext16u_i64;
1587     case INDEX_op_ext32u_i64:
1588         return TCG_TARGET_HAS_ext32u_i64;
1589     case INDEX_op_bswap16_i64:
1590         return TCG_TARGET_HAS_bswap16_i64;
1591     case INDEX_op_bswap32_i64:
1592         return TCG_TARGET_HAS_bswap32_i64;
1593     case INDEX_op_bswap64_i64:
1594         return TCG_TARGET_HAS_bswap64_i64;
1595     case INDEX_op_not_i64:
1596         return TCG_TARGET_HAS_not_i64;
1597     case INDEX_op_neg_i64:
1598         return TCG_TARGET_HAS_neg_i64;
1599     case INDEX_op_andc_i64:
1600         return TCG_TARGET_HAS_andc_i64;
1601     case INDEX_op_orc_i64:
1602         return TCG_TARGET_HAS_orc_i64;
1603     case INDEX_op_eqv_i64:
1604         return TCG_TARGET_HAS_eqv_i64;
1605     case INDEX_op_nand_i64:
1606         return TCG_TARGET_HAS_nand_i64;
1607     case INDEX_op_nor_i64:
1608         return TCG_TARGET_HAS_nor_i64;
1609     case INDEX_op_clz_i64:
1610         return TCG_TARGET_HAS_clz_i64;
1611     case INDEX_op_ctz_i64:
1612         return TCG_TARGET_HAS_ctz_i64;
1613     case INDEX_op_ctpop_i64:
1614         return TCG_TARGET_HAS_ctpop_i64;
1615     case INDEX_op_add2_i64:
1616         return TCG_TARGET_HAS_add2_i64;
1617     case INDEX_op_sub2_i64:
1618         return TCG_TARGET_HAS_sub2_i64;
1619     case INDEX_op_mulu2_i64:
1620         return TCG_TARGET_HAS_mulu2_i64;
1621     case INDEX_op_muls2_i64:
1622         return TCG_TARGET_HAS_muls2_i64;
1623     case INDEX_op_muluh_i64:
1624         return TCG_TARGET_HAS_muluh_i64;
1625     case INDEX_op_mulsh_i64:
1626         return TCG_TARGET_HAS_mulsh_i64;
1627 
1628     case INDEX_op_mov_vec:
1629     case INDEX_op_dup_vec:
1630     case INDEX_op_dupi_vec:
1631     case INDEX_op_dupm_vec:
1632     case INDEX_op_ld_vec:
1633     case INDEX_op_st_vec:
1634     case INDEX_op_add_vec:
1635     case INDEX_op_sub_vec:
1636     case INDEX_op_and_vec:
1637     case INDEX_op_or_vec:
1638     case INDEX_op_xor_vec:
1639     case INDEX_op_cmp_vec:
1640         return have_vec;
1641     case INDEX_op_dup2_vec:
1642         return have_vec && TCG_TARGET_REG_BITS == 32;
1643     case INDEX_op_not_vec:
1644         return have_vec && TCG_TARGET_HAS_not_vec;
1645     case INDEX_op_neg_vec:
1646         return have_vec && TCG_TARGET_HAS_neg_vec;
1647     case INDEX_op_abs_vec:
1648         return have_vec && TCG_TARGET_HAS_abs_vec;
1649     case INDEX_op_andc_vec:
1650         return have_vec && TCG_TARGET_HAS_andc_vec;
1651     case INDEX_op_orc_vec:
1652         return have_vec && TCG_TARGET_HAS_orc_vec;
1653     case INDEX_op_mul_vec:
1654         return have_vec && TCG_TARGET_HAS_mul_vec;
1655     case INDEX_op_shli_vec:
1656     case INDEX_op_shri_vec:
1657     case INDEX_op_sari_vec:
1658         return have_vec && TCG_TARGET_HAS_shi_vec;
1659     case INDEX_op_shls_vec:
1660     case INDEX_op_shrs_vec:
1661     case INDEX_op_sars_vec:
1662         return have_vec && TCG_TARGET_HAS_shs_vec;
1663     case INDEX_op_shlv_vec:
1664     case INDEX_op_shrv_vec:
1665     case INDEX_op_sarv_vec:
1666         return have_vec && TCG_TARGET_HAS_shv_vec;
1667     case INDEX_op_ssadd_vec:
1668     case INDEX_op_usadd_vec:
1669     case INDEX_op_sssub_vec:
1670     case INDEX_op_ussub_vec:
1671         return have_vec && TCG_TARGET_HAS_sat_vec;
1672     case INDEX_op_smin_vec:
1673     case INDEX_op_umin_vec:
1674     case INDEX_op_smax_vec:
1675     case INDEX_op_umax_vec:
1676         return have_vec && TCG_TARGET_HAS_minmax_vec;
1677     case INDEX_op_bitsel_vec:
1678         return have_vec && TCG_TARGET_HAS_bitsel_vec;
1679     case INDEX_op_cmpsel_vec:
1680         return have_vec && TCG_TARGET_HAS_cmpsel_vec;
1681 
1682     default:
1683         tcg_debug_assert(op > INDEX_op_last_generic && op < NB_OPS);
1684         return true;
1685     }
1686 }
1687 
1688 /* Note: we convert the 64 bit args to 32 bit and do some alignment
1689    and endian swap. Maybe it would be better to do the alignment
1690    and endian swap in tcg_reg_alloc_call(). */
tcg_gen_callN(void * func,TCGTemp * ret,int nargs,TCGTemp ** args)1691 void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args)
1692 {
1693     int i, real_args, nb_rets, pi;
1694     unsigned sizemask, flags;
1695     TCGHelperInfo *info;
1696     TCGOp *op;
1697 
1698     info = g_hash_table_lookup(helper_table, (gpointer)func);
1699     flags = info->flags;
1700     sizemask = info->sizemask;
1701 
1702 #ifdef CONFIG_PLUGIN
1703     /* detect non-plugin helpers */
1704     if (tcg_ctx->plugin_insn && unlikely(strncmp(info->name, "plugin_", 7))) {
1705         tcg_ctx->plugin_insn->calls_helpers = true;
1706     }
1707 #endif
1708 
1709 #if defined(__sparc__) && !defined(__arch64__) \
1710     && !defined(CONFIG_TCG_INTERPRETER)
1711     /* We have 64-bit values in one register, but need to pass as two
1712        separate parameters.  Split them.  */
1713     int orig_sizemask = sizemask;
1714     int orig_nargs = nargs;
1715     TCGv_i64 retl, reth;
1716     TCGTemp *split_args[MAX_OPC_PARAM];
1717 
1718     retl = NULL;
1719     reth = NULL;
1720     if (sizemask != 0) {
1721         for (i = real_args = 0; i < nargs; ++i) {
1722             int is_64bit = sizemask & (1 << (i+1)*2);
1723             if (is_64bit) {
1724                 TCGv_i64 orig = temp_tcgv_i64(args[i]);
1725                 TCGv_i32 h = tcg_temp_new_i32();
1726                 TCGv_i32 l = tcg_temp_new_i32();
1727                 tcg_gen_extr_i64_i32(l, h, orig);
1728                 split_args[real_args++] = tcgv_i32_temp(h);
1729                 split_args[real_args++] = tcgv_i32_temp(l);
1730             } else {
1731                 split_args[real_args++] = args[i];
1732             }
1733         }
1734         nargs = real_args;
1735         args = split_args;
1736         sizemask = 0;
1737     }
1738 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1739     for (i = 0; i < nargs; ++i) {
1740         int is_64bit = sizemask & (1 << (i+1)*2);
1741         int is_signed = sizemask & (2 << (i+1)*2);
1742         if (!is_64bit) {
1743             TCGv_i64 temp = tcg_temp_new_i64();
1744             TCGv_i64 orig = temp_tcgv_i64(args[i]);
1745             if (is_signed) {
1746                 tcg_gen_ext32s_i64(temp, orig);
1747             } else {
1748                 tcg_gen_ext32u_i64(temp, orig);
1749             }
1750             args[i] = tcgv_i64_temp(temp);
1751         }
1752     }
1753 #endif /* TCG_TARGET_EXTEND_ARGS */
1754 
1755     op = tcg_emit_op(INDEX_op_call);
1756 
1757     pi = 0;
1758     if (ret != NULL) {
1759 #if defined(__sparc__) && !defined(__arch64__) \
1760     && !defined(CONFIG_TCG_INTERPRETER)
1761         if (orig_sizemask & 1) {
1762             /* The 32-bit ABI is going to return the 64-bit value in
1763                the %o0/%o1 register pair.  Prepare for this by using
1764                two return temporaries, and reassemble below.  */
1765             retl = tcg_temp_new_i64();
1766             reth = tcg_temp_new_i64();
1767             op->args[pi++] = tcgv_i64_arg(reth);
1768             op->args[pi++] = tcgv_i64_arg(retl);
1769             nb_rets = 2;
1770         } else {
1771             op->args[pi++] = temp_arg(ret);
1772             nb_rets = 1;
1773         }
1774 #else
1775         if (TCG_TARGET_REG_BITS < 64 && (sizemask & 1)) {
1776 #ifdef HOST_WORDS_BIGENDIAN
1777             op->args[pi++] = temp_arg(ret + 1);
1778             op->args[pi++] = temp_arg(ret);
1779 #else
1780             op->args[pi++] = temp_arg(ret);
1781             op->args[pi++] = temp_arg(ret + 1);
1782 #endif
1783             nb_rets = 2;
1784         } else {
1785             op->args[pi++] = temp_arg(ret);
1786             nb_rets = 1;
1787         }
1788 #endif
1789     } else {
1790         nb_rets = 0;
1791     }
1792     TCGOP_CALLO(op) = nb_rets;
1793 
1794     real_args = 0;
1795     for (i = 0; i < nargs; i++) {
1796         int is_64bit = sizemask & (1 << (i+1)*2);
1797         if (TCG_TARGET_REG_BITS < 64 && is_64bit) {
1798 #ifdef TCG_TARGET_CALL_ALIGN_ARGS
1799             /* some targets want aligned 64 bit args */
1800             if (real_args & 1) {
1801                 op->args[pi++] = TCG_CALL_DUMMY_ARG;
1802                 real_args++;
1803             }
1804 #endif
1805            /* If stack grows up, then we will be placing successive
1806               arguments at lower addresses, which means we need to
1807               reverse the order compared to how we would normally
1808               treat either big or little-endian.  For those arguments
1809               that will wind up in registers, this still works for
1810               HPPA (the only current STACK_GROWSUP target) since the
1811               argument registers are *also* allocated in decreasing
1812               order.  If another such target is added, this logic may
1813               have to get more complicated to differentiate between
1814               stack arguments and register arguments.  */
1815 #if defined(HOST_WORDS_BIGENDIAN) != defined(TCG_TARGET_STACK_GROWSUP)
1816             op->args[pi++] = temp_arg(args[i] + 1);
1817             op->args[pi++] = temp_arg(args[i]);
1818 #else
1819             op->args[pi++] = temp_arg(args[i]);
1820             op->args[pi++] = temp_arg(args[i] + 1);
1821 #endif
1822             real_args += 2;
1823             continue;
1824         }
1825 
1826         op->args[pi++] = temp_arg(args[i]);
1827         real_args++;
1828     }
1829     op->args[pi++] = (uintptr_t)func;
1830     op->args[pi++] = flags;
1831     TCGOP_CALLI(op) = real_args;
1832 
1833     /* Make sure the fields didn't overflow.  */
1834     tcg_debug_assert(TCGOP_CALLI(op) == real_args);
1835     tcg_debug_assert(pi <= ARRAY_SIZE(op->args));
1836 
1837 #if defined(__sparc__) && !defined(__arch64__) \
1838     && !defined(CONFIG_TCG_INTERPRETER)
1839     /* Free all of the parts we allocated above.  */
1840     for (i = real_args = 0; i < orig_nargs; ++i) {
1841         int is_64bit = orig_sizemask & (1 << (i+1)*2);
1842         if (is_64bit) {
1843             tcg_temp_free_internal(args[real_args++]);
1844             tcg_temp_free_internal(args[real_args++]);
1845         } else {
1846             real_args++;
1847         }
1848     }
1849     if (orig_sizemask & 1) {
1850         /* The 32-bit ABI returned two 32-bit pieces.  Re-assemble them.
1851            Note that describing these as TCGv_i64 eliminates an unnecessary
1852            zero-extension that tcg_gen_concat_i32_i64 would create.  */
1853         tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth);
1854         tcg_temp_free_i64(retl);
1855         tcg_temp_free_i64(reth);
1856     }
1857 #elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64
1858     for (i = 0; i < nargs; ++i) {
1859         int is_64bit = sizemask & (1 << (i+1)*2);
1860         if (!is_64bit) {
1861             tcg_temp_free_internal(args[i]);
1862         }
1863     }
1864 #endif /* TCG_TARGET_EXTEND_ARGS */
1865 }
1866 
tcg_reg_alloc_start(TCGContext * s)1867 static void tcg_reg_alloc_start(TCGContext *s)
1868 {
1869     int i, n;
1870     TCGTemp *ts;
1871 
1872     for (i = 0, n = s->nb_globals; i < n; i++) {
1873         ts = &s->temps[i];
1874         ts->val_type = (ts->fixed_reg ? TEMP_VAL_REG : TEMP_VAL_MEM);
1875     }
1876     for (n = s->nb_temps; i < n; i++) {
1877         ts = &s->temps[i];
1878         ts->val_type = (ts->temp_local ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
1879         ts->mem_allocated = 0;
1880         ts->fixed_reg = 0;
1881     }
1882 
1883     memset(s->reg_to_temp, 0, sizeof(s->reg_to_temp));
1884 }
1885 
tcg_get_arg_str_ptr(TCGContext * s,char * buf,int buf_size,TCGTemp * ts)1886 static char *tcg_get_arg_str_ptr(TCGContext *s, char *buf, int buf_size,
1887                                  TCGTemp *ts)
1888 {
1889     int idx = temp_idx(ts);
1890 
1891     if (ts->temp_global) {
1892         pstrcpy(buf, buf_size, ts->name);
1893     } else if (ts->temp_local) {
1894         snprintf(buf, buf_size, "loc%d", idx - s->nb_globals);
1895     } else {
1896         snprintf(buf, buf_size, "tmp%d", idx - s->nb_globals);
1897     }
1898     return buf;
1899 }
1900 
tcg_get_arg_str(TCGContext * s,char * buf,int buf_size,TCGArg arg)1901 static char *tcg_get_arg_str(TCGContext *s, char *buf,
1902                              int buf_size, TCGArg arg)
1903 {
1904     return tcg_get_arg_str_ptr(s, buf, buf_size, arg_temp(arg));
1905 }
1906 
1907 /* Find helper name.  */
tcg_find_helper(TCGContext * s,uintptr_t val)1908 static inline const char *tcg_find_helper(TCGContext *s, uintptr_t val)
1909 {
1910     const char *ret = NULL;
1911     if (helper_table) {
1912         TCGHelperInfo *info = g_hash_table_lookup(helper_table, (gpointer)val);
1913         if (info) {
1914             ret = info->name;
1915         }
1916     }
1917     return ret;
1918 }
1919 
1920 static const char * const cond_name[] =
1921 {
1922     [TCG_COND_NEVER] = "never",
1923     [TCG_COND_ALWAYS] = "always",
1924     [TCG_COND_EQ] = "eq",
1925     [TCG_COND_NE] = "ne",
1926     [TCG_COND_LT] = "lt",
1927     [TCG_COND_GE] = "ge",
1928     [TCG_COND_LE] = "le",
1929     [TCG_COND_GT] = "gt",
1930     [TCG_COND_LTU] = "ltu",
1931     [TCG_COND_GEU] = "geu",
1932     [TCG_COND_LEU] = "leu",
1933     [TCG_COND_GTU] = "gtu"
1934 };
1935 
1936 static const char * const ldst_name[] =
1937 {
1938     [MO_UB]   = "ub",
1939     [MO_SB]   = "sb",
1940     [MO_LEUW] = "leuw",
1941     [MO_LESW] = "lesw",
1942     [MO_LEUL] = "leul",
1943     [MO_LESL] = "lesl",
1944     [MO_LEQ]  = "leq",
1945     [MO_BEUW] = "beuw",
1946     [MO_BESW] = "besw",
1947     [MO_BEUL] = "beul",
1948     [MO_BESL] = "besl",
1949     [MO_BEQ]  = "beq",
1950 };
1951 
1952 static const char * const alignment_name[(MO_AMASK >> MO_ASHIFT) + 1] = {
1953 #ifdef TARGET_ALIGNED_ONLY
1954     [MO_UNALN >> MO_ASHIFT]    = "un+",
1955     [MO_ALIGN >> MO_ASHIFT]    = "",
1956 #else
1957     [MO_UNALN >> MO_ASHIFT]    = "",
1958     [MO_ALIGN >> MO_ASHIFT]    = "al+",
1959 #endif
1960     [MO_ALIGN_2 >> MO_ASHIFT]  = "al2+",
1961     [MO_ALIGN_4 >> MO_ASHIFT]  = "al4+",
1962     [MO_ALIGN_8 >> MO_ASHIFT]  = "al8+",
1963     [MO_ALIGN_16 >> MO_ASHIFT] = "al16+",
1964     [MO_ALIGN_32 >> MO_ASHIFT] = "al32+",
1965     [MO_ALIGN_64 >> MO_ASHIFT] = "al64+",
1966 };
1967 
tcg_regset_single(TCGRegSet d)1968 static inline bool tcg_regset_single(TCGRegSet d)
1969 {
1970     return (d & (d - 1)) == 0;
1971 }
1972 
tcg_regset_first(TCGRegSet d)1973 static inline TCGReg tcg_regset_first(TCGRegSet d)
1974 {
1975     if (TCG_TARGET_NB_REGS <= 32) {
1976         return ctz32(d);
1977     } else {
1978         return ctz64(d);
1979     }
1980 }
1981 
tcg_dump_ops(TCGContext * s,bool have_prefs)1982 static void tcg_dump_ops(TCGContext *s, bool have_prefs)
1983 {
1984     char buf[128];
1985     TCGOp *op;
1986 
1987     QTAILQ_FOREACH(op, &s->ops, link) {
1988         int i, k, nb_oargs, nb_iargs, nb_cargs;
1989         const TCGOpDef *def;
1990         TCGOpcode c;
1991         int col = 0;
1992 
1993         c = op->opc;
1994         def = &tcg_op_defs[c];
1995 
1996         if (c == INDEX_op_insn_start) {
1997             nb_oargs = 0;
1998             col += qemu_log("\n ----");
1999 
2000             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
2001                 target_ulong a;
2002 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
2003                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
2004 #else
2005                 a = op->args[i];
2006 #endif
2007                 col += qemu_log(" " TARGET_FMT_lx, a);
2008             }
2009         } else if (c == INDEX_op_call) {
2010             /* variable number of arguments */
2011             nb_oargs = TCGOP_CALLO(op);
2012             nb_iargs = TCGOP_CALLI(op);
2013             nb_cargs = def->nb_cargs;
2014 
2015             /* function name, flags, out args */
2016             col += qemu_log(" %s %s,$0x%" TCG_PRIlx ",$%d", def->name,
2017                             tcg_find_helper(s, op->args[nb_oargs + nb_iargs]),
2018                             op->args[nb_oargs + nb_iargs + 1], nb_oargs);
2019             for (i = 0; i < nb_oargs; i++) {
2020                 col += qemu_log(",%s", tcg_get_arg_str(s, buf, sizeof(buf),
2021                                                        op->args[i]));
2022             }
2023             for (i = 0; i < nb_iargs; i++) {
2024                 TCGArg arg = op->args[nb_oargs + i];
2025                 const char *t = "<dummy>";
2026                 if (arg != TCG_CALL_DUMMY_ARG) {
2027                     t = tcg_get_arg_str(s, buf, sizeof(buf), arg);
2028                 }
2029                 col += qemu_log(",%s", t);
2030             }
2031         } else {
2032             col += qemu_log(" %s ", def->name);
2033 
2034             nb_oargs = def->nb_oargs;
2035             nb_iargs = def->nb_iargs;
2036             nb_cargs = def->nb_cargs;
2037 
2038             if (def->flags & TCG_OPF_VECTOR) {
2039                 col += qemu_log("v%d,e%d,", 64 << TCGOP_VECL(op),
2040                                 8 << TCGOP_VECE(op));
2041             }
2042 
2043             k = 0;
2044             for (i = 0; i < nb_oargs; i++) {
2045                 if (k != 0) {
2046                     col += qemu_log(",");
2047                 }
2048                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2049                                                       op->args[k++]));
2050             }
2051             for (i = 0; i < nb_iargs; i++) {
2052                 if (k != 0) {
2053                     col += qemu_log(",");
2054                 }
2055                 col += qemu_log("%s", tcg_get_arg_str(s, buf, sizeof(buf),
2056                                                       op->args[k++]));
2057             }
2058             switch (c) {
2059             case INDEX_op_brcond_i32:
2060             case INDEX_op_setcond_i32:
2061             case INDEX_op_movcond_i32:
2062             case INDEX_op_brcond2_i32:
2063             case INDEX_op_setcond2_i32:
2064             case INDEX_op_brcond_i64:
2065             case INDEX_op_setcond_i64:
2066             case INDEX_op_movcond_i64:
2067             case INDEX_op_cmp_vec:
2068             case INDEX_op_cmpsel_vec:
2069                 if (op->args[k] < ARRAY_SIZE(cond_name)
2070                     && cond_name[op->args[k]]) {
2071                     col += qemu_log(",%s", cond_name[op->args[k++]]);
2072                 } else {
2073                     col += qemu_log(",$0x%" TCG_PRIlx, op->args[k++]);
2074                 }
2075                 i = 1;
2076                 break;
2077             case INDEX_op_qemu_ld_i32:
2078             case INDEX_op_qemu_st_i32:
2079             case INDEX_op_qemu_ld_i64:
2080             case INDEX_op_qemu_st_i64:
2081                 {
2082                     TCGMemOpIdx oi = op->args[k++];
2083                     MemOp op = get_memop(oi);
2084                     unsigned ix = get_mmuidx(oi);
2085 
2086                     if (op & ~(MO_AMASK | MO_BSWAP | MO_SSIZE)) {
2087                         col += qemu_log(",$0x%x,%u", op, ix);
2088                     } else {
2089                         const char *s_al, *s_op;
2090                         s_al = alignment_name[(op & MO_AMASK) >> MO_ASHIFT];
2091                         s_op = ldst_name[op & (MO_BSWAP | MO_SSIZE)];
2092                         col += qemu_log(",%s%s,%u", s_al, s_op, ix);
2093                     }
2094                     i = 1;
2095                 }
2096                 break;
2097             default:
2098                 i = 0;
2099                 break;
2100             }
2101             switch (c) {
2102             case INDEX_op_set_label:
2103             case INDEX_op_br:
2104             case INDEX_op_brcond_i32:
2105             case INDEX_op_brcond_i64:
2106             case INDEX_op_brcond2_i32:
2107                 col += qemu_log("%s$L%d", k ? "," : "",
2108                                 arg_label(op->args[k])->id);
2109                 i++, k++;
2110                 break;
2111             default:
2112                 break;
2113             }
2114             for (; i < nb_cargs; i++, k++) {
2115                 col += qemu_log("%s$0x%" TCG_PRIlx, k ? "," : "", op->args[k]);
2116             }
2117         }
2118 
2119         if (have_prefs || op->life) {
2120 
2121             QemuLogFile *logfile;
2122 
2123             rcu_read_lock();
2124             logfile = atomic_rcu_read(&qemu_logfile);
2125             if (logfile) {
2126                 for (; col < 40; ++col) {
2127                     putc(' ', logfile->fd);
2128                 }
2129             }
2130             rcu_read_unlock();
2131         }
2132 
2133         if (op->life) {
2134             unsigned life = op->life;
2135 
2136             if (life & (SYNC_ARG * 3)) {
2137                 qemu_log("  sync:");
2138                 for (i = 0; i < 2; ++i) {
2139                     if (life & (SYNC_ARG << i)) {
2140                         qemu_log(" %d", i);
2141                     }
2142                 }
2143             }
2144             life /= DEAD_ARG;
2145             if (life) {
2146                 qemu_log("  dead:");
2147                 for (i = 0; life; ++i, life >>= 1) {
2148                     if (life & 1) {
2149                         qemu_log(" %d", i);
2150                     }
2151                 }
2152             }
2153         }
2154 
2155         if (have_prefs) {
2156             for (i = 0; i < nb_oargs; ++i) {
2157                 TCGRegSet set = op->output_pref[i];
2158 
2159                 if (i == 0) {
2160                     qemu_log("  pref=");
2161                 } else {
2162                     qemu_log(",");
2163                 }
2164                 if (set == 0) {
2165                     qemu_log("none");
2166                 } else if (set == MAKE_64BIT_MASK(0, TCG_TARGET_NB_REGS)) {
2167                     qemu_log("all");
2168 #ifdef CONFIG_DEBUG_TCG
2169                 } else if (tcg_regset_single(set)) {
2170                     TCGReg reg = tcg_regset_first(set);
2171                     qemu_log("%s", tcg_target_reg_names[reg]);
2172 #endif
2173                 } else if (TCG_TARGET_NB_REGS <= 32) {
2174                     qemu_log("%#x", (uint32_t)set);
2175                 } else {
2176                     qemu_log("%#" PRIx64, (uint64_t)set);
2177                 }
2178             }
2179         }
2180 
2181         qemu_log("\n");
2182     }
2183 }
2184 
2185 /* we give more priority to constraints with less registers */
get_constraint_priority(const TCGOpDef * def,int k)2186 static int get_constraint_priority(const TCGOpDef *def, int k)
2187 {
2188     const TCGArgConstraint *arg_ct;
2189 
2190     int i, n;
2191     arg_ct = &def->args_ct[k];
2192     if (arg_ct->ct & TCG_CT_ALIAS) {
2193         /* an alias is equivalent to a single register */
2194         n = 1;
2195     } else {
2196         if (!(arg_ct->ct & TCG_CT_REG))
2197             return 0;
2198         n = 0;
2199         for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
2200             if (tcg_regset_test_reg(arg_ct->u.regs, i))
2201                 n++;
2202         }
2203     }
2204     return TCG_TARGET_NB_REGS - n + 1;
2205 }
2206 
2207 /* sort from highest priority to lowest */
sort_constraints(TCGOpDef * def,int start,int n)2208 static void sort_constraints(TCGOpDef *def, int start, int n)
2209 {
2210     int i, j, p1, p2, tmp;
2211 
2212     for(i = 0; i < n; i++)
2213         def->sorted_args[start + i] = start + i;
2214     if (n <= 1)
2215         return;
2216     for(i = 0; i < n - 1; i++) {
2217         for(j = i + 1; j < n; j++) {
2218             p1 = get_constraint_priority(def, def->sorted_args[start + i]);
2219             p2 = get_constraint_priority(def, def->sorted_args[start + j]);
2220             if (p1 < p2) {
2221                 tmp = def->sorted_args[start + i];
2222                 def->sorted_args[start + i] = def->sorted_args[start + j];
2223                 def->sorted_args[start + j] = tmp;
2224             }
2225         }
2226     }
2227 }
2228 
process_op_defs(TCGContext * s)2229 static void process_op_defs(TCGContext *s)
2230 {
2231     TCGOpcode op;
2232 
2233     for (op = 0; op < NB_OPS; op++) {
2234         TCGOpDef *def = &tcg_op_defs[op];
2235         const TCGTargetOpDef *tdefs;
2236         TCGType type;
2237         int i, nb_args;
2238 
2239         if (def->flags & TCG_OPF_NOT_PRESENT) {
2240             continue;
2241         }
2242 
2243         nb_args = def->nb_iargs + def->nb_oargs;
2244         if (nb_args == 0) {
2245             continue;
2246         }
2247 
2248         tdefs = tcg_target_op_def(op);
2249         /* Missing TCGTargetOpDef entry. */
2250         tcg_debug_assert(tdefs != NULL);
2251 
2252         type = (def->flags & TCG_OPF_64BIT ? TCG_TYPE_I64 : TCG_TYPE_I32);
2253         for (i = 0; i < nb_args; i++) {
2254             const char *ct_str = tdefs->args_ct_str[i];
2255             /* Incomplete TCGTargetOpDef entry. */
2256             tcg_debug_assert(ct_str != NULL);
2257 
2258             def->args_ct[i].u.regs = 0;
2259             def->args_ct[i].ct = 0;
2260             while (*ct_str != '\0') {
2261                 switch(*ct_str) {
2262                 case '0' ... '9':
2263                     {
2264                         int oarg = *ct_str - '0';
2265                         tcg_debug_assert(ct_str == tdefs->args_ct_str[i]);
2266                         tcg_debug_assert(oarg < def->nb_oargs);
2267                         tcg_debug_assert(def->args_ct[oarg].ct & TCG_CT_REG);
2268                         /* TCG_CT_ALIAS is for the output arguments.
2269                            The input is tagged with TCG_CT_IALIAS. */
2270                         def->args_ct[i] = def->args_ct[oarg];
2271                         def->args_ct[oarg].ct |= TCG_CT_ALIAS;
2272                         def->args_ct[oarg].alias_index = i;
2273                         def->args_ct[i].ct |= TCG_CT_IALIAS;
2274                         def->args_ct[i].alias_index = oarg;
2275                     }
2276                     ct_str++;
2277                     break;
2278                 case '&':
2279                     def->args_ct[i].ct |= TCG_CT_NEWREG;
2280                     ct_str++;
2281                     break;
2282                 case 'i':
2283                     def->args_ct[i].ct |= TCG_CT_CONST;
2284                     ct_str++;
2285                     break;
2286                 default:
2287                     ct_str = target_parse_constraint(&def->args_ct[i],
2288                                                      ct_str, type);
2289                     /* Typo in TCGTargetOpDef constraint. */
2290                     tcg_debug_assert(ct_str != NULL);
2291                 }
2292             }
2293         }
2294 
2295         /* TCGTargetOpDef entry with too much information? */
2296         tcg_debug_assert(i == TCG_MAX_OP_ARGS || tdefs->args_ct_str[i] == NULL);
2297 
2298         /* sort the constraints (XXX: this is just an heuristic) */
2299         sort_constraints(def, 0, def->nb_oargs);
2300         sort_constraints(def, def->nb_oargs, def->nb_iargs);
2301     }
2302 }
2303 
tcg_op_remove(TCGContext * s,TCGOp * op)2304 void tcg_op_remove(TCGContext *s, TCGOp *op)
2305 {
2306     TCGLabel *label;
2307 
2308     switch (op->opc) {
2309     case INDEX_op_br:
2310         label = arg_label(op->args[0]);
2311         label->refs--;
2312         break;
2313     case INDEX_op_brcond_i32:
2314     case INDEX_op_brcond_i64:
2315         label = arg_label(op->args[3]);
2316         label->refs--;
2317         break;
2318     case INDEX_op_brcond2_i32:
2319         label = arg_label(op->args[5]);
2320         label->refs--;
2321         break;
2322     default:
2323         break;
2324     }
2325 
2326     QTAILQ_REMOVE(&s->ops, op, link);
2327     QTAILQ_INSERT_TAIL(&s->free_ops, op, link);
2328     s->nb_ops--;
2329 
2330 #ifdef CONFIG_PROFILER
2331     atomic_set(&s->prof.del_op_count, s->prof.del_op_count + 1);
2332 #endif
2333 }
2334 
tcg_op_alloc(TCGOpcode opc)2335 static TCGOp *tcg_op_alloc(TCGOpcode opc)
2336 {
2337     TCGContext *s = tcg_ctx;
2338     TCGOp *op;
2339 
2340     if (likely(QTAILQ_EMPTY(&s->free_ops))) {
2341         op = tcg_malloc(sizeof(TCGOp));
2342     } else {
2343         op = QTAILQ_FIRST(&s->free_ops);
2344         QTAILQ_REMOVE(&s->free_ops, op, link);
2345     }
2346     memset(op, 0, offsetof(TCGOp, link));
2347     op->opc = opc;
2348     s->nb_ops++;
2349 
2350     return op;
2351 }
2352 
tcg_emit_op(TCGOpcode opc)2353 TCGOp *tcg_emit_op(TCGOpcode opc)
2354 {
2355     TCGOp *op = tcg_op_alloc(opc);
2356     QTAILQ_INSERT_TAIL(&tcg_ctx->ops, op, link);
2357     return op;
2358 }
2359 
tcg_op_insert_before(TCGContext * s,TCGOp * old_op,TCGOpcode opc)2360 TCGOp *tcg_op_insert_before(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2361 {
2362     TCGOp *new_op = tcg_op_alloc(opc);
2363     QTAILQ_INSERT_BEFORE(old_op, new_op, link);
2364     return new_op;
2365 }
2366 
tcg_op_insert_after(TCGContext * s,TCGOp * old_op,TCGOpcode opc)2367 TCGOp *tcg_op_insert_after(TCGContext *s, TCGOp *old_op, TCGOpcode opc)
2368 {
2369     TCGOp *new_op = tcg_op_alloc(opc);
2370     QTAILQ_INSERT_AFTER(&s->ops, old_op, new_op, link);
2371     return new_op;
2372 }
2373 
2374 /* Reachable analysis : remove unreachable code.  */
reachable_code_pass(TCGContext * s)2375 static void reachable_code_pass(TCGContext *s)
2376 {
2377     TCGOp *op, *op_next;
2378     bool dead = false;
2379 
2380     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2381         bool remove = dead;
2382         TCGLabel *label;
2383         int call_flags;
2384 
2385         switch (op->opc) {
2386         case INDEX_op_set_label:
2387             label = arg_label(op->args[0]);
2388             if (label->refs == 0) {
2389                 /*
2390                  * While there is an occasional backward branch, virtually
2391                  * all branches generated by the translators are forward.
2392                  * Which means that generally we will have already removed
2393                  * all references to the label that will be, and there is
2394                  * little to be gained by iterating.
2395                  */
2396                 remove = true;
2397             } else {
2398                 /* Once we see a label, insns become live again.  */
2399                 dead = false;
2400                 remove = false;
2401 
2402                 /*
2403                  * Optimization can fold conditional branches to unconditional.
2404                  * If we find a label with one reference which is preceded by
2405                  * an unconditional branch to it, remove both.  This needed to
2406                  * wait until the dead code in between them was removed.
2407                  */
2408                 if (label->refs == 1) {
2409                     TCGOp *op_prev = QTAILQ_PREV(op, link);
2410                     if (op_prev->opc == INDEX_op_br &&
2411                         label == arg_label(op_prev->args[0])) {
2412                         tcg_op_remove(s, op_prev);
2413                         remove = true;
2414                     }
2415                 }
2416             }
2417             break;
2418 
2419         case INDEX_op_br:
2420         case INDEX_op_exit_tb:
2421         case INDEX_op_goto_ptr:
2422             /* Unconditional branches; everything following is dead.  */
2423             dead = true;
2424             break;
2425 
2426         case INDEX_op_call:
2427             /* Notice noreturn helper calls, raising exceptions.  */
2428             call_flags = op->args[TCGOP_CALLO(op) + TCGOP_CALLI(op) + 1];
2429             if (call_flags & TCG_CALL_NO_RETURN) {
2430                 dead = true;
2431             }
2432             break;
2433 
2434         case INDEX_op_insn_start:
2435             /* Never remove -- we need to keep these for unwind.  */
2436             remove = false;
2437             break;
2438 
2439         default:
2440             break;
2441         }
2442 
2443         if (remove) {
2444             tcg_op_remove(s, op);
2445         }
2446     }
2447 }
2448 
2449 #define TS_DEAD  1
2450 #define TS_MEM   2
2451 
2452 #define IS_DEAD_ARG(n)   (arg_life & (DEAD_ARG << (n)))
2453 #define NEED_SYNC_ARG(n) (arg_life & (SYNC_ARG << (n)))
2454 
2455 /* For liveness_pass_1, the register preferences for a given temp.  */
la_temp_pref(TCGTemp * ts)2456 static inline TCGRegSet *la_temp_pref(TCGTemp *ts)
2457 {
2458     return ts->state_ptr;
2459 }
2460 
2461 /* For liveness_pass_1, reset the preferences for a given temp to the
2462  * maximal regset for its type.
2463  */
la_reset_pref(TCGTemp * ts)2464 static inline void la_reset_pref(TCGTemp *ts)
2465 {
2466     *la_temp_pref(ts)
2467         = (ts->state == TS_DEAD ? 0 : tcg_target_available_regs[ts->type]);
2468 }
2469 
2470 /* liveness analysis: end of function: all temps are dead, and globals
2471    should be in memory. */
la_func_end(TCGContext * s,int ng,int nt)2472 static void la_func_end(TCGContext *s, int ng, int nt)
2473 {
2474     int i;
2475 
2476     for (i = 0; i < ng; ++i) {
2477         s->temps[i].state = TS_DEAD | TS_MEM;
2478         la_reset_pref(&s->temps[i]);
2479     }
2480     for (i = ng; i < nt; ++i) {
2481         s->temps[i].state = TS_DEAD;
2482         la_reset_pref(&s->temps[i]);
2483     }
2484 }
2485 
2486 /* liveness analysis: end of basic block: all temps are dead, globals
2487    and local temps should be in memory. */
la_bb_end(TCGContext * s,int ng,int nt)2488 static void la_bb_end(TCGContext *s, int ng, int nt)
2489 {
2490     int i;
2491 
2492     for (i = 0; i < ng; ++i) {
2493         s->temps[i].state = TS_DEAD | TS_MEM;
2494         la_reset_pref(&s->temps[i]);
2495     }
2496     for (i = ng; i < nt; ++i) {
2497         s->temps[i].state = (s->temps[i].temp_local
2498                              ? TS_DEAD | TS_MEM
2499                              : TS_DEAD);
2500         la_reset_pref(&s->temps[i]);
2501     }
2502 }
2503 
2504 /* liveness analysis: sync globals back to memory.  */
la_global_sync(TCGContext * s,int ng)2505 static void la_global_sync(TCGContext *s, int ng)
2506 {
2507     int i;
2508 
2509     for (i = 0; i < ng; ++i) {
2510         int state = s->temps[i].state;
2511         s->temps[i].state = state | TS_MEM;
2512         if (state == TS_DEAD) {
2513             /* If the global was previously dead, reset prefs.  */
2514             la_reset_pref(&s->temps[i]);
2515         }
2516     }
2517 }
2518 
2519 /* liveness analysis: sync globals back to memory and kill.  */
la_global_kill(TCGContext * s,int ng)2520 static void la_global_kill(TCGContext *s, int ng)
2521 {
2522     int i;
2523 
2524     for (i = 0; i < ng; i++) {
2525         s->temps[i].state = TS_DEAD | TS_MEM;
2526         la_reset_pref(&s->temps[i]);
2527     }
2528 }
2529 
2530 /* liveness analysis: note live globals crossing calls.  */
la_cross_call(TCGContext * s,int nt)2531 static void la_cross_call(TCGContext *s, int nt)
2532 {
2533     TCGRegSet mask = ~tcg_target_call_clobber_regs;
2534     int i;
2535 
2536     for (i = 0; i < nt; i++) {
2537         TCGTemp *ts = &s->temps[i];
2538         if (!(ts->state & TS_DEAD)) {
2539             TCGRegSet *pset = la_temp_pref(ts);
2540             TCGRegSet set = *pset;
2541 
2542             set &= mask;
2543             /* If the combination is not possible, restart.  */
2544             if (set == 0) {
2545                 set = tcg_target_available_regs[ts->type] & mask;
2546             }
2547             *pset = set;
2548         }
2549     }
2550 }
2551 
2552 /* Liveness analysis : update the opc_arg_life array to tell if a
2553    given input arguments is dead. Instructions updating dead
2554    temporaries are removed. */
liveness_pass_1(TCGContext * s)2555 static void liveness_pass_1(TCGContext *s)
2556 {
2557     int nb_globals = s->nb_globals;
2558     int nb_temps = s->nb_temps;
2559     TCGOp *op, *op_prev;
2560     TCGRegSet *prefs;
2561     int i;
2562 
2563     prefs = tcg_malloc(sizeof(TCGRegSet) * nb_temps);
2564     for (i = 0; i < nb_temps; ++i) {
2565         s->temps[i].state_ptr = prefs + i;
2566     }
2567 
2568     /* ??? Should be redundant with the exit_tb that ends the TB.  */
2569     la_func_end(s, nb_globals, nb_temps);
2570 
2571     QTAILQ_FOREACH_REVERSE_SAFE(op, &s->ops, link, op_prev) {
2572         int nb_iargs, nb_oargs;
2573         TCGOpcode opc_new, opc_new2;
2574         bool have_opc_new2;
2575         TCGLifeData arg_life = 0;
2576         TCGTemp *ts;
2577         TCGOpcode opc = op->opc;
2578         const TCGOpDef *def = &tcg_op_defs[opc];
2579 
2580         switch (opc) {
2581         case INDEX_op_call:
2582             {
2583                 int call_flags;
2584                 int nb_call_regs;
2585 
2586                 nb_oargs = TCGOP_CALLO(op);
2587                 nb_iargs = TCGOP_CALLI(op);
2588                 call_flags = op->args[nb_oargs + nb_iargs + 1];
2589 
2590                 /* pure functions can be removed if their result is unused */
2591                 if (call_flags & TCG_CALL_NO_SIDE_EFFECTS) {
2592                     for (i = 0; i < nb_oargs; i++) {
2593                         ts = arg_temp(op->args[i]);
2594                         if (ts->state != TS_DEAD) {
2595                             goto do_not_remove_call;
2596                         }
2597                     }
2598                     goto do_remove;
2599                 }
2600             do_not_remove_call:
2601 
2602                 /* Output args are dead.  */
2603                 for (i = 0; i < nb_oargs; i++) {
2604                     ts = arg_temp(op->args[i]);
2605                     if (ts->state & TS_DEAD) {
2606                         arg_life |= DEAD_ARG << i;
2607                     }
2608                     if (ts->state & TS_MEM) {
2609                         arg_life |= SYNC_ARG << i;
2610                     }
2611                     ts->state = TS_DEAD;
2612                     la_reset_pref(ts);
2613 
2614                     /* Not used -- it will be tcg_target_call_oarg_regs[i].  */
2615                     op->output_pref[i] = 0;
2616                 }
2617 
2618                 if (!(call_flags & (TCG_CALL_NO_WRITE_GLOBALS |
2619                                     TCG_CALL_NO_READ_GLOBALS))) {
2620                     la_global_kill(s, nb_globals);
2621                 } else if (!(call_flags & TCG_CALL_NO_READ_GLOBALS)) {
2622                     la_global_sync(s, nb_globals);
2623                 }
2624 
2625                 /* Record arguments that die in this helper.  */
2626                 for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2627                     ts = arg_temp(op->args[i]);
2628                     if (ts && ts->state & TS_DEAD) {
2629                         arg_life |= DEAD_ARG << i;
2630                     }
2631                 }
2632 
2633                 /* For all live registers, remove call-clobbered prefs.  */
2634                 la_cross_call(s, nb_temps);
2635 
2636                 nb_call_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
2637 
2638                 /* Input arguments are live for preceding opcodes.  */
2639                 for (i = 0; i < nb_iargs; i++) {
2640                     ts = arg_temp(op->args[i + nb_oargs]);
2641                     if (ts && ts->state & TS_DEAD) {
2642                         /* For those arguments that die, and will be allocated
2643                          * in registers, clear the register set for that arg,
2644                          * to be filled in below.  For args that will be on
2645                          * the stack, reset to any available reg.
2646                          */
2647                         *la_temp_pref(ts)
2648                             = (i < nb_call_regs ? 0 :
2649                                tcg_target_available_regs[ts->type]);
2650                         ts->state &= ~TS_DEAD;
2651                     }
2652                 }
2653 
2654                 /* For each input argument, add its input register to prefs.
2655                    If a temp is used once, this produces a single set bit.  */
2656                 for (i = 0; i < MIN(nb_call_regs, nb_iargs); i++) {
2657                     ts = arg_temp(op->args[i + nb_oargs]);
2658                     if (ts) {
2659                         tcg_regset_set_reg(*la_temp_pref(ts),
2660                                            tcg_target_call_iarg_regs[i]);
2661                     }
2662                 }
2663             }
2664             break;
2665         case INDEX_op_insn_start:
2666             break;
2667         case INDEX_op_discard:
2668             /* mark the temporary as dead */
2669             ts = arg_temp(op->args[0]);
2670             ts->state = TS_DEAD;
2671             la_reset_pref(ts);
2672             break;
2673 
2674         case INDEX_op_add2_i32:
2675             opc_new = INDEX_op_add_i32;
2676             goto do_addsub2;
2677         case INDEX_op_sub2_i32:
2678             opc_new = INDEX_op_sub_i32;
2679             goto do_addsub2;
2680         case INDEX_op_add2_i64:
2681             opc_new = INDEX_op_add_i64;
2682             goto do_addsub2;
2683         case INDEX_op_sub2_i64:
2684             opc_new = INDEX_op_sub_i64;
2685         do_addsub2:
2686             nb_iargs = 4;
2687             nb_oargs = 2;
2688             /* Test if the high part of the operation is dead, but not
2689                the low part.  The result can be optimized to a simple
2690                add or sub.  This happens often for x86_64 guest when the
2691                cpu mode is set to 32 bit.  */
2692             if (arg_temp(op->args[1])->state == TS_DEAD) {
2693                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2694                     goto do_remove;
2695                 }
2696                 /* Replace the opcode and adjust the args in place,
2697                    leaving 3 unused args at the end.  */
2698                 op->opc = opc = opc_new;
2699                 op->args[1] = op->args[2];
2700                 op->args[2] = op->args[4];
2701                 /* Fall through and mark the single-word operation live.  */
2702                 nb_iargs = 2;
2703                 nb_oargs = 1;
2704             }
2705             goto do_not_remove;
2706 
2707         case INDEX_op_mulu2_i32:
2708             opc_new = INDEX_op_mul_i32;
2709             opc_new2 = INDEX_op_muluh_i32;
2710             have_opc_new2 = TCG_TARGET_HAS_muluh_i32;
2711             goto do_mul2;
2712         case INDEX_op_muls2_i32:
2713             opc_new = INDEX_op_mul_i32;
2714             opc_new2 = INDEX_op_mulsh_i32;
2715             have_opc_new2 = TCG_TARGET_HAS_mulsh_i32;
2716             goto do_mul2;
2717         case INDEX_op_mulu2_i64:
2718             opc_new = INDEX_op_mul_i64;
2719             opc_new2 = INDEX_op_muluh_i64;
2720             have_opc_new2 = TCG_TARGET_HAS_muluh_i64;
2721             goto do_mul2;
2722         case INDEX_op_muls2_i64:
2723             opc_new = INDEX_op_mul_i64;
2724             opc_new2 = INDEX_op_mulsh_i64;
2725             have_opc_new2 = TCG_TARGET_HAS_mulsh_i64;
2726             goto do_mul2;
2727         do_mul2:
2728             nb_iargs = 2;
2729             nb_oargs = 2;
2730             if (arg_temp(op->args[1])->state == TS_DEAD) {
2731                 if (arg_temp(op->args[0])->state == TS_DEAD) {
2732                     /* Both parts of the operation are dead.  */
2733                     goto do_remove;
2734                 }
2735                 /* The high part of the operation is dead; generate the low. */
2736                 op->opc = opc = opc_new;
2737                 op->args[1] = op->args[2];
2738                 op->args[2] = op->args[3];
2739             } else if (arg_temp(op->args[0])->state == TS_DEAD && have_opc_new2) {
2740                 /* The low part of the operation is dead; generate the high. */
2741                 op->opc = opc = opc_new2;
2742                 op->args[0] = op->args[1];
2743                 op->args[1] = op->args[2];
2744                 op->args[2] = op->args[3];
2745             } else {
2746                 goto do_not_remove;
2747             }
2748             /* Mark the single-word operation live.  */
2749             nb_oargs = 1;
2750             goto do_not_remove;
2751 
2752         default:
2753             /* XXX: optimize by hardcoding common cases (e.g. triadic ops) */
2754             nb_iargs = def->nb_iargs;
2755             nb_oargs = def->nb_oargs;
2756 
2757             /* Test if the operation can be removed because all
2758                its outputs are dead. We assume that nb_oargs == 0
2759                implies side effects */
2760             if (!(def->flags & TCG_OPF_SIDE_EFFECTS) && nb_oargs != 0) {
2761                 for (i = 0; i < nb_oargs; i++) {
2762                     if (arg_temp(op->args[i])->state != TS_DEAD) {
2763                         goto do_not_remove;
2764                     }
2765                 }
2766                 goto do_remove;
2767             }
2768             goto do_not_remove;
2769 
2770         do_remove:
2771             tcg_op_remove(s, op);
2772             break;
2773 
2774         do_not_remove:
2775             for (i = 0; i < nb_oargs; i++) {
2776                 ts = arg_temp(op->args[i]);
2777 
2778                 /* Remember the preference of the uses that followed.  */
2779                 op->output_pref[i] = *la_temp_pref(ts);
2780 
2781                 /* Output args are dead.  */
2782                 if (ts->state & TS_DEAD) {
2783                     arg_life |= DEAD_ARG << i;
2784                 }
2785                 if (ts->state & TS_MEM) {
2786                     arg_life |= SYNC_ARG << i;
2787                 }
2788                 ts->state = TS_DEAD;
2789                 la_reset_pref(ts);
2790             }
2791 
2792             /* If end of basic block, update.  */
2793             if (def->flags & TCG_OPF_BB_EXIT) {
2794                 la_func_end(s, nb_globals, nb_temps);
2795             } else if (def->flags & TCG_OPF_BB_END) {
2796                 la_bb_end(s, nb_globals, nb_temps);
2797             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2798                 la_global_sync(s, nb_globals);
2799                 if (def->flags & TCG_OPF_CALL_CLOBBER) {
2800                     la_cross_call(s, nb_temps);
2801                 }
2802             }
2803 
2804             /* Record arguments that die in this opcode.  */
2805             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2806                 ts = arg_temp(op->args[i]);
2807                 if (ts->state & TS_DEAD) {
2808                     arg_life |= DEAD_ARG << i;
2809                 }
2810             }
2811 
2812             /* Input arguments are live for preceding opcodes.  */
2813             for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2814                 ts = arg_temp(op->args[i]);
2815                 if (ts->state & TS_DEAD) {
2816                     /* For operands that were dead, initially allow
2817                        all regs for the type.  */
2818                     *la_temp_pref(ts) = tcg_target_available_regs[ts->type];
2819                     ts->state &= ~TS_DEAD;
2820                 }
2821             }
2822 
2823             /* Incorporate constraints for this operand.  */
2824             switch (opc) {
2825             case INDEX_op_mov_i32:
2826             case INDEX_op_mov_i64:
2827                 /* Note that these are TCG_OPF_NOT_PRESENT and do not
2828                    have proper constraints.  That said, special case
2829                    moves to propagate preferences backward.  */
2830                 if (IS_DEAD_ARG(1)) {
2831                     *la_temp_pref(arg_temp(op->args[0]))
2832                         = *la_temp_pref(arg_temp(op->args[1]));
2833                 }
2834                 break;
2835 
2836             default:
2837                 for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
2838                     const TCGArgConstraint *ct = &def->args_ct[i];
2839                     TCGRegSet set, *pset;
2840 
2841                     ts = arg_temp(op->args[i]);
2842                     pset = la_temp_pref(ts);
2843                     set = *pset;
2844 
2845                     set &= ct->u.regs;
2846                     if (ct->ct & TCG_CT_IALIAS) {
2847                         set &= op->output_pref[ct->alias_index];
2848                     }
2849                     /* If the combination is not possible, restart.  */
2850                     if (set == 0) {
2851                         set = ct->u.regs;
2852                     }
2853                     *pset = set;
2854                 }
2855                 break;
2856             }
2857             break;
2858         }
2859         op->life = arg_life;
2860     }
2861 }
2862 
2863 /* Liveness analysis: Convert indirect regs to direct temporaries.  */
liveness_pass_2(TCGContext * s)2864 static bool liveness_pass_2(TCGContext *s)
2865 {
2866     int nb_globals = s->nb_globals;
2867     int nb_temps, i;
2868     bool changes = false;
2869     TCGOp *op, *op_next;
2870 
2871     /* Create a temporary for each indirect global.  */
2872     for (i = 0; i < nb_globals; ++i) {
2873         TCGTemp *its = &s->temps[i];
2874         if (its->indirect_reg) {
2875             TCGTemp *dts = tcg_temp_alloc(s);
2876             dts->type = its->type;
2877             dts->base_type = its->base_type;
2878             its->state_ptr = dts;
2879         } else {
2880             its->state_ptr = NULL;
2881         }
2882         /* All globals begin dead.  */
2883         its->state = TS_DEAD;
2884     }
2885     for (nb_temps = s->nb_temps; i < nb_temps; ++i) {
2886         TCGTemp *its = &s->temps[i];
2887         its->state_ptr = NULL;
2888         its->state = TS_DEAD;
2889     }
2890 
2891     QTAILQ_FOREACH_SAFE(op, &s->ops, link, op_next) {
2892         TCGOpcode opc = op->opc;
2893         const TCGOpDef *def = &tcg_op_defs[opc];
2894         TCGLifeData arg_life = op->life;
2895         int nb_iargs, nb_oargs, call_flags;
2896         TCGTemp *arg_ts, *dir_ts;
2897 
2898         if (opc == INDEX_op_call) {
2899             nb_oargs = TCGOP_CALLO(op);
2900             nb_iargs = TCGOP_CALLI(op);
2901             call_flags = op->args[nb_oargs + nb_iargs + 1];
2902         } else {
2903             nb_iargs = def->nb_iargs;
2904             nb_oargs = def->nb_oargs;
2905 
2906             /* Set flags similar to how calls require.  */
2907             if (def->flags & TCG_OPF_BB_END) {
2908                 /* Like writing globals: save_globals */
2909                 call_flags = 0;
2910             } else if (def->flags & TCG_OPF_SIDE_EFFECTS) {
2911                 /* Like reading globals: sync_globals */
2912                 call_flags = TCG_CALL_NO_WRITE_GLOBALS;
2913             } else {
2914                 /* No effect on globals.  */
2915                 call_flags = (TCG_CALL_NO_READ_GLOBALS |
2916                               TCG_CALL_NO_WRITE_GLOBALS);
2917             }
2918         }
2919 
2920         /* Make sure that input arguments are available.  */
2921         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2922             arg_ts = arg_temp(op->args[i]);
2923             if (arg_ts) {
2924                 dir_ts = arg_ts->state_ptr;
2925                 if (dir_ts && arg_ts->state == TS_DEAD) {
2926                     TCGOpcode lopc = (arg_ts->type == TCG_TYPE_I32
2927                                       ? INDEX_op_ld_i32
2928                                       : INDEX_op_ld_i64);
2929                     TCGOp *lop = tcg_op_insert_before(s, op, lopc);
2930 
2931                     lop->args[0] = temp_arg(dir_ts);
2932                     lop->args[1] = temp_arg(arg_ts->mem_base);
2933                     lop->args[2] = arg_ts->mem_offset;
2934 
2935                     /* Loaded, but synced with memory.  */
2936                     arg_ts->state = TS_MEM;
2937                 }
2938             }
2939         }
2940 
2941         /* Perform input replacement, and mark inputs that became dead.
2942            No action is required except keeping temp_state up to date
2943            so that we reload when needed.  */
2944         for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
2945             arg_ts = arg_temp(op->args[i]);
2946             if (arg_ts) {
2947                 dir_ts = arg_ts->state_ptr;
2948                 if (dir_ts) {
2949                     op->args[i] = temp_arg(dir_ts);
2950                     changes = true;
2951                     if (IS_DEAD_ARG(i)) {
2952                         arg_ts->state = TS_DEAD;
2953                     }
2954                 }
2955             }
2956         }
2957 
2958         /* Liveness analysis should ensure that the following are
2959            all correct, for call sites and basic block end points.  */
2960         if (call_flags & TCG_CALL_NO_READ_GLOBALS) {
2961             /* Nothing to do */
2962         } else if (call_flags & TCG_CALL_NO_WRITE_GLOBALS) {
2963             for (i = 0; i < nb_globals; ++i) {
2964                 /* Liveness should see that globals are synced back,
2965                    that is, either TS_DEAD or TS_MEM.  */
2966                 arg_ts = &s->temps[i];
2967                 tcg_debug_assert(arg_ts->state_ptr == 0
2968                                  || arg_ts->state != 0);
2969             }
2970         } else {
2971             for (i = 0; i < nb_globals; ++i) {
2972                 /* Liveness should see that globals are saved back,
2973                    that is, TS_DEAD, waiting to be reloaded.  */
2974                 arg_ts = &s->temps[i];
2975                 tcg_debug_assert(arg_ts->state_ptr == 0
2976                                  || arg_ts->state == TS_DEAD);
2977             }
2978         }
2979 
2980         /* Outputs become available.  */
2981         for (i = 0; i < nb_oargs; i++) {
2982             arg_ts = arg_temp(op->args[i]);
2983             dir_ts = arg_ts->state_ptr;
2984             if (!dir_ts) {
2985                 continue;
2986             }
2987             op->args[i] = temp_arg(dir_ts);
2988             changes = true;
2989 
2990             /* The output is now live and modified.  */
2991             arg_ts->state = 0;
2992 
2993             /* Sync outputs upon their last write.  */
2994             if (NEED_SYNC_ARG(i)) {
2995                 TCGOpcode sopc = (arg_ts->type == TCG_TYPE_I32
2996                                   ? INDEX_op_st_i32
2997                                   : INDEX_op_st_i64);
2998                 TCGOp *sop = tcg_op_insert_after(s, op, sopc);
2999 
3000                 sop->args[0] = temp_arg(dir_ts);
3001                 sop->args[1] = temp_arg(arg_ts->mem_base);
3002                 sop->args[2] = arg_ts->mem_offset;
3003 
3004                 arg_ts->state = TS_MEM;
3005             }
3006             /* Drop outputs that are dead.  */
3007             if (IS_DEAD_ARG(i)) {
3008                 arg_ts->state = TS_DEAD;
3009             }
3010         }
3011     }
3012 
3013     return changes;
3014 }
3015 
3016 #ifdef CONFIG_DEBUG_TCG
dump_regs(TCGContext * s)3017 static void dump_regs(TCGContext *s)
3018 {
3019     TCGTemp *ts;
3020     int i;
3021     char buf[64];
3022 
3023     for(i = 0; i < s->nb_temps; i++) {
3024         ts = &s->temps[i];
3025         printf("  %10s: ", tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3026         switch(ts->val_type) {
3027         case TEMP_VAL_REG:
3028             printf("%s", tcg_target_reg_names[ts->reg]);
3029             break;
3030         case TEMP_VAL_MEM:
3031             printf("%d(%s)", (int)ts->mem_offset,
3032                    tcg_target_reg_names[ts->mem_base->reg]);
3033             break;
3034         case TEMP_VAL_CONST:
3035             printf("$0x%" TCG_PRIlx, ts->val);
3036             break;
3037         case TEMP_VAL_DEAD:
3038             printf("D");
3039             break;
3040         default:
3041             printf("???");
3042             break;
3043         }
3044         printf("\n");
3045     }
3046 
3047     for(i = 0; i < TCG_TARGET_NB_REGS; i++) {
3048         if (s->reg_to_temp[i] != NULL) {
3049             printf("%s: %s\n",
3050                    tcg_target_reg_names[i],
3051                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), s->reg_to_temp[i]));
3052         }
3053     }
3054 }
3055 
check_regs(TCGContext * s)3056 static void check_regs(TCGContext *s)
3057 {
3058     int reg;
3059     int k;
3060     TCGTemp *ts;
3061     char buf[64];
3062 
3063     for (reg = 0; reg < TCG_TARGET_NB_REGS; reg++) {
3064         ts = s->reg_to_temp[reg];
3065         if (ts != NULL) {
3066             if (ts->val_type != TEMP_VAL_REG || ts->reg != reg) {
3067                 printf("Inconsistency for register %s:\n",
3068                        tcg_target_reg_names[reg]);
3069                 goto fail;
3070             }
3071         }
3072     }
3073     for (k = 0; k < s->nb_temps; k++) {
3074         ts = &s->temps[k];
3075         if (ts->val_type == TEMP_VAL_REG && !ts->fixed_reg
3076             && s->reg_to_temp[ts->reg] != ts) {
3077             printf("Inconsistency for temp %s:\n",
3078                    tcg_get_arg_str_ptr(s, buf, sizeof(buf), ts));
3079         fail:
3080             printf("reg state:\n");
3081             dump_regs(s);
3082             tcg_abort();
3083         }
3084     }
3085 }
3086 #endif
3087 
temp_allocate_frame(TCGContext * s,TCGTemp * ts)3088 static void temp_allocate_frame(TCGContext *s, TCGTemp *ts)
3089 {
3090 #if !(defined(__sparc__) && TCG_TARGET_REG_BITS == 64)
3091     /* Sparc64 stack is accessed with offset of 2047 */
3092     s->current_frame_offset = (s->current_frame_offset +
3093                                (tcg_target_long)sizeof(tcg_target_long) - 1) &
3094         ~(sizeof(tcg_target_long) - 1);
3095 #endif
3096     if (s->current_frame_offset + (tcg_target_long)sizeof(tcg_target_long) >
3097         s->frame_end) {
3098         tcg_abort();
3099     }
3100     ts->mem_offset = s->current_frame_offset;
3101     ts->mem_base = s->frame_temp;
3102     ts->mem_allocated = 1;
3103     s->current_frame_offset += sizeof(tcg_target_long);
3104 }
3105 
3106 static void temp_load(TCGContext *, TCGTemp *, TCGRegSet, TCGRegSet, TCGRegSet);
3107 
3108 /* Mark a temporary as free or dead.  If 'free_or_dead' is negative,
3109    mark it free; otherwise mark it dead.  */
temp_free_or_dead(TCGContext * s,TCGTemp * ts,int free_or_dead)3110 static void temp_free_or_dead(TCGContext *s, TCGTemp *ts, int free_or_dead)
3111 {
3112     if (ts->fixed_reg) {
3113         return;
3114     }
3115     if (ts->val_type == TEMP_VAL_REG) {
3116         s->reg_to_temp[ts->reg] = NULL;
3117     }
3118     ts->val_type = (free_or_dead < 0
3119                     || ts->temp_local
3120                     || ts->temp_global
3121                     ? TEMP_VAL_MEM : TEMP_VAL_DEAD);
3122 }
3123 
3124 /* Mark a temporary as dead.  */
temp_dead(TCGContext * s,TCGTemp * ts)3125 static inline void temp_dead(TCGContext *s, TCGTemp *ts)
3126 {
3127     temp_free_or_dead(s, ts, 1);
3128 }
3129 
3130 /* Sync a temporary to memory. 'allocated_regs' is used in case a temporary
3131    registers needs to be allocated to store a constant.  If 'free_or_dead'
3132    is non-zero, subsequently release the temporary; if it is positive, the
3133    temp is dead; if it is negative, the temp is free.  */
temp_sync(TCGContext * s,TCGTemp * ts,TCGRegSet allocated_regs,TCGRegSet preferred_regs,int free_or_dead)3134 static void temp_sync(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs,
3135                       TCGRegSet preferred_regs, int free_or_dead)
3136 {
3137     if (ts->fixed_reg) {
3138         return;
3139     }
3140     if (!ts->mem_coherent) {
3141         if (!ts->mem_allocated) {
3142             temp_allocate_frame(s, ts);
3143         }
3144         switch (ts->val_type) {
3145         case TEMP_VAL_CONST:
3146             /* If we're going to free the temp immediately, then we won't
3147                require it later in a register, so attempt to store the
3148                constant to memory directly.  */
3149             if (free_or_dead
3150                 && tcg_out_sti(s, ts->type, ts->val,
3151                                ts->mem_base->reg, ts->mem_offset)) {
3152                 break;
3153             }
3154             temp_load(s, ts, tcg_target_available_regs[ts->type],
3155                       allocated_regs, preferred_regs);
3156             /* fallthrough */
3157 
3158         case TEMP_VAL_REG:
3159             tcg_out_st(s, ts->type, ts->reg,
3160                        ts->mem_base->reg, ts->mem_offset);
3161             break;
3162 
3163         case TEMP_VAL_MEM:
3164             break;
3165 
3166         case TEMP_VAL_DEAD:
3167         default:
3168             tcg_abort();
3169         }
3170         ts->mem_coherent = 1;
3171     }
3172     if (free_or_dead) {
3173         temp_free_or_dead(s, ts, free_or_dead);
3174     }
3175 }
3176 
3177 /* free register 'reg' by spilling the corresponding temporary if necessary */
tcg_reg_free(TCGContext * s,TCGReg reg,TCGRegSet allocated_regs)3178 static void tcg_reg_free(TCGContext *s, TCGReg reg, TCGRegSet allocated_regs)
3179 {
3180     TCGTemp *ts = s->reg_to_temp[reg];
3181     if (ts != NULL) {
3182         temp_sync(s, ts, allocated_regs, 0, -1);
3183     }
3184 }
3185 
3186 /**
3187  * tcg_reg_alloc:
3188  * @required_regs: Set of registers in which we must allocate.
3189  * @allocated_regs: Set of registers which must be avoided.
3190  * @preferred_regs: Set of registers we should prefer.
3191  * @rev: True if we search the registers in "indirect" order.
3192  *
3193  * The allocated register must be in @required_regs & ~@allocated_regs,
3194  * but if we can put it in @preferred_regs we may save a move later.
3195  */
tcg_reg_alloc(TCGContext * s,TCGRegSet required_regs,TCGRegSet allocated_regs,TCGRegSet preferred_regs,bool rev)3196 static TCGReg tcg_reg_alloc(TCGContext *s, TCGRegSet required_regs,
3197                             TCGRegSet allocated_regs,
3198                             TCGRegSet preferred_regs, bool rev)
3199 {
3200     int i, j, f, n = ARRAY_SIZE(tcg_target_reg_alloc_order);
3201     TCGRegSet reg_ct[2];
3202     const int *order;
3203 
3204     reg_ct[1] = required_regs & ~allocated_regs;
3205     tcg_debug_assert(reg_ct[1] != 0);
3206     reg_ct[0] = reg_ct[1] & preferred_regs;
3207 
3208     /* Skip the preferred_regs option if it cannot be satisfied,
3209        or if the preference made no difference.  */
3210     f = reg_ct[0] == 0 || reg_ct[0] == reg_ct[1];
3211 
3212     order = rev ? indirect_reg_alloc_order : tcg_target_reg_alloc_order;
3213 
3214     /* Try free registers, preferences first.  */
3215     for (j = f; j < 2; j++) {
3216         TCGRegSet set = reg_ct[j];
3217 
3218         if (tcg_regset_single(set)) {
3219             /* One register in the set.  */
3220             TCGReg reg = tcg_regset_first(set);
3221             if (s->reg_to_temp[reg] == NULL) {
3222                 return reg;
3223             }
3224         } else {
3225             for (i = 0; i < n; i++) {
3226                 TCGReg reg = order[i];
3227                 if (s->reg_to_temp[reg] == NULL &&
3228                     tcg_regset_test_reg(set, reg)) {
3229                     return reg;
3230                 }
3231             }
3232         }
3233     }
3234 
3235     /* We must spill something.  */
3236     for (j = f; j < 2; j++) {
3237         TCGRegSet set = reg_ct[j];
3238 
3239         if (tcg_regset_single(set)) {
3240             /* One register in the set.  */
3241             TCGReg reg = tcg_regset_first(set);
3242             tcg_reg_free(s, reg, allocated_regs);
3243             return reg;
3244         } else {
3245             for (i = 0; i < n; i++) {
3246                 TCGReg reg = order[i];
3247                 if (tcg_regset_test_reg(set, reg)) {
3248                     tcg_reg_free(s, reg, allocated_regs);
3249                     return reg;
3250                 }
3251             }
3252         }
3253     }
3254 
3255     tcg_abort();
3256 }
3257 
3258 /* Make sure the temporary is in a register.  If needed, allocate the register
3259    from DESIRED while avoiding ALLOCATED.  */
temp_load(TCGContext * s,TCGTemp * ts,TCGRegSet desired_regs,TCGRegSet allocated_regs,TCGRegSet preferred_regs)3260 static void temp_load(TCGContext *s, TCGTemp *ts, TCGRegSet desired_regs,
3261                       TCGRegSet allocated_regs, TCGRegSet preferred_regs)
3262 {
3263     TCGReg reg;
3264 
3265     switch (ts->val_type) {
3266     case TEMP_VAL_REG:
3267         return;
3268     case TEMP_VAL_CONST:
3269         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3270                             preferred_regs, ts->indirect_base);
3271         tcg_out_movi(s, ts->type, reg, ts->val);
3272         ts->mem_coherent = 0;
3273         break;
3274     case TEMP_VAL_MEM:
3275         reg = tcg_reg_alloc(s, desired_regs, allocated_regs,
3276                             preferred_regs, ts->indirect_base);
3277         tcg_out_ld(s, ts->type, reg, ts->mem_base->reg, ts->mem_offset);
3278         ts->mem_coherent = 1;
3279         break;
3280     case TEMP_VAL_DEAD:
3281     default:
3282         tcg_abort();
3283     }
3284     ts->reg = reg;
3285     ts->val_type = TEMP_VAL_REG;
3286     s->reg_to_temp[reg] = ts;
3287 }
3288 
3289 /* Save a temporary to memory. 'allocated_regs' is used in case a
3290    temporary registers needs to be allocated to store a constant.  */
temp_save(TCGContext * s,TCGTemp * ts,TCGRegSet allocated_regs)3291 static void temp_save(TCGContext *s, TCGTemp *ts, TCGRegSet allocated_regs)
3292 {
3293     /* The liveness analysis already ensures that globals are back
3294        in memory. Keep an tcg_debug_assert for safety. */
3295     tcg_debug_assert(ts->val_type == TEMP_VAL_MEM || ts->fixed_reg);
3296 }
3297 
3298 /* save globals to their canonical location and assume they can be
3299    modified be the following code. 'allocated_regs' is used in case a
3300    temporary registers needs to be allocated to store a constant. */
save_globals(TCGContext * s,TCGRegSet allocated_regs)3301 static void save_globals(TCGContext *s, TCGRegSet allocated_regs)
3302 {
3303     int i, n;
3304 
3305     for (i = 0, n = s->nb_globals; i < n; i++) {
3306         temp_save(s, &s->temps[i], allocated_regs);
3307     }
3308 }
3309 
3310 /* sync globals to their canonical location and assume they can be
3311    read by the following code. 'allocated_regs' is used in case a
3312    temporary registers needs to be allocated to store a constant. */
sync_globals(TCGContext * s,TCGRegSet allocated_regs)3313 static void sync_globals(TCGContext *s, TCGRegSet allocated_regs)
3314 {
3315     int i, n;
3316 
3317     for (i = 0, n = s->nb_globals; i < n; i++) {
3318         TCGTemp *ts = &s->temps[i];
3319         tcg_debug_assert(ts->val_type != TEMP_VAL_REG
3320                          || ts->fixed_reg
3321                          || ts->mem_coherent);
3322     }
3323 }
3324 
3325 /* at the end of a basic block, we assume all temporaries are dead and
3326    all globals are stored at their canonical location. */
tcg_reg_alloc_bb_end(TCGContext * s,TCGRegSet allocated_regs)3327 static void tcg_reg_alloc_bb_end(TCGContext *s, TCGRegSet allocated_regs)
3328 {
3329     int i;
3330 
3331     for (i = s->nb_globals; i < s->nb_temps; i++) {
3332         TCGTemp *ts = &s->temps[i];
3333         if (ts->temp_local) {
3334             temp_save(s, ts, allocated_regs);
3335         } else {
3336             /* The liveness analysis already ensures that temps are dead.
3337                Keep an tcg_debug_assert for safety. */
3338             tcg_debug_assert(ts->val_type == TEMP_VAL_DEAD);
3339         }
3340     }
3341 
3342     save_globals(s, allocated_regs);
3343 }
3344 
3345 /*
3346  * Specialized code generation for INDEX_op_movi_*.
3347  */
tcg_reg_alloc_do_movi(TCGContext * s,TCGTemp * ots,tcg_target_ulong val,TCGLifeData arg_life,TCGRegSet preferred_regs)3348 static void tcg_reg_alloc_do_movi(TCGContext *s, TCGTemp *ots,
3349                                   tcg_target_ulong val, TCGLifeData arg_life,
3350                                   TCGRegSet preferred_regs)
3351 {
3352     /* ENV should not be modified.  */
3353     tcg_debug_assert(!ots->fixed_reg);
3354 
3355     /* The movi is not explicitly generated here.  */
3356     if (ots->val_type == TEMP_VAL_REG) {
3357         s->reg_to_temp[ots->reg] = NULL;
3358     }
3359     ots->val_type = TEMP_VAL_CONST;
3360     ots->val = val;
3361     ots->mem_coherent = 0;
3362     if (NEED_SYNC_ARG(0)) {
3363         temp_sync(s, ots, s->reserved_regs, preferred_regs, IS_DEAD_ARG(0));
3364     } else if (IS_DEAD_ARG(0)) {
3365         temp_dead(s, ots);
3366     }
3367 }
3368 
tcg_reg_alloc_movi(TCGContext * s,const TCGOp * op)3369 static void tcg_reg_alloc_movi(TCGContext *s, const TCGOp *op)
3370 {
3371     TCGTemp *ots = arg_temp(op->args[0]);
3372     tcg_target_ulong val = op->args[1];
3373 
3374     tcg_reg_alloc_do_movi(s, ots, val, op->life, op->output_pref[0]);
3375 }
3376 
3377 /*
3378  * Specialized code generation for INDEX_op_mov_*.
3379  */
tcg_reg_alloc_mov(TCGContext * s,const TCGOp * op)3380 static void tcg_reg_alloc_mov(TCGContext *s, const TCGOp *op)
3381 {
3382     const TCGLifeData arg_life = op->life;
3383     TCGRegSet allocated_regs, preferred_regs;
3384     TCGTemp *ts, *ots;
3385     TCGType otype, itype;
3386 
3387     allocated_regs = s->reserved_regs;
3388     preferred_regs = op->output_pref[0];
3389     ots = arg_temp(op->args[0]);
3390     ts = arg_temp(op->args[1]);
3391 
3392     /* ENV should not be modified.  */
3393     tcg_debug_assert(!ots->fixed_reg);
3394 
3395     /* Note that otype != itype for no-op truncation.  */
3396     otype = ots->type;
3397     itype = ts->type;
3398 
3399     if (ts->val_type == TEMP_VAL_CONST) {
3400         /* propagate constant or generate sti */
3401         tcg_target_ulong val = ts->val;
3402         if (IS_DEAD_ARG(1)) {
3403             temp_dead(s, ts);
3404         }
3405         tcg_reg_alloc_do_movi(s, ots, val, arg_life, preferred_regs);
3406         return;
3407     }
3408 
3409     /* If the source value is in memory we're going to be forced
3410        to have it in a register in order to perform the copy.  Copy
3411        the SOURCE value into its own register first, that way we
3412        don't have to reload SOURCE the next time it is used. */
3413     if (ts->val_type == TEMP_VAL_MEM) {
3414         temp_load(s, ts, tcg_target_available_regs[itype],
3415                   allocated_regs, preferred_regs);
3416     }
3417 
3418     tcg_debug_assert(ts->val_type == TEMP_VAL_REG);
3419     if (IS_DEAD_ARG(0)) {
3420         /* mov to a non-saved dead register makes no sense (even with
3421            liveness analysis disabled). */
3422         tcg_debug_assert(NEED_SYNC_ARG(0));
3423         if (!ots->mem_allocated) {
3424             temp_allocate_frame(s, ots);
3425         }
3426         tcg_out_st(s, otype, ts->reg, ots->mem_base->reg, ots->mem_offset);
3427         if (IS_DEAD_ARG(1)) {
3428             temp_dead(s, ts);
3429         }
3430         temp_dead(s, ots);
3431     } else {
3432         if (IS_DEAD_ARG(1) && !ts->fixed_reg) {
3433             /* the mov can be suppressed */
3434             if (ots->val_type == TEMP_VAL_REG) {
3435                 s->reg_to_temp[ots->reg] = NULL;
3436             }
3437             ots->reg = ts->reg;
3438             temp_dead(s, ts);
3439         } else {
3440             if (ots->val_type != TEMP_VAL_REG) {
3441                 /* When allocating a new register, make sure to not spill the
3442                    input one. */
3443                 tcg_regset_set_reg(allocated_regs, ts->reg);
3444                 ots->reg = tcg_reg_alloc(s, tcg_target_available_regs[otype],
3445                                          allocated_regs, preferred_regs,
3446                                          ots->indirect_base);
3447             }
3448             if (!tcg_out_mov(s, otype, ots->reg, ts->reg)) {
3449                 /*
3450                  * Cross register class move not supported.
3451                  * Store the source register into the destination slot
3452                  * and leave the destination temp as TEMP_VAL_MEM.
3453                  */
3454                 assert(!ots->fixed_reg);
3455                 if (!ts->mem_allocated) {
3456                     temp_allocate_frame(s, ots);
3457                 }
3458                 tcg_out_st(s, ts->type, ts->reg,
3459                            ots->mem_base->reg, ots->mem_offset);
3460                 ots->mem_coherent = 1;
3461                 temp_free_or_dead(s, ots, -1);
3462                 return;
3463             }
3464         }
3465         ots->val_type = TEMP_VAL_REG;
3466         ots->mem_coherent = 0;
3467         s->reg_to_temp[ots->reg] = ots;
3468         if (NEED_SYNC_ARG(0)) {
3469             temp_sync(s, ots, allocated_regs, 0, 0);
3470         }
3471     }
3472 }
3473 
3474 /*
3475  * Specialized code generation for INDEX_op_dup_vec.
3476  */
tcg_reg_alloc_dup(TCGContext * s,const TCGOp * op)3477 static void tcg_reg_alloc_dup(TCGContext *s, const TCGOp *op)
3478 {
3479     const TCGLifeData arg_life = op->life;
3480     TCGRegSet dup_out_regs, dup_in_regs;
3481     TCGTemp *its, *ots;
3482     TCGType itype, vtype;
3483     intptr_t endian_fixup;
3484     unsigned vece;
3485     bool ok;
3486 
3487     ots = arg_temp(op->args[0]);
3488     its = arg_temp(op->args[1]);
3489 
3490     /* ENV should not be modified.  */
3491     tcg_debug_assert(!ots->fixed_reg);
3492 
3493     itype = its->type;
3494     vece = TCGOP_VECE(op);
3495     vtype = TCGOP_VECL(op) + TCG_TYPE_V64;
3496 
3497     if (its->val_type == TEMP_VAL_CONST) {
3498         /* Propagate constant via movi -> dupi.  */
3499         tcg_target_ulong val = its->val;
3500         if (IS_DEAD_ARG(1)) {
3501             temp_dead(s, its);
3502         }
3503         tcg_reg_alloc_do_movi(s, ots, val, arg_life, op->output_pref[0]);
3504         return;
3505     }
3506 
3507     dup_out_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[0].u.regs;
3508     dup_in_regs = tcg_op_defs[INDEX_op_dup_vec].args_ct[1].u.regs;
3509 
3510     /* Allocate the output register now.  */
3511     if (ots->val_type != TEMP_VAL_REG) {
3512         TCGRegSet allocated_regs = s->reserved_regs;
3513 
3514         if (!IS_DEAD_ARG(1) && its->val_type == TEMP_VAL_REG) {
3515             /* Make sure to not spill the input register. */
3516             tcg_regset_set_reg(allocated_regs, its->reg);
3517         }
3518         ots->reg = tcg_reg_alloc(s, dup_out_regs, allocated_regs,
3519                                  op->output_pref[0], ots->indirect_base);
3520         ots->val_type = TEMP_VAL_REG;
3521         ots->mem_coherent = 0;
3522         s->reg_to_temp[ots->reg] = ots;
3523     }
3524 
3525     switch (its->val_type) {
3526     case TEMP_VAL_REG:
3527         /*
3528          * The dup constriaints must be broad, covering all possible VECE.
3529          * However, tcg_op_dup_vec() gets to see the VECE and we allow it
3530          * to fail, indicating that extra moves are required for that case.
3531          */
3532         if (tcg_regset_test_reg(dup_in_regs, its->reg)) {
3533             if (tcg_out_dup_vec(s, vtype, vece, ots->reg, its->reg)) {
3534                 goto done;
3535             }
3536             /* Try again from memory or a vector input register.  */
3537         }
3538         if (!its->mem_coherent) {
3539             /*
3540              * The input register is not synced, and so an extra store
3541              * would be required to use memory.  Attempt an integer-vector
3542              * register move first.  We do not have a TCGRegSet for this.
3543              */
3544             if (tcg_out_mov(s, itype, ots->reg, its->reg)) {
3545                 break;
3546             }
3547             /* Sync the temp back to its slot and load from there.  */
3548             temp_sync(s, its, s->reserved_regs, 0, 0);
3549         }
3550         /* fall through */
3551 
3552     case TEMP_VAL_MEM:
3553 #ifdef HOST_WORDS_BIGENDIAN
3554         endian_fixup = itype == TCG_TYPE_I32 ? 4 : 8;
3555         endian_fixup -= 1 << vece;
3556 #else
3557         endian_fixup = 0;
3558 #endif
3559         if (tcg_out_dupm_vec(s, vtype, vece, ots->reg, its->mem_base->reg,
3560                              its->mem_offset + endian_fixup)) {
3561             goto done;
3562         }
3563         tcg_out_ld(s, itype, ots->reg, its->mem_base->reg, its->mem_offset);
3564         break;
3565 
3566     default:
3567         g_assert_not_reached();
3568     }
3569 
3570     /* We now have a vector input register, so dup must succeed. */
3571     ok = tcg_out_dup_vec(s, vtype, vece, ots->reg, ots->reg);
3572     tcg_debug_assert(ok);
3573 
3574  done:
3575     if (IS_DEAD_ARG(1)) {
3576         temp_dead(s, its);
3577     }
3578     if (NEED_SYNC_ARG(0)) {
3579         temp_sync(s, ots, s->reserved_regs, 0, 0);
3580     }
3581     if (IS_DEAD_ARG(0)) {
3582         temp_dead(s, ots);
3583     }
3584 }
3585 
tcg_reg_alloc_op(TCGContext * s,const TCGOp * op)3586 static void tcg_reg_alloc_op(TCGContext *s, const TCGOp *op)
3587 {
3588     const TCGLifeData arg_life = op->life;
3589     const TCGOpDef * const def = &tcg_op_defs[op->opc];
3590     TCGRegSet i_allocated_regs;
3591     TCGRegSet o_allocated_regs;
3592     int i, k, nb_iargs, nb_oargs;
3593     TCGReg reg;
3594     TCGArg arg;
3595     const TCGArgConstraint *arg_ct;
3596     TCGTemp *ts;
3597     TCGArg new_args[TCG_MAX_OP_ARGS];
3598     int const_args[TCG_MAX_OP_ARGS];
3599 
3600     nb_oargs = def->nb_oargs;
3601     nb_iargs = def->nb_iargs;
3602 
3603     /* copy constants */
3604     memcpy(new_args + nb_oargs + nb_iargs,
3605            op->args + nb_oargs + nb_iargs,
3606            sizeof(TCGArg) * def->nb_cargs);
3607 
3608     i_allocated_regs = s->reserved_regs;
3609     o_allocated_regs = s->reserved_regs;
3610 
3611     /* satisfy input constraints */
3612     for (k = 0; k < nb_iargs; k++) {
3613         TCGRegSet i_preferred_regs, o_preferred_regs;
3614 
3615         i = def->sorted_args[nb_oargs + k];
3616         arg = op->args[i];
3617         arg_ct = &def->args_ct[i];
3618         ts = arg_temp(arg);
3619 
3620         if (ts->val_type == TEMP_VAL_CONST
3621             && tcg_target_const_match(ts->val, ts->type, arg_ct)) {
3622             /* constant is OK for instruction */
3623             const_args[i] = 1;
3624             new_args[i] = ts->val;
3625             continue;
3626         }
3627 
3628         i_preferred_regs = o_preferred_regs = 0;
3629         if (arg_ct->ct & TCG_CT_IALIAS) {
3630             o_preferred_regs = op->output_pref[arg_ct->alias_index];
3631             if (ts->fixed_reg) {
3632                 /* if fixed register, we must allocate a new register
3633                    if the alias is not the same register */
3634                 if (arg != op->args[arg_ct->alias_index]) {
3635                     goto allocate_in_reg;
3636                 }
3637             } else {
3638                 /* if the input is aliased to an output and if it is
3639                    not dead after the instruction, we must allocate
3640                    a new register and move it */
3641                 if (!IS_DEAD_ARG(i)) {
3642                     goto allocate_in_reg;
3643                 }
3644 
3645                 /* check if the current register has already been allocated
3646                    for another input aliased to an output */
3647                 if (ts->val_type == TEMP_VAL_REG) {
3648                     int k2, i2;
3649                     reg = ts->reg;
3650                     for (k2 = 0 ; k2 < k ; k2++) {
3651                         i2 = def->sorted_args[nb_oargs + k2];
3652                         if ((def->args_ct[i2].ct & TCG_CT_IALIAS) &&
3653                             reg == new_args[i2]) {
3654                             goto allocate_in_reg;
3655                         }
3656                     }
3657                 }
3658                 i_preferred_regs = o_preferred_regs;
3659             }
3660         }
3661 
3662         temp_load(s, ts, arg_ct->u.regs, i_allocated_regs, i_preferred_regs);
3663         reg = ts->reg;
3664 
3665         if (tcg_regset_test_reg(arg_ct->u.regs, reg)) {
3666             /* nothing to do : the constraint is satisfied */
3667         } else {
3668         allocate_in_reg:
3669             /* allocate a new register matching the constraint
3670                and move the temporary register into it */
3671             temp_load(s, ts, tcg_target_available_regs[ts->type],
3672                       i_allocated_regs, 0);
3673             reg = tcg_reg_alloc(s, arg_ct->u.regs, i_allocated_regs,
3674                                 o_preferred_regs, ts->indirect_base);
3675             if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3676                 /*
3677                  * Cross register class move not supported.  Sync the
3678                  * temp back to its slot and load from there.
3679                  */
3680                 temp_sync(s, ts, i_allocated_regs, 0, 0);
3681                 tcg_out_ld(s, ts->type, reg,
3682                            ts->mem_base->reg, ts->mem_offset);
3683             }
3684         }
3685         new_args[i] = reg;
3686         const_args[i] = 0;
3687         tcg_regset_set_reg(i_allocated_regs, reg);
3688     }
3689 
3690     /* mark dead temporaries and free the associated registers */
3691     for (i = nb_oargs; i < nb_oargs + nb_iargs; i++) {
3692         if (IS_DEAD_ARG(i)) {
3693             temp_dead(s, arg_temp(op->args[i]));
3694         }
3695     }
3696 
3697     if (def->flags & TCG_OPF_BB_END) {
3698         tcg_reg_alloc_bb_end(s, i_allocated_regs);
3699     } else {
3700         if (def->flags & TCG_OPF_CALL_CLOBBER) {
3701             /* XXX: permit generic clobber register list ? */
3702             for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3703                 if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3704                     tcg_reg_free(s, i, i_allocated_regs);
3705                 }
3706             }
3707         }
3708         if (def->flags & TCG_OPF_SIDE_EFFECTS) {
3709             /* sync globals if the op has side effects and might trigger
3710                an exception. */
3711             sync_globals(s, i_allocated_regs);
3712         }
3713 
3714         /* satisfy the output constraints */
3715         for(k = 0; k < nb_oargs; k++) {
3716             i = def->sorted_args[k];
3717             arg = op->args[i];
3718             arg_ct = &def->args_ct[i];
3719             ts = arg_temp(arg);
3720 
3721             /* ENV should not be modified.  */
3722             tcg_debug_assert(!ts->fixed_reg);
3723 
3724             if ((arg_ct->ct & TCG_CT_ALIAS)
3725                 && !const_args[arg_ct->alias_index]) {
3726                 reg = new_args[arg_ct->alias_index];
3727             } else if (arg_ct->ct & TCG_CT_NEWREG) {
3728                 reg = tcg_reg_alloc(s, arg_ct->u.regs,
3729                                     i_allocated_regs | o_allocated_regs,
3730                                     op->output_pref[k], ts->indirect_base);
3731             } else {
3732                 reg = tcg_reg_alloc(s, arg_ct->u.regs, o_allocated_regs,
3733                                     op->output_pref[k], ts->indirect_base);
3734             }
3735             tcg_regset_set_reg(o_allocated_regs, reg);
3736             if (ts->val_type == TEMP_VAL_REG) {
3737                 s->reg_to_temp[ts->reg] = NULL;
3738             }
3739             ts->val_type = TEMP_VAL_REG;
3740             ts->reg = reg;
3741             /*
3742              * Temp value is modified, so the value kept in memory is
3743              * potentially not the same.
3744              */
3745             ts->mem_coherent = 0;
3746             s->reg_to_temp[reg] = ts;
3747             new_args[i] = reg;
3748         }
3749     }
3750 
3751     /* emit instruction */
3752     if (def->flags & TCG_OPF_VECTOR) {
3753         tcg_out_vec_op(s, op->opc, TCGOP_VECL(op), TCGOP_VECE(op),
3754                        new_args, const_args);
3755     } else {
3756         tcg_out_op(s, op->opc, new_args, const_args);
3757     }
3758 
3759     /* move the outputs in the correct register if needed */
3760     for(i = 0; i < nb_oargs; i++) {
3761         ts = arg_temp(op->args[i]);
3762 
3763         /* ENV should not be modified.  */
3764         tcg_debug_assert(!ts->fixed_reg);
3765 
3766         if (NEED_SYNC_ARG(i)) {
3767             temp_sync(s, ts, o_allocated_regs, 0, IS_DEAD_ARG(i));
3768         } else if (IS_DEAD_ARG(i)) {
3769             temp_dead(s, ts);
3770         }
3771     }
3772 }
3773 
3774 #ifdef TCG_TARGET_STACK_GROWSUP
3775 #define STACK_DIR(x) (-(x))
3776 #else
3777 #define STACK_DIR(x) (x)
3778 #endif
3779 
tcg_reg_alloc_call(TCGContext * s,TCGOp * op)3780 static void tcg_reg_alloc_call(TCGContext *s, TCGOp *op)
3781 {
3782     const int nb_oargs = TCGOP_CALLO(op);
3783     const int nb_iargs = TCGOP_CALLI(op);
3784     const TCGLifeData arg_life = op->life;
3785     int flags, nb_regs, i;
3786     TCGReg reg;
3787     TCGArg arg;
3788     TCGTemp *ts;
3789     intptr_t stack_offset;
3790     size_t call_stack_size;
3791     tcg_insn_unit *func_addr;
3792     int allocate_args;
3793     TCGRegSet allocated_regs;
3794 
3795     func_addr = (tcg_insn_unit *)(intptr_t)op->args[nb_oargs + nb_iargs];
3796     flags = op->args[nb_oargs + nb_iargs + 1];
3797 
3798     nb_regs = ARRAY_SIZE(tcg_target_call_iarg_regs);
3799     if (nb_regs > nb_iargs) {
3800         nb_regs = nb_iargs;
3801     }
3802 
3803     /* assign stack slots first */
3804     call_stack_size = (nb_iargs - nb_regs) * sizeof(tcg_target_long);
3805     call_stack_size = (call_stack_size + TCG_TARGET_STACK_ALIGN - 1) &
3806         ~(TCG_TARGET_STACK_ALIGN - 1);
3807     allocate_args = (call_stack_size > TCG_STATIC_CALL_ARGS_SIZE);
3808     if (allocate_args) {
3809         /* XXX: if more than TCG_STATIC_CALL_ARGS_SIZE is needed,
3810            preallocate call stack */
3811         tcg_abort();
3812     }
3813 
3814     stack_offset = TCG_TARGET_CALL_STACK_OFFSET;
3815     for (i = nb_regs; i < nb_iargs; i++) {
3816         arg = op->args[nb_oargs + i];
3817 #ifdef TCG_TARGET_STACK_GROWSUP
3818         stack_offset -= sizeof(tcg_target_long);
3819 #endif
3820         if (arg != TCG_CALL_DUMMY_ARG) {
3821             ts = arg_temp(arg);
3822             temp_load(s, ts, tcg_target_available_regs[ts->type],
3823                       s->reserved_regs, 0);
3824             tcg_out_st(s, ts->type, ts->reg, TCG_REG_CALL_STACK, stack_offset);
3825         }
3826 #ifndef TCG_TARGET_STACK_GROWSUP
3827         stack_offset += sizeof(tcg_target_long);
3828 #endif
3829     }
3830 
3831     /* assign input registers */
3832     allocated_regs = s->reserved_regs;
3833     for (i = 0; i < nb_regs; i++) {
3834         arg = op->args[nb_oargs + i];
3835         if (arg != TCG_CALL_DUMMY_ARG) {
3836             ts = arg_temp(arg);
3837             reg = tcg_target_call_iarg_regs[i];
3838 
3839             if (ts->val_type == TEMP_VAL_REG) {
3840                 if (ts->reg != reg) {
3841                     tcg_reg_free(s, reg, allocated_regs);
3842                     if (!tcg_out_mov(s, ts->type, reg, ts->reg)) {
3843                         /*
3844                          * Cross register class move not supported.  Sync the
3845                          * temp back to its slot and load from there.
3846                          */
3847                         temp_sync(s, ts, allocated_regs, 0, 0);
3848                         tcg_out_ld(s, ts->type, reg,
3849                                    ts->mem_base->reg, ts->mem_offset);
3850                     }
3851                 }
3852             } else {
3853                 TCGRegSet arg_set = 0;
3854 
3855                 tcg_reg_free(s, reg, allocated_regs);
3856                 tcg_regset_set_reg(arg_set, reg);
3857                 temp_load(s, ts, arg_set, allocated_regs, 0);
3858             }
3859 
3860             tcg_regset_set_reg(allocated_regs, reg);
3861         }
3862     }
3863 
3864     /* mark dead temporaries and free the associated registers */
3865     for (i = nb_oargs; i < nb_iargs + nb_oargs; i++) {
3866         if (IS_DEAD_ARG(i)) {
3867             temp_dead(s, arg_temp(op->args[i]));
3868         }
3869     }
3870 
3871     /* clobber call registers */
3872     for (i = 0; i < TCG_TARGET_NB_REGS; i++) {
3873         if (tcg_regset_test_reg(tcg_target_call_clobber_regs, i)) {
3874             tcg_reg_free(s, i, allocated_regs);
3875         }
3876     }
3877 
3878     /* Save globals if they might be written by the helper, sync them if
3879        they might be read. */
3880     if (flags & TCG_CALL_NO_READ_GLOBALS) {
3881         /* Nothing to do */
3882     } else if (flags & TCG_CALL_NO_WRITE_GLOBALS) {
3883         sync_globals(s, allocated_regs);
3884     } else {
3885         save_globals(s, allocated_regs);
3886     }
3887 
3888     tcg_out_call(s, func_addr);
3889 
3890     /* assign output registers and emit moves if needed */
3891     for(i = 0; i < nb_oargs; i++) {
3892         arg = op->args[i];
3893         ts = arg_temp(arg);
3894 
3895         /* ENV should not be modified.  */
3896         tcg_debug_assert(!ts->fixed_reg);
3897 
3898         reg = tcg_target_call_oarg_regs[i];
3899         tcg_debug_assert(s->reg_to_temp[reg] == NULL);
3900         if (ts->val_type == TEMP_VAL_REG) {
3901             s->reg_to_temp[ts->reg] = NULL;
3902         }
3903         ts->val_type = TEMP_VAL_REG;
3904         ts->reg = reg;
3905         ts->mem_coherent = 0;
3906         s->reg_to_temp[reg] = ts;
3907         if (NEED_SYNC_ARG(i)) {
3908             temp_sync(s, ts, allocated_regs, 0, IS_DEAD_ARG(i));
3909         } else if (IS_DEAD_ARG(i)) {
3910             temp_dead(s, ts);
3911         }
3912     }
3913 }
3914 
3915 #ifdef CONFIG_PROFILER
3916 
3917 /* avoid copy/paste errors */
3918 #define PROF_ADD(to, from, field)                       \
3919     do {                                                \
3920         (to)->field += atomic_read(&((from)->field));   \
3921     } while (0)
3922 
3923 #define PROF_MAX(to, from, field)                                       \
3924     do {                                                                \
3925         typeof((from)->field) val__ = atomic_read(&((from)->field));    \
3926         if (val__ > (to)->field) {                                      \
3927             (to)->field = val__;                                        \
3928         }                                                               \
3929     } while (0)
3930 
3931 /* Pass in a zero'ed @prof */
3932 static inline
tcg_profile_snapshot(TCGProfile * prof,bool counters,bool table)3933 void tcg_profile_snapshot(TCGProfile *prof, bool counters, bool table)
3934 {
3935     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3936     unsigned int i;
3937 
3938     for (i = 0; i < n_ctxs; i++) {
3939         TCGContext *s = atomic_read(&tcg_ctxs[i]);
3940         const TCGProfile *orig = &s->prof;
3941 
3942         if (counters) {
3943             PROF_ADD(prof, orig, cpu_exec_time);
3944             PROF_ADD(prof, orig, tb_count1);
3945             PROF_ADD(prof, orig, tb_count);
3946             PROF_ADD(prof, orig, op_count);
3947             PROF_MAX(prof, orig, op_count_max);
3948             PROF_ADD(prof, orig, temp_count);
3949             PROF_MAX(prof, orig, temp_count_max);
3950             PROF_ADD(prof, orig, del_op_count);
3951             PROF_ADD(prof, orig, code_in_len);
3952             PROF_ADD(prof, orig, code_out_len);
3953             PROF_ADD(prof, orig, search_out_len);
3954             PROF_ADD(prof, orig, interm_time);
3955             PROF_ADD(prof, orig, code_time);
3956             PROF_ADD(prof, orig, la_time);
3957             PROF_ADD(prof, orig, opt_time);
3958             PROF_ADD(prof, orig, restore_count);
3959             PROF_ADD(prof, orig, restore_time);
3960         }
3961         if (table) {
3962             int i;
3963 
3964             for (i = 0; i < NB_OPS; i++) {
3965                 PROF_ADD(prof, orig, table_op_count[i]);
3966             }
3967         }
3968     }
3969 }
3970 
3971 #undef PROF_ADD
3972 #undef PROF_MAX
3973 
tcg_profile_snapshot_counters(TCGProfile * prof)3974 static void tcg_profile_snapshot_counters(TCGProfile *prof)
3975 {
3976     tcg_profile_snapshot(prof, true, false);
3977 }
3978 
tcg_profile_snapshot_table(TCGProfile * prof)3979 static void tcg_profile_snapshot_table(TCGProfile *prof)
3980 {
3981     tcg_profile_snapshot(prof, false, true);
3982 }
3983 
tcg_dump_op_count(void)3984 void tcg_dump_op_count(void)
3985 {
3986     TCGProfile prof = {};
3987     int i;
3988 
3989     tcg_profile_snapshot_table(&prof);
3990     for (i = 0; i < NB_OPS; i++) {
3991         qemu_printf("%s %" PRId64 "\n", tcg_op_defs[i].name,
3992                     prof.table_op_count[i]);
3993     }
3994 }
3995 
tcg_cpu_exec_time(void)3996 int64_t tcg_cpu_exec_time(void)
3997 {
3998     unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
3999     unsigned int i;
4000     int64_t ret = 0;
4001 
4002     for (i = 0; i < n_ctxs; i++) {
4003         const TCGContext *s = atomic_read(&tcg_ctxs[i]);
4004         const TCGProfile *prof = &s->prof;
4005 
4006         ret += atomic_read(&prof->cpu_exec_time);
4007     }
4008     return ret;
4009 }
4010 #else
tcg_dump_op_count(void)4011 void tcg_dump_op_count(void)
4012 {
4013     qemu_printf("[TCG profiler not compiled]\n");
4014 }
4015 
tcg_cpu_exec_time(void)4016 int64_t tcg_cpu_exec_time(void)
4017 {
4018     error_report("%s: TCG profiler not compiled", __func__);
4019     exit(EXIT_FAILURE);
4020 }
4021 #endif
4022 
4023 
tcg_gen_code(TCGContext * s,TranslationBlock * tb)4024 int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
4025 {
4026 #ifdef CONFIG_PROFILER
4027     TCGProfile *prof = &s->prof;
4028 #endif
4029     int i, num_insns;
4030     TCGOp *op;
4031 
4032 #ifdef CONFIG_PROFILER
4033     {
4034         int n = 0;
4035 
4036         QTAILQ_FOREACH(op, &s->ops, link) {
4037             n++;
4038         }
4039         atomic_set(&prof->op_count, prof->op_count + n);
4040         if (n > prof->op_count_max) {
4041             atomic_set(&prof->op_count_max, n);
4042         }
4043 
4044         n = s->nb_temps;
4045         atomic_set(&prof->temp_count, prof->temp_count + n);
4046         if (n > prof->temp_count_max) {
4047             atomic_set(&prof->temp_count_max, n);
4048         }
4049     }
4050 #endif
4051 
4052 #ifdef DEBUG_DISAS
4053     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP)
4054                  && qemu_log_in_addr_range(tb->pc))) {
4055         FILE *logfile = qemu_log_lock();
4056         qemu_log("OP:\n");
4057         tcg_dump_ops(s, false);
4058         qemu_log("\n");
4059         qemu_log_unlock(logfile);
4060     }
4061 #endif
4062 
4063 #ifdef CONFIG_DEBUG_TCG
4064     /* Ensure all labels referenced have been emitted.  */
4065     {
4066         TCGLabel *l;
4067         bool error = false;
4068 
4069         QSIMPLEQ_FOREACH(l, &s->labels, next) {
4070             if (unlikely(!l->present) && l->refs) {
4071                 qemu_log_mask(CPU_LOG_TB_OP,
4072                               "$L%d referenced but not present.\n", l->id);
4073                 error = true;
4074             }
4075         }
4076         assert(!error);
4077     }
4078 #endif
4079 
4080 #ifdef CONFIG_PROFILER
4081     atomic_set(&prof->opt_time, prof->opt_time - profile_getclock());
4082 #endif
4083 
4084 #ifdef USE_TCG_OPTIMIZATIONS
4085     tcg_optimize(s);
4086 #endif
4087 
4088 #ifdef CONFIG_PROFILER
4089     atomic_set(&prof->opt_time, prof->opt_time + profile_getclock());
4090     atomic_set(&prof->la_time, prof->la_time - profile_getclock());
4091 #endif
4092 
4093     reachable_code_pass(s);
4094     liveness_pass_1(s);
4095 
4096     if (s->nb_indirects > 0) {
4097 #ifdef DEBUG_DISAS
4098         if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_IND)
4099                      && qemu_log_in_addr_range(tb->pc))) {
4100             FILE *logfile = qemu_log_lock();
4101             qemu_log("OP before indirect lowering:\n");
4102             tcg_dump_ops(s, false);
4103             qemu_log("\n");
4104             qemu_log_unlock(logfile);
4105         }
4106 #endif
4107         /* Replace indirect temps with direct temps.  */
4108         if (liveness_pass_2(s)) {
4109             /* If changes were made, re-run liveness.  */
4110             liveness_pass_1(s);
4111         }
4112     }
4113 
4114 #ifdef CONFIG_PROFILER
4115     atomic_set(&prof->la_time, prof->la_time + profile_getclock());
4116 #endif
4117 
4118 #ifdef DEBUG_DISAS
4119     if (unlikely(qemu_loglevel_mask(CPU_LOG_TB_OP_OPT)
4120                  && qemu_log_in_addr_range(tb->pc))) {
4121         FILE *logfile = qemu_log_lock();
4122         qemu_log("OP after optimization and liveness analysis:\n");
4123         tcg_dump_ops(s, true);
4124         qemu_log("\n");
4125         qemu_log_unlock(logfile);
4126     }
4127 #endif
4128 
4129     tcg_reg_alloc_start(s);
4130 
4131     s->code_buf = tb->tc.ptr;
4132     s->code_ptr = tb->tc.ptr;
4133 
4134 #ifdef TCG_TARGET_NEED_LDST_LABELS
4135     QSIMPLEQ_INIT(&s->ldst_labels);
4136 #endif
4137 #ifdef TCG_TARGET_NEED_POOL_LABELS
4138     s->pool_labels = NULL;
4139 #endif
4140 
4141     num_insns = -1;
4142     QTAILQ_FOREACH(op, &s->ops, link) {
4143         TCGOpcode opc = op->opc;
4144 
4145 #ifdef CONFIG_PROFILER
4146         atomic_set(&prof->table_op_count[opc], prof->table_op_count[opc] + 1);
4147 #endif
4148 
4149         switch (opc) {
4150         case INDEX_op_mov_i32:
4151         case INDEX_op_mov_i64:
4152         case INDEX_op_mov_vec:
4153             tcg_reg_alloc_mov(s, op);
4154             break;
4155         case INDEX_op_movi_i32:
4156         case INDEX_op_movi_i64:
4157         case INDEX_op_dupi_vec:
4158             tcg_reg_alloc_movi(s, op);
4159             break;
4160         case INDEX_op_dup_vec:
4161             tcg_reg_alloc_dup(s, op);
4162             break;
4163         case INDEX_op_insn_start:
4164             if (num_insns >= 0) {
4165                 size_t off = tcg_current_code_size(s);
4166                 s->gen_insn_end_off[num_insns] = off;
4167                 /* Assert that we do not overflow our stored offset.  */
4168                 assert(s->gen_insn_end_off[num_insns] == off);
4169             }
4170             num_insns++;
4171             for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
4172                 target_ulong a;
4173 #if TARGET_LONG_BITS > TCG_TARGET_REG_BITS
4174                 a = deposit64(op->args[i * 2], 32, 32, op->args[i * 2 + 1]);
4175 #else
4176                 a = op->args[i];
4177 #endif
4178                 s->gen_insn_data[num_insns][i] = a;
4179             }
4180             break;
4181         case INDEX_op_discard:
4182             temp_dead(s, arg_temp(op->args[0]));
4183             break;
4184         case INDEX_op_set_label:
4185             tcg_reg_alloc_bb_end(s, s->reserved_regs);
4186             tcg_out_label(s, arg_label(op->args[0]), s->code_ptr);
4187             break;
4188         case INDEX_op_call:
4189             tcg_reg_alloc_call(s, op);
4190             break;
4191         default:
4192             /* Sanity check that we've not introduced any unhandled opcodes. */
4193             tcg_debug_assert(tcg_op_supported(opc));
4194             /* Note: in order to speed up the code, it would be much
4195                faster to have specialized register allocator functions for
4196                some common argument patterns */
4197             tcg_reg_alloc_op(s, op);
4198             break;
4199         }
4200 #ifdef CONFIG_DEBUG_TCG
4201         check_regs(s);
4202 #endif
4203         /* Test for (pending) buffer overflow.  The assumption is that any
4204            one operation beginning below the high water mark cannot overrun
4205            the buffer completely.  Thus we can test for overflow after
4206            generating code without having to check during generation.  */
4207         if (unlikely((void *)s->code_ptr > s->code_gen_highwater)) {
4208             return -1;
4209         }
4210         /* Test for TB overflow, as seen by gen_insn_end_off.  */
4211         if (unlikely(tcg_current_code_size(s) > UINT16_MAX)) {
4212             return -2;
4213         }
4214     }
4215     tcg_debug_assert(num_insns >= 0);
4216     s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
4217 
4218     /* Generate TB finalization at the end of block */
4219 #ifdef TCG_TARGET_NEED_LDST_LABELS
4220     i = tcg_out_ldst_finalize(s);
4221     if (i < 0) {
4222         return i;
4223     }
4224 #endif
4225 #ifdef TCG_TARGET_NEED_POOL_LABELS
4226     i = tcg_out_pool_finalize(s);
4227     if (i < 0) {
4228         return i;
4229     }
4230 #endif
4231     if (!tcg_resolve_relocs(s)) {
4232         return -2;
4233     }
4234 
4235     /* flush instruction cache */
4236     flush_icache_range((uintptr_t)s->code_buf, (uintptr_t)s->code_ptr);
4237 
4238     return tcg_current_code_size(s);
4239 }
4240 
4241 #ifdef CONFIG_PROFILER
tcg_dump_info(void)4242 void tcg_dump_info(void)
4243 {
4244     TCGProfile prof = {};
4245     const TCGProfile *s;
4246     int64_t tb_count;
4247     int64_t tb_div_count;
4248     int64_t tot;
4249 
4250     tcg_profile_snapshot_counters(&prof);
4251     s = &prof;
4252     tb_count = s->tb_count;
4253     tb_div_count = tb_count ? tb_count : 1;
4254     tot = s->interm_time + s->code_time;
4255 
4256     qemu_printf("JIT cycles          %" PRId64 " (%0.3f s at 2.4 GHz)\n",
4257                 tot, tot / 2.4e9);
4258     qemu_printf("translated TBs      %" PRId64 " (aborted=%" PRId64
4259                 " %0.1f%%)\n",
4260                 tb_count, s->tb_count1 - tb_count,
4261                 (double)(s->tb_count1 - s->tb_count)
4262                 / (s->tb_count1 ? s->tb_count1 : 1) * 100.0);
4263     qemu_printf("avg ops/TB          %0.1f max=%d\n",
4264                 (double)s->op_count / tb_div_count, s->op_count_max);
4265     qemu_printf("deleted ops/TB      %0.2f\n",
4266                 (double)s->del_op_count / tb_div_count);
4267     qemu_printf("avg temps/TB        %0.2f max=%d\n",
4268                 (double)s->temp_count / tb_div_count, s->temp_count_max);
4269     qemu_printf("avg host code/TB    %0.1f\n",
4270                 (double)s->code_out_len / tb_div_count);
4271     qemu_printf("avg search data/TB  %0.1f\n",
4272                 (double)s->search_out_len / tb_div_count);
4273 
4274     qemu_printf("cycles/op           %0.1f\n",
4275                 s->op_count ? (double)tot / s->op_count : 0);
4276     qemu_printf("cycles/in byte      %0.1f\n",
4277                 s->code_in_len ? (double)tot / s->code_in_len : 0);
4278     qemu_printf("cycles/out byte     %0.1f\n",
4279                 s->code_out_len ? (double)tot / s->code_out_len : 0);
4280     qemu_printf("cycles/search byte     %0.1f\n",
4281                 s->search_out_len ? (double)tot / s->search_out_len : 0);
4282     if (tot == 0) {
4283         tot = 1;
4284     }
4285     qemu_printf("  gen_interm time   %0.1f%%\n",
4286                 (double)s->interm_time / tot * 100.0);
4287     qemu_printf("  gen_code time     %0.1f%%\n",
4288                 (double)s->code_time / tot * 100.0);
4289     qemu_printf("optim./code time    %0.1f%%\n",
4290                 (double)s->opt_time / (s->code_time ? s->code_time : 1)
4291                 * 100.0);
4292     qemu_printf("liveness/code time  %0.1f%%\n",
4293                 (double)s->la_time / (s->code_time ? s->code_time : 1) * 100.0);
4294     qemu_printf("cpu_restore count   %" PRId64 "\n",
4295                 s->restore_count);
4296     qemu_printf("  avg cycles        %0.1f\n",
4297                 s->restore_count ? (double)s->restore_time / s->restore_count : 0);
4298 }
4299 #else
tcg_dump_info(void)4300 void tcg_dump_info(void)
4301 {
4302     qemu_printf("[TCG profiler not compiled]\n");
4303 }
4304 #endif
4305 
4306 #ifdef ELF_HOST_MACHINE
4307 /* In order to use this feature, the backend needs to do three things:
4308 
4309    (1) Define ELF_HOST_MACHINE to indicate both what value to
4310        put into the ELF image and to indicate support for the feature.
4311 
4312    (2) Define tcg_register_jit.  This should create a buffer containing
4313        the contents of a .debug_frame section that describes the post-
4314        prologue unwind info for the tcg machine.
4315 
4316    (3) Call tcg_register_jit_int, with the constructed .debug_frame.
4317 */
4318 
4319 /* Begin GDB interface.  THE FOLLOWING MUST MATCH GDB DOCS.  */
4320 typedef enum {
4321     JIT_NOACTION = 0,
4322     JIT_REGISTER_FN,
4323     JIT_UNREGISTER_FN
4324 } jit_actions_t;
4325 
4326 struct jit_code_entry {
4327     struct jit_code_entry *next_entry;
4328     struct jit_code_entry *prev_entry;
4329     const void *symfile_addr;
4330     uint64_t symfile_size;
4331 };
4332 
4333 struct jit_descriptor {
4334     uint32_t version;
4335     uint32_t action_flag;
4336     struct jit_code_entry *relevant_entry;
4337     struct jit_code_entry *first_entry;
4338 };
4339 
4340 void __jit_debug_register_code(void) __attribute__((noinline));
__jit_debug_register_code(void)4341 void __jit_debug_register_code(void)
4342 {
4343     asm("");
4344 }
4345 
4346 /* Must statically initialize the version, because GDB may check
4347    the version before we can set it.  */
4348 struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 };
4349 
4350 /* End GDB interface.  */
4351 
find_string(const char * strtab,const char * str)4352 static int find_string(const char *strtab, const char *str)
4353 {
4354     const char *p = strtab + 1;
4355 
4356     while (1) {
4357         if (strcmp(p, str) == 0) {
4358             return p - strtab;
4359         }
4360         p += strlen(p) + 1;
4361     }
4362 }
4363 
tcg_register_jit_int(void * buf_ptr,size_t buf_size,const void * debug_frame,size_t debug_frame_size)4364 static void tcg_register_jit_int(void *buf_ptr, size_t buf_size,
4365                                  const void *debug_frame,
4366                                  size_t debug_frame_size)
4367 {
4368     struct __attribute__((packed)) DebugInfo {
4369         uint32_t  len;
4370         uint16_t  version;
4371         uint32_t  abbrev;
4372         uint8_t   ptr_size;
4373         uint8_t   cu_die;
4374         uint16_t  cu_lang;
4375         uintptr_t cu_low_pc;
4376         uintptr_t cu_high_pc;
4377         uint8_t   fn_die;
4378         char      fn_name[16];
4379         uintptr_t fn_low_pc;
4380         uintptr_t fn_high_pc;
4381         uint8_t   cu_eoc;
4382     };
4383 
4384     struct ElfImage {
4385         ElfW(Ehdr) ehdr;
4386         ElfW(Phdr) phdr;
4387         ElfW(Shdr) shdr[7];
4388         ElfW(Sym)  sym[2];
4389         struct DebugInfo di;
4390         uint8_t    da[24];
4391         char       str[80];
4392     };
4393 
4394     struct ElfImage *img;
4395 
4396     static const struct ElfImage img_template = {
4397         .ehdr = {
4398             .e_ident[EI_MAG0] = ELFMAG0,
4399             .e_ident[EI_MAG1] = ELFMAG1,
4400             .e_ident[EI_MAG2] = ELFMAG2,
4401             .e_ident[EI_MAG3] = ELFMAG3,
4402             .e_ident[EI_CLASS] = ELF_CLASS,
4403             .e_ident[EI_DATA] = ELF_DATA,
4404             .e_ident[EI_VERSION] = EV_CURRENT,
4405             .e_type = ET_EXEC,
4406             .e_machine = ELF_HOST_MACHINE,
4407             .e_version = EV_CURRENT,
4408             .e_phoff = offsetof(struct ElfImage, phdr),
4409             .e_shoff = offsetof(struct ElfImage, shdr),
4410             .e_ehsize = sizeof(ElfW(Shdr)),
4411             .e_phentsize = sizeof(ElfW(Phdr)),
4412             .e_phnum = 1,
4413             .e_shentsize = sizeof(ElfW(Shdr)),
4414             .e_shnum = ARRAY_SIZE(img->shdr),
4415             .e_shstrndx = ARRAY_SIZE(img->shdr) - 1,
4416 #ifdef ELF_HOST_FLAGS
4417             .e_flags = ELF_HOST_FLAGS,
4418 #endif
4419 #ifdef ELF_OSABI
4420             .e_ident[EI_OSABI] = ELF_OSABI,
4421 #endif
4422         },
4423         .phdr = {
4424             .p_type = PT_LOAD,
4425             .p_flags = PF_X,
4426         },
4427         .shdr = {
4428             [0] = { .sh_type = SHT_NULL },
4429             /* Trick: The contents of code_gen_buffer are not present in
4430                this fake ELF file; that got allocated elsewhere.  Therefore
4431                we mark .text as SHT_NOBITS (similar to .bss) so that readers
4432                will not look for contents.  We can record any address.  */
4433             [1] = { /* .text */
4434                 .sh_type = SHT_NOBITS,
4435                 .sh_flags = SHF_EXECINSTR | SHF_ALLOC,
4436             },
4437             [2] = { /* .debug_info */
4438                 .sh_type = SHT_PROGBITS,
4439                 .sh_offset = offsetof(struct ElfImage, di),
4440                 .sh_size = sizeof(struct DebugInfo),
4441             },
4442             [3] = { /* .debug_abbrev */
4443                 .sh_type = SHT_PROGBITS,
4444                 .sh_offset = offsetof(struct ElfImage, da),
4445                 .sh_size = sizeof(img->da),
4446             },
4447             [4] = { /* .debug_frame */
4448                 .sh_type = SHT_PROGBITS,
4449                 .sh_offset = sizeof(struct ElfImage),
4450             },
4451             [5] = { /* .symtab */
4452                 .sh_type = SHT_SYMTAB,
4453                 .sh_offset = offsetof(struct ElfImage, sym),
4454                 .sh_size = sizeof(img->sym),
4455                 .sh_info = 1,
4456                 .sh_link = ARRAY_SIZE(img->shdr) - 1,
4457                 .sh_entsize = sizeof(ElfW(Sym)),
4458             },
4459             [6] = { /* .strtab */
4460                 .sh_type = SHT_STRTAB,
4461                 .sh_offset = offsetof(struct ElfImage, str),
4462                 .sh_size = sizeof(img->str),
4463             }
4464         },
4465         .sym = {
4466             [1] = { /* code_gen_buffer */
4467                 .st_info = ELF_ST_INFO(STB_GLOBAL, STT_FUNC),
4468                 .st_shndx = 1,
4469             }
4470         },
4471         .di = {
4472             .len = sizeof(struct DebugInfo) - 4,
4473             .version = 2,
4474             .ptr_size = sizeof(void *),
4475             .cu_die = 1,
4476             .cu_lang = 0x8001,  /* DW_LANG_Mips_Assembler */
4477             .fn_die = 2,
4478             .fn_name = "code_gen_buffer"
4479         },
4480         .da = {
4481             1,          /* abbrev number (the cu) */
4482             0x11, 1,    /* DW_TAG_compile_unit, has children */
4483             0x13, 0x5,  /* DW_AT_language, DW_FORM_data2 */
4484             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4485             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4486             0, 0,       /* end of abbrev */
4487             2,          /* abbrev number (the fn) */
4488             0x2e, 0,    /* DW_TAG_subprogram, no children */
4489             0x3, 0x8,   /* DW_AT_name, DW_FORM_string */
4490             0x11, 0x1,  /* DW_AT_low_pc, DW_FORM_addr */
4491             0x12, 0x1,  /* DW_AT_high_pc, DW_FORM_addr */
4492             0, 0,       /* end of abbrev */
4493             0           /* no more abbrev */
4494         },
4495         .str = "\0" ".text\0" ".debug_info\0" ".debug_abbrev\0"
4496                ".debug_frame\0" ".symtab\0" ".strtab\0" "code_gen_buffer",
4497     };
4498 
4499     /* We only need a single jit entry; statically allocate it.  */
4500     static struct jit_code_entry one_entry;
4501 
4502     uintptr_t buf = (uintptr_t)buf_ptr;
4503     size_t img_size = sizeof(struct ElfImage) + debug_frame_size;
4504     DebugFrameHeader *dfh;
4505 
4506     img = g_malloc(img_size);
4507     *img = img_template;
4508 
4509     img->phdr.p_vaddr = buf;
4510     img->phdr.p_paddr = buf;
4511     img->phdr.p_memsz = buf_size;
4512 
4513     img->shdr[1].sh_name = find_string(img->str, ".text");
4514     img->shdr[1].sh_addr = buf;
4515     img->shdr[1].sh_size = buf_size;
4516 
4517     img->shdr[2].sh_name = find_string(img->str, ".debug_info");
4518     img->shdr[3].sh_name = find_string(img->str, ".debug_abbrev");
4519 
4520     img->shdr[4].sh_name = find_string(img->str, ".debug_frame");
4521     img->shdr[4].sh_size = debug_frame_size;
4522 
4523     img->shdr[5].sh_name = find_string(img->str, ".symtab");
4524     img->shdr[6].sh_name = find_string(img->str, ".strtab");
4525 
4526     img->sym[1].st_name = find_string(img->str, "code_gen_buffer");
4527     img->sym[1].st_value = buf;
4528     img->sym[1].st_size = buf_size;
4529 
4530     img->di.cu_low_pc = buf;
4531     img->di.cu_high_pc = buf + buf_size;
4532     img->di.fn_low_pc = buf;
4533     img->di.fn_high_pc = buf + buf_size;
4534 
4535     dfh = (DebugFrameHeader *)(img + 1);
4536     memcpy(dfh, debug_frame, debug_frame_size);
4537     dfh->fde.func_start = buf;
4538     dfh->fde.func_len = buf_size;
4539 
4540 #ifdef DEBUG_JIT
4541     /* Enable this block to be able to debug the ELF image file creation.
4542        One can use readelf, objdump, or other inspection utilities.  */
4543     {
4544         FILE *f = fopen("/tmp/qemu.jit", "w+b");
4545         if (f) {
4546             if (fwrite(img, img_size, 1, f) != img_size) {
4547                 /* Avoid stupid unused return value warning for fwrite.  */
4548             }
4549             fclose(f);
4550         }
4551     }
4552 #endif
4553 
4554     one_entry.symfile_addr = img;
4555     one_entry.symfile_size = img_size;
4556 
4557     __jit_debug_descriptor.action_flag = JIT_REGISTER_FN;
4558     __jit_debug_descriptor.relevant_entry = &one_entry;
4559     __jit_debug_descriptor.first_entry = &one_entry;
4560     __jit_debug_register_code();
4561 }
4562 #else
4563 /* No support for the feature.  Provide the entry point expected by exec.c,
4564    and implement the internal function we declared earlier.  */
4565 
tcg_register_jit_int(void * buf,size_t size,const void * debug_frame,size_t debug_frame_size)4566 static void tcg_register_jit_int(void *buf, size_t size,
4567                                  const void *debug_frame,
4568                                  size_t debug_frame_size)
4569 {
4570 }
4571 
tcg_register_jit(void * buf,size_t buf_size)4572 void tcg_register_jit(void *buf, size_t buf_size)
4573 {
4574 }
4575 #endif /* ELF_HOST_MACHINE */
4576 
4577 #if !TCG_TARGET_MAYBE_vec
tcg_expand_vec_op(TCGOpcode o,TCGType t,unsigned e,TCGArg a0,...)4578 void tcg_expand_vec_op(TCGOpcode o, TCGType t, unsigned e, TCGArg a0, ...)
4579 {
4580     g_assert_not_reached();
4581 }
4582 #endif
4583